123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331 |
- #coding:utf8
- import codecs
- import psycopg2
- #此文件是作导入导出数据用
def importPredict():
    """Reload the ``dl_predict`` table from ``predict.txt``.

    All existing rows of ``dl_predict`` are deleted, then one row is inserted
    per non-blank line of the UTF-8, tab-separated file: field 0 is stored as
    ``entity_id`` and field 1 as ``dl_predict``. The delete and the inserts
    share one transaction that is committed only after the whole file has
    been read; the connection is closed on every exit path.

    Raises:
        IndexError: if a non-blank line has fewer than two tab-separated
            fields.
    """
    source_path = "predict.txt"
    # Values are bound by the driver instead of being pasted into the SQL
    # text, so quotes or stray characters in the file cannot alter the
    # statement.
    insert_sql = " insert into dl_predict(entity_id,dl_predict) values(%s,%s)"

    # NOTE(review): connection settings are hard-coded; confirm this is
    # intended for anything other than the test database.
    conn = psycopg2.connect(dbname="BiddingKM_test_10000",user="postgres",password="postgres",host="192.168.2.101")
    try:
        cursor = conn.cursor()
        cursor.execute(" delete from dl_predict ")
        with codecs.open(source_path, "r", encoding="utf8") as f:
            for line in f:
                # A blank (e.g. trailing) line carries no record; skip it
                # rather than failing on the missing second field.
                if not line.strip():
                    continue
                fields = line.split("\t")
                entity_id = fields[0]
                # The last field keeps the line terminator; strip it so the
                # bound value is just the number's text. It is sent as a
                # quoted literal, which the server converts to the column
                # type.
                prediction = fields[1].strip()
                cursor.execute(insert_sql, (entity_id, prediction))
        conn.commit()
    finally:
        conn.close()
-
def importIterateLabel():
    """Reload the ``final_label`` table from ``final_label.txt``.

    All existing rows of ``final_label`` are deleted, then one row is inserted
    per non-blank line of the UTF-8, tab-separated file: field 0 is stored as
    ``entity_id``; the label is ``1`` when field 2 parses to the integer 1 and
    ``-1`` for any other integer. The delete and the inserts share one
    transaction that is committed only after the whole file has been read;
    the connection is closed on every exit path.

    Raises:
        IndexError: if a non-blank line has fewer than three tab-separated
            fields.
        ValueError: if field 2 is not an integer.
    """
    source_path = "final_label.txt"
    # Values are bound by the driver instead of being pasted into the SQL
    # text, so quotes in an entity id cannot alter the statement.
    insert_sql = " insert into final_label(entity_id,label) values(%s,%s)"

    # NOTE(review): connection settings are hard-coded; confirm this is
    # intended for anything other than the test database.
    conn = psycopg2.connect(dbname="BiddingKM_test_10000",user="postgres",password="postgres",host="192.168.2.101")
    try:
        cursor = conn.cursor()
        cursor.execute(" delete from final_label ")
        with codecs.open(source_path, "r", encoding="utf8") as f:
            for line in f:
                # A blank (e.g. trailing) line carries no record; skip it
                # rather than failing on the missing fields.
                if not line.strip():
                    continue
                fields = line.split("\t")
                entity_id = fields[0]
                # Collapse the raw value to a binary +1 / -1 label.
                label = 1 if int(fields[2]) == 1 else -1
                cursor.execute(insert_sql, (entity_id, label))
        conn.commit()
    finally:
        conn.close()
-
def exportResult():
    """Write an HTML report comparing three top-candidate rankings per document.

    For every document id returned by the holdout query, the function looks up
    the best candidate entity by ``expectation``, by ``dl_predict`` and by a
    weighted combination of the two, fetches the labelled value of the
    ``win_tenderer`` column from ``articles``, and writes one table row with
    the candidates, their scores, the labelled value and three comparison
    tags (``#A*`` combined, ``#B*`` expectation, ``#C*`` dl_predict).

    Tag digit: 0 = score above 0.5 and candidate equals the label,
    1 = score above 0.5 and candidate differs, 2 = score not above 0.5 and
    candidate equals the label, 3 = score not above 0.5 and candidate differs.

    The report is written to ``"testCompare" + exporttable + ".html"`` in the
    current directory. The connection is closed on every exit path.

    Raises:
        IndexError: if a document has no candidate row or no ``articles`` row.
    """
    exporttable = " is_wintenderer "
    column = " win_tenderer "

    # Table and column names are fixed constants of this function, so they
    # are spliced into the SQL text; the per-document values are bound as
    # parameters (%s) so their content cannot alter the statements.
    doc_sql = "select distinct B.doc_id from "+exporttable+" A,entity_mention B,dd_graph_variables_holdout D where A.id=D.variable_id and A.entity_id=B.entity_id"
    # NOTE(review): the "dd" ranking reads `expectation` from dl_predict,
    # the same table as the "dl" ranking - confirm that table really carries
    # both columns.
    dd_sql = " select A.entity_text,B.expectation,B.entity_id from entity_mention A,dl_predict B where A.doc_id=%s and A.entity_id=B.entity_id order by B.expectation desc limit 1"
    dl_sql = " select A.entity_text,B.dl_predict,B.entity_id from entity_mention A,dl_predict B where A.doc_id=%s and A.entity_id=B.entity_id order by B.dl_predict desc limit 1"
    # NOTE(review): the reported score is weighted 0.6/0.4 but the ranking
    # uses 0.5/0.5; kept as found - confirm which weighting is intended.
    sum_sql = " select A.entity_text,B.dl_predict*0.6+B.expectation*0.4 from entity_mention A,dl_predict B where A.doc_id=%s and A.entity_id=B.entity_id order by B.dl_predict*0.5+B.expectation*0.5 desc limit 1"
    label_sql = " select "+column+" from articles where id=%s "

    header = (
        '<html><head>'
        '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">'
        '</head>'
        '<body bgcolor="#FFFFFF">'
        '<table border="1">'
        '<tr>'
        '<td>id</td>'
        '<td>sumvalue</td>'
        '<td>sum期望</td>'
        '<td>ddvalue</td>'
        '<td>dd期望</td>'
        '<td>dlvalue</td>'
        '<td>dl期望</td>'
        '<td>标注第一候选</td>'
        '<td>相比</td>'
        '</tr>'
    )
    footer = '</table></body></html>'

    def compare_tag(prefix, entity, expectation, labelled):
        # Encode "score above 0.5?" and "candidate equals label?" as one digit.
        digit = 0 if expectation > 0.5 else 2
        if not entity == labelled:
            digit += 1
        return "#" + prefix + str(digit)

    # NOTE(review): connection settings are hard-coded; confirm this is
    # intended for anything other than the test database.
    conn = psycopg2.connect(dbname="BiddingKM_test_10000",user="postgres",password="postgres",host="192.168.2.101")
    try:
        cursor = conn.cursor()

        def first_row(sql, params):
            # Run the query and return its first row (IndexError if none).
            cursor.execute(sql, params)
            return cursor.fetchall()[0]

        cursor.execute(doc_sql)
        doc_rows = cursor.fetchall()
        with codecs.open("testCompare"+exporttable+".html","w",encoding="utf8") as f:
            f.write(header)
            for doc_row in doc_rows:
                doc_id = doc_row[0]
                params = (doc_id,)

                dd_best = first_row(dd_sql, params)
                dd_entity, dd_expectation = dd_best[0], dd_best[1]
                dl_best = first_row(dl_sql, params)
                dl_entity, dl_expectation = dl_best[0], dl_best[1]
                sum_best = first_row(sum_sql, params)
                sum_entity, sum_expectation = sum_best[0], sum_best[1]
                re_match = first_row(label_sql, params)[0]

                tags = (
                    compare_tag("A", sum_entity, sum_expectation, re_match)
                    + compare_tag("B", dd_entity, dd_expectation, re_match)
                    + compare_tag("C", dl_entity, dl_expectation, re_match)
                )
                cells = (
                    doc_id, sum_entity, sum_expectation,
                    dd_entity, dd_expectation,
                    dl_entity, dl_expectation,
                    re_match, tags,
                )
                f.write("<tr>")
                f.write("".join("<td>"+str(cell)+"</td>" for cell in cells))
                f.write("</tr>")
                f.write("\n")
            f.write(footer)
    finally:
        conn.close()
-
def exportDDResult():
    """Write an HTML report of the top ``expectation`` candidate per document.

    For every document id returned by the holdout query, the function reads
    the highest-``expectation`` entity from the ``*_label_inference`` table,
    the labelled ``win_tenderer`` value from ``articles``, and the best
    ``expectation`` among entities whose text equals that label (``-1`` when
    there is none, including when the label is NULL). One table row is
    written per document, ending with a tag:

    * ``#B0`` - the labelled entity scores above 0.8, or the top candidate
      equals the label;
    * ``#B1`` - neither of the above, and the top candidate scores above 0.8;
    * empty - otherwise.

    The report is written to ``"testCompare" + exporttable + ".html"`` in the
    current directory. The connection is closed on every exit path.

    Raises:
        IndexError: if a document has no candidate row or no ``articles`` row.
    """
    exporttable = " is_wintenderer"
    column = " win_tenderer "

    # Table and column names are fixed constants of this function, so they
    # are spliced into the SQL text; the document id and the labelled entity
    # text are bound as parameters (%s) so that quotes inside them cannot
    # break or alter the statements.
    doc_sql = "select distinct B.doc_id from "+exporttable+" A,entity_mention B,dd_graph_variables_holdout D where A.id=D.variable_id and A.entity_id=B.entity_id"
    top_sql = " select A.entity_text,B.expectation,B.entity_id from entity_mention A,"+exporttable+"_label_inference B where A.doc_id=%s and A.entity_id=B.entity_id order by B.expectation desc limit 1"
    label_sql = " select "+column+" from articles where id=%s "
    labelled_sql = " select A.entity_text,B.expectation,B.entity_id from entity_mention A,"+exporttable+"_label_inference B where A.doc_id=%s and A.entity_id=B.entity_id and A.entity_text=%s order by B.expectation desc limit 1"

    header = (
        '<html><head>'
        '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">'
        '</head>'
        '<body bgcolor="#FFFFFF">'
        '<table border="1">'
        '<tr>'
        '<td>id</td>'
        '<td>ddvalue</td>'
        '<td>dd期望</td>'
        '<td>标注中标</td>'
        '<td>标注最大期望</td>'
        '<td>相比</td>'
        '</tr>'
    )
    footer = '</table></body></html>'

    # NOTE(review): connection settings are hard-coded; confirm this is
    # intended for anything other than the test database.
    conn = psycopg2.connect(dbname="BiddingKM_test_10000",user="postgres",password="postgres",host="192.168.2.101")
    try:
        cursor = conn.cursor()
        cursor.execute(doc_sql)
        doc_rows = cursor.fetchall()
        with codecs.open("testCompare"+exporttable+".html","w",encoding="utf8") as f:
            f.write(header)
            for doc_row in doc_rows:
                doc_id = doc_row[0]

                cursor.execute(top_sql, (doc_id,))
                top = cursor.fetchall()[0]
                dd_entity, dd_expectation = top[0], top[1]

                cursor.execute(label_sql, (doc_id,))
                re_match = cursor.fetchall()[0][0]

                cursor.execute(labelled_sql, (doc_id, re_match))
                labelled_rows = cursor.fetchall()
                # -1 marks "the labelled entity was not among the candidates".
                re_expectation = labelled_rows[0][1] if labelled_rows else -1

                if re_expectation > 0.8 or dd_entity == re_match:
                    isSame_dd_re = "#B0"
                elif dd_expectation > 0.8:
                    isSame_dd_re = "#B1"
                else:
                    isSame_dd_re = ""

                cells = (doc_id, dd_entity, dd_expectation, re_match, re_expectation, isSame_dd_re)
                f.write("<tr>")
                f.write("".join("<td>"+str(cell)+"</td>" for cell in cells))
                f.write("</tr>")
                f.write("\n")
            f.write(footer)
    finally:
        conn.close()
-
def exportDLResult():
    """Write an HTML report of the top ``dl_predict`` candidate per document.

    Documents are those with an ``is_wintenderer`` entity whose id is among
    the first 1000 ids of ``articles_processed``. For each one the function
    reads the highest-``dl_predict`` entity, the labelled ``win_tenderer``
    value from ``articles``, and the best ``dl_predict`` among entities whose
    text equals that label (``-1`` when there is none, including when the
    label is NULL). One table row is written per document, ending with a tag:

    * ``#B0`` - the labelled entity scores above 0.8, or the top candidate
      equals the label;
    * ``#B1`` - neither of the above, and the top candidate scores above 0.8;
    * empty - otherwise.

    The report is written to ``"testCompare" + exporttable + ".html"`` in the
    current directory. The connection is closed on every exit path.

    Raises:
        IndexError: if a document has no candidate row or no ``articles`` row.
    """
    exporttable = " is_wintenderer"
    column = " win_tenderer "

    # Table and column names are fixed constants of this function, so they
    # are spliced into the SQL text; the document id and the labelled entity
    # text are bound as parameters (%s) so that quotes inside them cannot
    # break or alter the statements.
    doc_sql = "select distinct B.doc_id from "+exporttable+" A,entity_mention B where A.entity_id=B.entity_id and B.doc_id in(select id from articles_processed order by id limit 1000)"
    top_sql = " select A.entity_text,B.dl_predict from entity_mention A,dl_predict B where A.doc_id=%s and A.entity_id=B.entity_id order by B.dl_predict desc limit 1"
    label_sql = " select "+column+" from articles where id=%s "
    labelled_sql = " select A.entity_text,B.dl_predict,B.entity_id from entity_mention A,dl_predict B where A.doc_id=%s and A.entity_id=B.entity_id and A.entity_text=%s order by B.dl_predict desc limit 1"

    header = (
        '<html><head>'
        '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">'
        '</head>'
        '<body bgcolor="#FFFFFF">'
        '<table border="1">'
        '<tr>'
        '<td>id</td>'
        '<td>dlvalue</td>'
        '<td>dl期望</td>'
        '<td>标注中标</td>'
        '<td>标注最大期望</td>'
        '<td>相比</td>'
        '</tr>'
    )
    footer = '</table></body></html>'

    # NOTE(review): connection settings are hard-coded; confirm this is
    # intended for anything other than the test database.
    conn = psycopg2.connect(dbname="BiddingKM_test_10000",user="postgres",password="postgres",host="192.168.2.101")
    try:
        cursor = conn.cursor()
        cursor.execute(doc_sql)
        doc_rows = cursor.fetchall()
        with codecs.open("testCompare"+exporttable+".html","w",encoding="utf8") as f:
            f.write(header)
            for doc_row in doc_rows:
                doc_id = doc_row[0]

                cursor.execute(top_sql, (doc_id,))
                top = cursor.fetchall()[0]
                dl_entity, dl_expectation = top[0], top[1]

                cursor.execute(label_sql, (doc_id,))
                re_match = cursor.fetchall()[0][0]

                cursor.execute(labelled_sql, (doc_id, re_match))
                labelled_rows = cursor.fetchall()
                # -1 marks "the labelled entity was not among the candidates".
                re_expectation = labelled_rows[0][1] if labelled_rows else -1

                if re_expectation > 0.8 or dl_entity == re_match:
                    isSame_dd_re = "#B0"
                elif dl_expectation > 0.8:
                    isSame_dd_re = "#B1"
                else:
                    isSame_dd_re = ""

                cells = (doc_id, dl_entity, dl_expectation, re_match, re_expectation, isSame_dd_re)
                f.write("<tr>")
                f.write("".join("<td>"+str(cell)+"</td>" for cell in cells))
                f.write("</tr>")
                f.write("\n")
            f.write(footer)
    finally:
        conn.close()
-
def importReLabel():
    """Reload the ``relabel`` table from ``relabel.txt``.

    All existing rows of ``relabel`` are deleted, then one row is inserted per
    non-blank line of the UTF-8, tab-separated file: field 0 is stored as
    ``entity_id`` and field 1 as ``label``. The delete and the inserts share
    one transaction that is committed only after the whole file has been
    read; the connection is closed on every exit path.

    Raises:
        IndexError: if a non-blank line has fewer than two tab-separated
            fields.
    """
    source_path = "relabel.txt"
    # Values are bound by the driver instead of being pasted into the SQL
    # text, so quotes or stray characters in the file cannot alter the
    # statement.
    insert_sql = " insert into relabel(entity_id,label) values(%s,%s)"

    # NOTE(review): connection settings are hard-coded; confirm this is
    # intended for anything other than the test database.
    conn = psycopg2.connect(dbname="BiddingKM_test_10000",user="postgres",password="postgres",host="192.168.2.101")
    try:
        cursor = conn.cursor()
        cursor.execute(" delete from relabel ")
        with codecs.open(source_path, "r", encoding="utf8") as f:
            for line in f:
                # A blank (e.g. trailing) line carries no record; skip it
                # rather than failing on the missing second field.
                if not line.strip():
                    continue
                fields = line.split("\t")
                entity_id = fields[0]
                # The last field keeps the line terminator; strip it so only
                # the label text is bound. It is sent as a quoted literal,
                # which the server converts to the column type.
                label = fields[1].strip()
                cursor.execute(insert_sql, (entity_id, label))
        conn.commit()
    finally:
        conn.close()
-
if __name__ == "__main__":
    # Script entry point. Only the prediction import is enabled; the other
    # tasks are kept below, disabled, to be switched on by hand as needed.
    importPredict()
    # importIterateLabel()
    # exportResult()
    # exportDDResult()
    # exportDLResult()
    # importReLabel()
-
-
|