#coding:utf8
"""Import prediction/label files into PostgreSQL and export HTML reports.

The import functions replace the contents of one table with the rows of a
tab-separated text file.  The export functions compare, per document, the
best-scoring candidate entity with the value stored in ``articles`` and write
the comparison as an HTML table.
"""
import codecs
from xml.sax.saxutils import escape

import psycopg2

# This file is used for importing and exporting data.

# NOTE(review): connection settings (including the password) are hard-coded
# exactly as in the original script; consider moving them to configuration.
_DB_SETTINGS = {
    "dbname": "BiddingKM_test_10000",
    "user": "postgres",
    "password": "postgres",
    "host": "192.168.2.101",
}

# NOTE(review): the markup inside the original HTML string literals was not
# recoverable from the source as received (only the cell text survived), so
# this page/table wrapper is a minimal reconstruction -- confirm it against
# the original template.
_HTML_HEAD = '<html><head><meta charset="utf-8"></head><body><table border="1">\n'
_HTML_TAIL = "</table></body></html>"

# Header cell text as found in the source.  The stray "<" after "dlvalue" is
# present there; it is kept (and escaped on output) so the rendered header is
# unchanged.  NOTE(review): cell boundaries were inferred from the number of
# data cells per row -- confirm.
_RESULT_HEADERS = ("id", "sumvalue", "sum期望", "ddvalue", "dd期望",
                   "dlvalue<", "dl期望", "标注", "第一候选相比")
_DD_HEADERS = ("id", "ddvalue", "dd期望", "标注中标", "标注最大期望", "相比")
_DL_HEADERS = ("id", "dlvalue", "dl期望", "标注中标", "标注最大期望", "相比")

# Sentinel meaning "do not filter by entity text" (None is a legal filter).
_NO_FILTER = object()


def _connect():
    """Open a new connection using the module's hard-coded settings."""
    return psycopg2.connect(**_DB_SETTINGS)


def _reload_table(path, table, value_column, parse_value):
    """Replace all rows of *table* with rows parsed from the TSV file *path*.

    Each non-blank line is split on tabs; the first field is stored as
    ``entity_id`` and ``parse_value(fields)`` is stored in *value_column*.
    Values are passed as query parameters instead of being concatenated into
    the SQL text.  The delete and all inserts are committed together; on any
    error nothing is committed and the connection is still closed.
    """
    insert_sql = ("insert into " + table + "(entity_id," + value_column
                  + ") values(%s,%s)")
    conn = _connect()
    try:
        cursor = conn.cursor()
        cursor.execute("delete from " + table)
        with codecs.open(path, "r", encoding="utf8") as f:
            for line in f:
                # A blank (e.g. trailing) line has no fields to insert.
                if not line.strip():
                    continue
                fields = line.rstrip("\r\n").split("\t")
                cursor.execute(insert_sql, (fields[0], parse_value(fields)))
        conn.commit()
    finally:
        conn.close()


def _second_field(fields):
    """Return the second TSV field without surrounding whitespace.

    The value is sent as a quoted literal; PostgreSQL coerces it to the
    target column's type on insert.
    """
    return fields[1].strip()


def _binary_label(fields):
    """Map the third TSV field to 1 when it equals 1, otherwise to -1."""
    return 1 if int(fields[2]) == 1 else -1


def importPredict():
    """Reload table ``dl_predict`` from ``predict.txt`` (entity_id, dl_predict)."""
    _reload_table("predict.txt", "dl_predict", "dl_predict", _second_field)


def importIterateLabel():
    """Reload table ``final_label`` from ``final_label.txt``.

    The label is 1 when the third column of the file is 1 and -1 otherwise.
    """
    _reload_table("final_label.txt", "final_label", "label", _binary_label)


def _html_row(cells):
    """Render *cells* as one HTML table row followed by a newline."""
    # Escape &, < and > so that entity text cannot break the table markup.
    tds = "".join("<td>" + escape(str(cell)) + "</td>" for cell in cells)
    return "<tr>" + tds + "</tr>\n"


def _write_report(path, headers, rows):
    """Write *headers* and every row of the iterable *rows* as an HTML table."""
    with codecs.open(path, "w", encoding="utf8") as f:
        f.write(_HTML_HEAD)
        f.write(_html_row(headers))
        for cells in rows:
            f.write(_html_row(cells))
        f.write(_HTML_TAIL)


def _fetch_doc_ids(cursor, sql):
    """Run *sql* and return the first column of every result row."""
    cursor.execute(sql)
    return [record[0] for record in cursor.fetchall()]


def _top_candidate(cursor, score_table, score_expr, rank_expr, doc_id,
                   entity_text=_NO_FILTER):
    """Return ``(entity_text, score)`` of the best-ranked mention, or None.

    Mentions of document *doc_id* are joined with *score_table* (alias ``B``),
    ordered by *rank_expr* descending, and the first row is returned with
    *score_expr* as its score.  When *entity_text* is given, only mentions
    with exactly that text are considered.
    """
    sql = (" select A.entity_text," + score_expr + " from entity_mention A,"
           + score_table + " B where A.doc_id=%s and A.entity_id=B.entity_id")
    params = [doc_id]
    if entity_text is not _NO_FILTER:
        sql += " and A.entity_text=%s"
        params.append(entity_text)
    sql += " order by " + rank_expr + " desc limit 1"
    cursor.execute(sql, params)
    return cursor.fetchone()


def _annotated_value(cursor, column, doc_id):
    """Return the one-element row holding *column* of article *doc_id*, or None."""
    cursor.execute(" select " + column + " from articles where id=%s ",
                   (doc_id,))
    return cursor.fetchone()


def _grade(prefix, candidate, re_match):
    """Return *prefix* plus a digit 0-3 describing *candidate* vs *re_match*.

    0/1: score above 0.5 and entity equal / not equal to *re_match*;
    2/3: score not above 0.5 and entity equal / not equal to *re_match*.
    """
    entity, expectation = candidate
    code = 0 if expectation > 0.5 else 2
    if entity != re_match:
        code += 1
    return prefix + str(code)


def _result_rows(cursor, doc_ids, column):
    """Yield one report row per document for :func:`exportResult`.

    Documents lacking a candidate in ``dl_predict`` or a row in ``articles``
    are skipped instead of aborting the whole report.
    """
    for doc_id in doc_ids:
        dd = _top_candidate(cursor, "dl_predict", "B.expectation",
                            "B.expectation", doc_id)
        dl = _top_candidate(cursor, "dl_predict", "B.dl_predict",
                            "B.dl_predict", doc_id)
        # NOTE(review): the reported score uses weights 0.6/0.4 while the
        # ranking uses 0.5/0.5, exactly as in the original query -- confirm
        # which weighting is intended.
        combined = _top_candidate(cursor, "dl_predict",
                                  "B.dl_predict*0.6+B.expectation*0.4",
                                  "B.dl_predict*0.5+B.expectation*0.5",
                                  doc_id)
        annotated = _annotated_value(cursor, column, doc_id)
        if dd is None or dl is None or combined is None or annotated is None:
            continue
        re_match = annotated[0]
        flags = (_grade("#A", combined, re_match)
                 + _grade("#B", dd, re_match)
                 + _grade("#C", dl, re_match))
        yield (doc_id, combined[0], combined[1], dd[0], dd[1],
               dl[0], dl[1], re_match, flags)


def _top_vs_annotated_rows(cursor, doc_ids, column, score_table, score_column):
    """Yield one report row per document for the DD and DL exports.

    Each row holds the best-scoring entity and its score, the annotated value
    from ``articles``, the best score among mentions whose text equals the
    annotated value (-1 when there is none) and a flag:
    ``#B0`` when that score exceeds 0.8 or the best entity equals the
    annotated value, ``#B1`` when instead the best score exceeds 0.8, and an
    empty string otherwise.  Documents lacking a candidate or an ``articles``
    row are skipped.
    """
    score_expr = "B." + score_column
    for doc_id in doc_ids:
        top = _top_candidate(cursor, score_table, score_expr, score_expr,
                             doc_id)
        annotated = _annotated_value(cursor, column, doc_id)
        if top is None or annotated is None:
            continue
        top_entity, top_expectation = top
        re_match = annotated[0]
        labelled = _top_candidate(cursor, score_table, score_expr, score_expr,
                                  doc_id, re_match)
        re_expectation = labelled[1] if labelled is not None else -1
        if re_expectation > 0.8 or top_entity == re_match:
            flag = "#B0"
        elif top_expectation > 0.8:
            flag = "#B1"
        else:
            flag = ""
        yield (doc_id, top_entity, top_expectation, re_match,
               re_expectation, flag)


def exportResult():
    """Write ``testCompare is_wintenderer .html`` comparing three rankings.

    For every hold-out document the best candidate by ``expectation``, by
    ``dl_predict`` and by their weighted sum is compared with the
    ``win_tenderer`` value stored in ``articles``.
    """
    exporttable = " is_wintenderer "
    column = " win_tenderer "
    conn = _connect()
    try:
        cursor = conn.cursor()
        doc_ids = _fetch_doc_ids(
            cursor,
            "select distinct B.doc_id from " + exporttable
            + " A,entity_mention B,dd_graph_variables_holdout D"
            + " where A.id=D.variable_id and A.entity_id=B.entity_id")
        # The file name keeps the spaces embedded in exporttable, as before.
        _write_report("testCompare" + exporttable + ".html", _RESULT_HEADERS,
                      _result_rows(cursor, doc_ids, column))
    finally:
        conn.close()


def exportDDResult():
    """Write ``testCompare is_wintenderer.html`` from the inference table.

    Scores are read from ``is_wintenderer_label_inference.expectation`` for
    every hold-out document.  NOTE(review): :func:`exportDLResult` writes to
    the same file name.
    """
    exporttable = " is_wintenderer"
    column = " win_tenderer "
    conn = _connect()
    try:
        cursor = conn.cursor()
        doc_ids = _fetch_doc_ids(
            cursor,
            "select distinct B.doc_id from " + exporttable
            + " A,entity_mention B,dd_graph_variables_holdout D"
            + " where A.id=D.variable_id and A.entity_id=B.entity_id")
        _write_report("testCompare" + exporttable + ".html", _DD_HEADERS,
                      _top_vs_annotated_rows(cursor, doc_ids, column,
                                             exporttable + "_label_inference",
                                             "expectation"))
    finally:
        conn.close()


def exportDLResult():
    """Write ``testCompare is_wintenderer.html`` from ``dl_predict``.

    Only documents among the first 1000 ids of ``articles_processed`` are
    reported.  NOTE(review): :func:`exportDDResult` writes to the same file
    name.
    """
    exporttable = " is_wintenderer"
    column = " win_tenderer "
    conn = _connect()
    try:
        cursor = conn.cursor()
        doc_ids = _fetch_doc_ids(
            cursor,
            "select distinct B.doc_id from " + exporttable
            + " A,entity_mention B where A.entity_id=B.entity_id"
            + " and B.doc_id in(select id from articles_processed"
            + " order by id limit 1000)")
        _write_report("testCompare" + exporttable + ".html", _DL_HEADERS,
                      _top_vs_annotated_rows(cursor, doc_ids, column,
                                             "dl_predict", "dl_predict"))
    finally:
        conn.close()


def importReLabel():
    """Reload table ``relabel`` from ``relabel.txt`` (entity_id, label)."""
    _reload_table("relabel.txt", "relabel", "label", _second_field)


if __name__=="__main__":
    importPredict()
    #importIterateLabel()
    #exportResult()
    #exportDDResult()
    #exportDLResult()
    #importReLabel()