import_predict.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331
  1. #coding:utf8
  2. import codecs
  3. import psycopg2
  4. #此文件是作导入导出数据用
  5. def importPredict():
  6. file = "predict.txt"
  7. conn = psycopg2.connect(dbname="BiddingKM_test_10000",user="postgres",password="postgres",host="192.168.2.101")
  8. cursor = conn.cursor()
  9. cursor.execute(" delete from dl_predict ")
  10. with codecs.open(file,"r",encoding="utf8") as f:
  11. while(True):
  12. line = f.readline()
  13. if not line:
  14. break
  15. line_split = line.split("\t")
  16. entity_id=line_split[0]
  17. dl_predict = line_split[1]
  18. sql = " insert into dl_predict(entity_id,dl_predict) values('"+str(entity_id)+"',"+str(dl_predict)+")"
  19. cursor.execute(sql)
  20. f.close()
  21. conn.commit()
  22. conn.close()
  23. def importIterateLabel():
  24. file = "final_label.txt"
  25. conn = psycopg2.connect(dbname="BiddingKM_test_10000",user="postgres",password="postgres",host="192.168.2.101")
  26. cursor = conn.cursor()
  27. cursor.execute(" delete from final_label ")
  28. with codecs.open(file,"r",encoding="utf8") as f:
  29. while(True):
  30. line = f.readline()
  31. if not line:
  32. break
  33. line_split = line.split("\t")
  34. entity_id=line_split[0]
  35. if int(line_split[2])==1:
  36. label = "1"
  37. else:
  38. label = "-1"
  39. sql = " insert into final_label(entity_id,label) values('"+str(entity_id)+"',"+str(label)+")"
  40. cursor.execute(sql)
  41. f.close()
  42. conn.commit()
  43. conn.close()
  44. def exportResult():
  45. exporttable = " is_wintenderer "
  46. column = " win_tenderer "
  47. conn = psycopg2.connect(dbname="BiddingKM_test_10000",user="postgres",password="postgres",host="192.168.2.101")
  48. cursor = conn.cursor()
  49. sql = "select distinct B.doc_id from "+exporttable+" A,entity_mention B,dd_graph_variables_holdout D where A.id=D.variable_id and A.entity_id=B.entity_id"
  50. cursor.execute(sql)
  51. rows = cursor.fetchall()
  52. with codecs.open("testCompare"+exporttable+".html","w",encoding="utf8") as f:
  53. f.write('<html><head>\
  54. <meta http-equiv="Content-Type"\
  55. content="text/html; charset=UTF-8">\
  56. </head>\
  57. <body bgcolor="#FFFFFF">\
  58. <table border="1">\
  59. <tr>\
  60. <td>id</td>\
  61. <td>sumvalue</td>\
  62. <td>sum期望</td>\
  63. <td>ddvalue</td>\
  64. <td>dd期望</td>\
  65. <td>dlvalue<</td>\
  66. <td>dl期望</td>\
  67. <td>标注第一候选</td>\
  68. <td>相比</td>\
  69. </tr>')
  70. for row in rows:
  71. f.write("<tr>")
  72. doc_id = row[0]
  73. sql = " select A.entity_text,B.expectation,B.entity_id from entity_mention A,dl_predict B where A.doc_id='"+doc_id+"' and A.entity_id=B.entity_id order by B.expectation desc limit 1"
  74. cursor.execute(sql)
  75. dd_row = cursor.fetchall()[0]
  76. dd_entity = dd_row[0]
  77. dd_expectation = dd_row[1]
  78. sql = " select A.entity_text,B.dl_predict,B.entity_id from entity_mention A,dl_predict B where A.doc_id='"+doc_id+"' and A.entity_id=B.entity_id order by B.dl_predict desc limit 1"
  79. cursor.execute(sql)
  80. dl_row = cursor.fetchall()[0]
  81. dl_entity = dl_row[0]
  82. dl_expectation = dl_row[1]
  83. sql = " select A.entity_text,B.dl_predict*0.6+B.expectation*0.4 from entity_mention A,dl_predict B where A.doc_id='"+doc_id+"' and A.entity_id=B.entity_id order by B.dl_predict*0.5+B.expectation*0.5 desc limit 1"
  84. cursor.execute(sql)
  85. row = cursor.fetchall()[0]
  86. sum_entity = row[0]
  87. sum_expectation = row[1]
  88. sql = " select "+column+" from articles where id='"+doc_id+"' "
  89. cursor.execute(sql)
  90. re_match = cursor.fetchall()[0][0]
  91. if sum_expectation>0.5:
  92. if sum_entity==re_match:
  93. isSame_sum = "#A0"
  94. else:
  95. isSame_sum = "#A1"
  96. else:
  97. if sum_entity==re_match:
  98. isSame_sum = "#A2"
  99. else:
  100. isSame_sum = "#A3"
  101. if dd_expectation>0.5:
  102. if dd_entity==re_match:
  103. isSame_dd_re = "#B0"
  104. else:
  105. isSame_dd_re = "#B1"
  106. else:
  107. if dd_entity==re_match:
  108. isSame_dd_re = "#B2"
  109. else:
  110. isSame_dd_re = "#B3"
  111. if dl_expectation>0.5:
  112. if dl_entity==re_match:
  113. isSame_dl_re = "#C0"
  114. else:
  115. isSame_dl_re = "#C1"
  116. else:
  117. if dl_entity==re_match:
  118. isSame_dl_re = "#C2"
  119. else:
  120. isSame_dl_re = "#C3"
  121. f.write("<td>"+str(doc_id)+"</td>"+"<td>"+str(sum_entity)+"</td>"+"<td>"+str(sum_expectation)+"</td>"+"<td>"+str(dd_entity)+"</td>"+"<td>"+str(dd_expectation)+"</td>"+"<td>"+str(dl_entity)+"</td>"+"<td>"+str(dl_expectation)+"</td>"+"<td>"+str(re_match)+"</td>"+"<td>"+str(isSame_sum+isSame_dd_re+isSame_dl_re)+"</td>")
  122. f.write("</tr>")
  123. f.write("\n")
  124. f.write('</tr>\
  125. </table>\
  126. </body>\
  127. </html>')
  128. f.flush()
  129. f.close()
  130. conn.close()
  131. def exportDDResult():
  132. exporttable = " is_wintenderer"
  133. column = " win_tenderer "
  134. conn = psycopg2.connect(dbname="BiddingKM_test_10000",user="postgres",password="postgres",host="192.168.2.101")
  135. cursor = conn.cursor()
  136. sql = "select distinct B.doc_id from "+exporttable+" A,entity_mention B,dd_graph_variables_holdout D where A.id=D.variable_id and A.entity_id=B.entity_id"
  137. cursor.execute(sql)
  138. rows = cursor.fetchall()
  139. with codecs.open("testCompare"+exporttable+".html","w",encoding="utf8") as f:
  140. f.write('<html><head>\
  141. <meta http-equiv="Content-Type"\
  142. content="text/html; charset=UTF-8">\
  143. </head>\
  144. <body bgcolor="#FFFFFF">\
  145. <table border="1">\
  146. <tr>\
  147. <td>id</td>\
  148. <td>ddvalue</td>\
  149. <td>dd期望</td>\
  150. <td>标注中标</td>\
  151. <td>标注最大期望</td>\
  152. <td>相比</td>\
  153. </tr>')
  154. for row in rows:
  155. f.write("<tr>")
  156. doc_id = row[0]
  157. sql = " select A.entity_text,B.expectation,B.entity_id from entity_mention A,"+exporttable+"_label_inference B where A.doc_id='"+doc_id+"' and A.entity_id=B.entity_id order by B.expectation desc limit 1"
  158. cursor.execute(sql)
  159. dd_row = cursor.fetchall()[0]
  160. dd_entity = dd_row[0]
  161. dd_expectation = dd_row[1]
  162. sql = " select "+column+" from articles where id='"+doc_id+"' "
  163. cursor.execute(sql)
  164. re_match = cursor.fetchall()[0][0]
  165. sql = " select A.entity_text,B.expectation,B.entity_id from entity_mention A,"+exporttable+"_label_inference B where A.doc_id='"+doc_id+"' and A.entity_id=B.entity_id and A.entity_text='"+re_match+"' order by B.expectation desc limit 1"
  166. cursor.execute(sql)
  167. dd_row = cursor.fetchall()
  168. if len(dd_row)>0:
  169. re_expectation = dd_row[0][1]
  170. else:
  171. re_expectation = -1
  172. isSame_dd_re = ""
  173. if re_expectation>0.8:
  174. isSame_dd_re = "#B0"
  175. else:
  176. if dd_entity==re_match:
  177. isSame_dd_re = "#B0"
  178. else:
  179. if dd_expectation>0.8:
  180. isSame_dd_re = "#B1"
  181. f.write("<td>"+str(doc_id)+"</td>"+"<td>"+str(dd_entity)+"</td>"+"<td>"+str(dd_expectation)+"</td>"+"<td>"+str(re_match)+"</td>"+"<td>"+str(re_expectation)+"</td>"+"<td>"+str(isSame_dd_re)+"</td>")
  182. f.write("</tr>")
  183. f.write("\n")
  184. f.write('</tr>\
  185. </table>\
  186. </body>\
  187. </html>')
  188. f.flush()
  189. f.close()
  190. conn.close()
  191. def exportDLResult():
  192. exporttable = " is_wintenderer"
  193. column = " win_tenderer "
  194. conn = psycopg2.connect(dbname="BiddingKM_test_10000",user="postgres",password="postgres",host="192.168.2.101")
  195. cursor = conn.cursor()
  196. sql = "select distinct B.doc_id from "+exporttable+" A,entity_mention B where A.entity_id=B.entity_id and B.doc_id in(select id from articles_processed order by id limit 1000)"
  197. cursor.execute(sql)
  198. rows = cursor.fetchall()
  199. with codecs.open("testCompare"+exporttable+".html","w",encoding="utf8") as f:
  200. f.write('<html><head>\
  201. <meta http-equiv="Content-Type"\
  202. content="text/html; charset=UTF-8">\
  203. </head>\
  204. <body bgcolor="#FFFFFF">\
  205. <table border="1">\
  206. <tr>\
  207. <td>id</td>\
  208. <td>dlvalue</td>\
  209. <td>dl期望</td>\
  210. <td>标注中标</td>\
  211. <td>标注最大期望</td>\
  212. <td>相比</td>\
  213. </tr>')
  214. for row in rows:
  215. f.write("<tr>")
  216. doc_id = row[0]
  217. sql = " select A.entity_text,B.dl_predict from entity_mention A,dl_predict B where A.doc_id='"+doc_id+"' and A.entity_id=B.entity_id order by B.dl_predict desc limit 1"
  218. cursor.execute(sql)
  219. dl_row = cursor.fetchall()[0]
  220. dl_entity = dl_row[0]
  221. dl_expectation = dl_row[1]
  222. sql = " select "+column+" from articles where id='"+doc_id+"' "
  223. cursor.execute(sql)
  224. re_match = cursor.fetchall()[0][0]
  225. sql = " select A.entity_text,B.dl_predict,B.entity_id from entity_mention A,dl_predict B where A.doc_id='"+doc_id+"' and A.entity_id=B.entity_id and A.entity_text='"+re_match+"' order by B.dl_predict desc limit 1"
  226. cursor.execute(sql)
  227. dl_row = cursor.fetchall()
  228. if len(dl_row)>0:
  229. re_expectation = dl_row[0][1]
  230. else:
  231. re_expectation = -1
  232. isSame_dd_re = ""
  233. if re_expectation>0.8:
  234. isSame_dd_re = "#B0"
  235. else:
  236. if dl_entity==re_match:
  237. isSame_dd_re = "#B0"
  238. else:
  239. if dl_expectation>0.8:
  240. isSame_dd_re = "#B1"
  241. f.write("<td>"+str(doc_id)+"</td>"+"<td>"+str(dl_entity)+"</td>"+"<td>"+str(dl_expectation)+"</td>"+"<td>"+str(re_match)+"</td>"+"<td>"+str(re_expectation)+"</td>"+"<td>"+str(isSame_dd_re)+"</td>")
  242. f.write("</tr>")
  243. f.write("\n")
  244. f.write('</tr>\
  245. </table>\
  246. </body>\
  247. </html>')
  248. f.flush()
  249. f.close()
  250. conn.close()
  251. def importReLabel():
  252. file = "relabel.txt"
  253. conn = psycopg2.connect(dbname="BiddingKM_test_10000",user="postgres",password="postgres",host="192.168.2.101")
  254. cursor = conn.cursor()
  255. cursor.execute(" delete from relabel ")
  256. with codecs.open(file,"r",encoding="utf8") as f:
  257. while(True):
  258. line = f.readline()
  259. if not line:
  260. break
  261. line_split = line.split("\t")
  262. entity_id = line_split[0]
  263. label = line_split[1]
  264. sql = " insert into relabel(entity_id,label) values('"+str(entity_id)+"',"+str(label)+")"
  265. cursor.execute(sql)
  266. f.close()
  267. conn.commit()
  268. conn.close()
  269. if __name__=="__main__":
  270. importPredict()
  271. #importIterateLabel()
  272. #exportResult()
  273. #exportDDResult()
  274. #exportDLResult()
  275. #importReLabel()