val_fromiepy.py 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121
  1. import psycopg2
  2. from DBUtils.PooledDB import PooledDB
  3. from BiddingKG.dl.common.Utils import save,load,getUnifyMoney
  4. from BiddingKG.dl.test.test4 import predict
  5. pool = None
  6. def getConnection():
  7. global pool
  8. if pool is None:
  9. pool = PooledDB(psycopg2, 10,dbname="iepy", host="192.168.2.101",user="postgres",password="postgres",port="5432")
  10. return pool.connection()
  11. def getTendererMoney_fromIEPY():
  12. conn = getConnection()
  13. cursor = conn.cursor()
  14. sql = 'select "user",begin_time,end_time from corpus_payroll'
  15. cursor.execute(sql)
  16. rows_user = cursor.fetchall()
  17. dict_docid_role_money = dict()
  18. for row_user in rows_user:
  19. _user = row_user[0]
  20. begin_time = row_user[1]
  21. end_time = row_user[2]
  22. sql = " select human_identifier,sourcetext from corpus_iedocument where edituser='%s' and edittime>=to_date('%s','yyyy-mm-dd') and edittime<=to_date('%s','yyyy-mm-dd')"%(_user,begin_time,end_time)
  23. cursor.execute(sql)
  24. rows_docId = cursor.fetchall()
  25. for row_docId in rows_docId:
  26. docId = row_docId[0]
  27. sourceText = row_docId[1]
  28. sql = " select value from brat_bratannotation where document_id='%s'"%docId
  29. cursor.execute(sql)
  30. rows_anno = cursor.fetchall()
  31. dict_money_entity_pack = dict()
  32. list_value = []
  33. for row_anno in rows_anno:
  34. value = row_anno[0]
  35. list_value.append(value)
  36. dict_anno = dict()
  37. list_rel_anno = []
  38. dict_docid_role_money[docId] = {}
  39. dict_docid_role_money[docId]["sourceHtml"] = sourceText
  40. dict_docid_role_money[docId]["roleMoney"] = []
  41. for _value1 in list_value:
  42. print(_value1)
  43. if _value1[0]=="T":
  44. ID,VL,ENTITY = _value1.split("\t")
  45. dict_anno[ID] = {"type":VL.split()[0],"text":ENTITY}
  46. if _value1[0]=="R":
  47. ID,VL = _value1.split("\t")
  48. if VL.split()[0]=="rel_tendererMoney":
  49. list_rel_anno.append([VL.split()[1].split(":")[1],VL.split()[2].split(":")[1]])
  50. for item in list_rel_anno:
  51. dict_docid_role_money[docId]["roleMoney"].append([dict_anno[item[0]]["type"],dict_anno[item[0]]["text"],dict_anno[item[1]]["text"]])
  52. save(dict_docid_role_money,"dict_docid_role_money.pk")
  53. def test4(id,content):
  54. import json
  55. _result = json.loads(predict(id,content))
  56. set_docid_role_money = set()
  57. for _p in _result["prem"].keys():
  58. for item in _result["prem"][_p]["roleList"]:
  59. if len(item[0].split("_"))==2:
  60. set_docid_role_money.add("%s$%s$%s"%(id,item[1],str(float(getUnifyMoney(str(item[2]))))))
  61. return set_docid_role_money
  62. def getTendererMoney_extract_percent1():
  63. '''
  64. mongo:98/1101,0.089010
  65. 15:extract:526/all:1000=0.525995
  66. extract:710/all:946=0.750521
  67. :return:
  68. '''
  69. dict_docid_role_money = load("dict_docid_role_money.pk")
  70. set_docid_role_money_iepy = set()
  71. set_docid_role_money_interface = set()
  72. _index = 0
  73. for _key in dict_docid_role_money.keys():
  74. _index += 1
  75. print("%s===%d"%(_key,_index))
  76. _size = 0
  77. if len(dict_docid_role_money[_key]["roleMoney"])>0:
  78. sourceHtml = dict_docid_role_money[_key]["sourceHtml"]
  79. len_ipey = len(set_docid_role_money_iepy)
  80. len_interface = len(set_docid_role_money_interface)
  81. for item in dict_docid_role_money[_key]["roleMoney"]:
  82. _v = "%s$%s$%s"%(_key,item[1],str(float(getUnifyMoney(item[2]))))
  83. set_docid_role_money_iepy.add(_v)
  84. _interface = test4(_key,sourceHtml)
  85. if _interface is not None:
  86. set_docid_role_money_interface = set_docid_role_money_interface | _interface
  87. if len(set_docid_role_money_interface)-len_interface!=len(set_docid_role_money_iepy)-len_ipey:
  88. print("diff-%s-%d-%d"%(_key,len(set_docid_role_money_interface)-len_interface,len(set_docid_role_money_iepy)-len_ipey))
  89. print("extract:%d/all:%d=%f"%(len(set_docid_role_money_iepy&set_docid_role_money_interface),len(set_docid_role_money_iepy),len(set_docid_role_money_iepy&set_docid_role_money_interface)/(len(set_docid_role_money_iepy)+0.01)))
  90. print("extract:%d/all:%d=%f"%(len(set_docid_role_money_iepy&set_docid_role_money_interface),len(set_docid_role_money_iepy),len(set_docid_role_money_iepy&set_docid_role_money_interface)/(len(set_docid_role_money_iepy)+0.01)))
  91. if __name__=="__main__":
  92. # getTendererMoney_fromIEPY()
  93. getTendererMoney_extract_percent1()
  94. # dict_docid_role_money = load("dict_docid_role_money.pk")
  95. # _set = set()
  96. # for item in dict_docid_role_money["95001733"]["roleMoney"]:
  97. # _v = "%s$%s$%s"%("95004646",item[1],str(float(getUnifyMoney(item[2]))))
  98. # _set.add(_v)
  99. # print(_set)
  100. # _v = "%s$%s$%s"%("95005274",item[1],str(float(getUnifyMoney(item[2]))))
  101. # print(dict_docid_role_money["100001185"])
  102. # print(getUnifyMoney(dict_docid_role_money["100001185"]["roleMoney"][0][2]))
  103. # print(len(dict_docid_role_money.keys()))
  104. # _count = 0
  105. # for _key in dict_docid_role_money.keys():
  106. # if len(dict_docid_role_money[_key])>0:
  107. # _count+=1
  108. # print(_count)