123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121 |
- import psycopg2
- from DBUtils.PooledDB import PooledDB
- from BiddingKG.dl.common.Utils import save,load,getUnifyMoney
- from BiddingKG.dl.test.test4 import predict
# Module-wide connection pool; stays None until getConnection() is first called.
pool = None


def getConnection():
    """Return a connection from the shared pool for the ``iepy`` database.

    The pool is created on the first call and stored in the module-level
    ``pool`` variable; later calls reuse it.

    :return: whatever ``pool.connection()`` yields (a pooled DB connection).
    """
    global pool
    if pool is not None:
        return pool.connection()

    # NOTE(review): host and credentials are hard-coded here; consider moving
    # them to configuration.
    # The second positional argument (10) is ``mincached`` in DBUtils'
    # PooledDB signature; remaining keywords are forwarded to psycopg2.connect.
    pool = PooledDB(
        psycopg2,
        10,
        dbname="iepy",
        host="192.168.2.101",
        user="postgres",
        password="postgres",
        port="5432",
    )
    return pool.connection()
def _pair_tenderer_money(annotation_values):
    """Resolve ``rel_tendererMoney`` relations into [type, text, money_text] rows.

    Each value is one tab-separated annotation line:

    * lines starting with ``T`` carry ``<id>\\t<type ...>\\t<text>`` and define
      an entity;
    * lines starting with ``R`` carry ``<id>\\t<relation> <k>:<id1> <k>:<id2>``;
      only relations named ``rel_tendererMoney`` are kept.

    :param annotation_values: iterable of annotation strings for one document.
    :return: list of ``[entity1_type, entity1_text, entity2_text]`` lists, one
        per kept relation, in the order the relations were encountered.
    :raises KeyError: if a relation refers to an entity id that was not defined.
    :raises ValueError: if a line does not have the expected number of
        tab-separated fields.
    """
    entities = {}
    relations = []
    for raw in annotation_values:
        print(raw)
        # startswith() instead of raw[0] so an empty annotation is skipped
        # rather than raising IndexError.
        if raw.startswith("T"):
            ann_id, descriptor, text = raw.split("\t")
            entities[ann_id] = {"type": descriptor.split()[0], "text": text}
        elif raw.startswith("R"):
            _, descriptor = raw.split("\t")
            fields = descriptor.split()
            if fields[0] == "rel_tendererMoney":
                relations.append([fields[1].split(":")[1], fields[2].split(":")[1]])
    # Relations are resolved only after every line has been read, so an entity
    # may be defined after the relation that references it.
    return [
        [entities[first]["type"], entities[first]["text"], entities[second]["text"]]
        for first, second in relations
    ]


def getTendererMoney_fromIEPY():
    """Dump annotated tenderer/money pairs from the IEPY database to a pickle.

    For every row of ``corpus_payroll`` the documents edited by that user
    inside the [begin_time, end_time] window are fetched from
    ``corpus_iedocument``; for each document its ``brat_bratannotation`` values
    are parsed and the result is stored as::

        {docId: {"sourceHtml": <sourcetext>, "roleMoney": [[type, text, money], ...]}}

    The mapping is written with ``save`` to ``dict_docid_role_money.pk``.
    A document reached through several payroll rows is simply overwritten.

    :return: None
    """
    # NOTE(review): block nesting was reconstructed from a source whose
    # indentation had been lost; ``save`` is assumed to run once, after all
    # rows have been processed - confirm.
    sql_documents = (
        " select human_identifier,sourcetext from corpus_iedocument"
        " where edituser=%s"
        " and edittime>=to_date(%s,'yyyy-mm-dd')"
        " and edittime<=to_date(%s,'yyyy-mm-dd')"
    )
    sql_annotations = " select value from brat_bratannotation where document_id=%s"

    dict_docid_role_money = dict()
    conn = getConnection()
    try:
        cursor = conn.cursor()
        try:
            cursor.execute('select "user",begin_time,end_time from corpus_payroll')
            for _user, begin_time, end_time in cursor.fetchall():
                # Values are bound by the driver instead of being %-formatted
                # into the SQL text, so quotes in a value cannot break or alter
                # the statement.  str() keeps each literal identical to what
                # the old string formatting produced (to_date() needs text).
                cursor.execute(
                    sql_documents,
                    (str(_user), str(begin_time), str(end_time)),
                )
                for docId, sourceText in cursor.fetchall():
                    cursor.execute(sql_annotations, (str(docId),))
                    list_value = [row_anno[0] for row_anno in cursor.fetchall()]
                    dict_docid_role_money[docId] = {
                        "sourceHtml": sourceText,
                        "roleMoney": _pair_tenderer_money(list_value),
                    }
        finally:
            cursor.close()
    finally:
        # Hand the connection back even when a query or the parsing fails.
        conn.close()
    save(dict_docid_role_money, "dict_docid_role_money.pk")
def test4(id, content):
    """Run ``predict`` on one document and collect its role/money keys.

    ``predict(id, content)`` is expected to return JSON text whose ``"prem"``
    member maps to objects holding a ``"roleList"``.  For every role entry
    whose first element splits into exactly two parts on ``"_"``
    (presumably names such as ``win_tenderer`` - confirm), a key
    ``"<id>$<entry[1]>$<float(getUnifyMoney(str(entry[2])))>"`` is produced.

    :param id: document identifier, passed to ``predict`` and used as key prefix.
    :param content: document content passed to ``predict``.
    :return: set of key strings.
    """
    import json

    prediction = json.loads(predict(id, content))
    role_money_keys = set()
    for package in prediction["prem"].values():
        for role in package["roleList"]:
            name_parts = role[0].split("_")
            if len(name_parts) != 2:
                continue
            amount = float(getUnifyMoney(str(role[2])))
            role_money_keys.add("%s$%s$%s" % (id, role[1], str(amount)))
    return role_money_keys
def getTendererMoney_extract_percent1():
    """Measure how many annotated role/money pairs ``test4`` reproduces.

    Loads ``dict_docid_role_money.pk``, and for every document that has at
    least one ``roleMoney`` entry builds the annotated key set and the key set
    returned by ``test4``.  Progress, per-document count mismatches and the
    running ratio ``|annotated & extracted| / (|annotated| + 0.01)`` are
    printed; the ratio is printed once more after the last document.

    Previously recorded runs:
        mongo:98/1101,0.089010
        15:extract:526/all:1000=0.525995
        extract:710/all:946=0.750521

    :return: None
    """
    annotated_docs = load("dict_docid_role_money.pk")
    annotated_keys = set()
    extracted_keys = set()

    def _print_ratio():
        # The +0.01 keeps the division defined while no key has been collected.
        hits = len(annotated_keys & extracted_keys)
        total = len(annotated_keys)
        print("extract:%d/all:%d=%f" % (hits, total, hits / (total + 0.01)))

    for position, (doc_id, record) in enumerate(annotated_docs.items(), start=1):
        print("%s===%d" % (doc_id, position))
        role_money = record["roleMoney"]
        if not role_money:
            continue

        source_html = record["sourceHtml"]
        annotated_before = len(annotated_keys)
        extracted_before = len(extracted_keys)

        for role in role_money:
            amount = float(getUnifyMoney(role[2]))
            annotated_keys.add("%s$%s$%s" % (doc_id, role[1], str(amount)))

        from_interface = test4(doc_id, source_html)
        if from_interface is not None:
            extracted_keys |= from_interface

        # Flag documents where both sides contributed a different number of
        # new keys.
        extracted_gain = len(extracted_keys) - extracted_before
        annotated_gain = len(annotated_keys) - annotated_before
        if extracted_gain != annotated_gain:
            print("diff-%s-%d-%d" % (doc_id, extracted_gain, annotated_gain))

        # NOTE(review): nesting was reconstructed from a source that had lost
        # its indentation; the running total is assumed to be printed once per
        # document that has roleMoney entries - confirm.
        _print_ratio()

    _print_ratio()
if __name__=="__main__":
    # Step 1 (disabled): rebuild "dict_docid_role_money.pk" from the database.
    # getTendererMoney_fromIEPY()

    # Step 2: compare the cached annotations with the extraction interface.
    getTendererMoney_extract_percent1()

    # ---- Disabled ad-hoc inspection snippets, kept for reference ----
    # dict_docid_role_money = load("dict_docid_role_money.pk")
    # _set = set()
    # for item in dict_docid_role_money["95001733"]["roleMoney"]:
    #     _v = "%s$%s$%s"%("95004646",item[1],str(float(getUnifyMoney(item[2]))))
    #     _set.add(_v)
    # print(_set)
    # _v = "%s$%s$%s"%("95005274",item[1],str(float(getUnifyMoney(item[2]))))
    # print(dict_docid_role_money["100001185"])
    # print(getUnifyMoney(dict_docid_role_money["100001185"]["roleMoney"][0][2]))
    # print(len(dict_docid_role_money.keys()))
    # _count = 0
    # for _key in dict_docid_role_money.keys():
    #     if len(dict_docid_role_money[_key])>0:
    #         _count+=1
    # print(_count)
|