'''
Compare the ``rel_tendererMoney`` relations annotated in the IEPY database
with the role/money pairs returned by ``predict``.

Workflow:
1. ``getTendererMoney_fromIEPY`` reads the annotations from PostgreSQL and
   saves them to ``dict_docid_role_money.pk``.
2. ``getTendererMoney_extract_percent1`` loads that file, runs ``predict`` on
   every document that has annotated pairs and prints the share of annotated
   pairs that ``predict`` reproduced.
'''
import psycopg2
from DBUtils.PooledDB import PooledDB
from BiddingKG.dl.common.Utils import save,load,getUnifyMoney
from BiddingKG.dl_dev.test.test4 import predict

# Lazily created connection pool shared by every getConnection() call.
pool = None


def getConnection():
    '''
    Return a connection taken from the module-wide pool.

    The pool is created on the first call and reused afterwards.

    :return: a pooled connection; closing it hands it back to the pool
    '''
    global pool
    if pool is None:
        # NOTE(review): host and credentials are hard-coded here -- consider
        # moving them to configuration.
        pool = PooledDB(psycopg2, 10, dbname="iepy", host="192.168.2.101",
                        user="postgres", password="postgres", port="5432")
    return pool.connection()


def _role_money_key(doc_id, role_text, money_text):
    '''Build the ``docid$role$amount`` string both sides of the comparison use.'''
    return "%s$%s$%s" % (doc_id, role_text, str(float(getUnifyMoney(money_text))))


def _parse_role_money(annotation_values):
    '''
    Extract the ``rel_tendererMoney`` relations from a document's annotations.

    Each value is a tab-separated line. ``T`` lines look like
    ``ID<TAB>TYPE ...<TAB>TEXT`` and ``R`` lines look like
    ``ID<TAB>NAME Arg1:ID Arg2:ID``.

    :param annotation_values: iterable of annotation strings
    :return: list of ``[arg1 type, arg1 text, arg2 text]``, one per relation
    :raises KeyError: if a relation refers to a ``T`` annotation that is absent
    '''
    entities = {}
    relations = []
    for value in annotation_values:
        print(value)
        # startswith() instead of value[0] so an empty value is skipped
        # rather than raising IndexError.
        if value.startswith("T"):
            # maxsplit keeps any tab inside the annotated text intact.
            entity_id, spec, text = value.split("\t", 2)
            entities[entity_id] = {"type": spec.split()[0], "text": text}
        elif value.startswith("R"):
            # Only the second field is needed; extra trailing fields are ignored.
            parts = value.split("\t")[1].split()
            if parts and parts[0] == "rel_tendererMoney":
                relations.append((parts[1].split(":")[1], parts[2].split(":")[1]))
    # Relations are resolved after the scan so they may precede the entities
    # they refer to.
    return [[entities[arg1]["type"], entities[arg1]["text"], entities[arg2]["text"]]
            for arg1, arg2 in relations]


def getTendererMoney_fromIEPY():
    '''
    Dump the annotated relations of every payroll-listed document to disk.

    For each ``corpus_payroll`` row (user, begin_time, end_time) the documents
    edited by that user inside the period are read together with their
    annotations. The result maps ``human_identifier`` to
    ``{"sourceHtml": sourcetext, "roleMoney": [[type, text, text], ...]}`` and
    is written with ``save`` to ``dict_docid_role_money.pk``.

    :return: None
    '''
    dict_docid_role_money = dict()
    conn = getConnection()
    try:
        cursor = conn.cursor()
        try:
            cursor.execute('select "user",begin_time,end_time from corpus_payroll')
            for _user, begin_time, end_time in cursor.fetchall():
                # Values are bound as parameters so quotes inside them cannot
                # break or alter the statement. They are passed as str so the
                # server still receives plain text literals, exactly as the
                # previous '%s' formatting produced.
                cursor.execute(
                    "select human_identifier,sourcetext from corpus_iedocument"
                    " where edituser=%s"
                    " and edittime>=to_date(%s,'yyyy-mm-dd')"
                    " and edittime<=to_date(%s,'yyyy-mm-dd')",
                    (str(_user), str(begin_time), str(end_time)))
                for docId, sourceText in cursor.fetchall():
                    cursor.execute(
                        "select value from brat_bratannotation where document_id=%s",
                        (str(docId),))
                    list_value = [row_anno[0] for row_anno in cursor.fetchall()]
                    dict_docid_role_money[docId] = {
                        "sourceHtml": sourceText,
                        "roleMoney": _parse_role_money(list_value),
                    }
        finally:
            cursor.close()
    finally:
        # Hands the connection back to the pool.
        conn.close()
    save(dict_docid_role_money, "dict_docid_role_money.pk")


def test4(id, content):
    '''
    Run ``predict`` on one document and collect its role/money keys.

    :param id: document identifier, forwarded to ``predict`` and used as the
        first component of every key
    :param content: document source passed to ``predict``
    :return: set of ``docid$role$amount`` strings, one per ``roleList`` entry
        whose first field splits into exactly two parts on ``_``
    '''
    import json
    _result = json.loads(predict(id, content))
    set_docid_role_money = set()
    for _prem in _result["prem"].values():
        for item in _prem["roleList"]:
            if len(item[0].split("_")) == 2:
                set_docid_role_money.add(_role_money_key(id, item[1], str(item[2])))
    return set_docid_role_money


def _print_extract_stats(set_iepy, set_interface):
    '''Print how many annotated keys were also produced by ``predict``.'''
    matched = len(set_iepy & set_interface)
    total = len(set_iepy)
    # Exact ratio with an explicit guard for an empty set; float() keeps true
    # division on interpreters where ``/`` on ints truncates.
    ratio = float(matched) / total if total else 0.0
    print("extract:%d/all:%d=%f" % (matched, total, ratio))


def getTendererMoney_extract_percent1():
    '''
    Print how many annotated role/money pairs ``predict`` reproduces.

    Loads ``dict_docid_role_money.pk``, builds the key set of the annotated
    pairs and the key set returned by ``test4`` for every document that has at
    least one annotated pair, reports documents whose two counts differ, and
    prints ``extract:<matched>/all:<annotated>=<ratio>``.

    Results recorded by the original author:
    mongo:98/1101,0.089010
    15:extract:526/all:1000=0.525995
    extract:710/all:946=0.750521
    (the last two were printed when the denominator was still padded by 0.01,
    so they differ slightly from the exact ratio printed now)

    :return: None
    '''
    dict_docid_role_money = load("dict_docid_role_money.pk")
    set_docid_role_money_iepy = set()
    set_docid_role_money_interface = set()
    for _index, (_key, _doc) in enumerate(dict_docid_role_money.items(), 1):
        print("%s===%d" % (_key, _index))
        if not _doc["roleMoney"]:
            continue
        len_iepy = len(set_docid_role_money_iepy)
        len_interface = len(set_docid_role_money_interface)
        for item in _doc["roleMoney"]:
            set_docid_role_money_iepy.add(_role_money_key(_key, item[1], item[2]))
        _interface = test4(_key, _doc["sourceHtml"])
        if _interface is not None:
            set_docid_role_money_interface |= _interface
        added_interface = len(set_docid_role_money_interface) - len_interface
        added_iepy = len(set_docid_role_money_iepy) - len_iepy
        if added_interface != added_iepy:
            print("diff-%s-%d-%d" % (_key, added_interface, added_iepy))
        # NOTE(review): running total after each processed document; the exact
        # nesting of this report was reconstructed -- confirm it belongs here.
        _print_extract_stats(set_docid_role_money_iepy, set_docid_role_money_interface)
    _print_extract_stats(set_docid_role_money_iepy, set_docid_role_money_interface)


if __name__=="__main__":
    # Step 1 (needs the IEPY database): rebuild dict_docid_role_money.pk.
    # getTendererMoney_fromIEPY()
    getTendererMoney_extract_percent1()
    # Ad-hoc inspection snippets kept from the original script:
    # dict_docid_role_money = load("dict_docid_role_money.pk")
    # _set = set()
    # for item in dict_docid_role_money["95001733"]["roleMoney"]:
    #     _v = "%s$%s$%s"%("95004646",item[1],str(float(getUnifyMoney(item[2]))))
    #     _set.add(_v)
    # print(_set)
    # _v = "%s$%s$%s"%("95005274",item[1],str(float(getUnifyMoney(item[2]))))
    # print(dict_docid_role_money["100001185"])
    # print(getUnifyMoney(dict_docid_role_money["100001185"]["roleMoney"][0][2]))
    # print(len(dict_docid_role_money.keys()))
    # _count = 0
    # for _key in dict_docid_role_money.keys():
    #     if len(dict_docid_role_money[_key])>0:
    #         _count+=1
    # print(_count)