Ver código fonte

新增评审专家的联系人模型提交

rogel 4 anos atrás
pai
commit
1e6c8f7ffc

+ 2 - 2
BiddingKG/dl/interface/Preprocessing.py

@@ -1722,7 +1722,7 @@ def get_preprocessed_entitys(list_sentences,useselffool=True,cost_time=dict()):
 
             # 资金来源提取  2020/12/30 新增
             list_moneySource = extract_moneySource(sentence_text)
-            entity_type = "moneySource"
+            entity_type = "moneysource"
             for moneySource in list_moneySource:
                 begin_index_temp = moneySource['begin_index']
                 for j in range(len(list_tokenbegin)):
@@ -1746,7 +1746,7 @@ def get_preprocessed_entitys(list_sentences,useselffool=True,cost_time=dict()):
 
             # 服务期限提取 2020/12/30 新增
             list_servicetime = extract_servicetime(sentence_text)
-            entity_type = "servicetime"
+            entity_type = "serviceTime"
             for servicetime in list_servicetime:
                 begin_index_temp = servicetime['begin_index']
                 for j in range(len(list_tokenbegin)):

+ 3 - 2
BiddingKG/dl/interface/modelFactory.py

@@ -195,8 +195,9 @@ class Model_person_classify():
     '''
     
     def encode(self,tokens,begin_index,end_index,**kwargs):
-        return embedding(spanWindow(tokens=tokens,begin_index=begin_index,end_index=end_index,size=10),shape=(2,10,128))
-    
+        # return embedding(spanWindow(tokens=tokens,begin_index=begin_index,end_index=end_index,size=10),shape=(2,10,128))
+        return embedding(spanWindow(tokens=tokens,begin_index=begin_index,end_index=end_index,size=35),shape=(2,35,128))
+
     def predict(self,x):
         x = np.transpose(np.array(x),(1,0,2,3))
         

BIN
BiddingKG/dl/interface/person_savedmodel/saved_model.pb


BIN
BiddingKG/dl/interface/person_savedmodel/variables/variables.data-00000-of-00001


BIN
BiddingKG/dl/interface/person_savedmodel/variables/variables.index


BIN
BiddingKG/dl/interface/person_savedmodel_backup/saved_model.pb


BIN
BiddingKG/dl/interface/person_savedmodel_backup/variables/variables.data-00000-of-00001


BIN
BiddingKG/dl/interface/person_savedmodel_backup/variables/variables.index


+ 3 - 4
BiddingKG/dl/interface/predictor.py

@@ -239,7 +239,6 @@ class CodeNamePredict():
                 x = [[self.word2index.get(word,index_unk)for word in sentence.sentence_text[:MAX_AREA]]for sentence in list_sentence[_begin_index:_begin_index+_LEN]]
                 x = pad_sequences(x,maxlen=MAX_LEN,padding="post",truncating="post")
                 if USE_PAI_EAS:
-                    
                     request = tf_predict_pb2.PredictRequest()
                     request.inputs["inputs"].dtype = tf_predict_pb2.DT_INT32
                     request.inputs["inputs"].array_shape.dim.extend(np.shape(x))
@@ -1167,8 +1166,8 @@ class TimePredictor():
             return
         points_entitys = datas[1]
         with self.sess.as_default():
-            predict_y = self.sess.run(self.outputs_code, feed_dict={self.inputs_code[0]:datas[0][0]
-                ,self.inputs_code[1]:datas[0][1]})
+            predict_y = limitRun(self.sess,[self.outputs_code], feed_dict={self.inputs_code[0]:datas[0][0]
+                ,self.inputs_code[1]:datas[0][1]})[0]
             for i in range(len(predict_y)):
                 entity = points_entitys[i]
                 label = np.argmax(predict_y[i])
@@ -1456,7 +1455,7 @@ def save_timesplit_model():
 
 if __name__=="__main__":
     #save_role_model()
-    #save_codename_model()
+    save_codename_model()
     #save_money_model()
     #save_person_model()
     #save_form_model()

BIN
BiddingKG/dl/test/list_sentence_entity.pk


+ 10 - 9
BiddingKG/dl/test/test4.py

@@ -89,7 +89,7 @@ def predict(doc_id,text):
     print("getPREMs")
     prem = getAttributes.getPREMs(list_sentences,list_entitys,list_articles)
     print("getPREMs")
-    punish_dic = punish.get_punish_extracts(list_sentences, list_entitys, title='投诉处理 ', text=text)
+    punish_dic = punish.get_punish_extracts(list_sentences, list_entitys, title='', text=text)
     print(punish_dic)
     prem[0][1]['punish'] = punish_dic
 
@@ -99,6 +99,7 @@ def predict(doc_id,text):
     time_release = [] # 发布时间
     time_bidopen = [] # 开标时间
     time_bidclose = [] # 截标时间
+    list_person_review = []
     for entity in list_entitys[0]:
         if entity.entity_type == 'bidway':
             bidway.append(entity.entity_text)
@@ -149,8 +150,8 @@ def test(name,content):
 if __name__=="__main__":
     # filename = "比地_52_79929693.html"
     # #text = codecs.open("C:\\Users\\User\\Desktop\\数据20191014\\"+filename,"r",encoding="utf8").read()
-    # text = codecs.open("C:\\Users\\User\\Desktop\\2.html","r",encoding="utf8").read()
-    # content = str(BeautifulSoup(text).find("div",id="pcontent"))
+    text = codecs.open("C:\\Users\\User\\Desktop\\2.html","r",encoding="utf8").read()
+    content = str(BeautifulSoup(text).find("div",id="pcontent"))
     # df_a = {"html":[]}
     # df_a["html"].append(re.sub('\r|\n|\r\n',"",content))
     # import pandas as pd
@@ -164,14 +165,14 @@ if __name__=="__main__":
     # text = '''大庆禾工煤炭分质清洁利用项目-临时用电二期工程设备、物资采购中标候选人公示,更多咨询报价请点击:http://bulletin.cebpubservice.com/candidateBulletin/2020-03-31/2678597.html,大庆禾工煤炭分质清洁利用顶目-临时用电二期工程设备、物资釆购中标候选人,(招标编号:XYwZ-20200309-5),公示结束时间:2020年04月03日,、评标情况,标段(包)[001大庆禾工煤嶽分质清洁利用项目-临时用屯二期工程设备、物资采购,中标候选人基本情况,
     # 中标候选人第1名:哈尔滨龙网电力设备有限公司,投标报价:19.98万元,质量,合格,工期/交货期/服务期:30天,中标候选人第2名:
     # 哈尔滨昊龙电气没备制造有限公司,投标报价:19.87万元,质,量:合格,工期/交货期/服务期:30天,'''
-    text = '中标候选人第1名:哈尔滨龙网电力设备有限公司,投标报价:19.98万元,质量,合格,工期/交货期/服务期:30天。\
-    投诉处理公告,投诉人:张三。文章编号:京财采投字(2018)第42号。政府采购项目招标方式:公开招标,联系人:黎明。\
-    建设资金来源及性质:资本金40%,自筹60%,,xx.=建设资金来源自筹,项目出资比例为100%,\
-    二次供水泵房浊度仪进行国内组织公开招标采购,时间:2020-05-26,15:15:00,竞价结束时间:2020-05-26,15:45:00允许延时:是,'
+    # text = '中标候选人第1名:哈尔滨龙网电力设备有限公司,投标报价:19.98万元,质量,合格,工期/交货期/服务期:30天。\
+    # 投诉处理公告,投诉人:张三。文章编号:京财采投字(2018)第42号。政府采购项目招标方式:公开招标,联系人:黎明。\
+    # 建设资金来源及性质:资本金40%,自筹60%,,xx.=建设资金来源自筹,项目出资比例为100%,\
+    # 二次供水泵房浊度仪进行国内组织公开招标采购,时间:2020-05-26,15:15:00,竞价结束时间:2020-05-26,15:45:00允许延时:是,'
     a = time.time()
     print("start")
-    # print(predict("12",content))
-    print(predict("投诉处理公告", text))
+    print(predict("12",content))
+    # print(predict("投诉处理公告", text))
     #test("12",text)
     print("takes",time.time()-a)
     pass

+ 1 - 1
BiddingKG/dl/test/test_model_fjs.py

@@ -563,7 +563,7 @@ def plotTrainTestLoss(history_model):
 
 if __name__ == "__main__":
     # getData()
-    # train()
+    train()
     predict()
     # predict2Csv()