Преглед изворног кода

因标签对不上重新训练联系人分类模型

Jiasheng пре 4 година
родитељ
комит
fb7b74e311

+ 2 - 2
BiddingKG/dl/interface/modelFactory.py

@@ -170,7 +170,7 @@ class Model_person_classify():
       if self.model_person is None:
         with self.sess_person.as_default() as sess:
           with sess.graph.as_default():
-            meta_graph_def = tf.saved_model.loader.load(sess,tags=["serve"],export_dir=os.path.dirname(__file__)+"/person_savedmodel")
+            meta_graph_def = tf.saved_model.loader.load(sess,tags=["serve"],export_dir=os.path.dirname(__file__)+"/person_savedmodel_new")
             signature_key = tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
             signature_def = meta_graph_def.signature_def
             
@@ -195,8 +195,8 @@ class Model_person_classify():
     '''
     
     def encode(self,tokens,begin_index,end_index,**kwargs):
-        # return embedding(spanWindow(tokens=tokens,begin_index=begin_index,end_index=end_index,size=10),shape=(2,10,128))
         return embedding(spanWindow(tokens=tokens,begin_index=begin_index,end_index=end_index,size=35),shape=(2,35,128))
+        # return embedding(spanWindow(tokens=tokens,begin_index=begin_index,end_index=end_index,size=35),shape=(2,35,128))
 
     def predict(self,x):
         x = np.transpose(np.array(x),(1,0,2,3))

BIN
BiddingKG/dl/interface/person_savedmodel_new/saved_model.pb


BIN
BiddingKG/dl/interface/person_savedmodel_new/variables/variables.data-00000-of-00001


BIN
BiddingKG/dl/interface/person_savedmodel_new/variables/variables.index


BIN
BiddingKG/dl/test/model_person_classify_fjs.model.hdf5


BIN
BiddingKG/dl/test/person_save_model_new/saved_model.pb


BIN
BiddingKG/dl/test/person_save_model_new/variables/variables.data-00000-of-00001


BIN
BiddingKG/dl/test/person_save_model_new/variables/variables.index


+ 9 - 10
BiddingKG/dl/test/test4.py

@@ -79,7 +79,7 @@ def predict(doc_id,text):
     codeName = codeNamePredict.predict(list_sentences,list_entitys=list_entitys)
     print(codeName)
     premPredict.predict(list_sentences,list_entitys)
-    roleRulePredict.predict(list_articles,list_sentences, list_entitys,codeName)
+    # roleRulePredict.predict(list_articles,list_sentences, list_entitys,codeName)
     print("epcPredict")
     epcPredict.predict(list_sentences,list_entitys)
     print("entityLink")
@@ -113,9 +113,9 @@ def test(name,content):
 
 if __name__=="__main__":
     # filename = "比地_52_79929693.html"
-    # #text = codecs.open("C:\\Users\\User\\Desktop\\数据20191014\\"+filename,"r",encoding="utf8").read()
-    # text = codecs.open("C:\\Users\\User\\Desktop\\2.html","r",encoding="utf8").read()
-    # content = str(BeautifulSoup(text).find("div",id="pcontent"))
+    #text = codecs.open("C:\\Users\\User\\Desktop\\数据20191014\\"+filename,"r",encoding="utf8").read()
+    text = codecs.open("C:\\Users\\User\\Desktop\\2.html","r",encoding="utf8").read()
+    content = str(BeautifulSoup(text).find("div",id="pcontent"))
     # df_a = {"html":[]}
     # df_a["html"].append(re.sub('\r|\n|\r\n',"",content))
     # import pandas as pd
@@ -134,13 +134,12 @@ if __name__=="__main__":
     # 建设资金来源及性质:资本金40%,自筹60%,,xx.=建设资金来源自筹,项目出资比例为100%,\
     # 二次供水泵房浊度仪进行国内组织公开招标采购,时间:2020-05-26,15:15:00,竞价结束时间:2020-05-26,15:45:00允许延时:是,'
     a = time.time()
-    text = '''
-    ,光大证券统一认证系统服务器硬件设备更新项目中标候选人公示,项目名称:光大证券统一认证系统服务器硬件设备更新项目,招标编号:CG-202011-030-001,公告日期:2020年12月3日,评标日期:2020年11月30日13时32分,评标地点:光大证券集中采购管理平台,推荐中标候选人:上海致为信息技术有限公司,联系人:殷志超,联系电话:021-22169419
-    '''
+    # text = '''
+    # ,光大证券统一认证系统服务器硬件设备更新项目中标候选人公示,项目名称:光大证券统一认证系统服务器硬件设备更新项目,招标编号:CG-202011-030-001,公告日期:2020年12月3日,评标日期:2020年11月30日13时32分,评标地点:光大证券集中采购管理平台,推荐中标候选人:上海致为信息技术有限公司,联系人:殷志超,联系电话:021-22169419
+    # '''
     print("start")
-    # print(predict("12",content))
-    # 评审专家 100005322
-    print(predict("投诉处理公告", text))
+    print(predict("12",content))
+    # print(predict("投诉处理公告", text))
     #test("12",text)
     print("takes",time.time()-a)
     pass

+ 4 - 4
BiddingKG/dl/test/test_model_fjs.py

@@ -537,7 +537,7 @@ def predict():
     predict_y = model.predict([test_x[0], test_x[1]])
     # predict_y = model.predict([test_x[0], test_x[0]])
     # predict_y = model.predict([test_x[0]])
-    targets_name = ['人名', '联系人', '招标联系人', '代理联系人', '评审专家']
+    targets_name = ['人名', '招标联系人', '代理联系人', '联系人', '评审专家']
     print(classification_report(np.argmax(test_y, axis=1), np.argmax(predict_y, axis=1), target_names=targets_name))
     return predict_y
 
@@ -570,7 +570,7 @@ def hdf52savemodel():
             sess.run(tf.global_variables_initializer())
             h5_to_graph(sess, graph, filepath)
             tf.saved_model.simple_save(sess,
-                                       "./person_save_model/",
+                                       "./person_save_model_new/",
                                        inputs={"input0":time_model.input[0],
                                                "input1":time_model.input[1]},
                                        outputs={"outputs":time_model.output})
@@ -580,7 +580,7 @@ if __name__ == "__main__":
     # getData()
     # train()
     # predict()
-    predict2Csv()
-    # hdf52savemodel()
+    # predict2Csv()
+    hdf52savemodel()
 
     # getData3()