Browse Source

项目编码和名称代码修改

admin 4 years ago
parent
commit
fc2c04dcde
2 changed files with 13 additions and 6 deletions
  1. +6 -2
      BiddingKG/dl/interface/predictor.py
  2. +7 -4
      BiddingKG/dl/test/test4.py

+ 6 - 2
BiddingKG/dl/interface/predictor.py

@@ -248,7 +248,8 @@ class CodeNamePredict():
                 _LEN = MAX_AREA//MAX_LEN
                 #预测
 
-                x = [[self.word2index.get(word,index_unk)for word in sentence.sentence_text[:MAX_AREA]]for sentence in list_sentence[_begin_index:_begin_index+_LEN]]
+                # x = [[self.word2index.get(word,index_unk)for word in sentence.sentence_text[:MAX_AREA]]for sentence in list_sentence[_begin_index:_begin_index+_LEN]]
+                x = [[getIndexOfWord(word) for word in sentence.sentence_text[:MAX_AREA]]for sentence in list_sentence[_begin_index:_begin_index+_LEN]]
                 x_len = [len(_x) if len(_x) < MAX_LEN else MAX_LEN for _x in x]
                 x = pad_sequences(x,maxlen=MAX_LEN,padding="post",truncating="post")
 
@@ -273,6 +274,7 @@ class CodeNamePredict():
                                                                                               t_input_length:x_len,
                                                                                               t_keepprob:1.0})
                         predict_y = self.decode(_logits,_trans,x_len,7)
+                        # print('==========',_logits)
 
                         '''
                         for item11 in np.argmax(predict_y,-1):
@@ -339,7 +341,7 @@ class CodeNamePredict():
 
                                 if the_code not in code_set:
                                     code_set.add(the_code)
-                                    item[1]['code'] = list(code_set)
+                                    item['code'] = list(code_set)
                     for iter in re.finditer(self.PN_pattern,join_predict):
                         _name = self.fitDataByRule(pad_sentence[iter.span()[0]:iter.span()[1]])
 
@@ -1421,6 +1423,8 @@ def save_codename_model():
         # model.load_weights(filepath)
         saver = tf.train.Saver()
         saver.restore(sess, filepath)
+
+        print("logits",sess.run(logits))
         
         # print("#",sess.run("time_distributed_1/kernel:0"))
 

+ 7 - 4
BiddingKG/dl/test/test4.py

@@ -114,8 +114,8 @@ def test(name,content):
 if __name__=="__main__":
     # filename = "比地_52_79929693.html"
     # #text = codecs.open("C:\\Users\\User\\Desktop\\数据20191014\\"+filename,"r",encoding="utf8").read()
-    text = codecs.open("C:\\Users\\User\\Desktop\\2.html","r",encoding="utf8").read()
-    content = str(BeautifulSoup(text).find("div",id="pcontent"))
+    # text = codecs.open("C:\\Users\\User\\Desktop\\2.html","r",encoding="utf8").read()
+    # content = str(BeautifulSoup(text).find("div",id="pcontent"))
     # df_a = {"html":[]}
     # df_a["html"].append(re.sub('\r|\n|\r\n',"",content))
     # import pandas as pd
@@ -134,9 +134,12 @@ if __name__=="__main__":
     # 建设资金来源及性质:资本金40%,自筹60%,,xx.=建设资金来源自筹,项目出资比例为100%,\
     # 二次供水泵房浊度仪进行国内组织公开招标采购,时间:2020-05-26,15:15:00,竞价结束时间:2020-05-26,15:45:00允许延时:是,'
     a = time.time()
+    text = '''
+    SC2020113000007成交结果,一、项目信息,采购日期:2020-11-3011:39:12,采购单位:机械科学与工程学院,成交供应商:上海晨光科力普办公用品有限公司,支付方式:货到付款,订单编号:SC2020113000007,二、成交结果,商品名称:威联通(QNAP),网络存储服务器,TS-873,八盘位企业级nas,8G内存,64TB,1TSSD,八盘位企业级nas,8G内存,64TB,1TSSD,规格型号:TS-873,数量:1,:X,单价(元):24600.00,:=,小计(元):¥24600.00。
+成交金额::¥24600.00。'''
     print("start")
-    print(predict("12",content))
-    # print(predict("投诉处理公告", text))
+    # print(predict("12",content))
+    print(predict("投诉处理公告", text))
     #test("12",text)
     print("takes",time.time()-a)
     pass