
Integrate the time-classification, funding-source, bidding-method, and service-term elements into the entity classes

bidi, 4 years ago
parent commit 100a9c905a

+ 7 - 5
BiddingKG/dl/bidway/re_bidway.py

@@ -215,13 +215,15 @@ def calculateLen(ss, i):
 def extract_bidway(text):
     list_bidway = []
     word, text_index_list = re_bidway(text)
-    d = {"body": word, "begin_index": text_index_list[0], "end_index": text_index_list[1]}
-    list_bidway.append(d)
-    print(d)
+    if word is not None:
+        d = {"body": word, "begin_index": text_index_list[0], "end_index": text_index_list[1]}
+        list_bidway.append(d)
+    # print(d)
     return list_bidway
 
 
 if __name__ == "__main__":
-    df = pd.read_csv("C:\\Users\\admin\\Desktop\\bidway_text.csv")
-    s = df["text"].iloc[1]
+    # df = pd.read_csv("C:\\Users\\admin\\Desktop\\bidway_text.csv")
+    # s = df["text"].iloc[1]
+    s = '政府采购项目招标方式:公开招标,联系人:黎明。代理机构地址:广州市天河区'
     extract_bidway(s)
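
With this guard, text that contains no bidding-method phrase now yields an empty list instead of crashing on text_index_list[0]. A minimal sketch of the expected return shape for the new sample sentence (offsets are illustrative, assuming re_bidway matches 公开招标):

    >>> extract_bidway('政府采购项目招标方式:公开招标,联系人:黎明。代理机构地址:广州市天河区')
    [{'body': '公开招标', 'begin_index': 11, 'end_index': 15}]   # offsets illustrative
    >>> extract_bidway('文中没有招标方式')
    []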

+ 1 - 1
BiddingKG/dl/common/Utils.py

@@ -12,7 +12,7 @@ import os
 
 from threading import RLock
 
-from pai_tf_predict_proto import tf_predict_pb2
+# from pai_tf_predict_proto import tf_predict_pb2
 import requests
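
Disabling the import outright will raise NameError in any Utils code path that still references tf_predict_pb2. If the PAI proto package is only needed for remote prediction, a guarded import is a safer pattern (a sketch, not part of this commit):

    try:
        from pai_tf_predict_proto import tf_predict_pb2  # optional, only for PAI online prediction
    except ImportError:
        tf_predict_pb2 = None  # remote-prediction callers must check for None first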
 
 

+ 59 - 22
BiddingKG/dl/complaint/punish_rule.py

@@ -3,6 +3,7 @@
 # @Author  : bidikeji
 # @Time    : 2020/12/24 0024 15:23
 import re
+import os
 import time
 import tensorflow as tf
 from BiddingKG.dl.common.Utils import *
@@ -85,7 +86,8 @@ def decode(logits, trans, sequence_lengths, tag_num):
     return viterbi_sequences
 
 class Punish_Extract():
-    def __init__(self, model_file = "models/21-0.9990081295021194-0.3647936/model.ckpt"):
+    def __init__(self, model_file = os.path.dirname(__file__)+"/models/21-0.9990081295021194-0.3647936/model.ckpt"):
+        print('model_file_path:',model_file)
         self.sess = tf.Session(graph=tf.Graph())
         self.code = ""
         self.punish_dicition = ""
@@ -325,7 +327,7 @@ class Punish_Extract():
         ins = ""
         ptime = ""
         # if the earlier steps found no punishing institution, look for entities in the title and regex-check for keywords
-        if institutions == []:
+        if institutions == [] and len(title)>10:
             title_ners = getNers([title], useselffool=True)
             if title_ners[0]:
                 for title_ner in title_ners[0]:
@@ -426,22 +428,56 @@ class Punish_Extract():
         punishPeople = set([it.entity_text for l in punishPeople for it in l])
         return ';'.join(complainants), ';'.join(punishPeople)
 
-def get_punish_extracts(doc_id=' ', title=' ', text=' '):
-    list_articles, list_sentences, list_entitys, _ = Preprocessing.get_preprocessed([[doc_id, text, "", "", ""]],
-                                                                                    useselffool=True)
-    punish_code = punish.predict_punishCode(list_sentences)
-    # print('处罚编号: ',punish_code)
-    institutions, punishTimes = punish.get_institution(title, list_sentences[0], list_entitys[0])
-    # print('执法机构:',institutions, '\n 处罚时间:', punishTimes)
-    keyword, punishType = punish.get_punishType(title, text)
-    # print('处罚类型:',punishType)
-    punishDecision = punish.get_punishDecision(text, punishType)
-    # print('处罚决定:',punishDecision)
-    punishWhether= punish.get_punishWhether(punishDecision, text, punishType)
-    # print('投诉是否成立:',punishWhether)
-    complainants, punishPeople = punish.get_complainant(punishType, list_sentences[0], list_entitys[0])
-    # print('投诉人:%s  被投诉人:%s'%(complainants, punishPeople))
-    return punish_code, punishType, punishDecision, complainants, punishPeople, punishWhether,institutions, punishTimes
+    def get_punish_extracts_backup(self, doc_id=' ', title=' ', text=' '):
+        list_articles, list_sentences, list_entitys, _ = Preprocessing.get_preprocessed([[doc_id, text, "", "", ""]],
+                                                                                        useselffool=True)
+        punish_code = self.predict_punishCode(list_sentences)
+        # print('处罚编号: ',punish_code)
+        institutions, punishTimes = self.get_institution(title, list_sentences[0], list_entitys[0])
+        # print('执法机构:',institutions, '\n 处罚时间:', punishTimes)
+        keyword, punishType = self.get_punishType(title, text)
+        # print('处罚类型:',punishType)
+        punishDecision = self.get_punishDecision(text, punishType)
+        # print('处罚决定:',punishDecision)
+        punishWhether = self.get_punishWhether(punishDecision, text, punishType)
+        # print('投诉是否成立:',punishWhether)
+        complainants, punishPeople = self.get_complainant(punishType, list_sentences[0], list_entitys[0])
+        # print('投诉人:%s  被投诉人:%s'%(complainants, punishPeople))
+        punish_dic = {'punish_code': punish_code,
+                      'punishType': punishType,
+                      'punishDecision': punishDecision,
+                      'complainants': complainants,
+                      'punishPeople': punishPeople,
+                      'punishWhether': punishWhether,
+                      'institutions': institutions,
+                      'punishTimes': punishTimes}
+        return punish_dic
+        # return punish_code, punishType, punishDecision, complainants, punishPeople, punishWhether,institutions, punishTimes
+
+    def get_punish_extracts(self,list_sentences, list_entitys, title=' ', text=' '):
+        keyword, punishType = self.get_punishType(title, text)
+        if punishType == "未知类别":
+            return {'punishType':punishType}
+        # print('处罚类型:',punishType)
+        punish_code = self.predict_punishCode(list_sentences)
+        # print('处罚编号: ',punish_code)
+        institutions, punishTimes = self.get_institution(title, list_sentences[0], list_entitys[0])
+        # print('执法机构:',institutions, '\n 处罚时间:', punishTimes)
+        punishDecision = self.get_punishDecision(text, punishType)
+        # print('处罚决定:',punishDecision)
+        punishWhether = self.get_punishWhether(punishDecision, text, punishType)
+        # print('投诉是否成立:',punishWhether)
+        complainants, punishPeople = self.get_complainant(punishType, list_sentences[0], list_entitys[0])
+        # print('投诉人:%s  被投诉人:%s'%(complainants, punishPeople))
+        punish_dic = {'punish_code': punish_code,
+                      'punishType': punishType,
+                      'punishDecision': punishDecision,
+                      'complainants': complainants,
+                      'punishPeople': punishPeople,
+                      'punishWhether': punishWhether,
+                      'institutions': institutions,
+                      'punishTimes': punishTimes}
+        return punish_dic
 
 if __name__ == "__main__":
     punish = Punish_Extract(model_file = "models/21-0.9990081295021194-0.3647936/model.ckpt")
@@ -478,11 +514,12 @@ if __name__ == "__main__":
     #    'DETAILLINK', 'sentences', 'PAGE_TIME'])
     # t3 = time.time()
     # print('处理耗时:%.4f, 保存耗时:%.4f'%(t2-t1, t3-t2))
-    s = '厦财企〔2020〕12号,各有关单位:341号。厦财企〔2020〕12号,各有关单位:行政处罚厦建招诉决【2019】342号。行政处罚厦建招诉决【2019】343号。行政处罚厦建招诉决【2019】344号,'
+    s = '编号:厦财企〔2020〕12号,各有关单位:341号。处罚编号:厦财企〔2020〕12号,文章编号:京财采投字(2018)第42号。公告编号:闽建筑招〔2018〕5号。处罚编号:松公管监[2020]2号,'
     # list_sentences = [s.split('。')]
     # punish_code= punish.predict_punishCode( list_sentences)
     # print(punish_code)
 
-    punish_code, punishType, punishDecision, complainants, punishPeople, punishWhether, institutions, punishTimes = \
-                get_punish_extracts(text=s)
-    print(punish_code)
+    # punish_code, punishType, punishDecision, complainants, punishPeople, punishWhether, institutions, punishTimes = \
+    #             get_punish_extracts(text=s)
+    punish_dic = punish.get_punish_extracts_backup(text=s)
+    print(punish_dic)
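
A minimal usage sketch of the refactored entry point (doc_id, title and text are assumed to be provided by the caller; the preprocessing call mirrors get_punish_extracts_backup above):

    punish = Punish_Extract()
    list_articles, list_sentences, list_entitys, _ = Preprocessing.get_preprocessed(
        [[doc_id, text, "", "", ""]], useselffool=True)
    punish_dic = punish.get_punish_extracts(list_sentences, list_entitys, title=title, text=text)
    # returns only {'punishType': '未知类别'} when the text is not a punishment notice
    print(punish_dic)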

+ 64 - 0
BiddingKG/dl/interface/Preprocessing.py

@@ -13,6 +13,9 @@ from BiddingKG.dl.common.Utils import *
 from BiddingKG.dl.interface.Entitys import *
 from BiddingKG.dl.interface.predictor import *
 from BiddingKG.dl.foolnltk import selffool
+from BiddingKG.dl.money.moneySource.ruleExtra import extract_moneySource
+from BiddingKG.dl.time.re_servicetime import extract_servicetime
+from BiddingKG.dl.bidway.re_bidway import extract_bidway
 
 
 
@@ -1678,6 +1681,67 @@ def get_preprocessed_entitys(list_sentences,useselffool=True,cost_time=dict()):
                 else:
                     index += 1
 
+            # funding-source extraction, added 2020/12/30
+            list_moneySource = extract_moneySource(sentence_text)
+            entity_type = "moneySource"
+            for moneySource in list_moneySource:
+                begin_index_temp = moneySource['begin_index']
+                for j in range(len(list_tokenbegin)):
+                    if list_tokenbegin[j] == begin_index_temp:
+                        begin_index = j
+                        break
+                    elif list_tokenbegin[j] > begin_index_temp:
+                        begin_index = j - 1
+                        break
+                index = moneySource['end_index']
+                end_index_temp = index
+                for j in range(begin_index, len(list_tokenbegin)):
+                    if list_tokenbegin[j] >= index:
+                        end_index = j - 1
+                        break
+                entity_id = "%s_%d_%d_%d" % (doc_id, sentence_index, begin_index, end_index)
+                entity_text = moneySource['body']
+                list_sentence_entitys.append(
+                    Entity(doc_id, entity_id, entity_text, entity_type, sentence_index, begin_index, end_index,
+                           begin_index_temp, end_index_temp))
+
+            # service-term extraction, added 2020/12/30
+            list_servicetime = extract_servicetime(sentence_text)
+            entity_type = "servicetime"
+            for servicetime in list_servicetime:
+                begin_index_temp = servicetime['begin_index']
+                for j in range(len(list_tokenbegin)):
+                    if list_tokenbegin[j] == begin_index_temp:
+                        begin_index = j
+                        break
+                    elif list_tokenbegin[j] > begin_index_temp:
+                        begin_index = j - 1
+                        break
+                index = servicetime['end_index']
+                end_index_temp = index
+                for j in range(begin_index, len(list_tokenbegin)):
+                    if list_tokenbegin[j] >= index:
+                        end_index = j - 1
+                        break
+                entity_id = "%s_%d_%d_%d" % (doc_id, sentence_index, begin_index, end_index)
+                entity_text = servicetime['body']
+                list_sentence_entitys.append(
+                    Entity(doc_id, entity_id, entity_text, entity_type, sentence_index, begin_index, end_index,
+                           begin_index_temp, end_index_temp))
+
+            # bidding-method extraction, added 2020/12/30
+            list_bidway = extract_bidway(sentence_text)
+            entity_type = "bidway"
+            for bidway in list_bidway:
+                begin_index_temp = bidway['begin_index']
+                end_index_temp = bidway['end_index']
+                begin_index = changeIndexFromWordToWords(tokens, begin_index_temp)
+                end_index = changeIndexFromWordToWords(tokens, end_index_temp)
+                entity_id = "%s_%d_%d_%d" % (doc_id, sentence_index, begin_index, end_index)
+                entity_text = bidway['body']
+                list_sentence_entitys.append(
+                    Entity(doc_id, entity_id, entity_text, entity_type, sentence_index, begin_index, end_index,
+                           begin_index_temp, end_index_temp))
 
             list_sentence_entitys.sort(key=lambda x:x.begin_index)
             list_entitys_temp = list_entitys_temp+list_sentence_entitys
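
The money-source and service-term blocks above duplicate the same character-offset to token-index mapping, and both leave begin_index or end_index unbound when neither loop breaks. A sketch of the shared logic as a single helper with safe defaults (a hypothetical refactor, not part of this commit):

    def char_span_to_token_span(list_tokenbegin, char_begin, char_end):
        # map a character span onto token indices via the tokens' start offsets
        begin_index, end_index = 0, len(list_tokenbegin) - 1   # safe defaults
        for j in range(len(list_tokenbegin)):
            if list_tokenbegin[j] == char_begin:
                begin_index = j
                break
            elif list_tokenbegin[j] > char_begin:
                begin_index = j - 1
                break
        for j in range(begin_index, len(list_tokenbegin)):
            if list_tokenbegin[j] >= char_end:
                end_index = j - 1
                break
        return begin_index, end_index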

+ 85 - 2
BiddingKG/dl/interface/predictor.py

@@ -1101,8 +1101,77 @@ class RoleRulePredictor():
                 # entities in the set that can never be the winning bidder get their label cleared
                 if p_entity.entity_text in self.SET_NOT_TENDERER:
                     p_entity.label=5
-    
-    
+
+# time-category classifier
+class TimePredictor():
+    def __init__(self):
+        self.sess = tf.Session(graph=tf.Graph())
+        self.inputs_code = None
+        self.outputs_code = None
+        self.input_shape = (2,30,60)
+        self.load_model()
+
+    def load_model(self):
+        model_path = os.path.dirname(__file__)+'/timesplit_model'
+        if self.inputs_code is None:
+            log("get model of time")
+            with self.sess.as_default():
+                with self.sess.graph.as_default():
+                    meta_graph_def = tf.saved_model.loader.load(self.sess, tags=["serve"], export_dir=model_path)
+                    signature_key = tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
+                    signature_def = meta_graph_def.signature_def
+                    self.inputs_code = []
+                    self.inputs_code.append(
+                        self.sess.graph.get_tensor_by_name(signature_def[signature_key].inputs["input0"].name))
+                    self.inputs_code.append(
+                        self.sess.graph.get_tensor_by_name(signature_def[signature_key].inputs["input1"].name))
+                    self.outputs_code = self.sess.graph.get_tensor_by_name(signature_def[signature_key].outputs["outputs"].name)
+                    return self.inputs_code, self.outputs_code
+        else:
+            return self.inputs_code, self.outputs_code
+
+    def search_time_data(self,list_sentences,list_entitys):
+        data_x = []
+        points_entitys = []
+        for list_sentence, list_entity in zip(list_sentences, list_entitys):
+            p_entitys = 0
+            p_sentences = 0
+            while(p_entitys<len(list_entity)):
+                entity = list_entity[p_entitys]
+                if entity.entity_type in ['time']:
+                    while(p_sentences<len(list_sentence)):
+                        sentence = list_sentence[p_sentences]
+                        if entity.doc_id == sentence.doc_id and entity.sentence_index == sentence.sentence_index:
+                            left = sentence.sentence_text[max(0,entity.wordOffset_begin-self.input_shape[1]):entity.wordOffset_begin]
+                            right = sentence.sentence_text[entity.wordOffset_end:entity.wordOffset_end+self.input_shape[1]]
+                            context = [left, right]
+                            x = embedding_word(context, shape=self.input_shape)
+                            data_x.append(x)
+                            points_entitys.append(entity)
+                            break
+                        p_sentences += 1
+                p_entitys += 1
+        if len(points_entitys)==0:
+            return None
+        data_x = np.transpose(np.array(data_x), (1, 0, 2, 3))
+        return [data_x, points_entitys]
+
+    def predict(self, list_sentences,list_entitys):
+        datas = self.search_time_data(list_sentences, list_entitys)
+        if datas is None:
+            return
+        points_entitys = datas[1]
+        with self.sess.as_default():
+            predict_y = self.sess.run(self.outputs_code, feed_dict={self.inputs_code[0]:datas[0][0]
+                ,self.inputs_code[1]:datas[0][1]})
+            for i in range(len(predict_y)):
+                entity = points_entitys[i]
+                label = np.argmax(predict_y[i])
+                values = [item for item in predict_y[i]]
+                # attach the label once per entity, after all probabilities are collected
+                entity.set_Role(label, values)
+
 def getSavedModel():
     #predictor = FormPredictor()
     graph = tf.Graph()
@@ -1367,6 +1436,19 @@ def save_codesplit_model():
                                            "input2":model_code.input[2]},
                                    outputs={"outputs":model_code.output})
 
+def save_timesplit_model():
+    filepath = '../time/model_label_time_classify.model.hdf5'
+    with tf.Graph().as_default() as graph:
+        time_model = models.load_model(filepath, custom_objects={'precision': precision, 'recall': recall, 'f1_score': f1_score})
+        with tf.Session() as sess:
+            sess.run(tf.global_variables_initializer())
+            h5_to_graph(sess, graph, filepath)
+            tf.saved_model.simple_save(sess,
+                                       "./timesplit_model/",
+                                       inputs={"input0":time_model.input[0],
+                                               "input1":time_model.input[1]},
+                                       outputs={"outputs":time_model.output})
+
 if __name__=="__main__":
     #save_role_model()
     #save_codename_model()
@@ -1374,6 +1456,7 @@ if __name__=="__main__":
     #save_person_model()
     #save_form_model()
     #save_codesplit_model()
+    save_timesplit_model()
     '''
     with tf.Session(graph=tf.Graph()) as sess:
         from tensorflow.python.saved_model import tag_constants
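
A minimal driving sketch for the new TimePredictor (list_sentences and list_entitys are assumed to come from Preprocessing.get_preprocessed; predict() annotates each time entity in place through set_Role rather than returning results):

    predictor = TimePredictor()   # loads ./timesplit_model via tf.saved_model
    predictor.predict(list_sentences, list_entitys)
    for list_entity in list_entitys:
        for entity in list_entity:
            if entity.entity_type == 'time':
                print(entity.entity_text)   # label/probabilities were attached by set_Role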

+ 1 - 1
BiddingKG/dl/money/moneySource/ruleExtra.py

@@ -154,7 +154,7 @@ def extract_moneySource(text):
         _moneySource['body'] = entity_text
         _moneySource['begin_index'] = wordOffset_begin
         _moneySource['end_index'] = wordOffset_end
-        print(_moneySource)
+        # print(_moneySource)
         list_moneySource.append(_moneySource)
     return list_moneySource
 

+ 3 - 3
BiddingKG/dl/time/re_servicetime.py

@@ -142,13 +142,13 @@ def extract_servicetime(text):
         word_list = word.split(" ")
         d = {"body": word_list[i], "begin_index": text_index_list[i][0], "end_index": text_index_list[i][1]}
         list_servicetime.append(d)
-    print(list_servicetime)
+    # print(list_servicetime)
     return list_servicetime
 
 
 if __name__ == '__main__':
-    df = pd.read_csv("C:\\Users\\admin\\Desktop\\serviceTime_text.csv")
-    s = df["text"].iloc[5]
+    # df = pd.read_csv("C:\\Users\\admin\\Desktop\\serviceTime_text.csv")
+    # s = df["text"].iloc[5]
     s = u'''
     ,大庆禾工煤炭分质清洁利用项目-临时用电二期工程设备、物资采购中标候选人公示,更多咨询报价请点击:http://bulletin.cebpubservice.com/candidateBulletin/2020-03-31/2678597.html,大庆禾工煤炭分质清洁利用顶目-临时用电二期工程设备、物资釆购中标候选人,(招标编号:XYwZ-20200309-5),公示结束时间:2020年04月03日,、评标情况,标段(包)[001大庆禾工煤嶽分质清洁利用项目-临时用屯二期工程设备、物资采购,中标候选人基本情况,
     中标候选人第1名:哈尔滨龙网电力设备有限公司,投标报价:19.98万元,质量,合格,工期/交货期/服务期:30天,中标候选人第2名:

+ 14 - 1
BiddingKG/dl/time/train_2.py

@@ -258,8 +258,21 @@ def plot_loss(history):
 if __name__ == '__main__':
     # getModel()
     # getModel_center()
-    training()
+    # training()
     # training_center()
     # predict()
     # predict_center()
+    model1 = models.load_model("model_label_time_classify.model.hdf5",
+                               custom_objects={'precision': precision, 'recall': recall, 'f1_score': f1_score})
+    test_x = []
+    test_y = []
+    left = '8675.20元人民币,(3)服务期限:'
+    right = '(4)质量:符合竞争性磋商文件规定的质'
+    context = [left, right]
+    x = embedding_word(context, shape=input_shape)
+    test_x.append(x)
+    test_x = np.transpose(np.array(test_x), (1, 0, 2, 3))
+    pre_y = model1.predict([test_x[0],test_x[1]])
+    rs = [np.argmax(item) for item in pre_y]
+    print(pre_y, rs)
     pass