3 jaren geleden · f529113ce6
--- a/BiddingKG/dl/interface/Preprocessing.py
+++ b/BiddingKG/dl/interface/Preprocessing.py
@@ -1084,7 +1084,7 @@ def segment(soup,final=True):
 
				     # 感叹号替换为中文句号
			
 
				     text = re.sub("(?<=[\u4e00-\u9fa5])[!！]|[!！](?=[\u4e00-\u9fa5])","。",text)
			
 
				     #替换"？"为 " " ,update:2021/7/20
			
 
				-    text = re.sub("？"," ",text)
			
 
				+    text = re.sub("？{1,}"," ",text)
			
 
				 
			
 
				 
			
 
				     #替换"""为"“",否则导入deepdive出错
			
--- a/BiddingKG/dl/interface/getAttributes.py
+++ b/BiddingKG/dl/interface/getAttributes.py
@@ -1129,10 +1129,10 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_entity
 
				                 if not re.search("电，?话", phone_left):
			
 
				                     last_phone_mask = False
			
 
				                     continue
			
 
				-            if re.search("注册[证号]|帐，?号|编，?[号码]|报，?价|证，?号|价，?格|[\(\（]万?元[\)\）]|[a-zA-Z]+\d*$", phone_left):
			
 
				+            if re.search("注册[证号]|帐，?号|编，?[号码]|报，?价|标，?价|证，?号|价，?格|[\(\（]万?元[\)\）]|[a-zA-Z]+\d*$", phone_left):
			
 
				                 last_phone_mask = False
			
 
				                 continue
			
 
				-            if re.search("^\d{0,4}[.,]\d{2,}|^[0-9a-zA-Z\.]*@|^\d*[a-zA-Z]+", phone_right):
			
 
				+            if re.search("^\d{0,4}[.,]\d{2,}|^[0-9a-zA-Z\.]*@|^\d*[a-zA-Z]+|元", phone_right):
			
 
				                 last_phone_mask = False
			
 
				                 continue
			
 
				             # if:上一个phone实体不符合条件
			
@@ -2195,22 +2195,191 @@ def turnBidWay(bidway):
 
				     else:
			
 
				         return "其他"
			
 
				 
			
 
				+my_time_format_pattern = re.compile("((?P<year>\d{4}|\d{2})\s*[-\/年\.]\s*(?P<month>\d{1,2})\s*[-\/月\.]\s*(?P<day>\d{1,2}))")
			
 
				+import time
			
 
				+def my_timeFormat(_time):
			
 
				+    current_year = time.strftime("%Y",time.localtime())
			
 
				+    all_match = re.finditer(my_time_format_pattern,_time)
			
 
				+    time_list = []
			
 
				+    for _match in all_match:
			
 
				+        if len(_match.group())>0:
			
 
				+            legal = True
			
 
				+            year = ""
			
 
				+            month = ""
			
 
				+            day = ""
			
 
				+            for k,v in _match.groupdict().items():
			
 
				+                if k=="year":
			
 
				+                    year = v
			
 
				+                if k=="month":
			
 
				+                    month = v
			
 
				+                if k=="day":
			
 
				+                    day = v
			
 
				+            if year!="":
			
 
				+                if len(year)==2:
			
 
				+                    year = "20"+year
			
 
				+                if int(year)>int(current_year):
			
 
				+                    legal = False
			
 
				+            else:
			
 
				+                legal = False
			
 
				+            if month!="":
			
 
				+                if int(month)>12:
			
 
				+                    legal = False
			
 
				+            else:
			
 
				+                legal = False
			
 
				+            if day!="":
			
 
				+                if int(day)>31:
			
 
				+                    legal = False
			
 
				+            else:
			
 
				+                legal = False
			
 
				+            if legal:
			
 
				+                # return "%s-%s-%s"%(year,month.rjust(2,"0"),day.rjust(2,"0"))
			
 
				+                time_list.append("%s-%s-%s"%(year,month.rjust(2,"0"),day.rjust(2,"0")))
			
 
				+    return time_list
			
 
				+
			
 
				+def getTimeAttributes(list_entity,list_sentence):
			
 
				+    # list_entity = [i for i in list_entity if i.entity_type=='time']
			
 
				+    list_sentence = sorted(list_sentence,key=lambda x:x.sentence_index)
			
 
				+    dict_time = {
			
 
				+        "time_release": [],
			
 
				+        "time_bidopen": [],
			
 
				+        "time_bidclose": [],
			
 
				+        'time_bidstart': [],  # 12 投标（开始）时间、响应文件接收（开始）时间
			
 
				+
			
 
				+        'time_publicityStart': [],  # 4 公示开始时间（公示时间、公示期）
			
 
				+        'time_publicityEnd': [],  # 5 公示截止时间
			
 
				+        'time_getFileStart': [],  # 6 文件获取开始时间（文件获取时间）
			
 
				+        'time_getFileEnd': [],  # 7 文件获取截止时间
			
 
				+        'time_registrationStart': [],  # 8 报名开始时间（报名时间）
			
 
				+        'time_registrationEnd': [],  # 9 报名截止时间
			
 
				+        'time_earnestMoneyStart': [], #10 保证金递交开始时间（保证金递交时间）
			
 
				+        'time_earnestMoneyEnd': [] , # 11 保证金递交截止时间
			
 
				+        'time_commencement':[] , #13 开工日期
			
 
				+        'time_completion': []  # 14 竣工日期
			
 
				+    }
			
 
				+    for entity in list_entity:
			
 
				+        if entity.label!=0:
			
 
				+            entity_text = entity.entity_text
			
 
				+            extract_time = my_timeFormat(entity_text)
			
 
				+            if extract_time:
			
 
				+                sentence_text = list_sentence[entity.sentence_index].sentence_text
			
 
				+                entity_left = sentence_text[max(0,entity.wordOffset_begin-2):entity.wordOffset_begin]
			
 
				+                entity_right = sentence_text[entity.wordOffset_end:entity.wordOffset_end+3]
			
 
				+                label_prob = entity.values[entity.label]
			
 
				+                if entity.label==1 and label_prob>0.5:
			
 
				+                    dict_time['time_release'].append((extract_time[0],label_prob))
			
 
				+                elif entity.label==2 and label_prob>0.5:
			
 
				+                    dict_time['time_bidopen'].append((extract_time[0],label_prob))
			
 
				+                elif entity.label==3 and label_prob>0.5:
			
 
				+                    dict_time['time_bidclose'].append((extract_time[0],label_prob))
			
 
				+                elif entity.label==12 and label_prob>0.5:
			
 
				+                    if len(extract_time)==1:
			
 
				+                        if re.search("前|止|截止",entity_right) or re.search("至|止",entity_left) or re.search("前",entity_text[-2:]):
			
 
				+                            dict_time['time_bidclose'].append((extract_time[0], label_prob))
			
 
				+                        else:
			
 
				+                            dict_time['time_bidstart'].append((extract_time[0], label_prob))
			
 
				+                    else:
			
 
				+                        dict_time['time_bidstart'].append((extract_time[0],label_prob))
			
 
				+                        dict_time['time_bidclose'].append((extract_time[1],label_prob))
			
 
				+                elif entity.label==4 and label_prob>0.5:
			
 
				+                    if len(extract_time)==1:
			
 
				+                        if re.search("前|止|截止",entity_right) or re.search("至|止",entity_left) or re.search("前",entity_text[-2:]):
			
 
				+                            dict_time['time_publicityEnd'].append((extract_time[0], label_prob))
			
 
				+                        else:
			
 
				+                            dict_time['time_publicityStart'].append((extract_time[0], label_prob))
			
 
				+                    else:
			
 
				+                        dict_time['time_publicityStart'].append((extract_time[0],label_prob))
			
 
				+                        dict_time['time_publicityEnd'].append((extract_time[1],label_prob))
			
 
				+                elif entity.label==5 and label_prob>0.5:
			
 
				+                    if len(extract_time)==1:
			
 
				+                        dict_time['time_publicityEnd'].append((extract_time[0], label_prob))
			
 
				+                    else:
			
 
				+                        dict_time['time_publicityStart'].append((extract_time[0],label_prob))
			
 
				+                        dict_time['time_publicityEnd'].append((extract_time[1],label_prob))
			
 
				+                elif entity.label==6 and label_prob>0.5:
			
 
				+                    if len(extract_time)==1:
			
 
				+                        if re.search("前|止|截止",entity_right) or re.search("至|止",entity_left) or re.search("前",entity_text[-2:]):
			
 
				+                            dict_time['time_getFileEnd'].append((extract_time[0], label_prob))
			
 
				+                        else:
			
 
				+                            dict_time['time_getFileStart'].append((extract_time[0], label_prob))
			
 
				+                    else:
			
 
				+                        dict_time['time_getFileStart'].append((extract_time[0],label_prob))
			
 
				+                        dict_time['time_getFileEnd'].append((extract_time[1],label_prob))
			
 
				+                elif entity.label==7 and label_prob>0.5:
			
 
				+                    if len(extract_time)==1:
			
 
				+                        dict_time['time_getFileEnd'].append((extract_time[0], label_prob))
			
 
				+                    else:
			
 
				+                        dict_time['time_getFileStart'].append((extract_time[0],label_prob))
			
 
				+                        dict_time['time_getFileEnd'].append((extract_time[1],label_prob))
			
 
				+                elif entity.label==8 and label_prob>0.5:
			
 
				+                    if len(extract_time)==1:
			
 
				+                        if re.search("前|止|截止",entity_right) or re.search("至|止",entity_left) or re.search("前",entity_text[-2:]):
			
 
				+                            dict_time['time_registrationEnd'].append((extract_time[0], label_prob))
			
 
				+                        else:
			
 
				+                            dict_time['time_registrationStart'].append((extract_time[0], label_prob))
			
 
				+                    else:
			
 
				+                        dict_time['time_registrationStart'].append((extract_time[0],label_prob))
			
 
				+                        dict_time['time_registrationEnd'].append((extract_time[1],label_prob))
			
 
				+                elif entity.label==9 and label_prob>0.5:
			
 
				+                    if len(extract_time)==1:
			
 
				+                        dict_time['time_registrationEnd'].append((extract_time[0], label_prob))
			
 
				+                    else:
			
 
				+                        dict_time['time_registrationStart'].append((extract_time[0],label_prob))
			
 
				+                        dict_time['time_registrationEnd'].append((extract_time[1],label_prob))
			
 
				+                elif entity.label==10 and label_prob>0.5:
			
 
				+                    if len(extract_time)==1:
			
 
				+                        if re.search("前|止|截止",entity_right) or re.search("至|止",entity_left) or re.search("前",entity_text[-2:]):
			
 
				+                            dict_time['time_earnestMoneyEnd'].append((extract_time[0], label_prob))
			
 
				+                        else:
			
 
				+                            dict_time['time_earnestMoneyStart'].append((extract_time[0], label_prob))
			
 
				+                    else:
			
 
				+                        dict_time['time_earnestMoneyStart'].append((extract_time[0],label_prob))
			
 
				+                        dict_time['time_earnestMoneyEnd'].append((extract_time[1],label_prob))
			
 
				+                elif entity.label==11 and label_prob>0.5:
			
 
				+                    if len(extract_time)==1:
			
 
				+                        dict_time['time_earnestMoneyEnd'].append((extract_time[0], label_prob))
			
 
				+                    else:
			
 
				+                        dict_time['time_earnestMoneyStart'].append((extract_time[0],label_prob))
			
 
				+                        dict_time['time_earnestMoneyEnd'].append((extract_time[1],label_prob))
			
 
				+                elif entity.label==13 and label_prob>0.5:
			
 
				+                    if len(extract_time)==1:
			
 
				+                        if re.search("前|止|截止",entity_right) or re.search("至|止",entity_left) or re.search("前",entity_text[-2:]):
			
 
				+                            dict_time['time_completion'].append((extract_time[0], label_prob))
			
 
				+                        else:
			
 
				+                            dict_time['time_commencement'].append((extract_time[0], label_prob))
			
 
				+                    else:
			
 
				+                        dict_time['time_commencement'].append((extract_time[0],label_prob))
			
 
				+                        dict_time['time_completion'].append((extract_time[1],label_prob))
			
 
				+                elif entity.label==14 and label_prob>0.5:
			
 
				+                    if len(extract_time)==1:
			
 
				+                        dict_time['time_completion'].append((extract_time[0], label_prob))
			
 
				+                    else:
			
 
				+                        dict_time['time_commencement'].append((extract_time[0],label_prob))
			
 
				+                        dict_time['time_completion'].append((extract_time[1],label_prob))
			
 
				+
			
 
				+
			
 
				+    result_dict = dict((key,"") for key in dict_time.keys())
			
 
				+    for time_type,value in dict_time.items():
			
 
				+        list_time = dict_time[time_type]
			
 
				+        if list_time:
			
 
				+            list_time.sort(key=lambda x:x[1],reverse=True)
			
 
				+            result_dict[time_type] = list_time[0][0]
			
 
				+    return result_dict
			
 
				+
			
 
				 def getOtherAttributes(list_entity):
			
 
				     dict_other = {"moneysource":"",
			
 
				                   "person_review":[],
			
 
				-                  "time_release":"",
			
 
				-                  "time_bidopen":"",
			
 
				-                  "time_bidclose":"",
			
 
				+                  # "time_release":"",
			
 
				+                  # "time_bidopen":"",
			
 
				+                  # "time_bidclose":"",
			
 
				                   "serviceTime":"",
			
 
				                   "product":[],
			
 
				                   "total_tendereeMoney":0,
			
 
				-                  "total_tendereeMoneyUnit":''
			
 
				-                   }
			
 
				-    dict_time = {
			
 
				-        "time_release": [],
			
 
				-        "time_bidopen": [],
			
 
				-        "time_bidclose": []
			
 
				-    }
			
 
				+                  "total_tendereeMoneyUnit":''}
			
 
				+    # dict_time = {
			
 
				+    #     "time_release": [],
			
 
				+    #     "time_bidopen": [],
			
 
				+    #     "time_bidclose": []
			
 
				+    # }
			
 
				     for entity in list_entity:
			
 
				         if entity.entity_type == 'bidway':
			
 
				             dict_other["bidway"] = turnBidWay(entity.entity_text)
			
@@ -2218,18 +2387,18 @@ def getOtherAttributes(list_entity):
 
				             dict_other["moneysource"] = entity.entity_text
			
 
				         elif entity.entity_type=='serviceTime':
			
 
				             dict_other["serviceTime"] = entity.entity_text
			
 
				-        elif entity.entity_type == 'time' and entity.label==1:
			
 
				-            if entity.values[entity.label]>0.6:
			
 
				-                dict_time['time_release'].append((timeFormat(entity.entity_text),entity.values[entity.label]))
			
 
				-            # dict_other["time_release"] = timeFormat(entity.entity_text)
			
 
				-        elif entity.entity_type == 'time' and entity.label==2:
			
 
				-            if entity.values[entity.label]>0.6:
			
 
				-                dict_time['time_bidopen'].append((timeFormat(entity.entity_text),entity.values[entity.label]))
			
 
				-            # dict_other["time_bidopen"] = timeFormat(entity.entity_text)
			
 
				-        elif entity.entity_type == 'time' and entity.label == 3:
			
 
				-            if entity.values[entity.label]>0.6:
			
 
				-                dict_time['time_bidclose'].append((timeFormat(entity.entity_text),entity.values[entity.label]))
			
 
				-            # dict_other["time_bidclose"] = timeFormat(entity.entity_text)
			
 
				+        # elif entity.entity_type == 'time' and entity.label==1:
			
 
				+        #     if entity.values[entity.label]>0.6:
			
 
				+        #         dict_time['time_release'].append((timeFormat(entity.entity_text),entity.values[entity.label]))
			
 
				+        #     # dict_other["time_release"] = timeFormat(entity.entity_text)
			
 
				+        # elif entity.entity_type == 'time' and entity.label==2:
			
 
				+        #     if entity.values[entity.label]>0.6:
			
 
				+        #         dict_time['time_bidopen'].append((timeFormat(entity.entity_text),entity.values[entity.label]))
			
 
				+        #     # dict_other["time_bidopen"] = timeFormat(entity.entity_text)
			
 
				+        # elif entity.entity_type == 'time' and entity.label == 3:
			
 
				+        #     if entity.values[entity.label]>0.6:
			
 
				+        #         dict_time['time_bidclose'].append((timeFormat(entity.entity_text),entity.values[entity.label]))
			
 
				+        #     # dict_other["time_bidclose"] = timeFormat(entity.entity_text)
			
 
				         elif entity.entity_type=="person" and entity.label ==4:
			
 
				             dict_other["person_review"].append(entity.entity_text)
			
 
				         elif entity.entity_type=='product':
			
@@ -2238,11 +2407,11 @@ def getOtherAttributes(list_entity):
 
				                 dict_other["total_tendereeMoney"] = float(entity.entity_text)
			
 
				                 dict_other["total_tendereeMoneyUnit"] = entity.money_unit
			
 
				     # 时间类别
			
 
				-    for time_type,value in dict_time.items():
			
 
				-        list_time = dict_time[time_type]
			
 
				-        if list_time:
			
 
				-            list_time.sort(key=lambda x:x[1],reverse=True)
			
 
				-            dict_other[time_type] = list_time[0][0]
			
 
				+    # for time_type,value in dict_time.items():
			
 
				+    #     list_time = dict_time[time_type]
			
 
				+    #     if list_time:
			
 
				+    #         list_time.sort(key=lambda x:x[1],reverse=True)
			
 
				+    #         dict_other[time_type] = list_time[0][0]
			
 
				     dict_other["product"] = list(set(dict_other["product"]))
			
 
				     return dict_other
			
 
				 
			
@@ -2259,7 +2428,7 @@ def getPREMs(list_sentences,list_entitys,list_articles):
 
				     result = []
			
 
				     for list_sentence,list_entity,list_article in zip(list_sentences,list_entitys,list_articles):
			
 
				         RoleList = getPackageRoleMoney(list_sentence,list_entity)
			
 
				-        result.append(dict({"prem":RoleList,"docid":list_article.id},**getOtherAttributes(list_entity),
			
 
				+        result.append(dict({"prem":RoleList,"docid":list_article.doc_id},**getOtherAttributes(list_entity),**getTimeAttributes(list_entity,list_sentence),
			
 
				                            **{"fingerprint":list_article.fingerprint,"match_enterprise":list_article.match_enterprise,
			
 
				                               "match_enterprise_type":list_article.match_enterprise_type,"process_time":getCurrent_date(),
			
 
				                               "attachmentTypes":list_article.attachmentTypes, "bidway": list_article.bidway}))
			
--- a/BiddingKG/dl/interface/timesplit_model/saved_model.pb
+++ b/BiddingKG/dl/interface/timesplit_model/saved_model.pb
--- a/BiddingKG/dl/interface/timesplit_model/variables/variables.data-00000-of-00001
+++ b/BiddingKG/dl/interface/timesplit_model/variables/variables.data-00000-of-00001
--- a/BiddingKG/dl/interface/timesplit_model/variables/variables.index
+++ b/BiddingKG/dl/interface/timesplit_model/variables/variables.index
--- a/BiddingKG/dl/test/测试整个要素提取流程.py
+++ b/BiddingKG/dl/test/测试整个要素提取流程.py
@@ -147,6 +147,8 @@ def predict(doc_id,text):
 
				                     # print(_sentence.sentence_text,_sentence.tokens[entity.begin_index:entity.end_index+1])
			
 
				                     pass
			
 
				                 # print(entity.pointer_pack)
			
 
				+            # elif entity.entity_type =='serviceTime':
			
 
				+            #     print(entity.entity_text)
			
 
				             #     if entity.pointer_pack:
			
 
				             #         print('pointer_pack_name:',entity.pointer_pack.entity_text)
			
 
				             # elif entity.entity_type in ['package']:
			
@@ -439,8 +441,8 @@ if __name__=="__main__":
 
				     a = time.time()
			
 
				     print("start")
			
 
				     # print(predict("12",content))
			
 
				-    # result = predict("12",text)
			
 
				-    result = predict("12",content)
			
 
				+    result = predict("12",text)
			
 
				+    # result = predict("12",content)
			
 
				     # print(json.loads(result))
			
 
				     #test("12",text)
			
 
				     print("takes",time.time()-a)
			
--- a/BiddingKG/dl/time/model_time_classify.weights
+++ b/BiddingKG/dl/time/model_time_classify.weights
--- a/BiddingKG/dl/time/train_2.py
+++ b/BiddingKG/dl/time/train_2.py
@@ -13,10 +13,32 @@ from BiddingKG.dl.common.models import *
 
				 from sklearn.metrics import classification_report
			
 
				 from sklearn.utils import shuffle,class_weight
			
 
				 import matplotlib.pyplot as plt
			
 
				+import random
			
 
				 
			
 
				 input_shape = (2,30,60)
			
 
				 input_shape2 = (2,40,128)
			
 
				-output_shape = [4]
			
 
				+# output_shape = [4]
			
 
				+
			
 
				+time_label_dict = {
			
 
				+             'time': 0,
			
 
				+            'time_release': 1, #发布时间
			
 
				+            'time_bidopen': 2, #开标时间
			
 
				+            'time_bidclose': 3, #截标时间
			
 
				+            'time_bidstart': 12, #投标（开始）时间、响应文件接收（开始）时间
			
 
				+
			
 
				+            'time_publicityStart': 4, #公示开始时间（公示时间、公示期）
			
 
				+            'time_publicityEnd': 5, #公示截止时间
			
 
				+            'time_getFileStart': 6, #文件获取开始时间（文件获取时间）
			
 
				+            'time_getFileEnd': 7, #文件获取截止时间
			
 
				+            'time_registrationStart': 8, #报名开始时间（报名时间）
			
 
				+            'time_registrationEnd': 9, #报名截止时间
			
 
				+            'time_earnestMoneyStart': 10, #保证金递交开始时间（保证金递交时间）
			
 
				+            'time_earnestMoneyEnd': 11, #保证金递交截止时间
			
 
				+            'time_commencement': 13, #开工日期
			
 
				+            'time_completion': 14 #竣工日期
			
 
				+        }
			
 
				+output_shape = [len(time_label_dict)]
			
 
				+
			
 
				 
			
 
				 def get_data():
			
 
				     data_load = pd.read_csv("newdata_30_prc.csv", index_col=0)
			
@@ -91,16 +113,23 @@ def getModel2():
 
				     R_input = layers.Input(shape=input_shape2[1:], dtype='float32')
			
 
				     R_mask = Lambda(lambda x: K.cast(K.not_equal(K.sum(x,axis=-1,keepdims=True), 0), 'float32'))(R_input)
			
 
				 
			
 
				-    L_input_drop = Dropout(0.2)(L_input)
			
 
				-    R_input_drop = Dropout(0.2)(R_input)
			
 
				+    L_input_drop = Dropout(0.3)(L_input)
			
 
				+    R_input_drop = Dropout(0.3)(R_input)
			
 
				     # L_lstm = layers.Bidirectional(layers.GRU(40,return_sequences=True,dropout=0.1))(L_input)
			
 
				     L_lstm = OurBidirectional(GRU(64, return_sequences=True))([L_input_drop,L_mask])
			
 
				     L_att = Attention02()(L_lstm,mask=K.squeeze(L_mask,axis=-1))
			
 
				     # R_lstm = layers.Bidirectional(layers.GRU(40,return_sequences=True,dropout=0.1))(R_input)
			
 
				     R_lstm = OurBidirectional(GRU(64, return_sequences=True))([R_input_drop,R_mask])
			
 
				     R_att = Attention02()(R_lstm,mask=K.squeeze(R_mask,axis=-1))
			
 
				+    L_R = layers.merge([L_lstm, R_lstm],concat_axis=1, mode='concat')
			
 
				+    L_R_mask = layers.merge([L_mask, R_mask],concat_axis=1, mode='concat')
			
 
				+    L_R_att = Attention02()(L_R,mask=K.squeeze(L_R_mask,axis=-1))
			
 
				+
			
 
				+    L_att = layers.add([L_att,L_R_att])
			
 
				+    R_att = layers.add([R_att,L_R_att])
			
 
				     concat = layers.merge([L_att, R_att], mode='concat')
			
 
				-    concat = Dropout(0.3)(concat)
			
 
				+
			
 
				+    concat = Dropout(0.2)(concat)
			
 
				     output = layers.Dense(output_shape[0],activation="softmax")(concat)
			
 
				 
			
 
				     model = models.Model(inputs=[L_input,R_input], outputs=output)
			
@@ -111,6 +140,36 @@ def getModel2():
 
				                   metrics=[precision,recall,f1_score])
			
 
				     model.summary()
			
 
				     return model
			
 
				+# def getModel2():
			
 
				+#     '''
			
 
				+#     @summary: 时间分类模型
			
 
				+#     '''
			
 
				+#     L_input = layers.Input(shape=input_shape2[1:], dtype='float32')
			
 
				+#     L_mask = Lambda(lambda x: K.cast(K.not_equal(K.sum(x,axis=-1,keepdims=True), 0), 'float32'))(L_input)
			
 
				+#     R_input = layers.Input(shape=input_shape2[1:], dtype='float32')
			
 
				+#     R_mask = Lambda(lambda x: K.cast(K.not_equal(K.sum(x,axis=-1,keepdims=True), 0), 'float32'))(R_input)
			
 
				+#
			
 
				+#     L_input_drop = Dropout(0.3)(L_input)
			
 
				+#     R_input_drop = Dropout(0.3)(R_input)
			
 
				+#     # L_lstm = layers.Bidirectional(layers.GRU(40,return_sequences=True,dropout=0.1))(L_input)
			
 
				+#     L_lstm = OurBidirectional(GRU(64, return_sequences=True))([L_input_drop,L_mask])
			
 
				+#     L_att = Attention02()(L_lstm,mask=K.squeeze(L_mask,axis=-1))
			
 
				+#     # R_lstm = layers.Bidirectional(layers.GRU(40,return_sequences=True,dropout=0.1))(R_input)
			
 
				+#     R_lstm = OurBidirectional(GRU(64, return_sequences=True))([R_input_drop,R_mask])
			
 
				+#     R_att = Attention02()(R_lstm,mask=K.squeeze(R_mask,axis=-1))
			
 
				+#     concat = layers.merge([L_att, R_att], mode='concat')
			
 
				+#
			
 
				+#     concat = Dropout(0.2)(concat)
			
 
				+#     output = layers.Dense(output_shape[0],activation="softmax")(concat)
			
 
				+#
			
 
				+#     model = models.Model(inputs=[L_input,R_input], outputs=output)
			
 
				+#
			
 
				+#     learn_rate = 0.00005
			
 
				+#     model.compile(optimizer=optimizers.Adam(lr=learn_rate),
			
 
				+#                   loss=losses.binary_crossentropy,
			
 
				+#                   metrics=[precision,recall,f1_score])
			
 
				+#     model.summary()
			
 
				+#     return model
			
 
				 
			
 
				 def getModel3():
			
 
				     '''
			
@@ -121,8 +180,8 @@ def getModel3():
 
				     R_input = layers.Input(shape=input_shape2[1:], dtype='float32')
			
 
				     R_mask = Lambda(lambda x: K.cast(K.not_equal(K.sum(x,axis=-1,keepdims=True), 0), 'float32'))(R_input)
			
 
				 
			
 
				-    L_input_drop = Dropout(0.2)(L_input)
			
 
				-    R_input_drop = Dropout(0.2)(R_input)
			
 
				+    L_input_drop = Dropout(0.3)(L_input)
			
 
				+    R_input_drop = Dropout(0.3)(R_input)
			
 
				     # L_lstm = layers.Bidirectional(layers.GRU(40,return_sequences=True,dropout=0.1))(L_input)
			
 
				     L_lstm = OurBidirectional(GRU(64, return_sequences=True))([L_input_drop,L_mask])
			
 
				     # L_att = Attention02()(L_lstm,mask=K.squeeze(L_mask,axis=-1))
			
@@ -133,7 +192,7 @@ def getModel3():
 
				     att = Attention02()(concat,mask=K.squeeze(concat_mask,axis=-1))
			
 
				     # R_att = Attention02()(R_lstm,mask=K.squeeze(R_mask,axis=-1))
			
 
				     # concat = layers.merge([L_att, R_att], mode='concat')
			
 
				-    att = Dropout(0.3)(att)
			
 
				+    att = Dropout(0.2)(att)
			
 
				     output = layers.Dense(output_shape[0],activation="softmax")(att)
			
 
				 
			
 
				     model = models.Model(inputs=[L_input,R_input], outputs=output)
			
@@ -145,6 +204,72 @@ def getModel3():
 
				     model.summary()
			
 
				     return model
			
 
				 
			
 
				+class Attention(Layer):
			
 
				+    """多头注意力机制
			
 
				+    """
			
 
				+    def __init__(self, nb_head, size_per_head, **kwargs):
			
 
				+        self.nb_head = nb_head
			
 
				+        self.size_per_head = size_per_head
			
 
				+        self.out_dim = nb_head * size_per_head
			
 
				+        super(Attention, self).__init__(**kwargs)
			
 
				+    def build(self, input_shape):
			
 
				+        super(Attention, self).build(input_shape)
			
 
				+        q_in_dim = input_shape[0][-1]
			
 
				+        k_in_dim = input_shape[1][-1]
			
 
				+        v_in_dim = input_shape[2][-1]
			
 
				+        self.q_kernel = self.add_weight(name='q_kernel',
			
 
				+                                        shape=(q_in_dim, self.out_dim),
			
 
				+                                        initializer='glorot_normal')
			
 
				+        self.k_kernel = self.add_weight(name='k_kernel',
			
 
				+                                        shape=(k_in_dim, self.out_dim),
			
 
				+                                        initializer='glorot_normal')
			
 
				+        self.v_kernel = self.add_weight(name='w_kernel',
			
 
				+                                        shape=(v_in_dim, self.out_dim),
			
 
				+                                        initializer='glorot_normal')
			
 
				+    def mask(self, x, mask, mode='mul'):
			
 
				+        if mask is None:
			
 
				+            return x
			
 
				+        else:
			
 
				+            for _ in range(K.ndim(x) - K.ndim(mask)):
			
 
				+                mask = K.expand_dims(mask, K.ndim(mask))
			
 
				+            if mode == 'mul':
			
 
				+                return x * mask
			
 
				+            else:
			
 
				+                return x - (1 - mask) * 1e10
			
 
				+    def call(self, inputs):
			
 
				+        q, k, v = inputs[:3]
			
 
				+        v_mask, q_mask = None, None
			
 
				+        if len(inputs) > 3:
			
 
				+            v_mask = inputs[3]
			
 
				+            if len(inputs) > 4:
			
 
				+                q_mask = inputs[4]
			
 
				+        # 线性变换
			
 
				+        qw = K.dot(q, self.q_kernel)
			
 
				+        kw = K.dot(k, self.k_kernel)
			
 
				+        vw = K.dot(v, self.v_kernel)
			
 
				+        # 形状变换
			
 
				+        qw = K.reshape(qw, (-1, K.shape(qw)[1], self.nb_head, self.size_per_head))
			
 
				+        kw = K.reshape(kw, (-1, K.shape(kw)[1], self.nb_head, self.size_per_head))
			
 
				+        vw = K.reshape(vw, (-1, K.shape(vw)[1], self.nb_head, self.size_per_head))
			
 
				+        # 维度置换
			
 
				+        qw = K.permute_dimensions(qw, (0, 2, 1, 3))
			
 
				+        kw = K.permute_dimensions(kw, (0, 2, 1, 3))
			
 
				+        vw = K.permute_dimensions(vw, (0, 2, 1, 3))
			
 
				+        # Attention
			
 
				+        a = K.batch_dot(qw, kw, [3, 3]) / self.size_per_head**0.5
			
 
				+        a = K.permute_dimensions(a, (0, 3, 2, 1))
			
 
				+        a = self.mask(a, v_mask, 'add')
			
 
				+        a = K.permute_dimensions(a, (0, 3, 2, 1))
			
 
				+        a = K.softmax(a)
			
 
				+        # 完成输出
			
 
				+        o = K.batch_dot(a, vw, [3, 2])
			
 
				+        o = K.permute_dimensions(o, (0, 2, 1, 3))
			
 
				+        o = K.reshape(o, (-1, K.shape(o)[1], self.out_dim))
			
 
				+        o = self.mask(o, q_mask, 'mul')
			
 
				+        return o
			
 
				+    def compute_output_shape(self, input_shape):
			
 
				+        return (input_shape[0][0], input_shape[0][1], self.out_dim)
			
 
				+
			
 
				 class Attention02(Layer):
			
 
				     def __init__(self, **kwargs):
			
 
				         self.init = initializers.get('normal')
			
@@ -530,11 +655,216 @@ def train3():
 
				     # # y_pre2 = load_model.predict(train_x[0])
			
 
				     # res2 = classification_report(np.argmax(train_y, axis=1), np.argmax(y_pre2, axis=1))
			
 
				     # print(res2)
			
 
				+
			
 
				+def train4():
			
 
				+    # data_load = pd.read_excel("tokens_tolabel_data1.xlsx", index_col=0)
			
 
				+    data_load = pd.read_excel("tokens_tolabel_data1_res13New.xlsx", index_col=0)
			
 
				+    # data_load = pd.concat([data_load[data_load['re_label']==0],data_load])
			
 
				+    # data_load = data_load[data_load['pre_label_prob']>0.97]
			
 
				+    # data_load = data_load[data_load['is_same']==1]
			
 
				+    data_zero = pd.read_excel("time_entity5.xlsx")
			
 
				+    data_zero = data_zero[(data_zero['viewed']==1)|(data_zero['is_same']==2)]
			
 
				+    # data_old = pd.read_excel("tokens_data_02.xlsx")
			
 
				+    data_old = pd.read_excel("tokens_data_02_res7New.xlsx")
			
 
				+    data_delay1 = pd.read_excel("delayTime_entity1.xlsx")
			
 
				+    data_delay1 = data_delay1[data_delay1['label']!=0]
			
 
				+    data_delay2 = pd.read_excel("delayTime_entity2.xlsx")
			
 
				+
			
 
				+    # data_zero = pd.concat([data_zero,data_zero])
			
 
				+    # data_zero = pd.concat([data_zero[(data_zero['label']!=0)|(data_zero['is_same']==2)],data_zero.sample(n=3000)])
			
 
				+    # data_zero = data_zero.sample(n=80000)
			
 
				+    print("输入shape：",input_shape2)
			
 
				+    data_x = []
			
 
				+    data_y = []
			
 
				+    import random
			
 
				+    for left, right, label,_label in zip(data_load['context_left'], data_load['context_right'], data_load['re_label'], data_load['label']):
			
 
				+        # if label==_label:
			
 
				+
			
 
				+        y = np.zeros(output_shape)
			
 
				+        y[label] = 1
			
 
				+        left = eval(left)
			
 
				+        left = left[-40:]
			
 
				+        right = eval(right)
			
 
				+        right = right[:40]
			
 
				+        context = [left, right]
			
 
				+        # x = embedding(context, shape=input_shape2)
			
 
				+        data_x.append(context)
			
 
				+        data_y.append(y)
			
 
				+    # data_load2 = data_load[data_load['re_label']==0]
			
 
				+    # for left, right, label,_label in zip(data_load2['context_left'], data_load2['context_right'], data_load2['re_label'], data_load2['label']):
			
 
				+    #         if label==_label:
			
 
				+    #             y = np.zeros(output_shape)
			
 
				+    #             y[label] = 1
			
 
				+    #             left = eval(left)
			
 
				+    #             left = left[-40:]
			
 
				+    #             if len(left)>30:
			
 
				+    #                 left = left[2:]
			
 
				+    #             elif len(left)>15:
			
 
				+    #                 left = left[1:]
			
 
				+    #             right = eval(right)
			
 
				+    #             right = right[:40]
			
 
				+    #             if len(right)>15:
			
 
				+    #                 right = right[:-1]
			
 
				+    #             context = [left, right]
			
 
				+    #             # x = embedding(context, shape=input_shape2)
			
 
				+    #             data_x.append(context)
			
 
				+    #             data_y.append(y)
			
 
				+
			
 
				+    for left, right, label in zip(data_zero['context_left'], data_zero['context_right'], data_zero['re_label']):
			
 
				+
			
 
				+        y = np.zeros(output_shape)
			
 
				+        y[label] = 1
			
 
				+        left = eval(left)
			
 
				+        left = left[-40:]
			
 
				+        right = eval(right)
			
 
				+        right = right[:40]
			
 
				+        context = [left, right]
			
 
				+        # x = embedding(context, shape=input_shape2)
			
 
				+        data_x.append(context)
			
 
				+        data_y.append(y)
			
 
				+
			
 
				+    for left, right, label in zip(data_delay1['context_left'], data_delay1['context_right'], data_delay1['label']):
			
 
				+            y = np.zeros(output_shape)
			
 
				+            y[label] = 1
			
 
				+            left = eval(left)
			
 
				+            left = left[-40:]
			
 
				+            right = eval(right)
			
 
				+            right = right[:40]
			
 
				+            context = [left, right]
			
 
				+            # x = embedding(context, shape=input_shape2)
			
 
				+            data_x.append(context)
			
 
				+            data_y.append(y)
			
 
				+    for left, right, label in zip(data_delay2['context_left'], data_delay2['context_right'], data_delay2['re_label']):
			
 
				+                y = np.zeros(output_shape)
			
 
				+                y[label] = 1
			
 
				+                left = eval(left)
			
 
				+                left = left[-40:]
			
 
				+                right = eval(right)
			
 
				+                right = right[:40]
			
 
				+                context = [left, right]
			
 
				+                # x = embedding(context, shape=input_shape2)
			
 
				+                data_x.append(context)
			
 
				+                data_y.append(y)
			
 
				+
			
 
				+    # for left, right, label in zip(data_zero['context_left'], data_zero['context_right'], data_zero['label']):
			
 
				+    #         y = np.zeros(output_shape)
			
 
				+    #         y[label] = 1
			
 
				+    #         left = eval(left)
			
 
				+    #         left = left[-40:]
			
 
				+    #         if len(left) > 30:
			
 
				+    #             left = left[2:]
			
 
				+    #         elif len(left) > 15:
			
 
				+    #             left = left[1:]
			
 
				+    #         right = eval(right)
			
 
				+    #         right = right[:40]
			
 
				+    #         if len(right) > 15:
			
 
				+    #             right = right[:-1]
			
 
				+    #         context = [left, right]
			
 
				+    #         # x = embedding(context, shape=input_shape2)
			
 
				+    #         data_x.append(context)
			
 
				+    #         data_y.append(y)
			
 
				+
			
 
				+    # for left, right, label in zip(data_old['context_left'], data_old['context_right'], data_old['label']):
			
 
				+    #         y = np.zeros(output_shape)
			
 
				+    #         y[label] = 1
			
 
				+    #         left = eval(left)
			
 
				+    #         left = left[-40:]
			
 
				+    #         right = eval(right)
			
 
				+    #         right = right[:40]
			
 
				+    #         context = [left, right]
			
 
				+    #         # x = embedding(context, shape=input_shape2)
			
 
				+    #         data_x.append(context)
			
 
				+    #         data_y.append(y)
			
 
				+    for left, right, label,pre_label,is_same in zip(data_old['context_left'], data_old['context_right'], data_old['label'],
			
 
				+                                                    data_old['pre_label'],data_old['is_same']):
			
 
				+        if label==0:
			
 
				+            if is_same==1:
			
 
				+                pass
			
 
				+            else:
			
 
				+                if pre_label>3:
			
 
				+                    label = pre_label
			
 
				+                else:
			
 
				+                    continue
			
 
				+        y = np.zeros(output_shape)
			
 
				+        y[label] = 1
			
 
				+        left = eval(left)
			
 
				+        left = left[-40:]
			
 
				+        right = eval(right)
			
 
				+        right = right[:40]
			
 
				+        context = [left, right]
			
 
				+        # x = embedding(context, shape=input_shape2)
			
 
				+        data_x.append(context)
			
 
				+        data_y.append(y)
			
 
				+
			
 
				+    _data = [d for d in zip(data_x,data_y)]
			
 
				+    random.shuffle(_data)
			
 
				+    data_x = [i[0] for i in _data]
			
 
				+    data_y = [i[1] for i in _data]
			
 
				+    test_len = int(len(data_x) * 0.11)
			
 
				+    test_x = data_x[:test_len]
			
 
				+    test_y = data_y[:test_len]
			
 
				+    print("测试数据量：", len(test_x))
			
 
				+    train_x = data_x[test_len:]
			
 
				+    train_y = data_y[test_len:]
			
 
				+
			
 
				+    # for left, right, label,pre_label,is_same in zip(data_old['context_left'], data_old['context_right'], data_old['label'],
			
 
				+    #                                                 data_old['pre_label'],data_old['is_same']):
			
 
				+    #     # if label==0:
			
 
				+    #     #     if random.random()>0.25:
			
 
				+    #     #         continue
			
 
				+    #     if label==0:
			
 
				+    #         if is_same==1:
			
 
				+    #             pass
			
 
				+    #         else:
			
 
				+    #             if pre_label>3:
			
 
				+    #                 label = pre_label
			
 
				+    #             else:
			
 
				+    #                 continue
			
 
				+    #     y = np.zeros(output_shape)
			
 
				+    #     y[label] = 1
			
 
				+    #     left = eval(left)
			
 
				+    #     left = left[-40:]
			
 
				+    #     right = eval(right)
			
 
				+    #     right = right[:40]
			
 
				+    #     context = [left, right]
			
 
				+    #     # x = embedding(context, shape=input_shape2)
			
 
				+    #     train_x.append(context)
			
 
				+    #     train_y.append(y)
			
 
				+    print("训练数据量：", len(train_x))
			
 
				+
			
 
				+    # train_y, test_y = np.array(train_y), np.array(test_y)
			
 
				+    # train_x = np.array(train_x)
			
 
				+    # test_x = np.array(test_x)
			
 
				+    # test_x = np.transpose(test_x, (1, 0, 2, 3))
			
 
				+    # train_x, test_x = (np.transpose(train_x, (1, 0, 2, 3)), np.transpose(test_x, (1, 0, 2, 3)))
			
 
				+    training_generator = DataGenerator(train_x, train_y,is_train=True)
			
 
				+    # training_generator = DataGenerator(data_x, data_y)
			
 
				+    validation_generator = DataGenerator(test_x, test_y,is_train=False,shuffle=False)
			
 
				+
			
 
				+    # model = getModel3()
			
 
				+    model = getModel2()
			
 
				+    epochs = 100
			
 
				+    # batch_size = 256
			
 
				+    checkpoint = ModelCheckpoint("model_time_classify.weights",save_weights_only=True, monitor="val_loss", verbose=1,
			
 
				+                                 save_best_only=True, mode='min')
			
 
				+    # checkpoint = ModelCheckpoint("model_time_classify2.weights",save_weights_only=True, monitor="loss", verbose=1,
			
 
				+    #                                  save_best_only=True, mode='min')
			
 
				+
			
 
				+    history = model.fit_generator(
			
 
				+        generator=training_generator,
			
 
				+        validation_data=validation_generator,
			
 
				+        use_multiprocessing=True, workers=2,
			
 
				+        epochs=epochs,
			
 
				+        shuffle=True,
			
 
				+        callbacks=[checkpoint],
			
 
				+        class_weight='auto'
			
 
				+    )
			
 
				+
			
 
				 from keras.utils import Sequence,to_categorical
			
 
				 class DataGenerator(Sequence):
			
 
				     'Generates data for Keras'
			
 
				-    def __init__(self, texts, labels, batch_size=256,
			
 
				-                 n_classes=4, shuffle=True):
			
 
				+    def __init__(self, texts, labels, is_train=True,batch_size=256,
			
 
				+                 n_classes=len(time_label_dict), shuffle=True):
			
 
				         'Initialization'
			
 
				         # self.dim = dim
			
 
				         self.batch_size = batch_size
			
@@ -542,6 +872,7 @@ class DataGenerator(Sequence):
 
				         self.texts = texts
			
 
				         self.n_classes = n_classes
			
 
				         self.shuffle = shuffle
			
 
				+        self.is_train = is_train
			
 
				         self.on_epoch_end()
			
 
				 
			
 
				     def __len__(self):
			
@@ -583,8 +914,22 @@ class DataGenerator(Sequence):
 
				         # Generate data
			
 
				         for i, context in enumerate(list_texts):
			
 
				             # Store sample
			
 
				-            # tokens = preprocess2(text)
			
 
				-            # tokens = tokens[:maxlen]
			
 
				+            if self.is_train:
			
 
				+                left = context[0]
			
 
				+                if len(left) > 30:
			
 
				+                    if random.random() > 0.5:
			
 
				+                        left = left[2:]
			
 
				+                elif len(left) > 15:
			
 
				+                    if random.random() > 0.5:
			
 
				+                        left = left[1:]
			
 
				+                right = context[1]
			
 
				+                if len(right) > 30:
			
 
				+                    if random.random() > 0.5:
			
 
				+                        right = right[:-2]
			
 
				+                elif len(right) > 15:
			
 
				+                    if random.random() > 0.5:
			
 
				+                        right = right[:-1]
			
 
				+                context = [left, right]
			
 
				             words_matrix = embedding_mywords(context, shape=input_shape2)
			
 
				             # Store class
			
 
				             # y[i] = _label[i]
			
@@ -647,7 +992,11 @@ def predict3():
 
				     new_data.to_excel("new_tokens_data1_res.xlsx")
			
 
				 
			
 
				 def predict4():
			
 
				-    data = pd.read_csv("tokens_tolabel_data1_res11.csv", chunksize=3000)
			
 
				+    data = pd.read_csv("tokens_data_02_res6New.csv", chunksize=3000)
			
 
				+    # data = pd.read_excel("C:\\Users\\Administrator\\Desktop\\time_entity4.xlsx")
			
 
				+    # data.to_csv("C:\\Users\\Administrator\\Desktop\\time_entity4.csv")
			
 
				+    # data = pd.read_csv("C:\\Users\\Administrator\\Desktop\\time_entity4.csv", chunksize=3000)
			
 
				+
			
 
				     model1 = getModel2()
			
 
				     model1.load_weights("model_time_classify.weights")
			
 
				     new_data = pd.DataFrame()
			
@@ -671,14 +1020,15 @@ def predict4():
 
				         pre_y = model1.predict([test_x[0], test_x[1]])
			
 
				         _data['pre_label'] = [np.argmax(item) for item in pre_y]
			
 
				         _data['pre_label_prob'] = [max(item) for item in pre_y]
			
 
				-        _data['is_same'] = [1 if int(_label)==_pre else 0 for _label,_pre in zip(_data['re_label'],_data['pre_label'])]
			
 
				+        _data['is_same'] = [1 if int(_label)==_pre else 0 for _label,_pre in zip(_data['label'],_data['pre_label'])]
			
 
				         # _data['is_same'] = [1 if int(_re)==int(_pre) and int(_re)==int(_label) else 0 for _label,_re,_pre in zip(_data['label'],_data['re_label'],_data['pre_label'])]
			
 
				         # data['label'] = label
			
 
				         new_data = pd.concat([new_data, _data])
			
 
				         idx += 3000
			
 
				         print(idx)
			
 
				-    # data.to_csv("new_tokens_data1.csv")
			
 
				-    new_data.to_excel("tokens_tolabel_data1_res12.xlsx")
			
 
				+    # new_data.to_csv("tokens_data_02_res7New.csv")
			
 
				+    new_data.to_excel("tokens_data_02_res7New.xlsx")
			
 
				+    # new_data.to_excel("C:\\Users\\Administrator\\Desktop\\tokens_data_02_res7New.xlsx")
			
 
				 
			
 
				 
			
 
				 def predict():
			
@@ -863,7 +1213,7 @@ def save_model():
 
				             test_model = getModel2()
			
 
				             test_model.load_weights("model_time_classify.weights")
			
 
				             tf.saved_model.simple_save(sess,
			
 
				-                                       "models/timesplit_model/",
			
 
				+                                       "models/timesplit_model2/",
			
 
				                                        inputs={"input0": test_model.input[0],
			
 
				                                                "input1":test_model.input[1]
			
 
				                                                },
			
@@ -879,6 +1229,7 @@ if __name__ == '__main__':
 
				     # training()
			
 
				     # train2()
			
 
				     # train3()
			
 
				+    # train4()
			
 
				     # data_process()
			
 
				     # data_process2()
			
 
				     # data_process3()