3 years ago · 32fc2c0d10
--- a/BiddingKG/dl/interface/getAttributes.py
+++ b/BiddingKG/dl/interface/getAttributes.py
@@ -1093,7 +1093,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_entity
 
				         return [(main_roles[row], attributes[col]) for row, col in max_dispatch]
			
 
				 
			
 
				     # 正则提取电话号码实体
			
 
				-    key_word = re.compile('((?:电话|联系方式|联系人).{0,4}?)([0-1]\d{6,11})')
			
 
				+    # key_word = re.compile('((?:电话|联系方式|联系人).{0,4}?)([0-1]\d{6,11})')
			
 
				     phone = re.compile('1[3|4|5|6|7|8|9][0-9][-—－―]?\d{4}[-—－―]?\d{4}|'
			
 
				                        '\+86.?1[3|4|5|6|7|8|9]\d{9}|'
			
 
				                        # '0[^0]\d{1,2}[-—－―][1-9]\d{6,7}/[1-9]\d{6,10}|'
			
@@ -1118,8 +1118,6 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_entity
 
				         res_set = set()
			
 
				         for i in re.finditer(phone, sentence_text):
			
 
				             res_set.add((i.group(), i.start(), i.end()))
			
 
				-        # for i in re.finditer(key_word, sentence_text):
			
 
				-        #     res_set.add((i.group(2), i.start() + len(i.group(1)), i.end()))
			
 
				         res_set = sorted(list(res_set),key=lambda x:x[1])
			
 
				         last_phone_mask = True
			
 
				         for item_idx in range(len(res_set)):
			
@@ -1742,6 +1740,73 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_entity
 
				         for _item in remove_list:
			
 
				             PackDict["Project"]["roleList"][i].linklist.remove(_item)
			
 
				 
			
 
				+    # 联系人——电子邮箱链接
			
 
				+    temporary_list3 = [entity for entity in list_entity if entity.entity_type=='email' or (entity.entity_type=='person' and entity.label in [1,2,3])]
			
 
				+    temporary_list3 = sorted(temporary_list3, key=lambda x: (x.sentence_index, x.begin_index))
			
 
				+    new_temporary_list3 = []
			
 
				+    for _split in new_split_list:
			
 
				+        temp_list = []
			
 
				+        for _entity in temporary_list3:
			
 
				+            if words_num_dict[_entity.sentence_index] + _entity.wordOffset_begin >= _split[0] and words_num_dict[
			
 
				+                _entity.sentence_index] + _entity.wordOffset_end < _split[1]:
			
 
				+                temp_list.append(_entity)
			
 
				+            elif words_num_dict[_entity.sentence_index] + _entity.wordOffset_begin >= _split[1]:
			
 
				+                break
			
 
				+        new_temporary_list3.append(temp_list)
			
 
				+    # print(new_temporary_list3)
			
 
				+    match_list3 = []
			
 
				+    for split_index in range(len(new_temporary_list3)):
			
 
				+        split_entitys = new_temporary_list3[split_index]
			
 
				+        for index in range(len(split_entitys)):
			
 
				+            entity = split_entitys[index]
			
 
				+            if entity.entity_type == 'person':
			
 
				+                match_nums = 0
			
 
				+                for after_index in range(index + 1, min(len(split_entitys), index + 4)):
			
 
				+                    after_entity = split_entitys[after_index]
			
 
				+                    if match_nums > 2:
			
 
				+                        break
			
 
				+                    if after_entity.entity_type == 'email':
			
 
				+                        distance = (tokens_num_dict[after_entity.sentence_index] + after_entity.begin_index) - (
			
 
				+                                           tokens_num_dict[entity.sentence_index] + entity.end_index)
			
 
				+                        sentence_distance = after_entity.sentence_index - entity.sentence_index
			
 
				+                        if sentence_distance == 0:
			
 
				+                            if distance < 100:
			
 
				+                                if (entity.label == 0 and after_entity.label == 1) or (
			
 
				+                                        entity.label == 1 and after_entity.label == 2):
			
 
				+                                    distance = distance / 100
			
 
				+                                value = (-1 / 2 * (distance ** 2)) / 10000
			
 
				+                                match_list3.append(Match(entity, after_entity, value))
			
 
				+                                match_nums += 1
			
 
				+                        else:
			
 
				+                            if distance < 60:
			
 
				+                                if (entity.label == 0 and after_entity.label == 1) or (
			
 
				+                                        entity.label == 1 and after_entity.label == 2):
			
 
				+                                    distance = distance / 100
			
 
				+                                value = (-1 / 2 * (distance ** 2)) / 10000
			
 
				+                                match_list3.append(Match(entity, after_entity, value))
			
 
				+                                match_nums += 1
			
 
				+                # 前向查找匹配
			
 
				+                # if not match_nums:
			
 
				+                if index != 0:
			
 
				+                    previous_entity = split_entitys[index - 1]
			
 
				+                    if previous_entity.entity_type == 'email':
			
 
				+                        if previous_entity.sentence_index == entity.sentence_index:
			
 
				+                            distance = (tokens_num_dict[entity.sentence_index] + entity.begin_index) - (
			
 
				+                                    tokens_num_dict[
			
 
				+                                        previous_entity.sentence_index] + previous_entity.end_index)
			
 
				+                            if distance < 30:
			
 
				+                                # 距离相等时，前向添加处罚值
			
 
				+                                # distance += 1
			
 
				+                                # 前向 没有 /10000
			
 
				+                                value = (-1 / 2 * (distance ** 2))
			
 
				+                                match_list3.append(Match(entity, previous_entity, value))
			
 
				+    # print(match_list3)
			
 
				+    # km算法分配求解
			
 
				+    result3 = dispatch(match_list3)
			
 
				+    for match in result3:
			
 
				+        match_person = match[0]
			
 
				+        match_email = match[1]
			
 
				+        match_person.pointer_email = match_email
			
 
				 
			
 
				     # # 1）第一个公司实体的招标人，则看看下一个实体是否为代理人，如果是则联系人错位连接 。2）在同一句中往后找联系人。3）连接不上在整个文章找联系人。
			
 
				     # temp_ent_list = []  # 临时列表，记录0,1角色及3联系人
			
--- a/BiddingKG/dl/relation_extraction/model.py
+++ b/BiddingKG/dl/relation_extraction/model.py
@@ -234,7 +234,7 @@ class Relation_extraction():
 
				         # self.word2vec = None
			
 
				         # if self.is_train:
			
 
				         #     self.word2vec = load('words2v_matrix.pkl')
			
 
				-        self.model_path = os.path.dirname(__file__)+'/../relation_extraction/models/my_best_model_oneoutput.weights'
			
 
				+        self.model_path = os.path.dirname(__file__)+'/../relation_extraction/models/my_best_model_oneoutput2.weights'
			
 
				         self.get_model()
			
 
				         if self.model_path:
			
 
				             self.train_model.load_weights(self.model_path)
			
--- a/BiddingKG/dl/relation_extraction/models/my_best_model_oneoutput2.weights
+++ b/BiddingKG/dl/relation_extraction/models/my_best_model_oneoutput2.weights
--- a/BiddingKG/dl/relation_extraction/models2/object_model/saved_model.pb
+++ b/BiddingKG/dl/relation_extraction/models2/object_model/saved_model.pb
--- a/BiddingKG/dl/relation_extraction/models2/object_model/variables/variables.data-00000-of-00001
+++ b/BiddingKG/dl/relation_extraction/models2/object_model/variables/variables.data-00000-of-00001
--- a/BiddingKG/dl/relation_extraction/models2/object_model/variables/variables.index
+++ b/BiddingKG/dl/relation_extraction/models2/object_model/variables/variables.index
--- a/BiddingKG/dl/relation_extraction/models2/subject_model/saved_model.pb
+++ b/BiddingKG/dl/relation_extraction/models2/subject_model/saved_model.pb
--- a/BiddingKG/dl/relation_extraction/models2/subject_model/variables/variables.data-00000-of-00001
+++ b/BiddingKG/dl/relation_extraction/models2/subject_model/variables/variables.data-00000-of-00001
--- a/BiddingKG/dl/relation_extraction/models2/subject_model/variables/variables.index
+++ b/BiddingKG/dl/relation_extraction/models2/subject_model/variables/variables.index
--- a/BiddingKG/dl/test/测试整个要素提取流程.py
+++ b/BiddingKG/dl/test/测试整个要素提取流程.py
@@ -121,8 +121,10 @@ def predict(doc_id,text):
 
				         for entity in entitys:
			
 
				             # print('**********实体信息****************')
			
 
				             if entity.entity_type=='person':
			
 
				-                print("联系方式：",end=' ')
			
 
				+                print("联系人-电话：",end=' ')
			
 
				                 print(entity.entity_text,[i.entity_text for i in entity.person_phone] if entity.person_phone else None,entity.label,entity.values)
			
 
				+                if entity.pointer_email:
			
 
				+                    print("联系人-邮箱：",entity.entity_text,entity.pointer_email.entity_text)
			
 
				                 # print(entity.begin_index, entity.end_index)
			
 
				                 print(entity.sentence_index)
			
 
				                 pass