Ver Fonte

联系人补充规则修正

znj há 2 anos
pai
commit
d0ca0068d7
1 ficheiro alterado com 23 adições e 4 exclusões
  1. 23 4
      BiddingKG/dl/interface/getAttributes.py

+ 23 - 4
BiddingKG/dl/interface/getAttributes.py

@@ -1379,7 +1379,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
             _entity = Entity(_sentence.doc_id, None, item[0], "phone", _sentence.sentence_index, begin_index, end_index, item[1],
                              item[2],in_attachment=in_attachment)
             phone_entitys.append(_entity)
-    print('phone_set:',set([ent.entity_text for ent in phone_entitys]))
+    # print('phone_set:',set([ent.entity_text for ent in phone_entitys]))
     def is_company(entity,text):
         # 判断"公司"实体是否为地址地点
         if entity.label!=5 and entity.values[entity.label]>0.5:
@@ -1444,6 +1444,9 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                     if predicate=="rel_person":
                         if (_subject.label==0 and _object.entity_text in agency_contact ) or (_subject.label==1 and _object.entity_text in tenderee_contact):
                             continue
+                        # 角色为中标候选人,排除"质疑|投诉|监督|受理"相关的联系人
+                        if _subject.label in [2,3,4] and re.search("质疑|投诉|监督|受理",list_sentence[_object.sentence_index].sentence_text[max(0,_object.wordOffset_begin-10):_object.wordOffset_begin]):
+                            continue
                     distance = (tokens_num_dict[_object.sentence_index] + _object.begin_index) - (
                                 tokens_num_dict[_subject.sentence_index] + _subject.end_index)
                     if distance>0:
@@ -1474,7 +1477,8 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                             if temp.begin_index>combo[0].begin_index:
                                 is_continue = True
                                 break
-                if is_continue: continue
+                if is_continue:
+                    continue
                 combo[0].pointer_person.append(combo[1])
                 linked_company.add(combo[0])
                 linked_person.add(combo[1])
@@ -1800,9 +1804,13 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                         for after_index in range(index + 1, min(len(split_entitys), index + 4)):
                             after_entity = split_entitys[after_index]
                             if after_entity.entity_type in ['person']:
+
                                 # 实体为中标人/候选人,联系人已确定类别【1,2】
                                 if entity.label in [2, 3, 4] and after_entity.label in [1, 2]:
                                     break
+                                # 角色为中标候选人,排除"质疑|投诉|监督|受理"相关的联系人
+                                if entity.label in [2, 3, 4] and re.search("质疑|投诉|监督|受理", list_sentence[after_entity.sentence_index].sentence_text[max(0,after_entity.wordOffset_begin - 10):after_entity.wordOffset_begin]):
+                                    break
                                 if after_entity.label in [1, 2, 3]:
                                     distance = (tokens_num_dict[
                                                     after_entity.sentence_index] + after_entity.begin_index) - (
@@ -1825,6 +1833,8 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                                             match_list2.append(Match(entity, after_entity, value))
                                             match_nums += 1
                             if after_entity.entity_type in ['org', 'company']:
+                                if entity.label not in [2, 3, 4] and after_entity.label in [0, 1]:
+                                    break
                                 # 解决在‘地址’中识别出org/company的问题
                                 # if entity.label in [0,1] and after_index==index+1 and after_entity.label not in [0,1]:
                                 if entity.label != 5 and after_index == index + 1 and (
@@ -2067,14 +2077,15 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
             # if PackDict["Project"]["roleList"][i].role_name in ["tenderee","agency"]:
             for _entity in list_entity:
                 if _entity.entity_type in ['org','company']:
+                    is_same = False
                     is_similar = False
                     # entity_text相同
                     if _entity.entity_text==PackDict["Project"]["roleList"][i].entity_text:
-                        is_similar = True
+                        is_same = True
                     # entity.label为【0,1】
                     if _entity.label in [0,1] and dict_role_id[str(_entity.label)]==PackDict["Project"]["roleList"][i].role_name:
                         is_similar = True
-                    if is_similar:
+                    if is_same:
                         linked_entitys = _entity.linked_entitys
                         if linked_entitys:
                             for linked_entity in linked_entitys:
@@ -2084,6 +2095,14 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                                     for _p in _phone:
                                         if (_pointer_person.entity_text,_p) not in PackDict["Project"]["roleList"][i].linklist:
                                             PackDict["Project"]["roleList"][i].linklist.append((_pointer_person.entity_text,_p))
+                    elif is_similar:
+                        pointer_person = _entity.pointer_person if _entity.pointer_person else []
+                        for _pointer_person in pointer_person:
+                            _phone = [p.entity_text for p in _pointer_person.person_phone] if _pointer_person.person_phone else []
+                            for _p in _phone:
+                                if (_pointer_person.entity_text, _p) not in PackDict["Project"]["roleList"][i].linklist:
+                                    PackDict["Project"]["roleList"][i].linklist.append(
+                                        (_pointer_person.entity_text, _p))
 
     # "roleList"中联系人电话去重
     for i in range(len(PackDict["Project"]["roleList"])):