|
@@ -1292,11 +1292,18 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
|
|
|
agency_phone = set()
|
|
|
winter_contact = set()
|
|
|
rule_winter_phone = set()
|
|
|
+ tenderee_entity_set = set()
|
|
|
+ agency_entity_set = set()
|
|
|
for _person in person_list:
|
|
|
if _person.label == 1:
|
|
|
tenderee_contact.add(_person.entity_text)
|
|
|
if _person.label == 2:
|
|
|
agency_contact.add(_person.entity_text)
|
|
|
+ for _entity in [entity for entity in list_entity if entity.entity_type in ['company','org']]:
|
|
|
+ if _entity.label==0:
|
|
|
+ tenderee_entity_set.add(_entity.entity_text)
|
|
|
+ elif _entity.label==1:
|
|
|
+ agency_entity_set.add(_entity.entity_text)
|
|
|
# 正则匹配无 '主体/联系人' 的电话
|
|
|
# 例:"采购人联系方式:0833-5226788,"
|
|
|
phone_pattern = '(1[3-9][0-9][-—-―]?\d{4}[-—-―]?\d{4}|' \
|
|
@@ -1653,7 +1660,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
|
|
|
# print('not_win_tenderer_contact1')
|
|
|
continue
|
|
|
# 角色为招标/代理人,排除"纪检|监察"相关的联系人
|
|
|
- if _subject.label in [0,1] and re.search("纪检|监察|乙方|中标",list_sentence[_object.sentence_index].sentence_text[max(0,_object.wordOffset_begin - 10):_object.wordOffset_begin]):
|
|
|
+ if (_subject.label in [0,1] or _subject.entity_text in tenderee_entity_set|agency_entity_set) and re.search("纪检|监察|投诉|监督|乙方|中标",list_sentence[_object.sentence_index].sentence_text[max(0,_object.wordOffset_begin - 10):_object.wordOffset_begin]):
|
|
|
# if _subject.label in [0,1] and re.search("纪检|监察|乙方|中标",list_sentence[_object.sentence_index].sentence_text[_subject.end_index:_object.wordOffset_begin]):
|
|
|
continue
|
|
|
if _object.sentence_index!=0 and _object.wordOffset_begin<=10:
|
|
@@ -2041,7 +2048,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
|
|
|
"联系人|联系方式|电话|负责人|经理|法人|法定代表人", list_sentence[after_entity.sentence_index].sentence_text[max(0,after_entity.wordOffset_begin - 10):after_entity.wordOffset_begin]):
|
|
|
continue
|
|
|
# 角色为招标/代理人,排除"纪检|监察"相关的联系人
|
|
|
- if entity.label in [0, 1] and re.search("纪检|监察|乙方|中标", list_sentence[after_entity.sentence_index].sentence_text[max(0,after_entity.wordOffset_begin - 10):after_entity.wordOffset_begin]):
|
|
|
+ if (entity.label in [0,1] or entity.entity_text in tenderee_entity_set|agency_entity_set) and re.search("纪检|监察|投诉|监督|乙方|中标", list_sentence[after_entity.sentence_index].sentence_text[max(0,after_entity.wordOffset_begin - 10):after_entity.wordOffset_begin]):
|
|
|
break
|
|
|
if after_entity.sentence_index != 0 and after_entity.wordOffset_begin <= 10:
|
|
|
if entity.label in [2, 3, 4] and re.search("请.{0,5}联系",list_sentence[after_entity.sentence_index - 1].sentence_text[-10:] +
|
|
@@ -2070,7 +2077,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
|
|
|
max(0, previous_entity.wordOffset_begin - 10):previous_entity.wordOffset_begin]):
|
|
|
continue
|
|
|
# 角色为招标/代理人,排除"纪检|监察"相关的联系人
|
|
|
- if entity.label in [0, 1] and re.search("纪检|监察|乙方|中标", list_sentence[previous_entity.sentence_index].sentence_text[
|
|
|
+ if (entity.label in [0,1] or entity.entity_text in tenderee_entity_set|agency_entity_set) and re.search("纪检|监察|投诉|监督|乙方|中标", list_sentence[previous_entity.sentence_index].sentence_text[
|
|
|
max(0,previous_entity.wordOffset_begin - 10):previous_entity.wordOffset_begin]):
|
|
|
break
|
|
|
if previous_entity.sentence_index == entity.sentence_index:
|
|
@@ -2219,7 +2226,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
|
|
|
if entity.label in [2, 3, 4] and not after_entity.person_phone and not re.search("联系人|联系方式|电话|负责人|经理|法人|法定代表人",list_sentence[after_entity.sentence_index].sentence_text[max(0,after_entity.wordOffset_begin - 10):after_entity.wordOffset_begin]):
|
|
|
continue
|
|
|
# 角色为招标/代理人,排除"纪检|监察"相关的联系人
|
|
|
- if entity.label in [0,1] and re.search("纪检|监察|乙方|中标",list_sentence[after_entity.sentence_index].sentence_text[max(0,after_entity.wordOffset_begin - 10):after_entity.wordOffset_begin]):
|
|
|
+ if (entity.label in [0,1] or entity.entity_text in tenderee_entity_set|agency_entity_set) and re.search("纪检|监察|投诉|监督|乙方|中标",list_sentence[after_entity.sentence_index].sentence_text[max(0,after_entity.wordOffset_begin - 10):after_entity.wordOffset_begin]):
|
|
|
break
|
|
|
if after_entity.sentence_index != 0 and after_entity.wordOffset_begin <= 10:
|
|
|
if entity.label in [2, 3, 4] and re.search("请.{0,5}联系",
|
|
@@ -2395,7 +2402,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
|
|
|
list_sentence[previous_entity.sentence_index].sentence_text[max(0,previous_entity.wordOffset_begin - 10):previous_entity.wordOffset_begin]):
|
|
|
continue
|
|
|
# 角色为招标/代理人,排除"纪检|监察"相关的联系人
|
|
|
- if entity.label in [0, 1] and re.search("纪检|监察|乙方|中标", list_sentence[previous_entity.sentence_index].sentence_text[
|
|
|
+ if (entity.label in [0,1] or entity.entity_text in tenderee_entity_set|agency_entity_set) and re.search("纪检|监察|投诉|监督|乙方|中标", list_sentence[previous_entity.sentence_index].sentence_text[
|
|
|
max(0,previous_entity.wordOffset_begin - 10):previous_entity.wordOffset_begin]):
|
|
|
break
|
|
|
if previous_entity.sentence_index == entity.sentence_index:
|
|
@@ -3163,23 +3170,23 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
|
|
|
tenderee_agency_role[0].linklist.append(("", sentence_phone[0]))
|
|
|
get_contacts = True
|
|
|
break
|
|
|
- if not get_contacts:
|
|
|
- # 直接取文中倒数第一个联系人
|
|
|
- for _entity in temporary_list2[::-1]:
|
|
|
- if _entity.entity_type=='person' and _entity.label==3:
|
|
|
- if _entity.person_phone:
|
|
|
- _phone = [p.entity_text for p in _entity.person_phone]
|
|
|
- for _p in _phone:
|
|
|
- if _entity.entity_text not in exist_person and _p not in ",".join(exist_phone):
|
|
|
- tenderee_agency_role[0].linklist.append((_entity.entity_text, _p))
|
|
|
- get_contacts = True
|
|
|
- break
|
|
|
- if not get_contacts:
|
|
|
- # 如果文中只有一个“phone”实体,则直接取为联系人电话
|
|
|
- if len(phone_entitys) == 1:
|
|
|
- if phone_entitys[0].entity_text not in ",".join(exist_phone):
|
|
|
- tenderee_agency_role[0].linklist.append(("", phone_entitys[0].entity_text))
|
|
|
- get_contacts = True
|
|
|
+ # if not get_contacts: # 会召回错误数据,不启用规则
|
|
|
+ # # 直接取文中倒数第一个联系人
|
|
|
+ # for _entity in temporary_list2[::-1]:
|
|
|
+ # if _entity.entity_type=='person' and _entity.label==3:
|
|
|
+ # if _entity.person_phone:
|
|
|
+ # _phone = [p.entity_text for p in _entity.person_phone]
|
|
|
+ # for _p in _phone:
|
|
|
+ # if _entity.entity_text not in exist_person and _p not in ",".join(exist_phone):
|
|
|
+ # tenderee_agency_role[0].linklist.append((_entity.entity_text, _p))
|
|
|
+ # get_contacts = True
|
|
|
+ # break
|
|
|
+ # if not get_contacts: # 会召回错误数据,不启用规则
|
|
|
+ # # 如果文中只有一个“phone”实体,则直接取为联系人电话
|
|
|
+ # if len(phone_entitys) == 1:
|
|
|
+ # if phone_entitys[0].entity_text not in ",".join(exist_phone):
|
|
|
+ # tenderee_agency_role[0].linklist.append(("", phone_entitys[0].entity_text))
|
|
|
+ # get_contacts = True
|
|
|
if not get_contacts:
|
|
|
# 通过大纲Outline类直接取电话
|
|
|
if len(new_split_list) > 1:
|