|
@@ -1424,6 +1424,11 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
|
|
|
relation_list.extend(relationExtraction_model.predict(_text_data,_pre_data))
|
|
|
temp_data = []
|
|
|
start = start + maxlen - 120
|
|
|
+ if temp_data:
|
|
|
+ deal_data += len(temp_data)
|
|
|
+ if deal_data <= 4:
|
|
|
+ for _text_data, _pre_data in temp_data:
|
|
|
+ relation_list.extend(relationExtraction_model.predict(_text_data, _pre_data))
|
|
|
# print("预测数据:",len(temp_data))
|
|
|
# 去重结果
|
|
|
relation_list = list(set(relation_list))
|
|
@@ -1514,6 +1519,53 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
|
|
|
PackDict["Project"]["roleList"][i].linklist.append((combo[0].entity_text,combo[1].entity_text))
|
|
|
break
|
|
|
# print(3,combo[0].entity_text,combo[1].entity_text)
|
|
|
+
|
|
|
+ # "公司——地址" 链接规则补充
|
|
|
+ company_lacation_EntityList = [ent for ent in pre_entity if ent.entity_type in ['company', 'org', 'location']]
|
|
|
+ company_lacation_EntityList = sorted(company_lacation_EntityList, key=lambda x: (x.sentence_index, x.begin_index))
|
|
|
+ t_match_list = []
|
|
|
+ for ent_idx in range(len(company_lacation_EntityList)):
|
|
|
+ entity = company_lacation_EntityList[ent_idx]
|
|
|
+ if entity.entity_type in ['company', 'org']:
|
|
|
+ match_nums = 0
|
|
|
+ company_nums = 0 # 经过其他公司的数量
|
|
|
+ location_nums = 0 # 经过地址的数量
|
|
|
+ for after_index in range(ent_idx + 1, min(len(company_lacation_EntityList), ent_idx + 5)):
|
|
|
+ after_entity = company_lacation_EntityList[after_index]
|
|
|
+ if after_entity.entity_type == "location":
|
|
|
+ distance = (tokens_num_dict[after_entity.sentence_index] + after_entity.begin_index) - (
|
|
|
+ tokens_num_dict[entity.sentence_index] + entity.end_index)
|
|
|
+ location_nums += 1
|
|
|
+ if distance > 100 or location_nums >= 3:
|
|
|
+ break
|
|
|
+ sentence_distance = after_entity.sentence_index - entity.sentence_index
|
|
|
+ value = (-1 / 2 * (distance ** 2)) / 10000
|
|
|
+ if sentence_distance == 0:
|
|
|
+ if distance < 80:
|
|
|
+ t_match_list.append(Match(entity, after_entity, value))
|
|
|
+ match_nums += 1
|
|
|
+ if company_nums:
|
|
|
+ break
|
|
|
+ else:
|
|
|
+ if distance < 50:
|
|
|
+ t_match_list.append(Match(entity, after_entity, value))
|
|
|
+ match_nums += 1
|
|
|
+ if company_nums:
|
|
|
+ break
|
|
|
+ else:
|
|
|
+ # type:company/org
|
|
|
+ company_nums += 1
|
|
|
+ if entity.label in [2, 3, 4] and after_entity.label in [0, 1]:
|
|
|
+ break
|
|
|
+
|
|
|
+ # km算法分配求解
|
|
|
+ relate_location_result = dispatch(t_match_list)
|
|
|
+ relate_location_result = sorted(relate_location_result, key=lambda x: (x[0].sentence_index, x[0].begin_index))
|
|
|
+ for match in relate_location_result:
|
|
|
+ _company = match[0]
|
|
|
+ _relation = match[1]
|
|
|
+ if not _company.pointer_address:
|
|
|
+ _company.pointer_address = _relation
|
|
|
# "联系人——联系电话" 链接规则补充
|
|
|
person_phone_EntityList = [ent for ent in pre_entity+ phone_entitys if ent.entity_type not in ['company','org','location']]
|
|
|
person_phone_EntityList = sorted(person_phone_EntityList, key=lambda x: (x.sentence_index, x.begin_index))
|
|
@@ -1833,7 +1885,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
|
|
|
match_list2.append(Match(entity, after_entity, value))
|
|
|
match_nums += 1
|
|
|
if after_entity.entity_type in ['org', 'company']:
|
|
|
- if entity.label not in [2, 3, 4] and after_entity.label in [0, 1]:
|
|
|
+ if entity.label in [2, 3, 4] and after_entity.label in [0, 1]:
|
|
|
break
|
|
|
# 解决在‘地址’中识别出org/company的问题
|
|
|
# if entity.label in [0,1] and after_index==index+1 and after_entity.label not in [0,1]:
|
|
@@ -2072,18 +2124,17 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
|
|
|
continue
|
|
|
|
|
|
# 统一同类角色的属性
|
|
|
- if PackDict.get("Project"):
|
|
|
- for i in range(len(PackDict["Project"]["roleList"])):
|
|
|
- # if PackDict["Project"]["roleList"][i].role_name in ["tenderee","agency"]:
|
|
|
+ for k in PackDict.keys():
|
|
|
+ for i in range(len(PackDict[k]["roleList"])):
|
|
|
for _entity in list_entity:
|
|
|
if _entity.entity_type in ['org','company']:
|
|
|
is_same = False
|
|
|
is_similar = False
|
|
|
# entity_text相同
|
|
|
- if _entity.entity_text==PackDict["Project"]["roleList"][i].entity_text:
|
|
|
+ if _entity.entity_text==PackDict[k]["roleList"][i].entity_text:
|
|
|
is_same = True
|
|
|
# entity.label为【0,1】
|
|
|
- if _entity.label in [0,1] and dict_role_id[str(_entity.label)]==PackDict["Project"]["roleList"][i].role_name:
|
|
|
+ if _entity.label in [0,1] and dict_role_id[str(_entity.label)]==PackDict[k]["roleList"][i].role_name:
|
|
|
is_similar = True
|
|
|
if is_same:
|
|
|
linked_entitys = _entity.linked_entitys
|
|
@@ -2093,35 +2144,48 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
|
|
|
for _pointer_person in pointer_person:
|
|
|
_phone = [p.entity_text for p in _pointer_person.person_phone] if _pointer_person.person_phone else []
|
|
|
for _p in _phone:
|
|
|
- if (_pointer_person.entity_text,_p) not in PackDict["Project"]["roleList"][i].linklist:
|
|
|
- PackDict["Project"]["roleList"][i].linklist.append((_pointer_person.entity_text,_p))
|
|
|
+ if (_pointer_person.entity_text,_p) not in PackDict[k]["roleList"][i].linklist:
|
|
|
+ PackDict[k]["roleList"][i].linklist.append((_pointer_person.entity_text,_p))
|
|
|
elif is_similar:
|
|
|
pointer_person = _entity.pointer_person if _entity.pointer_person else []
|
|
|
for _pointer_person in pointer_person:
|
|
|
_phone = [p.entity_text for p in _pointer_person.person_phone] if _pointer_person.person_phone else []
|
|
|
for _p in _phone:
|
|
|
- if (_pointer_person.entity_text, _p) not in PackDict["Project"]["roleList"][i].linklist:
|
|
|
- PackDict["Project"]["roleList"][i].linklist.append(
|
|
|
+ if (_pointer_person.entity_text, _p) not in PackDict[k]["roleList"][i].linklist:
|
|
|
+ PackDict[k]["roleList"][i].linklist.append(
|
|
|
(_pointer_person.entity_text, _p))
|
|
|
|
|
|
# "roleList"中联系人电话去重
|
|
|
- for i in range(len(PackDict["Project"]["roleList"])):
|
|
|
- # print(123, PackDict["Project"]["roleList"][i].linklist)
|
|
|
- # 带有联系人的电话
|
|
|
- with_person = [person_phone[1] for person_phone in PackDict["Project"]["roleList"][i].linklist if person_phone[0]]
|
|
|
- # 带有电话的联系人
|
|
|
- with_phone = [person_phone[0] for person_phone in PackDict["Project"]["roleList"][i].linklist if person_phone[1]]
|
|
|
- remove_list = []
|
|
|
- for item in PackDict["Project"]["roleList"][i].linklist:
|
|
|
- if not item[0]:
|
|
|
- if item[1] in with_person:
|
|
|
- # 删除重复的无联系人电话
|
|
|
- remove_list.append(item)
|
|
|
- elif not item[1]:
|
|
|
- if item[0] in with_phone:
|
|
|
- remove_list.append(item)
|
|
|
- for _item in remove_list:
|
|
|
- PackDict["Project"]["roleList"][i].linklist.remove(_item)
|
|
|
+ for k in PackDict.keys():
|
|
|
+ for i in range(len(PackDict[k]["roleList"])):
|
|
|
+ # 带有联系人的电话
|
|
|
+ with_person = [person_phone[1] for person_phone in PackDict[k]["roleList"][i].linklist if person_phone[0]]
|
|
|
+ # 带有电话的联系人
|
|
|
+ with_phone = [person_phone[0] for person_phone in PackDict[k]["roleList"][i].linklist if person_phone[1]]
|
|
|
+ remove_list = []
|
|
|
+ for item in PackDict[k]["roleList"][i].linklist:
|
|
|
+ if not item[0]:
|
|
|
+ if item[1] in with_person:
|
|
|
+ # 删除重复的无联系人电话
|
|
|
+ remove_list.append(item)
|
|
|
+ elif not item[1]:
|
|
|
+ if item[0] in with_phone:
|
|
|
+ remove_list.append(item)
|
|
|
+ for _item in remove_list:
|
|
|
+ PackDict[k]["roleList"][i].linklist.remove(_item)
|
|
|
+
|
|
|
+ # PackDict更新company/org地址
|
|
|
+ for ent in pre_entity:
|
|
|
+ if ent.entity_type in ['company','org']:
|
|
|
+ if ent.pointer_address:
|
|
|
+ for k in PackDict.keys():
|
|
|
+ for i in range(len(PackDict[k]["roleList"])):
|
|
|
+ if PackDict[k]["roleList"][i].entity_text == ent.entity_text:
|
|
|
+ if not PackDict[k]["roleList"][i].address:
|
|
|
+ PackDict[k]["roleList"][i].address = ent.pointer_address.entity_text
|
|
|
+ else:
|
|
|
+ if len(ent.pointer_address.entity_text) > len(PackDict[k]["roleList"][i].address):
|
|
|
+ PackDict[k]["roleList"][i].address = ent.pointer_address.entity_text
|
|
|
|
|
|
# 联系人——电子邮箱链接
|
|
|
temporary_list3 = [entity for entity in list_entity if entity.entity_type=='email' or (entity.entity_type=='person' and entity.label in [1,2,3])]
|