|
@@ -1379,7 +1379,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
|
|
_entity = Entity(_sentence.doc_id, None, item[0], "phone", _sentence.sentence_index, begin_index, end_index, item[1],
|
|
_entity = Entity(_sentence.doc_id, None, item[0], "phone", _sentence.sentence_index, begin_index, end_index, item[1],
|
|
item[2],in_attachment=in_attachment)
|
|
item[2],in_attachment=in_attachment)
|
|
phone_entitys.append(_entity)
|
|
phone_entitys.append(_entity)
|
|
- print('phone_set:',set([ent.entity_text for ent in phone_entitys]))
|
|
|
|
|
|
+ # print('phone_set:',set([ent.entity_text for ent in phone_entitys]))
|
|
def is_company(entity,text):
|
|
def is_company(entity,text):
|
|
# 判断"公司"实体是否为地址地点
|
|
# 判断"公司"实体是否为地址地点
|
|
if entity.label!=5 and entity.values[entity.label]>0.5:
|
|
if entity.label!=5 and entity.values[entity.label]>0.5:
|
|
@@ -1444,6 +1444,9 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
|
|
if predicate=="rel_person":
|
|
if predicate=="rel_person":
|
|
if (_subject.label==0 and _object.entity_text in agency_contact ) or (_subject.label==1 and _object.entity_text in tenderee_contact):
|
|
if (_subject.label==0 and _object.entity_text in agency_contact ) or (_subject.label==1 and _object.entity_text in tenderee_contact):
|
|
continue
|
|
continue
|
|
|
|
+ # 角色为中标候选人,排除"质疑|投诉|监督|受理"相关的联系人
|
|
|
|
+ if _subject.label in [2,3,4] and re.search("质疑|投诉|监督|受理",list_sentence[_object.sentence_index].sentence_text[max(0,_object.wordOffset_begin-10):_object.wordOffset_begin]):
|
|
|
|
+ continue
|
|
distance = (tokens_num_dict[_object.sentence_index] + _object.begin_index) - (
|
|
distance = (tokens_num_dict[_object.sentence_index] + _object.begin_index) - (
|
|
tokens_num_dict[_subject.sentence_index] + _subject.end_index)
|
|
tokens_num_dict[_subject.sentence_index] + _subject.end_index)
|
|
if distance>0:
|
|
if distance>0:
|
|
@@ -1474,7 +1477,8 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
|
|
if temp.begin_index>combo[0].begin_index:
|
|
if temp.begin_index>combo[0].begin_index:
|
|
is_continue = True
|
|
is_continue = True
|
|
break
|
|
break
|
|
- if is_continue: continue
|
|
|
|
|
|
+ if is_continue:
|
|
|
|
+ continue
|
|
combo[0].pointer_person.append(combo[1])
|
|
combo[0].pointer_person.append(combo[1])
|
|
linked_company.add(combo[0])
|
|
linked_company.add(combo[0])
|
|
linked_person.add(combo[1])
|
|
linked_person.add(combo[1])
|
|
@@ -1800,9 +1804,13 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
|
|
for after_index in range(index + 1, min(len(split_entitys), index + 4)):
|
|
for after_index in range(index + 1, min(len(split_entitys), index + 4)):
|
|
after_entity = split_entitys[after_index]
|
|
after_entity = split_entitys[after_index]
|
|
if after_entity.entity_type in ['person']:
|
|
if after_entity.entity_type in ['person']:
|
|
|
|
+
|
|
# 实体为中标人/候选人,联系人已确定类别【1,2】
|
|
# 实体为中标人/候选人,联系人已确定类别【1,2】
|
|
if entity.label in [2, 3, 4] and after_entity.label in [1, 2]:
|
|
if entity.label in [2, 3, 4] and after_entity.label in [1, 2]:
|
|
break
|
|
break
|
|
|
|
+ # 角色为中标候选人,排除"质疑|投诉|监督|受理"相关的联系人
|
|
|
|
+ if entity.label in [2, 3, 4] and re.search("质疑|投诉|监督|受理", list_sentence[after_entity.sentence_index].sentence_text[max(0,after_entity.wordOffset_begin - 10):after_entity.wordOffset_begin]):
|
|
|
|
+ break
|
|
if after_entity.label in [1, 2, 3]:
|
|
if after_entity.label in [1, 2, 3]:
|
|
distance = (tokens_num_dict[
|
|
distance = (tokens_num_dict[
|
|
after_entity.sentence_index] + after_entity.begin_index) - (
|
|
after_entity.sentence_index] + after_entity.begin_index) - (
|
|
@@ -1825,6 +1833,8 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
|
|
match_list2.append(Match(entity, after_entity, value))
|
|
match_list2.append(Match(entity, after_entity, value))
|
|
match_nums += 1
|
|
match_nums += 1
|
|
if after_entity.entity_type in ['org', 'company']:
|
|
if after_entity.entity_type in ['org', 'company']:
|
|
|
|
+ if entity.label not in [2, 3, 4] and after_entity.label in [0, 1]:
|
|
|
|
+ break
|
|
# 解决在‘地址’中识别出org/company的问题
|
|
# 解决在‘地址’中识别出org/company的问题
|
|
# if entity.label in [0,1] and after_index==index+1 and after_entity.label not in [0,1]:
|
|
# if entity.label in [0,1] and after_index==index+1 and after_entity.label not in [0,1]:
|
|
if entity.label != 5 and after_index == index + 1 and (
|
|
if entity.label != 5 and after_index == index + 1 and (
|
|
@@ -2067,14 +2077,15 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
|
|
# if PackDict["Project"]["roleList"][i].role_name in ["tenderee","agency"]:
|
|
# if PackDict["Project"]["roleList"][i].role_name in ["tenderee","agency"]:
|
|
for _entity in list_entity:
|
|
for _entity in list_entity:
|
|
if _entity.entity_type in ['org','company']:
|
|
if _entity.entity_type in ['org','company']:
|
|
|
|
+ is_same = False
|
|
is_similar = False
|
|
is_similar = False
|
|
# entity_text相同
|
|
# entity_text相同
|
|
if _entity.entity_text==PackDict["Project"]["roleList"][i].entity_text:
|
|
if _entity.entity_text==PackDict["Project"]["roleList"][i].entity_text:
|
|
- is_similar = True
|
|
|
|
|
|
+ is_same = True
|
|
# entity.label为【0,1】
|
|
# entity.label为【0,1】
|
|
if _entity.label in [0,1] and dict_role_id[str(_entity.label)]==PackDict["Project"]["roleList"][i].role_name:
|
|
if _entity.label in [0,1] and dict_role_id[str(_entity.label)]==PackDict["Project"]["roleList"][i].role_name:
|
|
is_similar = True
|
|
is_similar = True
|
|
- if is_similar:
|
|
|
|
|
|
+ if is_same:
|
|
linked_entitys = _entity.linked_entitys
|
|
linked_entitys = _entity.linked_entitys
|
|
if linked_entitys:
|
|
if linked_entitys:
|
|
for linked_entity in linked_entitys:
|
|
for linked_entity in linked_entitys:
|
|
@@ -2084,6 +2095,14 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
|
|
for _p in _phone:
|
|
for _p in _phone:
|
|
if (_pointer_person.entity_text,_p) not in PackDict["Project"]["roleList"][i].linklist:
|
|
if (_pointer_person.entity_text,_p) not in PackDict["Project"]["roleList"][i].linklist:
|
|
PackDict["Project"]["roleList"][i].linklist.append((_pointer_person.entity_text,_p))
|
|
PackDict["Project"]["roleList"][i].linklist.append((_pointer_person.entity_text,_p))
|
|
|
|
+ elif is_similar:
|
|
|
|
+ pointer_person = _entity.pointer_person if _entity.pointer_person else []
|
|
|
|
+ for _pointer_person in pointer_person:
|
|
|
|
+ _phone = [p.entity_text for p in _pointer_person.person_phone] if _pointer_person.person_phone else []
|
|
|
|
+ for _p in _phone:
|
|
|
|
+ if (_pointer_person.entity_text, _p) not in PackDict["Project"]["roleList"][i].linklist:
|
|
|
|
+ PackDict["Project"]["roleList"][i].linklist.append(
|
|
|
|
+ (_pointer_person.entity_text, _p))
|
|
|
|
|
|
# "roleList"中联系人电话去重
|
|
# "roleList"中联系人电话去重
|
|
for i in range(len(PackDict["Project"]["roleList"])):
|
|
for i in range(len(PackDict["Project"]["roleList"])):
|