|
@@ -76,17 +76,19 @@ def link_entitys(list_entitys,on_value=1):#on_value=0.81
|
|
|
if _entity.entity_type in ["org","company"]:
|
|
|
range_entity.append(_entity)
|
|
|
range_entity = range_entity[:1000]
|
|
|
- for first_i in range(len(range_entity)):
|
|
|
- _entity = range_entity[first_i]
|
|
|
- for second_i in range(first_i+1,len(range_entity)):
|
|
|
- _ent = range_entity[second_i]
|
|
|
- # 2021/5/21 update: 两个实体标签互斥(一个是招标人、一个是代理人)且entity_text不相等时,跳过
|
|
|
- if _entity.entity_text != _ent.entity_text and _entity.label != _ent.label and _entity.label in [0,1] and _ent.label in [0, 1]:
|
|
|
- continue
|
|
|
- _score = jaccard_score(re.sub("%s|%s"%("股份|责任|有限|公司",place_pattern),"",_entity.entity_text), re.sub("%s|%s"%("股份|责任|有限|公司",place_pattern),"",_ent.entity_text))
|
|
|
- if _entity.entity_text!=_ent.entity_text and _score>=on_value:
|
|
|
- _entity.linked_entitys.append(_ent)
|
|
|
- _ent.linked_entitys.append(_entity)
|
|
|
+ #替换公司的逻辑有问题,先取消
|
|
|
+ # for first_i in range(len(range_entity)):
|
|
|
+ # _entity = range_entity[first_i]
|
|
|
+ # for second_i in range(first_i+1,len(range_entity)):
|
|
|
+ # _ent = range_entity[second_i]
|
|
|
+ # # 2021/5/21 update: 两个实体标签互斥(一个是招标人、一个是代理人)且entity_text不相等时,跳过
|
|
|
+ # if _entity.entity_text != _ent.entity_text and _entity.label != _ent.label and _entity.label in [0,1] and _ent.label in [0, 1]:
|
|
|
+ # continue
|
|
|
+ # _score = jaccard_score(re.sub("%s|%s"%("股份|责任|有限|公司",place_pattern),"",_entity.entity_text), re.sub("%s|%s"%("股份|责任|有限|公司",place_pattern),"",_ent.entity_text))
|
|
|
+ # if _entity.entity_text!=_ent.entity_text and _score>=on_value:
|
|
|
+ # _entity.linked_entitys.append(_ent)
|
|
|
+ # _ent.linked_entitys.append(_entity)
|
|
|
+ # print("=-===",_entity.entity_text,_ent.entity_text,_score)
|
|
|
#替换公司名称
|
|
|
for _entity in range_entity:
|
|
|
if re.search("公司",_entity.entity_text) is None:
|
|
@@ -433,6 +435,6 @@ if __name__=="__main__":
|
|
|
# print(match_enterprise_max_first(sentences))
|
|
|
#
|
|
|
# print("takes %d s"%(time.time()-_time))
|
|
|
- fix_LEGAL_ENTERPRISE()
|
|
|
- # print(jaccard_score("中国南方航空股份有限公司上海分公司","南方航空上海分公司"))
|
|
|
+ # fix_LEGAL_ENTERPRISE()
|
|
|
+ print(jaccard_score("吉林省九台","吉林省建苑设计集团有限公司"))
|
|
|
# print(match_enterprise_max_first("中国南方航空股份有限公司黑龙江分公司"))
|