|
@@ -1303,12 +1303,12 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
|
|
'[\(|\(]0[1-9]\d{1,2}[\)|\)]-?[2-9]\d{6}\d?-?\d{,4}|' \
|
|
'[\(|\(]0[1-9]\d{1,2}[\)|\)]-?[2-9]\d{6}\d?-?\d{,4}|' \
|
|
'[2-9]\d{6,7})'
|
|
'[2-9]\d{6,7})'
|
|
re_tenderee_phone = re.compile(
|
|
re_tenderee_phone = re.compile(
|
|
- "(?:(?:(?:采购|招标|议价|议标|比选)(?:人|公司|单位|组织|部门)|建设(?:单位|业主)|(?:采购|招标|甲)方|询价单位|项目业主|业主)[^。]{0,5}(?:电话|联系方式|联系人|联系电话)[::]?[^。]{0,7}?)"
|
|
|
|
|
|
+ "(?:(?:(?:采购|招标|议价|议标|比选)(?:人|公司|单位|组织|部门)|建设(?:单位|业主)|(?:采购|招标|甲)方|询价单位|项目业主|业主)[^。代理]{0,5}(?:电话|联系方式|联系人|联系电话)[::]?[^。]{0,7}?)"
|
|
# 电话号码
|
|
# 电话号码
|
|
+ phone_pattern)
|
|
+ phone_pattern)
|
|
# 例:"采购人地址和联系方式:峨边彝族自治县教育局,0833-5226788,"
|
|
# 例:"采购人地址和联系方式:峨边彝族自治县教育局,0833-5226788,"
|
|
re_tenderee_phone2 = re.compile(
|
|
re_tenderee_phone2 = re.compile(
|
|
- "(?:(?:(?:采购|招标|议价|议标|比选)(?:人|公司|单位|组织|部门)|建设(?:单位|业主)|(?:采购|招标|甲)方|询价单位|项目业主|业主)[^。]{0,3}(?:地址)[^。]{0,3}(?:电话|联系方式|联系人|联系电话)[::]?[^。]{0,20}?)"
|
|
|
|
|
|
+ "(?:(?:(?:采购|招标|议价|议标|比选)(?:人|公司|单位|组织|部门)|建设(?:单位|业主)|(?:采购|招标|甲)方|询价单位|项目业主|业主)[^。代理]{0,3}(?:地址)[^。]{0,3}(?:电话|联系方式|联系人|联系电话)[::]?[^。]{0,20}?)"
|
|
# 电话号码
|
|
# 电话号码
|
|
+ phone_pattern)
|
|
+ phone_pattern)
|
|
re_agent_phone = re.compile(
|
|
re_agent_phone = re.compile(
|
|
@@ -1586,6 +1586,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
|
|
# 去重结果
|
|
# 去重结果
|
|
relation_list = list(set(relation_list))
|
|
relation_list = list(set(relation_list))
|
|
# print([(rel[0].entity_text,rel[2].entity_text) for rel in relation_list])
|
|
# print([(rel[0].entity_text,rel[2].entity_text) for rel in relation_list])
|
|
|
|
+ # relation_list = [] # 放弃原来的模型连接,结果不好控制
|
|
right_combination = [('org','person'),('company','person'),('company','location'),('org','location'),('person','phone')]
|
|
right_combination = [('org','person'),('company','person'),('company','location'),('org','location'),('person','phone')]
|
|
linked_company = set()
|
|
linked_company = set()
|
|
linked_person = set()
|
|
linked_person = set()
|
|
@@ -1604,6 +1605,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
|
|
distance = (tokens_num_dict[_object.sentence_index] + _object.begin_index) - (
|
|
distance = (tokens_num_dict[_object.sentence_index] + _object.begin_index) - (
|
|
tokens_num_dict[_subject.sentence_index] + _subject.end_index)
|
|
tokens_num_dict[_subject.sentence_index] + _subject.end_index)
|
|
if predicate=="rel_person":
|
|
if predicate=="rel_person":
|
|
|
|
+ # print(predicate, _subject.entity_text, _object.entity_text)
|
|
if (_subject.label==0 and _object.entity_text in agency_contact ) or (_subject.label==1 and _object.entity_text in tenderee_contact):
|
|
if (_subject.label==0 and _object.entity_text in agency_contact ) or (_subject.label==1 and _object.entity_text in tenderee_contact):
|
|
continue
|
|
continue
|
|
# 角色为中标候选人,排除"质疑|投诉|监督|受理"相关的联系人
|
|
# 角色为中标候选人,排除"质疑|投诉|监督|受理"相关的联系人
|
|
@@ -1611,6 +1613,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
|
|
continue
|
|
continue
|
|
# 角色为招标/代理人,排除"纪检|监察"相关的联系人
|
|
# 角色为招标/代理人,排除"纪检|监察"相关的联系人
|
|
if _subject.label in [0,1] and re.search("纪检|监察|乙方|中标",list_sentence[_object.sentence_index].sentence_text[max(0,_object.wordOffset_begin - 10):_object.wordOffset_begin]):
|
|
if _subject.label in [0,1] and re.search("纪检|监察|乙方|中标",list_sentence[_object.sentence_index].sentence_text[max(0,_object.wordOffset_begin - 10):_object.wordOffset_begin]):
|
|
|
|
+ # if _subject.label in [0,1] and re.search("纪检|监察|乙方|中标",list_sentence[_object.sentence_index].sentence_text[_subject.end_index:_object.wordOffset_begin]):
|
|
continue
|
|
continue
|
|
if _object.sentence_index!=0 and _object.wordOffset_begin<=10:
|
|
if _object.sentence_index!=0 and _object.wordOffset_begin<=10:
|
|
if _subject.label in [2, 3, 4] and re.search("请.{0,4}联系",
|
|
if _subject.label in [2, 3, 4] and re.search("请.{0,4}联系",
|
|
@@ -4086,7 +4089,8 @@ def extract_serviceTime(service_time,page_time):
|
|
if service_days <= 1 and service_days > 4000:
|
|
if service_days <= 1 and service_days > 4000:
|
|
service_days = 0
|
|
service_days = 0
|
|
|
|
|
|
- if service_days>3:
|
|
|
|
|
|
+ # if service_days>3:
|
|
|
|
+ if service_days>0:
|
|
# service_days = str(service_days) + "天"
|
|
# service_days = str(service_days) + "天"
|
|
serviceTime_dict['service_days'] = service_days
|
|
serviceTime_dict['service_days'] = service_days
|
|
break
|
|
break
|
|
@@ -4153,7 +4157,6 @@ def getOtherAttributes(list_entity,page_time,prem,channel_dic):
|
|
list_serviceTime = [serviceTime for serviceTime in list_serviceTime if serviceTime.in_attachment==0]
|
|
list_serviceTime = [serviceTime for serviceTime in list_serviceTime if serviceTime.in_attachment==0]
|
|
error_serviceTime = []
|
|
error_serviceTime = []
|
|
for list_time in [list_serviceTime,list_serviceTime_inAtt]:
|
|
for list_time in [list_serviceTime,list_serviceTime_inAtt]:
|
|
- # if not dict_other["serviceTime"]:
|
|
|
|
if not serviceTime_dict['service_end'] and not serviceTime_dict['service_days']:
|
|
if not serviceTime_dict['service_end'] and not serviceTime_dict['service_days']:
|
|
list_time.sort(key=lambda x: (x.prob,-x.sentence_index,-x.begin_index), reverse=True)
|
|
list_time.sort(key=lambda x: (x.prob,-x.sentence_index,-x.begin_index), reverse=True)
|
|
for _serviceTime in list_time:
|
|
for _serviceTime in list_time:
|
|
@@ -4171,7 +4174,6 @@ def getOtherAttributes(list_entity,page_time,prem,channel_dic):
|
|
break
|
|
break
|
|
else:
|
|
else:
|
|
error_serviceTime.append(_serviceTime.entity_text)
|
|
error_serviceTime.append(_serviceTime.entity_text)
|
|
- # if not dict_other["serviceTime"]:
|
|
|
|
if not serviceTime_dict['service_end']:
|
|
if not serviceTime_dict['service_end']:
|
|
for _serviceTime in list_time:
|
|
for _serviceTime in list_time:
|
|
# 优先取具体时间(20XX年x月-20XX年x月)
|
|
# 优先取具体时间(20XX年x月-20XX年x月)
|
|
@@ -4181,7 +4183,6 @@ def getOtherAttributes(list_entity,page_time,prem,channel_dic):
|
|
if extract_time['service_end']:
|
|
if extract_time['service_end']:
|
|
serviceTime_dict = extract_time
|
|
serviceTime_dict = extract_time
|
|
break
|
|
break
|
|
- # if not dict_other["serviceTime"]:
|
|
|
|
if not serviceTime_dict['service_end']:
|
|
if not serviceTime_dict['service_end']:
|
|
for _serviceTime in list_time:
|
|
for _serviceTime in list_time:
|
|
# 优先取具体时间(20XX年x月x日)
|
|
# 优先取具体时间(20XX年x月x日)
|
|
@@ -4192,7 +4193,16 @@ def getOtherAttributes(list_entity,page_time,prem,channel_dic):
|
|
if extract_time['service_end']:
|
|
if extract_time['service_end']:
|
|
serviceTime_dict = extract_time
|
|
serviceTime_dict = extract_time
|
|
break
|
|
break
|
|
- # if not dict_other["serviceTime"]:
|
|
|
|
|
|
+ if not serviceTime_dict['service_end'] and not serviceTime_dict['service_days']:
|
|
|
|
+ for _serviceTime in list_time:
|
|
|
|
+ if _serviceTime.entity_text not in error_serviceTime:
|
|
|
|
+ # dict_other["serviceTime"] = _serviceTime.entity_text
|
|
|
|
+ extract_time = extract_serviceTime(_serviceTime.entity_text,page_time)
|
|
|
|
+ # service_days > 3
|
|
|
|
+ if extract_time['service_end'] or extract_time['service_days']>3:
|
|
|
|
+ serviceTime_dict = extract_time
|
|
|
|
+ break
|
|
|
|
+ # 若上一步仍无结果,取消service_days > 3 的条件
|
|
if not serviceTime_dict['service_end'] and not serviceTime_dict['service_days']:
|
|
if not serviceTime_dict['service_end'] and not serviceTime_dict['service_days']:
|
|
for _serviceTime in list_time:
|
|
for _serviceTime in list_time:
|
|
if _serviceTime.entity_text not in error_serviceTime:
|
|
if _serviceTime.entity_text not in error_serviceTime:
|
|
@@ -4201,6 +4211,7 @@ def getOtherAttributes(list_entity,page_time,prem,channel_dic):
|
|
if extract_time['service_end'] or extract_time['service_days']:
|
|
if extract_time['service_end'] or extract_time['service_days']:
|
|
serviceTime_dict = extract_time
|
|
serviceTime_dict = extract_time
|
|
break
|
|
break
|
|
|
|
+
|
|
if serviceTime_dict['service_start'] and serviceTime_dict['service_end']:
|
|
if serviceTime_dict['service_start'] and serviceTime_dict['service_end']:
|
|
service_days = get_days_between(serviceTime_dict['service_start'],serviceTime_dict['service_end'])
|
|
service_days = get_days_between(serviceTime_dict['service_start'],serviceTime_dict['service_end'])
|
|
serviceTime_dict['service_days'] = service_days
|
|
serviceTime_dict['service_days'] = service_days
|