|
@@ -1552,8 +1552,13 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
|
|
|
if (_subject.label==0 and _object.entity_text in agency_contact ) or (_subject.label==1 and _object.entity_text in tenderee_contact):
|
|
|
continue
|
|
|
# 角色为中标候选人,排除"质疑|投诉|监督|受理"相关的联系人
|
|
|
- if _subject.label in [2,3,4] and re.search("质疑|投诉|监督|受理|项目(单位)?联系",list_sentence[_object.sentence_index].sentence_text[max(0,_object.wordOffset_begin-10):_object.wordOffset_begin]):
|
|
|
+ if _subject.label in [2,3,4] and re.search("质疑|投诉|监督|受理|项目(单位)?联系|^联系人|请.{0,4}联系",list_sentence[_object.sentence_index].sentence_text[max(0,_object.wordOffset_begin-10):_object.wordOffset_begin]):
|
|
|
continue
|
|
|
+ if _object.sentence_index!=0 and _object.wordOffset_begin<=10:
|
|
|
+ if _subject.label in [2, 3, 4] and re.search("请.{0,4}联系",
|
|
|
+ list_sentence[_object.sentence_index-1].sentence_text[-10:]+
|
|
|
+ list_sentence[_object.sentence_index].sentence_text[0:_object.wordOffset_begin]):
|
|
|
+ continue
|
|
|
# 角色为中标候选人,排除距离过远的联系人
|
|
|
if _subject.label in [2, 3, 4] and distance>=40:
|
|
|
continue
|
|
@@ -1979,6 +1984,11 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
|
|
|
# 角色为中标候选人,排除"质疑|投诉|监督|受理"相关的联系人
|
|
|
if entity.label in [2, 3, 4] and re.search("质疑|投诉|监督|受理|项目(单位)?联系", list_sentence[after_entity.sentence_index].sentence_text[max(0,after_entity.wordOffset_begin - 10):after_entity.wordOffset_begin]):
|
|
|
break
|
|
|
+ if after_entity.sentence_index != 0 and after_entity.wordOffset_begin <= 10:
|
|
|
+ if entity.label in [2, 3, 4] and re.search("请.{0,5}联系",
|
|
|
+ list_sentence[after_entity.sentence_index - 1].sentence_text[-10:] +
|
|
|
+ list_sentence[after_entity.sentence_index].sentence_text[0:after_entity.wordOffset_begin]):
|
|
|
+ continue
|
|
|
if after_entity.label in [1, 2, 3]:
|
|
|
# distance = (tokens_num_dict[
|
|
|
# after_entity.sentence_index] + after_entity.begin_index) - (
|
|
@@ -3032,7 +3042,10 @@ def getTimeAttributes(list_entity,list_sentence):
|
|
|
'time_commencement':[] , #13 开工日期
|
|
|
'time_completion': [], # 14 竣工日期
|
|
|
'time_listingStart': [], # 15 挂牌开始日期(挂牌时间)
|
|
|
- 'time_listingEnd': [] # 16 挂牌结束日期、挂牌截止日期
|
|
|
+ 'time_listingEnd': [], # 16 挂牌结束日期、挂牌截止日期
|
|
|
+ 'time_signContract': [], # 17 合同签订时间
|
|
|
+ 'time_contractStart': [], # 18 合同开始时间
|
|
|
+ 'time_contractEnd': [] # 19 合同结束时间
|
|
|
}
|
|
|
last_sentence_index = 0
|
|
|
last_time_type = ""
|
|
@@ -3043,7 +3056,8 @@ def getTimeAttributes(list_entity,list_sentence):
|
|
|
'time_registrationStart':"time_registrationEnd",
|
|
|
'time_earnestMoneyStart':"time_earnestMoneyEnd",
|
|
|
'time_commencement':"time_completion",
|
|
|
- 'time_listingStart':"time_listingEnd"
|
|
|
+ 'time_listingStart':"time_listingEnd",
|
|
|
+ 'time_contractStart':"time_contractEnd"
|
|
|
}
|
|
|
for entity in time_entitys:
|
|
|
sentence_text = list_sentence[entity.sentence_index].sentence_text
|
|
@@ -3174,7 +3188,32 @@ def getTimeAttributes(list_entity,list_sentence):
|
|
|
last_sentence_index = entity.sentence_index
|
|
|
continue
|
|
|
|
|
|
- if re.search("至|到", entity_left):
|
|
|
+ # 2023/9/13 新增合同相关时间
|
|
|
+ if re.search("合同|服务|履[约行]", entity_left2):
|
|
|
+ if len(extract_time) == 1:
|
|
|
+ if re.search("(合同.{,2}签[订定署].{,2}|签[订定署].{,2}合同.{,2})(?:时间|日期)|合同签[订定署].{,1}$", entity_left2):
|
|
|
+ dict_time['time_signContract'].append((extract_time[0], 0.5, in_attachment))
|
|
|
+ last_time_type = 'time_signContract'
|
|
|
+ elif re.search("(?:合同|服务|履约|(合同|服务)履行)(?:期限?|有效期)|(?:服务|履约|(合同|服务)履行)(?:时间|日期|周期)|服务[时年]限|合同周期", entity_left2):
|
|
|
+ if re.search("到|至|截[至止]",entity_left) or re.search("前|止|截止",entity_right) or re.search("前",entity_text[-2:]):
|
|
|
+ dict_time['time_contractEnd'].append((extract_time[0], 0.5, in_attachment))
|
|
|
+ last_time_type = 'time_contractEnd'
|
|
|
+ else:
|
|
|
+ dict_time['time_contractStart'].append((extract_time[0], 0.5, in_attachment))
|
|
|
+ last_time_type = 'time_contractStart'
|
|
|
+ elif re.search("(合同|服务|履约|(合同|服务)履行).{,2}(?:起始|开始)(?:时间|日期)", entity_left2):
|
|
|
+ dict_time['time_contractStart'].append((extract_time[0], 0.55, in_attachment))
|
|
|
+ last_time_type = 'time_contractStart'
|
|
|
+ elif re.search("(合同|服务|履约).{,2}(?:完成|截止|结束)(?:时间|日期|时限)", entity_left2):
|
|
|
+ dict_time['time_contractEnd'].append((extract_time[0], 0.55, in_attachment))
|
|
|
+ last_time_type = 'time_contractEnd'
|
|
|
+ else:
|
|
|
+ if re.search("(?:合同|服务|履约|(合同|服务)履行)(?:期限?|有效期)|(?:服务|履约|(合同|服务)履行)(?:时间|日期|周期)|服务[时年]限|合同周期", entity_left2):
|
|
|
+ dict_time['time_contractStart'].append((extract_time[0], 0.6, in_attachment))
|
|
|
+ dict_time['time_contractEnd'].append((extract_time[1], 0.6, in_attachment))
|
|
|
+ last_time_type = ''
|
|
|
+
|
|
|
+ if re.search("至|到|[日\d][-—]$", entity_left):
|
|
|
if entity.sentence_index == last_sentence_index:
|
|
|
time_type = last_time_index.get(last_time_type)
|
|
|
if time_type:
|
|
@@ -3349,8 +3388,9 @@ def getOtherAttributes(list_entity):
|
|
|
dict_other["moneysource"] = entity.entity_text
|
|
|
last_moneysource_prob = entity.prob
|
|
|
elif entity.entity_type=='serviceTime':
|
|
|
- if list_serviceTime and entity.in_attachment:
|
|
|
- continue
|
|
|
+ # print(entity.entity_text)
|
|
|
+ # if list_serviceTime and entity.in_attachment:
|
|
|
+ # continue
|
|
|
if re.search("[^之]日|天|年|月|周|星期", entity.entity_text) or re.search("\d{4}[\-\./]\d{1,2}", entity.entity_text):
|
|
|
list_serviceTime.append(entity)
|
|
|
elif entity.entity_type=="person" and entity.label ==4:
|
|
@@ -3361,11 +3401,22 @@ def getOtherAttributes(list_entity):
|
|
|
dict_other["total_tendereeMoney"] = str(Decimal(entity.entity_text))
|
|
|
dict_other["total_tendereeMoneyUnit"] = entity.money_unit
|
|
|
if list_serviceTime:
|
|
|
- list_serviceTime.sort(key=lambda x:x.prob,reverse=True)
|
|
|
- max_prob = list_serviceTime[0].prob
|
|
|
- max_prob_serviceTime = [ent for ent in list_serviceTime if ent.prob==max_prob]
|
|
|
- max_prob_serviceTime.sort(key=lambda x:(x.sentence_index,x.begin_index))
|
|
|
- dict_other["serviceTime"] = max_prob_serviceTime[0].entity_text
|
|
|
+ list_serviceTime_inAtt = [serviceTime for serviceTime in list_serviceTime if serviceTime.in_attachment==1]
|
|
|
+ list_serviceTime = [serviceTime for serviceTime in list_serviceTime if serviceTime.in_attachment==0]
|
|
|
+ if not list_serviceTime:
|
|
|
+ list_serviceTime = list_serviceTime_inAtt
|
|
|
+ list_serviceTime.sort(key=lambda x: (x.prob,-x.sentence_index,-x.begin_index), reverse=True)
|
|
|
+ for _serviceTime in list_serviceTime:
|
|
|
+ # 优先取具体时间
|
|
|
+ if re.search("20\d{2}[年/.\-]\d{1,2}[月/.\-]\d{1,2}",_serviceTime.entity_text):
|
|
|
+ dict_other["serviceTime"] = _serviceTime.entity_text
|
|
|
+ break
|
|
|
+ if not dict_other["serviceTime"]:
|
|
|
+ max_prob = list_serviceTime[0].prob
|
|
|
+ max_prob_serviceTime = [ent for ent in list_serviceTime if ent.prob==max_prob]
|
|
|
+ max_prob_serviceTime.sort(key=lambda x:(x.sentence_index,x.begin_index))
|
|
|
+ dict_other["serviceTime"] = max_prob_serviceTime[0].entity_text
|
|
|
+
|
|
|
if dict_other['moneysource']:
|
|
|
dict_other['moneysource'] = turnMoneySource(dict_other['moneysource'])
|
|
|
# dict_other["product"] = list(set(dict_other["product"])) # 已在添加时 顺序去重保留
|