|
@@ -3106,7 +3106,7 @@ def getTimeAttributes(list_entity,list_sentence):
|
|
|
# definite_time = "00:00:00"
|
|
|
if extract_time:
|
|
|
definite_time_list = []
|
|
|
- t = re.compile("(?P<day>下午|上午|早上)?(?P<hour>\d{1,2})[::时点](?P<half_hour>半)?(?P<minute>\d{2})?[::分]?(?P<second>\d{2})?秒?")
|
|
|
+ t = re.compile("(北京时间)?(?P<day>下午|上午|早上)?(?P<hour>\d{1,2})[::时点](?P<half_hour>半)?(?P<minute>\d{2})?[::分]?(?P<second>\d{2})?秒?")
|
|
|
_entity_text = re.sub(" (?=[^\d])|(?<=[^\d]) ","",entity_text)
|
|
|
t_in_word_num = len(re.findall(t,_entity_text))
|
|
|
t_out_of_word = re.search("^[^\d]{,2}"+t.pattern,re.sub(" (?=[^\d])|(?<=[^\d]) ","",sentence_text[entity.wordOffset_end:]))
|
|
@@ -3207,6 +3207,12 @@ def getTimeAttributes(list_entity,list_sentence):
|
|
|
dict_time['time_bidclose'].append((extract_time[0], 0.5, in_attachment))
|
|
|
if entity.label==3 and re.search("保证金.{,2}(接受|收取)|(接受|收取).{,2}保证金",entity_left3):
|
|
|
dict_time['time_earnestMoneyEnd'].append((extract_time[0], 0.5, in_attachment))
|
|
|
+ if entity.label in [6, 7]:
|
|
|
+ if re.search("文件.{,2}([递提]交|接收)|截标|投标.{,2}截止|([递提]交|接收)(?:文件)?.{,2}截止|报价.{,2}截止|响应.{,2}截止",entity_left3):
|
|
|
+ dict_time['time_bidclose'].append((extract_time[0], 0.5, in_attachment))
|
|
|
+ if entity.label==0:
|
|
|
+ if re.search("文件.{,2}([递提]交|接收)|截标|投标.{,2}截止|([递提]交|接收)(?:文件)?.{,2}截止|报价.{,2}截止|响应.{,2}截止",entity_left3):
|
|
|
+ dict_time['time_bidclose'].append((extract_time[0], 0.45, in_attachment))
|
|
|
|
|
|
# 补充公告末尾处的发布时间
|
|
|
if entity.label==0:
|
|
@@ -3456,10 +3462,21 @@ def getTimeAttributes(list_entity,list_sentence):
|
|
|
break
|
|
|
result_dict[time_type] = _list_time[0][0]
|
|
|
# result_dict 纠错
|
|
|
- if result_dict['time_bidstart'] and not result_dict['time_bidclose']:
|
|
|
- if result_dict['time_bidstart']==result_dict['time_bidopen']:
|
|
|
- result_dict['time_bidstart'] = ""
|
|
|
- result_dict['time_bidclose'] = result_dict['time_bidopen']
|
|
|
+ if not result_dict['time_bidclose']:
|
|
|
+ if result_dict['time_bidstart']: # 无截标时间,投标开始和开标时间一样
|
|
|
+ if result_dict['time_bidstart'][:10] in result_dict['time_bidopen']:
|
|
|
+ result_dict['time_bidstart'] = ""
|
|
|
+ result_dict['time_bidclose'] = result_dict['time_bidopen']
|
|
|
+ if not result_dict['time_bidclose']:
|
|
|
+ if result_dict['time_getFileEnd']: # 无截标时间,获取文件截止时间和开标时间一样
|
|
|
+ if result_dict['time_getFileEnd'][:10] in result_dict['time_bidopen']:
|
|
|
+ result_dict['time_bidclose'] = result_dict['time_bidopen']
|
|
|
+ else:
|
|
|
+ if result_dict['time_bidopen']: # 截标时间 和 开标时间 时分秒互补
|
|
|
+ if len(result_dict['time_bidclose'])<len(result_dict['time_bidopen']) and result_dict['time_bidclose'] in result_dict['time_bidopen']:
|
|
|
+ result_dict['time_bidclose'] = result_dict['time_bidopen']
|
|
|
+ elif len(result_dict['time_bidclose'])>len(result_dict['time_bidopen']) and result_dict['time_bidopen'] in result_dict['time_bidclose']:
|
|
|
+ result_dict['time_bidopen'] = result_dict['time_bidclose']
|
|
|
|
|
|
return result_dict
|
|
|
|