|
@@ -2237,7 +2237,8 @@ def my_timeFormat(_time):
|
|
|
return time_list
|
|
|
|
|
|
def getTimeAttributes(list_entity,list_sentence):
|
|
|
- # list_entity = [i for i in list_entity if i.entity_type=='time']
|
|
|
+ time_entitys = [i for i in list_entity if i.entity_type=='time']
|
|
|
+ time_entitys = sorted(time_entitys,key=lambda x:(x.sentence_index, x.begin_index))
|
|
|
list_sentence = sorted(list_sentence,key=lambda x:x.sentence_index)
|
|
|
dict_time = {
|
|
|
"time_release": [],
|
|
@@ -2256,105 +2257,160 @@ def getTimeAttributes(list_entity,list_sentence):
|
|
|
'time_commencement':[] , #13 开工日期
|
|
|
'time_completion': [] # 14 竣工日期
|
|
|
}
|
|
|
- for entity in list_entity:
|
|
|
- if entity.label!=0:
|
|
|
- entity_text = entity.entity_text
|
|
|
- extract_time = my_timeFormat(entity_text)
|
|
|
- if extract_time:
|
|
|
- sentence_text = list_sentence[entity.sentence_index].sentence_text
|
|
|
- entity_left = sentence_text[max(0,entity.wordOffset_begin-2):entity.wordOffset_begin]
|
|
|
- entity_right = sentence_text[entity.wordOffset_end:entity.wordOffset_end+3]
|
|
|
- label_prob = entity.values[entity.label]
|
|
|
+ last_sentence_index = 0
|
|
|
+ last_time_type = ""
|
|
|
+ last_time_index = {
|
|
|
+ 'time_bidstart':"time_bidclose",
|
|
|
+ 'time_publicityStart':"time_publicityEnd",
|
|
|
+ 'time_getFileStart':"time_getFileEnd",
|
|
|
+ 'time_registrationStart':"time_registrationEnd",
|
|
|
+ 'time_earnestMoneyStart':"time_earnestMoneyEnd",
|
|
|
+ 'time_commencement':"time_completion",
|
|
|
+ }
|
|
|
+ for entity in time_entitys:
|
|
|
+ sentence_text = list_sentence[entity.sentence_index].sentence_text
|
|
|
+ entity_left = sentence_text[max(0, entity.wordOffset_begin - 2):entity.wordOffset_begin]
|
|
|
+ entity_right = sentence_text[entity.wordOffset_end:entity.wordOffset_end + 3]
|
|
|
+ label_prob = entity.values[entity.label]
|
|
|
+ entity_text = entity.entity_text
|
|
|
+ extract_time = my_timeFormat(entity_text)
|
|
|
+ if extract_time:
|
|
|
+ if re.search("至|到", entity_left):
|
|
|
+ if entity.sentence_index == last_sentence_index:
|
|
|
+ time_type = last_time_index.get(last_time_type)
|
|
|
+ if time_type:
|
|
|
+ dict_time[time_type].append((extract_time[0], 0.5 + label_prob / 10))
|
|
|
+ last_time_type = ""
|
|
|
+ continue
|
|
|
+ if entity.label!=0:
|
|
|
if entity.label==1 and label_prob>0.5:
|
|
|
dict_time['time_release'].append((extract_time[0],label_prob))
|
|
|
+ last_time_type = 'time_release'
|
|
|
elif entity.label==2 and label_prob>0.5:
|
|
|
dict_time['time_bidopen'].append((extract_time[0],label_prob))
|
|
|
+ last_time_type = 'time_bidopen'
|
|
|
elif entity.label==3 and label_prob>0.5:
|
|
|
dict_time['time_bidclose'].append((extract_time[0],label_prob))
|
|
|
+ last_time_type = 'time_bidclose'
|
|
|
elif entity.label==12 and label_prob>0.5:
|
|
|
if len(extract_time)==1:
|
|
|
- if re.search("前|止|截止",entity_right) or re.search("至|止",entity_left) or re.search("前",entity_text[-2:]):
|
|
|
+ if re.search("前|止|截止",entity_right) or re.search("至|止|到",entity_left) or re.search("前",entity_text[-2:]):
|
|
|
dict_time['time_bidclose'].append((extract_time[0], label_prob))
|
|
|
+ last_time_type = 'time_bidclose'
|
|
|
else:
|
|
|
dict_time['time_bidstart'].append((extract_time[0], label_prob))
|
|
|
+ last_time_type = 'time_bidstart'
|
|
|
else:
|
|
|
dict_time['time_bidstart'].append((extract_time[0],label_prob))
|
|
|
dict_time['time_bidclose'].append((extract_time[1],label_prob))
|
|
|
+ last_time_type = ''
|
|
|
elif entity.label==4 and label_prob>0.5:
|
|
|
if len(extract_time)==1:
|
|
|
- if re.search("前|止|截止",entity_right) or re.search("至|止",entity_left) or re.search("前",entity_text[-2:]):
|
|
|
+ if re.search("前|止|截止",entity_right) or re.search("至|止|到",entity_left) or re.search("前",entity_text[-2:]):
|
|
|
dict_time['time_publicityEnd'].append((extract_time[0], label_prob))
|
|
|
+ last_time_type = 'time_publicityEnd'
|
|
|
else:
|
|
|
dict_time['time_publicityStart'].append((extract_time[0], label_prob))
|
|
|
+ last_time_type = 'time_publicityStart'
|
|
|
else:
|
|
|
dict_time['time_publicityStart'].append((extract_time[0],label_prob))
|
|
|
dict_time['time_publicityEnd'].append((extract_time[1],label_prob))
|
|
|
+ last_time_type = ''
|
|
|
elif entity.label==5 and label_prob>0.5:
|
|
|
if len(extract_time)==1:
|
|
|
dict_time['time_publicityEnd'].append((extract_time[0], label_prob))
|
|
|
+ last_time_type = 'time_publicityEnd'
|
|
|
else:
|
|
|
dict_time['time_publicityStart'].append((extract_time[0],label_prob))
|
|
|
dict_time['time_publicityEnd'].append((extract_time[1],label_prob))
|
|
|
+ last_time_type = ''
|
|
|
elif entity.label==6 and label_prob>0.5:
|
|
|
if len(extract_time)==1:
|
|
|
- if re.search("前|止|截止",entity_right) or re.search("至|止",entity_left) or re.search("前",entity_text[-2:]):
|
|
|
+ if re.search("前|止|截止",entity_right) or re.search("至|止|到",entity_left) or re.search("前",entity_text[-2:]):
|
|
|
dict_time['time_getFileEnd'].append((extract_time[0], label_prob))
|
|
|
+ last_time_type = 'time_getFileEnd'
|
|
|
else:
|
|
|
dict_time['time_getFileStart'].append((extract_time[0], label_prob))
|
|
|
+ last_time_type = 'time_getFileStart'
|
|
|
else:
|
|
|
dict_time['time_getFileStart'].append((extract_time[0],label_prob))
|
|
|
dict_time['time_getFileEnd'].append((extract_time[1],label_prob))
|
|
|
+ last_time_type = ''
|
|
|
elif entity.label==7 and label_prob>0.5:
|
|
|
if len(extract_time)==1:
|
|
|
dict_time['time_getFileEnd'].append((extract_time[0], label_prob))
|
|
|
+ last_time_type = 'time_getFileEnd'
|
|
|
else:
|
|
|
dict_time['time_getFileStart'].append((extract_time[0],label_prob))
|
|
|
dict_time['time_getFileEnd'].append((extract_time[1],label_prob))
|
|
|
+ last_time_type = ''
|
|
|
elif entity.label==8 and label_prob>0.5:
|
|
|
if len(extract_time)==1:
|
|
|
- if re.search("前|止|截止",entity_right) or re.search("至|止",entity_left) or re.search("前",entity_text[-2:]):
|
|
|
+ if re.search("前|止|截止",entity_right) or re.search("至|止|到",entity_left) or re.search("前",entity_text[-2:]):
|
|
|
dict_time['time_registrationEnd'].append((extract_time[0], label_prob))
|
|
|
+ last_time_type = 'time_registrationEnd'
|
|
|
else:
|
|
|
dict_time['time_registrationStart'].append((extract_time[0], label_prob))
|
|
|
+ last_time_type = 'time_registrationStart'
|
|
|
else:
|
|
|
dict_time['time_registrationStart'].append((extract_time[0],label_prob))
|
|
|
dict_time['time_registrationEnd'].append((extract_time[1],label_prob))
|
|
|
+ last_time_type = ''
|
|
|
elif entity.label==9 and label_prob>0.5:
|
|
|
if len(extract_time)==1:
|
|
|
dict_time['time_registrationEnd'].append((extract_time[0], label_prob))
|
|
|
+ last_time_type = 'time_registrationEnd'
|
|
|
else:
|
|
|
dict_time['time_registrationStart'].append((extract_time[0],label_prob))
|
|
|
dict_time['time_registrationEnd'].append((extract_time[1],label_prob))
|
|
|
+ last_time_type = ''
|
|
|
elif entity.label==10 and label_prob>0.5:
|
|
|
if len(extract_time)==1:
|
|
|
- if re.search("前|止|截止",entity_right) or re.search("至|止",entity_left) or re.search("前",entity_text[-2:]):
|
|
|
+ if re.search("前|止|截止",entity_right) or re.search("至|止|到",entity_left) or re.search("前",entity_text[-2:]):
|
|
|
dict_time['time_earnestMoneyEnd'].append((extract_time[0], label_prob))
|
|
|
+ last_time_type = 'time_earnestMoneyEnd'
|
|
|
else:
|
|
|
dict_time['time_earnestMoneyStart'].append((extract_time[0], label_prob))
|
|
|
+ last_time_type = 'time_earnestMoneyStart'
|
|
|
else:
|
|
|
dict_time['time_earnestMoneyStart'].append((extract_time[0],label_prob))
|
|
|
dict_time['time_earnestMoneyEnd'].append((extract_time[1],label_prob))
|
|
|
+ last_time_type = ''
|
|
|
elif entity.label==11 and label_prob>0.5:
|
|
|
if len(extract_time)==1:
|
|
|
dict_time['time_earnestMoneyEnd'].append((extract_time[0], label_prob))
|
|
|
+ last_time_type = 'time_earnestMoneyEnd'
|
|
|
else:
|
|
|
dict_time['time_earnestMoneyStart'].append((extract_time[0],label_prob))
|
|
|
dict_time['time_earnestMoneyEnd'].append((extract_time[1],label_prob))
|
|
|
+ last_time_type = ''
|
|
|
elif entity.label==13 and label_prob>0.5:
|
|
|
if len(extract_time)==1:
|
|
|
- if re.search("前|止|截止",entity_right) or re.search("至|止",entity_left) or re.search("前",entity_text[-2:]):
|
|
|
+ if re.search("前|止|截止",entity_right) or re.search("至|止|到",entity_left) or re.search("前",entity_text[-2:]):
|
|
|
dict_time['time_completion'].append((extract_time[0], label_prob))
|
|
|
+ last_time_type = 'time_completion'
|
|
|
else:
|
|
|
dict_time['time_commencement'].append((extract_time[0], label_prob))
|
|
|
+ last_time_type = 'time_commencement'
|
|
|
else:
|
|
|
dict_time['time_commencement'].append((extract_time[0],label_prob))
|
|
|
dict_time['time_completion'].append((extract_time[1],label_prob))
|
|
|
+ last_time_type = ''
|
|
|
elif entity.label==14 and label_prob>0.5:
|
|
|
if len(extract_time)==1:
|
|
|
dict_time['time_completion'].append((extract_time[0], label_prob))
|
|
|
+ last_time_type = 'time_completion'
|
|
|
else:
|
|
|
dict_time['time_commencement'].append((extract_time[0],label_prob))
|
|
|
dict_time['time_completion'].append((extract_time[1],label_prob))
|
|
|
+ last_time_type = ''
|
|
|
+ else:
|
|
|
+ last_time_type = ""
|
|
|
+ else:
|
|
|
+ last_time_type = ""
|
|
|
+ else:
|
|
|
+ last_time_type = ""
|
|
|
+ last_sentence_index = entity.sentence_index
|
|
|
|
|
|
|
|
|
result_dict = dict((key,"") for key in dict_time.keys())
|