Kaynağa Gözat

合同时间提取规则优化

znj 10 ay önce
ebeveyn
işleme
8673200e6f
1 değiştirilmiş dosya ile 10 ekleme ve 4 silme
  1. 10 4
      BiddingKG/dl/interface/getAttributes.py

+ 10 - 4
BiddingKG/dl/interface/getAttributes.py

@@ -3391,12 +3391,18 @@ def getTimeAttributes(list_entity,list_sentence,page_time):
                     if re.search("(合同.{,2}签[订定署].{,2}|签[订定署].{,2}合同.{,2})(?:时间|日期)|合同签[订定署].{,1}$", entity_left2):
                         dict_time['time_signContract'].append((extract_time[0], 0.5, in_attachment))
                         last_time_type = 'time_signContract'
+                        last_sentence_index = entity.sentence_index
+                        continue
                     elif re.search("(合同|服务|履约|(合同|服务)履行).{,4}(?:起始|开始)(?:时间|日期)", entity_left3[-15:]):
                         dict_time['time_contractStart'].append((extract_time[0], 0.55, in_attachment))
                         last_time_type = 'time_contractStart'
+                        last_sentence_index = entity.sentence_index
+                        continue
                     elif re.search("(合同|服务|履约).{,2}(?:完成|截止|结束)(?:时间|日期|时限)", entity_left2):
                         dict_time['time_contractEnd'].append((extract_time[0], 0.55, in_attachment))
                         last_time_type = 'time_contractEnd'
+                        last_sentence_index = entity.sentence_index
+                        continue
                     elif re.search("(?:合同|服务|履约|(合同|服务)履行)(?:期限?|有效期)|(?:服务|履约|(合同|服务)履行)(?:时间|日期|周期)|服务[时年]限|合同周期", entity_left2):
                         if re.search("到|至|截[至止]",entity_left) or re.search("前|止|截止",entity_right) or re.search("前",entity_text[-2:]):
                             dict_time['time_contractEnd'].append((extract_time[0], 0.5, in_attachment))
@@ -3404,8 +3410,8 @@ def getTimeAttributes(list_entity,list_sentence,page_time):
                         else:
                             dict_time['time_contractStart'].append((extract_time[0], 0.5, in_attachment))
                             last_time_type = 'time_contractStart'
-                    last_sentence_index = entity.sentence_index
-                    continue
+                        last_sentence_index = entity.sentence_index
+                        continue
                 else:
                     if re.search("(?:合同|服务|履约|(合同|服务)履行)(?:期限?|有效期)|(?:服务|履约|(合同|服务)履行)(?:时间|日期|周期)|服务[时年]限|合同周期", entity_left2):
                         # 排除开始和结束时间一样的错误模板,例:“履约期限:2023年02月15日至2023年02月15日”
@@ -3413,8 +3419,8 @@ def getTimeAttributes(list_entity,list_sentence,page_time):
                             dict_time['time_contractStart'].append((extract_time[0], 0.6, in_attachment))
                             dict_time['time_contractEnd'].append((extract_time[1], 0.6, in_attachment))
                             last_time_type = ''
-                        last_sentence_index = entity.sentence_index
-                        continue
+                            last_sentence_index = entity.sentence_index
+                            continue
             # 服务期限表达补充
             if entity.label==0:
                 re_service = '合同期限|工期/交货期/服务期|工期\(交货期\)|合格工期|服务期限|工期' \