ソースを参照

合同时间提取规则优化

znj 10 ヶ月 前
コミット
d6c322c65e
1 ファイル変更、16 行追加、5 行削除
  1. 16 5
      BiddingKG/dl/interface/getAttributes.py

+ 16 - 5
BiddingKG/dl/interface/getAttributes.py

@@ -3386,12 +3386,12 @@ def getTimeAttributes(list_entity,list_sentence,page_time):
                     continue
 
             # 2023/9/13 新增合同相关时间
-            if re.search("合同|服务|履[约行]", entity_left2):
+            if re.search("合同|服务|履[约行]", entity_left3[-15:]):
                 if len(extract_time) == 1:
                     if re.search("(合同.{,2}签[订定署].{,2}|签[订定署].{,2}合同.{,2})(?:时间|日期)|合同签[订定署].{,1}$", entity_left2):
                         dict_time['time_signContract'].append((extract_time[0], 0.5, in_attachment))
                         last_time_type = 'time_signContract'
-                    elif re.search("(合同|服务|履约|(合同|服务)履行).{,2}(?:起始|开始)(?:时间|日期)", entity_left2):
+                    elif re.search("(合同|服务|履约|(合同|服务)履行).{,4}(?:起始|开始)(?:时间|日期)", entity_left3[-15:]):
                         dict_time['time_contractStart'].append((extract_time[0], 0.55, in_attachment))
                         last_time_type = 'time_contractStart'
                     elif re.search("(合同|服务|履约).{,2}(?:完成|截止|结束)(?:时间|日期|时限)", entity_left2):
@@ -3426,9 +3426,9 @@ def getTimeAttributes(list_entity,list_sentence,page_time):
                     '|完成(时间|日期)|服务期限|中标工期|项目周期|期限要求|供货期|合同履行日期|计划的?周期' \
                     '|履约期限|合同约定完成时限|合同完成日期|承诺完成日期' \
                     '|合同起始日起|合同履约期|履约截止日期|承包期限|合同完成日期' \
-                    '|服务期间|服务履行期|委托(管理)?期限'
+                    '|服务期间|服务履行期|委托(管理)?期限|履约期限、地点等简要信息'
                 if len(extract_time)==2:
-                    if re.search(re_service,entity_left2):
+                    if re.search(re_service,entity_left2) or re.search("履约期限、地点等简要信息",entity_left3[-20:]):
                         dict_time['time_contractStart'].append((extract_time[0], 0.5, in_attachment))
                         dict_time['time_contractEnd'].append((extract_time[1], 0.5, in_attachment))
                         last_time_type = ''
@@ -3476,6 +3476,14 @@ def getTimeAttributes(list_entity,list_sentence,page_time):
             #                     content_text += c["text"] + ""
             #                 print('concat_text', content_text)
 
+            if re.search(",(完成|截止|结束)(时间|日期)", entity_left2[-8:]) and entity.label==0:
+                if entity.sentence_index == last_sentence_index:
+                    time_type = last_time_index.get(last_time_type)
+                    if time_type:
+                        dict_time[time_type].append((extract_time[0], 0.5 + label_prob / 10,in_attachment))
+                        last_time_type = ""
+                        last_sentence_index = entity.sentence_index
+                        continue
 
             if re.search("至|到|[日\d][-—]$|[~~]", entity_left):
                 if entity.sentence_index == last_sentence_index:
@@ -4028,7 +4036,10 @@ def getOtherAttributes(list_entity,page_time,prem):
         service_days = get_days_between(serviceTime_dict['service_start'],serviceTime_dict['service_end'])
         serviceTime_dict['service_days'] = str(service_days) + "天"
     dict_other["serviceTime"] = serviceTime_dict
-
+    if not time_contractEnd and prem[0]['docchannel']['docchannel']=='合同公告': # 用serviceTime补充合同开始结束时间,公告类型为合同公告
+        if serviceTime_dict['service_start'] and serviceTime_dict['service_end']:
+            prem[0]["time_contractStart"] = serviceTime_dict['service_start']
+            prem[0]["time_contractEnd"] = serviceTime_dict['service_end']
 
     if dict_other['moneysource']:
         dict_other['moneysource'] = turnMoneySource(dict_other['moneysource'])