Prechádzať zdrojové kódy

变更公告时间提取规则优化,提取变更后的时间

znj pred 1 dňom
rodič
commit
3f39319a92
1 zmenený súbor, 7 pridaní a 5 odobraní
  1. 7 5
      BiddingKG/dl/interface/getAttributes.py

+ 7 - 5
BiddingKG/dl/interface/getAttributes.py

@@ -3622,11 +3622,13 @@ def getTimeAttributes(list_entity,list_sentence,page_time):
                     last_index = item.start() + 1
                 label_prob = label_prob - 0.2 * last_index / len(entity_left2)
                 # print('prob优化',label_prob,extract_time)
-            elif re.search("改正|更正|修正|更改|延期",entity_left2):
-                new_label = dict_time2label.get(last_time_type,None)
-                if new_label and entity.label==0:
-                    entity.label = new_label
-                    label_prob = 1
+            elif re.search("改正|更正|修正|修改|更改|变更|延期",entity_left3[-20:]) and not re.search("更正日期",entity_left3[-8:]):
+                last_time_label = dict_time2label.get(last_time_type,None)
+                if last_time_label and entity.label==0:
+                    entity.label = last_time_label
+                    label_prob = 1.5
+                elif last_time_label and entity.label==last_time_label:# 前后两个相同类型的时间为变更关系
+                    label_prob = 2
 
             # 优化多个并列的时间,如:开标时间和截标时间,截标时间和报名结束时间
             if entity.label in [2,3,9]: