浏览代码

时间分类规则优化

znj 6 月之前
父节点
当前提交
fc75e79750
共有 1 个文件被更改,包括 7 次插入 和 1 次删除
  1. 7 1
      BiddingKG/dl/interface/getAttributes.py

+ 7 - 1
BiddingKG/dl/interface/getAttributes.py

@@ -3306,6 +3306,8 @@ def getTimeAttributes(list_entity,list_sentence,page_time):
                     dict_time['time_bidopen'].append((extract_time[0], label_prob-0.1, in_attachment))
                 if entity.label==3 and re.search("报名",entity_left3):
                     dict_time['time_registrationEnd'].append((extract_time[0], 0.5, in_attachment))
+                if entity.label==3 and re.search("获取",entity_left3[-20:]):
+                    dict_time['time_getFileEnd'].append((extract_time[0], 0.45, in_attachment))
                 if entity.label==9 and re.search("截标|投标.{,2}截止|([递提]交|接收)(?:文件)?.{,2}截止|报价.{,2}截止|响应.{,2}截止|文件.{,2}([递提]交|接收)",entity_left3):
                     dict_time['time_bidclose'].append((extract_time[0], label_prob-0.1, in_attachment))
             if entity.label in [11, 3]:
@@ -3318,7 +3320,11 @@ def getTimeAttributes(list_entity,list_sentence,page_time):
                     dict_time['time_bidclose'].append((extract_time[0], 0.5, in_attachment))
             if entity.label==0:
                 if re.search("文件.{,2}([递提]交|接收)|截标|投标.{,2}截止|([递提]交|接收)(?:文件)?.{,2}截止|报价.{,2}截止|响应.{,2}截止",entity_left3):
-                    dict_time['time_bidclose'].append((extract_time[0], 0.45, in_attachment))
+                    if len(extract_time)>=2:
+                        dict_time['time_bidstart'].append((extract_time[0], 0.45, in_attachment))
+                        dict_time['time_bidclose'].append((extract_time[1], 0.45, in_attachment))
+                    else:
+                        dict_time['time_bidclose'].append((extract_time[0], 0.45, in_attachment))
             if entity.label==6:
                 # "文件获取时间"和"报名时间"并列
                 if re.search("报名",entity_left3):