znj 5 сар өмнө
parent
commit
586d4f07db

+ 59 - 9
BiddingKG/dl/channel/channel_bert.py

@@ -339,6 +339,9 @@ def text_process(text):
     # text = re.sub("\s+", "", text)
     text = re.sub("\s+", " ", text)
 
+    # 优化部分未识别表达
+    text = re.sub("中止", "终止", text)
+
     return text
 
 label2class_dict = {
@@ -445,8 +448,16 @@ def channel_predict(title,text):
     with torch.no_grad():
         outputs = model(None, text)
         predic = torch.max(outputs.data, 1)[1].cpu().numpy()
-        pred_label = predic[0]
-        pred_class = label2class_dict[pred_label]
+        pred_prob = torch.max(outputs.data, 1)[0].cpu().numpy()
+        # print('pred_prob',pred_prob)
+        if pred_prob>0.5:
+            pred_label = predic[0]
+            pred_class = label2class_dict[pred_label]
+        else:
+            return
+    # check
+    if pred_class==101 and re.search("((资格|资质)(审查|预审|后审|审核)|资审)结果(公告|公示)?|(资质|资格)(预审|后审)公示|资审及业绩公示",title): # 纠正部分‘资审结果’模型错误识别为中标
+        pred_class = 105
 
     return pred_class
 
@@ -478,6 +489,37 @@ class_dict = {51: '公告变更',
               }
 
 def merge_channel(list_articles,channel_dic,original_docchannel):
+
+    def merge_rule(title,text,docchannel,pred_channel,channel_dic):  # Reconcile the model-predicted channel with docchannel and return the (possibly rebuilt) channel_dic
+        front_text_len = len(text)//3 if len(text)>300 else 100  # first third of the text when it is longer than 300 chars, otherwise the first 100 chars
+        front_text = text[:front_text_len]
+        pred_channel = class_dict[pred_channel]  # translate the predicted class id into its channel name
+        if pred_channel == docchannel:  # prediction agrees with docchannel: only set the flag to 0
+            channel_dic['docchannel']['use_original_docchannel'] = 0
+        else:
+            if pred_channel in ['采购意向','招标预告'] and docchannel in ['采购意向','招标预告']:  # both sides fall in the same two-channel group
+                merge_res = '采购意向' if re.search("意向|意愿",title) or re.search("意向|意愿",front_text) else "招标预告"  # keyword in the title or front text decides between the two
+                channel_dic['docchannel']['docchannel'] = merge_res
+                channel_dic['docchannel']['use_original_docchannel'] = 0
+            elif pred_channel in ['公告变更','招标答疑'] and docchannel in ['公告变更','招标答疑']:  # both in this group: keep docchannel as is
+                channel_dic['docchannel']['docchannel'] = docchannel
+                channel_dic['docchannel']['use_original_docchannel'] = 0
+            elif pred_channel=='公告变更' and docchannel in ['中标信息','废标公告','候选人公示','合同公告']: # an amendment to an award-type notice still counts as that award-type notice
+                channel_dic['docchannel']['docchannel'] = docchannel
+                channel_dic['docchannel']['use_original_docchannel'] = 0
+            elif docchannel=='公告变更' and pred_channel in ['中标信息','废标公告','候选人公示','合同公告']:  # mirror of the previous case: take the predicted award-type channel
+                channel_dic['docchannel']['docchannel'] = pred_channel
+                channel_dic['docchannel']['use_original_docchannel'] = 0
+
+            else:  # no rule matched: discard the incoming channel_dic and rebuild it from original_docchannel
+                channel_dic = {'docchannel': {'doctype': '采招数据',
+                                              'docchannel': class_dict.get(original_docchannel, '原始类别'),  # original_docchannel is read from the enclosing merge_channel scope
+                                              'life_docchannel': class_dict.get(original_docchannel, '原始类别')}}
+                channel_dic['docchannel']['use_original_docchannel'] = 1  # flag set to 1 only on this fallback path
+
+        return channel_dic
+
+
     article = list_articles[0]
     title = article.title
     text = article.content
@@ -493,13 +535,21 @@ def merge_channel(list_articles,channel_dic,original_docchannel):
             pred = channel_predict(title, text)
             # print('pred_res', pred)
             if pred is not None and original_docchannel: # 无original_docchannel时不进行对比校正
-                if class_dict[pred] == docchannel:
-                    channel_dic['docchannel']['use_original_docchannel'] = 0
-                else:
-                    channel_dic = {'docchannel': {'docchannel': '采招数据',
-                                                  'doctype': class_dict.get(original_docchannel, '原始类别'),
-                                                  'life_docchannel': class_dict.get(original_docchannel, '原始类别')}}
-                    channel_dic['docchannel']['use_original_docchannel'] = 1
+                # if class_dict[pred] == docchannel:
+                #     channel_dic['docchannel']['use_original_docchannel'] = 0
+                # else:
+                #     channel_dic = {'docchannel': {'docchannel': '采招数据',
+                #                                   'doctype': class_dict.get(original_docchannel, '原始类别'),
+                #                                   'life_docchannel': class_dict.get(original_docchannel, '原始类别')}}
+                #     channel_dic['docchannel']['use_original_docchannel'] = 1
+
+                channel_dic = merge_rule(title,text,docchannel,pred,channel_dic)
+    elif doctype=='采招数据' and docchannel=="":
+        pred = channel_predict(title, text)
+        if pred is not None:
+            pred = class_dict[pred]
+            channel_dic['docchannel']['docchannel'] = pred
+            channel_dic['docchannel']['use_original_docchannel'] = 0
 
     return channel_dic
 

+ 5 - 0
BiddingKG/dl/interface/predictor.py

@@ -4727,6 +4727,11 @@ class DocChannel():
           prem_json) == False and re.search(self.title_life_dic['中标信息'], title) == None:
           result['docchannel']['docchannel'] = origin_dic.get(original_docchannel, '')
           msc += '最终规则修改:中标公告、合同公告无中标人且原始为非中标,返回原类型'
+      elif result['docchannel']['docchannel'] in ['中标信息'] and is_contain_winner(prem_json) == False \
+              and re.search("监督(抽查|检查)结果", title):
+          result['docchannel']['doctype'] = "新闻资讯"
+          result['docchannel']['docchannel'] = ""
+          msc += '最终规则修改:中标公告无中标人且包含新闻资讯关键词,返回新闻资讯类型'
       elif result['docchannel']['docchannel'] == '废标公告' and is_contain_winner(prem_json) and re.search(
               self.title_life_dic['废标公告'], title) == None:
           result['docchannel']['docchannel'] = '中标信息'