|
@@ -339,6 +339,9 @@ def text_process(text):
|
|
# text = re.sub("\s+", "", text)
|
|
# text = re.sub("\s+", "", text)
|
|
text = re.sub("\s+", " ", text)
|
|
text = re.sub("\s+", " ", text)
|
|
|
|
|
|
|
|
+ # 优化部分未识别表达
|
|
|
|
+ text = re.sub("中止", "终止", text)
|
|
|
|
+
|
|
return text
|
|
return text
|
|
|
|
|
|
label2class_dict = {
|
|
label2class_dict = {
|
|
@@ -445,8 +448,16 @@ def channel_predict(title,text):
|
|
with torch.no_grad():
|
|
with torch.no_grad():
|
|
outputs = model(None, text)
|
|
outputs = model(None, text)
|
|
predic = torch.max(outputs.data, 1)[1].cpu().numpy()
|
|
predic = torch.max(outputs.data, 1)[1].cpu().numpy()
|
|
- pred_label = predic[0]
|
|
|
|
- pred_class = label2class_dict[pred_label]
|
|
|
|
|
|
+ pred_prob = torch.max(outputs.data, 1)[0].cpu().numpy()
|
|
|
|
+ # print('pred_prob',pred_prob)
|
|
|
|
+ if pred_prob>0.5:
|
|
|
|
+ pred_label = predic[0]
|
|
|
|
+ pred_class = label2class_dict[pred_label]
|
|
|
|
+ else:
|
|
|
|
+ return
|
|
|
|
+ # check
|
|
|
|
+ if pred_class==101 and re.search("((资格|资质)(审查|预审|后审|审核)|资审)结果(公告|公示)?|(资质|资格)(预审|后审)公示|资审及业绩公示",title): # 纠正部分‘资审结果’模型错误识别为中标
|
|
|
|
+ pred_class = 105
|
|
|
|
|
|
return pred_class
|
|
return pred_class
|
|
|
|
|
|
@@ -478,6 +489,37 @@ class_dict = {51: '公告变更',
|
|
}
|
|
}
|
|
|
|
|
|
def merge_channel(list_articles,channel_dic,original_docchannel):
|
|
def merge_channel(list_articles,channel_dic,original_docchannel):
|
|
|
|
+
|
|
|
|
def merge_rule(title,text,docchannel,pred_channel,channel_dic):
    """Reconcile the model-predicted channel with the channel already assigned.

    Args:
        title: Document title; searched for the intent keywords.
        text: Document body; only its leading part is searched.
        docchannel: Channel name currently assigned to the document.
        pred_channel: Predicted class id; translated to a channel name
            through ``class_dict``.
        channel_dic: Dict holding a ``'docchannel'`` sub-dict. It is updated
            in place whenever the prediction is accepted or merged.

    Returns:
        ``channel_dic`` with ``use_original_docchannel`` set to 0 when the
        prediction agrees with, or can be merged into, ``docchannel``.
        Otherwise a newly built dict based on ``original_docchannel`` with
        ``use_original_docchannel`` set to 1.

    Note:
        ``class_dict`` and ``original_docchannel`` are not parameters; they
        are resolved from the surrounding scope (presumably the module-level
        mapping and the enclosing ``merge_channel`` argument -- confirm).
    """
    # Groups of channels that are allowed to be merged with each other.
    intent_channels = ('采购意向', '招标预告')
    change_channels = ('公告变更', '招标答疑')
    award_channels = ('中标信息', '废标公告', '候选人公示', '合同公告')

    # Leading slice of the body: first third for texts longer than 300
    # characters, otherwise the first 100 characters.
    head_len = 100
    if len(text) > 300:
        head_len = len(text) // 3
    head = text[:head_len]

    predicted = class_dict[pred_channel]

    keep_current = object()  # sentinel: leave the stored channel name untouched
    if predicted == docchannel:
        merged = keep_current
    elif predicted in intent_channels and docchannel in intent_channels:
        # Both sides are pre-announcement types: decide by keyword, looking
        # at the title first and then at the leading part of the body.
        mentions_intent = any(re.search("意向|意愿", part) for part in (title, head))
        merged = '采购意向' if mentions_intent else "招标预告"
    elif predicted in change_channels and docchannel in change_channels:
        merged = docchannel
    elif predicted == '公告变更' and docchannel in award_channels:
        # A change notice on an award-type announcement stays award-type.
        merged = docchannel
    elif docchannel == '公告变更' and predicted in award_channels:
        merged = predicted
    else:
        # No merge rule applies: fall back to the original channel.
        fallback = class_dict.get(original_docchannel, '原始类别')
        return {'docchannel': {'doctype': '采招数据',
                               'docchannel': fallback,
                               'life_docchannel': fallback,
                               'use_original_docchannel': 1}}

    if merged is not keep_current:
        channel_dic['docchannel']['docchannel'] = merged
    channel_dic['docchannel']['use_original_docchannel'] = 0
    return channel_dic
|
|
|
|
+
|
|
|
|
+
|
|
article = list_articles[0]
|
|
article = list_articles[0]
|
|
title = article.title
|
|
title = article.title
|
|
text = article.content
|
|
text = article.content
|
|
@@ -493,13 +535,21 @@ def merge_channel(list_articles,channel_dic,original_docchannel):
|
|
pred = channel_predict(title, text)
|
|
pred = channel_predict(title, text)
|
|
# print('pred_res', pred)
|
|
# print('pred_res', pred)
|
|
if pred is not None and original_docchannel: # 无original_docchannel时不进行对比校正
|
|
if pred is not None and original_docchannel: # 无original_docchannel时不进行对比校正
|
|
- if class_dict[pred] == docchannel:
|
|
|
|
- channel_dic['docchannel']['use_original_docchannel'] = 0
|
|
|
|
- else:
|
|
|
|
- channel_dic = {'docchannel': {'docchannel': '采招数据',
|
|
|
|
- 'doctype': class_dict.get(original_docchannel, '原始类别'),
|
|
|
|
- 'life_docchannel': class_dict.get(original_docchannel, '原始类别')}}
|
|
|
|
- channel_dic['docchannel']['use_original_docchannel'] = 1
|
|
|
|
|
|
+ # if class_dict[pred] == docchannel:
|
|
|
|
+ # channel_dic['docchannel']['use_original_docchannel'] = 0
|
|
|
|
+ # else:
|
|
|
|
+ # channel_dic = {'docchannel': {'docchannel': '采招数据',
|
|
|
|
+ # 'doctype': class_dict.get(original_docchannel, '原始类别'),
|
|
|
|
+ # 'life_docchannel': class_dict.get(original_docchannel, '原始类别')}}
|
|
|
|
+ # channel_dic['docchannel']['use_original_docchannel'] = 1
|
|
|
|
+
|
|
|
|
+ channel_dic = merge_rule(title,text,docchannel,pred,channel_dic)
|
|
|
|
+ elif doctype=='采招数据' and docchannel=="":
|
|
|
|
+ pred = channel_predict(title, text)
|
|
|
|
+ if pred is not None:
|
|
|
|
+ pred = class_dict[pred]
|
|
|
|
+ channel_dic['docchannel']['docchannel'] = pred
|
|
|
|
+ channel_dic['docchannel']['use_original_docchannel'] = 0
|
|
|
|
|
|
return channel_dic
|
|
return channel_dic
|
|
|
|
|