|
@@ -725,7 +725,7 @@ class PREMPredict():
|
|
|
def predict(self,list_sentences,list_entitys):
|
|
|
self.predict_role(list_sentences,list_entitys)
|
|
|
self.predict_money(list_sentences,list_entitys)
|
|
|
-
|
|
|
+
|
|
|
|
|
|
#联系人模型
|
|
|
class EPCPredict():
|
|
@@ -2265,7 +2265,6 @@ class ProductAttributesPredictor():
|
|
|
i = 0
|
|
|
found_header = False
|
|
|
header_colnum = 0
|
|
|
-
|
|
|
if flag_yx:
|
|
|
col0_l = []
|
|
|
col1_l = []
|
|
@@ -2311,7 +2310,6 @@ class ProductAttributesPredictor():
|
|
|
demand_link.append(link)
|
|
|
headers_demand.append('_'.join(header_list2))
|
|
|
continue
|
|
|
-
|
|
|
while i < (len(inner_table)):
|
|
|
tds = inner_table[i]
|
|
|
not_empty = [it for it in tds if it != ""]
|
|
@@ -3009,9 +3007,9 @@ class DocChannel():
|
|
|
119: '候选人公示',
|
|
|
120: '合同公告'}
|
|
|
if original_docchannel in not_extract_dic:
|
|
|
- return {'docchannel': {'docchannel': '', 'doctype': not_extract_dic[original_docchannel], 'life_docchannel': origin_dic.get(original_docchannel, '原始类别')}}
|
|
|
+ return {'docchannel': {'docchannel': '', 'doctype': not_extract_dic[original_docchannel], 'life_docchannel': origin_dic.get(original_docchannel, '原始类别')}}, '公告类别不在提取范围'
|
|
|
if web_source_no in ['02104-7', '04733', 'DX007628-6']: # 这些数据源无法识别
|
|
|
- return {'docchannel': {'docchannel': '', 'doctype': '采招数据', 'life_docchannel': origin_dic.get(original_docchannel, '原始类别')}}
|
|
|
+ return {'docchannel': {'docchannel': '', 'doctype': '采招数据', 'life_docchannel': origin_dic.get(original_docchannel, '原始类别')}}, '此数据源公告分类不明确,返回数据源类别'
|
|
|
|
|
|
title = re.sub('[^\u4e00-\u9fa5]', '', title)
|
|
|
if len(title) > 50:
|