|
@@ -1863,6 +1863,8 @@ def special_treatment(sourceContent, web_source_no):
|
|
elif web_source_no=='00811-8':
|
|
elif web_source_no=='00811-8':
|
|
if re.search('是否中标:是', sourceContent) and re.search('排名:\d,', sourceContent):
|
|
if re.search('是否中标:是', sourceContent) and re.search('排名:\d,', sourceContent):
|
|
sourceContent = re.sub('排名:\d,', '候选', sourceContent)
|
|
sourceContent = re.sub('排名:\d,', '候选', sourceContent)
|
|
|
|
+ elif web_source_no=='DX000726-6':
|
|
|
|
+ sourceContent = re.sub('卖方[::\s]+宝山钢铁股份有限公司', '招标单位:宝山钢铁股份有限公司', sourceContent)
|
|
return sourceContent
|
|
return sourceContent
|
|
except Exception as e:
|
|
except Exception as e:
|
|
log('特殊数据源: %s 预处理特别修改抛出异常: %s'%(web_source_no, e))
|
|
log('特殊数据源: %s 预处理特别修改抛出异常: %s'%(web_source_no, e))
|
|
@@ -2231,7 +2233,7 @@ def get_preprocessed_article(articles,cost_time = dict(),useselffool=True):
|
|
article_processed_list[1] = attachment_text
|
|
article_processed_list[1] = attachment_text
|
|
article_processed = "##attachment##".join(article_processed_list)
|
|
article_processed = "##attachment##".join(article_processed_list)
|
|
'''特别数据源对 预处理后文本 做特别修改'''
|
|
'''特别数据源对 预处理后文本 做特别修改'''
|
|
- if web_source_no in ['03786-10', '00076-4', 'DX000105-2', '04080-3', '04080-4', '03761-3', '00695-7',"13740-2", '00811-8', '03795-1', '03795-2']:
|
|
|
|
|
|
+ if web_source_no in ['03786-10', '00076-4', 'DX000105-2', '04080-3', '04080-4', '03761-3', '00695-7',"13740-2", '00811-8', '03795-1', '03795-2', 'DX000726-6']:
|
|
article_processed = special_treatment(article_processed, web_source_no)
|
|
article_processed = special_treatment(article_processed, web_source_no)
|
|
|
|
|
|
# 提取bidway
|
|
# 提取bidway
|