Explorar o código

修复web_source_no,'DX000726-6',卖方: 宝山钢铁股份有限公司作为招标人

lsm hai 2 anos
pai
achega
d3aa8c9195
Modificouse 1 ficheiro con 3 adicións e 1 borrado
  1. 3 1
      BiddingKG/dl/interface/Preprocessing.py

+ 3 - 1
BiddingKG/dl/interface/Preprocessing.py

@@ -1863,6 +1863,8 @@ def special_treatment(sourceContent, web_source_no):
         elif web_source_no=='00811-8':
             if re.search('是否中标:是', sourceContent) and re.search('排名:\d,', sourceContent):
                 sourceContent = re.sub('排名:\d,', '候选', sourceContent)
+        elif web_source_no=='DX000726-6':
+            sourceContent = re.sub('卖方[::\s]+宝山钢铁股份有限公司', '招标单位:宝山钢铁股份有限公司', sourceContent)
         return sourceContent
     except Exception as e:
         log('特殊数据源: %s 预处理特别修改抛出异常: %s'%(web_source_no, e))
@@ -2231,7 +2233,7 @@ def get_preprocessed_article(articles,cost_time = dict(),useselffool=True):
             article_processed_list[1] = attachment_text
             article_processed = "##attachment##".join(article_processed_list)
         '''特别数据源对 预处理后文本 做特别修改'''
-        if web_source_no in ['03786-10', '00076-4', 'DX000105-2', '04080-3', '04080-4', '03761-3', '00695-7',"13740-2", '00811-8', '03795-1', '03795-2']:
+        if web_source_no in ['03786-10', '00076-4', 'DX000105-2', '04080-3', '04080-4', '03761-3', '00695-7',"13740-2", '00811-8', '03795-1', '03795-2', 'DX000726-6']:
             article_processed = special_treatment(article_processed, web_source_no)
 
         # 提取bidway