Kaynağa Gözat

修复招标单位中招联合信息股份有限公司错误提取

lsm 2 ay önce
ebeveyn
işleme
2fbada9a1c

+ 2 - 0
BiddingKG/dl/interface/Preprocessing.py

@@ -3422,6 +3422,8 @@ def get_preprocessed_article(articles,cost_time = dict(),useselffool=True):
             article_processed = article_processed.replace('推荐供应商:', '公司名称:')
         if web_source_no.startswith('DX016489') and re.search('排名', article_processed) and re.search('成交供应商单位名称', article_processed): # 20250219 处理特殊站源有排名却叫成交供应商
             article_processed = article_processed.replace('成交供应商单位名称', '成交候选人单位名称')
+        if web_source_no.startswith('DX003027') and re.search('招标单位:中招联合信息股份有限公司', article_processed): # 20250402 处理站源招标人错误 明信阳光采购网
+            article_processed = article_processed.replace('招标单位:中招联合信息股份有限公司', '')
 
         '''去除业绩内容'''
         article_processed = del_achievement(article_processed)

+ 1 - 1
BiddingKG/dl/interface/extract.py

@@ -512,7 +512,7 @@ def predict(doc_id,text,title="",page_time="",web_source_no='',web_source_name="
 
     # data_res = Preprocessing.union_result(Preprocessing.union_result(codeName, prem),list_punish_dic)[0]
     # data_res = Preprocessing.union_result(Preprocessing.union_result(Preprocessing.union_result(codeName, prem),list_punish_dic), list_channel_dic)[0]
-    version_date = {'version_date': '2025-04-01'}
+    version_date = {'version_date': '2025-04-02'}
     data_res = dict(codeName[0], **prem[0], **channel_dic, **product_attrs[0], **product_attrs[1], **payment_way_dic, **fail_reason, **industry, **district, **candidate_dic, **version_date, **all_moneys, **pb_json)
 
     if original_docchannel == 302: