Forráskód Böngészése

特殊站源修复;补充没工商数据有后缀字典匹配

lsm 1 hónapja
szülő
commit
9773d1e572

+ 2 - 2
BiddingKG/dl/entityLink/entityLink.py

@@ -557,13 +557,13 @@ def match_enterprise_max_first(sentence):
                             continue
                         if len(fix_name)<4: # 20240521 短于4个字的不要
                             break
-                        if enter_tail in SET_TAIL_ENTERPRISE or re.search('(中心|中学|小学|医院|学院|大学|学校|监狱|大队|支队|林场|海关|分局|商行)$', enter_tail):
+                        if enter_tail in SET_TAIL_ENTERPRISE or re.search('(中心|中学|小学|医院|学院|大学|学校|体校|监狱|大队|支队|林场|海关|分局|商行)$', enter_tail):
                             if fix_name not in business_dic:
                                 have_bus, dic = get_business_data(fix_name) # 20210124 改为有工商数据的实体才添加
                                 business_dic[fix_name] = (have_bus, dic)
                             else:
                                 have_bus, dic = business_dic.get(fix_name) # 20240708 字典保存查询过的工商数据,避免重复查询redis
-                            if have_bus:
+                            if have_bus or (re.search('(中心|中学|小学|医院|学院|大学|学校|体校|监狱|大队|支队|林场|海关|分局|商行)$', enter_tail) and is_enterprise_exist(fix_name)):
                             # if is_enterprise_exist(enter_name):
                                 match_item = {"entity_text":"%s"%(fix_name),"begin_index":begin_index,"end_index":begin_index+len(enter_name)}
                                 # print("match_item",key_enter,enter_name)

+ 4 - 0
BiddingKG/dl/interface/Preprocessing.py

@@ -3424,6 +3424,10 @@ def get_preprocessed_article(articles,cost_time = dict(),useselffool=True):
             article_processed = article_processed.replace('成交供应商单位名称', '成交候选人单位名称')
         if web_source_no.startswith('DX003027') and re.search('招标单位:中招联合信息股份有限公司', article_processed): # 20250402 处理站源招标人错误 明信阳光采购网
             article_processed = article_processed.replace('招标单位:中招联合信息股份有限公司', '')
+        if web_source_no == 'DX013859-1' and re.match('YT\d+', article_processed):
+            match = re.search(',(\w{5,25}[,。])$', article_processed)
+            if match:
+                article_processed = article_processed.replace(match.group(1), '中标候选人:%s'%match.group(1))
 
         '''去除业绩内容'''
         article_processed = del_achievement(article_processed)

+ 1 - 1
BiddingKG/dl/interface/extract.py

@@ -512,7 +512,7 @@ def predict(doc_id,text,title="",page_time="",web_source_no='',web_source_name="
 
     # data_res = Preprocessing.union_result(Preprocessing.union_result(codeName, prem),list_punish_dic)[0]
     # data_res = Preprocessing.union_result(Preprocessing.union_result(Preprocessing.union_result(codeName, prem),list_punish_dic), list_channel_dic)[0]
-    version_date = {'version_date': '2025-04-22'}
+    version_date = {'version_date': '2025-04-23'}
     data_res = dict(codeName[0], **prem[0], **channel_dic, **product_attrs[0], **product_attrs[1], **payment_way_dic, **fail_reason, **industry, **district, **candidate_dic, **version_date, **all_moneys, **pb_json)
 
     if original_docchannel == 302:

+ 3 - 0
BiddingKG/dl/interface/predictor.py

@@ -1934,6 +1934,9 @@ class RoleRulePredictor():
                                         , _sentence.sentence_text[:p_entity.wordOffset_begin]): # 处理几个标段金额相邻情况 例子:191705231
                                     p_entity.values[0] = 0.6 + p_entity.values[0] / 10
                                     p_entity.label = 0
+                                elif re.search('固定价格?:(人民币|¥)?$', _span[0]): # 20250423 修复 613808422 补充为招标金额
+                                    p_entity.values[0] = 0.5
+                                    p_entity.label = 0
                     if deposit_project and p_entity.label in [1,2]:
                         if req_scope and float(p_entity.entity_text)>1000000 and (p_entity.sentence_index>req_scope[0][0]\
                             or (p_entity.sentence_index==req_scope[0][0] and p_entity.wordOffset_begin>req_scope[0][1])) and (p_entity.sentence_index<req_scope[1][0]\