il y a 1 an · 995f8ec8c4
--- a/BiddingKG/dl/interface/extract.py
+++ b/BiddingKG/dl/interface/extract.py
@@ -345,7 +345,7 @@ def predict(doc_id,text,title="",page_time="",web_source_no='',web_source_name="
 
				 
			
 
				     # data_res = Preprocessing.union_result(Preprocessing.union_result(codeName, prem),list_punish_dic)[0]
			
 
				     # data_res = Preprocessing.union_result(Preprocessing.union_result(Preprocessing.union_result(codeName, prem),list_punish_dic), list_channel_dic)[0]
			
 
				-    version_date = {'version_date': '2024-02-01'}
			
 
				+    version_date = {'version_date': '2024-03-01'}
			
 
				     data_res = dict(codeName[0], **prem[0], **channel_dic, **product_attrs[0], **product_attrs[1], **payment_way_dic, **fail_reason, **industry, **district, **candidate_dic, **version_date, **all_moneys)
			
 
				 
			
 
				     '''最终检查修正招标、中标金额'''
			
--- a/BiddingKG/dl/interface/getAttributes.py
+++ b/BiddingKG/dl/interface/getAttributes.py
@@ -806,7 +806,7 @@ def getPackagesFromArticle(list_sentence, list_entity):
 
				                     else:
			
 
				                         scope_end = [PackageList_scope[j + 1]["sentence_index"],
			
 
				                                      PackageList_scope[j + 1]["offsetWords_begin"]]
			
 
				-                    if PackageList_scope[j - 1]["sentence_index"] == PackageList_scope[j]["sentence_index"] and \
			
 
				+                    if j>0 and PackageList_scope[j - 1]["sentence_index"] == PackageList_scope[j]["sentence_index"] and \
			
 
				                             PackageList_scope[j - 1]["offsetWord_begin"] <= PackageList_scope[j]["offsetWord_begin"] and \
			
 
				                             PackageList_scope[j - 1]["offsetWord_end"] >= PackageList_scope[j]["offsetWord_end"]:
			
 
				                         continue
			
@@ -837,8 +837,8 @@ def getPackagesFromArticle(list_sentence, list_entity):
 
				     if len(True_package2) > 2: # 同时包含多标段及多中标人的
			
 
				         PackageList_scope = PackageList_scope + PackageList_scope2
			
 
				     PackageList = get_package_scope(PackageList_scope)
			
 
				-    if len(PackageSet)<2: # 20230922只提取到一个包号的去掉，都放在默认包project
			
 
				-        return [], set(), {}
			
 
				+    # if len(PackageSet)<2: # 20230922只提取到一个包号的去掉，都放在默认包project 2024/02/02 注释掉，防止多标段每篇公告只公布一个标段的没法提取标段号
			
 
				+        # return [], set(), {}
			
 
				     return PackageList, PackageSet, dict_packageCode
			
 
				 
			
 
				 
			
@@ -3891,7 +3891,7 @@ def get_multi_winner_and_money(channel_dic, prem, list_entitys,list_sentences):
 
				                     e_idx_bh = ent_bh.wordOffset_end
			
 
				                     if ent_bh.entity_type in ['org', 'company'] and ent_bh.label == 5 and ent_bh.sentence_index == ent.sentence_index and b_idx_bh-e_idx_fr==1:
			
 
				                         sentence_text = sentences[ent_bh.sentence_index].sentence_text
			
 
				-                        if sentence_text[e_idx_fr:b_idx_bh] in ['；','、'] and sentence_text[e_idx_bh] in ['、', '，', '。']:
			
 
				+                        if sentence_text[e_idx_fr:b_idx_bh] in ['；','、'] and (len(sentence_text)==e_idx_bh or sentence_text[e_idx_bh] in ['、', '，', '。']): # 修复多中标人刚好在文末index超出报错，例子 407126558
			
 
				                             multi_winner_l.append(ent_bh.entity_text)
			
 
				                             e_idx_fr = e_idx_bh
			
 
				                             i = j + 1
			
--- a/BiddingKG/dl/interface/modelFactory.py
+++ b/BiddingKG/dl/interface/modelFactory.py
@@ -96,7 +96,8 @@ class Model_role_classify_word():
 
				         text = re.sub('[一二三四五六七八九十]{2,}|[四五六七八九十]+', 'd', text)
			
 
				         text = re.sub('\d{2,}(\.\d+)?|\d\.\d+|[04-9]', 'd', text)
			
 
				         text = re.sub('序号：\d+|第?[一二三四五六七八九十\d]+次|[一二三四五六七八九十\d]+、', '', text)
			
 
				-        text = re.sub('(采购|招标|发布)机构', '发布人', text)
			
 
				+        text = re.sub('(中标|成交|中选|入围)(工程|项目)', '工程', text)  # 修复易错分为中标人
			
 
				+        # text = re.sub('(采购|招标|发布)机构', '发布人', text)
			
 
				         return text.replace('(', '（').replace(')', '）').replace('單', '单').replace('稱','承').replace('標', '标').replace('採購', '采购').replace('機構', '机构')
			
 
				 
			
 
				     def encode_word(self, sentence_text, begin_index, end_index, size=20, **kwargs):
			
--- a/BiddingKG/dl/interface/predictor.py
+++ b/BiddingKG/dl/interface/predictor.py
@@ -791,10 +791,10 @@ class PREMPredict():
 
				             elif label in [2,3,4] and re.search('序号：\d+，\w{,2}候选', front):
			
 
				                 label = 5
			
 
				             elif label == 0:
			
 
				-                if re.search('拟邀请$', front):
			
 
				+                if re.search('拟邀请$|受邀谈判方', front):
			
 
				                     label = 2
			
 
				                     values[label] = 0.501
			
 
				-                elif re.search('(发布(人|方|单位|机构|组织|用户|业主|主体|部门|公司|企业)|组织(单位|人|方|机构)?)(名称)?[是为：]+', front) and re.search('(招标|采购|咨询|代理|管理)\w*公司|(采购|交易)(中心|市场)', entity.entity_text):
			
 
				+                elif re.search('(发布(人|方|单位|机构|组织|用户|业主|主体|部门|公司|企业)|组织(单位|人|方|机构)?|(采购|招标|发布)机构)(名称)?[是为：]+', front) and re.search('(招标|采购|咨询|代理|管理)\w*公司|(采购|交易)(中心|市场)', entity.entity_text):
			
 
				                     label = 1
			
 
				                     values[label] = 0.501
			
 
				                 elif re.search('采用$', front): # 368177736 因本项目招标采用广西壮族自治区公共资源交易平台系统-