1 year ago · ca5ef1a8fe
--- a/BiddingKG/dl/complaint/punish_predictor.py
+++ b/BiddingKG/dl/complaint/punish_predictor.py
@@ -393,6 +393,8 @@ class Punish_Extract():
 
				             title = article.title
			
 
				             text=article.content
			
 
				             keyword, punishType = self.get_punishType(title, text)
			
 
				+            if punishType == "未知类别":
			
 
				+                punishType = ""
			
 
				 
			
 
				             # print('处罚类型：',punishType)
			
 
				             punish_code = self.predict_punishCode(list_sentences)
			
@@ -413,16 +415,7 @@ class Punish_Extract():
 
				                          'punishWhether':punishWhether,
			
 
				                          'institutions':institutions,
			
 
				                          'punishTimes':punishTimes}
			
 
				-            _count = 0
			
 
				-            for k,v in punish_dic.items():
			
 
				-                if v!="":
			
 
				-                    _count += 1
			
 
				-            if _count>=2 and punish_dic["punishType"]!="未知类别":
			
 
				-                list_result.append({"punish":punish_dic})
			
 
				-            else:
			
 
				-                list_result.append({"punish":{}})
			
 
				-        return list_result
			
 
				-
			
 
				+            return {k: v for k, v in punish_dic.items() if v not in ['', ' ']}
			
 
				 
			
 
				 
			
 
				 if __name__ == "__main__":
			
--- a/BiddingKG/dl/interface/Preprocessing.py
+++ b/BiddingKG/dl/interface/Preprocessing.py
@@ -1087,7 +1087,7 @@ def tableToText(soup, docid=None):
 
				                                     continue
			
 
				                                 if re.search(packPattern,head) is not None:
			
 
				                                     pack_text += head+cell["text"]+"，"
			
 
				-                                elif re.search(rankPattern,head) is not None:   # 2020/11/23 大网站规则发现问题，if 改elif
			
 
				+                                elif re.search(rankPattern,head) is not None and re.search('(排名|排序|名次|顺序)：?第?[\d一二三]', rank_text)==None:   # 2020/11/23 大网站规则发现问题，if 改elif 20240620修复同时有排名及评标情况造成错误
			
 
				                                     #排名替换为同一种表达
			
 
				                                     rank_text += head+cell["text"]+"，"
			
 
				                                     #print(rank_text)
			
@@ -1104,7 +1104,7 @@ def tableToText(soup, docid=None):
 
				 
			
 
				                         text += pack_text+rank_text+entity_text+money_text+text_line
			
 
				                         # text = text[:-1] + "。" if len(text) > 0 else text
			
 
				-                        if len(text_set)==1 and head == '' and len(last_text)< 20 and (re.search('[:：]$', last_text) or re.search('[一二三四五六七八九十\d]+[、.]\w{2,}', last_text)):
			
 
				+                        if len(text_set-set([' ']))==1 and head == '' and len(last_text)< 25: # 修复367694716分两行表达
			
 
				                             text = text if re.search('\w$', text[:-1]) else text[:-1]
			
 
				                         else:
			
 
				                             text = text[:-1] + "。"
			
@@ -2626,6 +2626,8 @@ def special_treatment(sourceContent, web_source_no):
 
				             sourceContent = re.sub('卖方[:：\s]+宝山钢铁股份有限公司', '招标单位：宝山钢铁股份有限公司', sourceContent)
			
 
				         elif web_source_no=='DX008791-1':
			
 
				             sourceContent = re.sub('收货单位：', '最终用户：', sourceContent)
			
 
				+        elif web_source_no=='DX011971':
			
 
				+            sourceContent = re.sub('公司主体：', '业主单位：', sourceContent)
			
 
				         return sourceContent
			
 
				     except Exception as e:
			
 
				         log('特殊数据源: %s 预处理特别修改抛出异常: %s'%(web_source_no, e))
			
@@ -3011,6 +3013,10 @@ def get_preprocessed_article(articles,cost_time = dict(),useselffool=True):
 
				         if re.search('中标单位名称：[\w（）]{5,25}，中标候选人名次：\d，', article_processed) and re.search('中标候选人名次：\d，中标单位名称：[\w（）]{5,25}，', article_processed)==None:  # 处理类似 304706608 此篇的数据源正文特殊表达
			
 
				             for it in re.finditer('(?P<tenderer>(中标单位名称：[\w（）]{5,25}，))(?P<rank>(中标候选人名次：\d，))', article_processed):
			
 
				                 article_processed = article_processed.replace(it.group(0), it.group('rank')+it.group('tenderer'))
			
 
				+        ser = re.search('竞得人：\d{8,15}-', article_processed)
			
 
				+        if ser:
			
 
				+            article_processed = article_processed.replace(ser.group(0), '竞得人：') # 修复类似 368120777 关键词角色被编号隔开情况
			
 
				+        article_processed = re.sub("流出方信息：。", "流出方信息：", article_processed) # 修复 367520674 产权批量表格问题
			
 
				 
			
 
				         '''去除业绩内容'''
			
 
				         article_processed = del_achievement(article_processed)
			
@@ -3028,7 +3034,7 @@ def get_preprocessed_article(articles,cost_time = dict(),useselffool=True):
 
				             article_processed_list[1] = attachment_text
			
 
				             article_processed = "##attachment##".join(article_processed_list)
			
 
				         '''特别数据源对 预处理后文本 做特别修改'''
			
 
				-        if web_source_no in ['03786-10', '00076-4', 'DX000105-2', '04080-3', '04080-4', '03761-3', '00695-7',"13740-2", '00811-8', '03795-1', '03795-2', 'DX000726-6','DX008791-1']:
			
 
				+        if web_source_no in ['03786-10', '00076-4', 'DX000105-2', '04080-3', '04080-4', '03761-3', '00695-7',"13740-2", '00811-8', '03795-1', '03795-2', 'DX000726-6','DX008791-1','DX011971']:
			
 
				             article_processed = special_treatment(article_processed, web_source_no)
			
 
				 
			
 
				         # 提取bidway
			
@@ -3546,7 +3552,7 @@ def get_preprocessed_entitys(list_sentences,useselffool=True,cost_time=dict()):
 
				                 entity_type = ner_entity[2]
			
 
				                 entity_text = ner_entity[3]
			
 
				 
			
 
				-                if entity_type == 'location' and re.search('^\w{2,4}[市县]\w{3,15}(中心|监狱|殡仪馆)$', entity_text) and \
			
 
				+                if entity_type == 'location' and re.search('^\w{2,4}[市县]\w{2,15}(中心|监狱|殡仪馆|水利站)$', entity_text) and \
			
 
				                     re.search('\d[楼层号]', entity_text)==None: # 2024/06/07 修改错误地址实体为角色
			
 
				                     entity_type = 'org'
			
 
				 
			
--- a/BiddingKG/dl/interface/extract.py
+++ b/BiddingKG/dl/interface/extract.py
@@ -289,12 +289,6 @@ def predict(doc_id,text,title="",page_time="",web_source_no='',web_source_name="
 
				     '''获取联合体信息'''
			
 
				     getAttributes.get_win_joint(prem, list_entitys, list_sentences, list_articles)
			
 
				 
			
 
				-    #暂时不执行
			
 
				-    # start_time = time.time() #失信数据要素提取
			
 
				-    # list_punish_dic = predictor.getPredictor("punish").get_punish_extracts(list_articles,list_sentences, list_entitys)
			
 
				-    # cost_time["punish"] = round(time.time()-start_time,2)
			
 
				-
			
 
				-
			
 
				     '''修正采购公告表格形式多种采购产品中标价格；中标金额小于所有产品总金额则改为总金额'''
			
 
				     getAttributes.correct_rolemoney(prem, total_product_money, list_articles)
			
 
				 
			
@@ -365,13 +359,19 @@ def predict(doc_id,text,title="",page_time="",web_source_no='',web_source_name="
 
				 
			
 
				     # data_res = Preprocessing.union_result(Preprocessing.union_result(codeName, prem),list_punish_dic)[0]
			
 
				     # data_res = Preprocessing.union_result(Preprocessing.union_result(Preprocessing.union_result(codeName, prem),list_punish_dic), list_channel_dic)[0]
			
 
				-    version_date = {'version_date': '2024-06-18'}
			
 
				+    version_date = {'version_date': '2024-06-27'}
			
 
				     data_res = dict(codeName[0], **prem[0], **channel_dic, **product_attrs[0], **product_attrs[1], **payment_way_dic, **fail_reason, **industry, **district, **candidate_dic, **version_date, **all_moneys, **pb_json)
			
 
				 
			
 
				     if original_docchannel == 302:
			
 
				         approval = predictor.getPredictor("approval").predict(list_sentences, list_entitys)
			
 
				         data_res['approval'] = approval
			
 
				 
			
 
				+    if channel_dic['docchannel']['doctype'] == '处罚公告': # 20240627 处罚公告进行失信要素提取
			
 
				+        start_time = time.time() #失信数据要素提取
			
 
				+        punish_dic = predictor.getPredictor("punish").get_punish_extracts(list_articles,list_sentences, list_entitys)
			
 
				+        cost_time["punish"] = round(time.time()-start_time,2)
			
 
				+        data_res['punish'] = punish_dic
			
 
				+
			
 
				     '''最终检查修正招标、中标金额'''
			
 
				     getAttributes.limit_maximum_amount(data_res, list_entitys[0])
			
 
				 
			
--- a/BiddingKG/dl/interface/getAttributes.py
+++ b/BiddingKG/dl/interface/getAttributes.py
@@ -12,6 +12,7 @@ import os
 
				 from scipy.optimize import linear_sum_assignment
			
 
				 from BiddingKG.dl.interface.Entitys import Match
			
 
				 import numpy as np
			
 
				+import uuid
			
 
				 
			
 
				 def getTheRole(entity,role_list):
			
 
				     '''
			
@@ -3571,7 +3572,7 @@ def getOtherAttributes(list_entity,page_time):
 
				             #     continue
			
 
				             if re.search("[^之]日|天|年|月|周|星期", entity.entity_text) or re.search("\d{4}[\-\./]\d{1,2}", entity.entity_text):
			
 
				                 list_serviceTime.append(entity)
			
 
				-        elif entity.entity_type=="person" and entity.label ==4:
			
 
				+        elif entity.entity_type=="person" and entity.label ==4 and entity.entity_text not in dict_other["person_review"]: # 20240624评审专家去重
			
 
				             dict_other["person_review"].append(entity.entity_text)
			
 
				         elif entity.entity_type=='product' and entity.entity_text not in dict_other["product"]: #顺序去重保留
			
 
				             dict_other["product"].append(entity.entity_text)
			
@@ -4095,6 +4096,7 @@ def  confirm_prem(prem, channel_dic):
 
				         other_winner = set()
			
 
				         empty_roleList = []
			
 
				         for k in prem:
			
 
				+            prem[k]['uuid'] = str(uuid.uuid4()) # 20240627 每个包都添加uuid
			
 
				             if prem[k]['roleList'] == []:
			
 
				                 empty_roleList.append(k)
			
 
				             for d in prem[k]['roleList']:
			
@@ -4110,6 +4112,8 @@ def  confirm_prem(prem, channel_dic):
 
				         if other_winner and channel_dic['docchannel']['docchannel'] in ['中标信息', '候选人公示', '合同公告']:
			
 
				             for k in empty_roleList:
			
 
				                 prem.pop(k)
			
 
				+    elif "Project" in prem:
			
 
				+        prem['Project']['uuid'] = str(uuid.uuid4())
			
 
				 
			
 
				 
			
 
				 def fix_single_source(prem, channel_dic, original_docchannel):
			
--- a/BiddingKG/dl/interface/modelFactory.py
+++ b/BiddingKG/dl/interface/modelFactory.py
@@ -103,6 +103,7 @@ class Model_role_classify_word():
 
				         if re.search('(最终)?排名：', text) and re.search('(最终)?排名：第?[123一二三]', text)==None:
			
 
				             text = re.sub('(最终)?排名：', '    ', text)
			
 
				         text = re.sub('交易单位', '发布单位', text)
			
 
				+        text = re.sub('[，：]各种数据：', '：', text) # 20240620优化 478331984 山东省交通运输厅站源提取不到 各种数据：中标单位，各种数据：济南金曰公路工程有限公司，
			
 
				         return text.replace('(', '（').replace(')', '）').replace('單', '单').replace('稱','承').replace('標', '标').replace('採購', '采购').replace('機構', '机构')
			
 
				 
			
 
				     def encode_word(self, sentence_text, begin_index, end_index, size=20, **kwargs):
			
--- a/BiddingKG/dl/interface/predictor.py
+++ b/BiddingKG/dl/interface/predictor.py
@@ -835,7 +835,7 @@ class PREMPredict():
 
				                 elif re.search('尊敬的供应商：$', front):
			
 
				                     label = 0
			
 
				                     values[label] = 0.501
			
 
				-                elif re.search('第[4-9四五六]中标候选人', front):  #修复第4以上的预测错为中标人
			
 
				+                elif re.search('第[4-9四五六]中标候选人|(提交单位|竞投单位)：$', front):  #修复第4以上的预测错为中标人
			
 
				                     label = 5
			
 
				                     values[label] = 0.5
			
 
				                 elif re.search('(排名|排序|名次)：([4-9]|\d{2,})，', front) or re.search('序号：\d+，(供应商|投标|候选)', front): # 293225236 附件中 排名预测错误
			
@@ -891,7 +891,7 @@ class PREMPredict():
 
				                 elif re.search('\d+\.\d+，供应商名称：', front): #  341385226 30.2，供应商名称： 预测为第二名
			
 
				                     label = 2
			
 
				                     values[label] = 0.501
			
 
				-                elif re.search('\d+\.\d+[，、]?(中标|成交)候选人', front):
			
 
				+                elif re.search('\d+\.\d+[，、]?(中标|成交)候选人|[；，][23]、(中标|中选|成交)候选人：', front):
			
 
				                     label = 5
			
 
				                     values[label] = 0.501
			
 
				                 elif re.search('第一名：$', front):
			
@@ -1396,7 +1396,7 @@ class RoleRulePredictor():
 
				         self.pattern_agency_left = "(?P<agency_left>((代理|拍卖)(?:人|机构|公司|企业|单位|组织)|专业采购机构|集中采购机构|招标组织机构|交易机构|集采机构|[招议)）]+标机构|(采购|招标)代理)(名称|.{,4}名，?称|全称)?(是|为|：|:|[,，]?\s*)$|(受.{5,20}委托，?$))"
			
 
				         self.pattern_agency_right = "(?P<agency_right>^([(（](以下简称)?[，\"“]*(代理)(人|单位|机构)[，\"”]*[)）])|^受.{5,20}委托|^受委?托，)"  # |^受托  会与 受托生产等冲突，代理表达一般会在后面有逗号
			
 
				         # 2020//11/24 大网站规则 中标关键词添加 选定单位|指定的中介服务机构
			
 
				-        self.pattern_winTenderer_left_50 = "(?P<winTenderer_left_50>" \
			
 
				+        self.pattern_winTenderer_left_50 = "(?P<winTenderer_left_51>" \
			
 
				                "(乙|竞得|受让|买受|签约|施工|供货|供应?|合作|承做|承包|承建|承销|承保|承接|承制|承担|承修|承租(（包）)?|入围|入选|竞买)(候选|投标)?(人|单位|机构|供应商|方|公司|企业|厂商|商|社会资本方?)(：?单位名称|：?名称|盖章)?[：:是为]+$" \
			
 
				                "|(选定单位|指定的中介服务机构|实施主体|中标银行|中标通知书，致|征集结果|选择中介|选择结果|成交对象|勘察人|(，|审计|处置|勘察|设计)服务单位|受托[人方])[：:是为]+$" \
			
 
				                "|((评审结果|名次|排名|中标结果)[:：]*第?[一1]名?)[：:是为]+$|成交供应商信息[，：]?(序号1)?：?|供应商名称$" \
			
@@ -1404,7 +1404,7 @@ class RoleRulePredictor():
 
				                "|现(公布|宣布|公示)中标单位如下：$|现将中标单位(公布|公示)如下：$|现宣布以下(企业|单位|公司)中标：$|经讨论，决定采用$)"  # 承办单位：不作为中标 83914772
			
 
				         self.pattern_winTenderer_left_60 = "(?P<winTenderer_left_60>" \
			
 
				                                            "(，|。|：|^)((中标(投标)?|[拟预]中标|中选|中价|中签|成交)(人|单位|机构|中介(服务)?机构|供应商|客户|方|公司|企业|厂商|商家?|社会资本方?)|(中标候选人)?第?[一1]名|第[一1](中标|中选|成交)?候选人|服务机构)" \
			
 
				-                                           "(：?单位名称|：?名称|盖章)?[,，]?([(（]按综合排名排序[)）]|：择优选取)?[：:,，]$)"  # 解决表头识别不到加逗号情况，需前面为，。空
			
 
				+                                           "(：?单位名称|：?名称|盖章)?[,，]?([(（]按综合排名排序[)）]|：择优选取)?[：:,，]$|选取(情况|说明)：中选，中介机构名称：$|排名如下：1、$)"  # 解决表头识别不到加逗号情况，需前面为，。空 20240621补充 中选 云南省投资审批中介超市 补充排名如下 南阳师范学院
			
 
				         self.pattern_winTenderer_left_55 = "(?P<winTenderer_left_55>(中标(投标)?|[拟预]中标|中选|中价|中签|成交|入选)(人|单位|机构|中介(服务)?机构|供应商|客户|方|公司|企业|厂商|商家?|社会资本方?)" \
			
 
				                                            "(：?单位名称|：?名称|盖章)?([(（]按综合排名排序[)）]|：择优选取)?[：:是为]+$" \
			
 
				                                            "|结果公示如下：摇出球号：\d+号，中介机构：$)"  # 取消逗号 并拒绝执行改进计划的供应商，华新水泥将可能终止与其合作关系  # 中标候选人不能作为中标   # |直购企业：$不能作为中标人，看到有些公告会又多个公司，然后还会发布中选结果的公告，其中一个公司中标
			
@@ -1415,8 +1415,8 @@ class RoleRulePredictor():
 
				                                          "|^通过(挂牌|拍卖)方式(以[\d.,]+万?元)?竞得|^[（(](中标|成交|承包)人名?称?[）)]))" # 去掉 |\w{,20} 修复 460216955 网上公布的与本次采购项目有关的信息视为已送达各响应供应商。 作为中标
			
 
				         self.pattern_winTenderer_whole = "(?P<winTenderer_center>(贵公司|由).{,15}以\w{,15}中标|确定[\w（）]{5,20}为[^，。；]{5,50}的?中标单位" \
			
 
				                                          "|选定报价最低的[“”\w（）]{5,25}为[^，。；]{5,50}的?(服务|中标|成交)单位" \
			
 
				-                                         "|拟邀请[\w（）]{5,20}(进行)?单一来源谈判|(承办单位|报价人|投标人|中介机构)(名称)?：[\w（）]{5,20}，(中标|承办|中选)价格" \
			
 
				-                                         "|(谈判结果：|结果|最终|确定|决定)[以由为][^，。；]{5,25}(向我单位)?(供货|承担|承接|中标|竞买成功)|中标通知书.{,15}你方|单一来源方?式?[从向][（）\w]{5,20}采购)"  # 2020//11/24 大网站规则 中标关键词添加 谈判结果：由.{5,20}供货
			
 
				+                                         "|拟邀请[\w（）]{5,20}(进行)?单一来源谈判|(承办单位|报价人|投标人|中介机构)(名称)?：[\w（）]{5,20}，(中标|承办|中选)(价格|金额)" \
			
 
				+                                         "|(谈判结果：|结果|最终|确定|决定)[以由为][^，。；]{5,25}(向我单位)?(供货|承担|承接|中标|竞买成功)|中标通知书.{,15}你方|单一来源方?式?[从向][（）\w]{5,20}采购|供应商名称：[（）\w]{5,20}，独家采购原因)"  # 2020//11/24 大网站规则 中标关键词添加 谈判结果：由.{5,20}供货
			
 
				 
			
 
				         self.pattern_secondTenderer_left = "(?P<secondTenderer_left>((第[二2]名?(名|((中标|中选|中价|成交|候选)(候选)?(人|单位|机构|供应商|公司))))(名称)?[：:是为]+$)|((评审结果|名次|排名|排序)[:：]第?[二2]名?，?(投标(供应)?商|供应商)(名称)?[:：]+$))"
			
 
				         self.pattern_secondTenderer_right = "(?P<secondTenderer_right>^[是为\(]第[二2](名|(中标|中选|中价|成交)(候选)?(人|单位|机构|供应商|公司)))"
			
@@ -1453,7 +1453,7 @@ class RoleRulePredictor():
 
				 
			
 
				         self.SET_NOT_TENDERER = set(["人民政府","人民法院","中华人民共和国","人民检察院","评标委员会","中国政府","中国海关","中华人民共和国政府"])
			
 
				         
			
 
				-        self.pattern_money_tenderee = re.compile("投?标?最高限价|采购计划金额|项目预算|招标金额|采购金额|项目金额|投资估算|采购(单位|人)委托价|招标限价|拦标价|预算金额|标底|总计|限额|资金来源，?为\w{2,4}资金|采购成本价")  # |建安费用 不作为招标金额
			
 
				+        self.pattern_money_tenderee = re.compile("投?标?最高限价|采购计划金额|项目预算|招标金额|采购金额|项目金额|投资估算|采购(单位|人)委托价|招标限价|拦标价|预算金额|标底|总计|限额|资金来源，?为\w{2,4}资金|采购成本价|总费用约?为")  # |建安费用 不作为招标金额
			
 
				         self.pattern_money_tenderer = re.compile("((合同|成交|中标|应付款|交易|投标|验收|订单)[）\)]?(综合)?(总?金额|结果|[单报总]?价))|标的基本情况|承包价|报酬（含税）：|经评审的价格")  # 单写 总价 不能作为中标金额，很多表格有单价、总价
			
 
				         self.pattern_money_tenderer_whole = re.compile("(以金额.*中标)|中标供应商.*单价|以.*元中标")
			
 
				         self.pattern_money_other = re.compile("代理费|服务费")
			
@@ -1614,7 +1614,7 @@ class RoleRulePredictor():
 
				                                         if _span[2].startswith("："): # 实体后面为冒号的不作为招标人，避免项目名称出错中标变招标  368122675 陇西兴恒建建筑有限责任公司：线路安全保护区内环境治理专项整改（第二标段）项目
			
 
				                                             break
			
 
				                                         if str(_span[0][-len(str(_name)):]+_span[1] + _span[2][:len(str(_name))]).find(
			
 
				-                                                _name) >= 0:
			
 
				+                                                _name) >= 0 or str(_name).startswith(p_entity.entity_text): # 20240621 补充公司开头的项目名称召回，避免name太长召回失败 例 367033697
			
 
				                                             # if p_entity.entity_text in agency_set or re.search('(代理|管理|咨询|招投?标|采购)\w{,6}公司', p_entity.entity_text): # 在代理人集合的作为代理人
			
 
				                                             if is_agency(p_entity.entity_text): # 2024/3/29 统一方法判断是否为代理
			
 
				                                                 find_flag = True
			
@@ -1783,7 +1783,7 @@ class RoleRulePredictor():
 
				                                     p_entity.label = 0
			
 
				                                     # print('规则召回预算金额2：', p_entity.entity_text, _sentence.sentence_text[:p_entity.wordOffset_begin])
			
 
				             if notfound_tenderer and len(set([ent.entity_text for ent in candidates])) == 1 and re.search(
			
 
				-                    '(中标|中选|中价|中租|成交|入选|确认)(候选人|人|供应商|记录|结果|变更)?(公告|公示|结果)|(遴选|采购|招标|竞价|议价|比选|询比?价|评选|谈判|邀标|邀请|洽谈|约谈|评标|发包|磋商|交易)\w{,2}结果|单一来源(采购|招标)?的?(中标|成交|结果)|中标通知书',
			
 
				+                    '(中标|中选|中价|中租|成交|入选|确认)(候选人|人|供应商|记录|结果|变更)?(公告|公示|结果)|(遴选|采购|招标|竞价|议价|比选|询比?价|评选|谈判|邀标|邀请|洽谈|约谈|评标|发包|磋商|交易|评审)\w{,2}结果|单一来源(采购|招标)?的?(中标|成交|结果)|中标通知书',
			
 
				                     article.title+article.content[:100]):
			
 
				                 for p_entity in candidates:
			
 
				                     # print('只有一个候选人的作为中标人', p_entity.entity_text)
			
@@ -2328,7 +2328,7 @@ class RoleGrade():
 
				                     low_prob_tenderee.append(entity)
			
 
				             elif entity.entity_type in ['org', 'company'] and entity.label == 2 and 0.5<=entity.values[entity.label]<0.6:
			
 
				                 low_prob_winner.append(entity)
			
 
				-            if entity.entity_type in ['org', 'company'] and entity.label in [1, 0] and 0.5<entity.values[entity.label]:
			
 
				+            if entity.entity_type in ['org', 'company'] and entity.label in [1, 0] and 0.6<entity.values[entity.label]: # 由0.5调为0.6，避免367217504 同时为低概率招标、中标被改
			
 
				                 all_tenderee_agency.append(entity.entity_text)
			
 
				 
			
 
				 
			
@@ -3915,7 +3915,7 @@ class DocChannel():
 
				           '土地矿产': '(土地|用地|宗地|荒地|山地|海域|矿)(出让|出租|招租|租赁|承包|流转|使用权|经营权|征收|划拨|中标|成交)|供地结果|矿业权|探矿权|采矿权|(土地|用地|宗地|地块)(使用权)?(终止|中止|网上)?(挂牌|出让|拍卖|招拍|划拨)|征收土地',
			
 
				           '拍卖出让': '(拍卖|变卖|流拍|竞拍)的?(公告|公示)|拍卖|变卖|流拍|竞拍',
			
 
				           '产权交易': '经营权|承包权|使用权|租赁权|股权|债权|排污权|化学需氧量|储备量|竞价销售|销售结果|出租|招租|拍租|竞租|续租|挂牌|出让',
			
 
				-          '采招数据': '(采购|招标|询价|议价|比价|比选|遴选|邀请|邀标|磋商|洽谈|约谈|谈判)的?(公告|公示|中标|成交|结果|$)|工程招标|定点服务',
			
 
				+          '采招数据': '(采购|招标|询价|议价|比价|比选|遴选|邀请|邀标|磋商|洽谈|约谈|谈判)的?(公告|公示|中标|成交|结果|$)|工程招标|定点服务|竞价采购|(中标|成交)(结果)?(公告|公示)',
			
 
				           # |竞价 采招/产权都有竞价方式 # 意向|需求|预公?告|报建|总承包|工程|施工|设计|勘察|代理|监理 |变更|答疑|澄清|中标|成交|合同|废标|流标
			
 
				           '新闻资讯': '(考试|面试|笔试)成绩|成绩的?(公告|公示|公布)|公开招聘|招聘(公告|简章|启事|合同制)|疫情防控\s{,5}(通知|情况|提示)|行政审批结果'
			
 
				       }
			
@@ -3938,7 +3938,7 @@ class DocChannel():
 
				           '合同公告': '合同(公告|公示|信息|内容)|合同(编号|名称|主体|基本情况|完成(日期|时间))|(供应商乙方|乙方供应商)：|合同总?金额|履约信息',
			
 
				           '废标公告': '(终止|中止|废标|流标|流采|失败|作废|异常|撤销)(结果)?(公告|公示|招标|采购|竞价)|(谈判结果为|结果类型)：?废标|((本|该)(项目|标段|合同|合同包|采购包|次)\w{,5})((失败|终止|流标|废标)|予以废标|(按|做|作)?(流标|废标|废置)处理)|(采购|招标|询价|议价|竞价|比价|比选|遴选|邀请|邀标|磋商|洽谈|约谈|谈判|竞谈|应答|项目)(终止|中止|废标|流标|失败|作废|异常|撤销)',
			
 
				           '废标公告2': '(无效|中止|终止|废标|流标|失败|作废|异常|撤销)的?(原因|理由)|本项目因故取消|本(项目|次)(公开)?\w{2}失败|已终止\s*原因：|(人|人数|供应商|单位)(不足|未达\w{,3}数量)|已终止|不足[3三]家|无（废标）|成交情况：\s*[流废]标|现予以废置',
			
 
				-          '废标公告neg': '超过此报价将作为[废流]标处理|否则按[废流]标处理|终止规则：|成交规则：|视为流标|竞价失败的一切其他情形'
			
 
				+          '废标公告neg': '超过此报价将作为[废流]标处理|否则按[废流]标处理|终止规则：|成交规则：|视为流标|竞价失败的一切其他情形|是否废标：否'
			
 
				       }
			
 
				       self.title_life_dic = {
			
 
				           '采购意向': '采购意向|招标意向|选取意向|意向公告|意向公示|意向公开',
			
@@ -4243,16 +4243,16 @@ class DocChannel():
 
				       def get_type(title, text):
			
 
				           if re.search(self.title_type_dic['土地矿产'], title) or re.search(self.type_dic['土地矿产'],
			
 
				                                                                    text):  # and re.search('(土地|用地|宗地|地块)(经营权)?(流转|承包|出租|招租|租赁|确权)', text)==None
			
 
				-              if re.search(self.title_type_dic['采招数据'], title + text.strip().split(' ')[0]):
			
 
				-                  return '采招数据', re.search(self.title_type_dic['采招数据'], title + text.strip().split(' ')[0]).group(0)
			
 
				+              if re.search(self.title_type_dic['采招数据'], text.strip().split(' ')[0] + title):
			
 
				+                  return '采招数据', re.search(self.title_type_dic['采招数据'], text.strip().split(' ')[0] + title).group(0)
			
 
				               return '土地矿产', (re.search(self.title_type_dic['土地矿产'], title) or re.search(self.type_dic['土地矿产'], text)).group(0)
			
 
				           elif (re.search(self.title_type_dic['拍卖出让'], title) or re.search(self.type_dic['拍卖出让'], text)):
			
 
				-              if re.search(self.title_type_dic['采招数据'], title + text.strip().split(' ')[0]):
			
 
				-                  return '采招数据', re.search(self.title_type_dic['采招数据'], title + text.strip().split(' ')[0]).group(0)
			
 
				+              if re.search(self.title_type_dic['采招数据'], text.strip().split(' ')[0] + title):
			
 
				+                  return '采招数据', re.search(self.title_type_dic['采招数据'], text.strip().split(' ')[0] + title).group(0)
			
 
				               return '拍卖出让', (re.search(self.title_type_dic['拍卖出让'], title) or re.search(self.type_dic['拍卖出让'], text)).group(0)
			
 
				           elif re.search(self.title_type_dic['产权交易'], title) or re.search(self.type_dic['产权交易'], text):
			
 
				-              if re.search(self.title_type_dic['采招数据'], title + text.strip().split(' ')[0]):
			
 
				-                  return '采招数据', re.search(self.title_type_dic['采招数据'], title + text.strip().split(' ')[0]).group(0)
			
 
				+              if re.search(self.title_type_dic['采招数据'], text.strip().split(' ')[0] + title):
			
 
				+                  return '采招数据', re.search(self.title_type_dic['采招数据'], text.strip().split(' ')[0] + title).group(0)
			
 
				               return '产权交易', (re.search(self.title_type_dic['产权交易'], title) or re.search(self.type_dic['产权交易'], text)).group(0)
			
 
				           elif re.search(self.title_type_dic['采招数据'], title) or re.search(self.type_dic['采招数据'], title + text):
			
 
				               return '采招数据', (
			
@@ -4487,10 +4487,17 @@ class DocChannel():
 
				 
			
 
				           if result['docchannel']['doctype'] in ['产权交易', '土地矿产', '拍卖出让'] and origin_dic.get(
			
 
				                   original_docchannel, '') not in ['产权交易', '土地矿产', '拍卖出让'] \
			
 
				-                and re.search('产权|转让|受让|招租|招商|出租|承租|竞价|资产|挂牌|出让|拍卖|招拍|划拨|销售', title) == None\
			
 
				-                and re.search('(采购|招投?标|投标)(信息|内容|项目|公告|数量|人|单位|方式)|(建设|工程|服务|施工|监理|勘察|设计)项目', text):
			
 
				+                and re.search('产权|转让|受让|招租|招商|出租|承租|资产|挂牌|出让|拍卖|招拍|划拨|销售', title) == None\
			
 
				+                and re.search('(采购|招投?标|投标)(信息|内容|项目|公告|数量|人|单位|方式)|(建设|工程|服务|施工|监理|勘察|设计)项目|(%s)'%self.type_dic['采招数据'], text):
			
 
				               result['docchannel']['doctype'] = '采招数据'
			
 
				               msc += ' 最终规则修改：预测为非采招数据，原始为采招数据且有招标关键词，返回采招数据'
			
 
				+          elif result['docchannel']['doctype'] in ['土地矿产'] and origin_dic.get(original_docchannel, '') in ['拍卖出让', '产权交易']:
			
 
				+              if origin_dic.get(original_docchannel, '') in ['拍卖出让'] and (re.search(self.title_type_dic['拍卖出让'], title) or re.search(self.type_dic['拍卖出让'], text)):
			
 
				+                  result['docchannel']['doctype'] = '拍卖出让'
			
 
				+                  msc += "最终规则修改：预测为土地矿产原始为拍卖且有拍卖关键词，返回拍卖"
			
 
				+              elif (re.search(self.title_type_dic['产权交易'], title) or re.search(self.type_dic['产权交易'], text)):
			
 
				+                  result['docchannel']['doctype'] = '产权交易'
			
 
				+                  msc += "最终规则修改：预测为土地矿产原始为产权交易且有产权交易关键词，返回产权交易"
			
 
				 
			
 
				           '''下面是新格式增加返回字段'''
			
 
				           if result['docchannel']['docchannel'] != '':  # 预测到生命周期的复制到life_docchannel，否则用数据源结果
			
@@ -4537,6 +4544,8 @@ class DocChannel():
 
				           return {'docchannel': {'docchannel': '', 'doctype': not_extract_dic[original_docchannel], 'life_docchannel': origin_dic.get(original_docchannel, '原始类别')}}, '公告类别不在提取范围'
			
 
				       if web_source_no in ['02104-7', '04733', 'DX007628-6']: # 这些数据源无法识别
			
 
				           return {'docchannel': {'docchannel': '', 'doctype': '采招数据', 'life_docchannel': origin_dic.get(original_docchannel, '原始类别')}}, '此数据源公告分类不明确，返回数据源类别'
			
 
				+      if original_docchannel == 303 or (re.search('处罚|投诉|失信', title) and re.search(self.title_type_dic['采招数据'], title)==None):
			
 
				+          return {'docchannel': {'docchannel': '处罚公告', 'doctype': '处罚公告', 'life_docchannel': '处罚公告'}}, "处罚公告只判断标题和源类别"
			
 
				 
			
 
				       title = re.sub('[^\u4e00-\u9fa5]+|出租车', '', title)
			
 
				       if len(title) > 50:
			
@@ -4572,9 +4581,10 @@ class DocChannel():
 
				           if doc_life=="" and result['docchannel']['doctype'] not in ['', '新闻资讯']:
			
 
				               if len(text)>150 and re.search(self.kws, content):
			
 
				                   life_id, life_prob = life_model_predict()
			
 
				-                  life_model = self.id2life[life_id]
			
 
				-                  result['docchannel']['docchannel'] = life_model
			
 
				-                  msc += life_model + ' 概率：%.4f；\n'%life_prob
			
 
				+                  if life_prob>=0.8:
			
 
				+                      life_model = self.id2life[life_id]
			
 
				+                      result['docchannel']['docchannel'] = life_model
			
 
				+                      msc += life_model + ' 概率：%.4f；\n'%life_prob
			
 
				 
			
 
				       msc = final_change(msc)
			
 
				       # print('channel ', msc)
			
@@ -6711,8 +6721,8 @@ class CandidateExtractor(object):
 
				             'project_code': "(项目|招标|采购|计划|公告|包[段组件]|标[段包的]|标段（包）|分[包标])(编号|编码)",
			
 
				             "project_name": "(包[段组件]|标[段包的项]|标段（包）|分[包标]|采购|项目|工程|货物|商品|产品|设备|通用|主要标的|^包)(名称?|内容)",
			
 
				             "win_sort": "排名|排序|名次|推荐顺序",
			
 
				-            'win_or_not': '是否中标|是否入围|是否入库|入围结论',
			
 
				-            "candidate": "((候选|入围|入选|投标)(供应商库)?的?(人|人?单位|机构|供应商|供货商|服务商|投标人|(中标)?公司|(中标)?企业)|(通过)?名单|中标候选人)(名称|名单|全称|\d)?$|^供应商(名称|信息)?$",
			
 
				+            'win_or_not': '是否(建议|推荐)?(中标|成交)|是否入围|是否入库|入围结论',
			
 
				+            "candidate": "((候选|入围|入选|投标)(供应商库)?的?(人|人?单位|机构|供应商|供货商|服务商|投标人|(中标)?公司|(中标)?企业)|(通过)?名单|中标候选人)(名称|名单|全称|\d)?$|^供应商(名称|信息)?$|投标个人/单位", #补充 368295593 投标个人/单位 提取
			
 
				             "bid_amount": "投标[报总]?价|报价(总?金额|总价|总额)|总报价|^\w{,5}报价(（[\w、/]{1,15}）)?$|(中标|成交|合同)）?([金总]额|[报均总]价|价[格款]?)|承包价|含税价|经评审的价格",
			
 
				             "win_tenderer": "第一名|第一(中标|成交)?候选人",
			
 
				             "second_tenderer": "第二名|第二(中标|成交)?候选人",
			
@@ -6859,6 +6869,9 @@ class CandidateExtractor(object):
 
				             if candidate:
			
 
				                 if win_or_not and re.search('否|未入围', win_or_not):
			
 
				                     pass
			
 
				+                elif re.search('^((建议|推荐)(中标|成交)|是)$', win_or_not) and win_sort in ['', '参与投标单位及排名'] and win_tenderer=='':
			
 
				+                    win_sort = '第一名'
			
 
				+                    candidate_set.add(candidate)
			
 
				                 else:
			
 
				                     candidate_set.add(candidate)