|
@@ -667,8 +667,12 @@ class PREMPredict():
|
|
|
elif re.search('尊敬的供应商:.{,25}我公司', text):
|
|
|
label = 0
|
|
|
values[label] = 0.801
|
|
|
- if label == 1 and re.search('委托(单位|人|方)[是为:]+', text[:10]) and re.search('受委托(单位|人|方)[是为:]+', text[:10])==None:
|
|
|
+ elif label == 1 and re.search('委托(单位|人|方)[是为:]+', text[:10]) and re.search('受委托(单位|人|方)[是为:]+', text[:10])==None:
|
|
|
label = 0
|
|
|
+ values[label] = 0.501
|
|
|
+ elif label == 1 and re.search('([,。:]|^)(服务|中选)机构(名称)?', text[:-10]):
|
|
|
+ label = 2
|
|
|
+ values[label] = 0.501
|
|
|
entity.set_Role(label, values)
|
|
|
|
|
|
def predict_money(self,list_sentences,list_entitys):
|
|
@@ -1096,25 +1100,25 @@ class RoleRulePredictor():
|
|
|
def __init__(self):
|
|
|
# (?P<tenderee_left_w1> 正则组名 后面的 w1 为概率权重关键词
|
|
|
self.pattern_tenderee_left = "(?P<tenderee_left>((项目|需求|最终|建设|业主|转让|招租|甲|议标|合同主体|挂牌|出租|出让|买受|选取|抽取|抽选|出售|标卖|比价|处置)" \
|
|
|
- "(人|公司|单位|组织|用户|业主|主体|方|部门)|文章来源|委托机构|产权所有人|需方|买方|业主|权属人|甲方当事人|询价书企业|比选发起人|项目单位[,:]单位名称|结算单位)"\
|
|
|
- "[))]?(信息[,:])?(名称)?([((](全称|盖章)[))])?(是|为|:|:|,|\s*)+$)"
|
|
|
+ "(人|公司|单位|组织|用户|业主|主体|方|部门)|文章来源|委托机构|产权所有人|需方|买方|业主|(业主|采购人|招标人)联系方式[,:]公司名称:|权属人|甲方当事人|询价书企业|比选发起人|项目单位[,:]单位名称|结算单位)"\
|
|
|
+ "[))]?(信息[,:])?(名称)?([((](全称|盖章)[))])?(是|为|:|:|\s*)+$)"
|
|
|
self.pattern_tenderee_left_w1 = "(?P<tenderee_left_w1>((遴选|采购|招标|竞价|议价|比选|委托|询价|评选|谈判|邀标|邀请|洽谈|约谈)" \
|
|
|
"(人|公司|单位|组织|用户|业主|主体|方|部门))" \
|
|
|
- "(名称)?([((](全称|盖章)[))])?(是|为|:|:|,|\s*)+$)"
|
|
|
- self.pattern_tenderee_center = "(?P<tenderee_center>(受.{,20}委托))"
|
|
|
- self.pattern_tenderee_right = "(?P<tenderee_right>^([((](以下简称)?[,\"“]*(招标|采购)(人|单位|机构)[,\"”]*[))])|^委托|^拟对|^现就|^现委托)" #|(^[^.。,,::](采购|竞价|招标|施工|监理|中标|物资)(公告|公示|项目|结果|招标))|的.*正在进行询比价)
|
|
|
-
|
|
|
- self.pattern_agency_left = "(?P<agency_left>(代理(?:人|机构|公司|单位|组织)|专业采购机构|集中采购机构|集采机构|[招议))]+标机构)(.{,4}名,?称|全称|是|为|:|:|[,,]?\s*)$|(受.{5,20}委托,?$))"
|
|
|
- self.pattern_agency_right = "(?P<agency_right>^([((](以下简称)?[,\"“]*(代理)(人|单位|机构)[,\"”]*[))])|^受.{5,20}委托)" # |^受托 会与 受托生产等冲突,暂时为发现受托表达代理方式
|
|
|
+ "(名称)?([((](全称|盖章)[))])?(是|为|:|:|\s*)+$)"
|
|
|
+ self.pattern_tenderee_center = "(?P<tenderee_center>(受.{5,20}委托))"
|
|
|
+ self.pattern_tenderee_right = "(?P<tenderee_right>^([((](以下简称)?[,\"“]*(招标|采购)(人|单位|机构)[,\"”]*[))])|^委托|^现委托|^的\w{2,10}正在进行)" #|(^[^.。,,::](采购|竞价|招标|施工|监理|中标|物资)(公告|公示|项目|结果|招标))|的.*正在进行询比价)
|
|
|
+ self.pattern_tendereeORagency_right = "(?P<tendereeORagency_right>(^拟对|^现?就|^现对))"
|
|
|
+ self.pattern_agency_left = "(?P<agency_left>(代理(?:人|机构|公司|单位|组织)|专业采购机构|集中采购机构|招标组织机构|集采机构|[招议))]+标机构)(.{,4}名,?称|全称|是|为|:|:|[,,]?\s*)$|(受.{5,20}委托,?$))"
|
|
|
+ self.pattern_agency_right = "(?P<agency_right>^([((](以下简称)?[,\"“]*(代理)(人|单位|机构)[,\"”]*[))])|^受.{5,20}委托|^受委?托,)" # |^受托 会与 受托生产等冲突,代理表达一般会在后面有逗号
|
|
|
# 2020//11/24 大网站规则 中标关键词添加 选定单位|指定的中介服务机构
|
|
|
- self.pattern_winTenderer_left = "(?P<winTenderer_left>(乙|承做|施工|供货|承包|竞得|受让|签约)(候选)?(人|单位|机构|供应商|方|公司|厂商|商)[::是为,]+$|" \
|
|
|
- "(选定单位|指定的中介服务机构|实施主体|承制单位)[::是为,]+$|((评审结果|名次|排名|中标结果)[::]*第?[一1]名?)[::,]*$|" \
|
|
|
- "单一来源(采购)?(供应商|供货商|服务商|方式向)$|((中标|成交)(结果|信息))(是|为|:|:)$|(供应|供货|供|承销|服务|实施)(机构|单位|商|方)(名称)?[::是为,]$)"
|
|
|
- self.pattern_winTenderer_left_w1 = "(?P<winTenderer_left_w1>(中标|中选|中价|成交)(候选)?(人|单位|机构|供应商|方|公司|厂商|商)[::是为,]+$)"
|
|
|
+ self.pattern_winTenderer_left = "(?P<winTenderer_left>(乙|承做|施工|供货|承包|承建|竞得|受让|签约)(候选)?(人|单位|机构|供应商|方|公司|厂商|商)[::是为]+$|" \
|
|
|
+ "(选定单位|指定的中介服务机构|实施主体|承制单位|供方)[::是为]+$|((评审结果|名次|排名|中标结果)[::]*第?[一1]名?)[::是为]+$|" \
|
|
|
+ "单一来源(采购)?(供应商|供货商|服务商|方式向)$|((中标|成交)(结果|信息))[::是为]+$|(供应|供货|承销|服务|实施)(机构|单位|商|方)(名称)?[::是为]+$)"
|
|
|
+ self.pattern_winTenderer_left_w1 = "(?P<winTenderer_left_w1>(中标|中选|中价|成交)(候选)?(人|单位|机构|供应商|方|公司|厂商|商)[::是为]+$)" #取消逗号 并拒绝执行改进计划的供应商,华新水泥将可能终止与其合作关系
|
|
|
# self.pattern_winTenderer_center = "(?P<winTenderer_center>第[一1].{,20}[是为]((中标|中选|中价|成交|施工)(人|单位|机构|供应商|公司)|供应商)[::是为])"
|
|
|
# self.pattern_winTenderer_right = "(?P<winTenderer_right>(^[是为\(]((采购(供应商|供货商|服务商)|(第[一1]|预)?(拟?(中标|中选|中价|成交)(候选)?(人|单位|机构|供应商|公司|厂商)))))|^(报价|价格)最低,确定为本项目成交供应商)"
|
|
|
self.pattern_winTenderer_right = "(?P<winTenderer_right>(^[是为]((采购|中标)(供应商|供货商|服务商)|(第[一1]|预)?(拟?(中标|中选|中价|成交)(候选)?(人|单位|机构|供应商|公司|厂商)))|" \
|
|
|
- "^(报价|价格)最低,确定为本项目成交供应商|^:贵公司参与|^你方于))"
|
|
|
+ "^(报价|价格)最低,确定为本项目成交供应商|^:贵公司参与|^:?你方于|^中标。|^成为[\w、()()]+项目的成交供应商))"
|
|
|
self.pattern_winTenderer_whole = "(?P<winTenderer_center>贵公司.{,15}以.{,15}中标|最终由.{,15}竞买成功|经.{,15}决定[以由].{,15}公司中标|谈判结果:由.{5,20}供货)|中标通知书.{,15}你方" # 2020//11/24 大网站规则 中标关键词添加 谈判结果:由.{5,20}供货
|
|
|
|
|
|
# self.pattern_winTenderer_location = "(中标|中选|中价|乙|成交|承做|施工|供货|承包|竞得|受让)(候选)?(人|单位|机构|供应商|方|公司|厂商|商)|(供应商|供货商|服务商)[::]?$|(第[一1](名|((中标|中选|中价|成交)?(候选)?(人|单位|机构|供应商))))(是|为|:|:|\s*$)|((评审结果|名次|排名)[::]第?[一1]名?)|(单一来源(采购)?方式向.?$)"
|
|
@@ -1129,6 +1133,7 @@ class RoleRulePredictor():
|
|
|
self.pattern_tenderee_left_w1,
|
|
|
self.pattern_tenderee_center,
|
|
|
self.pattern_tenderee_right,
|
|
|
+ self.pattern_tendereeORagency_right,
|
|
|
self.pattern_agency_left,
|
|
|
self.pattern_agency_right,
|
|
|
self.pattern_winTenderer_left,
|
|
@@ -1194,6 +1199,8 @@ class RoleRulePredictor():
|
|
|
find_flag = True
|
|
|
if p_entity.values[0] > on_value:
|
|
|
p_entity.values[0] = 0.6 + (p_entity.values[0] - 0.6) / 10
|
|
|
+ else:
|
|
|
+ p_entity.values[0] = on_value # 2022/03/08 修正类似 223985179 公司在文章开头的项目名称概率又没达到0.5的情况
|
|
|
if find_flag:
|
|
|
continue
|
|
|
|
|
@@ -1258,11 +1265,20 @@ class RoleRulePredictor():
|
|
|
for _group, _v_group in _iter.groupdict().items():
|
|
|
if _v_group is not None and _v_group != "":
|
|
|
_role = _group.split("_")[0]
|
|
|
+ if _role == "tendereeORagency": # 2022/3/9 新增不确定招标代理判断逻辑
|
|
|
+ print('p_entity_sentenceindex:', p_entity.sentence_index)
|
|
|
+ if p_entity.sentence_index>=1: # 只在第一句进行这种模糊匹配
|
|
|
+ continue
|
|
|
+ if re.search('医院|学校|大学|中学|小学|幼儿园|政府|部|委员会|署|行|局|厅|处|室|科|股|站', p_entity.entity_text)\
|
|
|
+ or re.search('(采购|招标|投标|交易|代理|拍卖|咨询|顾问|管理)', p_entity.entity_text) == None:
|
|
|
+ _role = 'tenderee'
|
|
|
+ else:
|
|
|
+ _role = "agency"
|
|
|
_direct = _group.split("_")[1]
|
|
|
_weight = _group.split("_")[2] if len(_group.split("_"))==3 else ""
|
|
|
# _label = {"tenderee": 0, "agency": 1, "winTenderer": 2,
|
|
|
# "secondTenderer": 3, "thirdTenderer": 4}.get(_role)
|
|
|
- if _i_span == 0 and _direct == "left" and re.search('各.{,5}供应商|尊敬的供应商',
|
|
|
+ if _i_span == 0 and _direct == "left" and re.search('各.{,5}供应商|尊敬的供应商|业绩|拟招|交易服务单位', #135463002 拟招一家供应商为宜宾市第三人民医院、李庄同济医院院区提供消防维保服务
|
|
|
list_spans[
|
|
|
0]) == None: # 2021/12/22 修正错误中标召回 例子208668937
|
|
|
_flag = True
|
|
@@ -1369,12 +1385,13 @@ class RoleRulePredictor():
|
|
|
|
|
|
'''正则补充最后一句实体日期格式为招标或代理 2021/12/30'''
|
|
|
class RoleRuleFinalAdd():
|
|
|
- def predict(self, list_articles, list_entitys):
|
|
|
+ def predict(self, list_articles, list_entitys, list_codenames):
|
|
|
text_end = list_articles[0].content[-40:]
|
|
|
# sear_ent = re.search('[,。]([\u4e00-\u9fa5()()]{5,20}),?\s*[.]{2,4}年.{1,2}月.{1,2}日', text_end)
|
|
|
- sear_ent = re.search('[,。]([\u4e00-\u9fa5()()]{5,20}(,?[\u4e00-\u9fa5]{,6}(分公司|部))?),?\s*[0-9零一二三四五六七八九十]{2,4}年.{1,2}月.{1,2}日', text_end)
|
|
|
+ sear_ent = re.search('[,。]([\u4e00-\u9fa5()()]{5,20}(,?[\u4e00-\u9fa5]{,6}(分公司|部))?),?\s*[0-9零一二三四五六七八九十]{2,4}[年\-/][0-9零一二三四五六七八九十]{1,2}[月\-/][0-9零一二三四五六七八九十]{1,2}日?', text_end)
|
|
|
sear_ent2 = re.search('(户名|开户名称)[::]([\u4e00-\u9fa5()()]{5,20})[,。]', list_articles[0].content[:5000])
|
|
|
sear_ent3 = re.search('(报名咨询|收货地点|送货地点)[,:]([\u4e00-\u9fa5()()]{5,20})[0-9\-]*[,。]', list_articles[0].content[:5000])
|
|
|
+
|
|
|
if sear_ent or sear_ent2 or sear_ent3:
|
|
|
if sear_ent3:
|
|
|
ent_re = sear_ent3.group(2)
|
|
@@ -1394,31 +1411,53 @@ class RoleRuleFinalAdd():
|
|
|
agency_notfound = False
|
|
|
elif ent.label == 5:
|
|
|
ents.append(ent)
|
|
|
- if agency_notfound == True and re.search('(采购|招标|投标|交易|代理|拍卖|咨询|顾问|管理)', ent_re):
|
|
|
+ if tenderee_notfound == True and (re.search('医院|学校|大学|中学|小学|幼儿园|政府|部|委员会|署|行|局|厅|处|室|科|股|站', ent_re)
|
|
|
+ or re.search('(采购|招标|投标|交易|代理|拍卖|咨询|顾问|管理)', ent_re) == None):
|
|
|
n = 0
|
|
|
for i in range(len(ents) - 1, -1, -1):
|
|
|
n += 1
|
|
|
- if n > 3 and sear_ent: # 文章末尾角色加日期这种只找后三个实体
|
|
|
+ if n > 3 and sear_ent: # 文章末尾角色加日期这种只找后三个实体
|
|
|
break
|
|
|
if ents[i].entity_text == ent_re or (ents[i].entity_text in ent_re and len(ents[i].entity_text)/len(ent_re)>0.6):
|
|
|
- ents[i].label = 1
|
|
|
- ents[i].values[1] = 0.5
|
|
|
+ ents[i].label = 0
|
|
|
+ ents[i].values[0] = 0.5
|
|
|
# log('正则最后补充实体: %s'%(ent_re))
|
|
|
break
|
|
|
-
|
|
|
- elif tenderee_notfound == True and re.search('(采购|招标|投标|交易|代理|拍卖|咨询|顾问|管理)', ent_re) == None:
|
|
|
+ elif agency_notfound == True and re.search('(采购|招标|投标|交易|代理|拍卖|咨询|顾问|管理)', ent_re):
|
|
|
n = 0
|
|
|
for i in range(len(ents) - 1, -1, -1):
|
|
|
n += 1
|
|
|
- if n > 3 and sear_ent: # 文章末尾角色加日期这种只找后三个实体
|
|
|
+ if n > 3 and sear_ent: # 文章末尾角色加日期这种只找后三个实体
|
|
|
break
|
|
|
if ents[i].entity_text == ent_re or (ents[i].entity_text in ent_re and len(ents[i].entity_text)/len(ent_re)>0.6):
|
|
|
- ents[i].label = 0
|
|
|
- ents[i].values[0] = 0.5
|
|
|
+ ents[i].label = 1
|
|
|
+ ents[i].values[1] = 0.5
|
|
|
# log('正则最后补充实体: %s'%(ent_re))
|
|
|
break
|
|
|
|
|
|
|
|
|
+ elif list_codenames[0]['name'] != "": #把标题包含的公司实体作为招标人
|
|
|
+ tenderee_notfound = True
|
|
|
+ ents = []
|
|
|
+ for ent in list_entitys[0]:
|
|
|
+ if ent.entity_type in ['org', 'company']:
|
|
|
+ if ent.label == 0:
|
|
|
+ tenderee_notfound = False
|
|
|
+ elif ent.label == 1:
|
|
|
+ agency_notfound = False
|
|
|
+ elif ent.label == 5:
|
|
|
+ ents.append(ent)
|
|
|
+ if tenderee_notfound == True:
|
|
|
+ print('list_codenames',list_codenames[0]['name'])
|
|
|
+ for ent in ents:
|
|
|
+ if ent.entity_text in list_codenames[0]['name']:
|
|
|
+ ent.label = 0
|
|
|
+ ent.values[0] = 0.5
|
|
|
+ # log('正则召回标题中包含的实体:%s'%ent.entity_text)
|
|
|
+ break
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# 时间类别
|
|
|
class TimePredictor():
|
|
|
def __init__(self):
|
|
@@ -1923,7 +1962,7 @@ class ProductAttributesPredictor():
|
|
|
for j in range(i + 1, len(items)):
|
|
|
if len(items[j]) > 20 and len(re.sub('[\((].*[)\)]|[^\u4e00-\u9fa5]', '', items[j])) > 10:
|
|
|
continue
|
|
|
- if re.search('数量', items[j]):
|
|
|
+ if header_dic['数量']=="" and re.search('数量', items[j]):
|
|
|
header_dic['数量'] = j
|
|
|
quantity = items[j]
|
|
|
elif re.search('单价', items[j]):
|
|
@@ -1972,6 +2011,7 @@ class ProductAttributesPredictor():
|
|
|
header_col = []
|
|
|
product_link = []
|
|
|
demand_link = []
|
|
|
+ total_product_money = 0
|
|
|
for i in range(len(tables)-1, -1, -1):
|
|
|
table = tables[i]
|
|
|
if table.parent.name == 'td' and len(table.find_all('td')) <= 3:
|
|
@@ -2111,6 +2151,12 @@ class ProductAttributesPredictor():
|
|
|
'brand': brand[:50], 'specs':specs}
|
|
|
if link not in product_link:
|
|
|
product_link.append(link)
|
|
|
+ mat = re.match('([0-9.,]+)[((]?\w{,3}[))]?$', link['quantity'])
|
|
|
+ if link['unitPrice'] != "" and mat:
|
|
|
+ try:
|
|
|
+ total_product_money += float(link['unitPrice'])*float(mat.group(1).replace(',', ''))
|
|
|
+ except:
|
|
|
+ log('产品属性单价数量相乘出错, 单价: %s, 数量: %s'%(link['unitPrice'], link['quantity']))
|
|
|
if budget != "" and order_time != "" :
|
|
|
link = {'project_name': product, 'product':[], 'demand': demand, 'budget': budget, 'order_begin':order_begin, 'order_end':order_end}
|
|
|
if link not in demand_link:
|
|
@@ -2126,7 +2172,7 @@ class ProductAttributesPredictor():
|
|
|
demand_dic = {'demand_info':{'data':demand_link, 'header':headers_demand, 'header_col':header_col}}
|
|
|
else:
|
|
|
demand_dic = {'demand_info':{'data':[], 'header':[], 'header_col':[]}}
|
|
|
- return [attr_dic, demand_dic]
|
|
|
+ return [attr_dic, demand_dic], total_product_money
|
|
|
|
|
|
# docchannel类型提取
|
|
|
class DocChannel():
|
|
@@ -2260,10 +2306,12 @@ class DocChannel():
|
|
|
else:
|
|
|
return 0
|
|
|
|
|
|
- def predict(self, title='', content=''):
|
|
|
- # print('准备预测')
|
|
|
- if isinstance(content, list):
|
|
|
- token_l = [it.tokens for it in content]
|
|
|
+ def predict(self, title='', list_sentence='', web_source_no=''):
|
|
|
+ if web_source_no in ['02104-7']:
|
|
|
+ return {'docchannel': {'docchannel':'', 'doctype':'采招数据'}}
|
|
|
+
|
|
|
+ if isinstance(list_sentence, list):
|
|
|
+ token_l = [it.tokens for it in list_sentence]
|
|
|
tokens = [it for l in token_l for it in l]
|
|
|
content = ' '.join(tokens[:500])
|
|
|
|
|
@@ -2273,6 +2321,7 @@ class DocChannel():
|
|
|
data_content, data_title = self.predict_process(docid='', doctitle=title[-50:], dochtmlcon=content) # 标题最多取50字
|
|
|
text_len = len(data_content[0]) if len(data_content[0])<self.sequen_len else self.sequen_len
|
|
|
title_len = len(data_title[0]) if len(data_title[0])<self.title_len else self.title_len
|
|
|
+ result = {'docchannel': {'docchannel':'', 'doctype':''}}
|
|
|
|
|
|
array_content = embedding(data_content, shape=(len(data_content), self.sequen_len, 128))
|
|
|
array_title = embedding(data_title, shape=(len(data_title), self.title_len, 128))
|
|
@@ -2286,8 +2335,10 @@ class DocChannel():
|
|
|
)
|
|
|
id = np.argmax(pred, axis=1)[0]
|
|
|
prob = pred[0][id]
|
|
|
+ result['docchannel']['doctype'] = self.id2type[id]
|
|
|
# print('公告类别:', self.id2type[id], '概率:',prob)
|
|
|
- if id == 0:
|
|
|
+ # if id == 0:
|
|
|
+ if result['docchannel']['doctype'] not in ['', '新闻资讯']:
|
|
|
pred = self.lift_sess.run(self.lift_softmax,
|
|
|
feed_dict={
|
|
|
self.lift_title: array_title,
|
|
@@ -2298,16 +2349,61 @@ class DocChannel():
|
|
|
)
|
|
|
id = np.argmax(pred, axis=1)[0]
|
|
|
prob = pred[0][id]
|
|
|
+ result['docchannel']['docchannel'] = self.id2life[id]
|
|
|
# print('生命周期:',self.id2life[id], '概率:',prob)
|
|
|
- if id == 6:
|
|
|
+ # if id == 6:
|
|
|
+ if result['docchannel']['docchannel'] == '中标信息':
|
|
|
if self.is_houxuan(''.join([it for it in title if it.isalpha()]), ''.join([it for it in content if it.isalpha()])):
|
|
|
+ result['docchannel']['docchannel'] = '候选人公示'
|
|
|
# return '候选人公示', prob
|
|
|
- return [{'docchannel': '候选人公示'}]
|
|
|
- # return self.id2life[id], prob
|
|
|
- return [{'docchannel':self.id2life[id]}]
|
|
|
- else:
|
|
|
- # return self.id2type[id], prob
|
|
|
- return [{'docchannel':self.id2type[id]}]
|
|
|
+ # return [{'docchannel': '候选人公示'}]
|
|
|
+
|
|
|
+ return result
|
|
|
+ # return [{'docchannel':self.id2life[id]}]
|
|
|
+ # else:
|
|
|
+ # # return self.id2type[id], prob
|
|
|
+ # return [{'docchannel':self.id2type[id]}]
|
|
|
+
|
|
|
def predict_rule(self, title, content, channel_dic, prem_dic):
    """Correct the predicted life-cycle channel with title/content regex rules.

    Rule set dated 2022/2/10. ``channel_dic`` is modified in place and the
    same object is returned. Exactly one of three cases is applied:

    1. ``content`` holds fewer than 50 CJK characters and the doctype is not
       '新闻资讯': the channel is decided from the title alone (contract,
       then award, then tender keywords) and is cleared to '' when no
       keyword matches.
    2. The channel is '招标公告' but the serialized ``prem_dic`` contains
       'win_tenderer': the channel becomes '合同公告' or '中标信息' when the
       title/content patterns agree, otherwise it is left unchanged.
    3. The channel is '中标信息' but the serialized ``prem_dic`` does not
       contain 'win_tenderer': the channel becomes '合同公告', stays as is,
       becomes '招标公告', or is cleared to '', depending on which pattern
       matches first.

    Args:
        title: Announcement title. Only characters in U+4E00..U+9FA5 are
            kept before matching, so ``$`` anchors apply to the last CJK
            character of the title.
        content: Announcement text.
        channel_dic: Dict of the form
            ``{'docchannel': {'docchannel': str, 'doctype': str}}``.
            A missing inner key is treated as ''.
        prem_dic: JSON-serializable extraction result; it is only searched
            for the substring 'win_tenderer' after ``json.dumps``.

    Returns:
        ``channel_dic`` (the object passed in), after any correction.
    """
    # Contract / acceptance / fulfilment wording in the title.
    hetong = '(合同|验收|履约)(公告|公示)|合同号?$'
    # Award-result wording expected in a title (or the head of the content).
    zhongbiao_t = '(中标|中选|成交|入选|入围|结果|确认)(公告|公示|结果)|(遴选|采购|招标|竞价|议价|比选|询价|评选|谈判|邀标|邀请|洽谈|约谈|评标|发包|遴选)结果|开标(记录|信息|情况)|单一来源|直接(选取|选定)|中标通知书|中标$'
    # Award-result wording expected inside the body ("winning supplier:" etc.).
    zhongbiao_c = '(中标|中选|成交|拟选用|拟邀请|最终选定的?|拟定)(供应商|供货商|服务商|企业|公司|单位|(候选)?人)(名称)?[::]|[,。:.](供应商|供货商|服务商)(名称)?:|指定的中介服务机构:|建设服务单位:'
    # Tender / procurement announcement wording in the title.
    zhaobiao_t = '(遴选|采购|招标|竞价|议价|比选|询价|评选|谈判|邀标|邀请|洽谈|约谈)(公告|公示|$)'

    title_cn = re.sub('[^\u4e00-\u9fa5]', '', title)
    content_cn = re.sub('[^\u4e00-\u9fa5]', "", content)

    def _looks_like_award():
        # Award wording in the title, the first 200 chars, or the body.
        return bool(re.search(zhongbiao_t, title_cn)
                    or re.search(zhongbiao_t, content[:200])
                    or re.search(zhongbiao_c, content))

    channel = channel_dic['docchannel']
    current = channel.get('docchannel', '')

    if len(content_cn) < 50 and channel.get('doctype', '') != '新闻资讯':
        # Too little text to trust the model: fall back to the title only.
        if re.search(hetong, title_cn):
            channel['docchannel'] = '合同公告'
        elif re.search(zhongbiao_t, title_cn):
            channel['docchannel'] = '中标信息'
        elif re.search(zhaobiao_t, title_cn):
            channel['docchannel'] = '招标公告'
        else:
            channel['docchannel'] = ''
    elif current == '招标公告' and 'win_tenderer' in json.dumps(prem_dic, ensure_ascii=False):
        # Predicted "tender" although a winning bidder was extracted.
        if re.search(hetong, title_cn):
            channel['docchannel'] = '合同公告'
            log('正则把招标公告修改为合同公告')
        elif _looks_like_award():
            channel['docchannel'] = '中标信息'
            log('正则把招标公告修改为中标信息')
    elif current == '中标信息' and 'win_tenderer' not in json.dumps(prem_dic, ensure_ascii=False):
        # Predicted "award" although no winning bidder was extracted.
        if re.search(hetong, title_cn):
            channel['docchannel'] = '合同公告'
            log('正则把中标信息修改为合同公告')
        elif _looks_like_award():
            # The text itself supports the award label; keep it.
            pass
        elif re.search(zhaobiao_t, title_cn):
            channel['docchannel'] = '招标公告'
            log('正则把中标信息修改为招标公告')
        elif re.search('中标|成交|中选|入选|入围|结果|供应商|供货商|候选人', title_cn+content) is None:
            channel['docchannel'] = ''
            log('正则把中标信息修改为空')
    return channel_dic
|
|
|
|
|
|
# 保证金支付方式提取
|
|
|
class DepositPaymentWay():
|