|
@@ -857,6 +857,8 @@ class PREMPredict():
|
|
elif re.search('合同供方:?$|合同签约单位', front):
|
|
elif re.search('合同供方:?$|合同签约单位', front):
|
|
label = 0
|
|
label = 0
|
|
values[label] = 0.5
|
|
values[label] = 0.5
|
|
|
|
+ elif re.search('现由$', front) and re.search('^作为\d个单位的牵头(单位|公司)?', behind): # 修复 469369884 站源批量预测错误 现由第七合同段保利长大工程有限公司作为6个单位的牵头单位,
|
|
|
|
+ label = 5
|
|
elif re.search('是否中标:是,供应商', front) and label == 5:
|
|
elif re.search('是否中标:是,供应商', front) and label == 5:
|
|
label = 2
|
|
label = 2
|
|
values[label] = 0.9
|
|
values[label] = 0.9
|
|
@@ -1389,7 +1391,7 @@ class RoleRulePredictor():
|
|
"(人|方|单位|组织|用户|业主|主体|部门|公司|企业|工厂)|[转流]出方|文章来源|委托机构|产权所有人|承包权人|结算单位|收货地址)" \
|
|
"(人|方|单位|组织|用户|业主|主体|部门|公司|企业|工厂)|[转流]出方|文章来源|委托机构|产权所有人|承包权人|结算单位|收货地址)" \
|
|
"[))]?(信息|联系方式|概况)?[,,::]?([((](1|2|1.1|1.2)[))])?((公司|单位)?名称)?([((](全称|盖章)[))])?(是|为|:|:|\s*)+$|(采购商|招标人):(\w{2,10}-)?$)"
|
|
"[))]?(信息|联系方式|概况)?[,,::]?([((](1|2|1.1|1.2)[))])?((公司|单位)?名称)?([((](全称|盖章)[))])?(是|为|:|:|\s*)+$|(采购商|招标人):(\w{2,10}-)?$)"
|
|
self.pattern_tenderee_center = "(?P<tenderee_center>(受.{5,20}的?委托|现将[\w()()]{5,20}[\d年月季度至()]+采购意向|尊敬的供应商(伙伴)?:\w{5,20}(以下简称“\w{2,5}”)))"
|
|
self.pattern_tenderee_center = "(?P<tenderee_center>(受.{5,20}的?委托|现将[\w()()]{5,20}[\d年月季度至()]+采购意向|尊敬的供应商(伙伴)?:\w{5,20}(以下简称“\w{2,5}”)))"
|
|
- self.pattern_tenderee_right = "(?P<tenderee_right>^([((](以下简称)?[,\"“]*(招标|采购)(人|单位|机构)[,\"”]*[))]|^委托|^将于[\d年月日,::]+进行|^现委托|^的\w{2,10}正在进行|[\d年月季度至]+采购意向|^)?的招标工作已圆满结束)|^([拟须需]|计划)(采购|招标|购置|购买)|^须购[买置]一批|作为(采购|招标)(人|单位))" #|(^[^.。,,::](采购|竞价|招标|施工|监理|中标|物资)(公告|公示|项目|结果|招标))|的.*正在进行询比价)
|
|
|
|
|
|
+ self.pattern_tenderee_right = "(?P<tenderee_right>^([((](以下简称)?[,\"“]*(招标|采购)(人|单位|机构)[,\"”]*[))]|^委托|^将于[\d年月日,::]+进行|^现委托|^的\w{2,10}正在进行|[\d年月季度至]+采购意向|^)?的招标工作已圆满结束)|^([拟须需]|计划)(采购|招标|购置|购买)|^须购[买置]一批|作为(采购|招标)(人|单位)|^关于)" #|(^[^.。,,::](采购|竞价|招标|施工|监理|中标|物资)(公告|公示|项目|结果|招标))|的.*正在进行询比价)
|
|
self.pattern_tendereeORagency_right = "(?P<tendereeORagency_right>(^拟对|^现?就|^现对))"
|
|
self.pattern_tendereeORagency_right = "(?P<tendereeORagency_right>(^拟对|^现?就|^现对))"
|
|
self.pattern_agency_left = "(?P<agency_left>((代理|拍卖)(?:人|机构|公司|企业|单位|组织)|专业采购机构|集中采购机构|招标组织机构|交易机构|集采机构|[招议))]+标机构|(采购|招标)代理)(名称|.{,4}名,?称|全称)?(是|为|:|:|[,,]?\s*)$|(受.{5,20}委托,?$))"
|
|
self.pattern_agency_left = "(?P<agency_left>((代理|拍卖)(?:人|机构|公司|企业|单位|组织)|专业采购机构|集中采购机构|招标组织机构|交易机构|集采机构|[招议))]+标机构|(采购|招标)代理)(名称|.{,4}名,?称|全称)?(是|为|:|:|[,,]?\s*)$|(受.{5,20}委托,?$))"
|
|
self.pattern_agency_right = "(?P<agency_right>^([((](以下简称)?[,\"“]*(代理)(人|单位|机构)[,\"”]*[))])|^受.{5,20}委托|^受委?托,)" # |^受托 会与 受托生产等冲突,代理表达一般会在后面有逗号
|
|
self.pattern_agency_right = "(?P<agency_right>^([((](以下简称)?[,\"“]*(代理)(人|单位|机构)[,\"”]*[))])|^受.{5,20}委托|^受委?托,)" # |^受托 会与 受托生产等冲突,代理表达一般会在后面有逗号
|
|
@@ -2231,7 +2233,7 @@ class RoleGrade():
|
|
self.thirdTenderer_left_9 = "(?P<thirdTenderer_left_9>(第[三3](中标|中选|中价|成交)?候选(人|单位|供应商|公司)|第[三3]名|排[名序]:3|名次:3))"
|
|
self.thirdTenderer_left_9 = "(?P<thirdTenderer_left_9>(第[三3](中标|中选|中价|成交)?候选(人|单位|供应商|公司)|第[三3]名|排[名序]:3|名次:3))"
|
|
self.pattern_list = [self.tenderee_left_9,self.tenderee_center_8, self.tenderee_left_8,self.tenderee_left_6,self.tenderee_left_5,self.agency_left_9,
|
|
self.pattern_list = [self.tenderee_left_9,self.tenderee_center_8, self.tenderee_left_8,self.tenderee_left_6,self.tenderee_left_5,self.agency_left_9,
|
|
self.winTenderer_left_6, self.winTenderer_left_9,self.winTenderer_left_8, self.secondTenderer_left_9, self.thirdTenderer_left_9]
|
|
self.winTenderer_left_6, self.winTenderer_left_9,self.winTenderer_left_8, self.secondTenderer_left_9, self.thirdTenderer_left_9]
|
|
- def predict(self, list_sentences, list_entitys, span=15, min_prob=0.7):
|
|
|
|
|
|
+ def predict(self, list_sentences, list_entitys, original_docchannel, span=15, min_prob=0.7):
|
|
'''
|
|
'''
|
|
根据规则给角色分配不同等级概率;分三级:0.9-1,0.8-0.9,0.7-0.8;附件0.7-0.8,0.6-0.7,0.5-0.6
|
|
根据规则给角色分配不同等级概率;分三级:0.9-1,0.8-0.9,0.7-0.8;附件0.7-0.8,0.6-0.7,0.5-0.6
|
|
修改概率小于0.6的且在大数据代理集合里面的招标人为代理人
|
|
修改概率小于0.6的且在大数据代理集合里面的招标人为代理人
|
|
@@ -3154,10 +3156,16 @@ class ProductAttributesPredictor():
|
|
_budget = col1_l[i]
|
|
_budget = col1_l[i]
|
|
re_price = re.findall("[零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]{3,}|\d[\d,]*(?:\.\d+)?万?", _budget)
|
|
re_price = re.findall("[零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]{3,}|\d[\d,]*(?:\.\d+)?万?", _budget)
|
|
if re_price:
|
|
if re_price:
|
|
- _budget = re_price[0]
|
|
|
|
- if '万元' in col0_l[i] and '万' not in _budget:
|
|
|
|
- _budget += '万元'
|
|
|
|
- budget = str(getUnifyMoney(_budget))
|
|
|
|
|
|
+ # _budget = re_price[0]
|
|
|
|
+ # if '万元' in col0_l[i] and '万' not in _budget:
|
|
|
|
+ # _budget += '万元'
|
|
|
|
+ # budget = str(getUnifyMoney(_budget))
|
|
|
|
+ _budget, _money_unit = money_process(_budget, col0_l[i])
|
|
|
|
+ budget = str(_budget)
|
|
|
|
+ if '.' in budget:
|
|
|
|
+ budget = budget.rstrip('0').rstrip('.')
|
|
|
|
+ if float(budget)>= 500*100000000:
|
|
|
|
+ budget = ""
|
|
elif re.search('预算单位|(采购|招标|购买)(单位|人|方|主体)|项目业主|采购商|申购单位|需求单位|业主单位', col0_l[i]):
|
|
elif re.search('预算单位|(采购|招标|购买)(单位|人|方|主体)|项目业主|采购商|申购单位|需求单位|业主单位', col0_l[i]):
|
|
header_list2.append(col0_l[i])
|
|
header_list2.append(col0_l[i])
|
|
tenderee = re.sub("\s","",col1_l[i])
|
|
tenderee = re.sub("\s","",col1_l[i])
|
|
@@ -3180,7 +3188,7 @@ class ProductAttributesPredictor():
|
|
if order_begin_year>=2050 or order_end_year>=2050:
|
|
if order_begin_year>=2050 or order_end_year>=2050:
|
|
order_begin = order_end = ""
|
|
order_begin = order_end = ""
|
|
# print(product,demand,budget,order_begin)
|
|
# print(product,demand,budget,order_begin)
|
|
- if product!= "" and demand != "" and budget!="" and order_begin != "" and len(budget)<15: # 限制金额小于15位数的才要
|
|
|
|
|
|
+ if product!= "" and demand != "" and budget!="" and order_begin != "":
|
|
link = {'project_name': product, 'product': [], 'demand': demand, 'budget': budget,
|
|
link = {'project_name': product, 'product': [], 'demand': demand, 'budget': budget,
|
|
'order_begin': order_begin, 'order_end': order_end ,'tenderee':tenderee, 'notes':notes, 'issue_date':issue_date}
|
|
'order_begin': order_begin, 'order_end': order_end ,'tenderee':tenderee, 'notes':notes, 'issue_date':issue_date}
|
|
if link not in demand_link:
|
|
if link not in demand_link:
|
|
@@ -3696,13 +3704,19 @@ class ProductAttributesPredictor():
|
|
_unitPrice = deal_list[id3]
|
|
_unitPrice = deal_list[id3]
|
|
re_price = re.findall("[零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]{3,}|\d[\d,]*(?:\.\d+)?万?",_unitPrice)
|
|
re_price = re.findall("[零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]{3,}|\d[\d,]*(?:\.\d+)?万?",_unitPrice)
|
|
if re_price:
|
|
if re_price:
|
|
- _unitPrice = re_price[0]
|
|
|
|
- if '万元' in header_list[3] and '万' not in _unitPrice:
|
|
|
|
- _unitPrice += '万元'
|
|
|
|
- unitPrice = getUnifyMoney(_unitPrice)
|
|
|
|
- if unitPrice>=10000*10000:
|
|
|
|
- unitPrice = ""
|
|
|
|
- unitPrice = str(unitPrice)
|
|
|
|
|
|
+ # _unitPrice = re_price[0]
|
|
|
|
+ # if '万元' in header_list[3] and '万' not in _unitPrice:
|
|
|
|
+ # _unitPrice += '万元'
|
|
|
|
+ # unitPrice = getUnifyMoney(_unitPrice)
|
|
|
|
+ # if unitPrice>=10000*10000:
|
|
|
|
+ # unitPrice = ""
|
|
|
|
+ # unitPrice = str(unitPrice)
|
|
|
|
+ _unitPrice, _money_unit = money_process(_unitPrice, header_list[3])
|
|
|
|
+ if _unitPrice >= 10000 * 10000:
|
|
|
|
+ _unitPrice = ""
|
|
|
|
+ unitPrice = str(_unitPrice)
|
|
|
|
+ if '.' in unitPrice:
|
|
|
|
+ unitPrice = unitPrice.rstrip('0').rstrip('.')
|
|
if id4 != "":
|
|
if id4 != "":
|
|
if re.search('\w', deal_list[id4]):
|
|
if re.search('\w', deal_list[id4]):
|
|
brand = deal_list[id4]
|
|
brand = deal_list[id4]
|
|
@@ -3727,10 +3741,14 @@ class ProductAttributesPredictor():
|
|
_budget = deal_list[id7]
|
|
_budget = deal_list[id7]
|
|
re_price = re.findall("[零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]{3,}|\d[\d,]*(?:\.\d+)?万?",_budget)
|
|
re_price = re.findall("[零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]{3,}|\d[\d,]*(?:\.\d+)?万?",_budget)
|
|
if re_price:
|
|
if re_price:
|
|
- _budget = re_price[0]
|
|
|
|
- if '万元' in header_list2[2] and '万' not in _budget:
|
|
|
|
- _budget += '万元'
|
|
|
|
- budget = str(getUnifyMoney(_budget))
|
|
|
|
|
|
+ # _budget = re_price[0]
|
|
|
|
+ # if '万元' in header_list2[2] and '万' not in _budget:
|
|
|
|
+ # _budget += '万元'
|
|
|
|
+ # budget = str(getUnifyMoney(_budget))
|
|
|
|
+ _budget, _money_unit = money_process(_budget, header_list2[2])
|
|
|
|
+ budget = str(_budget)
|
|
|
|
+ if '.' in budget:
|
|
|
|
+ budget = budget.rstrip('0').rstrip('.')
|
|
if float(budget)>= 100000*10000:
|
|
if float(budget)>= 100000*10000:
|
|
budget = ""
|
|
budget = ""
|
|
if id8 != "":
|
|
if id8 != "":
|
|
@@ -3863,10 +3881,13 @@ class ProductAttributesPredictor():
|
|
|
|
|
|
|
|
|
|
def add_product_attrs(self,channel_dic, product_attrs, list_sentences,list_entitys,list_outlines,product_list,codeName,prem,text,page_time):
|
|
def add_product_attrs(self,channel_dic, product_attrs, list_sentences,list_entitys,list_outlines,product_list,codeName,prem,text,page_time):
|
|
|
|
+ # print(1,product_attrs[1]['demand_info']['data'])
|
|
if channel_dic['docchannel']['docchannel']=="采购意向" and len(product_attrs[1]['demand_info']['data']) == 0:
|
|
if channel_dic['docchannel']['docchannel']=="采购意向" and len(product_attrs[1]['demand_info']['data']) == 0:
|
|
product_attrs = self.predict_without_table(product_attrs, list_sentences,list_entitys,codeName,prem,text,page_time)
|
|
product_attrs = self.predict_without_table(product_attrs, list_sentences,list_entitys,codeName,prem,text,page_time)
|
|
|
|
+ # print(2,product_attrs[1]['demand_info']['data'])
|
|
if len(product_attrs[0]['product_attrs']['data']) == 0:
|
|
if len(product_attrs[0]['product_attrs']['data']) == 0:
|
|
product_attrs = self.predict_by_text(product_attrs,text,list_outlines,product_list,page_time)
|
|
product_attrs = self.predict_by_text(product_attrs,text,list_outlines,product_list,page_time)
|
|
|
|
+ # print(3,product_attrs[1]['demand_info']['data'])
|
|
if len(product_attrs[1]['demand_info']['data'])>0:
|
|
if len(product_attrs[1]['demand_info']['data'])>0:
|
|
for d in product_attrs[1]['demand_info']['data']:
|
|
for d in product_attrs[1]['demand_info']['data']:
|
|
for product in set(prem[0]['product']):
|
|
for product in set(prem[0]['product']):
|
|
@@ -3897,8 +3918,8 @@ class DocChannel():
|
|
self.type_dic = {
|
|
self.type_dic = {
|
|
'土地矿产': '供地结果|(土地|用地|宗地|地块|海域|矿)的?(基本信息|基本情况|概况|信息|详情|来源|用途|性质|编号|位置|坐落|使用年限|出让年限)|(土地|山地|农田)(经营权)?(出让|出租|招租|租赁|承包|流转)|流转土地',
|
|
'土地矿产': '供地结果|(土地|用地|宗地|地块|海域|矿)的?(基本信息|基本情况|概况|信息|详情|来源|用途|性质|编号|位置|坐落|使用年限|出让年限)|(土地|山地|农田)(经营权)?(出让|出租|招租|租赁|承包|流转)|流转土地',
|
|
'拍卖出让': '(拍卖|变卖|流拍|竞拍)的?(公告|活动|信息|结果|成交|主体|标的|资产|财产|方式|类型|流程|程序|规则|价格|保证金|时间)|(公开|进行|密封)(拍卖|变卖|竞拍)|第[一二三]次拍卖|(资产|司法|网络)拍卖|交易方式.{,2}拍卖|拍卖会',
|
|
'拍卖出让': '(拍卖|变卖|流拍|竞拍)的?(公告|活动|信息|结果|成交|主体|标的|资产|财产|方式|类型|流程|程序|规则|价格|保证金|时间)|(公开|进行|密封)(拍卖|变卖|竞拍)|第[一二三]次拍卖|(资产|司法|网络)拍卖|交易方式.{,2}拍卖|拍卖会',
|
|
- '产权交易': '(产权|资产|权证)的?(类型|信息|名称|编号|(基本)?情况)|(经营权|承包权|使用权|租赁权|股权|债权|排污权|化学需氧量|储备量)(挂牌|转让|出让)|竞价销售|销售结果|房屋所有权房产|免租期限|交易期限|(受让|转让|承租|出租)(人|方)|(店面|店铺|商铺|铺位?|门面|门市|食堂|饭堂|校舍|车位|停车场|厂?房|仓?库|馆|资产|物业|房产|房屋|场地|农田|鱼?塘)\w{,4}(处置|招租|出租|续租|租赁|转让)|(出租|转让|产权|资产)(项目|中标|成交|流标|废标)|出租(用途|类型)|转让底价|租赁(标的物|情况)',
|
|
|
|
- '采招数据': '(采购|招标)(条件|范围|文件|内容)|(申请人|投标人|供应商|报价人|参选人)的?资格要求;' # |变更|答疑|澄清|中标|成交|合同|废标|流标 |(采购|招标|代理)(人|机构|单位)|
|
|
|
|
|
|
+ '产权交易': '(产权|资产|权证)的?(类型|信息|名称|编号|(基本)?情况)|(经营权|承包权|使用权|租赁权|股权|债权|排污权|化学需氧量|储备量)(挂牌|转让|出让)|竞价销售|销售结果|房屋所有权房产|免租期限|交易期限|(受让|转让|承租|出租)(人|方)|(店面|店铺|商铺|铺位?|门面|门市|食堂|饭堂|校舍|车位|停车场|厂?房|仓?库|馆|资产|物业|房产|房屋|场地|农田|鱼?塘)\w{,4}(处置|招租|出租|续租|租赁|转让)|(出租|转让|产权|资产)(项目|中标|成交|流标|废标)|出租(用途|类型)|转让底价|租赁(标的物|情况)|看样(时间|地[点址]|方式)|最小加价|加价幅度',
|
|
|
|
+ '采招数据': '(采购|招标)(条件|范围|文件|内容)|(申请人|投标人|供应商|报价人|参选人)的?资格要求;|采购需求清单|最低价排序|竞争性采购方式|采购进行公开竞价|竞价模式[::\s]*一次报价|预算金额' # |变更|答疑|澄清|中标|成交|合同|废标|流标 |(采购|招标|代理)(人|机构|单位)|
|
|
}
|
|
}
|
|
|
|
|
|
self.title_type_dic = {
|
|
self.title_type_dic = {
|
|
@@ -3921,12 +3942,12 @@ class DocChannel():
|
|
'候选人公示': '候选人公示|评标结果公示|中标候选人名单公示|现将中标候选人(进行公示|公[示布]如下)|(中标|中选)候选人(信息|情况)[::\s]',
|
|
'候选人公示': '候选人公示|评标结果公示|中标候选人名单公示|现将中标候选人(进行公示|公[示布]如下)|(中标|中选)候选人(信息|情况)[::\s]',
|
|
'候选人公示neg': '中标候选人公示期|中标候选人公示前',
|
|
'候选人公示neg': '中标候选人公示期|中标候选人公示前',
|
|
'中标信息': '供地结果信息|采用单源直接采购的?情况说明|[特现]?将\w{,4}(成交|中标|中选|选定结果|选取结果|入围结果|竞价结果)\w{,4}(进行公示|公[示布]如下)|(询价|竞价|遴选)(成交|中标|中选)(公告|公示)|(成交|中标|中选|选定|选取|入围|询价)结果(如下|公告|公示)|(中标|中选)(供应商|承包商|候选人|入围单位)如下|拟定供应商的情况|((中标|中选)(人|成交)|成交)\w{,3}(信息|情况)[::\s]',
|
|
'中标信息': '供地结果信息|采用单源直接采购的?情况说明|[特现]?将\w{,4}(成交|中标|中选|选定结果|选取结果|入围结果|竞价结果)\w{,4}(进行公示|公[示布]如下)|(询价|竞价|遴选)(成交|中标|中选)(公告|公示)|(成交|中标|中选|选定|选取|入围|询价)结果(如下|公告|公示)|(中标|中选)(供应商|承包商|候选人|入围单位)如下|拟定供应商的情况|((中标|中选)(人|成交)|成交)\w{,3}(信息|情况)[::\s]',
|
|
- '中标信息2': '\s(成交|中标|中选)(信息|日期|时间|总?金额|价格)[::\s]|(采购|招标|成交|中标|中选|评标)结果|单一来源(采购|招标)?的?(中标|成交|结果)|项目已结束', # |单一来源采购原因|拟采取单一来源方式采购|单一来源采购公示
|
|
|
|
- '中标信息3': '(中标|中选|成交|拟定|拟选用|最终选定的?|受让)(供应商|供货商|服务商|机构|企业|公司|单位|候选人|人)(信息[,:]?)?(名称)?[::\s]|[、\s](第一名|(拟定|推荐|入围)?(供应商|供货商)|(中选|中标|供货)单位|中选人)[::\s]', # |唯一
|
|
|
|
|
|
+ '中标信息2': '\s(成交|中标|中选)(信息|日期|时间|总?金额|价格)[::\s]|(采购|招标|成交|中标|中选|评标)结果|单一来源(采购|招标)?的?(中标|成交|结果)|项目已结束|中标公示 ', # |单一来源采购原因|拟采取单一来源方式采购|单一来源采购公示
|
|
|
|
+ '中标信息3': '(中标|中选|成交|拟定|拟选用|最终选定的?|受让)(供应商|供货商|服务商|机构|企业|公司|单位|候选人|人)(信息[,:]?)?(名称)?[::\s]|[、\s](第一名|(拟定|推荐|入围)?(供应商|供货商)|(中选|中标|供货)单位|中选人)[::\s]|确定[\w()]{6,25}为中标人', # |唯一
|
|
'中标信息neg': '按项目控制价下浮\d%即为成交价|成交原则|不得确定为(中标|成交)|招标人按下列原则选择中标人|评选成交供应商:|拟邀请供应商|除单一来源采购项目外|单一来源除外|(各.{,5}|尊敬的)(供应商|供货商)[:\s]|竞拍起止时间:|询价结果[\s\n::]*不公开|本项目已具备招标条件|现对该项目进行招标公告|发布\w{2}结果后\d天内送达|本次\w{2}结果不对外公示|供应商\s*资格要求|成交情况:\s*[流废]标|中标单位:本次招标拟?中标单位\d家|通知中标单位|影响(成交|中标)结果',
|
|
'中标信息neg': '按项目控制价下浮\d%即为成交价|成交原则|不得确定为(中标|成交)|招标人按下列原则选择中标人|评选成交供应商:|拟邀请供应商|除单一来源采购项目外|单一来源除外|(各.{,5}|尊敬的)(供应商|供货商)[:\s]|竞拍起止时间:|询价结果[\s\n::]*不公开|本项目已具备招标条件|现对该项目进行招标公告|发布\w{2}结果后\d天内送达|本次\w{2}结果不对外公示|供应商\s*资格要求|成交情况:\s*[流废]标|中标单位:本次招标拟?中标单位\d家|通知中标单位|影响(成交|中标)结果',
|
|
# |确定成交供应商[:,\s]
|
|
# |确定成交供应商[:,\s]
|
|
'合同公告': '合同(公告|公示|信息|内容)|合同(编号|名称|主体|基本情况|完成(日期|时间))|(供应商乙方|乙方供应商):|合同总?金额|履约信息',
|
|
'合同公告': '合同(公告|公示|信息|内容)|合同(编号|名称|主体|基本情况|完成(日期|时间))|(供应商乙方|乙方供应商):|合同总?金额|履约信息',
|
|
- '废标公告': '(终止|中止|废标|流标|失败|作废|异常|撤销)(结果)?(公告|公示|招标|采购|竞价)|(谈判结果为|结果类型):?废标|((本|该)(项目|标段|合同|合同包|采购包|次)\w{,5})((失败|终止|流标|废标)|予以废标|(按|做|作)?(流标|废标|废置)处理)|(采购|招标|询价|议价|竞价|比价|比选|遴选|邀请|邀标|磋商|洽谈|约谈|谈判|竞谈|应答|项目)(终止|中止|废标|流标|失败|作废|异常|撤销)',
|
|
|
|
|
|
+ '废标公告': '(终止|中止|废标|流标|流采|失败|作废|异常|撤销)(结果)?(公告|公示|招标|采购|竞价)|(谈判结果为|结果类型):?废标|((本|该)(项目|标段|合同|合同包|采购包|次)\w{,5})((失败|终止|流标|废标)|予以废标|(按|做|作)?(流标|废标|废置)处理)|(采购|招标|询价|议价|竞价|比价|比选|遴选|邀请|邀标|磋商|洽谈|约谈|谈判|竞谈|应答|项目)(终止|中止|废标|流标|失败|作废|异常|撤销)',
|
|
'废标公告2': '(无效|中止|终止|废标|流标|失败|作废|异常|撤销)的?(原因|理由)|本项目因故取消|本(项目|次)(公开)?\w{2}失败|已终止\s*原因:|(人|人数|供应商|单位)(不足|未达\w{,3}数量)|已终止|不足[3三]家|无(废标)|成交情况:\s*[流废]标|现予以废置',
|
|
'废标公告2': '(无效|中止|终止|废标|流标|失败|作废|异常|撤销)的?(原因|理由)|本项目因故取消|本(项目|次)(公开)?\w{2}失败|已终止\s*原因:|(人|人数|供应商|单位)(不足|未达\w{,3}数量)|已终止|不足[3三]家|无(废标)|成交情况:\s*[流废]标|现予以废置',
|
|
'废标公告neg': '超过此报价将作为[废流]标处理|否则按[废流]标处理|终止规则:|成交规则:|视为流标|竞价失败的一切其他情形'
|
|
'废标公告neg': '超过此报价将作为[废流]标处理|否则按[废流]标处理|终止规则:|成交规则:|视为流标|竞价失败的一切其他情形'
|
|
}
|
|
}
|
|
@@ -6044,7 +6065,7 @@ class DistrictPredictor():
|
|
return ''
|
|
return ''
|
|
|
|
|
|
def get_project_addr(text):
|
|
def get_project_addr(text):
|
|
- p1 = '(项目(施工|实施)?|建设|工程|服务|交货|送货|收货|展示|看样|拍卖)(地址|地点|位置|所在地区?)(位于)?:(?P<addr>(\w{1,13}(自治[区州县旗]|地区|[省市区县旗盟])[^\w]*)+|\w{2,15}[,。])'
|
|
|
|
|
|
+ p1 = '(项目|施工|实施|建设|工程|服务|交货|送货|收货|展示|看样|拍卖)(地址|地点|位置|所在地区?)(位于)?:(?P<addr>(\w{1,13}(自治[区州县旗]|地区|[省市区县旗盟])[^\w]*)+|\w{2,15}[,。])'
|
|
if re.search(p1, text):
|
|
if re.search(p1, text):
|
|
return re.search(p1, text).group('addr')
|
|
return re.search(p1, text).group('addr')
|
|
else:
|
|
else:
|
|
@@ -7125,17 +7146,20 @@ class WebsourceTenderee():
|
|
|
|
|
|
class ApprovalPredictor():
|
|
class ApprovalPredictor():
|
|
def __init__(self):
|
|
def __init__(self):
|
|
|
|
+ '''
|
|
|
|
+ 项目(法人)单位
|
|
|
|
+ '''
|
|
self.other_part = {
|
|
self.other_part = {
|
|
- "project_name": "(项目|工程|采购|招标)名称:(?P<main>[^:。]{5,50})[,。](\w{2,10}:|$)?",
|
|
|
|
- "project_code": "(立案号|项目(统一)?代码|(项目|工程|采购|招标)编[号码]):(?P<main>(\w{2,8})?[()〔〕【】\[\]a-zA-Z0-9-]{5,30}号?)(\w{2,10}:|$)?",
|
|
|
|
- "doc_num": "((审[批查]|批[复准]|立项|[定知]书|[公发批]文|用地|决定|备案)文号|综合受理号):(?P<main>(\w{2,8})?[()〔〕【】\[\]a-zA-Z0-9-]{5,30}号?)[,。]?(\w{2,10}:|$)?",
|
|
|
|
- "pro_type": "(申报类型|项目所属行业):(?P<main>[^:。]{2,30})[,。](\w{2,10}:|$)?",
|
|
|
|
- "year_limit": "((建设|工程|服务)年限):(?P<main>[\d个年月日.-]{2,20})[,。](\w{2,10}:|$)?",
|
|
|
|
- "construction_scale": "(建设内容[及和](建设)?规模|建设规模[及和](主要)?(建设)?内容|建设规模(如下)?):(?P<main>[^:。]{2,50})[,。](\w{2,10}:|$)?",
|
|
|
|
- "approval_items": "((审[批查]|批[复准])事项|事项名称):(?P<main>[^:。]{2,50})[,。](\w{2,10}:|$)?",
|
|
|
|
- "properties": "((建设|工程)性质):(?P<main>[^:。]{2,50})[,。](\w{2,10}:|$)?",
|
|
|
|
- "approval_result": "((审[批查]|批[复准])(结果|决定)):(?P<main>[^:。]{2,50})[,。](\w{2,10}:|$)?",
|
|
|
|
- "phone": "联系电话:(?P<main>1[3-9][0-9][-—-―]?\d{4}[-—-―]?\d{4}|"
|
|
|
|
|
|
+ "project_name": "(项目|工程|采购|招标|计划)名称?:(?P<main>[^:。]{5,50})[,。](\w{2,10}:|$)?", # 项目名称
|
|
|
|
+ "project_code": "(立案号|项目(统一)?代码|(项目|工程|采购|招标|计划|任务|备案)(编[号码]|号)):(?P<main>(\w{2,8})?[()〔〕【】\[\]a-zA-Z0-9-]{5,30}号?)(\w{2,10}:|$)?", # 项目编号
|
|
|
|
+ "doc_num": "((审[批查核]|批[复准]|立项|[定知]书|[公发批]文|用地|决定|备案|核准|许可|确认)[文编]?号|综合受理号|文书号):(?P<main>(\w{2,8})?[()〔〕【】\[\]a-zA-Z0-9-.]{5,30}号?)[,。]?(\w{2,10}:|$)?", # 文号
|
|
|
|
+ "pro_type": "(申[报请](类型|种类)|项目所属行业|行业(分类|归属)|产业领域|项目行业|项目类型|立项类型):(?P<main>[^:。]{2,30})[,。](\w{2,10}:|$)?", # 项目类型
|
|
|
|
+ "year_limit": "((建设|工程|服务|项目)(年限|期限|时长)):(?P<main>[\d个年月日.-]{2,20})[,。](\w{2,10}:|$)?", # 建设年限
|
|
|
|
+ "construction_scale": "(建设内容[及和](建设)?规模|建设规模[及和](主要)?(建设)?内容|(建设|工程|项目)规模(如下)?):(?P<main>[^:。]{2,50})[,。](\w{2,10}:|$)?", # 建设规模
|
|
|
|
+ "approval_items": "((审[批查核]|批[复准]申请)(事项|内容)|事项名称|事项审批):(?P<main>[^:。]{2,50})[,。](\w{2,10}:|$)?", # 审批事项
|
|
|
|
+ "properties": "((建设|工程|项目)性质):(?P<main>[^:。]{2,50})[,。](\w{2,10}:|$)?", # 建设性质
|
|
|
|
+ "approval_result": "((审[批查核]|批[复准])(结果|决定|结论|状态|回复)|(办理|,)(状态|意见|结果)):(?P<main>[^:。]{2,50})[,。](\w{2,10}:|$)?", # 审批结果
|
|
|
|
+ "phone": "(联系)?电话:(?P<main>1[3-9][0-9][-—-―]?\d{4}[-—-―]?\d{4}|" # 联系电话
|
|
'\+86.?1[3-9]\d{9}|'
|
|
'\+86.?1[3-9]\d{9}|'
|
|
'0[1-9]\d{1,2}[-—-―][2-9]\d{6}\d?[-—-―]\d{1,4}|'
|
|
'0[1-9]\d{1,2}[-—-―][2-9]\d{6}\d?[-—-―]\d{1,4}|'
|
|
'0[1-9]\d{1,2}[-—-―]{0,2}[2-9]\d{6}\d?(?=1[3-9]\d{9})|'
|
|
'0[1-9]\d{1,2}[-—-―]{0,2}[2-9]\d{6}\d?(?=1[3-9]\d{9})|'
|
|
@@ -7148,26 +7172,26 @@ class ApprovalPredictor():
|
|
}
|
|
}
|
|
|
|
|
|
self.role_type = {
|
|
self.role_type = {
|
|
- "declare_company": "申报(部门|机关|单位|企业|公司)", # 申报单位
|
|
|
|
- "construct_company": "(业主|建设|用地))?(部门|机关|单位|企业|公司)|主送机关|法人单位", # 建设单位
|
|
|
|
- "approver": "(审批|许可|批准|发证|批复|管理)(部门|机关|单位|企业|公司)", # 审批部门
|
|
|
|
- "evaluation_agency": "环境影响评价机构|环评机构|评价机构|环评单位" # 环评机构
|
|
|
|
|
|
+ "declare_company": "(申[请报]|填报|呈报)(部门|机关|单位|企业|公司|机构|组织)", # 申报单位
|
|
|
|
+ "construct_company": "(业主|建设|用地|委托|发包|产权|项目))?(部门|机关|单位|企业|公司|方)|主送机关|法人单位|甲方", # 建设单位
|
|
|
|
+ "approver": "(审[批查核]|许可|批准|发证|批复|管理)(部门|机关|单位|企业|公司|机构)", # 审批部门
|
|
|
|
+ "evaluation_agency": "(环境|环保)?(影响)?(环评|评价|评估)(机构|单位|公司)" # 环评机构
|
|
}
|
|
}
|
|
self.person_type = {
|
|
self.person_type = {
|
|
- "legal_person": "项目法人|法定代表人" # 项目法人
|
|
|
|
|
|
+ "legal_person": "项目法人|法定代表人|企业法人" # 项目法人
|
|
}
|
|
}
|
|
self.date_type = {
|
|
self.date_type = {
|
|
- "time_declare": "申报时间",
|
|
|
|
- "time_commencement": "开工时间",
|
|
|
|
- "time_completion": "竣工时间"
|
|
|
|
|
|
+ "time_declare": "(申[请报]|填报|呈报)(时间|日期)", # 申报时间
|
|
|
|
+ "time_commencement": "(开工|动工|施工开始)(时间|日期)", # 开工时间
|
|
|
|
+ "time_completion": "(竣工|完工|验收|(项目|建设|工程)(完成|结束))(备案)?(时间|日期)" # 竣工时间
|
|
}
|
|
}
|
|
|
|
|
|
self.addr_type = {
|
|
self.addr_type = {
|
|
- "project_addr": "(建设|工程|项目)(地址|地点|位置)"
|
|
|
|
|
|
+ "project_addr": "(建设|工程|项目|施工)(地址|地点|位置|所在地)|[宗土]地坐落|用地位置" # 建设地址
|
|
}
|
|
}
|
|
|
|
|
|
self.money_type = {
|
|
self.money_type = {
|
|
- "total_tendereeMoney": "项目金额|项目投资|总投资|投资总额|总预算|总概算|投资规模|批复概算|投资额",
|
|
|
|
|
|
+ "total_tendereeMoney": "(项目|概算|投资)金额|项目投资|总投资|总预算|总概算|投资(规模|总额|估算|概算)|批复概算|投资额", # 总投资
|
|
}
|
|
}
|
|
|
|
|
|
def predict(self, list_sentences, list_entitys, span=12):
|
|
def predict(self, list_sentences, list_entitys, span=12):
|
|
@@ -7187,67 +7211,91 @@ class ApprovalPredictor():
|
|
self.other_part.keys() | self.role_type.keys() | self.date_type.keys() | self.addr_type.keys() | self.money_type.keys() | self.person_type.keys()}
|
|
self.other_part.keys() | self.role_type.keys() | self.date_type.keys() | self.addr_type.keys() | self.money_type.keys() | self.person_type.keys()}
|
|
multi_project['moneysource'] = ''
|
|
multi_project['moneysource'] = ''
|
|
text = sentences[i]
|
|
text = sentences[i]
|
|
- for k, v in self.other_part.items():
|
|
|
|
- for iter in re.finditer(v, text):
|
|
|
|
- rs_dic[k] = iter.group('main')
|
|
|
|
- multi_project[k] = iter.group('main')
|
|
|
|
- found_key = 1
|
|
|
|
- break
|
|
|
|
for entity in entities[i]:
|
|
for entity in entities[i]:
|
|
b, e = entity.wordOffset_begin, entity.wordOffset_end
|
|
b, e = entity.wordOffset_begin, entity.wordOffset_end
|
|
if entity.entity_type in ['org', 'company']:
|
|
if entity.entity_type in ['org', 'company']:
|
|
for k, v in self.role_type.items():
|
|
for k, v in self.role_type.items():
|
|
if re.search(v, sentences[entity.sentence_index][max(0, b - span):b]):
|
|
if re.search(v, sentences[entity.sentence_index][max(0, b - span):b]):
|
|
- rs_dic[k] = entity.entity_text
|
|
|
|
|
|
+ if rs_dic[k] == '':
|
|
|
|
+ rs_dic[k] = entity.entity_text
|
|
multi_project[k] = entity.entity_text
|
|
multi_project[k] = entity.entity_text
|
|
found_key = 1
|
|
found_key = 1
|
|
elif entity.entity_type in ['person']:
|
|
elif entity.entity_type in ['person']:
|
|
for k, v in self.person_type.items():
|
|
for k, v in self.person_type.items():
|
|
if re.search(v, sentences[entity.sentence_index][max(0, b - span):b]):
|
|
if re.search(v, sentences[entity.sentence_index][max(0, b - span):b]):
|
|
- rs_dic[k] = entity.entity_text
|
|
|
|
|
|
+ if rs_dic[k] == '':
|
|
|
|
+ rs_dic[k] = entity.entity_text
|
|
multi_project[k] = entity.entity_text
|
|
multi_project[k] = entity.entity_text
|
|
found_key = 1
|
|
found_key = 1
|
|
break
|
|
break
|
|
elif entity.entity_type in ['time']:
|
|
elif entity.entity_type in ['time']:
|
|
for k, v in self.date_type.items():
|
|
for k, v in self.date_type.items():
|
|
if re.search(v, sentences[entity.sentence_index][max(0, b - span):b]):
|
|
if re.search(v, sentences[entity.sentence_index][max(0, b - span):b]):
|
|
- rs_dic[k] = entity.entity_text
|
|
|
|
|
|
+ if rs_dic[k] == '':
|
|
|
|
+ rs_dic[k] = entity.entity_text
|
|
multi_project[k] = entity.entity_text
|
|
multi_project[k] = entity.entity_text
|
|
found_key = 1
|
|
found_key = 1
|
|
elif entity.entity_type in ['location']:
|
|
elif entity.entity_type in ['location']:
|
|
for k, v in self.addr_type.items():
|
|
for k, v in self.addr_type.items():
|
|
if re.search(v, sentences[entity.sentence_index][max(0, b - span):b]):
|
|
if re.search(v, sentences[entity.sentence_index][max(0, b - span):b]):
|
|
- rs_dic[k] = entity.entity_text
|
|
|
|
|
|
+ if rs_dic[k] == '':
|
|
|
|
+ rs_dic[k] = entity.entity_text
|
|
multi_project[k] = entity.entity_text
|
|
multi_project[k] = entity.entity_text
|
|
found_key = 1
|
|
found_key = 1
|
|
elif entity.entity_type in ['money']:
|
|
elif entity.entity_type in ['money']:
|
|
for k, v in self.money_type.items():
|
|
for k, v in self.money_type.items():
|
|
if re.search(v, sentences[entity.sentence_index][max(0, b - span):b]):
|
|
if re.search(v, sentences[entity.sentence_index][max(0, b - span):b]):
|
|
- rs_dic[k] = entity.entity_text
|
|
|
|
|
|
+ if rs_dic[k] == '':
|
|
|
|
+ rs_dic[k] = entity.entity_text
|
|
multi_project[k] = entity.entity_text
|
|
multi_project[k] = entity.entity_text
|
|
found_key = 1
|
|
found_key = 1
|
|
elif entity.entity_type in ['moneysource']:
|
|
elif entity.entity_type in ['moneysource']:
|
|
rs_dic['moneysource'] = turnMoneySource(entity.entity_text)
|
|
rs_dic['moneysource'] = turnMoneySource(entity.entity_text)
|
|
multi_project['moneysource'] = turnMoneySource(entity.entity_text)
|
|
multi_project['moneysource'] = turnMoneySource(entity.entity_text)
|
|
|
|
+ elif entity.entity_type in ['code']:
|
|
|
|
+ k = 'project_code'
|
|
|
|
+ v = self.other_part[k].split(':', maxsplit=1)[0]
|
|
|
|
+ if re.search(v, sentences[entity.sentence_index][max(0, b - span):b]):
|
|
|
|
+ if rs_dic[k] == '':
|
|
|
|
+ rs_dic[k] = entity.entity_text
|
|
|
|
+ multi_project[k] = entity.entity_text
|
|
|
|
+ found_key = 1
|
|
|
|
+ elif entity.entity_type in ['name']:
|
|
|
|
+ k = 'project_name'
|
|
|
|
+ v = self.other_part[k].split(':', maxsplit=1)[0]
|
|
|
|
+ if re.search(v, sentences[entity.sentence_index][max(0, b - span):b]):
|
|
|
|
+ if rs_dic[k] == '':
|
|
|
|
+ rs_dic[k] = entity.entity_text
|
|
|
|
+ multi_project[k] = entity.entity_text
|
|
|
|
+ found_key = 1
|
|
|
|
+ for k, v in self.other_part.items():
|
|
|
|
+ for iter in re.finditer(v, text):
|
|
|
|
+ if rs_dic[k] == '':
|
|
|
|
+ rs_dic[k] = iter.group('main')
|
|
|
|
+ multi_project[k] = iter.group('main')
|
|
|
|
+ found_key = 1
|
|
|
|
+ break
|
|
if (multi_project['project_code'] != "" or multi_project['project_name'] != "") and multi_project['project_code']+multi_project['project_name'] not in code_name_set:
|
|
if (multi_project['project_code'] != "" or multi_project['project_name'] != "") and multi_project['project_code']+multi_project['project_name'] not in code_name_set:
|
|
code_name_set.add(multi_project['project_code']+multi_project['project_name'])
|
|
code_name_set.add(multi_project['project_code']+multi_project['project_name'])
|
|
district = getPredictor('district').get_area(
|
|
district = getPredictor('district').get_area(
|
|
multi_project['project_name'] + multi_project['project_addr'], '')
|
|
multi_project['project_name'] + multi_project['project_addr'], '')
|
|
- multi_project['area'] = district['district']['area']
|
|
|
|
- multi_project['province'] = district['district']['province']
|
|
|
|
- multi_project['city'] = district['district']['city']
|
|
|
|
- multi_project['district'] = district['district']['district']
|
|
|
|
|
|
+ if district['district']['province'] != '全国':
|
|
|
|
+ multi_project['area'] = district['district']['area']
|
|
|
|
+ multi_project['province'] = district['district']['province']
|
|
|
|
+ multi_project['city'] = district['district']['city']
|
|
|
|
+ multi_project['district'] = district['district']['district']
|
|
multi_project = {k:v for k,v in multi_project.items() if v != ''}
|
|
multi_project = {k:v for k,v in multi_project.items() if v != ''}
|
|
rs_l.append(multi_project)
|
|
rs_l.append(multi_project)
|
|
- if len(rs_l)>1:
|
|
|
|
|
|
+ if len(rs_l)>1 and len(set(rs_l[0].keys()))>2 and set(rs_l[0].keys())&set(rs_l[1].keys())!=set():
|
|
return rs_l
|
|
return rs_l
|
|
elif found_key == 1:
|
|
elif found_key == 1:
|
|
district = getPredictor('district').get_area(
|
|
district = getPredictor('district').get_area(
|
|
rs_dic['construct_company'] + rs_dic['project_name'] + rs_dic['project_addr'], '')
|
|
rs_dic['construct_company'] + rs_dic['project_name'] + rs_dic['project_addr'], '')
|
|
- rs_dic['area'] = district['district']['area']
|
|
|
|
- rs_dic['province'] = district['district']['province']
|
|
|
|
- rs_dic['city'] = district['district']['city']
|
|
|
|
- rs_dic['district'] = district['district']['district']
|
|
|
|
|
|
+ if district['district']['province'] != '全国':
|
|
|
|
+ rs_dic['area'] = district['district']['area']
|
|
|
|
+ rs_dic['province'] = district['district']['province']
|
|
|
|
+ rs_dic['city'] = district['district']['city']
|
|
|
|
+ rs_dic['district'] = district['district']['district']
|
|
rs_dic = {k: v for k, v in rs_dic.items() if v != ''}
|
|
rs_dic = {k: v for k, v in rs_dic.items() if v != ''}
|
|
return [rs_dic]
|
|
return [rs_dic]
|
|
return []
|
|
return []
|