|
@@ -1111,13 +1111,13 @@ class RoleRulePredictor():
|
|
self.pattern_tenderee_center = "(?P<tenderee_center>(受.{5,20}委托))"
|
|
self.pattern_tenderee_center = "(?P<tenderee_center>(受.{5,20}委托))"
|
|
self.pattern_tenderee_right = "(?P<tenderee_right>^([((](以下简称)?[,\"“]*(招标|采购)(人|单位|机构)[,\"”]*[))])|^委托|^现委托|^的\w{2,10}正在进行)" #|(^[^.。,,::](采购|竞价|招标|施工|监理|中标|物资)(公告|公示|项目|结果|招标))|的.*正在进行询比价)
|
|
self.pattern_tenderee_right = "(?P<tenderee_right>^([((](以下简称)?[,\"“]*(招标|采购)(人|单位|机构)[,\"”]*[))])|^委托|^现委托|^的\w{2,10}正在进行)" #|(^[^.。,,::](采购|竞价|招标|施工|监理|中标|物资)(公告|公示|项目|结果|招标))|的.*正在进行询比价)
|
|
self.pattern_tendereeORagency_right = "(?P<tendereeORagency_right>(^拟对|^现?就|^现对))"
|
|
self.pattern_tendereeORagency_right = "(?P<tendereeORagency_right>(^拟对|^现?就|^现对))"
|
|
- self.pattern_agency_left = "(?P<agency_left>(代理(?:人|机构|公司|单位|组织)|专业采购机构|集中采购机构|招标组织机构|集采机构|[招议))]+标机构)(.{,4}名,?称|全称|是|为|:|:|[,,]?\s*)$|(受.{5,20}委托,?$))"
|
|
|
|
|
|
+ self.pattern_agency_left = "(?P<agency_left>(代理(?:人|机构|公司|单位|组织)|专业采购机构|集中采购机构|招标组织机构|集采机构|[招议))]+标机构)(名称)?(.{,4}名,?称|全称|是|为|:|:|[,,]?\s*)$|(受.{5,20}委托,?$))"
|
|
self.pattern_agency_right = "(?P<agency_right>^([((](以下简称)?[,\"“]*(代理)(人|单位|机构)[,\"”]*[))])|^受.{5,20}委托|^受委?托,)" # |^受托 会与 受托生产等冲突,代理表达一般会在后面有逗号
|
|
self.pattern_agency_right = "(?P<agency_right>^([((](以下简称)?[,\"“]*(代理)(人|单位|机构)[,\"”]*[))])|^受.{5,20}委托|^受委?托,)" # |^受托 会与 受托生产等冲突,代理表达一般会在后面有逗号
|
|
# 2020//11/24 大网站规则 中标关键词添加 选定单位|指定的中介服务机构
|
|
# 2020//11/24 大网站规则 中标关键词添加 选定单位|指定的中介服务机构
|
|
self.pattern_winTenderer_left = "(?P<winTenderer_left>(乙|承做|施工|供货|承包|承建|竞得|受让|签约)(候选)?(人|单位|机构|供应商|方|公司|厂商|商)[::是为]+$|" \
|
|
self.pattern_winTenderer_left = "(?P<winTenderer_left>(乙|承做|施工|供货|承包|承建|竞得|受让|签约)(候选)?(人|单位|机构|供应商|方|公司|厂商|商)[::是为]+$|" \
|
|
"(选定单位|指定的中介服务机构|实施主体|承制单位|供方)[::是为]+$|((评审结果|名次|排名|中标结果)[::]*第?[一1]名?)[::是为]+$|" \
|
|
"(选定单位|指定的中介服务机构|实施主体|承制单位|供方)[::是为]+$|((评审结果|名次|排名|中标结果)[::]*第?[一1]名?)[::是为]+$|" \
|
|
"单一来源(采购)?(供应商|供货商|服务商|方式向)$|((中标|成交)(结果|信息))[::是为]+$|(供应|供货|承销|服务|实施)(机构|单位|商|方)(名称)?[::是为]+$)"
|
|
"单一来源(采购)?(供应商|供货商|服务商|方式向)$|((中标|成交)(结果|信息))[::是为]+$|(供应|供货|承销|服务|实施)(机构|单位|商|方)(名称)?[::是为]+$)"
|
|
- self.pattern_winTenderer_left_w1 = "(?P<winTenderer_left_w1>(中标|中选|中价|成交)(候选)?(人|单位|机构|供应商|方|公司|厂商|商)[::是为]+$)" #取消逗号 并拒绝执行改进计划的供应商,华新水泥将可能终止与其合作关系
|
|
|
|
|
|
+ self.pattern_winTenderer_left_w1 = "(?P<winTenderer_left_w1>(中标|中选|中价|成交)(候选)?(人|单位|机构|供应商|方|公司|厂商|商)(名称)?[::是为]+$)" #取消逗号 并拒绝执行改进计划的供应商,华新水泥将可能终止与其合作关系
|
|
# self.pattern_winTenderer_center = "(?P<winTenderer_center>第[一1].{,20}[是为]((中标|中选|中价|成交|施工)(人|单位|机构|供应商|公司)|供应商)[::是为])"
|
|
# self.pattern_winTenderer_center = "(?P<winTenderer_center>第[一1].{,20}[是为]((中标|中选|中价|成交|施工)(人|单位|机构|供应商|公司)|供应商)[::是为])"
|
|
# self.pattern_winTenderer_right = "(?P<winTenderer_right>(^[是为\(]((采购(供应商|供货商|服务商)|(第[一1]|预)?(拟?(中标|中选|中价|成交)(候选)?(人|单位|机构|供应商|公司|厂商)))))|^(报价|价格)最低,确定为本项目成交供应商)"
|
|
# self.pattern_winTenderer_right = "(?P<winTenderer_right>(^[是为\(]((采购(供应商|供货商|服务商)|(第[一1]|预)?(拟?(中标|中选|中价|成交)(候选)?(人|单位|机构|供应商|公司|厂商)))))|^(报价|价格)最低,确定为本项目成交供应商)"
|
|
self.pattern_winTenderer_right = "(?P<winTenderer_right>(^[是为]((采购|中标)(供应商|供货商|服务商)|(第[一1]|预)?(拟?(中标|中选|中价|成交)(候选)?(人|单位|机构|供应商|公司|厂商)))|" \
|
|
self.pattern_winTenderer_right = "(?P<winTenderer_right>(^[是为]((采购|中标)(供应商|供货商|服务商)|(第[一1]|预)?(拟?(中标|中选|中价|成交)(候选)?(人|单位|机构|供应商|公司|厂商)))|" \
|
|
@@ -1126,10 +1126,10 @@ class RoleRulePredictor():
|
|
|
|
|
|
# self.pattern_winTenderer_location = "(中标|中选|中价|乙|成交|承做|施工|供货|承包|竞得|受让)(候选)?(人|单位|机构|供应商|方|公司|厂商|商)|(供应商|供货商|服务商)[::]?$|(第[一1](名|((中标|中选|中价|成交)?(候选)?(人|单位|机构|供应商))))(是|为|:|:|\s*$)|((评审结果|名次|排名)[::]第?[一1]名?)|(单一来源(采购)?方式向.?$)"
|
|
# self.pattern_winTenderer_location = "(中标|中选|中价|乙|成交|承做|施工|供货|承包|竞得|受让)(候选)?(人|单位|机构|供应商|方|公司|厂商|商)|(供应商|供货商|服务商)[::]?$|(第[一1](名|((中标|中选|中价|成交)?(候选)?(人|单位|机构|供应商))))(是|为|:|:|\s*$)|((评审结果|名次|排名)[::]第?[一1]名?)|(单一来源(采购)?方式向.?$)"
|
|
|
|
|
|
- self.pattern_secondTenderer_left = "(?P<secondTenderer_left>((第[二2](名|((中标|中选|中价|成交)(候选)?(人|单位|机构|供应商|公司))))[::是为]+$)|((评审结果|名次|排名)[::]第?[二2]名?,?投标商名称[::]+$))"
|
|
|
|
|
|
+ self.pattern_secondTenderer_left = "(?P<secondTenderer_left>((第[二2](名|((中标|中选|中价|成交)(候选)?(人|单位|机构|供应商|公司))))(名称)?[::是为]+$)|((评审结果|名次|排名)[::]第?[二2]名?,?投标商名称[::]+$))"
|
|
self.pattern_secondTenderer_right = "(?P<secondTenderer_right>^[是为\(]第[二2](名|(中标|中选|中价|成交)(候选)?(人|单位|机构|供应商|公司)))"
|
|
self.pattern_secondTenderer_right = "(?P<secondTenderer_right>^[是为\(]第[二2](名|(中标|中选|中价|成交)(候选)?(人|单位|机构|供应商|公司)))"
|
|
|
|
|
|
- self.pattern_thirdTenderer_left = "(?P<thirdTenderer_left>(第[三3](名|((中标|中选|中价|成交)(候选)?(人|单位|机构|供应商|公司))))[::是为]+$|((评审结果|名次|排名)[::]第?[三3]名?,?投标商名称[::]+$))"
|
|
|
|
|
|
+ self.pattern_thirdTenderer_left = "(?P<thirdTenderer_left>(第[三3](名|((中标|中选|中价|成交)(候选)?(人|单位|机构|供应商|公司))))(名称)?[::是为]+$|((评审结果|名次|排名)[::]第?[三3]名?,?投标商名称[::]+$))"
|
|
self.pattern_thirdTenderer_right = "(?P<thirdTenderer_right>^[是为\(]第[三3](名|(中标|中选|中价|成交)(候选)?(人|单位|机构|供应商|公司)))"
|
|
self.pattern_thirdTenderer_right = "(?P<thirdTenderer_right>^[是为\(]第[三3](名|(中标|中选|中价|成交)(候选)?(人|单位|机构|供应商|公司)))"
|
|
|
|
|
|
self.pattern_whole = [self.pattern_tenderee_left,
|
|
self.pattern_whole = [self.pattern_tenderee_left,
|
|
@@ -1508,7 +1508,7 @@ class TendereeRuleRecall():
|
|
"(人|公司|单位|组织|用户|业主|主体|方|部门)|文章来源|委托机构|产权所有人|需求?方|买方|业主|(业主|采购人|招标人)联系方式[,:]公司名称:|权属人|甲方当事人|询价书企业|比选发起人|项目单位[,:]单位名称|结算单位)"\
|
|
"(人|公司|单位|组织|用户|业主|主体|方|部门)|文章来源|委托机构|产权所有人|需求?方|买方|业主|(业主|采购人|招标人)联系方式[,:]公司名称:|权属人|甲方当事人|询价书企业|比选发起人|项目单位[,:]单位名称|结算单位)"\
|
|
"[))]?(信息[,:])?((公司|单位)?名称)?([((](全称|盖章)[))])?(是|为|:|:)+)(?P<unrecognized>[^,。::;]+)[,。;::]")
|
|
"[))]?(信息[,:])?((公司|单位)?名称)?([((](全称|盖章)[))])?(是|为|:|:)+)(?P<unrecognized>[^,。::;]+)[,。;::]")
|
|
# 未识别实体尾部判断
|
|
# 未识别实体尾部判断
|
|
- self.unrecognized_end1 = re.compile(".{2,}?(?:公司|医院|学校|学院|大学|中学|小学|幼儿园|政府|指挥部|办公室|项目部|业主大会|监狱|教育局|委员会|研究所|招标办|采购部|办事处|水利局|公墓)")
|
|
|
|
|
|
+ self.unrecognized_end1 = re.compile(".{2,}?(?:公司|医院|学校|学院|大学|中学|小学|幼儿园|政府|指挥部|办公室|项目部|业主大会|监狱|教育局|委员会|研究所|招标办|采购部|办事处|水利局|公墓|中心)")
|
|
self.unrecognized_end2 = re.compile(".{4,}(?:署|局|厅|处|室|科|部|站|所|股|行)")
|
|
self.unrecognized_end2 = re.compile(".{4,}(?:署|局|厅|处|室|科|部|站|所|股|行)")
|
|
|
|
|
|
def predict(self, list_articles,list_sentences, list_entitys, list_codenames):
|
|
def predict(self, list_articles,list_sentences, list_entitys, list_codenames):
|
|
@@ -2254,6 +2254,12 @@ class ProductAttributesPredictor():
|
|
header_list2.append(col0_l[i])
|
|
header_list2.append(col0_l[i])
|
|
order_time = col1_l[i].strip()
|
|
order_time = col1_l[i].strip()
|
|
order_begin, order_end = self.fix_time(order_time, html, page_time)
|
|
order_begin, order_end = self.fix_time(order_time, html, page_time)
|
|
|
|
+ if order_begin != "" and order_end!="":
|
|
|
|
+ order_begin_year = int(order_begin.split("-")[0])
|
|
|
|
+ order_end_year = int(order_end.split("-")[0])
|
|
|
|
+ # 限制附件错误识别时间
|
|
|
|
+ if order_begin_year>=2050 or order_end_year>=2050:
|
|
|
|
+ order_begin = order_end = ""
|
|
if product!= "" and demand != "" and budget!="" and order_begin != "":
|
|
if product!= "" and demand != "" and budget!="" and order_begin != "":
|
|
link = {'project_name': product, 'product': [], 'demand': demand, 'budget': budget,
|
|
link = {'project_name': product, 'product': [], 'demand': demand, 'budget': budget,
|
|
'order_begin': order_begin, 'order_end': order_end}
|
|
'order_begin': order_begin, 'order_end': order_end}
|
|
@@ -2357,7 +2363,13 @@ class ProductAttributesPredictor():
|
|
total_product_money += float(link['unitPrice'])*float(mat.group(1).replace(',', ''))
|
|
total_product_money += float(link['unitPrice'])*float(mat.group(1).replace(',', ''))
|
|
except:
|
|
except:
|
|
log('产品属性单价数量相乘出错, 单价: %s, 数量: %s'%(link['unitPrice'], link['quantity']))
|
|
log('产品属性单价数量相乘出错, 单价: %s, 数量: %s'%(link['unitPrice'], link['quantity']))
|
|
- if budget != "" and order_time != "" :
|
|
|
|
|
|
+ if order_begin != "" and order_end != "":
|
|
|
|
+ order_begin_year = int(order_begin.split("-")[0])
|
|
|
|
+ order_end_year = int(order_end.split("-")[0])
|
|
|
|
+ # 限制附件错误识别时间
|
|
|
|
+ if order_begin_year >= 2050 or order_end_year >= 2050:
|
|
|
|
+ order_begin = order_end = ""
|
|
|
|
+ if budget != "" and order_time != "":
|
|
link = {'project_name': product, 'product':[], 'demand': demand, 'budget': budget, 'order_begin':order_begin, 'order_end':order_end}
|
|
link = {'project_name': product, 'product':[], 'demand': demand, 'budget': budget, 'order_begin':order_begin, 'order_end':order_end}
|
|
if link not in demand_link:
|
|
if link not in demand_link:
|
|
demand_link.append(link)
|
|
demand_link.append(link)
|
|
@@ -2374,6 +2386,42 @@ class ProductAttributesPredictor():
|
|
demand_dic = {'demand_info':{'data':[], 'header':[], 'header_col':[]}}
|
|
demand_dic = {'demand_info':{'data':[], 'header':[], 'header_col':[]}}
|
|
return [attr_dic, demand_dic], total_product_money
|
|
return [attr_dic, demand_dic], total_product_money
|
|
|
|
|
|
|
|
def predict_without_table(self, product_attrs, list_sentences, list_entitys, codeName, prem, html='', page_time=""):
    """Add a demand-info record taken from running text for single-package documents.

    When ``prem[0]['prem']`` holds exactly one package, every entity of the
    first document whose ``entity_type`` is ``'time'`` is inspected.  An entity
    qualifies when the text to its left (``spanWindow(..., size=20)[0]``) ends
    with one of the labels 采购时间 / 采购实施月份 / 采购月份 followed by a
    separator and at most two further characters.  Each qualifying text is
    normalised through ``self.fix_time``; results whose begin value is empty
    are dropped.  If the remaining results all agree on one ``(begin, end)``
    pair, a single record is appended to
    ``product_attrs[1]['demand_info']['data']``.

    Args:
        product_attrs: Sequence whose element ``[1]`` is a dict with a
            ``'demand_info'`` entry containing a ``'data'`` list.
        list_sentences: Per-document sentence lists; only element 0 is read.
            Sentences are indexed by ``entity.sentence_index`` and must expose
            ``tokens``.
        list_entitys: Per-document entity lists; only element 0 is read.
        codeName: Sequence whose first element is a dict with a ``'name'`` key,
            used as both project name and demand text of the new record.
        prem: Sequence whose first element is a dict with a ``'prem'`` mapping
            (package -> info dict) and a ``'product'`` entry.
        html: Passed through unchanged to ``self.fix_time``.
        page_time: Passed through unchanged to ``self.fix_time``.

    Returns:
        The ``product_attrs`` object that was passed in (mutated in place when
        a record was added).
    """
    # Nothing to do unless there is exactly one package; empty inputs are
    # treated the same way instead of raising IndexError.
    if not prem or len(prem[0]['prem']) != 1:
        return product_attrs
    if not list_sentences or not list_entitys:
        return product_attrs

    list_sentence = list_sentences[0]
    list_entity = list_entitys[0]

    # Both patterns are loop-invariant, so they are compiled once up front.
    # NOTE(review): the doubled ':' in the character class looks like a
    # full-width colon/comma that was lost in transcoding - confirm against
    # the repository copy before relying on it.
    re_bidding_time = re.compile(r"(采购时间|采购实施月份|采购月份)[::,].{0,2}$")
    # Full year-month-day date; raw string so the backslash escapes reach the
    # regex engine without triggering invalid-escape warnings.
    standard_time = re.compile(r"((?P<year>\d{4}|\d{2})\s*[-\/年\.]\s*(?P<month>\d{1,2})\s*[-\/月\.]\s*(?P<day>\d{1,2})日?)")

    order_times = []
    for entity in list_entity:
        if entity.entity_type != 'time':
            continue
        sentence = list_sentence[entity.sentence_index]
        s = spanWindow(tokens=sentence.tokens, begin_index=entity.begin_index,
                       end_index=entity.end_index, size=20)
        entity_left = "".join(s[0])
        if not re_bidding_time.search(entity_left):
            continue
        time_text = entity.entity_text.strip()
        time_match = standard_time.search(time_text)
        if time_match:
            # Keep only the date portion when a full date is present.
            time_text = time_match.group()
        # NOTE(review): indentation was not recoverable from the reviewed
        # text; the append is assumed to run whether or not a full date
        # matched, so month-only texts still reach fix_time - confirm.
        order_times.append(time_text)

    # Normalise to hashable (begin, end) tuples and discard failed parses.
    order_times = [tuple(self.fix_time(raw_time, html, page_time)) for raw_time in order_times]
    order_times = [fixed for fixed in order_times if fixed[0] != ""]

    # Only trust the text when every labelled time resolves to the same range.
    if len(set(order_times)) != 1:
        return product_attrs

    order_begin, order_end = order_times[0]
    project_name = codeName[0]['name']
    # Exactly one package exists (checked above); take its info dict directly.
    only_pack = next(iter(prem[0]['prem'].values()))
    budget = only_pack.get('tendereeMoney', 0)
    product = prem[0]['product']
    link = {'project_name': project_name, 'product': product, 'demand': project_name, 'budget': budget,
            'order_begin': order_begin, 'order_end': order_end}
    # The list is mutated in place, so no re-assignment into product_attrs
    # is needed.
    product_attrs[1]['demand_info']['data'].append(link)
    return product_attrs
|
|
|
|
+
|
|
|
|
+
|
|
# docchannel类型提取
|
|
# docchannel类型提取
|
|
class DocChannel():
|
|
class DocChannel():
|
|
def __init__(self, life_model='/channel_savedmodel/channel.pb', type_model='/channel_savedmodel/doctype.pb'):
|
|
def __init__(self, life_model='/channel_savedmodel/channel.pb', type_model='/channel_savedmodel/doctype.pb'):
|