|
@@ -1400,7 +1400,7 @@ class RoleRulePredictor():
|
|
"(人|方|单位|组织|用户|业主|主体|部门|公司|企业|工厂)|[转流]出方|文章来源|委托机构|产权所有人|承包权人|结算单位|收货地址)" \
|
|
"(人|方|单位|组织|用户|业主|主体|部门|公司|企业|工厂)|[转流]出方|文章来源|委托机构|产权所有人|承包权人|结算单位|收货地址)" \
|
|
"[))]?(信息|联系方式|概况)?[,,::]?([((](1|2|1.1|1.2)[))])?((公司|单位)?名称)?([((](全称|盖章)[))])?(是|为|:|:|\s*)+$|(采购商|招标人):(\w{2,10}-)?$)"
|
|
"[))]?(信息|联系方式|概况)?[,,::]?([((](1|2|1.1|1.2)[))])?((公司|单位)?名称)?([((](全称|盖章)[))])?(是|为|:|:|\s*)+$|(采购商|招标人):(\w{2,10}-)?$)"
|
|
self.pattern_tenderee_center = "(?P<tenderee_center>(受.{5,20}的?委托|现将[\w()()]{5,20}[\d年月季度至()]+采购意向|尊敬的供应商(伙伴)?:\w{5,20}(以下简称“\w{2,5}”)))"
|
|
self.pattern_tenderee_center = "(?P<tenderee_center>(受.{5,20}的?委托|现将[\w()()]{5,20}[\d年月季度至()]+采购意向|尊敬的供应商(伙伴)?:\w{5,20}(以下简称“\w{2,5}”)))"
|
|
- self.pattern_tenderee_right = "(?P<tenderee_right>^([((](以下简称)?[,\"“]*(招标|采购)(人|单位|机构)[,\"”]*[))]|^委托|^将于[\d年月日,::]+进行|^现委托|^的\w{2,10}正在进行|[\d年月季度至]+采购意向|^)?的招标工作已圆满结束)|^([拟须需]|计划)(采购|招标|购置|购买)|^须购[买置]一批|作为(采购|招标)(人|单位)|^关于)" #|(^[^.。,,::](采购|竞价|招标|施工|监理|中标|物资)(公告|公示|项目|结果|招标))|的.*正在进行询比价)
|
|
|
|
|
|
+ self.pattern_tenderee_right = "(?P<tenderee_right>^(机关)?([((](以下简称)?[,\"“]*((招标|采购)(人|单位|机构)|(服务)?购买方)[,\"”]*[))]|^委托|^将于[\d年月日,::]+进行|^现委托|^的\w{2,10}正在进行|[\d年月季度至]+采购意向|^)?的招标工作已圆满结束)|^([拟须需]|计划)(采购|招标|购置|购买)|^须购[买置]一批|作为(采购|招标)(人|单位)|^关于)" #|(^[^.。,,::](采购|竞价|招标|施工|监理|中标|物资)(公告|公示|项目|结果|招标))|的.*正在进行询比价)
|
|
self.pattern_tendereeORagency_right = "(?P<tendereeORagency_right>(^拟对|^现?就|^现对))"
|
|
self.pattern_tendereeORagency_right = "(?P<tendereeORagency_right>(^拟对|^现?就|^现对))"
|
|
self.pattern_agency_left = "(?P<agency_left>((代理|拍卖)(?:人|机构|公司|企业|单位|组织)|专业采购机构|集中采购机构|招标组织机构|交易机构|集采机构|[招议))]+标机构|(采购|招标)代理)(名称|.{,4}名,?称|全称)?(是|为|:|:|[,,]?\s*)$|(受.{5,20}委托,?$))"
|
|
self.pattern_agency_left = "(?P<agency_left>((代理|拍卖)(?:人|机构|公司|企业|单位|组织)|专业采购机构|集中采购机构|招标组织机构|交易机构|集采机构|[招议))]+标机构|(采购|招标)代理)(名称|.{,4}名,?称|全称)?(是|为|:|:|[,,]?\s*)$|(受.{5,20}委托,?$))"
|
|
self.pattern_agency_right = "(?P<agency_right>^([((](以下简称)?[,\"“]*(代理)(人|单位|机构)[,\"”]*[))])|^受.{5,20}委托|^受委?托,)" # |^受托 会与 受托生产等冲突,代理表达一般会在后面有逗号
|
|
self.pattern_agency_right = "(?P<agency_right>^([((](以下简称)?[,\"“]*(代理)(人|单位|机构)[,\"”]*[))])|^受.{5,20}委托|^受委?托,)" # |^受托 会与 受托生产等冲突,代理表达一般会在后面有逗号
|
|
@@ -1464,7 +1464,7 @@ class RoleRulePredictor():
|
|
|
|
|
|
self.pattern_money_tenderee = re.compile("投?标?最高限价|采购计划金额|项目预算|招标金额|采购金额|项目金额|投资估算|采购(单位|人)委托价|招标限价|拦标价|预算金额|标底|总计|限额|资金来源,?为\w{2,4}资金|采购成本价|总费用约?为") # |建安费用 不作为招标金额
|
|
self.pattern_money_tenderee = re.compile("投?标?最高限价|采购计划金额|项目预算|招标金额|采购金额|项目金额|投资估算|采购(单位|人)委托价|招标限价|拦标价|预算金额|标底|总计|限额|资金来源,?为\w{2,4}资金|采购成本价|总费用约?为") # |建安费用 不作为招标金额
|
|
self.pattern_money_tenderer = re.compile("((合同|成交|中标|应付款|交易|投标|验收|订单)[)\)]?(综合)?(总?金额|结果|[单报总]?价))|标的基本情况|承包价|报酬(含税):|经评审的价格") # 单写 总价 不能作为中标金额,很多表格有单价、总价
|
|
self.pattern_money_tenderer = re.compile("((合同|成交|中标|应付款|交易|投标|验收|订单)[)\)]?(综合)?(总?金额|结果|[单报总]?价))|标的基本情况|承包价|报酬(含税):|经评审的价格") # 单写 总价 不能作为中标金额,很多表格有单价、总价
|
|
- self.pattern_money_tenderer_whole = re.compile("(以金额.*中标)|中标供应商.*单价|以.*元中标")
|
|
|
|
|
|
+ self.pattern_money_tenderer_whole = re.compile("(以金额.*中标)|中标供应商.*单价|以.*元(报价)?(中标|中选|成交)")
|
|
self.pattern_money_other = re.compile("代理费|服务费")
|
|
self.pattern_money_other = re.compile("代理费|服务费")
|
|
self.pattern_pack = "(([^承](包|标[段号的包]|分?包|包组)编?号?|项目)[::]?[\((]?[0-9A-Za-z一二三四五六七八九十]{1,4})[^至]?|(第?[0-9A-Za-z一二三四五六七八九十]{1,4}(包号|标[段号的包]|分?包))|[0-9]个(包|标[段号的包]|分?包|包组)"
|
|
self.pattern_pack = "(([^承](包|标[段号的包]|分?包|包组)编?号?|项目)[::]?[\((]?[0-9A-Za-z一二三四五六七八九十]{1,4})[^至]?|(第?[0-9A-Za-z一二三四五六七八九十]{1,4}(包号|标[段号的包]|分?包))|[0-9]个(包|标[段号的包]|分?包|包组)"
|
|
# self.role_file = open('/data/python/lsm/role_rule_predict.txt', 'a', encoding='utf-8')
|
|
# self.role_file = open('/data/python/lsm/role_rule_predict.txt', 'a', encoding='utf-8')
|
|
@@ -6511,7 +6511,7 @@ class TablePremExtractor(object):
|
|
package_code = package_code_raw
|
|
package_code = package_code_raw
|
|
if re.search('合计|总计', package_code+project_code):
|
|
if re.search('合计|总计', package_code+project_code):
|
|
continue
|
|
continue
|
|
- if package_code != '' and package_code + project_code == previous_package: # 处理 208162730 一个包采购多种东西情况
|
|
|
|
|
|
+ if package_code + project_code != '' and package_code + project_code == previous_package: # 处理 208162730 一个包采购多种东西情况
|
|
same_package = True
|
|
same_package = True
|
|
project_name = ''
|
|
project_name = ''
|
|
previous_package = package_code + project_code
|
|
previous_package = package_code + project_code
|
|
@@ -6612,7 +6612,7 @@ class TablePremExtractor(object):
|
|
"role_text": tenderee,
|
|
"role_text": tenderee,
|
|
"serviceTime": ""
|
|
"serviceTime": ""
|
|
})
|
|
})
|
|
- if tenderer and not same_package:
|
|
|
|
|
|
+ if tenderer:
|
|
if len(re.sub('[金额万元()()::零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分¥整\s\d,.]|人民币|不?含税', '',
|
|
if len(re.sub('[金额万元()()::零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分¥整\s\d,.]|人民币|不?含税', '',
|
|
bid_amount_)) > 5: # 金额字段出现超过5个非金额字符,中断匹配
|
|
bid_amount_)) > 5: # 金额字段出现超过5个非金额字符,中断匹配
|
|
prem_dic.pop(package)
|
|
prem_dic.pop(package)
|
|
@@ -6628,20 +6628,26 @@ class TablePremExtractor(object):
|
|
if (re.search('费率|下浮率|[%%‰折]',
|
|
if (re.search('费率|下浮率|[%%‰折]',
|
|
bid_amount_header + bid_amount_) and bid_amount < 100) or bid_amount > 50000000000: # 如果是费率或大于500亿的金额改为0
|
|
bid_amount_header + bid_amount_) and bid_amount < 100) or bid_amount > 50000000000: # 如果是费率或大于500亿的金额改为0
|
|
bid_amount = 0
|
|
bid_amount = 0
|
|
- prem_dic[package]['roleList'].append({
|
|
|
|
- "address": "",
|
|
|
|
- "linklist": [],
|
|
|
|
- "role_money": {
|
|
|
|
- "discount_ratio": "",
|
|
|
|
- "downward_floating_ratio": "",
|
|
|
|
- "floating_ratio": "",
|
|
|
|
- "money": bid_amount,
|
|
|
|
- "money_unit": money_unit
|
|
|
|
- },
|
|
|
|
- "role_name": "win_tenderer",
|
|
|
|
- "role_text": tenderer,
|
|
|
|
- "serviceTime": ""
|
|
|
|
- })
|
|
|
|
|
|
+ if not same_package:
|
|
|
|
+ prem_dic[package]['roleList'].append({
|
|
|
|
+ "address": "",
|
|
|
|
+ "linklist": [],
|
|
|
|
+ "role_money": {
|
|
|
|
+ "discount_ratio": "",
|
|
|
|
+ "downward_floating_ratio": "",
|
|
|
|
+ "floating_ratio": "",
|
|
|
|
+ "money": bid_amount,
|
|
|
|
+ "money_unit": money_unit
|
|
|
|
+ },
|
|
|
|
+ "role_name": "win_tenderer",
|
|
|
|
+ "role_text": tenderer,
|
|
|
|
+ "serviceTime": ""
|
|
|
|
+ })
|
|
|
|
+ elif prem_dic[package]['roleList'] and prem_dic[package]['roleList'][-1].get('role_name', '')=='win_tenderer':
|
|
|
|
+ if 'multi_winner' not in prem_dic[package]['roleList'][-1]:
|
|
|
|
+ prem_dic[package]['roleList'][-1]['multi_winner'] = prem_dic[package]['roleList'][-1]['role_text']+','+tenderer
|
|
|
|
+ else:
|
|
|
|
+ prem_dic[package]['roleList'][-1]['multi_winner'] += ','+tenderer
|
|
tenderer_list.append(tenderer)
|
|
tenderer_list.append(tenderer)
|
|
if len(prem_dic[package]['roleList']) == 0 and prem_dic[package]['tendereeMoney'] == 0: # 只有项目编号和名称的 丢弃 并不再继续往下匹配
|
|
if len(prem_dic[package]['roleList']) == 0 and prem_dic[package]['tendereeMoney'] == 0: # 只有项目编号和名称的 丢弃 并不再继续往下匹配
|
|
prem_dic.pop(package)
|
|
prem_dic.pop(package)
|
|
@@ -7367,7 +7373,7 @@ class ApprovalPredictor():
|
|
if (multi_project['project_code'] != "" or multi_project['project_name'] != "") and multi_project['project_code']+multi_project['project_name'] not in code_name_set:
|
|
if (multi_project['project_code'] != "" or multi_project['project_name'] != "") and multi_project['project_code']+multi_project['project_name'] not in code_name_set:
|
|
code_name_set.add(multi_project['project_code']+multi_project['project_name'])
|
|
code_name_set.add(multi_project['project_code']+multi_project['project_name'])
|
|
district = getPredictor('district').get_area(
|
|
district = getPredictor('district').get_area(
|
|
- multi_project['project_name'] + multi_project['project_addr'], '')
|
|
|
|
|
|
+ multi_project['approver'] + multi_project['project_name'] + multi_project['project_addr'], '')
|
|
if district['district']['province'] != '全国':
|
|
if district['district']['province'] != '全国':
|
|
multi_project['area'] = district['district']['area']
|
|
multi_project['area'] = district['district']['area']
|
|
multi_project['province'] = district['district']['province']
|
|
multi_project['province'] = district['district']['province']
|
|
@@ -7379,7 +7385,7 @@ class ApprovalPredictor():
|
|
return rs_l
|
|
return rs_l
|
|
elif found_key == 1:
|
|
elif found_key == 1:
|
|
district = getPredictor('district').get_area(
|
|
district = getPredictor('district').get_area(
|
|
- rs_dic['construct_company'] + rs_dic['project_name'] + rs_dic['project_addr'], '')
|
|
|
|
|
|
+ rs_dic['approver'] + rs_dic['project_name'] + rs_dic['project_addr'], '')
|
|
if district['district']['province'] != '全国':
|
|
if district['district']['province'] != '全国':
|
|
rs_dic['area'] = district['district']['area']
|
|
rs_dic['area'] = district['district']['area']
|
|
rs_dic['province'] = district['district']['province']
|
|
rs_dic['province'] = district['district']['province']
|