|
@@ -28,8 +28,10 @@ import calendar
|
|
import datetime
|
|
import datetime
|
|
from BiddingKG.dl.entityLink.entityLink import get_business_data
|
|
from BiddingKG.dl.entityLink.entityLink import get_business_data
|
|
from BiddingKG.dl.proposed_building.pb_extract import PBPredictor
|
|
from BiddingKG.dl.proposed_building.pb_extract import PBPredictor
|
|
-from BiddingKG.dl.interface.getAttributes import turnMoneySource
|
|
|
|
|
|
+# from BiddingKG.dl.interface.getAttributes import turnMoneySource
|
|
from BiddingKG.dl.common.Utils import del_tabel_achievement
|
|
from BiddingKG.dl.common.Utils import del_tabel_achievement
|
|
|
|
+from BiddingKG.dl.interface.getAttributes import turnMoneySource, extract_serviceTime
|
|
|
|
+from BiddingKG.dl.time.re_servicetime import extract_servicetime
|
|
# import fool # 统一用 selffool ,阿里云上只有selffool 包
|
|
# import fool # 统一用 selffool ,阿里云上只有selffool 包
|
|
|
|
|
|
cpu_num = int(os.environ.get("CPU_NUM",0))
|
|
cpu_num = int(os.environ.get("CPU_NUM",0))
|
|
@@ -6440,6 +6442,18 @@ class TablePremExtractor(object):
|
|
"tenderee": "(项目|采购|招标|遴选|寻源|竞价|议价|比选|委托|询比?价|比价|评选|谈判|邀标|邀请|洽谈|约谈|选取|抽取|抽选)(人|公司|单位|组织|用户|业主|主体|方|部门)(名称|$)",
|
|
"tenderee": "(项目|采购|招标|遴选|寻源|竞价|议价|比选|委托|询比?价|比价|评选|谈判|邀标|邀请|洽谈|约谈|选取|抽取|抽选)(人|公司|单位|组织|用户|业主|主体|方|部门)(名称|$)",
|
|
"budget": "最高(投标)?限价|总价限价|控制(价格?|金额|总价)|(总价|采购)限价|上限价|拦标价|(采购|招标|项目)?预算|(预算|招标|采购|计划)金额|挂牌价",
|
|
"budget": "最高(投标)?限价|总价限价|控制(价格?|金额|总价)|(总价|采购)限价|上限价|拦标价|(采购|招标|项目)?预算|(预算|招标|采购|计划)金额|挂牌价",
|
|
"bid_amount": "投标[报总]?价|报价(总?金额|总价|总额)|总报价|^\w{,5}报价(([\w、/]{1,15}))?$|(中标|成交|合同))?总?(金?额|[报均总]价|价[格款]?)|承包价|含税价|经评审的价格|中标存款金?额|中标资金|中标存款|存放金额|分配额度",
|
|
"bid_amount": "投标[报总]?价|报价(总?金额|总价|总额)|总报价|^\w{,5}报价(([\w、/]{1,15}))?$|(中标|成交|合同))?总?(金?额|[报均总]价|价[格款]?)|承包价|含税价|经评审的价格|中标存款金?额|中标资金|中标存款|存放金额|分配额度",
|
|
|
|
+ "serviceTime": '合同期限|工期/交货期/服务期|工期\(交货期\)|合格工期|服务期限|工期' \
|
|
|
|
+ '|工期要求|项目周期|工期\(交货期\)|计划工期\(服务期限\)|服务时限|履行期限|服务周期|供货期限' \
|
|
|
|
+ '|合格工期|计划工期\(服务期\)|服务期|服务,期|交货\(完工\)时间|交付\(服务、完工\)时间' \
|
|
|
|
+ '|交货时间|保洁期限|维保期|管理年限|工期承诺|(服务|合同|施工|实施|工程|设计)的?(年限|期限|周期|期:)' \
|
|
|
|
+ '|计划工期|工期要求|服务期限?' \
|
|
|
|
+ '|投标工期|设计工期|合格服务周期|总工期|服务时间(范围)?|流转期限|维护期限|服务时限|交货期' \
|
|
|
|
+ '|完成时间|中标工期|项目周期|期限要求|周期|供货期|合同的?履行日期|计划周期' \
|
|
|
|
+ '|履约期限|合同的?约定完成时限|合同的?完成日期|承诺完成日期' \
|
|
|
|
+ '|合同起始日起|合同的?履约期|履约截止日期|承包期限|合同的?完成日期|特许经营期限' \
|
|
|
|
+ '|服务期间|服务履行期|委托(管理)?期限|经营期限|数量' \
|
|
|
|
+ '|(工期|服务期限?|交货期限?|服务履行期|合同期限?|履[行约]期限?)说明|存款期限?|(存款|存放|定存)(期|年)限' \
|
|
|
|
+ '|服务(有效期|年限)|本?合同有效期|协议有效期|项目期限'
|
|
}
|
|
}
|
|
|
|
|
|
with open(os.path.dirname(__file__)+'/header_set.pkl', 'rb') as f:
|
|
with open(os.path.dirname(__file__)+'/header_set.pkl', 'rb') as f:
|
|
@@ -6533,6 +6547,8 @@ class TablePremExtractor(object):
|
|
return flag,contain_header, header_dic, not_sure_winner
|
|
return flag,contain_header, header_dic, not_sure_winner
|
|
elif 'tenderer' in header_dic and (re.search('(中标|中选|中价|成交|竞得)(人|单位|供应商|公司|企业|厂家|商家?|客户|供?方|银行)',header_dic['tenderer'][1]) or all_winner): # 有中标人,且有明确中标关键词的进行提取
|
|
elif 'tenderer' in header_dic and (re.search('(中标|中选|中价|成交|竞得)(人|单位|供应商|公司|企业|厂家|商家?|客户|供?方|银行)',header_dic['tenderer'][1]) or all_winner): # 有中标人,且有明确中标关键词的进行提取
|
|
return flag, contain_header, header_dic, not_sure_winner
|
|
return flag, contain_header, header_dic, not_sure_winner
|
|
|
|
+ # elif 'tenderer' in header_dic and 'serviceTime' in header_dic:
|
|
|
|
+ # return flag, contain_header, header_dic, not_sure_winner
|
|
elif len(set(fix_td_list) & self.headerset) >= 2 or (len(set(fix_td_list)) == 2 and len(set(td_list) & self.headerset) >= 1): # 如果包含两个表头以上或 只有两列且包含一个表头
|
|
elif len(set(fix_td_list) & self.headerset) >= 2 or (len(set(fix_td_list)) == 2 and len(set(td_list) & self.headerset) >= 1): # 如果包含两个表头以上或 只有两列且包含一个表头
|
|
contain_header = True
|
|
contain_header = True
|
|
return flag, contain_header, dict(), not_sure_winner
|
|
return flag, contain_header, dict(), not_sure_winner
|
|
@@ -6573,6 +6589,7 @@ class TablePremExtractor(object):
|
|
package_fix2raw = dict() # 处理后包号:处理前包号 字典
|
|
package_fix2raw = dict() # 处理后包号:处理前包号 字典
|
|
link_set = set()
|
|
link_set = set()
|
|
tenderer_list = [] # 保存所有中标人
|
|
tenderer_list = [] # 保存所有中标人
|
|
|
|
+ serviceTime_list = []
|
|
not_package = True if 'project_name' in headers and re.search('(货物|商品|产品|通用|主要标的)(名称?|内容)', headers['project_name'][1]) and \
|
|
not_package = True if 'project_name' in headers and re.search('(货物|商品|产品|通用|主要标的)(名称?|内容)', headers['project_name'][1]) and \
|
|
'package_code' not in headers and 'budget' not in headers and "bid_amount" not in headers else False
|
|
'package_code' not in headers and 'budget' not in headers and "bid_amount" not in headers else False
|
|
|
|
|
|
@@ -6594,6 +6611,7 @@ class TablePremExtractor(object):
|
|
bid_amount_ = df.loc[i, headers['bid_amount'][0]].strip() if "bid_amount" in headers else ""
|
|
bid_amount_ = df.loc[i, headers['bid_amount'][0]].strip() if "bid_amount" in headers else ""
|
|
win_sort = df.loc[i, headers['win_sort'][0]].strip() if "win_sort" in headers else ""
|
|
win_sort = df.loc[i, headers['win_sort'][0]].strip() if "win_sort" in headers else ""
|
|
win_or_not = df.loc[i, headers['win_or_not'][0]].strip() if "win_or_not" in headers else ""
|
|
win_or_not = df.loc[i, headers['win_or_not'][0]].strip() if "win_or_not" in headers else ""
|
|
|
|
+ serviceTime = df.loc[i, headers['serviceTime'][0]].strip() if "serviceTime" in headers else ""
|
|
|
|
|
|
if set([project_code, package_code_raw, project_name,tenderee,tenderer,budget_,bid_amount_]) & self.headerset != set(): # 只要有一项为表头 停止匹配
|
|
if set([project_code, package_code_raw, project_name,tenderee,tenderer,budget_,bid_amount_]) & self.headerset != set(): # 只要有一项为表头 停止匹配
|
|
# print('只要有一项为表头 停止匹配', set([project_code, package_code_raw, project_name,tenderee,tenderer,budget_,bid_amount_,win_sort]) & self.headerset)
|
|
# print('只要有一项为表头 停止匹配', set([project_code, package_code_raw, project_name,tenderee,tenderer,budget_,bid_amount_,win_sort]) & self.headerset)
|
|
@@ -6733,6 +6751,13 @@ class TablePremExtractor(object):
|
|
if (re.search('费率|下浮率|[%%‰折]',
|
|
if (re.search('费率|下浮率|[%%‰折]',
|
|
bid_amount_header + bid_amount_) and bid_amount < 100) or bid_amount > 50000000000: # 如果是费率或大于500亿的金额改为0
|
|
bid_amount_header + bid_amount_) and bid_amount < 100) or bid_amount > 50000000000: # 如果是费率或大于500亿的金额改为0
|
|
bid_amount = 0
|
|
bid_amount = 0
|
|
|
|
+ if serviceTime:
|
|
|
|
+ serviceTime_text = headers['serviceTime'][1] + serviceTime if headers['serviceTime'][1][-1] in [':',':'] else headers['serviceTime'][1] + ':' + serviceTime
|
|
|
|
+ # print('serviceTime_text',serviceTime_text)
|
|
|
|
+ serviceTime = extract_servicetime(serviceTime_text)
|
|
|
|
+ serviceTime.sort(key=lambda x:x.get('begin_index',0))
|
|
|
|
+ serviceTime = extract_serviceTime(serviceTime[0]['body'],"") if serviceTime else ""
|
|
|
|
+ # print(serviceTime)
|
|
if not same_package or len(prem_dic[package]['roleList'])==0:
|
|
if not same_package or len(prem_dic[package]['roleList'])==0:
|
|
prem_dic[package]['roleList'].append({
|
|
prem_dic[package]['roleList'].append({
|
|
"address": "",
|
|
"address": "",
|
|
@@ -6746,7 +6771,7 @@ class TablePremExtractor(object):
|
|
},
|
|
},
|
|
"role_name": "win_tenderer",
|
|
"role_name": "win_tenderer",
|
|
"role_text": tenderer,
|
|
"role_text": tenderer,
|
|
- "serviceTime": ""
|
|
|
|
|
|
+ "serviceTime": serviceTime
|
|
})
|
|
})
|
|
elif prem_dic[package]['roleList'] and prem_dic[package]['roleList'][-1].get('role_name', '')=='win_tenderer':
|
|
elif prem_dic[package]['roleList'] and prem_dic[package]['roleList'][-1].get('role_name', '')=='win_tenderer':
|
|
if 'multi_winner' not in prem_dic[package]['roleList'][-1]:
|
|
if 'multi_winner' not in prem_dic[package]['roleList'][-1]:
|
|
@@ -6757,8 +6782,9 @@ class TablePremExtractor(object):
|
|
if bid_amount != 0: # 有中标金额的才放进去
|
|
if bid_amount != 0: # 有中标金额的才放进去
|
|
if 'other_winner_dic' not in prem_dic[package]['roleList'][-1]:
|
|
if 'other_winner_dic' not in prem_dic[package]['roleList'][-1]:
|
|
prem_dic[package]['roleList'][-1]['other_winner_dic'] = []
|
|
prem_dic[package]['roleList'][-1]['other_winner_dic'] = []
|
|
- prem_dic[package]['roleList'][-1]['other_winner_dic'].append({'role_text': tenderer, "money": bid_amount, "money_unit": money_unit})
|
|
|
|
|
|
+ prem_dic[package]['roleList'][-1]['other_winner_dic'].append({'role_text': tenderer, "money": bid_amount, "money_unit": money_unit,"serviceTime":serviceTime})
|
|
tenderer_list.append(tenderer)
|
|
tenderer_list.append(tenderer)
|
|
|
|
+ serviceTime_list.append(serviceTime)
|
|
if len(prem_dic[package]['roleList']) == 0 and prem_dic[package]['tendereeMoney'] == 0: # 只有项目编号和名称的 丢弃 并不再继续往下匹配
|
|
if len(prem_dic[package]['roleList']) == 0 and prem_dic[package]['tendereeMoney'] == 0: # 只有项目编号和名称的 丢弃 并不再继续往下匹配
|
|
prem_dic.pop(package)
|
|
prem_dic.pop(package)
|
|
# break # 注释掉避免 400084571 某些包废标 中断匹配
|
|
# break # 注释掉避免 400084571 某些包废标 中断匹配
|
|
@@ -6790,7 +6816,7 @@ class TablePremExtractor(object):
|
|
},
|
|
},
|
|
"role_name": "win_tenderer",
|
|
"role_name": "win_tenderer",
|
|
"role_text": tenderer_list[0],
|
|
"role_text": tenderer_list[0],
|
|
- "serviceTime": ""
|
|
|
|
|
|
+ "serviceTime": serviceTime_list[0]
|
|
}],
|
|
}],
|
|
'tendereeMoney': 0,
|
|
'tendereeMoney': 0,
|
|
'tendereeMoneyUnit': ""
|
|
'tendereeMoneyUnit': ""
|