|
@@ -881,7 +881,7 @@ class PREMPredict():
|
|
|
elif label in [2,3,4] and re.search('序号:\d+,\w{,2}候选', front):
|
|
|
label = 5
|
|
|
elif label == 0:
|
|
|
- if re.search('拟邀请$|受邀谈判方|流入方名称:$|拟(选用|采用|选取)(单位|公司|企业)(名称)?:$|选择(建设|\w{,2})?服务单位:$', front): # 修复 626700009 二、拟选用单位:海南和泰消防技术服务有限公司。 632486555 选择建设服务单位:四川富吉兴工程管理有限公司, 642115802 拟采用公司:山东久木影视传媒有限公司
|
|
|
+ if re.search('拟邀请$|受邀谈判方|流入方名称:$|拟(选用|采用|选取)(单位|公司|企业)(名称)?:$|选择(建设|\w{,2})?服务单位:$|单一来源采购单位:$', front): # 修复 626700009 二、拟选用单位:海南和泰消防技术服务有限公司。 632486555 选择建设服务单位:四川富吉兴工程管理有限公司, 642115802 拟采用公司:山东久木影视传媒有限公司 654427839 单一来源采购单位:长沙新天地金融服务科技有限公司
|
|
|
label = 2
|
|
|
values[label] = 0.501
|
|
|
elif re.search('(发布(人|方|单位|机构|组织|用户|业主|主体|部门|公司|企业)|组织(单位|人|方|机构)?|(采购|招标|发布)机构)(名称)?[是为:]+', front) and is_agency(entity.entity_text):
|
|
@@ -890,7 +890,7 @@ class PREMPredict():
|
|
|
elif re.search('受托人((盖章))?:$', front):
|
|
|
label = 1
|
|
|
values[label] = 0.501
|
|
|
- elif re.search('采用$|异议受理部门|本次招标有:$|直购企业:$|主报名人:$|采购候选人:$', front): # 368177736 因本项目招标采用广西壮族自治区公共资源交易平台系统- 标公告,本次招标有:内黄县汇融钢材有限公司、安阳正元建筑工程有限公司、内黄县鸿业贸易有限责任公司三家合格供应商进行报名投标。 438880541 直购企业可能为多个,其中一个中标
|
|
|
+ elif re.search('采用$|异议受理部门|本次招标有:$|直购企业:$|主报名人:$|采购候选人:$|申报企业:$|生产企业:$', front): # 368177736 因本项目招标采用广西壮族自治区公共资源交易平台系统- 标公告,本次招标有:内黄县汇融钢材有限公司、安阳正元建筑工程有限公司、内黄县鸿业贸易有限责任公司三家合格供应商进行报名投标。 438880541 直购企业可能为多个,其中一个中标 # 654390120 申报企业:
|
|
|
label = 5
|
|
|
elif re.search(',单位名称:$', front) and re.search('^,(中标|中选)价格', behind):
|
|
|
label = 2
|
|
@@ -6494,8 +6494,8 @@ class DistrictPredictor():
|
|
|
|
|
|
province_l, city_l, district_l = self.find_whole_areas('%s %s'%(title, addr_project), self.pettern, self.area_variance_dic, self.full_dic)
|
|
|
pro_ids, city_ids, dis_ids = self.merge_score(province_l, city_l, district_l, self.full_dic, self.short_dic, self.idx_dic)
|
|
|
- big_area_1, pred_pro_1, pred_city_1, pred_dis_1, prob, max_score, code_dic_1 = self.get_final_addr(pro_ids, city_ids, dis_ids, self.idx_dic)
|
|
|
- big_area, pred_pro, pred_city, pred_dis, code_dic = big_area_1, pred_pro_1, pred_city_1, pred_dis_1, code_dic_1
|
|
|
+ big_area_1, pred_pro_1, pred_city_1, pred_dis_1, prob_1, max_score, code_dic_1 = self.get_final_addr(pro_ids, city_ids, dis_ids, self.idx_dic)
|
|
|
+ big_area, pred_pro, pred_city, pred_dis, prob, code_dic = big_area_1, pred_pro_1, pred_city_1, pred_dis_1, prob_1, code_dic_1
|
|
|
# print('关键词1:', province_l, city_l, district_l)
|
|
|
# print('输入:', '标题:%s; 项目地址:%s'%(title, addr_project))
|
|
|
# print('分数:', pro_ids, city_ids, dis_ids, prob, max_score)
|
|
@@ -6515,12 +6515,12 @@ class DistrictPredictor():
|
|
|
city_l.extend(city_l2)
|
|
|
district_l.extend(district_l2)
|
|
|
pro_ids, city_ids, dis_ids = self.merge_score(province_l, city_l, district_l, self.full_dic, self.short_dic, self.idx_dic)
|
|
|
- big_area_2, pred_pro_2, pred_city_2, pred_dis_2, prob, max_score, code_dic_2 = self.get_final_addr(pro_ids, city_ids, dis_ids, self.idx_dic)
|
|
|
- big_area, pred_pro, pred_city, pred_dis, code_dic = big_area_2, pred_pro_2, pred_city_2, pred_dis_2, code_dic_2
|
|
|
+ big_area_2, pred_pro_2, pred_city_2, pred_dis_2, prob_2, max_score, code_dic_2 = self.get_final_addr(pro_ids, city_ids, dis_ids, self.idx_dic)
|
|
|
+ big_area, pred_pro, pred_city, pred_dis, prob, code_dic = big_area_2, pred_pro_2, pred_city_2, pred_dis_2, prob_2, code_dic_2
|
|
|
# print('关键词2:', province_l, city_l, district_l)
|
|
|
# print('输入:', '招标人:%s; 招标人地址:%s; 收货地址:%s' % (ree, addr, addr_delivery))
|
|
|
# print('分数:', pro_ids, city_ids, dis_ids, prob, max_score)
|
|
|
- if re.search('省|市|县|自治', addr_project) and pred_pro_1 != '' and pred_pro_1 != pred_pro_2: # 如果有项目地址使用项目地址 要有省市县等 275127622 工程地点为狮山镇颜峰综合区岐山至人和段道路, 提错 岐山
|
|
|
+ if re.search('省|市|县|自治', addr_project) and prob_1 !=0.5 and pred_pro_1 != '' and pred_pro_1 != pred_pro_2: # 如果有项目地址使用项目地址 要有省市县等 275127622 工程地点为狮山镇颜峰综合区岐山至人和段道路, 提错 岐山
|
|
|
not_sure = False
|
|
|
big_area, pred_pro, pred_city, pred_dis, code_dic = big_area_1, pred_pro_1, pred_city_1, pred_dis_1, code_dic_1
|
|
|
if not_sure and (pred_city_2 == "" or prob < 0.7 or max_score<2):
|
|
@@ -6530,12 +6530,12 @@ class DistrictPredictor():
|
|
|
city_l.extend(city_l3)
|
|
|
district_l.extend(district_l3)
|
|
|
pro_ids, city_ids, dis_ids = self.merge_score(province_l, city_l, district_l, self.full_dic, self.short_dic, self.idx_dic)
|
|
|
- big_area_3, pred_pro_3, pred_city_3, pred_dis_3, prob, max_score, code_dic_3 = self.get_final_addr(pro_ids, city_ids, dis_ids, self.idx_dic)
|
|
|
- big_area, pred_pro, pred_city, pred_dis, code_dic = big_area_3, pred_pro_3, pred_city_3, pred_dis_3, code_dic_3
|
|
|
+ big_area_3, pred_pro_3, pred_city_3, pred_dis_3, prob_3, max_score, code_dic_3 = self.get_final_addr(pro_ids, city_ids, dis_ids, self.idx_dic)
|
|
|
+ big_area, pred_pro, pred_city, pred_dis, prob, code_dic = big_area_3, pred_pro_3, pred_city_3, pred_dis_3, prob_3, code_dic_3
|
|
|
# print('关键词3:', province_l, city_l, district_l)
|
|
|
# print('输入:', '联系:%s, 开标:%s, 邮寄:%s'%(addr_contact, addr_bidopen, addr_bidsend))
|
|
|
# print('分数:', pro_ids, city_ids, dis_ids, prob, max_score)
|
|
|
- if pred_city_2 != "" and pred_city_2 != pred_city_3:
|
|
|
+ if pred_city_2 != "" and prob_2 !=0.5 and pred_city_2 != pred_city_3:
|
|
|
not_sure = False
|
|
|
big_area, pred_pro, pred_city, pred_dis, code_dic = big_area_2, pred_pro_2, pred_city_2, pred_dis_2, code_dic_2 # 如果招标人、招标人地址、收货地址与开标地址、联系地址等不一致,取招标人地址
|
|
|
if not_sure and (pred_city_3 == "" or prob < 0.6 or max_score < 2):
|
|
@@ -6545,9 +6545,9 @@ class DistrictPredictor():
|
|
|
city_l.extend(city_l4)
|
|
|
district_l.extend(district_l4)
|
|
|
pro_ids, city_ids, dis_ids = self.merge_score(province_l, city_l, district_l, self.full_dic, self.short_dic, self.idx_dic)
|
|
|
- big_area_4, pred_pro_4, pred_city_4, pred_dis_4, prob, max_score, code_dic_4 = self.get_final_addr(pro_ids, city_ids,dis_ids, self.idx_dic)
|
|
|
- big_area, pred_pro, pred_city, pred_dis, code_dic = big_area_4, pred_pro_4, pred_city_4, pred_dis_4, code_dic_4
|
|
|
- if pred_city_3 != "" and pred_city_3 != pred_city_4:
|
|
|
+ big_area_4, pred_pro_4, pred_city_4, pred_dis_4, prob_4, max_score, code_dic_4 = self.get_final_addr(pro_ids, city_ids,dis_ids, self.idx_dic)
|
|
|
+ big_area, pred_pro, pred_city, pred_dis, prob, code_dic = big_area_4, pred_pro_4, pred_city_4, pred_dis_4, prob_4, code_dic_4
|
|
|
+ if pred_city_3 != "" and prob_3 !=0.5 and pred_city_3 != pred_city_4:
|
|
|
not_sure = False
|
|
|
big_area, pred_pro, pred_city, pred_dis, code_dic = big_area_3, pred_pro_3, pred_city_3, pred_dis_3, code_dic_3 # 如果开标地址等提取的城市与所有地址提取的城市不一致,取开标地址等
|
|
|
if pred_pro_3 != pred_pro_4 and (prob < 0.6 or max_score < 2):
|