|
@@ -1528,10 +1528,16 @@ class RoleRulePredictor():
|
|
|
find_flag = True
|
|
|
break
|
|
|
|
|
|
+ if re.search('(项目|工程|招标|采购(条目)?|合同|标项|标的|计划|询价|询价单|询价通知书|申购单|申购)(名称|标名|标题|主题):$', _span[0]):
|
|
|
+ find_flag = True
|
|
|
+ p_entity.values[0] = on_value # 项目名称里面实体修改为最低概率
|
|
|
+ break
|
|
|
+
|
|
|
for _name in name_entitys:
|
|
|
if _name.sentence_index == p_entity.sentence_index and p_entity.wordOffset_begin >=_name.wordOffset_begin and p_entity.wordOffset_end < _name.wordOffset_end:
|
|
|
find_flag = True
|
|
|
p_entity.values[0] = on_value # 项目名称里面实体修改为最低概率
|
|
|
+ break
|
|
|
# if p_entity.values[0] > on_value:
|
|
|
# p_entity.values[0] = 0.5 + (p_entity.values[0] - 0.5) / 10
|
|
|
# else:
|
|
@@ -1807,8 +1813,8 @@ class RoleRuleFinalAdd():
|
|
|
end_tokens.extend(sentence.tokens)
|
|
|
# text_end = "".join(end_tokens[-30:])
|
|
|
text_end = "".join(end_tokens)
|
|
|
- text_end = re.sub(',?(招标办|招投标管理中心|国有资产管理处|采办共享中心|采购与招标管理办公室|附件\d+:[^附件,。]{5,100}\.(docx|doc|rar|xlsx|xls|jpg|pdf)|附件\d+:)', '', text_end)[-200:] # 处理 类似 285264698 传真:0512-62690315,苏州卫生职业技术学院,国有资产管理处,2022年11月24日, 这种情况
|
|
|
- # print('text_end: ', text_end)
|
|
|
+ text_end = re.sub(r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+", '', text_end) # 去除网址
|
|
|
+ text_end = re.sub(',?(招标办|招投标管理中心|国有资产管理处|采办共享中心|采购与招标管理办公室|附件\d*:[^附件,。]{5,100}\.(docx|doc|rar|xlsx|xls|jpg|pdf)|附件\d*:.{,100})', '', text_end)[-200:] # 处理 类似 285264698 传真:0512-62690315,苏州卫生职业技术学院,国有资产管理处,2022年11月24日, 这种情况
|
|
|
# sear_ent = re.search('[,。]([\u4e00-\u9fa5()()]{5,20}),?\s*[.]{2,4}年.{1,2}月.{1,2}日', text_end)
|
|
|
sear_ent = re.search('[,。;](?P<entity>[\u4e00-\u9fa5()()]{5,20}(,?[\u4e00-\u9fa5]{,6}(分公司|部))?),?\s*(公告日期:)?[0-9零一二三四五六七八九十〇]{2,4}[年\-/][0-9零一二三四五六七八九十]{1,2}[月\-/][0-9零一二三四五六七八九十]{1,2}日?', text_end)
|
|
|
sear_ent1 = re.search('((招标|采购)联系人)[,::][A-Za-z0-9_]*(?P<entity>[\u4e00-\u9fa5()()]{4,20})', list_articles[0].content[:5000])
|
|
@@ -1877,26 +1883,6 @@ class RoleRuleFinalAdd():
|
|
|
if not tenderee_notfound:
|
|
|
break
|
|
|
|
|
|
- elif list_codenames[0]['name'] != "": #把标题包含的公司实体作为招标人
|
|
|
- # tenderee_notfound = True
|
|
|
- # ents = []
|
|
|
- # for ent in list_entitys[0]:
|
|
|
- # if ent.entity_type in ['org', 'company']:
|
|
|
- # if ent.label == 0:
|
|
|
- # tenderee_notfound = False
|
|
|
- # elif ent.label == 1:
|
|
|
- # agency_notfound = False
|
|
|
- # elif ent.label == 5:
|
|
|
- # ents.append(ent)
|
|
|
- if tenderee_notfound == True:
|
|
|
- # print('list_codenames',list_codenames[0]['name'])
|
|
|
- for ent in ents:
|
|
|
- if ent.entity_text in list_codenames[0]['name']:
|
|
|
- ent.label = 0
|
|
|
- ent.values[0] = 0.5
|
|
|
- tenderee_notfound == False
|
|
|
- # log('正则召回标题中包含的实体:%s'%ent.entity_text)
|
|
|
- break
|
|
|
|
|
|
# 招标人角色召回规则
|
|
|
class TendereeRuleRecall():
|
|
@@ -2183,14 +2169,15 @@ class RoleGrade():
|
|
|
self.tenderee_left_9 = "(?P<tenderee_left_9>(招标|采购|遴选|寻源|竞价|议价|比选|询比?价|比价|评选|谈判|邀标|邀请|洽谈|约谈|选取|抽取|抽选)(人|方|单位))"
|
|
|
self.tenderee_center_8 = "(?P<tenderee_center_8>受.{5,20}委托)"
|
|
|
self.tenderee_left_8 = "(?P<tenderee_left_8>(尊敬的供应商|(需求|最终|发包|征集|甲|转让|出租|处置)(人|方|单位|组织|用户|业主|主体|部门|公司)))"
|
|
|
- self.tenderee_left_6 = "(?P<tenderee_left_6>(发布|业主|建设|委托)(人|方|单位|组织|用户|业主|主体|部门|公司|企业)|业主|买方|发布机构)"
|
|
|
+ self.tenderee_left_6 = "(?P<tenderee_left_6>(业主|建设|委托)(人|方|单位|组织|用户|业主|主体|部门|公司|企业)|业主|买方)"
|
|
|
+ self.tenderee_left_5 = "(?P<tenderee_left_5>(发布)(人|方|单位|组织|用户|业主|主体|部门|公司|企业)|买方|发布机构)"
|
|
|
self.agency_left_9 = "(?P<agency_left_9>代理)"
|
|
|
self.winTenderer_left_9 = "(?P<winTenderer_left_9>(中标|中选|中价|成交|竞得)|第[1一]名|排[名序]:1|名次:1)"
|
|
|
self.winTenderer_left_8 = "(?P<winTenderer_left_8>(入选供应商|供货商|乙方))"
|
|
|
self.winTenderer_left_6 = "(?P<winTenderer_left_6>(入围|承[接建包修做制担租销]))"
|
|
|
self.secondTenderer_left_9 = "(?P<secondTenderer_left_9>(第[二2](中标|中选|中价|成交)?候选(人|单位|供应商|公司)|第[二2]名|排[名序]:2|名次:2))"
|
|
|
self.thirdTenderer_left_9 = "(?P<thirdTenderer_left_9>(第[三3](中标|中选|中价|成交)?候选(人|单位|供应商|公司)|第[三3]名|排[名序]:3|名次:3))"
|
|
|
- self.pattern_list = [self.tenderee_left_9,self.tenderee_center_8, self.tenderee_left_8,self.tenderee_left_6,self.agency_left_9, self.winTenderer_left_9,
|
|
|
+ self.pattern_list = [self.tenderee_left_9,self.tenderee_center_8, self.tenderee_left_8,self.tenderee_left_6,self.tenderee_left_5,self.agency_left_9, self.winTenderer_left_9,
|
|
|
self.winTenderer_left_8,self.winTenderer_left_6, self.secondTenderer_left_9, self.thirdTenderer_left_9]
|
|
|
def predict(self, list_sentences, list_entitys, span=15, min_prob=0.7):
|
|
|
'''
|
|
@@ -3780,7 +3767,7 @@ class DocChannel():
|
|
|
'土地矿产': '(土地|用地|宗地|荒地|山地|海域|矿)(出让|出租|招租|租赁|承包|流转|使用权|经营权|征收|划拨|中标|成交)|供地结果|矿业权|探矿权|采矿权|(土地|用地|宗地|地块)(使用权)?(终止|中止|网上)?(挂牌|出让|拍卖|招拍|划拨)|征收土地',
|
|
|
'拍卖出让': '(拍卖|变卖|流拍|竞拍)的?(公告|公示)|拍卖|变卖|流拍|竞拍',
|
|
|
'产权交易': '经营权|承包权|使用权|租赁权|股权|债权|排污权|化学需氧量|储备量|竞价销售|销售结果|出租|招租|拍租|竞租|续租|挂牌|出让',
|
|
|
- '采招数据': '(采购|招标|询价|议价|比价|比选|遴选|邀请|邀标|磋商|洽谈|约谈|谈判)的?(公告|公示|中标|成交|结果|$)|工程招标',
|
|
|
+ '采招数据': '(采购|招标|询价|议价|比价|比选|遴选|邀请|邀标|磋商|洽谈|约谈|谈判)的?(公告|公示|中标|成交|结果|$)|工程招标|定点服务',
|
|
|
# |竞价 采招/产权都有竞价方式 # 意向|需求|预公?告|报建|总承包|工程|施工|设计|勘察|代理|监理 |变更|答疑|澄清|中标|成交|合同|废标|流标
|
|
|
'新闻资讯': '(考试|面试|笔试)成绩|成绩的?(公告|公示|公布)|公开招聘|招聘(公告|简章|启事|合同制)|疫情防控\s{,5}(通知|情况|提示)'
|
|
|
}
|
|
@@ -3817,7 +3804,7 @@ class DocChannel():
|
|
|
'资审结果': '((资格|资质)(审查|预审|后审|审核)|资审)结果(公告|公示)?|(资质|资格)(预审|后审)公示|资审及业绩公示',
|
|
|
'招标公告': '(采购|招标|询价|议价|竞价|比价|比选|遴选|邀请|邀标|磋商|洽谈|约谈|谈判|拍卖|招租|交易|出让)的?(公告|公示|$)|公开(采购|招标|招租|拍卖|挂牌|出让)|(资审|预审|后审)公告',
|
|
|
'开标记录': '开标记录|截标信息|评委名单公示|开标安排|开标数据表|开标信息|开标情况|开标一览表|开标结果',
|
|
|
- '验收合同': '验收公告|验收单公示|验收结果公告|验收报告公示|验收意见报告|履约公告|履约结果公告'
|
|
|
+ '验收合同': '(验收|履约)(公告|公示)|(验收|履约)(结果|报告|意见|单)(公告|公示)'
|
|
|
}
|
|
|
|
|
|
def load_life(self,life_model,config):
|
|
@@ -4426,8 +4413,12 @@ class DocChannel():
|
|
|
if doc_type =="":
|
|
|
type_id, type_prob = type_model_predict()
|
|
|
type_model = self.id2type[type_id]
|
|
|
- result['docchannel']['doctype'] = type_model
|
|
|
- msc += type_model + ' 概率:%.4f;'%type_prob
|
|
|
+ if type_model == '新闻资讯' and doc_life!='': # 修复bug 78584245 "docchannel": "合同公告", "doctype": "新闻资讯",
|
|
|
+ result['docchannel']['doctype'] = '采招数据'
|
|
|
+ msc += '模型结果为新闻资讯,生命周期不为空,改为采招数据;'
|
|
|
+ else:
|
|
|
+ result['docchannel']['doctype'] = type_model
|
|
|
+ msc += type_model + ' 概率:%.4f;'%type_prob
|
|
|
# print('公告类别:', self.id2type[id], '概率:',prob)
|
|
|
# if id == 0:
|
|
|
if doc_life=="" and result['docchannel']['doctype'] not in ['', '新闻资讯']:
|
|
@@ -6098,6 +6089,8 @@ class WebsourceTenderee():
|
|
|
find_tenderee = True
|
|
|
if d['role_text'] == "":
|
|
|
d['role_text'] = web_ree
|
|
|
+ elif re.search('大学$', web_ree) and re.search('学院$', d['role_text']) and web_ree not in d['role_text']:
|
|
|
+ d['role_text'] = web_ree
|
|
|
# elif re.search(p, web_ree) and (re.search(p, d['role_text'])==None and len(d['role_text'])<6): # 数据源唯一招标人以医院等结尾,角色中无相关关键词的,替换为数据源招标人
|
|
|
# d['role_text'] = web_ree
|
|
|
# elif re.search('有限(责任)?公司', web_ree) and (re.search('有限(责任)?公司', d['role_text'])==None and len(d['role_text'])<6):
|