|
@@ -1095,16 +1095,24 @@ class RoleRulePredictor():
|
|
|
|
|
|
def __init__(self):
|
|
def __init__(self):
|
|
# self.pattern_tenderee_left = "(?P<tenderee_left>((遴选|采购|招标|项目|竞价|议价|需求|最终|建设|业主|转让|招租|甲|议标|合同主体|比选|委托|询价)(?:人|公司|单位|组织|用户|业主|方|部门)|文章来源|需方)(名称)?(是|为|信息|:|:|\s*)$)"
|
|
# self.pattern_tenderee_left = "(?P<tenderee_left>((遴选|采购|招标|项目|竞价|议价|需求|最终|建设|业主|转让|招租|甲|议标|合同主体|比选|委托|询价)(?:人|公司|单位|组织|用户|业主|方|部门)|文章来源|需方)(名称)?(是|为|信息|:|:|\s*)$)"
|
|
- self.pattern_tenderee_left = "(?P<tenderee_left>((遴选|采购|招标|项目|竞价|议价|需求|最终|建设|业主|转让|招租|甲|议标|合同主体|比选|委托|询价|评选|挂牌|出租|出让|谈判|邀标|邀请|洽谈|约谈|买受|选取|抽取|抽选|出售|标卖|比价)(人|公司|单位|组织|用户|业主|主体|方|部门)|文章来源|委托机构|产权所有人|需方|买方|业主|权属人|甲方当事人)[))]?(名称|信息)?([((](全称|盖章)[))])?(是|为|:|:|,|\s*)+$)"
|
|
|
|
|
|
+ # self.pattern_tenderee_left = "(?P<tenderee_left>((遴选|采购|招标|项目|竞价|议价|需求|最终|建设|业主|转让|招租|甲|议标|合同主体|比选|委托|询价|评选|挂牌|出租|出让|谈判|邀标|邀请|洽谈|约谈|买受|选取|抽取|抽选|出售|标卖|比价)(人|公司|单位|组织|用户|业主|主体|方|部门)|文章来源|委托机构|产权所有人|需方|买方|业主|权属人|甲方当事人)[))]?(名称|信息)?([((](全称|盖章)[))])?(是|为|:|:|,|\s*)+$)"
|
|
|
|
+ self.pattern_tenderee_left = "(?P<tenderee_left>((遴选|采购|招标|项目|竞价|议价|需求|最终|建设|业主|转让|招租|甲|议标|合同主体|比选|委托|询价|评选|挂牌|出租|出让|谈判|邀标|邀请|洽谈|约谈|买受|选取|抽取|抽选|出售|标卖|比价|处置)" \
|
|
|
|
+ "(人|公司|单位|组织|用户|业主|主体|方|部门)|文章来源|委托机构|产权所有人|需方|买方|业主|权属人|甲方当事人|询价书企业|比选发起人|项目单位[,:]单位名称|结算单位)"\
|
|
|
|
+ "[))]?(信息[,:])?(名称)?([((](全称|盖章)[))])?(是|为|:|:|,|\s*)+$)"
|
|
self.pattern_tenderee_center = "(?P<tenderee_center>(受.{,20}委托))"
|
|
self.pattern_tenderee_center = "(?P<tenderee_center>(受.{,20}委托))"
|
|
self.pattern_tenderee_right = "(?P<tenderee_right>^([((](以下简称)?[,\"“]*(招标|采购)(人|单位|机构)[,\"”]*[))])|^委托|^拟对|^现就|^现委托)" #|(^[^.。,,::](采购|竞价|招标|施工|监理|中标|物资)(公告|公示|项目|结果|招标))|的.*正在进行询比价)
|
|
self.pattern_tenderee_right = "(?P<tenderee_right>^([((](以下简称)?[,\"“]*(招标|采购)(人|单位|机构)[,\"”]*[))])|^委托|^拟对|^现就|^现委托)" #|(^[^.。,,::](采购|竞价|招标|施工|监理|中标|物资)(公告|公示|项目|结果|招标))|的.*正在进行询比价)
|
|
|
|
|
|
self.pattern_agency_left = "(?P<agency_left>(代理(?:人|机构|公司|单位|组织)|专业采购机构|集中采购机构|集采机构|[招议))]+标机构)(.{,4}名,?称|全称|是|为|:|:|[,,]?\s*)$|(受.{,20}委托))"
|
|
self.pattern_agency_left = "(?P<agency_left>(代理(?:人|机构|公司|单位|组织)|专业采购机构|集中采购机构|集采机构|[招议))]+标机构)(.{,4}名,?称|全称|是|为|:|:|[,,]?\s*)$|(受.{,20}委托))"
|
|
self.pattern_agency_right = "(?P<agency_right>^([((](以下简称)?[,\"“]*(代理)(人|单位|机构)[,\"”]*[))])|受.{,15}委托|^受托)"
|
|
self.pattern_agency_right = "(?P<agency_right>^([((](以下简称)?[,\"“]*(代理)(人|单位|机构)[,\"”]*[))])|受.{,15}委托|^受托)"
|
|
# 2020/11/24 large-website rule: added winning-bidder keywords 选定单位|指定的中介服务机构
|
|
# 2020/11/24 large-website rule: added winning-bidder keywords 选定单位|指定的中介服务机构
|
|
- self.pattern_winTenderer_left = "(?P<winTenderer_left>((中标|中选|中价|乙|成交|承做|施工|供货|承包|竞得|受让)(候选)?(人|单位|机构|各?供应商|方|公司|厂商|商)[::是为]+$|(选定单位|指定的中介服务机构))[::是为,]+$|(第[一1](名|((中标|中选|中价|成交)?(候选)?(人|单位|机构|供应商))))[::是为]+$|((评审结果|名次|排名)[::]第?[一1]名?)$|单一来源(采购)?方式向$|((中标|成交)(结果|信息))(是|为|:|:)$|(单一来源采购(供应商|供货商|服务商))$|[^候选]((分包|标包){,5}供应商|供货商|服务商|供应商名称|服务机构|供方)[::]$)"
|
|
|
|
|
|
+ # self.pattern_winTenderer_left = "(?P<winTenderer_left>((中标|中选|中价|乙|成交|承做|施工|供货|承包|竞得|受让)(候选)?(人|单位|机构|各?供应商|方|公司|厂商|商)[::是为]+$|(选定单位|指定的中介服务机构))[::是为,]+$|(第[一1](名|((中标|中选|中价|成交)?(候选)?(人|单位|机构|供应商))))[::是为]+$|((评审结果|名次|排名)[::]第?[一1]名?)$|单一来源(采购)?方式向$|((中标|成交)(结果|信息))(是|为|:|:)$|(单一来源采购(供应商|供货商|服务商))$|[^候选]((分包|标包){,5}供应商|供货商|服务商|供应商名称|服务机构|供方)[::]$)"
|
|
|
|
+ self.pattern_winTenderer_left = "(?P<winTenderer_left>(中标|中选|中价|乙|成交|承做|施工|供货|承包|竞得|受让|签约)(候选)?(人|单位|机构|供应商|方|公司|厂商|商)[::是为,]+$|" \
|
|
|
|
+ "(选定单位|指定的中介服务机构|实施主体|承制单位)[::是为,]+$|((评审结果|名次|排名|中标结果)[::]*第?[一1]名?)[::,]*$|" \
|
|
|
|
+ "单一来源(采购)?(供应商|供货商|服务商|方式向)$|((中标|成交)(结果|信息))(是|为|:|:)$|(供应|供货|供|承销|服务|实施)(机构|单位|商|方)(名称)?[::是为,]$)"
|
|
# self.pattern_winTenderer_center = "(?P<winTenderer_center>第[一1].{,20}[是为]((中标|中选|中价|成交|施工)(人|单位|机构|供应商|公司)|供应商)[::是为])"
|
|
# self.pattern_winTenderer_center = "(?P<winTenderer_center>第[一1].{,20}[是为]((中标|中选|中价|成交|施工)(人|单位|机构|供应商|公司)|供应商)[::是为])"
|
|
- self.pattern_winTenderer_right = "(?P<winTenderer_right>(^[是为\(]((采购(供应商|供货商|服务商)|(第[一1]|预)?(拟?(中标|中选|中价|成交)(候选)?(人|单位|机构|供应商|公司|厂商)))))|^(报价|价格)最低,确定为本项目成交供应商)"
|
|
|
|
|
|
+ # self.pattern_winTenderer_right = "(?P<winTenderer_right>(^[是为\(]((采购(供应商|供货商|服务商)|(第[一1]|预)?(拟?(中标|中选|中价|成交)(候选)?(人|单位|机构|供应商|公司|厂商)))))|^(报价|价格)最低,确定为本项目成交供应商)"
|
|
|
|
+ self.pattern_winTenderer_right = "(?P<winTenderer_right>(^[是为]((采购|中标)(供应商|供货商|服务商)|(第[一1]|预)?(拟?(中标|中选|中价|成交)(候选)?(人|单位|机构|供应商|公司|厂商)))|" \
|
|
|
|
+ "^(报价|价格)最低,确定为本项目成交供应商|^:贵公司参与|^你方于))"
|
|
self.pattern_winTenderer_whole = "(?P<winTenderer_center>贵公司.{,15}以.{,15}中标|最终由.{,15}竞买成功|经.{,15}决定[以由].{,15}公司中标|谈判结果:由.{5,20}供货)|中标通知书.{,15}你方" # 2020//11/24 大网站规则 中标关键词添加 谈判结果:由.{5,20}供货
|
|
self.pattern_winTenderer_whole = "(?P<winTenderer_center>贵公司.{,15}以.{,15}中标|最终由.{,15}竞买成功|经.{,15}决定[以由].{,15}公司中标|谈判结果:由.{5,20}供货)|中标通知书.{,15}你方" # 2020//11/24 大网站规则 中标关键词添加 谈判结果:由.{5,20}供货
|
|
|
|
|
|
# self.pattern_winTenderer_location = "(中标|中选|中价|乙|成交|承做|施工|供货|承包|竞得|受让)(候选)?(人|单位|机构|供应商|方|公司|厂商|商)|(供应商|供货商|服务商)[::]?$|(第[一1](名|((中标|中选|中价|成交)?(候选)?(人|单位|机构|供应商))))(是|为|:|:|\s*$)|((评审结果|名次|排名)[::]第?[一1]名?)|(单一来源(采购)?方式向.?$)"
|
|
# self.pattern_winTenderer_location = "(中标|中选|中价|乙|成交|承做|施工|供货|承包|竞得|受让)(候选)?(人|单位|机构|供应商|方|公司|厂商|商)|(供应商|供货商|服务商)[::]?$|(第[一1](名|((中标|中选|中价|成交)?(候选)?(人|单位|机构|供应商))))(是|为|:|:|\s*$)|((评审结果|名次|排名)[::]第?[一1]名?)|(单一来源(采购)?方式向.?$)"
|
|
@@ -1244,7 +1252,7 @@ class RoleRulePredictor():
|
|
_role = _group.split("_")[0]
|
|
_role = _group.split("_")[0]
|
|
_direct = _group.split("_")[1]
|
|
_direct = _group.split("_")[1]
|
|
_label = {"tenderee":0,"agency":1,"winTenderer":2,"secondTenderer":3,"thirdTenderer":4}.get(_role)
|
|
_label = {"tenderee":0,"agency":1,"winTenderer":2,"secondTenderer":3,"thirdTenderer":4}.get(_role)
|
|
- if _i_span==0 and _direct=="left" and '各供应商' not in _v_group: #2021/12/22 修正错误中标召回 例子208668937
|
|
|
|
|
|
+ if _i_span==0 and _direct=="left" and re.search('各供应商|尊敬的供应商', list_spans[0])==None: #2021/12/22 修正错误中标召回 例子208668937
|
|
_flag = True
|
|
_flag = True
|
|
_distance = abs((len(list_spans[_i_span])-_iter.span()[1]))
|
|
_distance = abs((len(list_spans[_i_span])-_iter.span()[1]))
|
|
list_distance[int(_label)] = min(_distance,list_distance[int(_label)])
|
|
list_distance[int(_label)] = min(_distance,list_distance[int(_label)])
|
|
@@ -1411,8 +1419,15 @@ class RoleRuleFinalAdd():
|
|
text_end = list_articles[0].content[-40:]
|
|
text_end = list_articles[0].content[-40:]
|
|
# sear_ent = re.search('[,。]([\u4e00-\u9fa5()()]{5,20}),?\s*[.]{2,4}年.{1,2}月.{1,2}日', text_end)
|
|
# sear_ent = re.search('[,。]([\u4e00-\u9fa5()()]{5,20}),?\s*[.]{2,4}年.{1,2}月.{1,2}日', text_end)
|
|
sear_ent = re.search('[,。]([\u4e00-\u9fa5()()]{5,20}(,?[\u4e00-\u9fa5]{,6}(分公司|部))?),?\s*[0-9零一二三四五六七八九十]{2,4}年.{1,2}月.{1,2}日', text_end)
|
|
sear_ent = re.search('[,。]([\u4e00-\u9fa5()()]{5,20}(,?[\u4e00-\u9fa5]{,6}(分公司|部))?),?\s*[0-9零一二三四五六七八九十]{2,4}年.{1,2}月.{1,2}日', text_end)
|
|
- if sear_ent:
|
|
|
|
- ent_re = sear_ent.group(1).replace(',', '')
|
|
|
|
|
|
+ sear_ent2 = re.search('(户名|开户名称)[::]([\u4e00-\u9fa5()()]{5,20})[,。]', list_articles[0].content[:5000])
|
|
|
|
+ sear_ent3 = re.search('报名咨询,([\u4e00-\u9fa5()()]{5,20})[0-9\-]*[,。]', list_articles[0].content[:5000])
|
|
|
|
+ if sear_ent or sear_ent2 or sear_ent3:
|
|
|
|
+ if sear_ent3:
|
|
|
|
+ ent_re = sear_ent3.group(1).replace("(","(").replace(")",")")
|
|
|
|
+ elif sear_ent2:
|
|
|
|
+ ent_re = sear_ent2.group(2).replace("(","(").replace(")",")")
|
|
|
|
+ else:
|
|
|
|
+ ent_re = sear_ent.group(1).replace(',', '').replace("(","(").replace(")",")")
|
|
tenderee_notfound = True
|
|
tenderee_notfound = True
|
|
agency_notfound = True
|
|
agency_notfound = True
|
|
ents = []
|
|
ents = []
|
|
@@ -1428,7 +1443,7 @@ class RoleRuleFinalAdd():
|
|
n = 0
|
|
n = 0
|
|
for i in range(len(ents) - 1, -1, -1):
|
|
for i in range(len(ents) - 1, -1, -1):
|
|
n += 1
|
|
n += 1
|
|
- if n > 3:
|
|
|
|
|
|
+ if n > 3 and sear_ent: # 文章末尾角色加日期这种只找后三个实体
|
|
break
|
|
break
|
|
if ents[i].entity_text == ent_re or (ents[i].entity_text in ent_re and len(ents[i].entity_text)/len(ent_re)>0.6):
|
|
if ents[i].entity_text == ent_re or (ents[i].entity_text in ent_re and len(ents[i].entity_text)/len(ent_re)>0.6):
|
|
ents[i].label = 1
|
|
ents[i].label = 1
|
|
@@ -1439,7 +1454,7 @@ class RoleRuleFinalAdd():
|
|
n = 0
|
|
n = 0
|
|
for i in range(len(ents) - 1, -1, -1):
|
|
for i in range(len(ents) - 1, -1, -1):
|
|
n += 1
|
|
n += 1
|
|
- if n > 3:
|
|
|
|
|
|
+ if n > 3 and sear_ent: # 文章末尾角色加日期这种只找后三个实体
|
|
break
|
|
break
|
|
if ents[i].entity_text == ent_re or (ents[i].entity_text in ent_re and len(ents[i].entity_text)/len(ent_re)>0.6):
|
|
if ents[i].entity_text == ent_re or (ents[i].entity_text in ent_re and len(ents[i].entity_text)/len(ent_re)>0.6):
|
|
ents[i].label = 0
|
|
ents[i].label = 0
|