|
@@ -30,6 +30,7 @@ dict_predictor = {"codeName":{"predictor":None,"Lock":RLock()},
|
|
|
"prem":{"predictor":None,"Lock":RLock()},
|
|
|
"epc":{"predictor":None,"Lock":RLock()},
|
|
|
"roleRule":{"predictor":None,"Lock":RLock()},
|
|
|
+ "roleRuleFinal":{"predictor":None,"Lock":RLock()},
|
|
|
"form":{"predictor":None,"Lock":RLock()},
|
|
|
"time":{"predictor":None,"Lock":RLock()},
|
|
|
"punish":{"predictor":None,"Lock":RLock()},
|
|
@@ -51,6 +52,8 @@ def getPredictor(_type):
|
|
|
dict_predictor[_type]["predictor"] = EPCPredict()
|
|
|
if _type=="roleRule":
|
|
|
dict_predictor[_type]["predictor"] = RoleRulePredictor()
|
|
|
+ if _type == "roleRuleFinal":
|
|
|
+ dict_predictor[_type]["predictor"] = RoleRuleFinalAdd()
|
|
|
if _type=="form":
|
|
|
dict_predictor[_type]["predictor"] = FormPredictor()
|
|
|
if _type=="time":
|
|
@@ -658,6 +661,8 @@ class PREMPredict():
|
|
|
elif re.search('尊敬的供应商:.{,25}我公司', text):
|
|
|
label = 0
|
|
|
values[label] = 0.801
|
|
|
+ if label == 1 and re.search('委托(单位|人|方)[是为:]+', text[:10]) and re.search('受委托(单位|人|方)[是为:]+', text[:10])==None:
|
|
|
+ label = 0
|
|
|
entity.set_Role(label, values)
|
|
|
|
|
|
def predict_money(self,list_sentences,list_entitys):
|
|
@@ -1083,12 +1088,13 @@ class FormPredictor():
|
|
|
class RoleRulePredictor():
|
|
|
|
|
|
def __init__(self):
|
|
|
- self.pattern_tenderee_left = "(?P<tenderee_left>((遴选|采购|招标|项目|竞价|议价|需求|最终|建设|业主|转让|招租|甲|议标|合同主体|比选|委托|询价)(?:人|公司|单位|组织|用户|业主|方|部门)|文章来源|需方)(名称)?(是|为|信息|:|:|\s*)$)"
|
|
|
+ # self.pattern_tenderee_left = "(?P<tenderee_left>((遴选|采购|招标|项目|竞价|议价|需求|最终|建设|业主|转让|招租|甲|议标|合同主体|比选|委托|询价)(?:人|公司|单位|组织|用户|业主|方|部门)|文章来源|需方)(名称)?(是|为|信息|:|:|\s*)$)"
|
|
|
+ self.pattern_tenderee_left = "(?P<tenderee_left>((遴选|采购|招标|项目|竞价|议价|需求|最终|建设|业主|转让|招租|甲|议标|合同主体|比选|委托|询价|评选|挂牌|出租|出让|谈判|邀标|邀请|洽谈|约谈|买受|选取|抽取|抽选|出售|标卖|比价)(人|公司|单位|组织|用户|业主|主体|方|部门)|文章来源|委托机构|产权所有人|需方|买方|业主|权属人|甲方当事人)[))]?(名称|信息)?([((](全称|盖章)[))])?(是|为|:|:|,|\s*)+$)"
|
|
|
self.pattern_tenderee_center = "(?P<tenderee_center>(受.{,20}委托))"
|
|
|
- self.pattern_tenderee_right = "(?P<tenderee_right>^(\((以下简称)?[\"”]?(招标|采购)(人|单位|机构)\)?))" #|(^[^.。,,::](采购|竞价|招标|施工|监理|中标|物资)(公告|公示|项目|结果|招标))|的.*正在进行询比价)
|
|
|
-
|
|
|
- self.pattern_agency_left = "(?P<agency_left>(代理(?:人|机构|公司|单位|组织)|专业采购机构|集中采购机构|集采机构|招标机构)(.{,4}名,?称|全称|是|为|:|:|[,,]?\s*)$|(受.{,20}委托))"
|
|
|
- self.pattern_agency_right = "(?P<agency_right>^(\((以下简称)?[\"”]?(代理)(人|单位|机构)\))|受.{,15}委托)"
|
|
|
+ self.pattern_tenderee_right = "(?P<tenderee_right>^([((](以下简称)?[,\"“]*(招标|采购)(人|单位|机构)\)?))|^委托" #|(^[^.。,,::](采购|竞价|招标|施工|监理|中标|物资)(公告|公示|项目|结果|招标))|的.*正在进行询比价)
|
|
|
+
|
|
|
+ self.pattern_agency_left = "(?P<agency_left>(代理(?:人|机构|公司|单位|组织)|专业采购机构|集中采购机构|集采机构|[招议))]+标机构)(.{,4}名,?称|全称|是|为|:|:|[,,]?\s*)$|(受.{,20}委托))"
|
|
|
+ self.pattern_agency_right = "(?P<agency_right>^([((](以下简称)?[,\"“]*(代理)(人|单位|机构)\))|受.{,15}委托)|^受托"
|
|
|
# 2020//11/24 大网站规则 中标关键词添加 选定单位|指定的中介服务机构
|
|
|
self.pattern_winTenderer_left = "(?P<winTenderer_left>((中标|中选|中价|乙|成交|承做|施工|供货|承包|竞得|受让)(候选)?(人|单位|机构|各?供应商|方|公司|厂商|商)[::是为]+$|(选定单位|指定的中介服务机构))[::是为,]+$|(第[一1](名|((中标|中选|中价|成交)?(候选)?(人|单位|机构|供应商))))[::是为]+$|((评审结果|名次|排名)[::]第?[一1]名?)$|单一来源(采购)?方式向$|((中标|成交)(结果|信息))(是|为|:|:)$|(单一来源采购(供应商|供货商|服务商))$|[^候选]((分包|标包){,5}供应商|供货商|服务商|供应商名称|服务机构|供方)[::]$)"
|
|
|
# self.pattern_winTenderer_center = "(?P<winTenderer_center>第[一1].{,20}[是为]((中标|中选|中价|成交|施工)(人|单位|机构|供应商|公司)|供应商)[::是为])"
|
|
@@ -1371,6 +1377,47 @@ class RoleRulePredictor():
|
|
|
if p_entity.entity_text in self.SET_NOT_TENDERER:
|
|
|
p_entity.label=5
|
|
|
|
|
|
+'''正则补充最后一句实体日期格式为招标或代理 2021/12/30'''
|
|
|
class RoleRuleFinalAdd():
    """Rule-based fallback that labels the organisation signing the document.

    The last characters of the article are searched for a closing signature
    of the form ``<punctuation><name>,<year>年<month>月<day>日``.  When such a
    name is found and the matching role has not been assigned to any
    org/company entity yet, one of the most recent unlabelled entities whose
    text equals that name is promoted:

    * to label 1 (agency) when the name contains an agency-like keyword and
      no entity carries label 1 yet;
    * otherwise to label 0 (tenderee) when the name contains no such keyword
      and no entity carries label 0 yet.

    NOTE(review): label 5 is treated here as "no role assigned yet"; confirm
    against the entity label definition used by the rest of the project.
    """

    # Closing signature: a separator, 5-20 CJK/parenthesis characters
    # (captured), a comma, then a date written with 年/月/日.
    _SIGNATURE_PATTERN = re.compile(
        r'[,。]([\u4e00-\u9fa5()()]{5,20}),\s*.{2,4}年.{1,2}月.{1,2}日')
    # Keywords that make a signing organisation look like an agency.
    _AGENCY_HINT_PATTERN = re.compile('(采购|招标|投标|交易|代理|拍卖|咨询|顾问|管理)')
    # Only this many trailing characters of the article are inspected.
    _TAIL_LENGTH = 30
    # Only this many of the latest unlabelled entities may be promoted.
    _MAX_CANDIDATES = 3
    # Score written into ``values`` for a role assigned by this rule.
    _RULE_SCORE = 0.5

    def predict(self, list_articles, list_entitys):
        """Promote the signing organisation to tenderee or agency, in place.

        Args:
            list_articles: sequence whose first item exposes ``content``
                (the article text).
            list_entitys: sequence whose first item is the iterable of
                entities of that article; each entity exposes
                ``entity_type``, ``entity_text``, ``label`` and an indexable
                ``values``.

        Returns:
            None.  At most one entity has its ``label`` and the matching
            slot of ``values`` modified.
        """
        # Nothing to inspect: behave as a no-op instead of raising IndexError.
        if not list_articles or not list_entitys:
            return

        text_end = list_articles[0].content[-self._TAIL_LENGTH:]
        signature = self._SIGNATURE_PATTERN.search(text_end)
        if signature is None:
            return
        signer = signature.group(1)

        tenderee_notfound = True
        agency_notfound = True
        candidates = []
        for ent in list_entitys[0]:
            if ent.entity_type not in ('org', 'company'):
                continue
            if ent.label == 0:
                tenderee_notfound = False
            elif ent.label == 1:
                agency_notfound = False
            elif ent.label == 5:
                candidates.append(ent)

        looks_like_agency = self._AGENCY_HINT_PATTERN.search(signer) is not None
        if agency_notfound and looks_like_agency:
            self._assign_role(candidates, signer, 1)
        elif tenderee_notfound and not looks_like_agency:
            self._assign_role(candidates, signer, 0)

    @classmethod
    def _assign_role(cls, candidates, signer, label):
        """Give ``label`` to the latest candidate whose text equals ``signer``."""
        # Walk backwards over the last few candidates only: the signature sits
        # at the very end of the document, so earlier mentions are ignored.
        for ent in reversed(candidates[-cls._MAX_CANDIDATES:]):
            if ent.entity_text == signer:
                ent.label = label
                ent.values[label] = cls._RULE_SCORE
                return
|
|
|
+
|
|
|
+
|
|
|
# 时间类别
|
|
|
class TimePredictor():
|
|
|
def __init__(self):
|