|
@@ -1291,6 +1291,10 @@ class RoleRulePredictor():
|
|
|
end_index=p_entity.end_index, size=20, center_include=True,
|
|
|
word_flag=True, use_text=True,
|
|
|
text=re.sub(")", ")", re.sub("(", "(", p_entity.entity_text)))
|
|
|
+ if re.search(self.pattern_tenderee_left, _span[0]) or re.search(self.pattern_tenderee_left_w0, _span[0]): # 前面有关键词的实体不判断是否在项目名称中出现
|
|
|
+ find_flag = True
|
|
|
+ break
|
|
|
+
|
|
|
for _name in list_name:
|
|
|
if _name != "" and str(_span[0][-10:]+_span[1] + _span[2][:len(str(_name))]).find(_name) >= 0: #加上前面一些信息,修复公司不在项目名称开头的,检测不到
|
|
|
find_flag = True
|
|
@@ -1880,17 +1884,17 @@ class TendereeRuleRecall():
|
|
|
|
|
|
class RoleGrade():
|
|
|
def __init__(self):
|
|
|
- self.tenderee_left_9 = "(?P<tenderee_left_9>(招标|采购|遴选|寻源|竞价|议价|比选|委托|询比?价|比价|评选|谈判|邀标|邀请|洽谈|约谈|选取|抽取|抽选|甲)(人|方|单位))"
|
|
|
+ self.tenderee_left_9 = "(?P<tenderee_left_9>(招标|采购|遴选|寻源|竞价|议价|比选|委托|询比?价|比价|评选|谈判|邀标|邀请|洽谈|约谈|选取|抽取|抽选)(人|方|单位))"
|
|
|
self.tenderee_center_9 = "(?P<tenderee_center_9>受.{5,20}委托)"
|
|
|
- self.tenderee_left_8 = "(?P<tenderee_left_8>(业主|转让方|尊敬的供应商|出租方|处置方|(需求|建设|最终|发包)(人|方|单位|组织|用户|业主|主体|部门|公司)))"
|
|
|
+ self.tenderee_left_8 = "(?P<tenderee_left_8>(业主|转让方|尊敬的供应商|出租方|处置方|(需求|建设|最终|发包|甲)(人|方|单位|组织|用户|业主|主体|部门|公司)))"
|
|
|
self.agency_left_9 = "(?P<agency_left_9>代理)"
|
|
|
- self.winTenderer_left_9 = "(?P<winTenderer_left_9>(中标|中选|中价|成交|竞得|乙方)|第[1一]|排名:1)"
|
|
|
- self.winTenderer_left_8 = "(?P<winTenderer_left_8>(入选供应商|供货商))"
|
|
|
+ self.winTenderer_left_9 = "(?P<winTenderer_left_9>(中标|中选|中价|成交|竞得)|第[1一]|排名:1)"
|
|
|
+ self.winTenderer_left_8 = "(?P<winTenderer_left_8>(入选供应商|供货商|乙方))"
|
|
|
self.secondTenderer_left_9 = "(?P<secondTenderer_left_9>(第[二2](中标|中选|中价|成交)?候选(人|单位|供应商|公司)|第[二2]名|排名:2))"
|
|
|
self.thirdTenderer_left_9 = "(?P<thirdTenderer_left_9>(第[三3](中标|中选|中价|成交)?候选(人|单位|供应商|公司)|第[三3]名|排名:3))"
|
|
|
self.pattern_list = [self.tenderee_left_9,self.tenderee_center_9, self.tenderee_left_8,self.agency_left_9, self.winTenderer_left_9,
|
|
|
self.winTenderer_left_8, self.secondTenderer_left_9, self.thirdTenderer_left_9]
|
|
|
- def predict(self, list_sentences, list_entitys, span=10, min_prob=0.7):
|
|
|
+ def predict(self, list_sentences, list_entitys, span=15, min_prob=0.7):
|
|
|
'''
|
|
|
根据规则给角色分配不同等级概率;分三级:0.9-1,0.8-0.9,0.7-0.8;附件0.7-0.8,0.6-0.7,0.5-0.6
|
|
|
:param list_articles:
|
|
@@ -1901,8 +1905,11 @@ class RoleGrade():
|
|
|
'''
|
|
|
sentences = sorted(list_sentences[0], key=lambda x:x.sentence_index)
|
|
|
role2id = {"tenderee": 0, "agency": 1, "winTenderer": 2, "secondTenderer": 3, "thirdTenderer": 4}
|
|
|
+ org_winner = []
|
|
|
+ company_winner = []
|
|
|
+ org_tenderee = []
|
|
|
for entity in list_entitys[0]:
|
|
|
- if entity.entity_type in ['org', 'company'] and entity.label in [0, 1, 2, 3, 4] and entity.values[entity.label]> 0.5:
|
|
|
+ if entity.entity_type in ['org', 'company'] and entity.label in [0, 1, 2, 3, 4] and entity.values[entity.label]> min_prob:
|
|
|
text = sentences[entity.sentence_index].sentence_text
|
|
|
in_att = sentences[entity.sentence_index].in_attachment
|
|
|
pre_prob = entity.values[entity.label]
|
|
@@ -1940,6 +1947,28 @@ class RoleGrade():
|
|
|
entity.values[entity.label] = _prob + entity.values[entity.label] / 20
|
|
|
# print('找不到规则修改角色概率:', entity.entity_text, entity.label, entity.values)
|
|
|
|
|
|
+ if entity.label == 2 and entity.values[entity.label]> min_prob:
|
|
|
+ if entity.entity_type == 'org':
|
|
|
+ org_winner.append(entity)
|
|
|
+ elif entity.entity_type == 'company':
|
|
|
+ company_winner.append(entity) # 保存中标人实体
|
|
|
+ if entity.label == 0 and entity.values[entity.label]> min_prob:
|
|
|
+ org_tenderee.append(entity.entity_text) # 保存所有招标人名称
|
|
|
+
|
|
|
+ if org_winner != []:
|
|
|
+ flag = 0
|
|
|
+ if org_tenderee != []:
|
|
|
+ for ent in org_winner:
|
|
|
+ if ent.entity_text in org_tenderee:
|
|
|
+ # log('如果org中标人同时为招标人角色,降低中标概率:%s, %s' % (ent.entity_text, ent.label))
|
|
|
+ ent.values[2] = 0.6
|
|
|
+ flag = 1
|
|
|
+ if flag == 0 and company_winner != []:
|
|
|
+ for ent in org_winner:
|
|
|
+ if ent.label == 2 and ent.values[2] > 0.6:
|
|
|
+ # log('如果同时包含org和company中标人,降低org中标人概率为0.6:%s, %s' % (ent.entity_text, ent.values[2]))
|
|
|
+ ent.values[2] = 0.6
|
|
|
+
|
|
|
|
|
|
class MoneyGrade():
|
|
|
def __init__(self):
|