|
@@ -4326,14 +4326,16 @@ def get_win_joint(prem, list_entitys, list_sentences, list_articles):
|
|
b = b2
|
|
b = b2
|
|
e = e2
|
|
e = e2
|
|
find_joint = 1
|
|
find_joint = 1
|
|
- elif (find_joint or re.search('与[^,。]{6,100}联合体', list_articles[0].content)) and s[e:b2] in ['与',';','、','&',',','/','//'] and (len(s)==e2 or s[e2] in [';','、','&',',','/','//', '。'] or s[e2:e2+3]=='联合体'):
|
|
|
|
|
|
+ elif (find_joint or re.search('与[^,。]{6,100}联合体', list_articles[0].content)) and behind_entity.entity_type in ['org', 'company'] and s[e:b2] in ['与',';','、','&',',','/','//'] and (len(s)==e2 or s[e2] in [';','、','&',',','/','//', '。'] or s[e2:e2+3]=='联合体'):
|
|
join_l.append(behind_entity.entity_text)
|
|
join_l.append(behind_entity.entity_text)
|
|
b = b2
|
|
b = b2
|
|
e = e2
|
|
e = e2
|
|
|
|
+ elif e == e2: # 修复重复实体导致中断情况
|
|
|
|
+ continue
|
|
else:
|
|
else:
|
|
break
|
|
break
|
|
if len(join_l)>1:
|
|
if len(join_l)>1:
|
|
- d['win_tenderer_joint'] = ','.join(set(join_l))
|
|
|
|
|
|
+ d['win_tenderer_joint'] = ','.join(set(join_l))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -4453,15 +4455,17 @@ def get_multi_winner_and_money(channel_dic, prem, list_entitys,list_sentences):
|
|
sentence_text = sentences[ent_bh.sentence_index].sentence_text
|
|
sentence_text = sentences[ent_bh.sentence_index].sentence_text
|
|
if sentence_text[e_idx_fr:b_idx_bh] in [';', '、', '&', ',', '/', '//'] and (
|
|
if sentence_text[e_idx_fr:b_idx_bh] in [';', '、', '&', ',', '/', '//'] and (
|
|
len(sentence_text) == e_idx_bh or sentence_text[e_idx_bh] in [';', '、', '&', ',','/', '//','。']): # 修复多中标人刚好在文末index超出报错,例子 407126558
|
|
len(sentence_text) == e_idx_bh or sentence_text[e_idx_bh] in [';', '、', '&', ',','/', '//','。']): # 修复多中标人刚好在文末index超出报错,例子 407126558
|
|
- multi_winner_l.append((ent.entity_text, ent.sentence_index, ent.wordOffset_begin, ent.in_attachment))
|
|
|
|
|
|
+ multi_winner_l.append((ent_bh.entity_text, ent_bh.sentence_index, ent_bh.wordOffset_begin, ent_bh.in_attachment))
|
|
e_idx_fr = e_idx_bh
|
|
e_idx_fr = e_idx_bh
|
|
i = j + 1
|
|
i = j + 1
|
|
else:
|
|
else:
|
|
break
|
|
break
|
|
elif ent_bh.entity_type in ['org', 'company'] and ent_bh.label == 5 and ent_bh.sentence_index == ent.sentence_index and b_idx_bh == e_idx_fr:
|
|
elif ent_bh.entity_type in ['org', 'company'] and ent_bh.label == 5 and ent_bh.sentence_index == ent.sentence_index and b_idx_bh == e_idx_fr:
|
|
- multi_winner_l.append((ent.entity_text, ent.sentence_index, ent.wordOffset_begin, ent.in_attachment))
|
|
|
|
|
|
+ multi_winner_l.append((ent_bh.entity_text, ent_bh.sentence_index, ent_bh.wordOffset_begin, ent_bh.in_attachment))
|
|
e_idx_fr = e_idx_bh
|
|
e_idx_fr = e_idx_bh
|
|
i = j + 1
|
|
i = j + 1
|
|
|
|
+ elif ent_bh.entity_type in ['org', 'company'] and ent_bh.label == 5 and e_idx_fr == e_idx_bh: # 处理 514603520 中国邮政储蓄银行股份有限公司淄博市临淄区支行 实体由于字典匹配重复两次情况
|
|
|
|
+ i = j + 1
|
|
else:
|
|
else:
|
|
break
|
|
break
|
|
if re.search('入围', pre_text) and re.search('未入围', pre_text)==None:
|
|
if re.search('入围', pre_text) and re.search('未入围', pre_text)==None:
|
|
@@ -4615,8 +4619,16 @@ def confirm_prem(prem, channel_dic):
|
|
if d['role_name'] in ['win_tenderer', 'pre_win_tenderer', 'second_tenderer','third_tenderer']:
|
|
if d['role_name'] in ['win_tenderer', 'pre_win_tenderer', 'second_tenderer','third_tenderer']:
|
|
if k == 'Project':
|
|
if k == 'Project':
|
|
pro_winner.add(d['role_text'])
|
|
pro_winner.add(d['role_text'])
|
|
|
|
+ if 'win_tenderer_joint' in d:
|
|
|
|
+ pro_winner.update(set(d['win_tenderer_joint'].split(',')))
|
|
|
|
+ if 'multi_winner' in d:
|
|
|
|
+ pro_winner.update(set(d['multi_winner'].split(',')))
|
|
else:
|
|
else:
|
|
other_winner.add(d['role_text'])
|
|
other_winner.add(d['role_text'])
|
|
|
|
+ if 'win_tenderer_joint' in d:
|
|
|
|
+ other_winner.update(set(d['win_tenderer_joint'].split(',')))
|
|
|
|
+ if 'multi_winner' in d:
|
|
|
|
+ other_winner.update(set(d['multi_winner'].split(',')))
|
|
if pro_winner & other_winner != set():
|
|
if pro_winner & other_winner != set():
|
|
prem['Project']['roleList'] = [d for d in prem['Project']['roleList'] if
|
|
prem['Project']['roleList'] = [d for d in prem['Project']['roleList'] if
|
|
d['role_name'] not in ['win_tenderer', 'second_tenderer',
|
|
d['role_name'] not in ['win_tenderer', 'second_tenderer',
|