|
@@ -2753,103 +2753,83 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
|
|
|
packagePointer.pointer_tendereeMoney = entity
|
|
|
p_entity -= 1
|
|
|
|
|
|
- '''包名与标段号链接'''
|
|
|
- l_main = []
|
|
|
- l_attn = []
|
|
|
- pack_num_main = 0
|
|
|
- name_num_main = 0
|
|
|
- pack_num_attn = 0
|
|
|
- name_num_attn = 0
|
|
|
- for entity in list_entity:
|
|
|
- if entity.entity_type in ['name', 'package']:
|
|
|
- if entity.in_attachment:
|
|
|
- l_attn.append((entity.entity_type, entity.entity_text, entity.sentence_index, entity.wordOffset_begin, entity.wordOffset_end))
|
|
|
- if entity.entity_type == 'name':
|
|
|
- name_num_attn += 1
|
|
|
- else:
|
|
|
- pack_num_attn += 1
|
|
|
- else:
|
|
|
- l_main.append((entity.entity_type, entity.entity_text, entity.sentence_index, entity.wordOffset_begin, entity.wordOffset_end))
|
|
|
- if entity.entity_type == 'name':
|
|
|
- name_num_main += 1
|
|
|
- else:
|
|
|
- pack_num_main += 1
|
|
|
- if name_num_main > 0 and pack_num_main > 0:
|
|
|
- l_main.sort(key=lambda x: [x[2],x[3]])
|
|
|
- # print('正文名称:',l_main)
|
|
|
- link_dic = {}
|
|
|
- i = 1
|
|
|
- pre_ty = l_main[0][0]
|
|
|
- while i < len(l_main):
|
|
|
- if l_main[i][0] != pre_ty:
|
|
|
- ty1, ent1, s1, b1, e1 = l_main[i-1]
|
|
|
- ty2, ent2, s2, b2, e2 = l_main[i]
|
|
|
- if ty1 == 'package':
|
|
|
- if ent1 not in link_dic:
|
|
|
- link_dic[ent1] = []
|
|
|
- if s1 == s2:
|
|
|
- dist = abs(b2 - b1)
|
|
|
- else:
|
|
|
- dist = len(list_sentence[s1].sentence_text) - b1
|
|
|
- for id in range(s1+1, s2):
|
|
|
- dist += len(list_sentence[id].sentence_text)
|
|
|
- dist += b2
|
|
|
- link_dic[ent1].append((s2-s1, dist, ent2))
|
|
|
- elif ty2 == 'package':
|
|
|
- if ent2 not in link_dic:
|
|
|
- link_dic[ent2] = []
|
|
|
- if s1 == s2:
|
|
|
- dist = abs(b2 - b1)
|
|
|
- else:
|
|
|
- dist = len(list_sentence[s1].sentence_text) - b1
|
|
|
- for id in range(s1+1, s2):
|
|
|
- dist += len(list_sentence[id].sentence_text)
|
|
|
- dist += b2
|
|
|
- link_dic[ent2].append((s2-s1, dist, ent1))
|
|
|
- pre_ty = l_main[i][0]
|
|
|
- i += 1
|
|
|
- for k, v in link_dic.items():
|
|
|
- v.sort(key=lambda x: [x[0], x[1]])
|
|
|
- # print('各包排序后项目名:', k, v)
|
|
|
- PackDict[k]["name"] = v[0][2]
|
|
|
- elif name_num_attn > 0 and pack_num_attn > 0:
|
|
|
- # print("附件名称:", l_attn)
|
|
|
- l_attn.sort(key=lambda x: [x[2],x[3]])
|
|
|
+ '''标段链接包名包号'''
|
|
|
+ pk_name_l = []
|
|
|
+ pk_code_l = []
|
|
|
+ count_dic = {
|
|
|
+ 'package': set(),
|
|
|
+ 'name': set(),
|
|
|
+ 'code': set()
|
|
|
+ }
|
|
|
+
|
|
|
def get_sort_dist(l, max_sent_dist=2):
    '''
    Link each 'package' entity to the neighbouring entities of another type
    and record how far apart they are.

    The list is sorted in place by (sentence_index, wordOffset_begin) and only
    directly adjacent pairs are examined. A pair is linked when the two types
    differ, both items carry the same in_attachment flag and they are at most
    ``max_sent_dist`` sentences apart.

    :param l: list of tuples (entity_type, entity_text, sentence_index,
              wordOffset_begin, wordOffset_end, in_attachment); sorted in place
    :param max_sent_dist: largest allowed sentence-index difference for a pair
    :return: dict mapping package text -> list of
             (sentence gap, distance, text of the linked entity)
    '''
    def span_gap(sent_a, end_a, sent_b, begin_b):
        # Same sentence: plain offset difference between the end of the first
        # entity and the beginning of the second one.
        if sent_a == sent_b:
            return abs(begin_b - end_a)
        # Different sentences: rest of the first sentence, every sentence in
        # between, then the offset inside the last sentence.
        # NOTE(review): list_sentence comes from the enclosing scope and is
        # assumed to be indexable by sentence_index -- confirm.
        gap = len(list_sentence[sent_a].sentence_text) - end_a
        for mid in range(sent_a + 1, sent_b):
            gap += len(list_sentence[mid].sentence_text)
        return gap + begin_b

    l.sort(key=lambda item: [item[2], item[3]])
    link_dic = {}
    for prev, cur in zip(l, l[1:]):
        ty1, ent1, s1, _b1, e1, in_att1 = prev
        ty2, ent2, s2, b2, _e2, in_att2 = cur
        if ty1 == ty2 or in_att1 != in_att2 or s2 - s1 > max_sent_dist:
            continue
        if ty1 == 'package':
            pack_text, other_text, order_penalty = ent1, ent2, 0
        elif ty2 == 'package':
            # The package number follows the entity: add 30 to the distance.
            pack_text, other_text, order_penalty = ent2, ent1, 30
        else:
            continue
        dist = span_gap(s1, e1, s2, b2)
        if in_att1:
            dist += 100  # pairs found in an attachment get 100 added
        dist += order_penalty
        link_dic.setdefault(pack_text, []).append((s2 - s1, dist, other_text))
    return link_dic
|
|
|
+
|
|
|
+ for entity in list_entity:
|
|
|
+ if entity.entity_type == 'package':
|
|
|
+ pk_name_l.append((entity.entity_type, entity.entity_text, entity.sentence_index, entity.wordOffset_begin, entity.wordOffset_end, entity.in_attachment))
|
|
|
+ pk_code_l.append((entity.entity_type, entity.entity_text, entity.sentence_index, entity.wordOffset_begin, entity.wordOffset_end, entity.in_attachment))
|
|
|
+ count_dic['package'].add(entity.entity_text)
|
|
|
+ elif entity.entity_type == 'name':
|
|
|
+ pk_name_l.append((entity.entity_type, entity.entity_text, entity.sentence_index, entity.wordOffset_begin, entity.wordOffset_end, entity.in_attachment))
|
|
|
+ count_dic['name'].add(entity.entity_text)
|
|
|
+ elif entity.entity_type == 'code':
|
|
|
+ pk_code_l.append((entity.entity_type, entity.entity_text, entity.sentence_index, entity.wordOffset_begin, entity.wordOffset_end, entity.in_attachment))
|
|
|
+ count_dic['code'].add(entity.entity_text)
|
|
|
+ if len(count_dic['package']) > 0:
|
|
|
+ if len(count_dic['name'])>0:
|
|
|
+ link_dic = get_sort_dist(pk_name_l)
|
|
|
+ for k, v in link_dic.items():
|
|
|
+ v.sort(key=lambda x: [x[0], x[1]])
|
|
|
+ if v[0][0] < 2 and v[0][1] < 200: # 标段号与包名句子数小于2,字距离小于200的才添加
|
|
|
+ PackDict[k]["name"] = v[0][2]
|
|
|
+ if len(count_dic['code'])>0:
|
|
|
+ link_dic = get_sort_dist(pk_code_l)
|
|
|
+ for k, v in link_dic.items():
|
|
|
+ v.sort(key=lambda x: [x[0], x[1]])
|
|
|
+ if v[0][0] < 2 and v[0][1] < 200:
|
|
|
+ PackDict[k]["code"] = v[0][2]
|
|
|
|
|
|
#删除一个机构有多个角色的数据
|
|
|
#删除重复人、概率不回传
|
|
@@ -4801,7 +4781,7 @@ def update_prem(old_prem, new_prem, in_attachment=False):
|
|
|
|
|
|
# return old_prem
|
|
|
|
|
|
-def confirm_prem(prem, channel_dic, is_deposit_project=False, total_tendereeMoney=0, name=""):
|
|
|
+def confirm_prem(prem, channel_dic, is_deposit_project=False, total_tendereeMoney=0):
|
|
|
'''
|
|
|
规则检查纠正prem,如果Project包中标人在其他包中标人,去掉project包中标角色;如果有其他包中标人,去掉roleList为空的包;
|
|
|
:param prem: prem 字段字典
|
|
@@ -4852,7 +4832,36 @@ def confirm_prem(prem, channel_dic, is_deposit_project=False, total_tendereeMone
|
|
|
for k in prem:
|
|
|
if float(prem[k]['tendereeMoney'])==0:
|
|
|
prem[k]['tendereeMoney'] = total_tendereeMoney
|
|
|
- if name != '' and len(prem)<=2: # 20241129 小于等于两个包且无包名称,取项目名称
|
|
|
+
|
|
|
def add_package_name(prem, list_entity, product_list, name):
    '''
    Fill in missing package names in ``prem`` from products or the project name.

    With more than two packages and more than two products, a 'package' entity
    that is directly followed (same sentence, same in_attachment flag, gap of
    1 or 2 between its end offset and the product's begin offset) by a
    'product' entity is paired with that product. The pairs are applied only
    when more than one was found, and only to packages whose 'name' is missing
    or empty. Otherwise, with at most two packages and a non-empty ``name``,
    every package without a name receives ``name``.

    :param prem: dict mapping package key -> package info dict; updated in place
    :param list_entity: entities exposing entity_type, entity_text,
                        sentence_index, wordOffset_begin, wordOffset_end and
                        in_attachment
    :param product_list: list of products; only its length is used
    :param name: project name used as the fallback package name
    :return: None
    '''
    if len(prem) > 2 and len(product_list) > 2:
        # Keep only packages and products, ordered by position in the text.
        spans = sorted(
            (
                (item.entity_type, item.entity_text, item.sentence_index,
                 item.wordOffset_begin, item.wordOffset_end, item.in_attachment)
                for item in list_entity
                if item.entity_type in ('product', 'package')
            ),
            key=lambda span: [span[2], span[3]],
        )
        pk_dic = {}
        for left, right in zip(spans, spans[1:]):
            ty1, ent1, s1, _b1, e1, in_att1 = left
            ty2, ent2, s2, b2, _e2, in_att2 = right
            if ty1 != 'package' or ty2 != 'product':
                continue
            if in_att1 == in_att2 and s1 == s2 and 0 < b2 - e1 < 3:
                pk_dic[ent1] = ent2  # a later match for the same package wins
        # A single package/product pair is not enough to apply the mapping.
        if len(pk_dic) > 1:
            for pack_key, pack_info in prem.items():
                if pack_key in pk_dic and pack_info.get('name', '') == '':
                    pack_info['name'] = pk_dic[pack_key]

    elif name != '' and len(prem) <= 2:  # 20241129: at most two packages without a package name take the project name
        for pack_info in prem.values():
            if pack_info.get('name', '') == '':
                pack_info['name'] = name
|