|
@@ -768,6 +768,26 @@ def getPackagesFromArticle(list_sentence,list_entity):
|
|
|
PackageList.append(copy_pack)
|
|
|
return PackageList,PackageSet,dict_packageCode
|
|
|
|
|
|
# KM (Kuhn-Munkres) pairing
def dispatch(match_list):
    """Choose the best one-to-one pairing between main roles and attributes.

    Each element of ``match_list`` must expose ``main_role``, ``attribute``
    and ``value`` attributes, where ``value`` is the score of pairing that
    role with that attribute (higher is better).  Roles and attributes must
    be hashable.  If the same (role, attribute) pair occurs more than once,
    the last occurrence wins.

    Returns a list of ``(main_role, attribute)`` tuples; every role and every
    attribute appears in at most one tuple, and only pairs that were present
    in ``match_list`` are returned.  An empty ``match_list`` yields ``[]``.
    """
    if not match_list:
        return []

    # Number roles/attributes in order of first appearance.  This keeps the
    # matrix layout (and therefore tie-breaking and the result order)
    # deterministic, and gives O(1) index lookups.
    main_roles, role_index = [], {}
    attributes, attribute_index = [], {}
    for match in match_list:
        if match.main_role not in role_index:
            role_index[match.main_role] = len(main_roles)
            main_roles.append(match.main_role)
        if match.attribute not in attribute_index:
            attribute_index[match.attribute] = len(attributes)
            attributes.append(match.attribute)

    # Cells that never received a candidate keep score 0.  Candidate scores
    # are shifted by +10000 so that any candidate whose value is above
    # -10000 is preferred over leaving a role paired with a non-candidate.
    score = np.zeros(shape=(len(main_roles), len(attributes)))
    is_candidate = np.zeros(score.shape, dtype=bool)
    for match in match_list:
        i = role_index[match.main_role]
        j = attribute_index[match.attribute]
        score[i, j] = match.value + 10000
        is_candidate[i, j] = True

    # linear_sum_assignment minimises total cost, so negate to maximise score.
    rows, cols = linear_sum_assignment(-score)

    # Drop assignments that landed on a cell with no candidate.  An explicit
    # mask is used instead of testing the score for non-zero, so a candidate
    # whose shifted score happens to be exactly 0 is not discarded.
    return [(main_roles[i], attributes[j])
            for i, j in zip(rows, cols) if is_candidate[i, j]]
|
|
|
+
|
|
|
+from BiddingKG.dl.common.Utils import getUnifyMoney
|
|
|
from BiddingKG.dl.interface.modelFactory import Model_relation_extraction
|
|
|
relationExtraction_model = Model_relation_extraction()
|
|
|
def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_entity,list_sentence,on_value = 0.5,on_value_person=0.5,sentence_len=4):
|
|
@@ -812,7 +832,15 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_entity
|
|
|
packDict[packageName]["roleList"][i].money_unit = money.money_unit
|
|
|
# print('链接中的金额:{0}, 单位:{1}'.format(money.entity_text, money.money_unit))
|
|
|
return packDict
|
|
|
-
|
|
|
def addRatioByEntity(packDict, packageName, entity, ratio):
    """Record a ratio on every role of a package whose text equals ``entity``.

    ``packDict[packageName]["roleList"]`` is scanned; each role whose
    ``entity_text`` equals ``entity`` gets its ``ratio`` attribute set to
    ``ratio.entity_text``.  Roles are modified in place and nothing is
    returned.  A missing ``packageName`` raises ``KeyError``.
    """
    # Iterate over the roles directly instead of indexing by position.
    for role in packDict[packageName]["roleList"]:
        if role.entity_text == entity:
            role.ratio = ratio.entity_text
|
|
|
def addServiceTimeByEntity(packDict, packageName, entity, serviceTime):
    """Record a service time on every role of a package matching ``entity``.

    ``packDict[packageName]["roleList"]`` is scanned; each role whose
    ``entity_text`` equals ``entity`` gets its ``serviceTime`` attribute set
    to ``serviceTime.entity_text``.  Roles are modified in place and nothing
    is returned.  A missing ``packageName`` raises ``KeyError``.
    """
    # Iterate over the roles directly instead of indexing by position.
    for role in packDict[packageName]["roleList"]:
        if role.entity_text == entity:
            role.serviceTime = serviceTime.entity_text
|
|
|
+
|
|
|
#根据实体名称得到角色
|
|
|
def getRoleWithText(packDict,entity_text):
|
|
|
for pack in packDict.keys():
|
|
@@ -838,8 +866,8 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_entity
|
|
|
# print('连接前修改大于50亿金额:前面是后面的一万倍则把前面金额/10000')
|
|
|
|
|
|
#遍历所有实体
|
|
|
- while(p_entity<len(list_entity)):
|
|
|
- entity = list_entity[p_entity]
|
|
|
+ # while(p_entity<len(list_entity)):
|
|
|
+ # entity = list_entity[p_entity]
|
|
|
'''
|
|
|
#招标金额从后往前找
|
|
|
if entity.entity_type=="money":
|
|
@@ -902,88 +930,206 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_entity
|
|
|
|
|
|
|
|
|
#如果实体属于角色集合,则往后找属性
|
|
|
- if doesEntityOrLinkedEntity_inRoleSet(entity, roleSet):
|
|
|
-
|
|
|
- p_entity += 1
|
|
|
- #循环查找符合的属性
|
|
|
- while(p_entity<len(list_entity)):
|
|
|
-
|
|
|
- entity_after = list_entity[p_entity]
|
|
|
- if entity_after.sentence_index-entity.sentence_index>=sentence_len:
|
|
|
- p_entity -= 1
|
|
|
- break
|
|
|
- #若是遇到公司实体,则跳出循环
|
|
|
- if entity_after.entity_type in ['org','company']:
|
|
|
- p_entity -= 1
|
|
|
- break
|
|
|
- if entity_after.values is not None:
|
|
|
- if entity_after.entity_type=="money":
|
|
|
- if entity_after.values[entity_after.label]>=on_value:
|
|
|
- '''
|
|
|
- #招标金额从后往前找
|
|
|
- if str(entity_after.label)=="0":
|
|
|
- packagePointer,_ = getPackage(PackageList,entity.sentence_index,entity.begin_index,"money-"+str(entity.label))
|
|
|
- if packagePointer is None:
|
|
|
- packageName = "Project"
|
|
|
+ # if doesEntityOrLinkedEntity_inRoleSet(entity, roleSet):
|
|
|
+ #
|
|
|
+ # p_entity += 1
|
|
|
+ # #循环查找符合的属性
|
|
|
+ # while(p_entity<len(list_entity)):
|
|
|
+ #
|
|
|
+ # entity_after = list_entity[p_entity]
|
|
|
+ # if entity_after.sentence_index-entity.sentence_index>=sentence_len:
|
|
|
+ # p_entity -= 1
|
|
|
+ # break
|
|
|
+ # #若是遇到公司实体,则跳出循环
|
|
|
+ # if entity_after.entity_type in ['org','company']:
|
|
|
+ # p_entity -= 1
|
|
|
+ # break
|
|
|
+ # if entity_after.values is not None:
|
|
|
+ # if entity_after.entity_type=="money":
|
|
|
+ # if entity_after.values[entity_after.label]>=on_value:
|
|
|
+ # '''
|
|
|
+ # #招标金额从后往前找
|
|
|
+ # if str(entity_after.label)=="0":
|
|
|
+ # packagePointer,_ = getPackage(PackageList,entity.sentence_index,entity.begin_index,"money-"+str(entity.label))
|
|
|
+ # if packagePointer is None:
|
|
|
+ # packageName = "Project"
|
|
|
+ # else:
|
|
|
+ # packageName = packagePointer.entity_text
|
|
|
+ # addMoneyByRoleid(PackDict, packageName, "0", entity_after.entity_text, entity_after.values[entity_after.label])
|
|
|
+ # '''
|
|
|
+ # if str(entity_after.label)=="1":
|
|
|
+ # #print(entity_after.entity_text,entity.entity_text)
|
|
|
+ # _list_entitys = [entity]+entity.linked_entitys
|
|
|
+ # if len(PackageSet)>0:
|
|
|
+ # packagePointer,_ = getPackage(PackageList,entity_after.sentence_index,entity_after.begin_index,"money-"+str(entity_after.entity_text)+"-"+str(entity_after.label))
|
|
|
+ # if packagePointer is None:
|
|
|
+ # packageName_entity = "Project"
|
|
|
+ # else:
|
|
|
+ # packageName_entity = packagePointer.entity_text
|
|
|
+ # else:
|
|
|
+ # packageName_entity = "Project"
|
|
|
+ # if str(entity.label) in ["2","3","4"]:
|
|
|
+ # # addMoneyByEntity(PackDict, packageName_entity, entity.entity_text, entity_after.entity_text, entity_after.values[entity_after.label])
|
|
|
+ # if entity_after.notes == '单价' or float(entity_after.entity_text)<5000: #2021/12/17 调整小金额阈值,避免203608823.html 两次金额一次万元没提取到的情况
|
|
|
+ # addMoneyByEntity(PackDict, packageName_entity, entity.entity_text, entity_after,
|
|
|
+ # 0.5)
|
|
|
+ # entity.pointer_money = entity_after
|
|
|
+ # # print('role zhao money', entity.entity_text, '中标金额:', entity_after.entity_text)
|
|
|
+ # else:
|
|
|
+ # addMoneyByEntity(PackDict, packageName_entity, entity.entity_text, entity_after,
|
|
|
+ # entity_after.values[entity_after.label])
|
|
|
+ # entity.pointer_money = entity_after
|
|
|
+ # # print('role zhao money', entity.entity_text, '中标金额:', entity_after.entity_text)
|
|
|
+ # if entity_after.values[entity_after.label]>0.6:
|
|
|
+ # break # 2021/7/16 新增,找到中标金额,非单价即停止,不再往后找金额
|
|
|
+ # #add pointer_money
|
|
|
+ # # entity.pointer_money = entity_after
|
|
|
+ # # print('role zhao money', entity.entity_text, '中标金额:', entity_after.entity_text)
|
|
|
+ # # if entity_after.notes!='单价':
|
|
|
+ # # break # 2021/7/16 新增,找到中标金额即停止,不再往后找金额
|
|
|
+ # '''
|
|
|
+ # if entity_after.entity_type=="person":
|
|
|
+ # if entity_after.values[entity_after.label]>=on_value_person:
|
|
|
+ # if str(entity_after.label)=="1":
|
|
|
+ # for i in range(len(roleList)):
|
|
|
+ # if roleList[i].role_name=="tenderee":
|
|
|
+ # roleList[i].linklist.append((entity_after.entity_text,entity_after.person_phone))
|
|
|
+ # elif str(entity_after.label)=="2":
|
|
|
+ # for i in range(len(roleList)):
|
|
|
+ # if roleList[i].role_name=="agency":
|
|
|
+ # roleList[i].linklist.append((entity_after.entity_text,entity_after.person_phone))
|
|
|
+ # elif str(entity_after.label)=="3":
|
|
|
+ # _list_entitys = [entity]+entity.linked_entitys
|
|
|
+ # for _entity in _list_entitys:
|
|
|
+ # for i in range(len(roleList)):
|
|
|
+ # if roleList[i].entity_text==_entity.entity_text:
|
|
|
+ # if entity_after.sentence_index-_entity.sentence_index>1 and len(roleList[i].linklist)>0:
|
|
|
+ # break
|
|
|
+ # roleList[i].linklist.append((entity_after.entity_text,entity_after.person_phone))
|
|
|
+ # '''
|
|
|
+ #
|
|
|
+ # p_entity += 1
|
|
|
+ #
|
|
|
+ # p_entity += 1
|
|
|
+ # 记录每句的分词数量
|
|
|
+ tokens_num_dict = dict()
|
|
|
+ last_tokens_num = 0
|
|
|
+ for sentence in list_sentence:
|
|
|
+ _index = sentence.sentence_index
|
|
|
+ if _index == 0:
|
|
|
+ tokens_num_dict[_index] = 0
|
|
|
+ else:
|
|
|
+ tokens_num_dict[_index] = tokens_num_dict[_index - 1] + last_tokens_num
|
|
|
+ last_tokens_num = len(sentence.tokens)
|
|
|
+ attribute_type = ['money','serviceTime','ratio']# 'money'仅指“中投标金额”
|
|
|
+ for link_attribute in attribute_type:
|
|
|
+ temp_entity_list = []
|
|
|
+ if link_attribute=="money":
|
|
|
+ temp_entity_list = [ent for ent in list_entity if (ent.entity_type in ['org','company'] and ent.label in [2,3,4]) or
|
|
|
+ (ent.entity_type=='money' and ent.label==1)]
|
|
|
+ # 删除重复的‘中投标金额’,一般为大小写两种样式
|
|
|
+ drop_tendererMoney = []
|
|
|
+ for ent_idx in range(len(temp_entity_list)-1):
|
|
|
+ entity = temp_entity_list[ent_idx]
|
|
|
+ if entity.entity_type=='money':
|
|
|
+ next_entity = temp_entity_list[ent_idx+1]
|
|
|
+ if next_entity.entity_type=='money':
|
|
|
+ if getUnifyMoney(entity.entity_text)==getUnifyMoney(next_entity.entity_text):
|
|
|
+ if (tokens_num_dict[next_entity.sentence_index] + next_entity.begin_index) - (
|
|
|
+ tokens_num_dict[entity.sentence_index] + entity.end_index) < 10:
|
|
|
+ drop_tendererMoney.append(next_entity)
|
|
|
+ for _drop in drop_tendererMoney:
|
|
|
+ temp_entity_list.remove(_drop)
|
|
|
+ elif link_attribute=="serviceTime":
|
|
|
+ temp_entity_list = [ent for ent in list_entity if (ent.entity_type in ['org','company'] and ent.label in [2,3,4]) or
|
|
|
+ ent.entity_type=='serviceTime']
|
|
|
+ elif link_attribute=="ratio":
|
|
|
+ temp_entity_list = [ent for ent in list_entity if (ent.entity_type in ['org','company'] and ent.label in [2,3,4]) or
|
|
|
+ ent.entity_type=='ratio']
|
|
|
+ temp_entity_list = sorted(temp_entity_list,key=lambda x: (x.sentence_index, x.begin_index))
|
|
|
+ temp_match_list = []
|
|
|
+ for ent_idx in range(len(temp_entity_list)):
|
|
|
+ entity = temp_entity_list[ent_idx]
|
|
|
+ if entity.entity_type in ['org','company']:
|
|
|
+ match_nums = 0
|
|
|
+ tenderer_nums = 0 #经过其他中投标人的数量
|
|
|
+ byNotTenderer_match_nums = 0 #跟在中投标人后面的属性
|
|
|
+ for after_index in range(ent_idx + 1, min(len(temp_entity_list), ent_idx + 4)):
|
|
|
+ after_entity = temp_entity_list[after_index]
|
|
|
+ if after_entity.entity_type == link_attribute:
|
|
|
+ distance = (tokens_num_dict[after_entity.sentence_index] + after_entity.begin_index) - (
|
|
|
+ tokens_num_dict[entity.sentence_index] + entity.end_index)
|
|
|
+ sentence_distance = after_entity.sentence_index - entity.sentence_index
|
|
|
+ if sentence_distance == 0:
|
|
|
+ if distance < 100:
|
|
|
+ value = (-1 / 2 * (distance ** 2)) / 10000
|
|
|
+ temp_match_list.append(Match(entity, after_entity, value))
|
|
|
+ match_nums += 1
|
|
|
+ if not tenderer_nums:
|
|
|
+ byNotTenderer_match_nums += 1
|
|
|
else:
|
|
|
- packageName = packagePointer.entity_text
|
|
|
- addMoneyByRoleid(PackDict, packageName, "0", entity_after.entity_text, entity_after.values[entity_after.label])
|
|
|
- '''
|
|
|
- if str(entity_after.label)=="1":
|
|
|
- #print(entity_after.entity_text,entity.entity_text)
|
|
|
- _list_entitys = [entity]+entity.linked_entitys
|
|
|
- if len(PackageSet)>0:
|
|
|
- packagePointer,_ = getPackage(PackageList,entity_after.sentence_index,entity_after.begin_index,"money-"+str(entity_after.entity_text)+"-"+str(entity_after.label))
|
|
|
- if packagePointer is None:
|
|
|
- packageName_entity = "Project"
|
|
|
- else:
|
|
|
- packageName_entity = packagePointer.entity_text
|
|
|
+ break
|
|
|
+ else:
|
|
|
+ if distance < 60:
|
|
|
+ value = (-1 / 2 * (distance ** 2)) / 10000
|
|
|
+ temp_match_list.append(Match(entity, after_entity, value))
|
|
|
+ match_nums += 1
|
|
|
+ if not tenderer_nums:
|
|
|
+ byNotTenderer_match_nums += 1
|
|
|
else:
|
|
|
- packageName_entity = "Project"
|
|
|
- if str(entity.label) in ["2","3","4"]:
|
|
|
- # addMoneyByEntity(PackDict, packageName_entity, entity.entity_text, entity_after.entity_text, entity_after.values[entity_after.label])
|
|
|
- if entity_after.notes == '单价' or float(entity_after.entity_text)<5000: #2021/12/17 调整小金额阈值,避免203608823.html 两次金额一次万元没提取到的情况
|
|
|
- addMoneyByEntity(PackDict, packageName_entity, entity.entity_text, entity_after,
|
|
|
- 0.5)
|
|
|
- entity.pointer_money = entity_after
|
|
|
- # print('role zhao money', entity.entity_text, '中标金额:', entity_after.entity_text)
|
|
|
- else:
|
|
|
- addMoneyByEntity(PackDict, packageName_entity, entity.entity_text, entity_after,
|
|
|
- entity_after.values[entity_after.label])
|
|
|
- entity.pointer_money = entity_after
|
|
|
- # print('role zhao money', entity.entity_text, '中标金额:', entity_after.entity_text)
|
|
|
- if entity_after.values[entity_after.label]>0.6:
|
|
|
- break # 2021/7/16 新增,找到中标金额,非单价即停止,不再往后找金额
|
|
|
- #add pointer_money
|
|
|
- # entity.pointer_money = entity_after
|
|
|
- # print('role zhao money', entity.entity_text, '中标金额:', entity_after.entity_text)
|
|
|
- # if entity_after.notes!='单价':
|
|
|
- # break # 2021/7/16 新增,找到中标金额即停止,不再往后找金额
|
|
|
- '''
|
|
|
- if entity_after.entity_type=="person":
|
|
|
- if entity_after.values[entity_after.label]>=on_value_person:
|
|
|
- if str(entity_after.label)=="1":
|
|
|
- for i in range(len(roleList)):
|
|
|
- if roleList[i].role_name=="tenderee":
|
|
|
- roleList[i].linklist.append((entity_after.entity_text,entity_after.person_phone))
|
|
|
- elif str(entity_after.label)=="2":
|
|
|
- for i in range(len(roleList)):
|
|
|
- if roleList[i].role_name=="agency":
|
|
|
- roleList[i].linklist.append((entity_after.entity_text,entity_after.person_phone))
|
|
|
- elif str(entity_after.label)=="3":
|
|
|
- _list_entitys = [entity]+entity.linked_entitys
|
|
|
- for _entity in _list_entitys:
|
|
|
- for i in range(len(roleList)):
|
|
|
- if roleList[i].entity_text==_entity.entity_text:
|
|
|
- if entity_after.sentence_index-_entity.sentence_index>1 and len(roleList[i].linklist)>0:
|
|
|
- break
|
|
|
- roleList[i].linklist.append((entity_after.entity_text,entity_after.person_phone))
|
|
|
- '''
|
|
|
-
|
|
|
- p_entity += 1
|
|
|
-
|
|
|
- p_entity += 1
|
|
|
-
|
|
|
+ break
|
|
|
+ else:
|
|
|
+ tenderer_nums += 1
|
|
|
+ #前向查找属性
|
|
|
+ if not match_nums or not byNotTenderer_match_nums:
|
|
|
+ previous_entity = temp_entity_list[ent_idx - 1]
|
|
|
+ if previous_entity.entity_type == link_attribute:
|
|
|
+ if previous_entity.sentence_index == entity.sentence_index:
|
|
|
+ distance = (tokens_num_dict[entity.sentence_index] + entity.begin_index) - (
|
|
|
+ tokens_num_dict[
|
|
|
+ previous_entity.sentence_index] + previous_entity.end_index)
|
|
|
+ if distance < 20:
|
|
|
+ # 前向 没有 /10000
|
|
|
+ value = (-1 / 2 * (distance ** 2))
|
|
|
+ temp_match_list.append(Match(entity, previous_entity, value))
|
|
|
+ # km算法分配求解
|
|
|
+ dispatch_result = dispatch(temp_match_list)
|
|
|
+ # print(dispatch_result)
|
|
|
+ for match in dispatch_result:
|
|
|
+ _entity = match[0]
|
|
|
+ _attribute = match[1]
|
|
|
+ if link_attribute=='money':
|
|
|
+ _entity.pointer_money = _attribute
|
|
|
+ packagePointer, _ = getPackage(PackageList, _attribute.sentence_index, _attribute.begin_index,
|
|
|
+ "money-" + str(_attribute.entity_text) + "-" + str(_attribute.label))
|
|
|
+ if packagePointer is None:
|
|
|
+ packageName_entity = "Project"
|
|
|
+ else:
|
|
|
+ packageName_entity = packagePointer.entity_text
|
|
|
+ if _attribute.notes == '单价' or float(_attribute.entity_text) < 5000: # 2021/12/17 调整小金额阈值,避免203608823.html 两次金额一次万元没提取到的情况
|
|
|
+ addMoneyByEntity(PackDict, packageName_entity, _entity.entity_text, _attribute,0.5)
|
|
|
+ else:
|
|
|
+ addMoneyByEntity(PackDict, packageName_entity, _entity.entity_text, _attribute,
|
|
|
+ _attribute.values[_attribute.label])
|
|
|
+ elif link_attribute=='serviceTime':
|
|
|
+ _entity.pointer_serviceTime = _attribute
|
|
|
+ packagePointer, _ = getPackage(PackageList, _attribute.sentence_index, _attribute.begin_index,
|
|
|
+ "serviceTime-" + str(_attribute.entity_text) + "-" + str(_attribute.label))
|
|
|
+ if packagePointer is None:
|
|
|
+ packageName_entity = "Project"
|
|
|
+ else:
|
|
|
+ packageName_entity = packagePointer.entity_text
|
|
|
+ addServiceTimeByEntity(PackDict, packageName_entity, _entity.entity_text, _attribute)
|
|
|
+ elif link_attribute=='ratio':
|
|
|
+ _entity.pointer_ratio = _attribute
|
|
|
+ packagePointer, _ = getPackage(PackageList, _attribute.sentence_index, _attribute.begin_index,
|
|
|
+ "ratio-" + str(_attribute.entity_text) + "-" + str(_attribute.label))
|
|
|
+ if packagePointer is None:
|
|
|
+ packageName_entity = "Project"
|
|
|
+ else:
|
|
|
+ packageName_entity = packagePointer.entity_text
|
|
|
+ addRatioByEntity(PackDict, packageName_entity, _entity.entity_text, _attribute)
|
|
|
+
|
|
|
''''''
|
|
|
# 通过模型分类的招标/代理联系人
|
|
|
list_sentence = sorted(list_sentence, key=lambda x: x.sentence_index)
|
|
@@ -1073,24 +1219,6 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_entity
|
|
|
for one_phone in _phone:
|
|
|
PackDict["Project"]["roleList"][i].linklist.append(("", one_phone))
|
|
|
agency_phone.add(one_phone)
|
|
|
- # km配对方法
|
|
|
- def dispatch(match_list):
|
|
|
- main_roles = list(set([match.main_role for match in match_list]))
|
|
|
- attributes = list(set([match.attribute for match in match_list]))
|
|
|
-
|
|
|
- label = np.zeros(shape=(len(main_roles), len(attributes)))
|
|
|
- for match in match_list:
|
|
|
- main_role = match.main_role
|
|
|
- attribute = match.attribute
|
|
|
- value = match.value
|
|
|
- label[main_roles.index(main_role), attributes.index(attribute)] = value + 10000
|
|
|
- # print(label)
|
|
|
- gragh = -label
|
|
|
- # km算法
|
|
|
- row, col = linear_sum_assignment(gragh)
|
|
|
- max_dispatch = [(i, j) for i, j, value in zip(row, col, gragh[row, col]) if value]
|
|
|
- # return [Match(main_roles[row], attributes[col]) for row, col in max_dispatch]
|
|
|
- return [(main_roles[row], attributes[col]) for row, col in max_dispatch]
|
|
|
|
|
|
# 正则提取电话号码实体
|
|
|
# key_word = re.compile('((?:电话|联系方式|联系人).{0,4}?)([0-1]\d{6,11})')
|
|
@@ -1193,15 +1321,15 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_entity
|
|
|
# 去重结果
|
|
|
relation_list = list(set(relation_list))
|
|
|
# print(relation_list)
|
|
|
- tokens_num_dict = dict()
|
|
|
- last_tokens_num = 0
|
|
|
- for sentence in list_sentence:
|
|
|
- _index = sentence.sentence_index
|
|
|
- if _index == 0:
|
|
|
- tokens_num_dict[_index] = 0
|
|
|
- else:
|
|
|
- tokens_num_dict[_index] = tokens_num_dict[_index - 1] + last_tokens_num
|
|
|
- last_tokens_num = len(sentence.tokens)
|
|
|
+ # tokens_num_dict = dict()
|
|
|
+ # last_tokens_num = 0
|
|
|
+ # for sentence in list_sentence:
|
|
|
+ # _index = sentence.sentence_index
|
|
|
+ # if _index == 0:
|
|
|
+ # tokens_num_dict[_index] = 0
|
|
|
+ # else:
|
|
|
+ # tokens_num_dict[_index] = tokens_num_dict[_index - 1] + last_tokens_num
|
|
|
+ # last_tokens_num = len(sentence.tokens)
|
|
|
right_combination = [('org','person'),('company','person'),('company','location'),('org','location'),('person','phone')]
|
|
|
linked_company = set()
|
|
|
linked_person = set()
|
|
@@ -2241,9 +2369,9 @@ def getTimeAttributes(list_entity,list_sentence):
|
|
|
time_entitys = sorted(time_entitys,key=lambda x:(x.sentence_index, x.begin_index))
|
|
|
list_sentence = sorted(list_sentence,key=lambda x:x.sentence_index)
|
|
|
dict_time = {
|
|
|
- "time_release": [],
|
|
|
- "time_bidopen": [],
|
|
|
- "time_bidclose": [],
|
|
|
+ "time_release": [], # 1 发布时间
|
|
|
+ "time_bidopen": [], # 2 开标时间
|
|
|
+ "time_bidclose": [], # 3 截标时间
|
|
|
'time_bidstart': [], # 12 投标(开始)时间、响应文件接收(开始)时间
|
|
|
|
|
|
'time_publicityStart': [], # 4 公示开始时间(公示时间、公示期)
|
|
@@ -2424,18 +2552,11 @@ def getTimeAttributes(list_entity,list_sentence):
|
|
|
def getOtherAttributes(list_entity):
|
|
|
dict_other = {"moneysource":"",
|
|
|
"person_review":[],
|
|
|
- # "time_release":"",
|
|
|
- # "time_bidopen":"",
|
|
|
- # "time_bidclose":"",
|
|
|
"serviceTime":"",
|
|
|
"product":[],
|
|
|
"total_tendereeMoney":0,
|
|
|
"total_tendereeMoneyUnit":''}
|
|
|
- # dict_time = {
|
|
|
- # "time_release": [],
|
|
|
- # "time_bidopen": [],
|
|
|
- # "time_bidclose": []
|
|
|
- # }
|
|
|
+
|
|
|
for entity in list_entity:
|
|
|
if entity.entity_type == 'bidway':
|
|
|
dict_other["bidway"] = turnBidWay(entity.entity_text)
|
|
@@ -2443,18 +2564,6 @@ def getOtherAttributes(list_entity):
|
|
|
dict_other["moneysource"] = entity.entity_text
|
|
|
elif entity.entity_type=='serviceTime':
|
|
|
dict_other["serviceTime"] = entity.entity_text
|
|
|
- # elif entity.entity_type == 'time' and entity.label==1:
|
|
|
- # if entity.values[entity.label]>0.6:
|
|
|
- # dict_time['time_release'].append((timeFormat(entity.entity_text),entity.values[entity.label]))
|
|
|
- # # dict_other["time_release"] = timeFormat(entity.entity_text)
|
|
|
- # elif entity.entity_type == 'time' and entity.label==2:
|
|
|
- # if entity.values[entity.label]>0.6:
|
|
|
- # dict_time['time_bidopen'].append((timeFormat(entity.entity_text),entity.values[entity.label]))
|
|
|
- # # dict_other["time_bidopen"] = timeFormat(entity.entity_text)
|
|
|
- # elif entity.entity_type == 'time' and entity.label == 3:
|
|
|
- # if entity.values[entity.label]>0.6:
|
|
|
- # dict_time['time_bidclose'].append((timeFormat(entity.entity_text),entity.values[entity.label]))
|
|
|
- # # dict_other["time_bidclose"] = timeFormat(entity.entity_text)
|
|
|
elif entity.entity_type=="person" and entity.label ==4:
|
|
|
dict_other["person_review"].append(entity.entity_text)
|
|
|
elif entity.entity_type=='product':
|
|
@@ -2462,12 +2571,7 @@ def getOtherAttributes(list_entity):
|
|
|
elif entity.entity_type=='money' and entity.notes=='总投资' and dict_other["total_tendereeMoney"]<float(entity.entity_text):
|
|
|
dict_other["total_tendereeMoney"] = float(entity.entity_text)
|
|
|
dict_other["total_tendereeMoneyUnit"] = entity.money_unit
|
|
|
- # 时间类别
|
|
|
- # for time_type,value in dict_time.items():
|
|
|
- # list_time = dict_time[time_type]
|
|
|
- # if list_time:
|
|
|
- # list_time.sort(key=lambda x:x[1],reverse=True)
|
|
|
- # dict_other[time_type] = list_time[0][0]
|
|
|
+
|
|
|
dict_other["product"] = list(set(dict_other["product"]))
|
|
|
return dict_other
|
|
|
|