|
@@ -440,6 +440,7 @@ def getRoleList(list_sentence,list_entity,on_value = 0.5):
|
|
|
|
|
|
#拿到所有可能的情况
|
|
#拿到所有可能的情况
|
|
dict_role_combination = {}
|
|
dict_role_combination = {}
|
|
|
|
+ # print(PackageList)
|
|
#拿到各个实体的packageName,packageCode
|
|
#拿到各个实体的packageName,packageCode
|
|
for entity in list_entity:
|
|
for entity in list_entity:
|
|
if entity.entity_type in ['org','company']:
|
|
if entity.entity_type in ['org','company']:
|
|
@@ -457,10 +458,12 @@ def getRoleList(list_sentence,list_entity,on_value = 0.5):
|
|
if packagePointer is None:
|
|
if packagePointer is None:
|
|
#continue
|
|
#continue
|
|
packageName = "Project"
|
|
packageName = "Project"
|
|
|
|
+ # print(entity.entity_text, packageName,entity.sentence_index,entity.begin_index)
|
|
else:
|
|
else:
|
|
#add pointer_pack
|
|
#add pointer_pack
|
|
entity.pointer_pack = packagePointer
|
|
entity.pointer_pack = packagePointer
|
|
packageName = packagePointer.entity_text
|
|
packageName = packagePointer.entity_text
|
|
|
|
+ # print(entity.entity_text, packageName)
|
|
else:
|
|
else:
|
|
packageName = "Project"
|
|
packageName = "Project"
|
|
find_flag = False
|
|
find_flag = False
|
|
@@ -486,6 +489,8 @@ def getRoleList(list_sentence,list_entity,on_value = 0.5):
|
|
dict_role_combination[entity.packageName][str(_roleId)] = set([""])
|
|
dict_role_combination[entity.packageName][str(_roleId)] = set([""])
|
|
dict_role_combination[entity.packageName][str(entity.label)].add(entity.entity_text)
|
|
dict_role_combination[entity.packageName][str(entity.label)].add(entity.entity_text)
|
|
list_real_comba = get_legal_comba(list_entity,dict_role_combination)
|
|
list_real_comba = get_legal_comba(list_entity,dict_role_combination)
|
|
|
|
+ # print("===role_combination",dict_role_combination)
|
|
|
|
+ # print("== real_comba",list_real_comba)
|
|
#拿到最大期望值的组合
|
|
#拿到最大期望值的组合
|
|
max_index = 0
|
|
max_index = 0
|
|
max_expect = -100
|
|
max_expect = -100
|
|
@@ -511,6 +516,7 @@ def getRoleList(list_sentence,list_entity,on_value = 0.5):
|
|
packagecode = ""
|
|
packagecode = ""
|
|
RoleList.append(PREM(packageName,packagecode,role_name,entity_text,0,0,0.0,[]))
|
|
RoleList.append(PREM(packageName,packagecode,role_name,entity_text,0,0,0.0,[]))
|
|
RoleSet.add(entity_text)
|
|
RoleSet.add(entity_text)
|
|
|
|
+
|
|
#根据最优树来修正list_entity中角色对包的连接
|
|
#根据最优树来修正list_entity中角色对包的连接
|
|
for _entity in list_entity:
|
|
for _entity in list_entity:
|
|
if _entity.pointer_pack is not None:
|
|
if _entity.pointer_pack is not None:
|
|
@@ -546,6 +552,7 @@ def getPackagesFromArticle(list_sentence,list_entity):
|
|
|
|
|
|
if len(list_sentence)==0:
|
|
if len(list_sentence)==0:
|
|
return None
|
|
return None
|
|
|
|
+ list_sentence.sort(key=lambda x:x.sentence_index)
|
|
|
|
|
|
PackageList = []
|
|
PackageList = []
|
|
PackageList_scope = []
|
|
PackageList_scope = []
|
|
@@ -718,6 +725,7 @@ def getPackagesFromArticle(list_sentence,list_entity):
|
|
PackageList_scope = PackageList_scope+PackageList_item_scope
|
|
PackageList_scope = PackageList_scope+PackageList_item_scope
|
|
PackageList_item.sort(key=lambda x:x["sentence_index"])
|
|
PackageList_item.sort(key=lambda x:x["sentence_index"])
|
|
pattern_punctuation = "[::()\(\),,。;;]"
|
|
pattern_punctuation = "[::()\(\),,。;;]"
|
|
|
|
+ # print("===packageList_scope",PackageList_scope)
|
|
for i in range(len(list_sentence)):
|
|
for i in range(len(list_sentence)):
|
|
for j in range(len(PackageList_scope)):
|
|
for j in range(len(PackageList_scope)):
|
|
if i==PackageList_scope[j]["sentence_index"] and PackageList_scope[j]["name"]!="":
|
|
if i==PackageList_scope[j]["sentence_index"] and PackageList_scope[j]["name"]!="":
|
|
@@ -743,7 +751,7 @@ def getPackagesFromArticle(list_sentence,list_entity):
|
|
else:
|
|
else:
|
|
scope_begin = [PackageList_scope[j-1]["sentence_index"],PackageList_scope[j-1]["offsetWords_begin"]]
|
|
scope_begin = [PackageList_scope[j-1]["sentence_index"],PackageList_scope[j-1]["offsetWords_begin"]]
|
|
if j==len(PackageList_scope)-1:
|
|
if j==len(PackageList_scope)-1:
|
|
- scope_end = [PackageList_scope[j]["sentence_index"],changeIndexFromWordToWords(list_sentence[i].tokens, len(list_sentence[i].sentence_text))]
|
|
|
|
|
|
+ scope_end = [list_sentence[-1].sentence_index,changeIndexFromWordToWords(list_sentence[-1].tokens, len(list_sentence[-1].sentence_text))]
|
|
else:
|
|
else:
|
|
scope_end = [PackageList_scope[j+1]["sentence_index"],PackageList_scope[j+1]["offsetWords_begin"]]
|
|
scope_end = [PackageList_scope[j+1]["sentence_index"],PackageList_scope[j+1]["offsetWords_begin"]]
|
|
if PackageList_scope[j-1]["sentence_index"]==PackageList_scope[j]["sentence_index"] and PackageList_scope[j-1]["offsetWord_begin"]<=PackageList_scope[j]["offsetWord_begin"] and PackageList_scope[j-1]["offsetWord_end"]>=PackageList_scope[j]["offsetWord_end"]:
|
|
if PackageList_scope[j-1]["sentence_index"]==PackageList_scope[j]["sentence_index"] and PackageList_scope[j-1]["offsetWord_begin"]<=PackageList_scope[j]["offsetWord_begin"] and PackageList_scope[j-1]["offsetWord_end"]>=PackageList_scope[j]["offsetWord_end"]:
|