|
@@ -509,8 +509,8 @@ def getRoleList(list_sentence,list_entity,on_value = 0.5):
|
|
|
dict_role_combination[entity.packageName][str(_roleId)] = set([""])
|
|
|
dict_role_combination[entity.packageName][str(entity.label)].add(entity.entity_text)
|
|
|
list_real_comba = get_legal_comba(list_entity,dict_role_combination)
|
|
|
- # print("===role_combination",dict_role_combination)
|
|
|
- # print("== real_comba",list_real_comba)
|
|
|
+ # print("===role_combination",dict_role_combination)
|
|
|
+ # print("== real_comba",list_real_comba)
|
|
|
#拿到最大期望值的组合
|
|
|
max_index = 0
|
|
|
max_expect = -100
|
|
@@ -1439,14 +1439,14 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
|
|
|
# 例:"采购人联系方式:0833-5226788,"
|
|
|
phone_pattern = '(1[3-9][0-9][-—-―]?\d{4}[-—-―]?\d{4}|' \
|
|
|
'\+86.?1[3-9]\d{9}|' \
|
|
|
- '0[1-9]\d{1,2}[-—-―][1-9]\d{6,7}/[1-9]\d{6,10}|' \
|
|
|
- '0[1-9]\d{1,2}[-—-―]\d{7,8}.?转\d{1,4}|' \
|
|
|
- '0[1-9]\d{1,2}[-—-―]\d{7,8}[-—-―]\d{1,4}|' \
|
|
|
- '0[1-9]\d{1,2}[-—-―]?[1-9]\d{6}\d?(?=1[3-9]\d{9})|' \
|
|
|
- '0[1-9]\d{1,2}[-—-―]?[1-9]\d{6}\d?(?=0[1-9]\d{1,2}[-—-―]?[1-9]\d{6}\d?)|' \
|
|
|
- '0[1-9]\d{1,2}[-—-―]?[1-9]\d{6}\d?(?=[1-9]\d{6,7})|' \
|
|
|
- '0[1-9]\d{1,2}[-—-―]?[1-9]\d{6}\d?|' \
|
|
|
- '[\(|\(]0[1-9]\d{1,2}[\)|\)]-?\d{7,8}-?\d{,4}|' \
|
|
|
+ '0[1-9]\d{1,2}[-—-―][2-9]\d{6,7}/[1-9]\d{6,10}|' \
|
|
|
+ '0[1-9]\d{1,2}[-—-―][2-9]\d{6}\d?.?转\d{1,4}|' \
|
|
|
+ '0[1-9]\d{1,2}[-—-―][2-9]\d{6}\d?[-—-―]\d{1,4}|' \
|
|
|
+ '0[1-9]\d{1,2}[-—-―]?[2-9]\d{6}\d?(?=1[3-9]\d{9})|' \
|
|
|
+ '0[1-9]\d{1,2}[-—-―]?[2-9]\d{6}\d?(?=0[1-9]\d{1,2}[-—-―]?[2-9]\d{6}\d?)|' \
|
|
|
+ '0[1-9]\d{1,2}[-—-―]?[2-9]\d{6}\d?(?=[2-9]\d{6,7})|' \
|
|
|
+ '0[1-9]\d{1,2}[-—-―]?[2-9]\d{6}\d?|' \
|
|
|
+ '[\(|\(]0[1-9]\d{1,2}[\)|\)]-?[2-9]\d{6}\d?-?\d{,4}|' \
|
|
|
'[2-9]\d{6,7})'
|
|
|
re_tenderee_phone = re.compile(
|
|
|
"(?:(?:(?:采购|招标|议价|议标|比选)(?:人|公司|单位|组织|部门)|建设(?:单位|业主)|(?:采购|招标|甲)方|询价单位|项目业主|业主)[^。]{0,5}(?:电话|联系方式|联系人|联系电话)[::]?[^。]{0,7}?)"
|
|
@@ -1515,13 +1515,12 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
|
|
|
phone = re.compile('1[3-9][0-9][-—-―]?\d{4}[-—-―]?\d{4}|'
|
|
|
'\+86.?1[3-9]\d{9}|'
|
|
|
# '0[^0]\d{1,2}[-—-―][1-9]\d{6,7}/[1-9]\d{6,10}|'
|
|
|
- '0[1-9]\d{1,2}[-—-―]\d{7,8}.?转\d{1,4}|'
|
|
|
- '0[1-9]\d{1,2}[-—-―]\d{7,8}[-—-―]\d{1,4}|'
|
|
|
- '0[1-9]\d{1,2}[-—-―]?[1-9]\d{6}\d?(?=1[3-9]\d{9})|'
|
|
|
- '0[1-9]\d{1,2}[-—-―]?[1-9]\d{6}\d?(?=0[1-9]\d{1,2}[-—-―]?[1-9]\d{6}\d?)|'
|
|
|
- '0[1-9]\d{1,2}[-—-―]?[1-9]\d{6}\d?(?=[1-9]\d{6,7})|'
|
|
|
- '0[1-9]\d{1,2}[-—-―]?[1-9]\d{6}\d?|'
|
|
|
- '[\(|\(]0[1-9]\d{1,2}[\)|\)]-?\d{7,8}-?\d{,4}|'
|
|
|
+ '0[1-9]\d{1,2}[-—-―][2-9]\d{6}\d?[-—-―]\d{1,4}|'
|
|
|
+ '0[1-9]\d{1,2}[-—-―]{0,2}[2-9]\d{6}\d?(?=1[3-9]\d{9})|'
|
|
|
+ '0[1-9]\d{1,2}[-—-―]{0,2}[2-9]\d{6}\d?(?=0[1-9]\d{1,2}[-—-―]?[2-9]\d{6}\d?)|'
|
|
|
+ '0[1-9]\d{1,2}[-—-―]{0,2}[2-9]\d{6}\d?(?=[2-9]\d{6,7})|'
|
|
|
+ '0[1-9]\d{1,2}[-—-―]{0,2}[2-9]\d{6}\d?|'
|
|
|
+ '[\(|\(]0[1-9]\d{1,2}[\)|\)]-?[2-9]\d{6}\d?-?\d{,4}|'
|
|
|
'400\d{7}转\d{1,4}|'
|
|
|
'[2-9]\d{6,7}')
|
|
|
url_pattern = re.compile("http[s]?://(?:[a-zA-Z]|[0-9]|[#$\-_@.&+=\?:/]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+")
|
|
@@ -1670,7 +1669,8 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
|
|
|
if list_tokenbegin[j] >= item[2]:
|
|
|
end_index = j - 1
|
|
|
break
|
|
|
- _entity = Entity(_sentence.doc_id, None, item[0], "phone", _sentence.sentence_index, begin_index, end_index, item[1],
|
|
|
+ phone_text = re.sub("[-—-―]+","-",item[0]).replace("(","(").replace(")",")")
|
|
|
+ _entity = Entity(_sentence.doc_id, None, phone_text, "phone", _sentence.sentence_index, begin_index, end_index, item[1],
|
|
|
item[2],in_attachment=in_attachment)
|
|
|
phone_entitys.append(_entity)
|
|
|
# print('phone_set:',set([ent.entity_text for ent in phone_entitys]))
|
|
@@ -2112,14 +2112,15 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
|
|
|
# 公司-联系人连接(km算法)
|
|
|
re_phone = re.compile('1[3-9][0-9][-—-―]?\d{4}[-—-―]?\d{4}|'
|
|
|
'\+86.?1[3-9]\d{9}|'
|
|
|
- '0[1-9]\d{1,2}[-—-―][1-9]\d{6,7}/[1-9]\d{6,10}|'
|
|
|
- '0[1-9]\d{1,2}[-—-―]\d{7,8}.?转\d{1,4}|'
|
|
|
- '0[1-9]\d{1,2}[-—-―]\d{7,8}[-—-―]\d{1,4}|'
|
|
|
- '0[1-9]\d{1,2}[-—-―]?[1-9]\d{6}\d?(?=1[3-9]\d{9})|'
|
|
|
- '0[1-9]\d{1,2}[-—-―]?[1-9]\d{6}\d?(?=0[1-9]\d{1,2}[-—-―]?[1-9]\d{6}\d?)|'
|
|
|
- '0[1-9]\d{1,2}[-—-―]?[1-9]\d{6}\d?(?=[1-9]\d{6,7})|'
|
|
|
- '0[1-9]\d{1,2}[-—-―]?[1-9]\d{6}\d?|'
|
|
|
- '[\(|\(]0[1-9]\d{1,2}[\)|\)]-?\d{7,8}-?\d{,4}|'
|
|
|
+ # '0[1-9]\d{1,2}[-—-―][1-9]\d{6,7}/[1-9]\d{6,10}|'
|
|
|
+ '0[1-9]\d{1,2}[-—-―][2-9]\d{6,7}[^\d]?转\d{1,4}|'
|
|
|
+ '0[1-9]\d{1,2}[-—-―][2-9]\d{6}\d?[-—-―]\d{1,4}|'
|
|
|
+ '0[1-9]\d{1,2}[-—-―]{0,2}[2-9]\d{6}\d?(?=1[3-9]\d{9})|'
|
|
|
+ '0[1-9]\d{1,2}[-—-―]{0,2}[2-9]\d{6}\d?(?=0[1-9]\d{1,2}[-—-―]?[2-9]\d{6}\d?)|'
|
|
|
+ '0[1-9]\d{1,2}[-—-―]{0,2}[2-9]\d{6}\d?(?=[2-9]\d{6,7})|'
|
|
|
+ '0[1-9]\d{1,2}[-—-―]{0,2}[2-9]\d{6}\d?|'
|
|
|
+ '[\(|\(]0[1-9]\d{1,2}[\)|\)]-?[2-9]\d{6,7}-?\d{,4}|'
|
|
|
+ '400\d{7}转\d{1,4}|'
|
|
|
'[2-9]\d{6,7}')
|
|
|
key_phone = re.compile("联系方式|电话|联系人|负责人")
|
|
|
temporary_list2 = []
|