vor 1 Jahr · cb53b42016
--- a/BiddingKG/dl/interface/Preprocessing.py
+++ b/BiddingKG/dl/interface/Preprocessing.py
@@ -2665,6 +2665,7 @@ def get_preprocessed_entitys(list_sentences,useselffool=True,cost_time=dict()):
 
															             cost_time[key_nerToken] = 0
														
 
															         cost_time[key_nerToken] += round(time.time()-start_time,2)
														
 
															+        doctextcon_sentence_len = sum([1 for sentence in list_sentence if not sentence.in_attachment])
														
 
															         company_dict = set()
														
 
															         company_index = dict((i,set()) for i in range(len(list_sentence)))
														
 
															         for sentence_index in range(len(list_sentence)):
														
@@ -2743,6 +2744,11 @@ def get_preprocessed_entitys(list_sentences,useselffool=True,cost_time=dict()):
 
															                     continue
														
 
															                 elif entity_type=="person" and len(entity_text)>10 and len(re.findall("[\u4e00-\u9fa5]",entity_text))<len(entity_text)/2:
														
 
															                     continue
														
 
															+                # 识别不完整的组织机构补充
														
 
															+                if entity_type in ["org"]:
														
 
															+                    end_words = re.search("^[\u4e00-\u9fa5]{,5}(?:办公室|部|中心|处|会)",sentence_text[end_index_temp:end_index_temp+10])
														
 
															+                    if end_words:
														
 
															+                        entity_text = entity_text + end_words.group()
														
 
															                 for j in range(len(list_tokenbegin)):
														
 
															                     if list_tokenbegin[j]==begin_index_temp:
														
@@ -2820,8 +2826,8 @@ def get_preprocessed_entitys(list_sentences,useselffool=True,cost_time=dict()):
 
															                 list_sentence_entitys.append(Entity(doc_id,entity_id,entity_text,entity_type,sentence_index,begin_index,end_index,ner_entity[0],ner_entity[1],in_attachment=in_attachment))
														
 
															             # 标记文章末尾的"发布人”、“发布时间”实体
														
 
															-            if sentence_index==len(list_sentence)-1:
														
 
															-                if len(list_sentence_entitys[-2:])>2:
														
 
															+            if sentence_index==len(list_sentence)-1 or sentence_index==doctextcon_sentence_len-1:
														
 
															+                if len(list_sentence_entitys[-2:])==2:
														
 
															                     second2last = list_sentence_entitys[-2]
														
 
															                     last = list_sentence_entitys[-1]
														
 
															                     if (second2last.entity_type in ["company",'org'] and last.entity_type=="time") or (
														
--- a/BiddingKG/dl/interface/extract.py
+++ b/BiddingKG/dl/interface/extract.py
@@ -123,6 +123,50 @@ def str_normalize(text):
 
															     # print(new_text)
														
 
															     return new_text
														
 
															+# 修复prem中地区前缀不完整实体
														
 
															+def repair_entity(prem,district_dict,list_articles):
														
 
															+    district_dict = district_dict['district']
														
 
															+    province = district_dict['province'] if district_dict['province'] and district_dict['province'] not in ['未知','全国'] else ""
														
 
															+    city = district_dict['city'] if district_dict['city'] and district_dict['city']!='未知' else ""
														
 
															+    district = district_dict['district'] if district_dict['district'] and district_dict['district']!='未知' else ""
														
 
															+    content_text = list_articles[0].content
														
 
															+
														
 
															+    autonomous_region_dict = {
														
 
															+        "新疆":"新疆维吾尔",
														
 
															+        "西藏":"西藏",
														
 
															+        "内蒙古":"内蒙古",
														
 
															+        "广西":"广西壮族",
														
 
															+        "宁夏":"宁夏回族"
														
 
															+    }
														
 
															+
														
 
															+    for package,_prem in prem[0]['prem'].items():
														
 
															+        for role in _prem['roleList']:
														
 
															+            if role['role_name'] in ['tenderee','agency']:
														
 
															+                role_text = role['role_text']
														
 
															+                if re.search("^[省市县区]",role_text):
														
 
															+                    if role_text[0]=='省' and role_text[:2] not in ['省道']:
														
 
															+                        role['role_text'] = province + role_text
														
 
															+                    elif role_text[0]=='市' and role_text[:2] not in ['市政','市场']:
														
 
															+                        if district+'市' in content_text:
														
 
															+                            # 县级市
														
 
															+                            role['role_text'] = district + role_text
														
 
															+                        else:
														
 
															+                            role['role_text'] = city + role_text
														
 
															+                    elif role_text[0] in ['县','区']:
														
 
															+                        role['role_text'] = district + role_text
														
 
															+                elif re.search("^自治[区州县]",role_text):
														
 
															+                    if role_text[:3]=='自治区':
														
 
															+                        role['role_text'] = autonomous_region_dict.get(province,"") + role_text
														
 
															+                    elif role_text[:3] in ['自治县',"自治州"]:
														
 
															+                        if re.search("自治[县州]?$",district):
														
 
															+                            role['role_text'] = re.sub("自治[县州]?","",district) + role_text
														
 
															+                        elif re.search("族$",district):
														
 
															+                            role['role_text'] = district + role_text
														
 
															+                        elif re.search("自治[县州]?$",city):
														
 
															+                            role['role_text'] = re.sub("自治[县州]?","",city) + role_text
														
 
															+                        elif re.search("族$",city):
														
 
															+                            role['role_text'] = city + role_text
														
 
															+
														
 
															 def predict(doc_id,text,title="",page_time="",web_source_no='',web_source_name="",original_docchannel='',**kwargs):
														
 
															     cost_time = dict()
														
@@ -274,6 +318,11 @@ def predict(doc_id,text,title="",page_time="",web_source_no='',web_source_name="
 
															     district = predictor.getPredictor('district').predict(project_name=codeName[0]['name'], prem=prem,title=title, list_articles=list_articles, web_source_name=web_source_name, list_entitys=list_entitys)
														
 
															     cost_time["district"] = round(time.time() - start_time, 2)
														
 
															+    '''根据district提取结果修复实体'''
														
 
															+    repair_entity(prem,district,list_articles)
														
 
															+
														
 
															+    '''限制行业最高金额'''
														
 
															+    getAttributes.limit_maximum_amount(prem, industry)
														
 
															     # '''限制行业最高金额'''
														
 
															     # getAttributes.limit_maximum_amount(prem, industry) # 20230703取消，改为整合所有要素后面纠正
														
--- a/BiddingKG/dl/interface/getAttributes.py
+++ b/BiddingKG/dl/interface/getAttributes.py
@@ -1384,7 +1384,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
															                         error_numStr_index.append(numStr_index)
														
 
															                         last_phone_mask = False
														
 
															                         continue
														
 
															-                if re.search("身份证号?码?|注册[证号]|帐号|编[号码]|报价|标价|证号|价格|型号|附件|代码|列号|行号|税号|[\(\（]万?元[\)\）]|[a-zA-Z]+\d*$", re.sub("，","",phone_left)):
														
 
															+                if re.search("身份证号?码?|注册[证号]|帐号|编[号码]|报价|费率|标价|证号|价格|型号|附件|代码|列号|行号|税号|[\(\（]万?元[\)\）]|[a-zA-Z]+\d*$", re.sub("，","",phone_left)):
														
 
															                     error_numStr_index.append(numStr_index)
														
 
															                     last_phone_mask = False
														
 
															                     continue
														
@@ -1528,7 +1528,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
															         # print("预测数据：",len(temp_data))
														
 
															         # 去重结果
														
 
															         relation_list = list(set(relation_list))
														
 
															-    # print(relation_list)
														
 
															+    # print([(rel[0].entity_text,rel[2].entity_text) for rel in relation_list])
														
 
															     right_combination = [('org','person'),('company','person'),('company','location'),('org','location'),('person','phone')]
														
 
															     linked_company = set()
														
 
															     linked_person = set()
														
@@ -1542,14 +1542,17 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
															             _object = relation[2]
														
 
															             if isinstance(_subject,Entity) and isinstance(_object,Entity) and (_subject.entity_type,_object.entity_type) in right_combination:
														
 
															                 if relation[1]==predicate:
														
 
															+                    distance = (tokens_num_dict[_object.sentence_index] + _object.begin_index) - (
														
 
															+                            tokens_num_dict[_subject.sentence_index] + _subject.end_index)
														
 
															                     if predicate=="rel_person":
														
 
															                         if (_subject.label==0 and _object.entity_text in agency_contact ) or (_subject.label==1 and _object.entity_text in tenderee_contact):
														
 
															                             continue
														
 
															                         # 角色为中标候选人，排除"质疑|投诉|监督|受理"相关的联系人
														
 
															-                        if _subject.label in [2,3,4] and re.search("质疑|投诉|监督|受理",list_sentence[_object.sentence_index].sentence_text[max(0,_object.wordOffset_begin-10):_object.wordOffset_begin]):
														
 
															+                        if _subject.label in [2,3,4] and re.search("质疑|投诉|监督|受理|项目(单位)?联系",list_sentence[_object.sentence_index].sentence_text[max(0,_object.wordOffset_begin-10):_object.wordOffset_begin]):
														
 
															+                            continue
														
 
															+                        # 角色为中标候选人，排除距离过远的联系人
														
 
															+                        if _subject.label in [2, 3, 4] and distance>=40:
														
 
															                             continue
														
 
															-                    distance = (tokens_num_dict[_object.sentence_index] + _object.begin_index) - (
														
 
															-                                tokens_num_dict[_subject.sentence_index] + _subject.end_index)
														
 
															                     if distance>0:
														
 
															                         value = (-1 / 2 * (distance ** 2))/10000
														
 
															                     else:
														
@@ -1690,7 +1693,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
															                     sentence_distance = after_entity.sentence_index - entity.sentence_index
														
 
															                     value = (-1 / 2 * (distance ** 2)) / 10000
														
 
															                     if sentence_distance == 0:
														
 
															-                        if distance < 80:
														
 
															+                        if distance < 70:
														
 
															                             # value = (-1 / 2 * (distance ** 2)) / 10000
														
 
															                             t_match_list.append(Match(entity, after_entity, value))
														
 
															                             match_nums += 1
														
@@ -1699,7 +1702,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
															                             else:
														
 
															                                 break
														
 
															                     else:
														
 
															-                        if distance < 50:
														
 
															+                        if distance < 40:
														
 
															                             # value = (-1 / 2 * (distance ** 2)) / 10000
														
 
															                             t_match_list.append(Match(entity, after_entity, value))
														
 
															                             match_nums += 1
														
@@ -1945,6 +1948,8 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
															     match_list2 = []
														
 
															     for split_index in range(len(new_temporary_list2)):
														
 
															         split_entitys = new_temporary_list2[split_index]
														
 
															+        if len(split_entitys)<=1:
														
 
															+            continue
														
 
															         is_skip = False
														
 
															         for index in range(len(split_entitys)):
														
 
															             entity = split_entitys[index]
														
@@ -1958,20 +1963,25 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
															                         for after_index in range(index + 1, min(len(split_entitys), index + 4)):
														
 
															                             after_entity = split_entitys[after_index]
														
 
															                             if after_entity.entity_type in ['person']:
														
 
															-
														
 
															+                                distance = (tokens_num_dict[after_entity.sentence_index] + after_entity.begin_index) - (
														
 
															+                                                   tokens_num_dict[entity.sentence_index] + entity.end_index)
														
 
															                                 # 实体为中标人/候选人，联系人已确定类别【1，2】
														
 
															                                 if entity.label in [2, 3, 4] and after_entity.label in [1, 2]:
														
 
															                                     break
														
 
															+                                if entity.label in [2, 3, 4] and distance>=20:
														
 
															+                                    break
														
 
															                                 # 角色为中标候选人，排除"质疑|投诉|监督|受理"相关的联系人
														
 
															-                                if entity.label in [2, 3, 4] and re.search("质疑|投诉|监督|受理", list_sentence[after_entity.sentence_index].sentence_text[max(0,after_entity.wordOffset_begin - 10):after_entity.wordOffset_begin]):
														
 
															+                                if entity.label in [2, 3, 4] and re.search("质疑|投诉|监督|受理|项目(单位)?联系", list_sentence[after_entity.sentence_index].sentence_text[max(0,after_entity.wordOffset_begin - 10):after_entity.wordOffset_begin]):
														
 
															                                     break
														
 
															                                 if after_entity.label in [1, 2, 3]:
														
 
															-                                    distance = (tokens_num_dict[
														
 
															-                                                    after_entity.sentence_index] + after_entity.begin_index) - (
														
 
															-                                                           tokens_num_dict[entity.sentence_index] + entity.end_index)
														
 
															+                                    # distance = (tokens_num_dict[
														
 
															+                                    #                 after_entity.sentence_index] + after_entity.begin_index) - (
														
 
															+                                    #                        tokens_num_dict[entity.sentence_index] + entity.end_index)
														
 
															                                     sentence_distance = after_entity.sentence_index - entity.sentence_index
														
 
															                                     if sentence_distance == 0:
														
 
															                                         if distance < 100:
														
 
															+                                            if entity.label in [2, 3, 4] and distance>40:
														
 
															+                                                break
														
 
															                                             if (entity.label == 0 and after_entity.label == 1) or (
														
 
															                                                     entity.label == 1 and after_entity.label == 2):
														
 
															                                                 distance = distance / 100
														
@@ -1980,6 +1990,8 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
															                                             match_nums += 1
														
 
															                                     else:
														
 
															                                         if distance < 60:
														
 
															+                                            if entity.label in [2, 3, 4] and distance>20:
														
 
															+                                                break
														
 
															                                             if (entity.label == 0 and after_entity.label == 1) or (
														
 
															                                                     entity.label == 1 and after_entity.label == 2):
														
 
															                                                 distance = distance / 100
														
@@ -2008,17 +2020,15 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
															                                         if re.search("地，?址", after_entity_left):
														
 
															                                             is_skip = True
														
 
															                                             continue
														
 
															-                                        if re.search("\(|（", after_entity_left) and re.search("\)|）",
														
 
															-                                                                                              after_entity_right):
														
 
															+                                        if re.search("\(|（", after_entity_left) and re.search("\)|）",after_entity_right):
														
 
															                                             is_skip = True
														
 
															                                             continue
														
 
															-                                if entity.label in [0, 1] and after_entity.label in [0,
														
 
															-                                                                                     1] and entity.label == after_entity.label:
														
 
															+                                if entity.label in [0, 1] and after_entity.label in [0, 1] and entity.label == after_entity.label:
														
 
															                                     break
														
 
															                                 if entity.label in [0, 1] and after_entity.label in [0, 1] and split_entitys[
														
 
															                                     index + 1].entity_type == "person":
														
 
															                                     break
														
 
															-                                if entity.label in [0, 1] and after_entity.label in [2, 3, 4]:
														
 
															+                                if entity.label in [0, 1 ,5] and after_entity.label in [2, 3, 4]:
														
 
															                                     break
														
 
															                                 if entity.label in [2, 3, 4] and after_entity.label in [0, 1]:
														
 
															                                     break
														
@@ -2044,23 +2054,30 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
															                                             distance = 1
														
 
															                                             if is_same_sentence:
														
 
															                                                 if phone_begin <= 200:
														
 
															+                                                    if entity.label in [2,3,4] and phone_begin>80:
														
 
															+                                                        break
														
 
															                                                     value = (-1 / 2 * (distance ** 2)) / 10000
														
 
															                                                     match_list2.append(Match(entity, (entity, _phone), value))
														
 
															                                                     match_nums += 1
														
 
															                                             else:
														
 
															                                                 if phone_begin <= 60:
														
 
															+                                                    if entity.label in [2,3,4] and phone_begin>40:
														
 
															+                                                        break
														
 
															                                                     value = (-1 / 2 * (distance ** 2)) / 10000
														
 
															                                                     match_list2.append(Match(entity, (entity, _phone), value))
														
 
															                                                     match_nums += 1
														
 
															                         else:
														
 
															                             next_entity = split_entitys[index + 1]
														
 
															                             if next_entity.entity_type in ["org","company"]:
														
 
															-                                _entity_left = list_sentence[next_entity.sentence_index].sentence_text[max(0, next_entity.wordOffset_begin - 20):next_entity.wordOffset_begin]
														
 
															+                                _entity_left = list_sentence[next_entity.sentence_index].sentence_text[entity.wordOffset_end:next_entity.wordOffset_begin]
														
 
															                                 _entity_left2 = re.sub("，（）\(\):：", "", _entity_left)
														
 
															                                 _entity_left2 = _entity_left2[-5:]
														
 
															                                 if re.search("(地，?址|地，?点)[:：][^，。]*$", _entity_left) or re.search("地址|地点", _entity_left2):
														
 
															                                     if index + 2<= len(split_entitys) - 1:
														
 
															                                         next_entity = split_entitys[index + 2]
														
 
															+                                if len(_entity_left)<=2 and re.search("[、（\(]",_entity_left):
														
 
															+                                    if index + 2 <= len(split_entitys) - 1:
														
 
															+                                        next_entity = split_entitys[index + 2]
														
 
															                             if entity.sentence_index == next_entity.sentence_index:
														
 
															                                 mid_tokens += list_sentence[entity.sentence_index].tokens[
														
 
															                                               entity.end_index + 1:next_entity.begin_index]
														
@@ -2226,6 +2243,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
															                         prepare_link.append(after_entity)
														
 
															                         last_person = after_entity
														
 
															                         continue
														
 
															+
														
 
															     # 统一同类角色的属性
														
 
															     for k in PackDict.keys():
														
 
															         for i in range(len(PackDict[k]["roleList"])):
														
@@ -2259,8 +2277,11 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
															                                         (_pointer_person.entity_text, _p))
														
 
															     # "roleList"中联系人电话去重
														
 
															+    tenderee_agency_phone = []
														
 
															     for k in PackDict.keys():
														
 
															         for i in range(len(PackDict[k]["roleList"])):
														
 
															+            if PackDict[k]["roleList"][i].role_name in ['agency','tenderee']:
														
 
															+                tenderee_agency_phone.extend([person_phone[1] for person_phone in PackDict[k]["roleList"][i].linklist if person_phone[1]])
														
 
															             # 带有联系人的电话
														
 
															             with_person = [person_phone[1] for person_phone in PackDict[k]["roleList"][i].linklist if person_phone[0]]
														
 
															             # 带有电话的联系人
														
@@ -2276,7 +2297,26 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
															                         remove_list.append(item)
														
 
															             for _item in remove_list:
														
 
															                 PackDict[k]["roleList"][i].linklist.remove(_item)
														
 
															-
														
 
															+    # 中标候选人联系方式异常排除
														
 
															+    for k in PackDict.keys():
														
 
															+        for i in range(len(PackDict[k]["roleList"])):
														
 
															+            if PackDict[k]["roleList"][i].role_name in ['win_tenderer', 'second_tenderer','third_tenderer']:
														
 
															+                if tenderee_agency_phone:
														
 
															+                    remove_list = []
														
 
															+                    for item in PackDict[k]["roleList"][i].linklist:
														
 
															+                        if item[1] and item[1] in tenderee_agency_phone:
														
 
															+                            remove_list.append(item)
														
 
															+                    for _item in remove_list:
														
 
															+                        PackDict[k]["roleList"][i].linklist.remove(_item)
														
 
															+                # else:
														
 
															+                #     # 公告中无招标代理联系方式时，可排除中标联系方式
														
 
															+                #     remove_list = []
														
 
															+                #     for _item in PackDict[k]["roleList"][i].linklist:
														
 
															+                #         # 有联系方式
														
 
															+                #         if _item[1]:
														
 
															+                #             remove_list.append(_item)
														
 
															+                #     for _item in remove_list:
														
 
															+                #         PackDict[k]["roleList"][i].linklist.remove(_item)
														
 
															     # PackDict更新company/org地址
														
 
															     last_role_prob = {}
														
 
															     for ent in pre_entity:
														
@@ -2704,9 +2744,19 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
															     # 公告中只有"招标人"且无"联系人"链接时
														
 
															     if len(PackDict)==1:
														
 
															         k = list(PackDict.keys())[0]
														
 
															-        if len(PackDict[k]["roleList"])==1:
														
 
															-            if PackDict[k]["roleList"][0].role_name == "tenderee":
														
 
															-                if not PackDict[k]["roleList"][0].linklist:
														
 
															+        tenderee_agency_role = [role for role in PackDict[k]["roleList"] if role.role_name in ['tenderee','agency']]
														
 
															+        if len(tenderee_agency_role)==1:
														
 
															+            exist_person = []
														
 
															+            exist_phone = []
														
 
															+            for role in PackDict[k]["roleList"]:
														
 
															+                for group in role.linklist:
														
 
															+                    if group[0]:
														
 
															+                        exist_person.append(group[0])
														
 
															+                    if group[1]:
														
 
															+                        exist_phone.append(group[1])
														
 
															+
														
 
															+            if tenderee_agency_role[0].role_name == "tenderee":
														
 
															+                if not tenderee_agency_role[0].linklist:
														
 
															                     get_contacts = False
														
 
															                     if not get_contacts:
														
 
															                         # 根据大纲Outline类召回联系人
														
@@ -2718,8 +2768,9 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
															                                         if t_person.person_phone:
														
 
															                                             _phone = [p.entity_text for p in t_person.person_phone]
														
 
															                                             for _p in _phone:
														
 
															-                                                PackDict[k]["roleList"][0].linklist.append((t_person.entity_text, _p))
														
 
															-                                            get_contacts = True
														
 
															+                                                if t_person.entity_text not in exist_person and _p not in exist_phone:
														
 
															+                                                    tenderee_agency_role[0].linklist.append((t_person.entity_text, _p))
														
 
															+                                                    get_contacts = True
														
 
															                                             break
														
 
															                                     elif words_num_dict[t_person.sentence_index] + t_person.wordOffset_begin >= \
														
 
															                                             words_num_dict[outline.sentence_end_index] + outline.wordOffset_end:
														
@@ -2727,9 +2778,10 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
															                                 if not get_contacts:
														
 
															                                     sentence_phone = phone.findall(outline.outline_text)
														
 
															                                     if sentence_phone:
														
 
															-                                        PackDict[k]["roleList"][0].linklist.append(("", sentence_phone[0]))
														
 
															-                                        get_contacts = True
														
 
															-                                        break
														
 
															+                                        if sentence_phone[0] not in exist_phone:
														
 
															+                                            tenderee_agency_role[0].linklist.append(("", sentence_phone[0]))
														
 
															+                                            get_contacts = True
														
 
															+                                            break
														
 
															                     if not get_contacts:
														
 
															                         # 直接取文中倒数第一个联系人
														
 
															                         for _entity in temporary_list2[::-1]:
														
@@ -2737,14 +2789,16 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
															                                 if _entity.person_phone:
														
 
															                                     _phone = [p.entity_text for p in _entity.person_phone]
														
 
															                                     for _p in _phone:
														
 
															-                                        PackDict[k]["roleList"][0].linklist.append((_entity.entity_text, _p))
														
 
															-                                    get_contacts = True
														
 
															+                                        if _entity.entity_text not in exist_person and _p not in exist_phone:
														
 
															+                                            tenderee_agency_role[0].linklist.append((_entity.entity_text, _p))
														
 
															+                                            get_contacts = True
														
 
															                                     break
														
 
															                     if not get_contacts:
														
 
															                         # 如果文中只有一个“phone”实体，则直接取为联系人电话
														
 
															                         if len(phone_entitys) == 1:
														
 
															-                            PackDict[k]["roleList"][0].linklist.append(("", phone_entitys[0].entity_text))
														
 
															-                            get_contacts = True
														
 
															+                            if phone_entitys[0].entity_text not in exist_phone:
														
 
															+                                tenderee_agency_role[0].linklist.append(("", phone_entitys[0].entity_text))
														
 
															+                                get_contacts = True
														
 
															                     if not get_contacts:
														
 
															                         # 通过大纲Outline类直接取电话
														
 
															                         if len(new_split_list) > 1:
														
@@ -2754,8 +2808,8 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
															                                 if re.search("联系人|联系方|联系方式|联系电话|电话|负责人|与.{2,4}联系", sentence_outline):
														
 
															                                     sentence_phone = phone.findall(temp_sentence)
														
 
															                                     if sentence_phone:
														
 
															-                                        if sentence_phone[0] in [ent.entity_text for ent in phone_entitys]:
														
 
															-                                            PackDict[k]["roleList"][0].linklist.append(("", sentence_phone[0]))
														
 
															+                                        if sentence_phone[0] in [ent.entity_text for ent in phone_entitys] and sentence_phone[0] not in exist_phone:
														
 
															+                                            tenderee_agency_role[0].linklist.append(("", sentence_phone[0]))
														
 
															                                             get_contacts = True
														
 
															                                             break
														
 
															                     if not get_contacts:
														
@@ -2773,9 +2827,10 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
															                                 match_text = match_text.split("。")[0]
														
 
															                                 sentence_phone = phone.findall(match_text)
														
 
															                                 if sentence_phone:
														
 
															-                                    PackDict[k]["roleList"][0].linklist.append(("", sentence_phone[0]))
														
 
															-                                    get_tenderee_contacts = True
														
 
															-                                    break
														
 
															+                                    if sentence_phone[0] not in exist_phone:
														
 
															+                                        tenderee_agency_role[0].linklist.append(("", sentence_phone[0]))
														
 
															+                                        get_tenderee_contacts = True
														
 
															+                                        break
														
 
															                             if get_tenderee_contacts:
														
 
															                                 break
														
@@ -2990,48 +3045,84 @@ def getTimeAttributes(list_entity,list_sentence):
 
															         in_attachment = entity.in_attachment
														
 
															         extract_time = my_timeFormat(entity_text)
														
 
															         # definite_time = "00:00:00"
														
 
															-        # if extract_time:
														
 
															-        #     t = re.compile("(?P<day>下午|上午|早上)?(?P<hour>\d{1,2})[:：时点](?P<half_hour>半)?(?P<minute>\d{2})?[:：分]?(?P<second>\d{2})?秒?")
														
 
															-        #     t_in_word = re.search(t,entity_text)
														
 
															-        #     t_out_of_word = re.search("^[^\d]{,2}"+t.pattern,sentence_text[entity.wordOffset_end:])
														
 
															-        #     if t_in_word:
														
 
															-        #         print('t_in_word',entity_text,t_in_word.groupdict())
														
 
															-        #         day = t_in_word.groupdict().get('day',"")
														
 
															-        #         hour = t_in_word.groupdict().get('hour',"")
														
 
															-        #         half_hour = t_in_word.groupdict().get('half_hour',"")
														
 
															-        #         minute = t_in_word.groupdict().get('minute',"")
														
 
															-        #         second = t_in_word.groupdict().get('second',"")
														
 
															-        #         if hour:
														
 
															-        #             if day=='下午' and int(hour)<12:
														
 
															-        #                 hour = str(int(hour)+12)
														
 
															-        #             if int(hour)>24:
														
 
															-        #                 continue
														
 
															-        #         else:
														
 
															-        #             hour = "00"
														
 
															-        #         if not minute:
														
 
															-        #             if half_hour:
														
 
															-        #                 minute = "30"
														
 
															-        #             else:
														
 
															-        #                 minute = "00"
														
 
															-        #         if int(minute)>60:
														
 
															-        #             continue
														
 
															-        #         if not second:
														
 
															-        #             second = "00"
														
 
															-        #         if int(second)>60:
														
 
															-        #             continue
														
 
															-        #         # 数字字符格式化
														
 
															-        #         # hour = str(int(hour))
														
 
															-        #         # minute = str(int(minute))
														
 
															-        #         # second = str(int(second))
														
 
															-        #         definite_time = "%s:%s:%s"%(hour.rjust(2,"0"),minute.rjust(2,"0"),second.rjust(2,"0"))
														
 
															-        #         print(definite_time)
														
 
															-        #
														
 
															-        #     elif t_out_of_word:
														
 
															-        #         print('t_out_of_word', entity_text+sentence_text[entity.wordOffset_end:], t_out_of_word.groupdict())
														
 
															+        if extract_time:
														
 
															+            definite_time_list = []
														
 
															+            t = re.compile("(?P<day>下午|上午|早上)?(?P<hour>\d{1,2})[:：时点](?P<half_hour>半)?(?P<minute>\d{2})?[:：分]?(?P<second>\d{2})?秒?")
														
 
															+            t_in_word = re.search(t,entity_text.replace(" ",""))
														
 
															+            t_out_of_word = re.search("^[^\d]{,2}"+t.pattern,sentence_text[entity.wordOffset_end:])
														
 
															+            if t_in_word:
														
 
															+                # print('t_in_word',entity_text,t_in_word.groupdict())
														
 
															+                day = t_in_word.groupdict().get('day',"")
														
 
															+                hour = t_in_word.groupdict().get('hour',"")
														
 
															+                half_hour = t_in_word.groupdict().get('half_hour',"")
														
 
															+                minute = t_in_word.groupdict().get('minute',"")
														
 
															+                second = t_in_word.groupdict().get('second',"")
														
 
															+                if hour:
														
 
															+                    if day=='下午' and int(hour)<12:
														
 
															+                        hour = str(int(hour)+12)
														
 
															+                    if int(hour)>24:
														
 
															+                        continue
														
 
															+                else:
														
 
															+                    hour = "00"
														
 
															+                if not minute:
														
 
															+                    if half_hour:
														
 
															+                        minute = "30"
														
 
															+                    else:
														
 
															+                        minute = "00"
														
 
															+                if int(minute)>60:
														
 
															+                    continue
														
 
															+                if not second:
														
 
															+                    second = "00"
														
 
															+                if int(second)>60:
														
 
															+                    continue
														
 
															+                definite_time = "%s:%s:%s"%(hour.rjust(2,"0"),minute.rjust(2,"0"),second.rjust(2,"0"))
														
 
															+                # print(definite_time)
														
 
															+                definite_time_list.append(definite_time)
														
 
															+
														
 
															+            if t_out_of_word:
														
 
															+                # print('t_out_of_word', entity_text+sentence_text[entity.wordOffset_end:], t_out_of_word.groupdict())
														
 
															+                day = t_out_of_word.groupdict().get('day', "")
														
 
															+                hour = t_out_of_word.groupdict().get('hour', "")
														
 
															+                half_hour = t_out_of_word.groupdict().get('half_hour', "")
														
 
															+                minute = t_out_of_word.groupdict().get('minute', "")
														
 
															+                second = t_out_of_word.groupdict().get('second', "")
														
 
															+                if hour:
														
 
															+                    if day == '下午' and int(hour) < 12:
														
 
															+                        hour = str(int(hour) + 12)
														
 
															+                    if int(hour) > 24:
														
 
															+                        continue
														
 
															+                else:
														
 
															+                    hour = "00"
														
 
															+                if not minute:
														
 
															+                    if half_hour:
														
 
															+                        minute = "30"
														
 
															+                    else:
														
 
															+                        minute = "00"
														
 
															+                if int(minute) > 60:
														
 
															+                    continue
														
 
															+                if not second:
														
 
															+                    second = "00"
														
 
															+                if int(second) > 60:
														
 
															+                    continue
														
 
															+                definite_time = "%s:%s:%s" % (hour.rjust(2, "0"), minute.rjust(2, "0"), second.rjust(2, "0"))
														
 
															+                # print(definite_time)
														
 
															+                definite_time_list.append(definite_time)
														
 
															+            min_len = min(len(extract_time),len(definite_time_list))
														
 
															+            for i in range(min_len):
														
 
															+                if definite_time_list[i] != "00:00:00":
														
 
															+                    extract_time[i] = extract_time[i] + " " + definite_time_list[i]
														
 
															         if extract_time:
														
 
															+            # 时间变更prob优化
														
 
															+            if re.search("原",entity_left2):
														
 
															+                last_index = 0
														
 
															+                for item in re.finditer("原",entity_left2):
														
 
															+                    last_index = item.start() + 1
														
 
															+                label_prob = label_prob - 0.2 * last_index / len(entity_left2)
														
 
															+                # print('prob优化',label_prob,extract_time)
														
 
															+
														
 
															             # 优化多个并列的时间，如：开标时间和截标时间，截标时间和报名结束时间
														
 
															             if entity.label in [2,3,9]:
														
 
															                 if entity.label==2 and re.search("截标|投标.{,2}截止|递交(?:文件)?.{,2}截止|报价.{,2}截止|响应.{,2}截止",entity_left3):
														
@@ -3042,8 +3133,12 @@ def getTimeAttributes(list_entity,list_sentence):
 
															                     dict_time['time_registrationEnd'].append((extract_time[0], 0.5, in_attachment))
														
 
															                 if entity.label==9 and re.search("截标|投标.{,2}截止|递交(?:文件)?.{,2}截止|报价.{,2}截止|响应.{,2}截止",entity_left3):
														
 
															                     dict_time['time_bidclose'].append((extract_time[0], 0.5, in_attachment))
														
 
															-
														
 
															-
														
 
															+            # 补充公告末尾处的发布时间
														
 
															+            if entity.label==0:
														
 
															+                if entity.is_tail:
														
 
															+                    entity.label = 1
														
 
															+                    entity.values[1] = 0.5
														
 
															+                    dict_time['time_release'].append((extract_time[0], 0.5, in_attachment))
														
 
															             # 2022/12/12 新增挂牌时间正则
														
 
															             if re.search("挂牌.{,4}(?:时间|日期)",entity_left2):
														
 
															                 if re.search("挂牌.{,4}(?:时间|日期)",entity_left2).end()>len(entity_left2)/2:
														
@@ -3206,7 +3301,7 @@ def getTimeAttributes(list_entity,list_sentence):
 
															             last_time_type = ""
														
 
															         last_sentence_index = entity.sentence_index
														
 
															-
														
 
															+    # print(dict_time)
														
 
															     result_dict = dict((key,"") for key in dict_time.keys())
														
 
															     for time_type,value in dict_time.items():
														
 
															         list_time = dict_time[time_type]