1 year ago · cb53b42016
--- a/BiddingKG/dl/interface/Preprocessing.py
+++ b/BiddingKG/dl/interface/Preprocessing.py
@@ -2665,6 +2665,7 @@ def get_preprocessed_entitys(list_sentences,useselffool=True,cost_time=dict()):
 
				             cost_time[key_nerToken] = 0
			
 
				         cost_time[key_nerToken] += round(time.time()-start_time,2)
			
 
				 
			
 
				+        doctextcon_sentence_len = sum([1 for sentence in list_sentence if not sentence.in_attachment])
			
 
				         company_dict = set()
			
 
				         company_index = dict((i,set()) for i in range(len(list_sentence)))
			
 
				         for sentence_index in range(len(list_sentence)):
			
@@ -2743,6 +2744,11 @@ def get_preprocessed_entitys(list_sentences,useselffool=True,cost_time=dict()):
 
				                     continue
			
 
				                 elif entity_type=="person" and len(entity_text)>10 and len(re.findall("[\u4e00-\u9fa5]",entity_text))<len(entity_text)/2:
			
 
				                     continue
			
 
				+                # 识别不完整的组织机构补充
			
 
				+                if entity_type in ["org"]:
			
 
				+                    end_words = re.search("^[\u4e00-\u9fa5]{,5}(?:办公室|部|中心|处|会)",sentence_text[end_index_temp:end_index_temp+10])
			
 
				+                    if end_words:
			
 
				+                        entity_text = entity_text + end_words.group()
			
 
				 
			
 
				                 for j in range(len(list_tokenbegin)):
			
 
				                     if list_tokenbegin[j]==begin_index_temp:
			
@@ -2820,8 +2826,8 @@ def get_preprocessed_entitys(list_sentences,useselffool=True,cost_time=dict()):
 
				 
			
 
				                 list_sentence_entitys.append(Entity(doc_id,entity_id,entity_text,entity_type,sentence_index,begin_index,end_index,ner_entity[0],ner_entity[1],in_attachment=in_attachment))
			
 
				             # 标记文章末尾的"发布人”、“发布时间”实体
			
 
				-            if sentence_index==len(list_sentence)-1:
			
 
				-                if len(list_sentence_entitys[-2:])>2:
			
 
				+            if sentence_index==len(list_sentence)-1 or sentence_index==doctextcon_sentence_len-1:
			
 
				+                if len(list_sentence_entitys[-2:])==2:
			
 
				                     second2last = list_sentence_entitys[-2]
			
 
				                     last = list_sentence_entitys[-1]
			
 
				                     if (second2last.entity_type in ["company",'org'] and last.entity_type=="time") or (
			
--- a/BiddingKG/dl/interface/extract.py
+++ b/BiddingKG/dl/interface/extract.py
@@ -123,6 +123,50 @@ def str_normalize(text):
 
				     # print(new_text)
			
 
				 
			
 
				     return new_text
			
 
				+# 修复prem中地区前缀不完整实体
			
 
				+def repair_entity(prem,district_dict,list_articles):
			
 
				+    district_dict = district_dict['district']
			
 
				+    province = district_dict['province'] if district_dict['province'] and district_dict['province'] not in ['未知','全国'] else ""
			
 
				+    city = district_dict['city'] if district_dict['city'] and district_dict['city']!='未知' else ""
			
 
				+    district = district_dict['district'] if district_dict['district'] and district_dict['district']!='未知' else ""
			
 
				+    content_text = list_articles[0].content
			
 
				+
			
 
				+    autonomous_region_dict = {
			
 
				+        "新疆":"新疆维吾尔",
			
 
				+        "西藏":"西藏",
			
 
				+        "内蒙古":"内蒙古",
			
 
				+        "广西":"广西壮族",
			
 
				+        "宁夏":"宁夏回族"
			
 
				+    }
			
 
				+
			
 
				+    for package,_prem in prem[0]['prem'].items():
			
 
				+        for role in _prem['roleList']:
			
 
				+            if role['role_name'] in ['tenderee','agency']:
			
 
				+                role_text = role['role_text']
			
 
				+                if re.search("^[省市县区]",role_text):
			
 
				+                    if role_text[0]=='省' and role_text[:2] not in ['省道']:
			
 
				+                        role['role_text'] = province + role_text
			
 
				+                    elif role_text[0]=='市' and role_text[:2] not in ['市政','市场']:
			
 
				+                        if district+'市' in content_text:
			
 
				+                            # 县级市
			
 
				+                            role['role_text'] = district + role_text
			
 
				+                        else:
			
 
				+                            role['role_text'] = city + role_text
			
 
				+                    elif role_text[0] in ['县','区']:
			
 
				+                        role['role_text'] = district + role_text
			
 
				+                elif re.search("^自治[区州县]",role_text):
			
 
				+                    if role_text[:3]=='自治区':
			
 
				+                        role['role_text'] = autonomous_region_dict.get(province,"") + role_text
			
 
				+                    elif role_text[:3] in ['自治县',"自治州"]:
			
 
				+                        if re.search("自治[县州]?$",district):
			
 
				+                            role['role_text'] = re.sub("自治[县州]?","",district) + role_text
			
 
				+                        elif re.search("族$",district):
			
 
				+                            role['role_text'] = district + role_text
			
 
				+                        elif re.search("自治[县州]?$",city):
			
 
				+                            role['role_text'] = re.sub("自治[县州]?","",city) + role_text
			
 
				+                        elif re.search("族$",city):
			
 
				+                            role['role_text'] = city + role_text
			
 
				+
			
 
				 
			
 
				 def predict(doc_id,text,title="",page_time="",web_source_no='',web_source_name="",original_docchannel='',**kwargs):
			
 
				     cost_time = dict()
			
@@ -274,6 +318,11 @@ def predict(doc_id,text,title="",page_time="",web_source_no='',web_source_name="
 
				     district = predictor.getPredictor('district').predict(project_name=codeName[0]['name'], prem=prem,title=title, list_articles=list_articles, web_source_name=web_source_name, list_entitys=list_entitys)
			
 
				     cost_time["district"] = round(time.time() - start_time, 2)
			
 
				 
			
 
				+    '''根据district提取结果修复实体'''
			
 
				+    repair_entity(prem,district,list_articles)
			
 
				+
			
 
				+    '''限制行业最高金额'''
			
 
				+    getAttributes.limit_maximum_amount(prem, industry)
			
 
				     # '''限制行业最高金额'''
			
 
				     # getAttributes.limit_maximum_amount(prem, industry) # 20230703取消，改为整合所有要素后面纠正
			
 
				 
			
--- a/BiddingKG/dl/interface/getAttributes.py
+++ b/BiddingKG/dl/interface/getAttributes.py
@@ -1384,7 +1384,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
				                         error_numStr_index.append(numStr_index)
			
 
				                         last_phone_mask = False
			
 
				                         continue
			
 
				-                if re.search("身份证号?码?|注册[证号]|帐号|编[号码]|报价|标价|证号|价格|型号|附件|代码|列号|行号|税号|[\(\（]万?元[\)\）]|[a-zA-Z]+\d*$", re.sub("，","",phone_left)):
			
 
				+                if re.search("身份证号?码?|注册[证号]|帐号|编[号码]|报价|费率|标价|证号|价格|型号|附件|代码|列号|行号|税号|[\(\（]万?元[\)\）]|[a-zA-Z]+\d*$", re.sub("，","",phone_left)):
			
 
				                     error_numStr_index.append(numStr_index)
			
 
				                     last_phone_mask = False
			
 
				                     continue
			
@@ -1528,7 +1528,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
				         # print("预测数据：",len(temp_data))
			
 
				         # 去重结果
			
 
				         relation_list = list(set(relation_list))
			
 
				-    # print(relation_list)
			
 
				+    # print([(rel[0].entity_text,rel[2].entity_text) for rel in relation_list])
			
 
				     right_combination = [('org','person'),('company','person'),('company','location'),('org','location'),('person','phone')]
			
 
				     linked_company = set()
			
 
				     linked_person = set()
			
@@ -1542,14 +1542,17 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
				             _object = relation[2]
			
 
				             if isinstance(_subject,Entity) and isinstance(_object,Entity) and (_subject.entity_type,_object.entity_type) in right_combination:
			
 
				                 if relation[1]==predicate:
			
 
				+                    distance = (tokens_num_dict[_object.sentence_index] + _object.begin_index) - (
			
 
				+                            tokens_num_dict[_subject.sentence_index] + _subject.end_index)
			
 
				                     if predicate=="rel_person":
			
 
				                         if (_subject.label==0 and _object.entity_text in agency_contact ) or (_subject.label==1 and _object.entity_text in tenderee_contact):
			
 
				                             continue
			
 
				                         # 角色为中标候选人，排除"质疑|投诉|监督|受理"相关的联系人
			
 
				-                        if _subject.label in [2,3,4] and re.search("质疑|投诉|监督|受理",list_sentence[_object.sentence_index].sentence_text[max(0,_object.wordOffset_begin-10):_object.wordOffset_begin]):
			
 
				+                        if _subject.label in [2,3,4] and re.search("质疑|投诉|监督|受理|项目(单位)?联系",list_sentence[_object.sentence_index].sentence_text[max(0,_object.wordOffset_begin-10):_object.wordOffset_begin]):
			
 
				+                            continue
			
 
				+                        # 角色为中标候选人，排除距离过远的联系人
			
 
				+                        if _subject.label in [2, 3, 4] and distance>=40:
			
 
				                             continue
			
 
				-                    distance = (tokens_num_dict[_object.sentence_index] + _object.begin_index) - (
			
 
				-                                tokens_num_dict[_subject.sentence_index] + _subject.end_index)
			
 
				                     if distance>0:
			
 
				                         value = (-1 / 2 * (distance ** 2))/10000
			
 
				                     else:
			
@@ -1690,7 +1693,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
				                     sentence_distance = after_entity.sentence_index - entity.sentence_index
			
 
				                     value = (-1 / 2 * (distance ** 2)) / 10000
			
 
				                     if sentence_distance == 0:
			
 
				-                        if distance < 80:
			
 
				+                        if distance < 70:
			
 
				                             # value = (-1 / 2 * (distance ** 2)) / 10000
			
 
				                             t_match_list.append(Match(entity, after_entity, value))
			
 
				                             match_nums += 1
			
@@ -1699,7 +1702,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
				                             else:
			
 
				                                 break
			
 
				                     else:
			
 
				-                        if distance < 50:
			
 
				+                        if distance < 40:
			
 
				                             # value = (-1 / 2 * (distance ** 2)) / 10000
			
 
				                             t_match_list.append(Match(entity, after_entity, value))
			
 
				                             match_nums += 1
			
@@ -1945,6 +1948,8 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
				     match_list2 = []
			
 
				     for split_index in range(len(new_temporary_list2)):
			
 
				         split_entitys = new_temporary_list2[split_index]
			
 
				+        if len(split_entitys)<=1:
			
 
				+            continue
			
 
				         is_skip = False
			
 
				         for index in range(len(split_entitys)):
			
 
				             entity = split_entitys[index]
			
@@ -1958,20 +1963,25 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
				                         for after_index in range(index + 1, min(len(split_entitys), index + 4)):
			
 
				                             after_entity = split_entitys[after_index]
			
 
				                             if after_entity.entity_type in ['person']:
			
 
				-
			
 
				+                                distance = (tokens_num_dict[after_entity.sentence_index] + after_entity.begin_index) - (
			
 
				+                                                   tokens_num_dict[entity.sentence_index] + entity.end_index)
			
 
				                                 # 实体为中标人/候选人，联系人已确定类别【1，2】
			
 
				                                 if entity.label in [2, 3, 4] and after_entity.label in [1, 2]:
			
 
				                                     break
			
 
				+                                if entity.label in [2, 3, 4] and distance>=20:
			
 
				+                                    break
			
 
				                                 # 角色为中标候选人，排除"质疑|投诉|监督|受理"相关的联系人
			
 
				-                                if entity.label in [2, 3, 4] and re.search("质疑|投诉|监督|受理", list_sentence[after_entity.sentence_index].sentence_text[max(0,after_entity.wordOffset_begin - 10):after_entity.wordOffset_begin]):
			
 
				+                                if entity.label in [2, 3, 4] and re.search("质疑|投诉|监督|受理|项目(单位)?联系", list_sentence[after_entity.sentence_index].sentence_text[max(0,after_entity.wordOffset_begin - 10):after_entity.wordOffset_begin]):
			
 
				                                     break
			
 
				                                 if after_entity.label in [1, 2, 3]:
			
 
				-                                    distance = (tokens_num_dict[
			
 
				-                                                    after_entity.sentence_index] + after_entity.begin_index) - (
			
 
				-                                                           tokens_num_dict[entity.sentence_index] + entity.end_index)
			
 
				+                                    # distance = (tokens_num_dict[
			
 
				+                                    #                 after_entity.sentence_index] + after_entity.begin_index) - (
			
 
				+                                    #                        tokens_num_dict[entity.sentence_index] + entity.end_index)
			
 
				                                     sentence_distance = after_entity.sentence_index - entity.sentence_index
			
 
				                                     if sentence_distance == 0:
			
 
				                                         if distance < 100:
			
 
				+                                            if entity.label in [2, 3, 4] and distance>40:
			
 
				+                                                break
			
 
				                                             if (entity.label == 0 and after_entity.label == 1) or (
			
 
				                                                     entity.label == 1 and after_entity.label == 2):
			
 
				                                                 distance = distance / 100
			
@@ -1980,6 +1990,8 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
				                                             match_nums += 1
			
 
				                                     else:
			
 
				                                         if distance < 60:
			
 
				+                                            if entity.label in [2, 3, 4] and distance>20:
			
 
				+                                                break
			
 
				                                             if (entity.label == 0 and after_entity.label == 1) or (
			
 
				                                                     entity.label == 1 and after_entity.label == 2):
			
 
				                                                 distance = distance / 100
			
@@ -2008,17 +2020,15 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
				                                         if re.search("地，?址", after_entity_left):
			
 
				                                             is_skip = True
			
 
				                                             continue
			
 
				-                                        if re.search("\(|（", after_entity_left) and re.search("\)|）",
			
 
				-                                                                                              after_entity_right):
			
 
				+                                        if re.search("\(|（", after_entity_left) and re.search("\)|）",after_entity_right):
			
 
				                                             is_skip = True
			
 
				                                             continue
			
 
				-                                if entity.label in [0, 1] and after_entity.label in [0,
			
 
				-                                                                                     1] and entity.label == after_entity.label:
			
 
				+                                if entity.label in [0, 1] and after_entity.label in [0, 1] and entity.label == after_entity.label:
			
 
				                                     break
			
 
				                                 if entity.label in [0, 1] and after_entity.label in [0, 1] and split_entitys[
			
 
				                                     index + 1].entity_type == "person":
			
 
				                                     break
			
 
				-                                if entity.label in [0, 1] and after_entity.label in [2, 3, 4]:
			
 
				+                                if entity.label in [0, 1 ,5] and after_entity.label in [2, 3, 4]:
			
 
				                                     break
			
 
				                                 if entity.label in [2, 3, 4] and after_entity.label in [0, 1]:
			
 
				                                     break
			
@@ -2044,23 +2054,30 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
				                                             distance = 1
			
 
				                                             if is_same_sentence:
			
 
				                                                 if phone_begin <= 200:
			
 
				+                                                    if entity.label in [2,3,4] and phone_begin>80:
			
 
				+                                                        break
			
 
				                                                     value = (-1 / 2 * (distance ** 2)) / 10000
			
 
				                                                     match_list2.append(Match(entity, (entity, _phone), value))
			
 
				                                                     match_nums += 1
			
 
				                                             else:
			
 
				                                                 if phone_begin <= 60:
			
 
				+                                                    if entity.label in [2,3,4] and phone_begin>40:
			
 
				+                                                        break
			
 
				                                                     value = (-1 / 2 * (distance ** 2)) / 10000
			
 
				                                                     match_list2.append(Match(entity, (entity, _phone), value))
			
 
				                                                     match_nums += 1
			
 
				                         else:
			
 
				                             next_entity = split_entitys[index + 1]
			
 
				                             if next_entity.entity_type in ["org","company"]:
			
 
				-                                _entity_left = list_sentence[next_entity.sentence_index].sentence_text[max(0, next_entity.wordOffset_begin - 20):next_entity.wordOffset_begin]
			
 
				+                                _entity_left = list_sentence[next_entity.sentence_index].sentence_text[entity.wordOffset_end:next_entity.wordOffset_begin]
			
 
				                                 _entity_left2 = re.sub("，（）\(\):：", "", _entity_left)
			
 
				                                 _entity_left2 = _entity_left2[-5:]
			
 
				                                 if re.search("(地，?址|地，?点)[:：][^，。]*$", _entity_left) or re.search("地址|地点", _entity_left2):
			
 
				                                     if index + 2<= len(split_entitys) - 1:
			
 
				                                         next_entity = split_entitys[index + 2]
			
 
				+                                if len(_entity_left)<=2 and re.search("[、（\(]",_entity_left):
			
 
				+                                    if index + 2 <= len(split_entitys) - 1:
			
 
				+                                        next_entity = split_entitys[index + 2]
			
 
				                             if entity.sentence_index == next_entity.sentence_index:
			
 
				                                 mid_tokens += list_sentence[entity.sentence_index].tokens[
			
 
				                                               entity.end_index + 1:next_entity.begin_index]
			
@@ -2226,6 +2243,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
				                         prepare_link.append(after_entity)
			
 
				                         last_person = after_entity
			
 
				                         continue
			
 
				+
			
 
				     # 统一同类角色的属性
			
 
				     for k in PackDict.keys():
			
 
				         for i in range(len(PackDict[k]["roleList"])):
			
@@ -2259,8 +2277,11 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
				                                         (_pointer_person.entity_text, _p))
			
 
				 
			
 
				     # "roleList"中联系人电话去重
			
 
				+    tenderee_agency_phone = []
			
 
				     for k in PackDict.keys():
			
 
				         for i in range(len(PackDict[k]["roleList"])):
			
 
				+            if PackDict[k]["roleList"][i].role_name in ['agency','tenderee']:
			
 
				+                tenderee_agency_phone.extend([person_phone[1] for person_phone in PackDict[k]["roleList"][i].linklist if person_phone[1]])
			
 
				             # 带有联系人的电话
			
 
				             with_person = [person_phone[1] for person_phone in PackDict[k]["roleList"][i].linklist if person_phone[0]]
			
 
				             # 带有电话的联系人
			
@@ -2276,7 +2297,26 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
				                         remove_list.append(item)
			
 
				             for _item in remove_list:
			
 
				                 PackDict[k]["roleList"][i].linklist.remove(_item)
			
 
				-
			
 
				+    # 中标候选人联系方式异常排除
			
 
				+    for k in PackDict.keys():
			
 
				+        for i in range(len(PackDict[k]["roleList"])):
			
 
				+            if PackDict[k]["roleList"][i].role_name in ['win_tenderer', 'second_tenderer','third_tenderer']:
			
 
				+                if tenderee_agency_phone:
			
 
				+                    remove_list = []
			
 
				+                    for item in PackDict[k]["roleList"][i].linklist:
			
 
				+                        if item[1] and item[1] in tenderee_agency_phone:
			
 
				+                            remove_list.append(item)
			
 
				+                    for _item in remove_list:
			
 
				+                        PackDict[k]["roleList"][i].linklist.remove(_item)
			
 
				+                # else:
			
 
				+                #     # 公告中无招标代理联系方式时，可排除中标联系方式
			
 
				+                #     remove_list = []
			
 
				+                #     for _item in PackDict[k]["roleList"][i].linklist:
			
 
				+                #         # 有联系方式
			
 
				+                #         if _item[1]:
			
 
				+                #             remove_list.append(_item)
			
 
				+                #     for _item in remove_list:
			
 
				+                #         PackDict[k]["roleList"][i].linklist.remove(_item)
			
 
				     # PackDict更新company/org地址
			
 
				     last_role_prob = {}
			
 
				     for ent in pre_entity:
			
@@ -2704,9 +2744,19 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
				     # 公告中只有"招标人"且无"联系人"链接时
			
 
				     if len(PackDict)==1:
			
 
				         k = list(PackDict.keys())[0]
			
 
				-        if len(PackDict[k]["roleList"])==1:
			
 
				-            if PackDict[k]["roleList"][0].role_name == "tenderee":
			
 
				-                if not PackDict[k]["roleList"][0].linklist:
			
 
				+        tenderee_agency_role = [role for role in PackDict[k]["roleList"] if role.role_name in ['tenderee','agency']]
			
 
				+        if len(tenderee_agency_role)==1:
			
 
				+            exist_person = []
			
 
				+            exist_phone = []
			
 
				+            for role in PackDict[k]["roleList"]:
			
 
				+                for group in role.linklist:
			
 
				+                    if group[0]:
			
 
				+                        exist_person.append(group[0])
			
 
				+                    if group[1]:
			
 
				+                        exist_phone.append(group[1])
			
 
				+
			
 
				+            if tenderee_agency_role[0].role_name == "tenderee":
			
 
				+                if not tenderee_agency_role[0].linklist:
			
 
				                     get_contacts = False
			
 
				                     if not get_contacts:
			
 
				                         # 根据大纲Outline类召回联系人
			
@@ -2718,8 +2768,9 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
				                                         if t_person.person_phone:
			
 
				                                             _phone = [p.entity_text for p in t_person.person_phone]
			
 
				                                             for _p in _phone:
			
 
				-                                                PackDict[k]["roleList"][0].linklist.append((t_person.entity_text, _p))
			
 
				-                                            get_contacts = True
			
 
				+                                                if t_person.entity_text not in exist_person and _p not in exist_phone:
			
 
				+                                                    tenderee_agency_role[0].linklist.append((t_person.entity_text, _p))
			
 
				+                                                    get_contacts = True
			
 
				                                             break
			
 
				                                     elif words_num_dict[t_person.sentence_index] + t_person.wordOffset_begin >= \
			
 
				                                             words_num_dict[outline.sentence_end_index] + outline.wordOffset_end:
			
@@ -2727,9 +2778,10 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
				                                 if not get_contacts:
			
 
				                                     sentence_phone = phone.findall(outline.outline_text)
			
 
				                                     if sentence_phone:
			
 
				-                                        PackDict[k]["roleList"][0].linklist.append(("", sentence_phone[0]))
			
 
				-                                        get_contacts = True
			
 
				-                                        break
			
 
				+                                        if sentence_phone[0] not in exist_phone:
			
 
				+                                            tenderee_agency_role[0].linklist.append(("", sentence_phone[0]))
			
 
				+                                            get_contacts = True
			
 
				+                                            break
			
 
				                     if not get_contacts:
			
 
				                         # 直接取文中倒数第一个联系人
			
 
				                         for _entity in temporary_list2[::-1]:
			
@@ -2737,14 +2789,16 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
				                                 if _entity.person_phone:
			
 
				                                     _phone = [p.entity_text for p in _entity.person_phone]
			
 
				                                     for _p in _phone:
			
 
				-                                        PackDict[k]["roleList"][0].linklist.append((_entity.entity_text, _p))
			
 
				-                                    get_contacts = True
			
 
				+                                        if _entity.entity_text not in exist_person and _p not in exist_phone:
			
 
				+                                            tenderee_agency_role[0].linklist.append((_entity.entity_text, _p))
			
 
				+                                            get_contacts = True
			
 
				                                     break
			
 
				                     if not get_contacts:
			
 
				                         # 如果文中只有一个“phone”实体，则直接取为联系人电话
			
 
				                         if len(phone_entitys) == 1:
			
 
				-                            PackDict[k]["roleList"][0].linklist.append(("", phone_entitys[0].entity_text))
			
 
				-                            get_contacts = True
			
 
				+                            if phone_entitys[0].entity_text not in exist_phone:
			
 
				+                                tenderee_agency_role[0].linklist.append(("", phone_entitys[0].entity_text))
			
 
				+                                get_contacts = True
			
 
				                     if not get_contacts:
			
 
				                         # 通过大纲Outline类直接取电话
			
 
				                         if len(new_split_list) > 1:
			
@@ -2754,8 +2808,8 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
				                                 if re.search("联系人|联系方|联系方式|联系电话|电话|负责人|与.{2,4}联系", sentence_outline):
			
 
				                                     sentence_phone = phone.findall(temp_sentence)
			
 
				                                     if sentence_phone:
			
 
				-                                        if sentence_phone[0] in [ent.entity_text for ent in phone_entitys]:
			
 
				-                                            PackDict[k]["roleList"][0].linklist.append(("", sentence_phone[0]))
			
 
				+                                        if sentence_phone[0] in [ent.entity_text for ent in phone_entitys] and sentence_phone[0] not in exist_phone:
			
 
				+                                            tenderee_agency_role[0].linklist.append(("", sentence_phone[0]))
			
 
				                                             get_contacts = True
			
 
				                                             break
			
 
				                     if not get_contacts:
			
@@ -2773,9 +2827,10 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
				                                 match_text = match_text.split("。")[0]
			
 
				                                 sentence_phone = phone.findall(match_text)
			
 
				                                 if sentence_phone:
			
 
				-                                    PackDict[k]["roleList"][0].linklist.append(("", sentence_phone[0]))
			
 
				-                                    get_tenderee_contacts = True
			
 
				-                                    break
			
 
				+                                    if sentence_phone[0] not in exist_phone:
			
 
				+                                        tenderee_agency_role[0].linklist.append(("", sentence_phone[0]))
			
 
				+                                        get_tenderee_contacts = True
			
 
				+                                        break
			
 
				                             if get_tenderee_contacts:
			
 
				                                 break
			
 
				 
			
@@ -2990,48 +3045,84 @@ def getTimeAttributes(list_entity,list_sentence):
 
				         in_attachment = entity.in_attachment
			
 
				         extract_time = my_timeFormat(entity_text)
			
 
				         # definite_time = "00:00:00"
			
 
				-        # if extract_time:
			
 
				-        #     t = re.compile("(?P<day>下午|上午|早上)?(?P<hour>\d{1,2})[:：时点](?P<half_hour>半)?(?P<minute>\d{2})?[:：分]?(?P<second>\d{2})?秒?")
			
 
				-        #     t_in_word = re.search(t,entity_text)
			
 
				-        #     t_out_of_word = re.search("^[^\d]{,2}"+t.pattern,sentence_text[entity.wordOffset_end:])
			
 
				-        #     if t_in_word:
			
 
				-        #         print('t_in_word',entity_text,t_in_word.groupdict())
			
 
				-        #         day = t_in_word.groupdict().get('day',"")
			
 
				-        #         hour = t_in_word.groupdict().get('hour',"")
			
 
				-        #         half_hour = t_in_word.groupdict().get('half_hour',"")
			
 
				-        #         minute = t_in_word.groupdict().get('minute',"")
			
 
				-        #         second = t_in_word.groupdict().get('second',"")
			
 
				-        #         if hour:
			
 
				-        #             if day=='下午' and int(hour)<12:
			
 
				-        #                 hour = str(int(hour)+12)
			
 
				-        #             if int(hour)>24:
			
 
				-        #                 continue
			
 
				-        #         else:
			
 
				-        #             hour = "00"
			
 
				-        #         if not minute:
			
 
				-        #             if half_hour:
			
 
				-        #                 minute = "30"
			
 
				-        #             else:
			
 
				-        #                 minute = "00"
			
 
				-        #         if int(minute)>60:
			
 
				-        #             continue
			
 
				-        #         if not second:
			
 
				-        #             second = "00"
			
 
				-        #         if int(second)>60:
			
 
				-        #             continue
			
 
				-        #         # 数字字符格式化
			
 
				-        #         # hour = str(int(hour))
			
 
				-        #         # minute = str(int(minute))
			
 
				-        #         # second = str(int(second))
			
 
				-        #         definite_time = "%s:%s:%s"%(hour.rjust(2,"0"),minute.rjust(2,"0"),second.rjust(2,"0"))
			
 
				-        #         print(definite_time)
			
 
				-        #
			
 
				-        #     elif t_out_of_word:
			
 
				-        #         print('t_out_of_word', entity_text+sentence_text[entity.wordOffset_end:], t_out_of_word.groupdict())
			
 
				+        if extract_time:
			
 
				+            definite_time_list = []
			
 
				+            t = re.compile("(?P<day>下午|上午|早上)?(?P<hour>\d{1,2})[:：时点](?P<half_hour>半)?(?P<minute>\d{2})?[:：分]?(?P<second>\d{2})?秒?")
			
 
				+            t_in_word = re.search(t,entity_text.replace(" ",""))
			
 
				+            t_out_of_word = re.search("^[^\d]{,2}"+t.pattern,sentence_text[entity.wordOffset_end:])
			
 
				+            if t_in_word:
			
 
				+                # print('t_in_word',entity_text,t_in_word.groupdict())
			
 
				+                day = t_in_word.groupdict().get('day',"")
			
 
				+                hour = t_in_word.groupdict().get('hour',"")
			
 
				+                half_hour = t_in_word.groupdict().get('half_hour',"")
			
 
				+                minute = t_in_word.groupdict().get('minute',"")
			
 
				+                second = t_in_word.groupdict().get('second',"")
			
 
				+                if hour:
			
 
				+                    if day=='下午' and int(hour)<12:
			
 
				+                        hour = str(int(hour)+12)
			
 
				+                    if int(hour)>24:
			
 
				+                        continue
			
 
				+                else:
			
 
				+                    hour = "00"
			
 
				+                if not minute:
			
 
				+                    if half_hour:
			
 
				+                        minute = "30"
			
 
				+                    else:
			
 
				+                        minute = "00"
			
 
				+                if int(minute)>60:
			
 
				+                    continue
			
 
				+                if not second:
			
 
				+                    second = "00"
			
 
				+                if int(second)>60:
			
 
				+                    continue
			
 
				+                definite_time = "%s:%s:%s"%(hour.rjust(2,"0"),minute.rjust(2,"0"),second.rjust(2,"0"))
			
 
				+                # print(definite_time)
			
 
				+                definite_time_list.append(definite_time)
			
 
				+
			
 
				+            if t_out_of_word:
			
 
				+                # print('t_out_of_word', entity_text+sentence_text[entity.wordOffset_end:], t_out_of_word.groupdict())
			
 
				+                day = t_out_of_word.groupdict().get('day', "")
			
 
				+                hour = t_out_of_word.groupdict().get('hour', "")
			
 
				+                half_hour = t_out_of_word.groupdict().get('half_hour', "")
			
 
				+                minute = t_out_of_word.groupdict().get('minute', "")
			
 
				+                second = t_out_of_word.groupdict().get('second', "")
			
 
				+                if hour:
			
 
				+                    if day == '下午' and int(hour) < 12:
			
 
				+                        hour = str(int(hour) + 12)
			
 
				+                    if int(hour) > 24:
			
 
				+                        continue
			
 
				+                else:
			
 
				+                    hour = "00"
			
 
				+                if not minute:
			
 
				+                    if half_hour:
			
 
				+                        minute = "30"
			
 
				+                    else:
			
 
				+                        minute = "00"
			
 
				+                if int(minute) > 60:
			
 
				+                    continue
			
 
				+                if not second:
			
 
				+                    second = "00"
			
 
				+                if int(second) > 60:
			
 
				+                    continue
			
 
				+                definite_time = "%s:%s:%s" % (hour.rjust(2, "0"), minute.rjust(2, "0"), second.rjust(2, "0"))
			
 
				+                # print(definite_time)
			
 
				+                definite_time_list.append(definite_time)
			
 
				 
			
 
				 
			
 
				+            min_len = min(len(extract_time),len(definite_time_list))
			
 
				+            for i in range(min_len):
			
 
				+                if definite_time_list[i] != "00:00:00":
			
 
				+                    extract_time[i] = extract_time[i] + " " + definite_time_list[i]
			
 
				 
			
 
				         if extract_time:
			
 
				+            # 时间变更prob优化
			
 
				+            if re.search("原",entity_left2):
			
 
				+                last_index = 0
			
 
				+                for item in re.finditer("原",entity_left2):
			
 
				+                    last_index = item.start() + 1
			
 
				+                label_prob = label_prob - 0.2 * last_index / len(entity_left2)
			
 
				+                # print('prob优化',label_prob,extract_time)
			
 
				+
			
 
				             # 优化多个并列的时间，如：开标时间和截标时间，截标时间和报名结束时间
			
 
				             if entity.label in [2,3,9]:
			
 
				                 if entity.label==2 and re.search("截标|投标.{,2}截止|递交(?:文件)?.{,2}截止|报价.{,2}截止|响应.{,2}截止",entity_left3):
			
@@ -3042,8 +3133,12 @@ def getTimeAttributes(list_entity,list_sentence):
 
				                     dict_time['time_registrationEnd'].append((extract_time[0], 0.5, in_attachment))
			
 
				                 if entity.label==9 and re.search("截标|投标.{,2}截止|递交(?:文件)?.{,2}截止|报价.{,2}截止|响应.{,2}截止",entity_left3):
			
 
				                     dict_time['time_bidclose'].append((extract_time[0], 0.5, in_attachment))
			
 
				-
			
 
				-
			
 
				+            # 补充公告末尾处的发布时间
			
 
				+            if entity.label==0:
			
 
				+                if entity.is_tail:
			
 
				+                    entity.label = 1
			
 
				+                    entity.values[1] = 0.5
			
 
				+                    dict_time['time_release'].append((extract_time[0], 0.5, in_attachment))
			
 
				             # 2022/12/12 新增挂牌时间正则
			
 
				             if re.search("挂牌.{,4}(?:时间|日期)",entity_left2):
			
 
				                 if re.search("挂牌.{,4}(?:时间|日期)",entity_left2).end()>len(entity_left2)/2:
			
@@ -3206,7 +3301,7 @@ def getTimeAttributes(list_entity,list_sentence):
 
				             last_time_type = ""
			
 
				         last_sentence_index = entity.sentence_index
			
 
				 
			
 
				-
			
 
				+    # print(dict_time)
			
 
				     result_dict = dict((key,"") for key in dict_time.keys())
			
 
				     for time_type,value in dict_time.items():
			
 
				         list_time = dict_time[time_type]