Browse Source

Merge remote-tracking branch 'origin/master'

# Conflicts:
#	BiddingKG/dl/interface/extract.py
lsm 1 year ago
parent
commit
cb53b42016

+ 8 - 2
BiddingKG/dl/interface/Preprocessing.py

@@ -2665,6 +2665,7 @@ def get_preprocessed_entitys(list_sentences,useselffool=True,cost_time=dict()):
             cost_time[key_nerToken] = 0
         cost_time[key_nerToken] += round(time.time()-start_time,2)
 
+        doctextcon_sentence_len = sum([1 for sentence in list_sentence if not sentence.in_attachment])
         company_dict = set()
         company_index = dict((i,set()) for i in range(len(list_sentence)))
         for sentence_index in range(len(list_sentence)):
@@ -2743,6 +2744,11 @@ def get_preprocessed_entitys(list_sentences,useselffool=True,cost_time=dict()):
                     continue
                 elif entity_type=="person" and len(entity_text)>10 and len(re.findall("[\u4e00-\u9fa5]",entity_text))<len(entity_text)/2:
                     continue
+                # 识别不完整的组织机构补充
+                if entity_type in ["org"]:
+                    end_words = re.search("^[\u4e00-\u9fa5]{,5}(?:办公室|部|中心|处|会)",sentence_text[end_index_temp:end_index_temp+10])
+                    if end_words:
+                        entity_text = entity_text + end_words.group()
 
                 for j in range(len(list_tokenbegin)):
                     if list_tokenbegin[j]==begin_index_temp:
@@ -2820,8 +2826,8 @@ def get_preprocessed_entitys(list_sentences,useselffool=True,cost_time=dict()):
 
                 list_sentence_entitys.append(Entity(doc_id,entity_id,entity_text,entity_type,sentence_index,begin_index,end_index,ner_entity[0],ner_entity[1],in_attachment=in_attachment))
             # 标记文章末尾的"发布人”、“发布时间”实体
-            if sentence_index==len(list_sentence)-1:
-                if len(list_sentence_entitys[-2:])>2:
+            if sentence_index==len(list_sentence)-1 or sentence_index==doctextcon_sentence_len-1:
+                if len(list_sentence_entitys[-2:])==2:
                     second2last = list_sentence_entitys[-2]
                     last = list_sentence_entitys[-1]
                     if (second2last.entity_type in ["company",'org'] and last.entity_type=="time") or (

+ 49 - 0
BiddingKG/dl/interface/extract.py

@@ -123,6 +123,50 @@ def str_normalize(text):
     # print(new_text)
 
     return new_text
+# Repair entities in prem whose region prefix is incomplete.
+def repair_entity(prem,district_dict,list_articles):
+    """Prepend the predicted region to truncated tenderee/agency names, in place.
+
+    Reads prem[0]['prem'][package]['roleList'], district_dict['district'] (province/city/
+    district) and list_articles[0].content; rewrites role['role_text'] and returns None.
+    """
+    district_dict = district_dict['district']
+    # Empty or '未知' values (and '全国' for province) become "" so they add no prefix.
+    province = district_dict['province'] if district_dict['province'] and district_dict['province'] not in ['未知','全国'] else ""
+    city = district_dict['city'] if district_dict['city'] and district_dict['city']!='未知' else ""
+    district = district_dict['district'] if district_dict['district'] and district_dict['district']!='未知' else ""
+    content_text = list_articles[0].content
+    autonomous_region_dict = {"新疆":"新疆维吾尔","西藏":"西藏","内蒙古":"内蒙古","广西":"广西壮族","宁夏":"宁夏回族"}
+    for _prem in prem[0]['prem'].values():
+        for role in _prem['roleList']:
+            if role['role_name'] in ['tenderee','agency']:
+                role_text = role['role_text']
+                if re.search("^[省市县区]",role_text):
+                    # Names starting with '省道', '市政' or '市场' are excluded from prefixing.
+                    if role_text[0]=='省' and role_text[:2] not in ['省道']:
+                        role['role_text'] = province + role_text
+                    elif role_text[0]=='市' and role_text[:2] not in ['市政','市场']:
+                        # County-level city; an empty district would make the test match any '市'.
+                        if district and district+'市' in content_text:
+                            role['role_text'] = district + role_text
+                        else:
+                            role['role_text'] = city + role_text
+                    elif role_text[0] in ['县','区']:
+                        role['role_text'] = district + role_text
+                elif re.search("^自治[区州县]",role_text):
+                    if role_text[:3]=='自治区':
+                        role['role_text'] = autonomous_region_dict.get(province,"") + role_text
+                    elif role_text[:3] in ['自治县',"自治州"]:
+                        # Try district first, then city; drop their own "自治县/州" ending.
+                        if re.search("自治[县州]?$",district):
+                            role['role_text'] = re.sub("自治[县州]?","",district) + role_text
+                        elif re.search("族$",district):
+                            role['role_text'] = district + role_text
+                        elif re.search("自治[县州]?$",city):
+                            role['role_text'] = re.sub("自治[县州]?","",city) + role_text
+                        elif re.search("族$",city):
+                            role['role_text'] = city + role_text
+
 
 def predict(doc_id,text,title="",page_time="",web_source_no='',web_source_name="",original_docchannel='',**kwargs):
     cost_time = dict()
@@ -274,6 +318,11 @@ def predict(doc_id,text,title="",page_time="",web_source_no='',web_source_name="
     district = predictor.getPredictor('district').predict(project_name=codeName[0]['name'], prem=prem,title=title, list_articles=list_articles, web_source_name=web_source_name, list_entitys=list_entitys)
     cost_time["district"] = round(time.time() - start_time, 2)
 
+    '''根据district提取结果修复实体'''
+    repair_entity(prem,district,list_articles)
+
+    '''限制行业最高金额'''
+    getAttributes.limit_maximum_amount(prem, industry)
     # '''限制行业最高金额'''
     # getAttributes.limit_maximum_amount(prem, industry) # 20230703取消,改为整合所有要素后面纠正
 

+ 172 - 77
BiddingKG/dl/interface/getAttributes.py

@@ -1384,7 +1384,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                         error_numStr_index.append(numStr_index)
                         last_phone_mask = False
                         continue
-                if re.search("身份证号?码?|注册[证号]|帐号|编[号码]|报价|标价|证号|价格|型号|附件|代码|列号|行号|税号|[\(\(]万?元[\)\)]|[a-zA-Z]+\d*$", re.sub(",","",phone_left)):
+                if re.search("身份证号?码?|注册[证号]|帐号|编[号码]|报价|费率|标价|证号|价格|型号|附件|代码|列号|行号|税号|[\(\(]万?元[\)\)]|[a-zA-Z]+\d*$", re.sub(",","",phone_left)):
                     error_numStr_index.append(numStr_index)
                     last_phone_mask = False
                     continue
@@ -1528,7 +1528,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
         # print("预测数据:",len(temp_data))
         # 去重结果
         relation_list = list(set(relation_list))
-    # print(relation_list)
+    # print([(rel[0].entity_text,rel[2].entity_text) for rel in relation_list])
     right_combination = [('org','person'),('company','person'),('company','location'),('org','location'),('person','phone')]
     linked_company = set()
     linked_person = set()
@@ -1542,14 +1542,17 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
             _object = relation[2]
             if isinstance(_subject,Entity) and isinstance(_object,Entity) and (_subject.entity_type,_object.entity_type) in right_combination:
                 if relation[1]==predicate:
+                    distance = (tokens_num_dict[_object.sentence_index] + _object.begin_index) - (
+                            tokens_num_dict[_subject.sentence_index] + _subject.end_index)
                     if predicate=="rel_person":
                         if (_subject.label==0 and _object.entity_text in agency_contact ) or (_subject.label==1 and _object.entity_text in tenderee_contact):
                             continue
                         # 角色为中标候选人,排除"质疑|投诉|监督|受理"相关的联系人
-                        if _subject.label in [2,3,4] and re.search("质疑|投诉|监督|受理",list_sentence[_object.sentence_index].sentence_text[max(0,_object.wordOffset_begin-10):_object.wordOffset_begin]):
+                        if _subject.label in [2,3,4] and re.search("质疑|投诉|监督|受理|项目(单位)?联系",list_sentence[_object.sentence_index].sentence_text[max(0,_object.wordOffset_begin-10):_object.wordOffset_begin]):
+                            continue
+                        # 角色为中标候选人,排除距离过远的联系人
+                        if _subject.label in [2, 3, 4] and distance>=40:
                             continue
-                    distance = (tokens_num_dict[_object.sentence_index] + _object.begin_index) - (
-                                tokens_num_dict[_subject.sentence_index] + _subject.end_index)
                     if distance>0:
                         value = (-1 / 2 * (distance ** 2))/10000
                     else:
@@ -1690,7 +1693,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                     sentence_distance = after_entity.sentence_index - entity.sentence_index
                     value = (-1 / 2 * (distance ** 2)) / 10000
                     if sentence_distance == 0:
-                        if distance < 80:
+                        if distance < 70:
                             # value = (-1 / 2 * (distance ** 2)) / 10000
                             t_match_list.append(Match(entity, after_entity, value))
                             match_nums += 1
@@ -1699,7 +1702,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                             else:
                                 break
                     else:
-                        if distance < 50:
+                        if distance < 40:
                             # value = (-1 / 2 * (distance ** 2)) / 10000
                             t_match_list.append(Match(entity, after_entity, value))
                             match_nums += 1
@@ -1945,6 +1948,8 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
     match_list2 = []
     for split_index in range(len(new_temporary_list2)):
         split_entitys = new_temporary_list2[split_index]
+        if len(split_entitys)<=1:
+            continue
         is_skip = False
         for index in range(len(split_entitys)):
             entity = split_entitys[index]
@@ -1958,20 +1963,25 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                         for after_index in range(index + 1, min(len(split_entitys), index + 4)):
                             after_entity = split_entitys[after_index]
                             if after_entity.entity_type in ['person']:
-
+                                distance = (tokens_num_dict[after_entity.sentence_index] + after_entity.begin_index) - (
+                                                   tokens_num_dict[entity.sentence_index] + entity.end_index)
                                 # 实体为中标人/候选人,联系人已确定类别【1,2】
                                 if entity.label in [2, 3, 4] and after_entity.label in [1, 2]:
                                     break
+                                if entity.label in [2, 3, 4] and distance>=20:
+                                    break
                                 # 角色为中标候选人,排除"质疑|投诉|监督|受理"相关的联系人
-                                if entity.label in [2, 3, 4] and re.search("质疑|投诉|监督|受理", list_sentence[after_entity.sentence_index].sentence_text[max(0,after_entity.wordOffset_begin - 10):after_entity.wordOffset_begin]):
+                                if entity.label in [2, 3, 4] and re.search("质疑|投诉|监督|受理|项目(单位)?联系", list_sentence[after_entity.sentence_index].sentence_text[max(0,after_entity.wordOffset_begin - 10):after_entity.wordOffset_begin]):
                                     break
                                 if after_entity.label in [1, 2, 3]:
-                                    distance = (tokens_num_dict[
-                                                    after_entity.sentence_index] + after_entity.begin_index) - (
-                                                           tokens_num_dict[entity.sentence_index] + entity.end_index)
+                                    # distance = (tokens_num_dict[
+                                    #                 after_entity.sentence_index] + after_entity.begin_index) - (
+                                    #                        tokens_num_dict[entity.sentence_index] + entity.end_index)
                                     sentence_distance = after_entity.sentence_index - entity.sentence_index
                                     if sentence_distance == 0:
                                         if distance < 100:
+                                            if entity.label in [2, 3, 4] and distance>40:
+                                                break
                                             if (entity.label == 0 and after_entity.label == 1) or (
                                                     entity.label == 1 and after_entity.label == 2):
                                                 distance = distance / 100
@@ -1980,6 +1990,8 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                                             match_nums += 1
                                     else:
                                         if distance < 60:
+                                            if entity.label in [2, 3, 4] and distance>20:
+                                                break
                                             if (entity.label == 0 and after_entity.label == 1) or (
                                                     entity.label == 1 and after_entity.label == 2):
                                                 distance = distance / 100
@@ -2008,17 +2020,15 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                                         if re.search("地,?址", after_entity_left):
                                             is_skip = True
                                             continue
-                                        if re.search("\(|(", after_entity_left) and re.search("\)|)",
-                                                                                              after_entity_right):
+                                        if re.search("\(|(", after_entity_left) and re.search("\)|)",after_entity_right):
                                             is_skip = True
                                             continue
-                                if entity.label in [0, 1] and after_entity.label in [0,
-                                                                                     1] and entity.label == after_entity.label:
+                                if entity.label in [0, 1] and after_entity.label in [0, 1] and entity.label == after_entity.label:
                                     break
                                 if entity.label in [0, 1] and after_entity.label in [0, 1] and split_entitys[
                                     index + 1].entity_type == "person":
                                     break
-                                if entity.label in [0, 1] and after_entity.label in [2, 3, 4]:
+                                if entity.label in [0, 1 ,5] and after_entity.label in [2, 3, 4]:
                                     break
                                 if entity.label in [2, 3, 4] and after_entity.label in [0, 1]:
                                     break
@@ -2044,23 +2054,30 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                                             distance = 1
                                             if is_same_sentence:
                                                 if phone_begin <= 200:
+                                                    if entity.label in [2,3,4] and phone_begin>80:
+                                                        break
                                                     value = (-1 / 2 * (distance ** 2)) / 10000
                                                     match_list2.append(Match(entity, (entity, _phone), value))
                                                     match_nums += 1
                                             else:
                                                 if phone_begin <= 60:
+                                                    if entity.label in [2,3,4] and phone_begin>40:
+                                                        break
                                                     value = (-1 / 2 * (distance ** 2)) / 10000
                                                     match_list2.append(Match(entity, (entity, _phone), value))
                                                     match_nums += 1
                         else:
                             next_entity = split_entitys[index + 1]
                             if next_entity.entity_type in ["org","company"]:
-                                _entity_left = list_sentence[next_entity.sentence_index].sentence_text[max(0, next_entity.wordOffset_begin - 20):next_entity.wordOffset_begin]
+                                _entity_left = list_sentence[next_entity.sentence_index].sentence_text[entity.wordOffset_end:next_entity.wordOffset_begin]
                                 _entity_left2 = re.sub(",()\(\)::", "", _entity_left)
                                 _entity_left2 = _entity_left2[-5:]
                                 if re.search("(地,?址|地,?点)[::][^,。]*$", _entity_left) or re.search("地址|地点", _entity_left2):
                                     if index + 2<= len(split_entitys) - 1:
                                         next_entity = split_entitys[index + 2]
+                                if len(_entity_left)<=2 and re.search("[、(\(]",_entity_left):
+                                    if index + 2 <= len(split_entitys) - 1:
+                                        next_entity = split_entitys[index + 2]
                             if entity.sentence_index == next_entity.sentence_index:
                                 mid_tokens += list_sentence[entity.sentence_index].tokens[
                                               entity.end_index + 1:next_entity.begin_index]
@@ -2226,6 +2243,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                         prepare_link.append(after_entity)
                         last_person = after_entity
                         continue
+
     # 统一同类角色的属性
     for k in PackDict.keys():
         for i in range(len(PackDict[k]["roleList"])):
@@ -2259,8 +2277,11 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                                         (_pointer_person.entity_text, _p))
 
     # "roleList"中联系人电话去重
+    tenderee_agency_phone = []
     for k in PackDict.keys():
         for i in range(len(PackDict[k]["roleList"])):
+            if PackDict[k]["roleList"][i].role_name in ['agency','tenderee']:
+                tenderee_agency_phone.extend([person_phone[1] for person_phone in PackDict[k]["roleList"][i].linklist if person_phone[1]])
             # 带有联系人的电话
             with_person = [person_phone[1] for person_phone in PackDict[k]["roleList"][i].linklist if person_phone[0]]
             # 带有电话的联系人
@@ -2276,7 +2297,26 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                         remove_list.append(item)
             for _item in remove_list:
                 PackDict[k]["roleList"][i].linklist.remove(_item)
-
+    # 中标候选人联系方式异常排除
+    for k in PackDict.keys():
+        for i in range(len(PackDict[k]["roleList"])):
+            if PackDict[k]["roleList"][i].role_name in ['win_tenderer', 'second_tenderer','third_tenderer']:
+                if tenderee_agency_phone:
+                    remove_list = []
+                    for item in PackDict[k]["roleList"][i].linklist:
+                        if item[1] and item[1] in tenderee_agency_phone:
+                            remove_list.append(item)
+                    for _item in remove_list:
+                        PackDict[k]["roleList"][i].linklist.remove(_item)
+                # else:
+                #     # 公告中无招标代理联系方式时,可排除中标联系方式
+                #     remove_list = []
+                #     for _item in PackDict[k]["roleList"][i].linklist:
+                #         # 有联系方式
+                #         if _item[1]:
+                #             remove_list.append(_item)
+                #     for _item in remove_list:
+                #         PackDict[k]["roleList"][i].linklist.remove(_item)
     # PackDict更新company/org地址
     last_role_prob = {}
     for ent in pre_entity:
@@ -2704,9 +2744,19 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
     # 公告中只有"招标人"且无"联系人"链接时
     if len(PackDict)==1:
         k = list(PackDict.keys())[0]
-        if len(PackDict[k]["roleList"])==1:
-            if PackDict[k]["roleList"][0].role_name == "tenderee":
-                if not PackDict[k]["roleList"][0].linklist:
+        tenderee_agency_role = [role for role in PackDict[k]["roleList"] if role.role_name in ['tenderee','agency']]
+        if len(tenderee_agency_role)==1:
+            exist_person = []
+            exist_phone = []
+            for role in PackDict[k]["roleList"]:
+                for group in role.linklist:
+                    if group[0]:
+                        exist_person.append(group[0])
+                    if group[1]:
+                        exist_phone.append(group[1])
+
+            if tenderee_agency_role[0].role_name == "tenderee":
+                if not tenderee_agency_role[0].linklist:
                     get_contacts = False
                     if not get_contacts:
                         # 根据大纲Outline类召回联系人
@@ -2718,8 +2768,9 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                                         if t_person.person_phone:
                                             _phone = [p.entity_text for p in t_person.person_phone]
                                             for _p in _phone:
-                                                PackDict[k]["roleList"][0].linklist.append((t_person.entity_text, _p))
-                                            get_contacts = True
+                                                if t_person.entity_text not in exist_person and _p not in exist_phone:
+                                                    tenderee_agency_role[0].linklist.append((t_person.entity_text, _p))
+                                                    get_contacts = True
                                             break
                                     elif words_num_dict[t_person.sentence_index] + t_person.wordOffset_begin >= \
                                             words_num_dict[outline.sentence_end_index] + outline.wordOffset_end:
@@ -2727,9 +2778,10 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                                 if not get_contacts:
                                     sentence_phone = phone.findall(outline.outline_text)
                                     if sentence_phone:
-                                        PackDict[k]["roleList"][0].linklist.append(("", sentence_phone[0]))
-                                        get_contacts = True
-                                        break
+                                        if sentence_phone[0] not in exist_phone:
+                                            tenderee_agency_role[0].linklist.append(("", sentence_phone[0]))
+                                            get_contacts = True
+                                            break
                     if not get_contacts:
                         # 直接取文中倒数第一个联系人
                         for _entity in temporary_list2[::-1]:
@@ -2737,14 +2789,16 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                                 if _entity.person_phone:
                                     _phone = [p.entity_text for p in _entity.person_phone]
                                     for _p in _phone:
-                                        PackDict[k]["roleList"][0].linklist.append((_entity.entity_text, _p))
-                                    get_contacts = True
+                                        if _entity.entity_text not in exist_person and _p not in exist_phone:
+                                            tenderee_agency_role[0].linklist.append((_entity.entity_text, _p))
+                                            get_contacts = True
                                     break
                     if not get_contacts:
                         # 如果文中只有一个“phone”实体,则直接取为联系人电话
                         if len(phone_entitys) == 1:
-                            PackDict[k]["roleList"][0].linklist.append(("", phone_entitys[0].entity_text))
-                            get_contacts = True
+                            if phone_entitys[0].entity_text not in exist_phone:
+                                tenderee_agency_role[0].linklist.append(("", phone_entitys[0].entity_text))
+                                get_contacts = True
                     if not get_contacts:
                         # 通过大纲Outline类直接取电话
                         if len(new_split_list) > 1:
@@ -2754,8 +2808,8 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                                 if re.search("联系人|联系方|联系方式|联系电话|电话|负责人|与.{2,4}联系", sentence_outline):
                                     sentence_phone = phone.findall(temp_sentence)
                                     if sentence_phone:
-                                        if sentence_phone[0] in [ent.entity_text for ent in phone_entitys]:
-                                            PackDict[k]["roleList"][0].linklist.append(("", sentence_phone[0]))
+                                        if sentence_phone[0] in [ent.entity_text for ent in phone_entitys] and sentence_phone[0] not in exist_phone:
+                                            tenderee_agency_role[0].linklist.append(("", sentence_phone[0]))
                                             get_contacts = True
                                             break
                     if not get_contacts:
@@ -2773,9 +2827,10 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                                 match_text = match_text.split("。")[0]
                                 sentence_phone = phone.findall(match_text)
                                 if sentence_phone:
-                                    PackDict[k]["roleList"][0].linklist.append(("", sentence_phone[0]))
-                                    get_tenderee_contacts = True
-                                    break
+                                    if sentence_phone[0] not in exist_phone:
+                                        tenderee_agency_role[0].linklist.append(("", sentence_phone[0]))
+                                        get_tenderee_contacts = True
+                                        break
                             if get_tenderee_contacts:
                                 break
 
@@ -2990,48 +3045,84 @@ def getTimeAttributes(list_entity,list_sentence):
         in_attachment = entity.in_attachment
         extract_time = my_timeFormat(entity_text)
         # definite_time = "00:00:00"
-        # if extract_time:
-        #     t = re.compile("(?P<day>下午|上午|早上)?(?P<hour>\d{1,2})[::时点](?P<half_hour>半)?(?P<minute>\d{2})?[::分]?(?P<second>\d{2})?秒?")
-        #     t_in_word = re.search(t,entity_text)
-        #     t_out_of_word = re.search("^[^\d]{,2}"+t.pattern,sentence_text[entity.wordOffset_end:])
-        #     if t_in_word:
-        #         print('t_in_word',entity_text,t_in_word.groupdict())
-        #         day = t_in_word.groupdict().get('day',"")
-        #         hour = t_in_word.groupdict().get('hour',"")
-        #         half_hour = t_in_word.groupdict().get('half_hour',"")
-        #         minute = t_in_word.groupdict().get('minute',"")
-        #         second = t_in_word.groupdict().get('second',"")
-        #         if hour:
-        #             if day=='下午' and int(hour)<12:
-        #                 hour = str(int(hour)+12)
-        #             if int(hour)>24:
-        #                 continue
-        #         else:
-        #             hour = "00"
-        #         if not minute:
-        #             if half_hour:
-        #                 minute = "30"
-        #             else:
-        #                 minute = "00"
-        #         if int(minute)>60:
-        #             continue
-        #         if not second:
-        #             second = "00"
-        #         if int(second)>60:
-        #             continue
-        #         # 数字字符格式化
-        #         # hour = str(int(hour))
-        #         # minute = str(int(minute))
-        #         # second = str(int(second))
-        #         definite_time = "%s:%s:%s"%(hour.rjust(2,"0"),minute.rjust(2,"0"),second.rjust(2,"0"))
-        #         print(definite_time)
-        #
-        #     elif t_out_of_word:
-        #         print('t_out_of_word', entity_text+sentence_text[entity.wordOffset_end:], t_out_of_word.groupdict())
+        if extract_time:
+            definite_time_list = []
+            t = re.compile("(?P<day>下午|上午|早上)?(?P<hour>\d{1,2})[::时点](?P<half_hour>半)?(?P<minute>\d{2})?[::分]?(?P<second>\d{2})?秒?")
+            t_in_word = re.search(t,entity_text.replace(" ",""))
+            t_out_of_word = re.search("^[^\d]{,2}"+t.pattern,sentence_text[entity.wordOffset_end:])
+            if t_in_word:
+                # print('t_in_word',entity_text,t_in_word.groupdict())
+                day = t_in_word.groupdict().get('day',"")
+                hour = t_in_word.groupdict().get('hour',"")
+                half_hour = t_in_word.groupdict().get('half_hour',"")
+                minute = t_in_word.groupdict().get('minute',"")
+                second = t_in_word.groupdict().get('second',"")
+                if hour:
+                    if day=='下午' and int(hour)<12:
+                        hour = str(int(hour)+12)
+                    if int(hour)>24:
+                        continue
+                else:
+                    hour = "00"
+                if not minute:
+                    if half_hour:
+                        minute = "30"
+                    else:
+                        minute = "00"
+                if int(minute)>60:
+                    continue
+                if not second:
+                    second = "00"
+                if int(second)>60:
+                    continue
+                definite_time = "%s:%s:%s"%(hour.rjust(2,"0"),minute.rjust(2,"0"),second.rjust(2,"0"))
+                # print(definite_time)
+                definite_time_list.append(definite_time)
+
+            if t_out_of_word:
+                # print('t_out_of_word', entity_text+sentence_text[entity.wordOffset_end:], t_out_of_word.groupdict())
+                day = t_out_of_word.groupdict().get('day', "")
+                hour = t_out_of_word.groupdict().get('hour', "")
+                half_hour = t_out_of_word.groupdict().get('half_hour', "")
+                minute = t_out_of_word.groupdict().get('minute', "")
+                second = t_out_of_word.groupdict().get('second', "")
+                if hour:
+                    if day == '下午' and int(hour) < 12:
+                        hour = str(int(hour) + 12)
+                    if int(hour) > 24:
+                        continue
+                else:
+                    hour = "00"
+                if not minute:
+                    if half_hour:
+                        minute = "30"
+                    else:
+                        minute = "00"
+                if int(minute) > 60:
+                    continue
+                if not second:
+                    second = "00"
+                if int(second) > 60:
+                    continue
+                definite_time = "%s:%s:%s" % (hour.rjust(2, "0"), minute.rjust(2, "0"), second.rjust(2, "0"))
+                # print(definite_time)
+                definite_time_list.append(definite_time)
 
 
+            min_len = min(len(extract_time),len(definite_time_list))
+            for i in range(min_len):
+                if definite_time_list[i] != "00:00:00":
+                    extract_time[i] = extract_time[i] + " " + definite_time_list[i]
 
         if extract_time:
+            # 时间变更prob优化
+            if re.search("原",entity_left2):
+                last_index = 0
+                for item in re.finditer("原",entity_left2):
+                    last_index = item.start() + 1
+                label_prob = label_prob - 0.2 * last_index / len(entity_left2)
+                # print('prob优化',label_prob,extract_time)
+
             # 优化多个并列的时间,如:开标时间和截标时间,截标时间和报名结束时间
             if entity.label in [2,3,9]:
                 if entity.label==2 and re.search("截标|投标.{,2}截止|递交(?:文件)?.{,2}截止|报价.{,2}截止|响应.{,2}截止",entity_left3):
@@ -3042,8 +3133,12 @@ def getTimeAttributes(list_entity,list_sentence):
                     dict_time['time_registrationEnd'].append((extract_time[0], 0.5, in_attachment))
                 if entity.label==9 and re.search("截标|投标.{,2}截止|递交(?:文件)?.{,2}截止|报价.{,2}截止|响应.{,2}截止",entity_left3):
                     dict_time['time_bidclose'].append((extract_time[0], 0.5, in_attachment))
-
-
+            # 补充公告末尾处的发布时间
+            if entity.label==0:
+                if entity.is_tail:
+                    entity.label = 1
+                    entity.values[1] = 0.5
+                    dict_time['time_release'].append((extract_time[0], 0.5, in_attachment))
             # 2022/12/12 新增挂牌时间正则
             if re.search("挂牌.{,4}(?:时间|日期)",entity_left2):
                 if re.search("挂牌.{,4}(?:时间|日期)",entity_left2).end()>len(entity_left2)/2:
@@ -3206,7 +3301,7 @@ def getTimeAttributes(list_entity,list_sentence):
             last_time_type = ""
         last_sentence_index = entity.sentence_index
 
-
+    # print(dict_time)
     result_dict = dict((key,"") for key in dict_time.keys())
     for time_type,value in dict_time.items():
         list_time = dict_time[time_type]