فهرست منبع

Merge remote-tracking branch 'origin/master'

luojiehua 10 ماه پیش
والد
کامیت
c2963160be

+ 4 - 1
BiddingKG/dl/interface/Preprocessing.py

@@ -710,6 +710,9 @@ def tableToText(soup, docid=None):
                 inner_table[i][j] = [origin_inner_table[i][j][0], int(predict_list[i][j])]
 
         if show:
+            print(inner_table)
+            print("="*80)
+
             print("table_head before repair")
             for r in inner_table:
                 print('row', r)
@@ -725,10 +728,10 @@ def tableToText(soup, docid=None):
                 inner_table[i][j] = [origin_inner_table[i][j][0], int(inner_table[i][j][1])]
 
         if show:
-            print("="*80)
             print("table_head after repair")
             for r in inner_table:
                 print('row', r)
+            print("="*80)
 
         # 按表头分割表格
         head_list = sliceTable(inner_table)

+ 1 - 1
BiddingKG/dl/interface/extract.py

@@ -388,7 +388,7 @@ def predict(doc_id,text,title="",page_time="",web_source_no='',web_source_name="
     # predictor.getPredictor("product").predict(list_sentences, list_entitys)
     log("get product done of doc_id%s"%(doc_id))
     cost_time["product"] = round(time.time()-start_time,2)
-    prem[0].update(getAttributes.getOtherAttributes(list_entitys[0],page_time,prem))
+    prem[0].update(getAttributes.getOtherAttributes(list_entitys[0],page_time,prem,channel_dic))
 
     '''更新单一来源招标公告中标角色为预中标'''
     getAttributes.fix_single_source(prem[0], channel_dic, original_docchannel)

+ 2 - 2
BiddingKG/dl/interface/getAttributes.py

@@ -3939,7 +3939,7 @@ def extract_serviceTime(service_time,page_time):
 
     return serviceTime_dict
 
-def getOtherAttributes(list_entity,page_time,prem):
+def getOtherAttributes(list_entity,page_time,prem,channel_dic):
     dict_other = {"moneysource":"",
                   "person_review":[],
                   "serviceTime":"",
@@ -4042,7 +4042,7 @@ def getOtherAttributes(list_entity,page_time,prem):
         service_days = get_days_between(serviceTime_dict['service_start'],serviceTime_dict['service_end'])
         serviceTime_dict['service_days'] = str(service_days) + "天"
     dict_other["serviceTime"] = serviceTime_dict
-    if not time_contractEnd and prem[0]['docchannel']['docchannel']=='合同公告': # 用serviceTime补充合同开始结束时间,公告类型为合同公告
+    if not time_contractEnd and channel_dic['docchannel']['docchannel']=='合同公告': # 用serviceTime补充合同开始结束时间,公告类型为合同公告
         if serviceTime_dict['service_start'] and serviceTime_dict['service_end']:
             prem[0]["time_contractStart"] = serviceTime_dict['service_start']
             prem[0]["time_contractEnd"] = serviceTime_dict['service_end']

+ 325 - 126
BiddingKG/dl/proposed_building/pb_extract.py

@@ -23,6 +23,7 @@ class PBPredictor:
 
     def get_col_from_prem(self, prem):
         tenderee, agency, product = None, None, None
+        begin_time, end_time = None, None
         for item in prem:
             prem = item.get('prem')
             for key in prem.keys():
@@ -63,68 +64,71 @@ class PBPredictor:
                     project_code = None
 
                 start_time = time.time()
-                stage = extract_legal_stage(project_name+doctitle, self.stage_pattern, self.stage_priority_dict, product, tenderee=tenderee, agency=agency)
+                stage = extract_legal_stage(project_name + doctitle, self.stage_pattern, self.stage_priority_dict,
+                                            product, tenderee=tenderee, agency=agency)
                 if show:
-                    print('extract_legal_stage time', time.time()-start_time)
+                    print('extract_legal_stage time', time.time() - start_time)
                     start_time = time.time()
-                industry1 = extract_industry(doctitle+content, self.industry_pattern)
+                industry1 = extract_industry(doctitle + content, self.industry_pattern)
                 if show:
-                    print('extract_industry time', time.time()-start_time)
+                    print('extract_industry time', time.time() - start_time)
                     start_time = time.time()
-                industry = extract_industry(doctitle+content_no_att, self.industry_pattern)
+                industry = extract_industry(doctitle + content_no_att, self.industry_pattern)
                 if show:
-                    print('extract_industry time', time.time()-start_time)
+                    print('extract_industry time', time.time() - start_time)
                     start_time = time.time()
                 # print('industry', industry, industry1)
                 if not industry and industry1:
                     industry = industry1
                 proportion1, proportion = extract_proportion(content)
                 if show:
-                    print('extract_proportion time', time.time()-start_time)
+                    print('extract_proportion time', time.time() - start_time)
                     start_time = time.time()
                 project_digest = extract_project_digest(content)
                 if show:
-                    print('extract_project_digest time', time.time()-start_time)
+                    print('extract_project_digest time', time.time() - start_time)
                     start_time = time.time()
                 project_address = extract_project_address(list_sentence, list_entity)
                 if show:
-                    print('extract_project_address time', time.time()-start_time)
+                    print('extract_project_address time', time.time() - start_time)
                     start_time = time.time()
-                location = get_bid_location(doctitle+"\t"+project_name)
+                location = get_bid_location(doctitle + "\t" + project_name)
                 if show:
-                    print('get_bid_location time', time.time()-start_time)
+                    print('get_bid_location time', time.time() - start_time)
                     start_time = time.time()
-                project_name_refind, show_name_refind = get_project_name_refind(project_name, doctitle, tenderee, agency)
+                project_name_refind, show_name_refind = get_project_name_refind(project_name, doctitle, tenderee,
+                                                                                agency)
                 if show:
-                    print('get_project_name_refind time', time.time()-start_time)
+                    print('get_project_name_refind time', time.time() - start_time)
                     start_time = time.time()
                 has_elevator = extract_has_elevator(content)
                 if show:
-                    print('extract_has_elevator time', time.time()-start_time)
+                    print('extract_has_elevator time', time.time() - start_time)
                     start_time = time.time()
-                project_property = extract_project_property(doctitle+"\t"+project_name, self.property_pattern, self.property_priority_dict)
+                project_property = extract_project_property(doctitle + "\t" + project_name, self.property_pattern,
+                                                            self.property_priority_dict)
                 if show:
-                    print('extract_project_property time', time.time()-start_time)
+                    print('extract_project_property time', time.time() - start_time)
                     start_time = time.time()
                 total_invest, construct_install_fee, engineer_cost = extract_several_money(list_sentence, dochtmlcon)
                 if show:
-                    print('extract_several_money time', time.time()-start_time)
+                    print('extract_several_money time', time.time() - start_time)
                     start_time = time.time()
                 max_floor = extract_max_floor(content, dochtmlcon)
                 if show:
-                    print('extract_max_floor time', time.time()-start_time)
+                    print('extract_max_floor time', time.time() - start_time)
                     start_time = time.time()
                 structure = extract_structure(content, dochtmlcon, self.structure_keyword_list)
                 if show:
-                    print('extract_structure time', time.time()-start_time)
+                    print('extract_structure time', time.time() - start_time)
                     start_time = time.time()
                 has_steel = extract_has_steel_structure(structure)
                 if show:
-                    print('extract_has_steel_structure time', time.time()-start_time)
+                    print('extract_has_steel_structure time', time.time() - start_time)
                     start_time = time.time()
-                wall_type, wall_type2 = extract_wall_type(doctitle+"\t"+project_name, content)
+                wall_type, wall_type2 = extract_wall_type(doctitle + "\t" + project_name, content)
                 if show:
-                    print('extract_wall_type time', time.time()-start_time)
+                    print('extract_wall_type time', time.time() - start_time)
                     start_time = time.time()
 
                 if stage is not None:
@@ -191,7 +195,7 @@ def extract_legal_stage(content, _pattern, priority_dict, product='', tenderee='
 
     list_stage = []
     for stage_search in re.finditer(_pattern, _content):
-        for k,v in stage_search.groupdict().items():
+        for k, v in stage_search.groupdict().items():
             if v is not None:
                 list_stage.append([k, priority_dict.get(k)])
     if len(list_stage) > 0:
@@ -211,10 +215,10 @@ def extract_legal_stage(content, _pattern, priority_dict, product='', tenderee='
         if stage == '立项阶段':
             sub_content = re.sub('立项目', '', _content)
             for stage_search in re.finditer(_pattern, sub_content):
-                for k,v in stage_search.groupdict().items():
+                for k, v in stage_search.groupdict().items():
                     if v is not None:
                         list_stage.append([k, priority_dict.get(k)])
-            if len(list_stage)>0:
+            if len(list_stage) > 0:
                 list_stage.sort(key=lambda x: x[1])
                 stage = list_stage[0][0]
 
@@ -222,7 +226,7 @@ def extract_legal_stage(content, _pattern, priority_dict, product='', tenderee='
     return None
 
 
-def get_project_name_refind(project_name, doctitle, tenderee='', agency= '', min_len=3):
+def get_project_name_refind(project_name, doctitle, tenderee='', agency='', min_len=3):
     # 跳过部分
     re_str11 = '网上超市|服务市场采购|印刷服务|复印纸|车辆维修和保养|商品房预售|办公家具定点|直接订购|定点议价' \
                '|政府采购意向|信息技术服务定点议价|信息技术服务定点采购|法人章刻制中介机构|专用设备|办公设备采购' \
@@ -352,18 +356,21 @@ def get_project_name_refind(project_name, doctitle, tenderee='', agency= '', min
             project_word_in_org = []
             for m in match:
                 # 混淆词,设施工程中的施工
-                if m.span()[0] > 0 and name_refind[m.span()[0]-1] in ['设']:
+                if m.span()[0] > 0 and name_refind[m.span()[0] - 1] in ['设']:
                     continue
 
                 # 判断是不是公司名里的工程
                 if re.search(re_str26, name_refind[m.span()[1]:]):
-                    project_word_in_org.append(name_refind[max(0, m.span()[0]-1):min(m.span()[1]+1, len(name_refind))])
+                    project_word_in_org.append(
+                        name_refind[max(0, m.span()[0] - 1):min(m.span()[1] + 1, len(name_refind))])
                     continue
-                if re.search(re_str17, name_refind[m.span()[1]:m.span()[1]+3]):
-                    project_word_in_org.append(name_refind[max(0, m.span()[0]-1):min(m.span()[1]+1, len(name_refind))])
+                if re.search(re_str17, name_refind[m.span()[1]:m.span()[1] + 3]):
+                    project_word_in_org.append(
+                        name_refind[max(0, m.span()[0] - 1):min(m.span()[1] + 1, len(name_refind))])
                     continue
                 if re.search(re_str18, name_refind[m.span()[1]:]):
-                    project_word_in_org.append(name_refind[max(0, m.span()[0]-1):min(m.span()[1]+1, len(name_refind))])
+                    project_word_in_org.append(
+                        name_refind[max(0, m.span()[0] - 1):min(m.span()[1] + 1, len(name_refind))])
                     continue
 
                 match_flag = True
@@ -377,18 +384,21 @@ def get_project_name_refind(project_name, doctitle, tenderee='', agency= '', min
                 last_index = 0
                 for m in match:
                     # 混淆词,设施工程中的施工
-                    if m.span()[0] > 0 and name_refind[m.span()[0]-1] in ['设']:
+                    if m.span()[0] > 0 and name_refind[m.span()[0] - 1] in ['设']:
                         continue
 
                     # 判断是不是公司名里的工程
                     if re.search(re_str26, name_refind[m.span()[1]:]):
-                        project_word_in_org.append(name_refind[max(0, m.span()[0]-1):min(m.span()[1]+1, len(name_refind))])
+                        project_word_in_org.append(
+                            name_refind[max(0, m.span()[0] - 1):min(m.span()[1] + 1, len(name_refind))])
                         continue
-                    if re.search(re_str17, name_refind[m.span()[1]:m.span()[1]+3]):
-                        project_word_in_org.append(name_refind[max(0, m.span()[0]-1):min(m.span()[1]+1, len(name_refind))])
+                    if re.search(re_str17, name_refind[m.span()[1]:m.span()[1] + 3]):
+                        project_word_in_org.append(
+                            name_refind[max(0, m.span()[0] - 1):min(m.span()[1] + 1, len(name_refind))])
                         continue
                     if re.search(re_str18, name_refind[m.span()[1]:]):
-                        project_word_in_org.append(name_refind[max(0, m.span()[0]-1):min(m.span()[1]+1, len(name_refind))])
+                        project_word_in_org.append(
+                            name_refind[max(0, m.span()[0] - 1):min(m.span()[1] + 1, len(name_refind))])
                         continue
                     match_flag = True
                     prob_name_list.append(name_refind[last_index:m.span()[1]])
@@ -429,7 +439,7 @@ def get_project_name_refind(project_name, doctitle, tenderee='', agency= '', min
                 match1 = re.finditer(re_str6, name)
                 for m1 in match1:
                     # 混淆词,设施工程中的施工
-                    if m1.span()[0] > 0 and name[m1.span()[0]-1] in ['设']:
+                    if m1.span()[0] > 0 and name[m1.span()[0] - 1] in ['设']:
                         continue
                     s_index, e_index = m1.span()
                     word = name[s_index:e_index]
@@ -473,12 +483,12 @@ def get_project_name_refind(project_name, doctitle, tenderee='', agency= '', min
     for name_refind in name_refind_candidate_list:
         # 直接判断删除数字
         match = re.match(re_str16, name_refind)
-        if match and not re.match('[0-9]', name_refind[match.span()[1]:match.span()[1]+1]):
+        if match and not re.match('[0-9]', name_refind[match.span()[1]:match.span()[1] + 1]):
             name_refind = name_refind[match.span()[1]:]
 
         # 删除开头奇怪数字
         match = re.match(re_str15, name_refind)
-        if match and not re.match('[a-zA-Z地块号]', name_refind[match.span()[1]:match.span()[1]+1]):
+        if match and not re.match('[a-zA-Z地块号]', name_refind[match.span()[1]:match.span()[1] + 1]):
             name_refind = name_refind[match.span()[1]:]
 
         # 删除期数
@@ -525,7 +535,7 @@ def get_project_name_refind(project_name, doctitle, tenderee='', agency= '', min
         # 删除区
         match2 = re.match(re_str22, name_refind)
         if match2:
-            name_refind = name_refind[match2.span()[1]-1:]
+            name_refind = name_refind[match2.span()[1] - 1:]
 
         # 删除'小区表达'
         if len(name_refind) >= min_len + 2:
@@ -537,7 +547,8 @@ def get_project_name_refind(project_name, doctitle, tenderee='', agency= '', min
         if agency in [None, 'None', '-', '']:
             agency = ''
         try:
-            if len(name_refind) >= 4 and (re.search(re.escape(name_refind[-4:]), tenderee) or re.search(re.escape(name_refind[-4:]), agency)):
+            if len(name_refind) >= 4 and (
+                    re.search(re.escape(name_refind[-4:]), tenderee) or re.search(re.escape(name_refind[-4:]), agency)):
                 name_refind = ''
                 show_name_refind = ''
         except:
@@ -558,14 +569,14 @@ def extract_industry(content, _pattern):
     list_stage = []
     stage_dict = {}
     for stage_search in re.finditer(_pattern, content):
-        for k,v in stage_search.groupdict().items():
+        for k, v in stage_search.groupdict().items():
             if v is not None:
                 list_stage.append(k)
                 if k in stage_dict.keys():
                     stage_dict[k] += 1
                 else:
                     stage_dict[k] = 1
-    if len(list_stage)>0:
+    if len(list_stage) > 0:
         stage_cnt_list = [[x, stage_dict.get(x)] for x in stage_dict.keys()]
         stage_cnt_list.sort(key=lambda x: x[1], reverse=True)
         # print('extract_industry ' + str(stage_cnt_list))
@@ -598,12 +609,12 @@ def extract_tenderee(list_entity):
 
 def extract_project_digest(content):
     _pattern = "(?P<projectDigest>(项目|工程|标的|需求|建设|招标|采购|内容)(概况|规模|简介|信息|范围|内容|说明|摘要).{10,300})"
-    _pattern_search = re.search(_pattern,content)
+    _pattern_search = re.search(_pattern, content)
     _projectDigest = ""
     _find = ""
     if _pattern_search is not None:
-        _find = _pattern_search.groupdict().get("projectDigest","")
-    if len(_find)>0:
+        _find = _pattern_search.groupdict().get("projectDigest", "")
+    if len(_find) > 0:
         _projectDigest = "。".join(_find.split("。")[0:3])
 
     # 截掉中标信息
@@ -620,7 +631,6 @@ def extract_project_address(list_sentence, list_entity):
     reg3 = "(项目|建设|工程)(地址|地点)[::]?(位于|起于)"
     reg4 = "(项目|建设|工程)(地址|地点)[为::]+"
 
-
     address_list = []
     candidate_list = []
     for sentence in list_sentence:
@@ -637,10 +647,12 @@ def extract_project_address(list_sentence, list_entity):
                         continue
 
                     text = p_entity.entity_text
-                    if text == content[end_index:end_index+len(text)] or text in content[end_index:end_index+len(text)+10]:
+                    if text == content[end_index:end_index + len(text)] or text in content[end_index:end_index + len(
+                            text) + 10]:
                         address_list.append(text)
                     else:
-                        candidate_list.append(content[max(0, end_index-10):end_index] + '@@@' + content[end_index:end_index+20] + '@@@' + text)
+                        candidate_list.append(content[max(0, end_index - 10):end_index] + '@@@' + content[
+                                                                                                  end_index:end_index + 20] + '@@@' + text)
 
             if address_list:
                 break
@@ -665,17 +677,19 @@ def extract_begin_end_time(list_sentence, list_entity):
         if p_entity.entity_type == "time":
             for _sentence in list_sentence:
                 if _sentence.sentence_index == p_entity.sentence_index:
-                    _span = spanWindow(tokens=_sentence.tokens,begin_index=p_entity.begin_index,end_index=p_entity.end_index,size=20,center_include=True,word_flag=True,text=p_entity.entity_text)
-                    if re.search("开工(时间|日期)",_span[0]) is not None:
+                    _span = spanWindow(tokens=_sentence.tokens, begin_index=p_entity.begin_index,
+                                       end_index=p_entity.end_index, size=20, center_include=True, word_flag=True,
+                                       text=p_entity.entity_text)
+                    if re.search("开工(时间|日期)", _span[0]) is not None:
                         _time_temp = timeFormat(p_entity.entity_text)
-                        if len(_time_temp)>0:
+                        if len(_time_temp) > 0:
                             _begin_time = _time_temp
-                    if re.search("(竣工|完工)(时间|日期)",_span[0]) is not None:
+                    if re.search("(竣工|完工)(时间|日期)", _span[0]) is not None:
                         _time_temp = timeFormat(p_entity.entity_text)
-                        if len(_time_temp)>0:
+                        if len(_time_temp) > 0:
                             _end_time = _time_temp
 
-    return _begin_time,_end_time
+    return _begin_time, _end_time
 
 
 def get_bid_location(content):
@@ -709,7 +723,8 @@ def get_bid_location(content):
             find_flag = False
             for entitys in list_entitys:
                 for entity in entitys:
-                    if entity.entity_type in ["tenderee", 'agency', 'win_tenderer', 'second_tenderer', 'third_tenderer', 'company', 'org']:
+                    if entity.entity_type in ["tenderee", 'agency', 'win_tenderer', 'second_tenderer', 'third_tenderer',
+                                              'company', 'org']:
                         if location in entity.entity_text:
                             find_flag = True
                             break
@@ -727,8 +742,8 @@ def extract_proportion(content, has_preffix=True):
     # log(content)
     suffix = "[大概约为是::【\[\s]*[\d,]+(\.\d+)?[十百千万亿]*([\]】平方kK千万公㎡mM米里顷亩]+2?))"
     reg_dict = {
-        0: "(?P<proportion>(总((建筑|建设)面积|长|长度))" + suffix,
-        1: "(?P<proportion>((建筑|建设)面积|全长)" + suffix,
+        0: "(?P<proportion>(总((建筑|建设)(面积|规模)|长|长度))" + suffix,
+        1: "(?P<proportion>((建筑|建设)(面积|规模)|全长)" + suffix,
         2: "(?P<proportion>((建筑|建设|区域)?面积|全长|项目规模)" + suffix
     }
 
@@ -743,7 +758,7 @@ def extract_proportion(content, has_preffix=True):
         # logging.info('content ' + str(content))
         match = re.search(_pattern, str(content))
         if match:
-            _proportion = match.groupdict().get("proportion","")
+            _proportion = match.groupdict().get("proportion", "")
 
     if not _proportion:
         return "", ""
@@ -820,7 +835,7 @@ def extract_has_elevator(content):
         has_flag = 1
         if judge_yeji(match.span()[0], content):
             has_flag = 0
-        elif re.search('公司', content[end_index:end_index+8]):
+        elif re.search('公司', content[end_index:end_index + 8]):
             has_flag = 0
     return has_flag
 
@@ -828,12 +843,12 @@ def extract_has_elevator(content):
 def extract_project_property(content, property_pattern, property_priority_dict):
     property_list = []
     for m in re.finditer(property_pattern, content):
-        for k,v in m.groupdict().items():
+        for k, v in m.groupdict().items():
             if v is not None:
                 property_list.append([k, property_priority_dict.get(k)])
 
     _property = '新建'
-    if len(property_list)>0:
+    if len(property_list) > 0:
         property_list.sort(key=lambda x: x[1])
         _property = property_list[0][0]
     return _property
@@ -850,7 +865,7 @@ def extract_several_money(list_sentence, html='', is_obj=True, show=0):
     tables_and_divs = soup.find_all(['table', 'div'])
     for i, sentence in enumerate(list_sentence):
         if show and i % 100 == 0:
-            print('extract_several_money Loop', i, len(list_sentence), time.time()-start_time1)
+            print('extract_several_money Loop', i, len(list_sentence), time.time() - start_time1)
             start_time1 = time.time()
         last_text = ''
         next_text = ''
@@ -858,40 +873,40 @@ def extract_several_money(list_sentence, html='', is_obj=True, show=0):
             text = sentence.sentence_text
             all_before_sentence += text
             if i > 0:
-                last_text = list_sentence[i-1].sentence_text[-30:]
+                last_text = list_sentence[i - 1].sentence_text[-30:]
             if i < len(list_sentence) - 1:
-                next_text = list_sentence[i+1].sentence_text[:30]
+                next_text = list_sentence[i + 1].sentence_text[:30]
         else:
             text = sentence
             all_before_sentence += text
             if i > 0:
-                last_text = list_sentence[i-1][-30:]
+                last_text = list_sentence[i - 1][-30:]
             if i < len(list_sentence) - 1:
-                next_text = list_sentence[i+1][:30]
+                next_text = list_sentence[i + 1][:30]
 
         start_time2 = time.time()
-        if judge_yeji(len(all_before_sentence), all_before_sentence, 300+len(text)):
+        if judge_yeji(len(all_before_sentence), all_before_sentence, 300 + len(text)):
             # print('sentence yeji before ' + text)
             continue
         if show:
-            print('extract_several_money time0.1', time.time()-start_time2)
+            print('extract_several_money time0.1', time.time() - start_time2)
             start_time2 = time.time()
         # if '项目概算总投资为' in text:
         _list, _ = get_several_money(text, 0, False, tables_and_divs=tables_and_divs)
         if show:
-            print('extract_several_money time0.2', time.time()-start_time2)
+            print('extract_several_money time0.2', time.time() - start_time2)
             start_time2 = time.time()
         # logging.info('get_several_money _list ' + str(_list))
 
         temp_list = []
         for l in _list:
             if l[-1] == '总投资':
-                if re.search('业绩', last_text+text+next_text):
+                if re.search('业绩', last_text + text + next_text):
                     continue
             temp_list.append(l)
         _list = temp_list
         if show:
-            print('extract_several_money time0.3', time.time()-start_time2)
+            print('extract_several_money time0.3', time.time() - start_time2)
             start_time2 = time.time()
 
         money_list += _list
@@ -899,7 +914,7 @@ def extract_several_money(list_sentence, html='', is_obj=True, show=0):
         #     break
 
     if show:
-        print('extract_several_money time1', time.time()-start_time)
+        print('extract_several_money time1', time.time() - start_time)
         start_time = time.time()
 
     money_type_dict = {}
@@ -925,7 +940,7 @@ def extract_several_money(list_sentence, html='', is_obj=True, show=0):
 
     # logging.info('money_type_dict ' + str(money_type_dict))
     if show:
-        print('extract_several_money time2', time.time()-start_time)
+        print('extract_several_money time2', time.time() - start_time)
         start_time = time.time()
 
     result_list = []
@@ -944,7 +959,7 @@ def extract_several_money(list_sentence, html='', is_obj=True, show=0):
             result_list.append(None)
 
     if show:
-        print('extract_several_money time3', time.time()-start_time)
+        print('extract_several_money time3', time.time() - start_time)
         start_time = time.time()
 
     for i in range(len(result_list)):
@@ -961,9 +976,9 @@ def extract_max_floor(content, html=None):
         _floor_list = []
         for m in _match:
             if 'reg6' in _reg:
-                _floor1 = content[max(0, m.span('reg6')[0]-1):m.span('reg6')[1]+1]
+                _floor1 = content[max(0, m.span('reg6')[0] - 1):m.span('reg6')[1] + 1]
             elif 'reg4' in _reg:
-                _floor1 = content[max(0, m.span('reg4')[0]-1):m.span('reg4')[1]+1]
+                _floor1 = content[max(0, m.span('reg4')[0] - 1):m.span('reg4')[1] + 1]
             else:
                 _floor1 = content[m.span()[0]:m.span()[1]]
             if judge_yeji(m.span()[0], _content, 300, _tables_and_divs, _floor1):
@@ -1003,7 +1018,7 @@ def extract_max_floor(content, html=None):
                     _floor = chinese_to_arabic(_floor)
                 _floor = int(_floor)
             if _reg2:
-                _floor_list2 = match_floor(_reg2, _content[m.span()[1]:m.span()[1]+35])
+                _floor_list2 = match_floor(_reg2, _content[m.span()[1]:m.span()[1] + 35])
                 # print('@2', _floor_list2)
                 if _floor_list2:
                     _floor2 = int(_floor_list2[0])
@@ -1087,11 +1102,11 @@ def extract_structure(content, html=None, structure_keyword_list=None):
     match = re.finditer(reg, content)
     for m in match:
         structure = m.group()
-        structure1 = content[max(0, m.span()[0]-1):m.span()[1]+1]
+        structure1 = content[max(0, m.span()[0] - 1):m.span()[1] + 1]
         if judge_yeji(m.span()[0], content, 300, tables_and_divs, structure1):
             continue
         if structure in ['钢结构']:
-            if re.search('公司', content[m.span()[1]:m.span()[1]+8]):
+            if re.search('公司', content[m.span()[1]:m.span()[1] + 8]):
                 continue
         structure_list.append(structure)
 
@@ -1234,7 +1249,7 @@ def cut_win_bid_part(_str):
         index_start = m.span()[0]
         cut_str = re.split("[,,。;;]", _str[index_start:])[0]
         if len(cut_str) < 25:
-            cut_str = _str[index_start:index_start+25]
+            cut_str = _str[index_start:index_start + 25]
         # cut_str = _str[index_start:index_start+15]
         # print("cut_str", cut_str)
 
@@ -1307,20 +1322,21 @@ def judge_yeji(end_index, content, judge_len=300, tables_and_divs=None, entity=N
                                     # print('len(rows[index2])', len(row2.find_all('td')))
                                     # if len(row2.find_all('td')) <= max_col_span / 2:
                                     #     print(re.search('业绩', str(row2)), str(row2))
-                                    if len(row2.find_all('td')) <= max_col_span / 2 and re.search(reg_yeji, str(row2.get_text())):
+                                    if len(row2.find_all('td')) <= max_col_span / 2 and re.search(reg_yeji,
+                                                                                                  str(row2.get_text())):
                                         # logging.info('is_yeji_table 2')
                                         is_yeji = 1
 
                         break
 
                 # 前面都找不到,那么找表格上方的两行
-                div_list = [str(x.get_text()) for x in _tables_and_divs[max(0, index3-2):index3]]
+                div_list = [str(x.get_text()) for x in _tables_and_divs[max(0, index3 - 2):index3]]
                 if not is_yeji and re.search(reg_yeji, ' '.join(div_list)):
                     # logging.info('is_yeji_table 3')
                     is_yeji = 1
                 break
         if show:
-            print('is_yeji_table time', time.time()-start_time)
+            print('is_yeji_table time', time.time() - start_time)
         return is_yeji
 
     # 先判断表格业绩
@@ -1349,15 +1365,17 @@ def get_several_money(sentence_text, found_yeji, in_attachment=False,
         '''
         @summary:拿到中文对应的数字
         '''
-        DigitsDic = {"零":0, "壹":1, "贰":2, "叁":3, "肆":4, "伍":5, "陆":6, "柒":7, "捌":8, "玖":9,
-                     "〇":0, "一":1, "二":2, "三":3, "四":4, "五":5, "六":6, "七":7, "八":8, "九":9}
+        DigitsDic = {"零": 0, "壹": 1, "贰": 2, "叁": 3, "肆": 4, "伍": 5, "陆": 6, "柒": 7, "捌": 8, "玖": 9,
+                     "〇": 0, "一": 1, "二": 2, "三": 3, "四": 4, "五": 5, "六": 6, "七": 7, "八": 8, "九": 9}
         return DigitsDic.get(_unit)
 
     def getMultipleFactor(_unit):
         '''
         @summary:拿到单位对应的值
         '''
-        MultipleFactor = {"兆":Decimal(1000000000000),"亿":Decimal(100000000),"万":Decimal(10000),"仟":Decimal(1000),"千":Decimal(1000),"佰":Decimal(100),"百":Decimal(100),"拾":Decimal(10),"十":Decimal(10),"元":Decimal(1),"圆":Decimal(1),"角":round(Decimal(0.1),1),"分":round(Decimal(0.01),2)}
+        MultipleFactor = {"兆": Decimal(1000000000000), "亿": Decimal(100000000), "万": Decimal(10000), "仟": Decimal(1000),
+                          "千": Decimal(1000), "佰": Decimal(100), "百": Decimal(100), "拾": Decimal(10), "十": Decimal(10),
+                          "元": Decimal(1), "圆": Decimal(1), "角": round(Decimal(0.1), 1), "分": round(Decimal(0.01), 2)}
         return MultipleFactor.get(_unit)
 
     def getUnifyMoney(money):
@@ -1370,45 +1388,45 @@ def get_several_money(sentence_text, found_yeji, in_attachment=False,
 
         MAX_MONEY = 1000000000000
         MAX_NUM = 12
-        #去掉逗号
-        money = re.sub("[,,]","",money)
-        money = re.sub("[^0-9.零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]","",money)
+        # 去掉逗号
+        money = re.sub("[,,]", "", money)
+        money = re.sub("[^0-9.零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]", "", money)
         result = Decimal(0)
         chnDigits = ["零", "壹", "贰", "叁", "肆", "伍", "陆", "柒", "捌", "玖"]
         # chnFactorUnits = ["兆", "亿", "万", "仟", "佰", "拾","圆","元","角","分"]
-        chnFactorUnits = ["圆", "元","兆", "亿", "万", "仟", "佰", "拾", "角", "分", '十', '百', '千']
+        chnFactorUnits = ["圆", "元", "兆", "亿", "万", "仟", "佰", "拾", "角", "分", '十', '百', '千']
 
         LowMoneypattern = re.compile("^[\d,]+(\.\d+)?$")
-        BigMoneypattern = re.compile("^零?(?P<BigMoney>[%s])$"%("".join(chnDigits)))
+        BigMoneypattern = re.compile("^零?(?P<BigMoney>[%s])$" % ("".join(chnDigits)))
         try:
-            if re.search(LowMoneypattern,money) is not None:
+            if re.search(LowMoneypattern, money) is not None:
                 return Decimal(money)
-            elif re.search(BigMoneypattern,money) is not None:
-                return getDigitsDic(re.search(BigMoneypattern,money).group("BigMoney"))
+            elif re.search(BigMoneypattern, money) is not None:
+                return getDigitsDic(re.search(BigMoneypattern, money).group("BigMoney"))
             for factorUnit in chnFactorUnits:
-                if re.search(re.compile(".*%s.*"%(factorUnit)),money) is not None:
-                    subMoneys = re.split(re.compile("%s(?!.*%s.*)"%(factorUnit,factorUnit)),money)
-                    if re.search(re.compile("^(\d+)(\.\d+)?$"),subMoneys[0]) is not None:
-                        if MAX_MONEY/getMultipleFactor(factorUnit)<Decimal(subMoneys[0]):
+                if re.search(re.compile(".*%s.*" % (factorUnit)), money) is not None:
+                    subMoneys = re.split(re.compile("%s(?!.*%s.*)" % (factorUnit, factorUnit)), money)
+                    if re.search(re.compile("^(\d+)(\.\d+)?$"), subMoneys[0]) is not None:
+                        if MAX_MONEY / getMultipleFactor(factorUnit) < Decimal(subMoneys[0]):
                             return Decimal(0)
-                        result += Decimal(subMoneys[0])*(getMultipleFactor(factorUnit))
-                    elif len(subMoneys[0])==1:
-                        if re.search(re.compile("^[%s]$"%("".join(chnDigits))),subMoneys[0]) is not None:
-                            result += Decimal(getDigitsDic(subMoneys[0]))*(getMultipleFactor(factorUnit))
+                        result += Decimal(subMoneys[0]) * (getMultipleFactor(factorUnit))
+                    elif len(subMoneys[0]) == 1:
+                        if re.search(re.compile("^[%s]$" % ("".join(chnDigits))), subMoneys[0]) is not None:
+                            result += Decimal(getDigitsDic(subMoneys[0])) * (getMultipleFactor(factorUnit))
                     # subMoneys[0]中无金额单位,不可再拆分
-                    elif subMoneys[0]=="":
+                    elif subMoneys[0] == "":
                         result += 0
-                    elif re.search(re.compile("[%s]"%("".join(chnFactorUnits))),subMoneys[0]) is None:
+                    elif re.search(re.compile("[%s]" % ("".join(chnFactorUnits))), subMoneys[0]) is None:
                         # print(subMoneys)
                         # subMoneys[0] = subMoneys[0][0]
                         result += Decimal(getUnifyMoney(subMoneys[0])) * (getMultipleFactor(factorUnit))
                     else:
-                        result += Decimal(getUnifyMoney(subMoneys[0]))*(getMultipleFactor(factorUnit))
-                    if len(subMoneys)>1:
-                        if re.search(re.compile("^(\d+(,)?)+(\.\d+)?[百千万亿]?\s?(元)?$"),subMoneys[1]) is not None:
+                        result += Decimal(getUnifyMoney(subMoneys[0])) * (getMultipleFactor(factorUnit))
+                    if len(subMoneys) > 1:
+                        if re.search(re.compile("^(\d+(,)?)+(\.\d+)?[百千万亿]?\s?(元)?$"), subMoneys[1]) is not None:
                             result += Decimal(subMoneys[1])
-                        elif len(subMoneys[1])==1:
-                            if re.search(re.compile("^[%s]$"%("".join(chnDigits))),subMoneys[1]) is not None:
+                        elif len(subMoneys[1]) == 1:
+                            if re.search(re.compile("^[%s]$" % ("".join(chnDigits))), subMoneys[1]) is not None:
                                 result += Decimal(getDigitsDic(subMoneys[1]))
                         else:
                             result += Decimal(getUnifyMoney(subMoneys[1]))
@@ -1456,7 +1474,7 @@ def get_several_money(sentence_text, found_yeji, in_attachment=False,
         sentence_text = re.sub(re.escape(match.group()), match.group()[0] + match.group()[2:], sentence_text)
 
     if show:
-        print('get_several_money time2', time.time()-start_time)
+        print('get_several_money time2', time.time() - start_time)
         start_time = time.time()
 
     if re.search('业绩(公示|汇总|及|报告|\w{,2}(内容|情况|信息)|[^\w])', sentence_text):
@@ -1464,14 +1482,15 @@ def get_several_money(sentence_text, found_yeji, in_attachment=False,
     if found_yeji >= 2:  # 过滤掉业绩后面的所有金额
         all_match = []
     else:
-        ser = re.search('((收费标准|计算[方公]?式):|\w{3,5}\s*=)+\s*[中标投标成交金额招标人预算价格万元\s()()\[\]【】\d\.%%‰\+\-*×/]{20,}[,。]?', sentence_text)  # 过滤掉收费标准里面的金额
+        ser = re.search('((收费标准|计算[方公]?式):|\w{3,5}\s*=)+\s*[中标投标成交金额招标人预算价格万元\s()()\[\]【】\d\.%%‰\+\-*×/]{20,}[,。]?',
+                        sentence_text)  # 过滤掉收费标准里面的金额
         if ser:
             all_match = re.finditer(pattern_money, sentence_text.replace(ser.group(0), ' ' * len(ser.group(0))))
         else:
             all_match = re.finditer(pattern_money, sentence_text)
 
     if show:
-        print('get_several_money time3', time.time()-start_time)
+        print('get_several_money time3', time.time() - start_time)
         start_time = time.time()
 
     for _match in all_match:
@@ -1486,7 +1505,8 @@ def get_several_money(sentence_text, found_yeji, in_attachment=False,
             filter_unit = False
             notSure = False
             science = ""
-            if re.search('业绩(公示|汇总|及|报告|\w{,2}(内容|情况|信息)|[^\w])', sentence_text[:_match.span()[0]]):  # 2021/7/21过滤掉业绩后面金额
+            if re.search('业绩(公示|汇总|及|报告|\w{,2}(内容|情况|信息)|[^\w])',
+                         sentence_text[:_match.span()[0]]):  # 2021/7/21过滤掉业绩后面金额
                 # print('金额在业绩后面: ', _match.group(0))
                 found_yeji += 1
                 break
@@ -1529,12 +1549,14 @@ def get_several_money(sentence_text, found_yeji, in_attachment=False,
             if re.search('电话|手机|联系|方式|编号|编码|日期|数字|时间', text_beforeMoney):
                 # print('过滤掉手机号码作为金额')
                 continue
-            elif re.search('^1[3-9]\d{9}$', entity_text) and re.search(':\w{1,3}$', text_beforeMoney): # 过滤掉类似 '13863441880', '金额(万元):季勇13863441880'
+            elif re.search('^1[3-9]\d{9}$', entity_text) and re.search(':\w{1,3}$',
+                                                                       text_beforeMoney):  # 过滤掉类似 '13863441880', '金额(万元):季勇13863441880'
                 # print('过滤掉手机号码作为金额')
                 continue
 
             if unit == "":  # 2021/7/21 有明显金额特征的补充单位,避免被过滤
-                if (re.search('(¥|¥|RMB|CNY)[::]?$', text_beforeMoney) or re.search('[零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]{3,}', entity_text)):
+                if (re.search('(¥|¥|RMB|CNY)[::]?$', text_beforeMoney) or re.search('[零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]{3,}',
+                                                                                    entity_text)):
                     if entity_text.endswith('万元'):
                         unit = '万元'
                         entity_text = entity_text[:-2]
@@ -1550,8 +1572,10 @@ def get_several_money(sentence_text, found_yeji, in_attachment=False,
                 elif re.search('^[-—]+[\d,.]+万元', sentence_text[end_index:]):
                     # print('两个金额连接后面的有单位,用后面单位')
                     unit = '万元'
-                elif re.search('([单报标限总造]价款?|金额|租金|(中标|成交|合同|承租|投资))?[价额]|价格|预算(金额)?|(监理|设计|勘察)(服务)?费)[::为]*-?$', text_beforeMoney.strip()) and re.search('^0|1[3|4|5|6|7|8|9]\d{9}', entity_text) == None:
-                    if re.search('^[\d,,.]+$', entity_text) and float(re.sub('[,,]', '', entity_text))<500 and re.search('万元', sentence_text):
+                elif re.search('([单报标限总造]价款?|金额|租金|(中标|成交|合同|承租|投资))?[价额]|价格|预算(金额)?|(监理|设计|勘察)(服务)?费)[::为]*-?$',
+                               text_beforeMoney.strip()) and re.search('^0|1[3|4|5|6|7|8|9]\d{9}', entity_text) == None:
+                    if re.search('^[\d,,.]+$', entity_text) and float(
+                            re.sub('[,,]', '', entity_text)) < 500 and re.search('万元', sentence_text):
                         unit = '万元'
                         # print('金额较小且句子中有万元的,补充单位为万元')
                     elif re.search('^\d{1,3}\.\d{4,6}$', entity_text) and re.search('0000$', entity_text) == None:
@@ -1568,7 +1592,7 @@ def get_several_money(sentence_text, found_yeji, in_attachment=False,
             elif unit == '万元':
                 if end_index < len(sentence_text) and sentence_text[end_index] == '元' and re.search('\d$', entity_text):
                     unit = '元'
-                elif re.search('^[5-9]\d{6,}\.\d{2}$', entity_text): # 五百亿以上的万元改为元
+                elif re.search('^[5-9]\d{6,}\.\d{2}$', entity_text):  # 五百亿以上的万元改为元
                     unit = '元'
 
             if unit.find("万") >= 0 and entity_text.find("万") >= 0:  # 2021/7/19修改为金额文本有万,不计算单位
@@ -1625,13 +1649,13 @@ def get_several_money(sentence_text, found_yeji, in_attachment=False,
                 continue
             # print("金额:{0} ,单位:{1}, 前文:{2}, filter: {3}, filter_unit: {4}".format(entity_text, unit, text_beforeMoney,
             #                                                                      filter, filter_unit))
-            if re.search('[%%‰折]|费率|下浮率', text_beforeMoney) and float(entity_text)<1000: # 过滤掉可能是费率的金额
+            if re.search('[%%‰折]|费率|下浮率', text_beforeMoney) and float(entity_text) < 1000:  # 过滤掉可能是费率的金额
                 # print('过滤掉可能是费率的金额')
                 continue
             money_list.append((entity_text, start_index, end_index, unit, notes))
 
     if show:
-        print('get_several_money time4', time.time()-start_time)
+        print('get_several_money time4', time.time() - start_time)
         start_time = time.time()
 
     # 排除过小的金额
@@ -1727,8 +1751,8 @@ def get_stage_pattern():
     }
 
     list_stage_v = []
-    for k,v in stage_dict.items():
-        list_stage_v.append("(?P<%s>%s)"%(k,v))
+    for k, v in stage_dict.items():
+        list_stage_v.append("(?P<%s>%s)" % (k, v))
     stage_pattern = "|".join(list_stage_v)
     return stage_pattern, stage_priority_dict
 
@@ -1777,7 +1801,182 @@ def get_property_pattern():
     }
 
     list_property_v = []
-    for k,v in property_dict.items():
-        list_property_v.append("(?P<%s>%s)"%(k,v))
+    for k, v in property_dict.items():
+        list_property_v.append("(?P<%s>%s)" % (k, v))
     property_pattern = "|".join(list_property_v)
-    return property_pattern, property_priority_dict
+    return property_pattern, property_priority_dict
+
+
+class get_service_end:
+    """Estimate the length of a service period, in days, from free text.
+
+    ``process`` is the entry point; ``get_num`` converts Chinese numerals
+    that contain positional units (十/百/千) into integers.
+    """
+
+    def __init__(self):
+        # Full date with year, month and day, e.g. 2023年5月1日 / 2023-05-01.
+        self.pattern1 = re.compile(r"\d{4}[年\-\./]\d{1,2}[月\-\./]\d{1,2}日?")
+        # Number followed by a duration unit, e.g. 30天 / 3个月 / 1.5年.
+        self.pattern2 = re.compile(r"\d+(?:\.\d+)?[\((]?个?[^\d]?[^\d]?(?:日|天|周年|整年|学?年|月|周|日历[天日]|工作[天日])")
+        # Year and month only, e.g. 2023年5月 / 2023-05.
+        self.pattern3 = re.compile(r"\d{4}[年\-\./]\d{1,2}月?")
+        # Duration unit followed by a number, e.g. 日历天:120.
+        self.pattern4 = re.compile(r"(?:日|天|周年|年|月|周|日历[天日]|工作[天日]|星期)[^\d]{1,3}\d+(?:\.\d+)?")
+        # Single Chinese digits, everyday and financial forms.
+        self.DigitsDic = {"零": 0, "壹": 1, "贰": 2, "叁": 3, "肆": 4, "伍": 5, "陆": 6, "柒": 7, "捌": 8, "玖": 9,
+                          "〇": 0, "一": 1, "二": 2, "三": 3, "四": 4, "五": 5, "六": 6, "七": 7, "八": 8, "九": 9,
+                          "两": 2, '貮': 2}
+
+    def get_num(self, text):
+        """Convert the first Chinese numeral found in ``text`` to an int.
+
+        :param text: string containing a Chinese numeral
+        :return: the integer value, or "" when ``text`` has no numeral, when
+            the numeral starts with 零/百/佰/千/仟, or when a unit that must
+            scale a preceding digit has none (e.g. 二百十)
+        """
+        CN_UNIT = {'十': 10, '拾': 10, '百': 100, '佰': 100, '千': 1000, '仟': 1000}
+
+        found = re.search(r'[〇一二三四五六七八九零壹贰叁肆伍陆柒捌玖貮两十拾百佰千仟]+', text)
+        if not found:
+            return ""
+        text = found.group()
+        result = 0
+        result_list = []
+        unit = 0
+        control = 0
+        for i, d in enumerate(text):
+            if d in '零百佰千仟' and i == 0:
+                return ""
+            if d in self.DigitsDic:
+                result += self.DigitsDic[d]
+            elif d in CN_UNIT:
+                if unit == 0:
+                    unit_1 = CN_UNIT[d]
+                    # First unit seen: with nothing accumulated it stands for
+                    # itself (十二 -> 10 + 2), otherwise it scales the total.
+                    if result == 0:
+                        result = CN_UNIT[d]
+                    else:
+                        result *= CN_UNIT[d]
+                    unit = CN_UNIT[d]
+                    result_1 = result
+                elif unit > CN_UNIT[d]:
+                    # Smaller unit than the previous one: it scales only the
+                    # digit just before it (二千三百 -> 2000 + 3 * 100).
+                    prev = self.DigitsDic.get(text[i - 1])
+                    if prev is None:
+                        # e.g. 二百十: there is no digit for this unit to scale.
+                        return ""
+                    result -= prev
+                    result += prev * CN_UNIT[d]
+                    unit = CN_UNIT[d]
+                elif unit <= CN_UNIT[d]:
+                    # Same or larger unit than the previous one.
+                    if (CN_UNIT[d] < unit_1) and (len(result_list) == control):
+                        result_list.append(result_1)
+                        result = (result - result_1) * CN_UNIT[d]
+                        control += 1
+                    else:
+                        result *= CN_UNIT[d]
+                    unit = CN_UNIT[d]
+                    if len(result_list) == control:
+                        unit_1 = unit
+                        result_1 = result
+            else:
+                return ""
+        return sum(result_list) + result
+
+    def _days_from_page(self, page_timestamp, _year, _month, _day):
+        """Return the days from the page date to the given date (0 if implausible)."""
+        if _year > 2050 or _year <= 2000 or _month > 12 or _month <= 0:
+            return 0
+        # Clamp the day so the date exists in every year; month lengths and
+        # leap years are deliberately approximated (28 for February, else 30).
+        _day = min(28 if _month == 2 else 30, _day)
+        end_time = "%d-%d-%d" % (_year, _month, _day)
+        return (time.mktime(time.strptime(end_time, "%Y-%m-%d")) - page_timestamp) / (24 * 60 * 60)
+
+    def _duration_days(self, match_text):
+        """Convert one pattern2/pattern4 match (number plus unit) to whole days."""
+        unit = 1
+        if "月" in match_text:
+            unit = 30
+        elif "年" in match_text:
+            unit = 365
+        elif "周" in match_text or "星期" in match_text:
+            unit = 7
+        # Keep the fractional part so that 1.5年 is not truncated to 1年.
+        match_num = float(re.search(r"\d+(?:\.\d+)?", match_text).group())
+        # A positive multiple of 365 is treated as a day count whatever the unit.
+        if match_num > 0 and match_num % 365 == 0:
+            unit = 1
+        # Discard numbers that are too large for their unit.
+        if unit == 365 and match_num > 10:
+            match_num = 0
+        elif unit == 30 and match_num > 60:
+            match_num = 0
+        elif unit == 1 and match_num > 4000:
+            match_num = 0
+        return int(match_num * unit)
+
+    def process(self, page_time, service_time):
+        """Return the service period in days, as a string.
+
+        The first applicable rule wins: a full date in ``service_time`` (days
+        from the page date to the last such date), a year and month (the day
+        is taken as 28 for February, else 30), a number with a duration unit,
+        or the literal 半年 (180 days).
+
+        :param page_time: text containing the page date as YYYY-MM-DD
+        :param service_time: free-form description of the service period
+        :return: the day count as text.  A date-based result is the string
+            form of a float (e.g. "29.0"); the others are integers.  "0" is
+            returned when nothing usable is found, when the result is
+            negative or above 4000, or when any error occurs.
+        """
+        try:
+            page_time = re.search(r"\d{4}\-\d{1,2}\-\d{1,2}", page_time).group()
+
+            # Rewrite Chinese numerals as Arabic digits so the patterns can match.
+            for _num in re.findall(r'[〇一二三四五六七八九零壹贰叁肆伍陆柒捌玖貮两十拾百佰千仟]+', service_time):
+                if re.search("[十拾百佰千仟]", _num):
+                    num = str(self.get_num(_num))
+                else:
+                    num = "".join(str(self.DigitsDic.get(word, word)) for word in _num)
+                service_time = service_time.replace(_num, num, 1)
+
+            service_days = 0
+            page_timestamp = time.mktime(time.strptime(page_time, "%Y-%m-%d"))
+            if re.search(self.pattern1, service_time):
+                end_time = re.findall(self.pattern1, service_time)[-1]
+                end_time = re.sub("日", "", end_time)
+                end_time = re.sub(r"[年月\./]", "-", end_time)
+                _year, _month, _day = (int(part) for part in end_time.split("-"))
+                if 0 < _day <= 31:
+                    service_days = self._days_from_page(page_timestamp, _year, _month, _day)
+            elif re.search(self.pattern3, service_time):
+                end_time = re.findall(self.pattern3, service_time)[-1]
+                end_time = re.sub("月", "", end_time)
+                end_time = re.sub(r"[年\./]", "-", end_time)
+                _year, _month = (int(part) for part in end_time.split("-"))
+                # No day in the text: 31 is clamped to the month-end stand-in.
+                service_days = self._days_from_page(page_timestamp, _year, _month, 31)
+            elif re.search(self.pattern2, service_time) or re.search(self.pattern4, service_time):
+                for pattern in (self.pattern2, self.pattern4):
+                    match = re.findall(pattern, service_time)
+                    # Use a pattern only when all of its matches are identical.
+                    if len(set(match)) == 1:
+                        service_days = self._duration_days(match[0])
+                    if service_days > 0:
+                        break
+            elif "半年" in service_time:
+                service_days = 180
+
+            if service_days > 4000 or service_days < 0:
+                service_days = 0
+            return str(service_days)
+        except Exception:
+            # Best effort: input that cannot be parsed counts as an unknown period.
+            return '0'