瀏覽代碼

Merge remote-tracking branch 'origin/master'

lsm 10 月之前
父節點
當前提交
9cde2603e3

+ 3 - 3
BiddingKG/dl/interface/Preprocessing.py

@@ -952,7 +952,7 @@ def tableToText(soup, docid=None):
             count_flag = True
             for width_index in range(width):
                 if inner_table[height][width_index][1]==0:
-                    if re.search(company_pattern,inner_table[height][width_index][0])  is not None:
+                    if re.search(company_pattern,inner_table[height][width_index][0]) is not None:
                         count_set.add(inner_table[height][width_index][0])
                     else:
                         count_flag = False
@@ -1082,7 +1082,7 @@ def tableToText(soup, docid=None):
 
                                 cell = table_occurence[i][j]
                                 head = (cell["top_head"]+":") if len(cell["top_head"])>0 else ""
-                                if re.search("单报标限总]价|金额|成交报?价|报价|供应商|候选人|中标人", head):
+                                if re.search("[单报标限总]价|金额|成交报?价|报价|供应商|候选人|中标人|[利费]率|负责人|工期|服务(期限?|年限|时间|日期|周期)|(履约|履行)期限|合同(期限?|(完成|截止)(日期|时间))", head):
                                     head = cell["left_head"] + head
                                 else:
                                     head += cell["left_head"]
@@ -1127,7 +1127,7 @@ def tableToText(soup, docid=None):
 
                                 cell = table_occurence[i][j]
                                 head = (cell["left_head"]+"") if len(cell["left_head"])>0 else ""
-                                if re.search("单报标限总]价|金额|成交报?价|报价", head):
+                                if re.search("[单报标限总]价|金额|成交报?价|报价|供应商|候选人|中标人|[利费]率|负责人|工期|服务(期限?|年限|时间|日期|周期)|(履约|履行)期限|合同(期限?|(完成|截止)(日期|时间))", head):
                                     head = cell["top_head"] + head
                                 else:
                                     head += cell["top_head"]

+ 12 - 4
BiddingKG/dl/interface/extract.py

@@ -80,6 +80,9 @@ def extractCount(extract_dict,page_attachments,web_source_name):
     dict_pack = _extract.get("prem",{})
     extract_count = 0
     list_code = _extract.get("code",[])
+    word_count = _extract.get("word_count",{})
+    if word_count.get("正文",0)>500:
+        extract_count += 3
     if len(list_code)>0:
         project_code = list_code[0]
     else:
@@ -102,10 +105,12 @@ def extractCount(extract_dict,page_attachments,web_source_name):
                 if _role[0]=="tenderee":
                     tenderee = _role[1]
                 if _role[0]=="win_tenderer":
+                    if _role[1] is not None and _role[1]!="":
+                        extract_count += 2
                     if  win_tenderer=="":
                         win_tenderer = _role[1]
                     if _role[2]!='' and float(_role[2])>0:
-                        extract_count += 1
+                        extract_count += 2
                         if win_bid_price=="":
                             win_bid_price = str(float(_role[2]))
                 if _role[0]=="agency":
@@ -118,15 +123,18 @@ def extractCount(extract_dict,page_attachments,web_source_name):
                 if _role.get("role_name")=="tenderee":
                     tenderee = _role["role_text"]
                 if _role.get("role_name")=="win_tenderer":
+                    if _role["role_text"] is not None and _role["role_text"]!="":
+                        extract_count += 2
                     if  win_tenderer=="":
                         win_tenderer = _role["role_text"]
                     if "role_money" in _role:
                         if str(_role["role_money"]["money"])!='' and float(_role["role_money"]["money"])>0:
-                            extract_count += 1
+                            extract_count += 2
                             if win_bid_price=="":
                                 win_bid_price = str(float(_role["role_money"]["money"]))
                 if _role["role_name"]=="agency":
                     agency = _role["role_text"]
+
                 linklist = _role.get("linklist",[])
                 for link in linklist:
                     for l in link:
@@ -175,7 +183,7 @@ def extractCount(extract_dict,page_attachments,web_source_name):
             extract_count += 1
 
     if web_source_name in set_login_web:
-        extract_count -= 1
+        extract_count -= 3
 
     return extract_count
 
@@ -511,7 +519,7 @@ def predict(doc_id,text,title="",page_time="",web_source_no='',web_source_name="
     return _extract_json#, list_articles[0].content, get_ent_context(list_sentences, list_entitys)
 
 
-def test(name,content):
+def test1(name,content):
     user = {
         "content": content,
         "id":name

+ 31 - 20
BiddingKG/dl/interface/getAttributes.py

@@ -944,7 +944,8 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
     def addServiceTimeByEntity(packDict,packageName,entity,serviceTime):
         for i in range(len(packDict[packageName]["roleList"])):
             if packDict[packageName]["roleList"][i].entity_text==entity:
-                packDict[packageName]["roleList"][i].serviceTime = serviceTime.entity_text
+                # packDict[packageName]["roleList"][i].serviceTime = serviceTime.entity_text
+                packDict[packageName]["roleList"][i].serviceTime = extract_serviceTime(serviceTime.entity_text,"")
 
     #根据实体名称得到角色
     def getRoleWithText(packDict,entity_text):
@@ -3767,9 +3768,9 @@ def get_days_between(day1,day2,get_abs=0):
         return days_difference
 
 def extract_serviceTime(service_time,page_time):
-    pattern1 = re.compile("\d{4}[年\-\./]\d{1,2}[月\-\./]\d{1,2}日?")
-    pattern2 = re.compile("\d+(?:\.\d+)?[\((]?个?[^\d]?[^\d]?(?:日|天|周年|整年|学?年|月|周|日历[天日]|工作[天日])")
-    pattern3 = re.compile("\d{4}[年\-\./]\d{1,2}月?")
+    pattern1 = re.compile("\d{4}[年\-./]\d{1,2}[月\-./]\d{1,2}日?")
+    pattern2 = re.compile("\d+(?:\.\d+)?[((]?个?[^\d]?[^\d]?(?:日|天|周年|整年|学?年|月|周|日历[天日]|工作[天日])")
+    pattern3 = re.compile("\d{4}[年\-./]\d{1,2}月?")
     pattern4 = re.compile("(?:日|天|周年|年|月|周|日历[天日]|工作[天日]|星期)[^\d]{1,3}\d+(?:\.\d+)?")
     DigitsDic = {"零":0, "壹":1, "贰":2, "叁":3, "肆":4, "伍":5, "陆":6, "柒":7, "捌":8, "玖":9,
                  "〇":0, "一":1, "二":2, "三":3, "四":4, "五":5, "六":6, "七":7, "八":8, "九":9,
@@ -3829,7 +3830,7 @@ def extract_serviceTime(service_time,page_time):
 
         return sum(result_list) + result
 
-    serviceTime_dict = {"service_start": "", "service_end": "", "service_days": ""}
+    serviceTime_dict = {"service_start": "", "service_end": "", "service_days": 0}
     re_num = re.findall(r'[〇一二三四五六七八九零壹贰叁肆伍陆柒捌玖貮两十拾百佰千仟]+',service_time)
     for _num in re_num:
         if not re.search("[十拾百佰千仟]",_num):
@@ -3850,7 +3851,7 @@ def extract_serviceTime(service_time,page_time):
         time_list = []
         for _time in re.findall(pattern1,service_time):
             _time = re.sub("日","",_time)
-            _time = re.sub("[年月\./]","-",_time)
+            _time = re.sub("[年月./]","-",_time)
             _year,_month,_day = _time.split("-")
             _month = int(_month)
             _day = int(_day)
@@ -3866,7 +3867,7 @@ def extract_serviceTime(service_time,page_time):
             if get_days_between(page_time,time_list[1])>1 and get_days_between(time_list[0],time_list[1])>0:
                 serviceTime_dict['service_end'] = time_list[1]
                 serviceTime_dict['service_start'] = time_list[0]
-        else:
+        elif len(time_list)==1:
             if get_days_between(page_time, time_list[0]) > 1:
                 serviceTime_dict['service_end'] = time_list[0]
             # service_days = (time.mktime(time.strptime(end_time,"%Y-%m-%d"))-page_timestamp)/(24*60*60)
@@ -3875,7 +3876,7 @@ def extract_serviceTime(service_time,page_time):
         # end_time = re.findall(pattern3,service_time)[-1]
         for _time in re.findall(pattern3,service_time):
             _time = re.sub("月","",_time)
-            _time = re.sub("[年\./]","-",_time)
+            _time = re.sub("[年./]","-",_time)
             _year,_month = _time.split("-")
             _day = 0
             _month = int(_month)
@@ -3892,7 +3893,7 @@ def extract_serviceTime(service_time,page_time):
             if get_days_between(page_time, time_list[1]) > 1 and get_days_between(time_list[0], time_list[1]) > 0:
                 serviceTime_dict['service_end'] = time_list[1]
                 serviceTime_dict['service_start'] = time_list[0]
-        else:
+        elif len(time_list)==1:
             if get_days_between(page_time, time_list[0]) > 1:
                 serviceTime_dict['service_end'] = time_list[0]
                 # service_days = (time.mktime(time.strptime(end_time,"%Y-%m-%d"))-page_timestamp)/(24*60*60)
@@ -3921,24 +3922,33 @@ def extract_serviceTime(service_time,page_time):
                 elif unit==1:
                     if match_num>4000:#单位为'日'时,排除数字过大的
                         match_num = 0
-                service_days = match_num * unit
-                if int(service_days) % 360==0:
+                service_days = int(match_num * unit)
+                if service_days % 360==0:
                     service_days = service_days / 360 * 365
+                elif service_days % 180==0 and service_days % 360!=0:
+                    service_days = service_days // 360 * 365 + 180
                 service_days = int(service_days)
                 if service_days <= 1 and service_days > 4000:
                     service_days = 0
 
-                if service_days>0:
-                    service_days = str(service_days) + "天"
+                if service_days>3:
+                    # service_days = str(service_days) + "天"
                     serviceTime_dict['service_days'] = service_days
                     break
     elif "半年" in service_time:
         service_days = 180
-        service_days = str(service_days) + "天"
+        # service_days = str(service_days) + "天"
+        serviceTime_dict['service_days'] = service_days
+    if serviceTime_dict['service_start'] and serviceTime_dict['service_end']:
+        service_days = get_days_between(serviceTime_dict['service_start'],serviceTime_dict['service_end'])
         serviceTime_dict['service_days'] = service_days
 
     return serviceTime_dict
 
+def getServiceTime():
+
+    pass
+
 def getOtherAttributes(list_entity,page_time,prem,channel_dic):
     dict_other = {"moneysource":"",
                   "person_review":[],
@@ -3976,7 +3986,7 @@ def getOtherAttributes(list_entity,page_time,prem,channel_dic):
 
     time_contractEnd = prem[0].get("time_contractEnd","")[:10]
     time_contractStart = prem[0].get("time_contractStart","")[:10]
-    serviceTime_dict = {"service_start":"", "service_end":"", "service_days": ""}
+    serviceTime_dict = {"service_start":"", "service_end":"", "service_days": 0}
     if time_contractEnd:
         serviceTime_dict['service_end'] = time_contractEnd
         if time_contractStart:
@@ -3986,12 +3996,10 @@ def getOtherAttributes(list_entity,page_time,prem,channel_dic):
     if list_serviceTime and not serviceTime_dict['service_end']:
         list_serviceTime_inAtt = [serviceTime for serviceTime in list_serviceTime if serviceTime.in_attachment==1]
         list_serviceTime = [serviceTime for serviceTime in list_serviceTime if serviceTime.in_attachment==0]
-        # if not list_serviceTime:
-        #     list_serviceTime = list_serviceTime_inAtt
         error_serviceTime = []
         for list_time in [list_serviceTime,list_serviceTime_inAtt]:
             # if not dict_other["serviceTime"]:
-            if not serviceTime_dict['service_end']:
+            if not serviceTime_dict['service_end'] and not serviceTime_dict['service_days']:
                 list_time.sort(key=lambda x: (x.prob,-x.sentence_index,-x.begin_index), reverse=True)
                 for _serviceTime in list_time:
                     # 优先取具体时间(20XX年x月x日-20XX年x月x日)
@@ -4040,7 +4048,7 @@ def getOtherAttributes(list_entity,page_time,prem,channel_dic):
                                 break
     if serviceTime_dict['service_start'] and serviceTime_dict['service_end']:
         service_days = get_days_between(serviceTime_dict['service_start'],serviceTime_dict['service_end'])
-        serviceTime_dict['service_days'] = str(service_days) + "天"
+        serviceTime_dict['service_days'] = service_days
     dict_other["serviceTime"] = serviceTime_dict
     if not time_contractEnd and channel_dic['docchannel']['docchannel']=='合同公告': # 用serviceTime补充合同开始结束时间,公告类型为合同公告
         if serviceTime_dict['service_start'] and serviceTime_dict['service_end']:
@@ -4189,7 +4197,10 @@ def limit_maximum_amount(dic, list_entity):
     for value in dic['prem'].values():
         for l in value['roleList']:
             if l["role_name"] in ['win_tenderer', 'second_tenderer', 'third_tenderer']:
-                date = float(re.search('(\d+)天', l.get('serviceTime', '')).group(1)) if re.search('(\d+)天', l.get('serviceTime', '')) else 0
+                # date = float(re.search('(\d+)天', l.get('serviceTime', '')).group(1)) if re.search('(\d+)天', l.get('serviceTime', '')) else 0
+                serviceTime_dict = l.get('serviceTime', dict())
+                serviceTime_dict = serviceTime_dict if serviceTime_dict else dict()
+                date = serviceTime_dict.get("service_days",0)
                 if 0 < date < 180 and float(l["role_money"]['money']) > 10000000000: # 工期小于180天且金额大于百亿的,错误
                     l["role_money"]['money'] = str(Decimal(l["role_money"]['money']) / 10000)
                     # print('工期纠正百亿以上金额 ')

+ 58 - 17
BiddingKG/dl/time/re_servicetime.py

@@ -30,22 +30,23 @@ before = '(?P<before>' \
          '|履约期限|合同的?约定完成时限|合同的?完成日期|承诺完成日期' \
          '|合同起始日起|合同的?履约期|履约截止日期|承包期限|合同的?完成日期|特许经营期限' \
          '|服务期间|服务履行期|委托(管理)?期限|经营期限|数量' \
-         '|(工期|服务期限?|交货期限?|服务履行期|合同期限?|履[行约]期限?)说明' \
+         '|(工期|服务期限?|交货期限?|服务履行期|合同期限?|履[行约]期限?)说明|存款期限?|(存款|存放|定存)(期|年)限|服务日期' \
+         '|服务(有效期|年限)|本?合同有效期|协议有效期|项目期限' \
          ')'
 
 
 # ^(?!.*abc).*$ 排除abc字符串
 before_wuye = '(?P<before>' \
-              '(履约期限、地点等简要信息[::]((履约|时间|期限){1,2}[::])?)' \
+              '(履约期限、地点等简要信息[::]((履约|时间|期限){1,2}[::])?)' \
               ')'
 # '|(履约期限、地点等简要信息[^\d半一二三四五六七八九十壹两叁贰肆伍陆柒捌玖拾]{0,25})' \
 # (履约期限、地点等简要信息.{0,25}(?= [\d半一二三四五六七八九十壹两叁贰肆伍陆柒捌玖拾]+([年月日]|个月)|20[21]))
 
 before2 = '(?P<before2>' \
-          '自合同签订之日起至|合同签订之日起|自合同签订之日起|签订合同后|系统开发' \
-          '|合同签订之日起至|自合同签订之日|合同签定后|自签订合同之日起|自合同签订起' \
-          '|[自从]?合同签[订定]生效之日起|自合同签订后不超过|合同签订日至' \
-          '|合同签订生效之日起' \
+          '自合同签订[次]日起至|合同签订[次]日起|自合同签订[次]日起|签订合同后|系统开发' \
+          '|合同签订[次]日起至|自合同签订[次]日|合同签定后|自签订合同[次]日起|自合同签订起' \
+          '|[自从]?合同签[订定]生效[次]日起|自合同签订后不超过|合同签订日至' \
+          '|合同签订生效[次]日起' \
           '|本项目招标有效期|招标有效期' \
           '|[自从于]?签[订定署字](合同|协议书|协议)并?(期|开始履行|生效|有效期|约定|验收合格|期限|开始服务){0,2}(之[日后]|日期?[后起]|后|起|算|为)+[^。\d半一二三四五六七八九十壹两叁贰肆伍陆柒捌玖拾]{0,4}' \
           '|[自从于]?(采购)?(合同|协议书|协议)(正式)?签[订定署字](完[成毕])?并?(期|开始履行|生效|验收合格|开始服务|期限|有效期|约定){0,2}(之[日后]|日期?[后起]|后|起|算|为)+[^。\d半一二三四五六七八九十壹两叁贰肆伍陆柒捌玖拾]{0,5}' \
@@ -57,7 +58,7 @@ before2 = '(?P<before2>' \
           '|[自从于]服务(合同|协议书|协议)生效(之[日后]|后|起)[^。\d半一二三四五六七八九十壹两叁贰肆伍陆柒捌玖拾]{1,4}' \
           '|(本次)?采购周期' \
           '|(项目招标)?履行期|[自从于]?(合同|协议书|协议)生效(之[日后]|后|起)[^。\d半一二三四五六七八九十壹两叁贰肆伍陆柒捌玖拾]{1,3}' \
-          '|服务(有效期|年限)|本?合同有效期|(正式)?入驻(之[日后]|后|起|算)+' \
+          '|服务(有效期|年限)|本?合同有效期|协议有效期|(正式)?入驻(之[日后]|后|起|算)+' \
           '|(合同|协议书|协议)生效(之[日后]|后|起|算)+' \
           '|自?(提供服务|采购人指定|合同约定)(之[日后]|后|起|算)+' \
           '|本?项目合同期(为|是)*' \
@@ -66,6 +67,29 @@ before2 = '(?P<before2>' \
         # '|[^。]{0,4}[自从于][^。;;,]{0,15}(之[日后]|后|起|算|为)+[^。\d半一二三四五六七八九十壹两叁贰肆伍陆柒捌玖拾]{0,5}?' \
     # '|[自从于].{2,15}之日[^。\d半一二三四五六七八九十壹两叁贰肆伍陆柒捌玖拾]{1,4}' \
 
+# before2 用于做开头的表达,需排除一些不合理的
+before2_first = '(?P<before2>' \
+          '自合同签订之日起至|合同签订之日起|自合同签订之日起|签订合同后' \
+          '|合同签订之日起至|自合同签订之日|合同签定后|自签订合同之日起|自合同签订起' \
+          '|[自从]?合同签[订定]生效之日起|自合同签订后不超过|合同签订日至' \
+          '|合同签订生效之日起' \
+          '|本项目招标有效期|招标有效期' \
+          '|[自从于]?签[订定署字](合同|协议书|协议)并?(期|开始履行|生效|有效期|约定|验收合格|期限|开始服务){0,2}(之[日后]|日期?[后起]|后|起|算|为)+[^。\d半一二三四五六七八九十壹两叁贰肆伍陆柒捌玖拾]{0,4}' \
+          '|[自从于]?(采购)?(合同|协议书|协议)(正式)?签[订定署字](完[成毕])?并?(期|开始履行|生效|验收合格|开始服务|期限|有效期|约定){0,2}(之[日后]|日期?[后起]|后|起|算|为)+[^。\d半一二三四五六七八九十壹两叁贰肆伍陆柒捌玖拾]{0,5}' \
+          '|服务要求' \
+          '|签订合同起' \
+          '|项目的有效期限为|项目服务为|签订合同期为' \
+          '|(合同|协议书)签[订定署字]生效(之[日后]|后|起)[^。\d半一二三四五六七八九十壹两叁贰肆伍陆柒捌玖拾]{1,4}' \
+          '|[自从于]服务(合同|协议书|协议)生效(之[日后]|后|起)[^。\d半一二三四五六七八九十壹两叁贰肆伍陆柒捌玖拾]{1,4}' \
+          '|(本次)?采购周期' \
+          '|(项目招标)?履行期|[自从于]?(合同|协议书|协议)生效(之[日后]|后|起)[^。\d半一二三四五六七八九十壹两叁贰肆伍陆柒捌玖拾]{1,3}' \
+          '|服务(有效期|年限)|本?合同有效期|协议有效期|(正式)?入驻(之[日后]|后|起|算)+' \
+          '|(合同|协议书|协议)生效(之[日后]|后|起|算)+' \
+          '|自?(提供服务|采购人指定|合同约定)(之[日后]|后|起|算)+' \
+          '|本?项目合同期(为|是)*' \
+          '|交付使用(之[日后]|后|起|算)+|' \
+          ')'
+
 before3 = '(?P<before3>' \
           ',?([\((](日历天|施工时间|单位)[\))]|[\((]天[\))]?|[\((]年[\))]?|[\((]月[\))]?)?' \
           ')'
@@ -136,7 +160,7 @@ reg2 = re.compile(before + before3 + before7 + charac + before5 + before2 + befo
 
 reg3 = re.compile(before + before3 + before7 + charac + before5 + before2 + after2)
 
-reg4 = re.compile(before2[:-2]+before2[-1:] + before5 + center + after)
+reg4 = re.compile(before2_first[:-2]+before2_first[-1:] + before5 + center + after)
 
 reg5 = re.compile(before + before3 + before7 + charac + before5 + before2 + before4 + before6 + center2 + after)
 
@@ -229,18 +253,18 @@ def re_service_time(text):
             prob = 0.8
 
         if len(output_list) == 0:
-            output_list, text_index_list = re_find_all_result(reg4, input_str)
+            output_list, text_index_list = re_find_all_result(reg5, input_str)
             if TEST_MODE:
-                print("output_str, text_index reg4", output_list, text_index_list)
+                print("output_str, text_index reg5", output_list, text_index_list)
             output_list, text_index_list = filter_service_time(output_list, text_index_list)
-            prob = 0.5
+            prob = 0.8
 
         if len(output_list) == 0:
-            output_list, text_index_list = re_find_all_result(reg5, input_str)
+            output_list, text_index_list = re_find_all_result(reg4, input_str)
             if TEST_MODE:
-                print("output_str, text_index reg5", output_list, text_index_list)
+                print("output_str, text_index reg4", output_list, text_index_list)
             output_list, text_index_list = filter_service_time(output_list, text_index_list)
-            prob = 0.8
+            prob = 0.5
 
         # 添加
         all_output_list += output_list
@@ -298,7 +322,7 @@ def filter_service_time(output_list, text_index_list):
         if not re.findall(reg_right_unit, output) and not re.match('^\d{1,3}$', output):
             delete_list.append([output, text_index_list[i]])
             continue
-        if not re.findall("[^之]日|天|年|月|周|星期", output) or re.search("\d{4}[\-\./]\d{1,2}", output):
+        if not (re.findall("[^之]日|天|年|月|周|星期", output) or re.search("\d{4}[\-\./]\d{1,2}", output)):
             delete_list.append([output, text_index_list[i]])
             continue
         # 包含不要的字
@@ -362,7 +386,22 @@ def re_find_all_result(reg, input, unit="", index=0):
         if re.search("数量",i.group()) and not re.search("[年月日天周]",input[i.start()+front_len: i.end()]):
             continue
         # 前述表达有排除词的跳过
-        if re.search("公告|发布",input[i.start():i.start()+front_len]):
+        if re.search("公告|发布|公示",input[i.start():i.start()+front_len]):
+            continue
+        # ‘服务日期’只保留x年的
+        if re.search("服务日期", input[i.start():i.start() + front_len]) \
+            and (re.search('[日月]',input[i.start()+front_len: i.end()]) or not re.search('年',input[i.start()+front_len: i.end()])):
+            continue
+        # 排除某些容易错误的表达
+        if re.search('^(自合同签订[之次]日起至|合同签订[之次]日起|自合同签订[之次]日起|签订合同后' \
+              '|合同签订[之次]日起至|自合同签订[之次]日|合同签定后|自签订合同[之次]日起|自合同签订起' \
+              '|[自从]?合同签[订定]生效[之次]日起|自合同签订后不超过|合同签订日至' \
+              '|合同签订生效[之次]日起|签订合同起' \
+              '|[自从于]?签[订定署字](合同|协议书|协议)并?(期|开始履行|生效|有效期|约定|验收合格|期限|开始服务){0,2}(之[日后]|日期?[后起]|后|起|算|为)+[^。\d半一二三四五六七八九十壹两叁贰肆伍陆柒捌玖拾]{0,4}' \
+              '|[自从于]?(采购)?(合同|协议书|协议)(正式)?签[订定署字](完[成毕])?并?(期|开始履行|生效|验收合格|开始服务|期限|有效期|约定){0,2}(之[日后]|日期?[后起]|后|起|算|为)+[^。\d半一二三四五六七八九十壹两叁贰肆伍陆柒捌玖拾]{0,5}' \
+              '|(合同|协议书)签[订定署字]生效(之[日后]|后|起)[^。\d半一二三四五六七八九十壹两叁贰肆伍陆柒捌玖拾]{1,4}' \
+              '|[自从于]服务(合同|协议书|协议)生效(之[日后]|后|起)[^。\d半一二三四五六七八九十壹两叁贰肆伍陆柒捌玖拾]{1,4}' \
+              ')',input[i.start():i.start() + front_len]):
             continue
 
         text_index.append([i.start()+front_len, i.end()])
@@ -409,7 +448,9 @@ def test_from_str():
         "服务时间:2023年12-2024年12," \
         "第十四条,服务时间:2023.12-2024.12,。" \
         "第十四条本合同自2023年3月1日起至2024年2月29日止。" \
-        "二、垃圾清运委外期限,垃圾委外清运时间为1年,自2023年1月1日起至2023年12月31日止。"
+        "服务时间:自2022年10月1日至2023年9月3日" \
+        "二、垃圾清运委外期限,垃圾委外清运时间为1年,自2023年1月1日起至2023年12月31日止。" \
+        "服务时间:预计从2022年11月起,12个月。"
     # s = '第十四条,服务时间:2023.12-2024.12服务时间'
 #     s = ''',莆田市财政局走廊及卫生间吊顶改造工程中标结果公告,莆田市财政局走廊及卫生间吊顶改造工程,工程预算价236878元,发包价194240元,招标编号为:宏福莆招字【2020】H001号,该项目招标方式为:邀请招标。2020年04月07日开标,2020年04月07日评标完成,中标主要结果公示如下:中标人名称,福建省东海伟业建设有限公司,中标价:194240元,评标办法,随机抽取法,资格评审结果,注册建造师:合格:余爱华(注册编号:闽235141578763),履约保证金(元):合格:合同金额的10%,施工工期:14日历天,工程质量,备注,被确定为废标、无效标的投标人及原因:合格:无废标,资格审查小组:合格:王宗仙、林慧灵、谢淑青,根据评标结果确定福建省东海伟业建设有限公司为中标人,现在莆田市财政局网上(http://czj.putian.gov.cn/)公示。中标公示期自2020年04月08日至2020年04月10日。投标人对中标结果有异议或认为评标活动存在违法违规行为,可在公示期内向相关主管部门投诉,招标单位:招标代理机构:莆田市财政局,福建省宏福工程管理有限公司,联系电话:0594-2694413,联系电话:15160467775,2020年04月08日,2020年04月08日,
 # '''