
Merge remote-tracking branch 'origin/master'

luojiehua 2 weeks ago
parent
commit
831660aeae

+ 17 - 5
BaseDataMaintenance/maintenance/dataflow.py

@@ -448,6 +448,7 @@ class Dataflow():
         project_name = _dict.get(document_tmp_project_name,"")
         tenderee = _dict.get(document_tmp_tenderee,"")
         agency = _dict.get(document_tmp_agency,"")
+        doctitle = _dict.get(document_tmp_doctitle,"")
         doctitle_refine = _dict.get(document_tmp_doctitle_refine,"")
         win_tenderer = _dict.get("win_tenderer","")
         bidding_budget = _dict.get("bidding_budget","")
@@ -459,7 +460,7 @@ class Dataflow():
         page_time = _dict.get(document_tmp_page_time,"")
         fingerprint = _dict.get(document_tmp_fingerprint,"")
         product = _dict.get(document_tmp_product,"")
-        return docchannel,project_code,project_name,tenderee,agency,doctitle_refine,win_tenderer,bidding_budget,win_bid_price,page_time,fingerprint,product
+        return docchannel,project_code,project_name,tenderee,agency,doctitle,doctitle_refine,win_tenderer,bidding_budget,win_bid_price,page_time,fingerprint,product
 
     def f_set_docid_limitNum_contain(self,item, _split,singleNum_keys=["tenderee","win_tenderer"],contain_keys=[],multiNum_keys=[],notlike_keys=["project_code"]):
         flag = True
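An aside on the widened tuple: get_dump_columns now returns 13 positional values, and the three updated call sites must unpack them in exactly this order. A namedtuple would make the unpacking self-documenting; a minimal sketch (hypothetical refactor, not part of this commit):

```python
from collections import namedtuple

# Name the columns once so call sites cannot silently drift out of order.
DumpColumns = namedtuple("DumpColumns", [
    "docchannel", "project_code", "project_name", "tenderee", "agency",
    "doctitle", "doctitle_refine", "win_tenderer", "bidding_budget",
    "win_bid_price", "page_time", "fingerprint", "product"])
```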
@@ -550,7 +551,7 @@ class Dataflow():
                 set_docid.add(_docid)
 
     def translate_dumplicate_rules(self,status_from,item):
-        docchannel,project_code,project_name,tenderee,agency,doctitle_refine,win_tenderer,bidding_budget,win_bid_price,page_time,fingerprint,product = self.get_dump_columns(item)
+        docchannel,project_code,project_name,tenderee,agency,doctitle,doctitle_refine,win_tenderer,bidding_budget,win_bid_price,page_time,fingerprint,product = self.get_dump_columns(item)
         if page_time=='':
             page_time = getCurrent_date("%Y-%m-%d")
         base_dict = {
@@ -1463,7 +1464,7 @@ class Dataflow():
 
     def merge_document(self,item,status_to=None):
         self.post_extract(item)
-        docchannel,project_code,project_name,tenderee,agency,doctitle_refine,win_tenderer,bidding_budget,win_bid_price,page_time,fingerprint,product = self.get_dump_columns(item)
+        docchannel,project_code,project_name,tenderee,agency,doctitle,doctitle_refine,win_tenderer,bidding_budget,win_bid_price,page_time,fingerprint,product = self.get_dump_columns(item)
 
 
         _d = {"partitionkey":item["partitionkey"],
@@ -2274,6 +2275,12 @@ class Dataflow_dumplicate(Dataflow):
         _dict["dict_time"] = self.get_dict_time(_extract)
         _dict["punish"] = _extract.get("punish",{})
         _dict["approval"] = _extract.get("approval",[])
+        _dict["products_original"] = _extract.get("product_attrs_original", {}).get("data",[])
+        _dict["products"] = _dict.get("products") if _dict.get("products") is not None else []
+        _dict["products"] = _dict["products"] if isinstance(_dict["products"], list) else json.loads(_dict["products"])
+        # change content (change / Q&A announcements)
+        _dict["change_content"] = _extract.get("change_content","")
+        _dict["change_time"] = _extract.get("change_time","")
 
         # special-purpose bond fields
         issue_details = _extract.get("debt_dic",{}).get("issue_details",[])
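The three products lines above normalize a field that may arrive as None, a JSON string, or a list; a quick sketch of the invariant with hypothetical inputs:

```python
import json

for raw in (None, '["打印机", "复印纸"]', ["笔记本电脑"]):
    products = raw if raw is not None else []
    products = products if isinstance(products, list) else json.loads(products)
    assert isinstance(products, list)  # every branch ends with a plain list
```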
@@ -2287,7 +2294,8 @@ class Dataflow_dumplicate(Dataflow):
 
     def dumplicate_fianl_check(self,base_list,b_log=False):
         the_group = base_list
-        the_group.sort(key=lambda x:x["confidence"],reverse=True)
+        # the_group.sort(key=lambda x:x["confidence"],reverse=True)
+        the_group.sort(key=lambda x:(x["confidence"],-x['docid']),reverse=True)
 
         _index = 0
         base_fingerprint = "None"
@@ -2308,6 +2316,7 @@ class Dataflow_dumplicate(Dataflow):
                 _prob,day_dis = self.dumplicate_check(_dict1,_dict2,_dict1.get("min_counts",10),b_log=b_log)
                 if _prob<=0.1:
                     _pass = False
+                    # print('final check error',_dict1['docid'])
                     break
             log("checking index:%d %s %.2f"%(_i,str(_pass),_prob))
             _index = _i
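The new sort key breaks confidence ties by preferring the smaller docid, which keeps the chosen group representative stable across runs. A minimal sketch with hypothetical values:

```python
group = [{"confidence": 0.9, "docid": 12}, {"confidence": 0.9, "docid": 7}]
group.sort(key=lambda x: (x["confidence"], -x["docid"]), reverse=True)
print([g["docid"] for g in group])  # [7, 12]: equal confidence, smaller docid first
```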
@@ -2680,7 +2689,7 @@ class Dataflow_dumplicate(Dataflow):
         list_rules.append(_rule)
 
     def translate_dumplicate_rules(self,status_from,item,get_all=False,to_log=False,day_dis=7,table_name ="document_tmp",table_index="document_tmp_index"):
-        docchannel,project_code,project_name,tenderee,agency,doctitle_refine,win_tenderer,bidding_budget,win_bid_price,page_time,fingerprint,product = self.get_dump_columns(item)
+        docchannel,project_code,project_name,tenderee,agency,doctitle,doctitle_refine,win_tenderer,bidding_budget,win_bid_price,page_time,fingerprint,product = self.get_dump_columns(item)
         current_date = getCurrent_date("%Y-%m-%d")
         if page_time=='':
             page_time = current_date
@@ -2715,6 +2724,7 @@ class Dataflow_dumplicate(Dataflow):
                 }
                 must_not_dict = {"docid":item.get("docid")}
                 doctitle_refine_name = "doctitle"
+                doctitle_refine = doctitle
         else:
             _status = [201,300]
             base_dict = {
@@ -2724,6 +2734,7 @@ class Dataflow_dumplicate(Dataflow):
             }
             must_not_dict = {"docid":item.get("docid")}
             doctitle_refine_name = "doctitle"
+            doctitle_refine = doctitle
 
 
 
@@ -5256,6 +5267,7 @@ class Dataflow_dumplicate(Dataflow):
         if item:
             log("start dumplicate_comsumer_handle")
             self.dumplicate_comsumer_handle(item,None,self.ots_client,get_all=False,upgrade=False)
+            # self.dumplicate_comsumer_handle(item,None,self.ots_client,get_all=True,upgrade=False)
             return
 
     def test_merge(self,list_docid_less,list_docid_greater):

+ 22 - 1
BaseDataMaintenance/maintenance/preproject/fillColumns.py

@@ -498,6 +498,7 @@ class PreprojectFill():
             tenderee = _row.get('tenderee')
             demand_info = _row.get('demand_info')
             project_name = _row.get('project_name')
+            procurement_system = _row.get('procurement_system')
 
             if demand_info is not None and 'data":[]' not in demand_info:
                 result = process_purchaseIntention(docid, tenderee, demand_info, project_name)
@@ -538,6 +539,26 @@ class PreprojectFill():
                         doctitle_product_labels,core_field_product_labels = self.get_project_label(final_product, demand, _tenderee, "")
                         result_row['doctitle_product_labels'] = doctitle_product_labels
                         result_row['core_field_product_labels'] = core_field_product_labels
+                        if tenderee==_tenderee:
+                            result_row['procurement_system'] = procurement_system
+                        else:
+                            # if the intention's tenderee differs from the one extracted from the announcement, look it up in the enterprise table
+                            _query = BoolQuery(must_queries=[
+                                TermQuery('name', _tenderee)
+                            ])
+                            _columns = ['procurement_system']
+                            rows, next_token, total_count, is_all_succeed = self.ots_client.search("enterprise","enterprise_index",
+                                                                                                   SearchQuery(_query,sort=Sort(sorters=[FieldSort("bidi_id")]),
+                                                                                                               get_total_count=True,limit=1),
+                                                                                                   ColumnsToGet(_columns,ColumnReturnType.SPECIFIED))
+                            _procurement_system = None
+                            if len(rows) > 0:
+                                dict_rows = getRow_ots(rows)
+                                row = dict_rows[0]
+                                _procurement_system = row.get('procurement_system')
+                                if not _procurement_system:
+                                    _procurement_system = None
+                            result_row['procurement_system'] = _procurement_system
 
                         # tenderee, product, may_begin, may_end, crtime, type, demand, project_name, bidding_budget, prob, json_docids, province, city, district,
                         # in_doctextcon, last_doctitle, last_tenderee_contact, last_tenderee_phone
@@ -552,7 +573,7 @@ class PreprojectFill():
 
     def purchaseIntention_process_producer(self):
         columns = ['tenderee','demand_info','project_name','province', 'city', 'district','doctitle', 'tenderee_contact',
-                   'tenderee_phone', 'info_type']
+                   'tenderee_phone', 'info_type','procurement_system']
         end_page_time = time.strftime("%Y-%m-%d",time.localtime())
         start_page_time = timeAdd(end_page_time,days=-5)
 

+ 148 - 24
BaseDataMaintenance/maxcompute/documentDumplicate.py

@@ -999,15 +999,68 @@ def check_codes(project_codes_less,project_codes_greater):
 def check_demand():
     return True
 
+def edit_distance_with_diff(s1, s2):
+    m, n = len(s1), len(s2)
+    # build the DP table
+    dp = [[0] * (n + 1) for _ in range(m + 1)]
+
+    # initialize the first row and column
+    for i in range(m + 1):
+        dp[i][0] = i
+    for j in range(n + 1):
+        dp[0][j] = j
+
+    # fill in the DP table
+    for i in range(1, m + 1):
+        for j in range(1, n + 1):
+            if s1[i - 1] == s2[j - 1]:
+                dp[i][j] = dp[i - 1][j - 1]
+            else:
+                dp[i][j] = 1 + min(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1])
+
+    # backtrack to recover the diff operations
+    diff = []
+    i, j = m, n
+    while i > 0 and j > 0:
+        if s1[i - 1] == s2[j - 1]:
+            i -= 1
+            j -= 1
+        elif dp[i][j] == dp[i - 1][j] + 1:
+            diff.append(("删除",s1[i - 1]))
+            i -= 1
+        elif dp[i][j] == dp[i][j - 1] + 1:
+            diff.append(("插入",s2[j - 1]))
+            j -= 1
+        else:
+            diff.append(("替换",s1[i - 1],s2[j - 1]))
+            i -= 1
+            j -= 1
+
+    # consume whatever remains of either string
+    while i > 0:
+        diff.append(("删除",s1[i - 1]))
+        i -= 1
+    while j > 0:
+        diff.append(("插入",s2[j - 1]))
+        j -= 1
+
+    # return the edit distance and the diff operations
+    return dp[m][n], diff[::-1]  # reversed, since the backtrace walked from the end
+
 package_number_pattern = re.compile("(?P<name>(((([^承]|^)包|标[段号的包]|分?包|包组|包件)编?号?|子项目|项目类型|项目)[::]?[0-9A-Za-z一二三四五六七八九十ⅠⅡⅢⅣⅤⅥⅦ]{1,4}[^\.]?)[^至]?|((?![\.])第?[ⅠⅡⅢⅣⅤⅥⅦ0-9A-Za-z一二三四五六七八九十]{1,4}(包号|标[段号的包]|分?包)))")  # '第?' question-mark tweak: stops titles like '纯木浆8包/箱复印' being matched as package numbers
 code_pattern = re.compile("[A-Za-z0-9\-\(\)()【】\.-]+")
 num_pattern = re.compile("^\d+(?:\.\d+)?$")
 num1_pattern = re.compile("[一二三四五六七八九十A-Za-z]+")
+num2_pattern = re.compile("[一二三四五六七八九十A-Za-z\d-]+")
+num3_pattern = re.compile("[一二三四五六七八九十A-Za-z\d-]+|.")
 location_pattern = re.compile("[^\[【\(]{1,2}[市区镇县村路]")
 building_pattern = "工程招标代理|工程设计|暂停|继续|工程造价咨询|施工图设计文件审查|咨询|环评|设计|施工监理|施工|监理|EPC|epc|总承包|水土保持|选址论证|勘界|勘察|预算编制|预算审核|结算审计|招标代理|设备类|第?[\((]?[一二三四五六七八九十1-9]+[)\)]?[次批]"
+# key title content enclosed in 【】 brackets
+brackets_pattern = "【([^【】]+?)】" # |{([^{}]+?)}
 rebid_pattern = "再次|重新招标|[一二三四五六七八九十]+次"
 date_pattern = re.compile("\d{2,4}[\-\./年]\d{1,2}[\-\./月]\d{1,2}")
-def check_doctitle(doctitle_refind_less, doctitle_refind_greater, codes_less=[], code_greater=[],page_time_less="",page_time_greater=""):
+def check_doctitle(doctitle_refind_less, doctitle_refind_greater,docchannel_less,docchannel_greater, codes_less=[], code_greater=[],page_time_less="",page_time_greater=""):
+    # print('doctitle',doctitle_refind_less,doctitle_refind_greater)
     if code_greater is None:
         code_greater = []
     doctitle_refind_less = str(doctitle_refind_less).replace("(","(").replace(")",")")
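edit_distance_with_diff is a standard Levenshtein DP with a backtrace that labels each operation 删除/插入/替换; it accepts strings or token lists. A usage sketch:

```python
dist, diff = edit_distance_with_diff("6号楼1302", "6号楼1303")
print(dist)  # 1
print(diff)  # [('替换', '2', '3')]
```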
@@ -1024,6 +1077,8 @@ def check_doctitle(doctitle_refind_less, doctitle_refind_greater, codes_less=[],
         doctitle_refind_less = ""
     if doctitle_refind_greater is None:
         doctitle_refind_greater = ""
+    if doctitle_refind_less==doctitle_refind_greater:
+        return True
     _pack1 = None
     _pack2 = None
     #if contain then pass
@@ -1068,7 +1123,7 @@ def check_doctitle(doctitle_refind_less, doctitle_refind_greater, codes_less=[],
                 return False
 
     #check location and keywords
-    for _p in [num1_pattern,building_pattern]:
+    for _p in [num1_pattern,building_pattern,brackets_pattern]:
         num_all_l = re.findall(_p,doctitle_refind_less)
         num_all_g = re.findall(_p,doctitle_refind_greater)
         set_num_l = set(num_all_l)
@@ -1076,6 +1131,26 @@ def check_doctitle(doctitle_refind_less, doctitle_refind_greater, codes_less=[],
         if len(set_num_l)==len(set_num_g):
             if len(set_num_l&set_num_g)!=len(set_num_l):
                 return False
+    # property-rights auction announcements, e.g. 小区6号楼2单元1302号
+    if docchannel_less==docchannel_greater and docchannel_less in [115,116,117]:
+        for _p in [num2_pattern]:
+            num_all_l = re.findall(_p, doctitle_refind_less)
+            num_all_g = re.findall(_p, doctitle_refind_greater)
+            set_num_l = set(num_all_l)
+            set_num_g = set(num_all_g)
+            if len(set_num_l) == len(set_num_g):
+                if len(set_num_l & set_num_g) != len(set_num_l):
+                    return False
+    # similar titles: if an edit-distance substitution swaps one alphanumeric token for another, treat the titles as different
+    if getSimilarityOfString(doctitle_refind_less,doctitle_refind_greater) > 0.7:
+        doctitle_refind_less_re = re.findall(num3_pattern,doctitle_refind_less)
+        doctitle_refind_greater_re = re.findall(num3_pattern,doctitle_refind_greater)
+        distance, differences = edit_distance_with_diff(doctitle_refind_less_re, doctitle_refind_greater_re)
+        for diff in differences:
+            if diff[0]=='替换':
+                if re.search("^[一二三四五六七八九十A-Za-z\d-]+$",diff[1]) and re.search("^[一二三四五六七八九十A-Za-z\d-]+$",diff[2]):
+                    # print("edit-distance substitution: alphanumeric tokens differ")
+                    return False
     # re-tender (repeat bidding) keywords
     for _p in [rebid_pattern]:
         num_all_l = re.findall(_p,doctitle_refind_less)
@@ -1085,7 +1160,8 @@ def check_doctitle(doctitle_refind_less, doctitle_refind_greater, codes_less=[],
         if len(set_num_l)==len(set_num_g):
             if len(set_num_l&set_num_g)!=len(set_num_l):
                 return False
-        elif (len(set_num_l) and not len(set_num_g)) or (len(set_num_g) and not len(set_num_l)):
+        # if page_time_less and page_time_less != page_time_greater:
+        if (len(set_num_l) and not len(set_num_g)) or (len(set_num_g) and not len(set_num_l)):
             return False
 
     #check the location has conflict
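The similar-title guard tokenizes with num3_pattern (alphanumeric runs as single tokens, everything else per character) before computing the edit distance, so unit and room numbers compare as whole tokens. A sketch:

```python
import re

num3_pattern = re.compile(r"[一二三四五六七八九十A-Za-z\d-]+|.")
t1 = re.findall(num3_pattern, "小区6号楼2单元1302号")
t2 = re.findall(num3_pattern, "小区6号楼2单元1303号")
# t1 == ['小', '区', '6', '号', '楼', '2', '单', '元', '1302', '号']
dist, diff = edit_distance_with_diff(t1, t2)
# diff == [('替换', '1302', '1303')]: both tokens alphanumeric -> treat as different
```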
@@ -1204,23 +1280,30 @@ def check_package(package_less,package_greater,split_char=","):
 def check_time(json_time_less,json_time_greater):
     has_same = False
     has_diff = False
+    time_count_less = 0
+    time_count_greater = 0
     if getLength(json_time_less)>0 and getLength(json_time_greater)>0:
         if isinstance(json_time_less,dict):
             time_less = json_time_less
         else:
             time_less = json.loads(json_time_less)
+        time_count_less += sum([1 for k,v in time_less.items() if v])
         if isinstance(json_time_greater,dict):
             time_greater = json_time_greater
         else:
             time_greater = json.loads(json_time_greater)
+        time_count_greater += sum([1 for k, v in time_greater.items() if v])
         for k,v in time_less.items():
             if getLength(v)>0:
                 v1 = time_greater.get(k,"")
                 if getLength(v1)>0:
                     if v[:10]!=v1[:10]:
+                        # print('time diff',k,v,v1)
                         has_diff = True
                     else:
                         has_same = True
+    if time_count_less==0 and time_count_greater==0:
+        return 2
     if has_same:
         if has_diff:
             return 1
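With the new counters, a pair where neither side has any populated time field now returns 2 (inconclusive, which the caller treats as a pass) rather than falling through as a mismatch. A sketch, assuming getLength is this module's usual length helper:

```python
# neither document carries a populated time field -> inconclusive (2)
assert check_time('{"time_bidopen": ""}', "{}") == 2
```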
@@ -1231,11 +1314,11 @@ def check_time(json_time_less,json_time_greater):
 
 def check_products(products_less,products_greater):
     if isinstance(products_less, list):
-        products_less = products_less
+        pass
     else:
         products_less = json.loads(products_less) if products_less else []
     if isinstance(products_greater, list):
-        products_greater = products_greater
+        pass
     else:
         products_greater = json.loads(products_greater) if products_greater else []
     # if len(products_less)>0 and len(products_greater)>0:
@@ -1296,6 +1379,9 @@ def check_dumplicate_rule(document_less,document_greater,min_counts,b_log=False,
     detail_link_less = document_less.get("detail_link")
     is_special_bonds_less = document_less.get("is_special_bonds")
     products_less = document_less.get("products")
+    products_original_less = document_less.get("products_original",[])
+    change_content_less = document_less.get("change_content","")
+    change_time_less = document_less.get("change_time","")
 
 
     docid_greater = document_greater["docid"]
@@ -1323,6 +1409,9 @@ def check_dumplicate_rule(document_less,document_greater,min_counts,b_log=False,
     detail_link_greater = document_greater.get("detail_link")
     is_special_bonds_greater = document_greater.get("is_special_bonds")
     products_greater = document_greater.get("products")
+    products_original_greater = document_greater.get("products_original", [])
+    change_content_greater = document_greater.get("change_content", "")
+    change_time_greater = document_greater.get("change_time", "")
 
     moneys_greater = document_greater.get("moneys")
     moneys_attachment_greater = document_greater.get("moneys_attachment")
@@ -1337,7 +1426,7 @@ def check_dumplicate_rule(document_less,document_greater,min_counts,b_log=False,
         # print('fingerprint same')
         return 1
 
-    # # special-purpose bond dedup
+    # special-purpose bond dedup
     if is_special_bonds_greater==is_special_bonds_less==1:
         detail_link_less = detail_link_less.strip() if detail_link_less else ""
         detail_link_greater = detail_link_greater.strip() if detail_link_greater else ""
@@ -1378,9 +1467,42 @@ def check_dumplicate_rule(document_less,document_greater,min_counts,b_log=False,
 
     # compare the procured products field
     if getLength(products_less)>0 and getLength(products_greater)>0:
-        if not check_products(products_less,products_greater):
+        if products_original_less:  # prefer the original extraction (products not AI-supplemented)
+            _products_less = products_original_less
+        else:
+            _products_less = products_less
+        if products_original_greater:
+            _products_greater = products_original_greater
+        else:
+            _products_greater = products_greater
+        if not check_products(_products_less,_products_greater):
+            # print("check_products error")
             return 0
 
+    # change / Q&A announcements: compare the change content
+    if docchannel_less in [51,103] and docchannel_less==docchannel_greater:
+        if getLength(change_time_less)>0 and getLength(change_time_greater)>0:
+            if change_time_less != change_time_greater:
+                # print("change_time diff")
+                return 0
+        if getLength(change_content_less) > 10 and getLength(change_content_greater) > 10:
+            _change_content_less = re.findall("[\u4e00-\u9fa5a-zA-Z0-9]+", change_content_less)
+            _change_content_less = "".join(_change_content_less)
+            _change_content_greater = re.findall("[\u4e00-\u9fa5a-zA-Z0-9]+", change_content_greater)
+            _change_content_greater = "".join(_change_content_greater)
+            if _change_content_less == _change_content_greater:
+                # print("change_content same 1")
+                return 1
+            elif _change_content_less.find(_change_content_greater)>=0 or _change_content_greater.find(_change_content_less)>=0:
+                # print("change_content same 2")
+                return 1
+            # elif getSimilarityOfString(_change_content_less,_change_content_greater)>0.8:
+            #     print("change_content same 3")
+            #     print(_change_content_less)
+            #     print(_change_content_greater)
+            #     print(getSimilarityOfString(_change_content_less,_change_content_greater))
+            #     return 1
+
     # one document's key fields all sit in attachments, and the two attachment md5 sets overlap
     set_md5_less = set()
     set_md5_greater = set()
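The change-content comparison above strips everything except CJK characters and alphanumerics before testing equality or containment, so punctuation and whitespace differences between sources do not block a match. A sketch:

```python
import re

def _normalize(text):
    # keep only CJK and alphanumeric runs
    return "".join(re.findall("[\u4e00-\u9fa5a-zA-Z0-9]+", text))

assert _normalize("开标时间变更为:2024年1月5日。") == _normalize("开标时间 变更为 2024年1月5日")
```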
@@ -1404,21 +1526,21 @@ def check_dumplicate_rule(document_less,document_greater,min_counts,b_log=False,
         _md5 = _l.get("fileMd5")
         if _md5 is not None:
             set_md5_greater.add(_md5)
-    if len(set_md5_less&set_md5_greater)>0 and len(set_md5_less&set_md5_greater)==len(set_md5_less):
-        one_in_attach = False
-        dict_enterprise_less = json.loads(nlp_enterprise_less)
-        dict_enterprise_greater = json.loads(nlp_enterprise_greater)
-        indoctextcon_less = dict_enterprise_less.get("indoctextcon",[])
-        notindoctextcon_less = dict_enterprise_less.get("notindoctextcon",[])
-        indoctextcon_greater = dict_enterprise_greater.get("indoctextcon",[])
-        notindoctextcon_greater = dict_enterprise_greater.get("notindoctextcon",[])
-        if len(indoctextcon_less)<=1 and len(notindoctextcon_less)>=2:
-            one_in_attach = True
-        if len(indoctextcon_greater)<=1 and len(notindoctextcon_greater)>=2:
-            one_in_attach = True
-        if one_in_attach:
-            if check_product(product_less,product_greater,doctitle_refine_less=doctitle_refine_less,doctitle_refine_greater=doctitle_refine_greater):
-                return 1
+    # if len(set_md5_less&set_md5_greater)>0 and len(set_md5_less&set_md5_greater)==len(set_md5_less):
+    #     one_in_attach = False
+    #     dict_enterprise_less = json.loads(nlp_enterprise_less)
+    #     dict_enterprise_greater = json.loads(nlp_enterprise_greater)
+    #     indoctextcon_less = dict_enterprise_less.get("indoctextcon",[])
+    #     notindoctextcon_less = dict_enterprise_less.get("notindoctextcon",[])
+    #     indoctextcon_greater = dict_enterprise_greater.get("indoctextcon",[])
+    #     notindoctextcon_greater = dict_enterprise_greater.get("notindoctextcon",[])
+    #     if len(indoctextcon_less)<=1 and len(notindoctextcon_less)>=2:
+    #         one_in_attach = True
+    #     if len(indoctextcon_greater)<=1 and len(notindoctextcon_greater)>=2:
+    #         one_in_attach = True
+    #     if one_in_attach:
+    #         if check_product(product_less,product_greater,doctitle_refine_less=doctitle_refine_less,doctitle_refine_greater=doctitle_refine_greater):
+    #             return 1
 
     # same web source: both have attachments but no md5 overlap, so do not dedup
     if web_source_no_less==web_source_no_greater and len(set_md5_less)>0 and len(set_md5_greater)>0 and len(set_md5_less&set_md5_greater)==0:
@@ -1559,7 +1681,7 @@ def check_dumplicate_rule(document_less,document_greater,min_counts,b_log=False,
                 check_result["pass"] = 0
             else:
                 check_result["docchannel"] = 2
-    if not check_doctitle(doctitle_refine_less,doctitle_refine_greater,project_codes_less,project_codes_greater,page_time_less,page_time_greater):
+    if not check_doctitle(doctitle_refine_less,doctitle_refine_greater,docchannel_less,docchannel_greater,project_codes_less,project_codes_greater,page_time_less,page_time_greater):
         check_result["doctitle"] = 0
         check_result["pass"] = 0
         if b_log:
@@ -1641,7 +1763,9 @@ def check_dumplicate_rule(document_less,document_greater,min_counts,b_log=False,
 
     #added check
     _time_check = check_time(json_time_less,json_time_greater)
-    if not _time_check or (_time_check==1 and docchannel_less in (51,103)):
+    # if not _time_check or (_time_check==1 and docchannel_less in (51,103)):
+    if not _time_check or (_time_check==1 and docchannel_less in (51,103) and
+                           len([k for k,v in json_time_less.items() if v])>0 and len([k for k,v in json_time_greater.items() if v])>0):
         if b_log:
             logging.info("%d-%d,check_time_failed:%s==%s"%(docid_less,docid_greater,str(json_time_less),str(json_time_greater)))
             if isinstance(json_time_less,dict):
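Note the tightened gate calls .items() on json_time_less/json_time_greater directly, so it assumes both are already dicts at this point (the logging branch below still guards with isinstance). A condensed sketch of the new condition with hypothetical values:

```python
# a partial time match (1) only blocks channels 51/103 when both sides
# actually carry populated time fields
blocked = (not _time_check) or (
    _time_check == 1 and docchannel_less in (51, 103)
    and any(v for v in json_time_less.values())
    and any(v for v in json_time_greater.values()))
```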