Ver código fonte

去重搜索时间问题

luojiehua 1 ano atrás
pai
commit
ffcc20cb3c
1 arquivo alterado com 4 adições e 4 exclusões
  1. 4 4
      BaseDataMaintenance/maintenance/dataflow.py

+ 4 - 4
BaseDataMaintenance/maintenance/dataflow.py

@@ -2623,9 +2623,9 @@ class Dataflow_dumplicate(Dataflow):
         if page_time=='':
             page_time = current_date
 
-        two_day_dict = {"page_time":[timeAdd(page_time,-2),timeAdd(page_time,2)]}
+        two_day_dict = {"page_time":[timeAdd(page_time,-7),timeAdd(page_time,7)]}
 
-        if page_time>=timeAdd(current_date,-2):
+        if page_time>=timeAdd(current_date,-7):
             table_name = "document_tmp"
             table_index = "document_tmp_index"
             base_dict = {
@@ -4003,7 +4003,7 @@ class Dataflow_dumplicate(Dataflow):
             log("dumplicate %s rules:%d"%(str(item.get(document_tmp_docid)),len(list_rules)))
             list_rules = list_rules[:30]
             _i = 0
-            step = 5
+            step = 2
 
 
             item["confidence"] = 999
@@ -4412,7 +4412,7 @@ if __name__ == '__main__':
     # test_attachment_interface()
     df_dump = Dataflow_dumplicate(start_delete_listener=False)
     # df_dump.start_flow_dumplicate()
-    df_dump.test_dumplicate(463253000
+    df_dump.test_dumplicate(464184856
                             )
     # compare_dumplicate_check()
     # df_dump.test_merge([391898061