luojiehua 1 жил өмнө
parent
commit
ffcc20cb3c

+ 4 - 4
BaseDataMaintenance/maintenance/dataflow.py

@@ -2623,9 +2623,9 @@ class Dataflow_dumplicate(Dataflow):
         if page_time=='':
         if page_time=='':
             page_time = current_date
             page_time = current_date
 
 
-        two_day_dict = {"page_time":[timeAdd(page_time,-2),timeAdd(page_time,2)]}
+        two_day_dict = {"page_time":[timeAdd(page_time,-7),timeAdd(page_time,7)]}
 
 
-        if page_time>=timeAdd(current_date,-2):
+        if page_time>=timeAdd(current_date,-7):
             table_name = "document_tmp"
             table_name = "document_tmp"
             table_index = "document_tmp_index"
             table_index = "document_tmp_index"
             base_dict = {
             base_dict = {
@@ -4003,7 +4003,7 @@ class Dataflow_dumplicate(Dataflow):
             log("dumplicate %s rules:%d"%(str(item.get(document_tmp_docid)),len(list_rules)))
             log("dumplicate %s rules:%d"%(str(item.get(document_tmp_docid)),len(list_rules)))
             list_rules = list_rules[:30]
             list_rules = list_rules[:30]
             _i = 0
             _i = 0
-            step = 5
+            step = 2
 
 
 
 
             item["confidence"] = 999
             item["confidence"] = 999
@@ -4412,7 +4412,7 @@ if __name__ == '__main__':
     # test_attachment_interface()
     # test_attachment_interface()
     df_dump = Dataflow_dumplicate(start_delete_listener=False)
     df_dump = Dataflow_dumplicate(start_delete_listener=False)
     # df_dump.start_flow_dumplicate()
     # df_dump.start_flow_dumplicate()
-    df_dump.test_dumplicate(463253000
+    df_dump.test_dumplicate(464184856
                             )
                             )
     # compare_dumplicate_check()
     # compare_dumplicate_check()
     # df_dump.test_merge([391898061
     # df_dump.test_merge([391898061