|
@@ -2623,9 +2623,9 @@ class Dataflow_dumplicate(Dataflow):
|
|
if page_time=='':
|
|
if page_time=='':
|
|
page_time = current_date
|
|
page_time = current_date
|
|
|
|
|
|
- two_day_dict = {"page_time":[timeAdd(page_time,-2),timeAdd(page_time,2)]}
|
|
|
|
|
|
+ two_day_dict = {"page_time":[timeAdd(page_time,-7),timeAdd(page_time,7)]}
|
|
|
|
|
|
- if page_time>=timeAdd(current_date,-2):
|
|
|
|
|
|
+ if page_time>=timeAdd(current_date,-7):
|
|
table_name = "document_tmp"
|
|
table_name = "document_tmp"
|
|
table_index = "document_tmp_index"
|
|
table_index = "document_tmp_index"
|
|
base_dict = {
|
|
base_dict = {
|
|
@@ -4003,7 +4003,7 @@ class Dataflow_dumplicate(Dataflow):
|
|
log("dumplicate %s rules:%d"%(str(item.get(document_tmp_docid)),len(list_rules)))
|
|
log("dumplicate %s rules:%d"%(str(item.get(document_tmp_docid)),len(list_rules)))
|
|
list_rules = list_rules[:30]
|
|
list_rules = list_rules[:30]
|
|
_i = 0
|
|
_i = 0
|
|
- step = 5
|
|
|
|
|
|
+ step = 2
|
|
|
|
|
|
|
|
|
|
item["confidence"] = 999
|
|
item["confidence"] = 999
|
|
@@ -4412,7 +4412,7 @@ if __name__ == '__main__':
|
|
# test_attachment_interface()
|
|
# test_attachment_interface()
|
|
df_dump = Dataflow_dumplicate(start_delete_listener=False)
|
|
df_dump = Dataflow_dumplicate(start_delete_listener=False)
|
|
# df_dump.start_flow_dumplicate()
|
|
# df_dump.start_flow_dumplicate()
|
|
- df_dump.test_dumplicate(463253000
|
|
|
|
|
|
+ df_dump.test_dumplicate(464184856
|
|
)
|
|
)
|
|
# compare_dumplicate_check()
|
|
# compare_dumplicate_check()
|
|
# df_dump.test_merge([391898061
|
|
# df_dump.test_merge([391898061
|