|
@@ -2269,6 +2269,7 @@ class Dataflow_dumplicate(Dataflow):
|
|
return []
|
|
return []
|
|
|
|
|
|
def dumplicate_check(self,_dict1,_dict2,min_counts,b_log=False):
|
|
def dumplicate_check(self,_dict1,_dict2,min_counts,b_log=False):
|
|
|
|
+ b_log=True
|
|
document_less = _dict1
|
|
document_less = _dict1
|
|
docid_less = _dict1["docid"]
|
|
docid_less = _dict1["docid"]
|
|
docchannel_less = document_less["docchannel"]
|
|
docchannel_less = document_less["docchannel"]
|
|
@@ -4236,7 +4237,7 @@ if __name__ == '__main__':
|
|
df_dump = Dataflow_dumplicate(start_delete_listener=False)
|
|
df_dump = Dataflow_dumplicate(start_delete_listener=False)
|
|
# df_dump.start_flow_dumplicate()
|
|
# df_dump.start_flow_dumplicate()
|
|
a = time.time()
|
|
a = time.time()
|
|
- df_dump.test_dumplicate(386161571
|
|
|
|
|
|
+ df_dump.test_dumplicate(393550944
|
|
)
|
|
)
|
|
# df_dump.test_merge([385521167
|
|
# df_dump.test_merge([385521167
|
|
# ],[385521113])
|
|
# ],[385521113])
|