Jelajahi Sumber

修复附件数据不规范的问题,调整去重规则

luojiehua 1 tahun lalu
induk
komit
98ac3777eb

+ 2 - 1
BaseDataMaintenance/maintenance/dataflow.py

@@ -2269,6 +2269,7 @@ class Dataflow_dumplicate(Dataflow):
         return []
 
     def dumplicate_check(self,_dict1,_dict2,min_counts,b_log=False):
+        b_log=True
         document_less = _dict1
         docid_less = _dict1["docid"]
         docchannel_less = document_less["docchannel"]
@@ -4236,7 +4237,7 @@ if __name__ == '__main__':
     df_dump = Dataflow_dumplicate(start_delete_listener=False)
     # df_dump.start_flow_dumplicate()
     a = time.time()
-    df_dump.test_dumplicate(386161571
+    df_dump.test_dumplicate(393550944
                             )
     # df_dump.test_merge([385521167
     #                     ],[385521113])

+ 1 - 0
BaseDataMaintenance/maintenance/dataflow_mq.py

@@ -292,6 +292,7 @@ class Dataflow_ActivteMQ_attachment(Dataflow_attachment):
                 ackMsg(conn,message_id)
             log("document:%d get attachments with result:%s %s retry_times:%d"%(item.get("docid"),str(_succeed),str(_to_ack),_retry_times))
         except Exception as e:
+
             traceback.print_exc()
             if time.time()-start_time<10:
                 item["retry_times"] -= 1

+ 2 - 0
BaseDataMaintenance/maintenance/document/industry_keyword_expand.py

@@ -0,0 +1,2 @@
+
+

+ 1 - 1
BaseDataMaintenance/maxcompute/documentDumplicate.py

@@ -991,7 +991,7 @@ def check_product(product_less,product_greater,split_char=","):
         _product_l = product_less.split(split_char)
         _product_g = product_greater.split(split_char)
         same_count = 0
-        if len(_product_g)>len(_product_l):
+        if len(_product_l)>len(_product_g):
             a = _product_g
             _product_g = _product_l
             _product_l = a

Diff file disembunyikan karena terlalu besar
+ 73 - 9
BaseDataMaintenance/model/ots/proposedBuilding_tmp.py


Beberapa file tidak ditampilkan karena terlalu banyak file yang berubah dalam diff ini