Selaa lähdekoodia

实时项目合并日志修复、更新字段过滤空值;初始化时放入源类别

luojiehua 2 vuotta sitten
vanhempi
commit
3192637b78

+ 3 - 5
BaseDataMaintenance/maintenance/dataflow.py

@@ -2430,12 +2430,12 @@ class Dataflow_dumplicate(Dataflow):
                     set_docid.add(_docid)
             set_docid.add(_docid)
 
-    def appendRule(self,list_rules,_dict,base_dict,must_not_dict,confidence,item,to_log=True):
+    def appendRule(self,list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=False):
         for k,v in _dict.items():
             if getLength(v)==0:
                 return
         _dict.update(base_dict)
-        if to_log:
+        if b_log:
             log(str(_dict))
         _query = self.generate_dumplicate_query(_dict,must_not_dict)
         _rule = {"confidence":confidence,
@@ -3108,7 +3108,6 @@ class Dataflow_dumplicate(Dataflow):
 
 
 
-        print("choose_dict",choose_dict)
         for _key,_value in choose_dict.items():
             _l = []
             for k,v in _value.items():
@@ -3159,7 +3158,6 @@ class Dataflow_dumplicate(Dataflow):
             if p_page_time=="":
                 p_page_time = page_time
 
-            print("docid %s page_time:%s docchannel %s"%(str(_docid),str(page_time),str(_docchannel)))
             if zhao_biao_page_time=="" and _docchannel in (51,52,102,103,114):
                 zhao_biao_page_time = page_time
             if zhong_biao_page_time=="" and _docchannel in (101,118,119,120):
@@ -3706,7 +3704,7 @@ class Dataflow_dumplicate(Dataflow):
         self.dumplicate_document_in_merge(list_projects)
 
         project_json = self.to_project_json(list_projects)
-        print("project_json",project_json)
+        # print("project_json",project_json)
         return project_json
 
     def dumplicate_comsumer_handle(self,item,result_queue,ots_client,get_all=False,upgrade=True):

+ 2 - 1
BaseDataMaintenance/maintenance/dataflow_mq.py

@@ -957,7 +957,8 @@ class Dataflow_init(Dataflow):
             body = json.loads(headers.body)
             body[document_tmp_partitionkey] = partitionkey
             body[document_tmp_docid] = next_docid
-            body[document_original_docchannel] = body.get(document_docchannel)
+            if body.get(document_original_docchannel) is None:
+                body[document_original_docchannel] = body.get(document_docchannel)
             page_attachments = body.get(document_tmp_attachment_path,"[]")
             _uuid = body.get(document_tmp_uuid,"")
             if page_attachments!="[]":