Przeglądaj źródła

公告去重文档指纹不限时间,只要存在状态正常且文档指纹一样的公告即去重

luojiehua 3 miesięcy temu
rodzic
commit
17cfefb932

+ 1 - 1
BaseDataMaintenance/dataSource/setttings.py

@@ -68,7 +68,7 @@ activateMQ_pswd = "admin"
 
 activateMQ_ali_host = "172.16.160.72"
 # activateMQ_ali_host = "172.16.147.13"
-# activateMQ_ali_host = "116.62.167.43"
+# activateMQ_ali_host = "8.149.130.129"
 activateMQ_ali_port = 61613
 activateMQ_ali_user = "admin"
 activateMQ_ali_pswd = "admin"

+ 41 - 31
BaseDataMaintenance/maintenance/dataflow.py

@@ -3286,47 +3286,57 @@ class Dataflow_dumplicate(Dataflow):
 
         list_projects = []
         if len(list_docid)>0:
+
             list_docs = self.search_docs(list_docid)
+            print("search_docs(list_docid)")
             list_projects = self.generate_projects_from_document(list_docs)
-            list_projects = dumplicate_projects(list_projects)
+            print("generate_projects_from_document")
+            list_projects = dumplicate_projects(list_projects,max_count=20)
+            print("dumplicate_projects")
         list_projects.extend(list_delete_projects)
         project_json = to_project_json(list_projects)
         return project_json
 
 
     def delete_doc_handle(self,_dict,result_queue):
-        headers = _dict.get("frame")
-        conn = _dict.get("conn")
-
-        if headers is not None:
-            message_id = headers.headers["message-id"]
-            body = headers.body
-            item = json.loads(body)
-            docid = item.get("docid")
-            log("==========start delete docid:%s"%(str(docid)))
-            if docid is None:
-                ackMsg(conn,message_id)
-            delete_result = self.delete_projects_by_document(docid)
-
-            log("1")
-            _uuid = uuid4().hex
-            _d = {PROJECT_PROCESS_UUID:_uuid,
-                  PROJECT_PROCESS_CRTIME:1,
-                  PROJECT_PROCESS_PROJECTS:delete_result}
-            _pp = Project_process(_d)
-            log("2")
-            try:
-                if _pp.update_row(self.ots_client):
+
+        try:
+            headers = _dict.get("frame")
+            conn = _dict.get("conn")
+
+            if headers is not None:
+                message_id = headers.headers["message-id"]
+                body = headers.body
+                item = json.loads(body)
+                docid = item.get("docid")
+                log("==========start delete docid:%s"%(str(docid)))
+                if docid is None:
                     ackMsg(conn,message_id)
-            except Exception as e:
-                ackMsg(conn,message_id)
-            log("3")
-            #取消插入结果队列,改成插入project_process表
-            # if send_msg_toacmq(self.pool_mq_ali,delete_result,self.doc_delete_result):
-            #     ackMsg(conn,message_id)
+                delete_result = self.delete_projects_by_document(docid)
+
+                log("1")
+                _uuid = uuid4().hex
+                _d = {PROJECT_PROCESS_UUID:_uuid,
+                      PROJECT_PROCESS_CRTIME:1,
+                      PROJECT_PROCESS_PROJECTS:delete_result}
+                _pp = Project_process(_d)
+                log("2")
+                try:
+                    if _pp.update_row(self.ots_client):
+                        ackMsg(conn,message_id)
+                except Exception as e:
+                    ackMsg(conn,message_id)
+                log("3")
+                #取消插入结果队列,改成插入project_process表
+                # if send_msg_toacmq(self.pool_mq_ali,delete_result,self.doc_delete_result):
+                #     ackMsg(conn,message_id)
+                log("==========end delete docid:%s"%(str(docid)))
+            else:
+                log("has not headers")
+        except Exception as e:
+            traceback.print_exc()
+            ackMsg(conn,message_id)
             log("==========end delete docid:%s"%(str(docid)))
-        else:
-            log("has not headers")
 
     def generate_common_properties(self,list_docs):
         '''

+ 2 - 2
BaseDataMaintenance/maxcompute/documentMerge.py

@@ -2181,7 +2181,7 @@ def appendKeyvalueCount(list_projects,keys=[project_tenderee,project_agency,proj
         _proj["keyvaluecount"] = _count
 
 
-def dumplicate_projects(list_projects,b_log=False):
+def dumplicate_projects(list_projects,b_log=False,max_count=100):
     '''
     对多标段项目进行去重
     :return:
@@ -2189,7 +2189,7 @@ def dumplicate_projects(list_projects,b_log=False):
     appendKeyvalueCount(list_projects)
     list_projects.sort(key=lambda x:str(x.get(project_page_time,"")))
     list_projects.sort(key=lambda x:x.get("keyvaluecount",0),reverse=True)
-    cluster_projects = list_projects[:100]
+    cluster_projects = list_projects[:max_count]
     _count = 10
     log("dumplicate projects rest %d"%len(cluster_projects))
     while _count>0: