Преглед на файлове

数据遗漏检查后自动同步

luojiehua преди 6 месеца
родител
ревизия
ab72456b5f
променени са 2 файла, в които са добавени 45 реда и са изтрити 43 реда
  1. +6 -5
      BaseDataMaintenance/dataMonitor/data_monitor.py
  2. +39 -38
      BaseDataMaintenance/model/ots/document.py

+ 6 - 5
BaseDataMaintenance/dataMonitor/data_monitor.py

@@ -243,11 +243,12 @@ class BaseDataMonitor():
                 sentMsgToDD(_msg,ACCESS_TOKEN_DATAWORKS,atAll=True)
                 # sendEmail(smtp_host,smtp_username,smtp_password,self.recieviers,_msg)
 
-            _count = fixDoc_to_queue_init(check_filename)
-            if _count>0:
-                _msg = "数据遗漏检查%d条公告已重新同步"%(_count)
-                sentMsgToDD(_msg,ACCESS_TOKEN_DATAWORKS,atAll=True)
-                df_data.to_excel("%s_bak.xlsx"%check_filename)
+                _count = fixDoc_to_queue_init(check_filename)
+                if _count>0:
+                    _msg = "数据遗漏检查%d条公告已重新同步"%(_count)
+                    sentMsgToDD(_msg,ACCESS_TOKEN_DATAWORKS,atAll=True)
+                    df_data.to_excel("%s_bak.xlsx"%check_filename)
+                    os.remove(check_filename)
 
 
 

+ 39 - 38
BaseDataMaintenance/model/ots/document.py

@@ -307,11 +307,12 @@ def turn_document_status():
 
         bool_query = BoolQuery(
             must_queries=[
-                # MatchPhraseQuery("doctitle","珠海城市职业技术学院2022年05月至2022年06月政府采购意向"),
+                MatchPhraseQuery("doctitle","破产清算案"),
+                MatchPhraseQuery("project_name","经相关部门批准后方可开展经营活动"),
                 # WildcardQuery("web_source_no","03716-*"),
                 # RangeQuery("product_number",500),
                 # TermQuery("save",1)
-                RangeQuery("status",0,1),
+                # RangeQuery("status",0,1),
                 # NestedQuery("page_attachments",ExistsQuery("page_attachments.fileMd5")),
                 # TermQuery("docid",397656324)
                 # BoolQuery(should_queries=[
@@ -341,25 +342,25 @@ def turn_document_status():
         #
         # )
 
-        # rows,next_token,total_count,is_all_succeed = ots_client.search("document","document_index",
-        #                                                                SearchQuery(bool_query,sort=Sort(sorters=[FieldSort("docid",SortOrder.DESC)]),limit=100,get_total_count=True),
-        #                                                                columns_to_get=ColumnsToGet(["product","product_number"],return_type=ColumnReturnType.SPECIFIED))
-        # list_data = getRow_ots(rows)
-        # print(total_count)
-        # _count = len(list_data)
-        # for _data in list_data:
-        #     _document = Document(_data)
-        #     task_queue.put(_document)
-        # while next_token:
-        #     rows,next_token,total_count,is_all_succeed = ots_client.search("document","document_index",
-        #                                                                    SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
-        #                                                                    columns_to_get=ColumnsToGet(["product"],return_type=ColumnReturnType.SPECIFIED))
-        #     list_data = getRow_ots(rows)
-        #     _count += len(list_data)
-        #     print("%d/%d"%(_count,total_count))
-        #     for _data in list_data:
-        #         _document = Document(_data)
-        #         task_queue.put(_document)
+        rows,next_token,total_count,is_all_succeed = ots_client.search("document","document_index",
+                                                                       SearchQuery(bool_query,sort=Sort(sorters=[FieldSort("docid",SortOrder.DESC)]),limit=100,get_total_count=True),
+                                                                       columns_to_get=ColumnsToGet(["product","product_number"],return_type=ColumnReturnType.SPECIFIED))
+        list_data = getRow_ots(rows)
+        print(total_count)
+        _count = len(list_data)
+        for _data in list_data:
+            _document = Document(_data)
+            task_queue.put(_document)
+        while next_token:
+            rows,next_token,total_count,is_all_succeed = ots_client.search("document","document_index",
+                                                                           SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
+                                                                           columns_to_get=ColumnsToGet(["product"],return_type=ColumnReturnType.SPECIFIED))
+            list_data = getRow_ots(rows)
+            _count += len(list_data)
+            print("%d/%d"%(_count,total_count))
+            for _data in list_data:
+                _document = Document(_data)
+                task_queue.put(_document)
 
         # docids = [223820830,224445409]
         # for docid in docids:
@@ -372,23 +373,23 @@ def turn_document_status():
         # list_docid = df["docid"]
         # list_docid = [519497468]
 
-        list_docid = []
-        filename = r"G:\新建文件夹\WeChat Files\wxid_kluerlj8cn3b21\FileStorage\File\2024-10\金额缺失的id (1).txt"
-        with open(filename,"r",encoding="utf8") as f:
-            while 1:
-                line = f.readline()
-                if not line:
-                    break
-                line = line.strip()
-                docid = line.split('-')[-1]
-                if re.search("^\d+$",docid) is not None:
-                    list_docid.append(int(docid))
-
-        for docid in list_docid:
-            _dict = {document_docid:int(docid),
-                     document_partitionkey:int(docid)%500+1,
-                     }
-            task_queue.put(Document(_dict))
+        # list_docid = []
+        # filename = r"G:\新建文件夹\WeChat Files\wxid_kluerlj8cn3b21\FileStorage\File\2024-10\金额缺失的id (1).txt"
+        # with open(filename,"r",encoding="utf8") as f:
+        #     while 1:
+        #         line = f.readline()
+        #         if not line:
+        #             break
+        #         line = line.strip()
+        #         docid = line.split('-')[-1]
+        #         if re.search("^\d+$",docid) is not None:
+        #             list_docid.append(int(docid))
+
+        # for docid in list_docid:
+        #     _dict = {document_docid:int(docid),
+        #              document_partitionkey:int(docid)%500+1,
+        #              }
+        #     task_queue.put(Document(_dict))
         # for docid in df["docid2"]:
         #     _dict = {document_docid:int(docid),
         #              document_partitionkey:int(docid)%500+1,