|
@@ -341,25 +341,25 @@ def turn_document_status():
|
|
|
#
|
|
|
# )
|
|
|
|
|
|
- rows,next_token,total_count,is_all_succeed = ots_client.search("document","document_index",
|
|
|
- SearchQuery(bool_query,sort=Sort(sorters=[FieldSort("docid",SortOrder.DESC)]),limit=100,get_total_count=True),
|
|
|
- columns_to_get=ColumnsToGet(["product","product_number"],return_type=ColumnReturnType.SPECIFIED))
|
|
|
- list_data = getRow_ots(rows)
|
|
|
- print(total_count)
|
|
|
- _count = len(list_data)
|
|
|
- for _data in list_data:
|
|
|
- _document = Document(_data)
|
|
|
- task_queue.put(_document)
|
|
|
- while next_token:
|
|
|
- rows,next_token,total_count,is_all_succeed = ots_client.search("document","document_index",
|
|
|
- SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
|
|
|
- columns_to_get=ColumnsToGet(["product"],return_type=ColumnReturnType.SPECIFIED))
|
|
|
- list_data = getRow_ots(rows)
|
|
|
- _count += len(list_data)
|
|
|
- print("%d/%d"%(_count,total_count))
|
|
|
- for _data in list_data:
|
|
|
- _document = Document(_data)
|
|
|
- task_queue.put(_document)
|
|
|
+ # rows,next_token,total_count,is_all_succeed = ots_client.search("document","document_index",
|
|
|
+ # SearchQuery(bool_query,sort=Sort(sorters=[FieldSort("docid",SortOrder.DESC)]),limit=100,get_total_count=True),
|
|
|
+ # columns_to_get=ColumnsToGet(["product","product_number"],return_type=ColumnReturnType.SPECIFIED))
|
|
|
+ # list_data = getRow_ots(rows)
|
|
|
+ # print(total_count)
|
|
|
+ # _count = len(list_data)
|
|
|
+ # for _data in list_data:
|
|
|
+ # _document = Document(_data)
|
|
|
+ # task_queue.put(_document)
|
|
|
+ # while next_token:
|
|
|
+ # rows,next_token,total_count,is_all_succeed = ots_client.search("document","document_index",
|
|
|
+ # SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
|
|
|
+ # columns_to_get=ColumnsToGet(["product"],return_type=ColumnReturnType.SPECIFIED))
|
|
|
+ # list_data = getRow_ots(rows)
|
|
|
+ # _count += len(list_data)
|
|
|
+ # print("%d/%d"%(_count,total_count))
|
|
|
+ # for _data in list_data:
|
|
|
+ # _document = Document(_data)
|
|
|
+ # task_queue.put(_document)
|
|
|
|
|
|
# docids = [223820830,224445409]
|
|
|
# for docid in docids:
|
|
@@ -372,23 +372,23 @@ def turn_document_status():
|
|
|
# list_docid = df["docid"]
|
|
|
# list_docid = [519497468]
|
|
|
|
|
|
- # list_docid = []
|
|
|
- # filename = r"G:\新建文件夹\WeChat Files\wxid_kluerlj8cn3b21\FileStorage\File\2024-10\金额缺失的id (1).txt"
|
|
|
- # with open(filename,"r",encoding="utf8") as f:
|
|
|
- # while 1:
|
|
|
- # line = f.readline()
|
|
|
- # if not line:
|
|
|
- # break
|
|
|
- # line = line.strip()
|
|
|
- # docid = line.split('-')[-1]
|
|
|
- # if re.search("^\d+$",docid) is not None:
|
|
|
- # list_docid.append(int(docid))
|
|
|
- #
|
|
|
- # for docid in list_docid:
|
|
|
- # _dict = {document_docid:int(docid),
|
|
|
- # document_partitionkey:int(docid)%500+1,
|
|
|
- # }
|
|
|
- # task_queue.put(Document(_dict))
|
|
|
+ list_docid = []
|
|
|
+ filename = r"G:\新建文件夹\WeChat Files\wxid_kluerlj8cn3b21\FileStorage\File\2024-10\金额缺失的id (1).txt"
|
|
|
+ with open(filename,"r",encoding="utf8") as f:
|
|
|
+ while 1:
|
|
|
+ line = f.readline()
|
|
|
+ if not line:
|
|
|
+ break
|
|
|
+ line = line.strip()
|
|
|
+ docid = line.split('-')[-1]
|
|
|
+ if re.search("^\d+$",docid) is not None:
|
|
|
+ list_docid.append(int(docid))
|
|
|
+
|
|
|
+ for docid in list_docid:
|
|
|
+ _dict = {document_docid:int(docid),
|
|
|
+ document_partitionkey:int(docid)%500+1,
|
|
|
+ }
|
|
|
+ task_queue.put(Document(_dict))
|
|
|
# for docid in df["docid2"]:
|
|
|
# _dict = {document_docid:int(docid),
|
|
|
# document_partitionkey:int(docid)%500+1,
|