|
@@ -307,8 +307,9 @@ def turn_document_status():
|
|
|
|
|
|
bool_query = BoolQuery(
|
|
|
must_queries=[
|
|
|
- MatchPhraseQuery("doctitle","破产清算案"),
|
|
|
- MatchPhraseQuery("project_name","经相关部门批准后方可开展经营活动"),
|
|
|
+ MatchPhraseQuery("doctitle","质量竣工验收监督"),
|
|
|
+ RangeQuery("status",401,451)
|
|
|
+ # MatchPhraseQuery("project_name","经相关部门批准后方可开展经营活动"),
|
|
|
# WildcardQuery("web_source_no","03716-*"),
|
|
|
# RangeQuery("product_number",500),
|
|
|
# TermQuery("save",1)
|
|
@@ -342,25 +343,25 @@ def turn_document_status():
|
|
|
#
|
|
|
# )
|
|
|
|
|
|
- # rows,next_token,total_count,is_all_succeed = ots_client.search("document","document_index",
|
|
|
- # SearchQuery(bool_query,sort=Sort(sorters=[FieldSort("docid",SortOrder.DESC)]),limit=100,get_total_count=True),
|
|
|
- # columns_to_get=ColumnsToGet(["product","product_number"],return_type=ColumnReturnType.SPECIFIED))
|
|
|
- # list_data = getRow_ots(rows)
|
|
|
- # print(total_count)
|
|
|
- # _count = len(list_data)
|
|
|
- # for _data in list_data:
|
|
|
- # _document = Document(_data)
|
|
|
- # task_queue.put(_document)
|
|
|
- # while next_token:
|
|
|
- # rows,next_token,total_count,is_all_succeed = ots_client.search("document","document_index",
|
|
|
- # SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
|
|
|
- # columns_to_get=ColumnsToGet(["product"],return_type=ColumnReturnType.SPECIFIED))
|
|
|
- # list_data = getRow_ots(rows)
|
|
|
- # _count += len(list_data)
|
|
|
- # print("%d/%d"%(_count,total_count))
|
|
|
- # for _data in list_data:
|
|
|
- # _document = Document(_data)
|
|
|
- # task_queue.put(_document)
|
|
|
+ rows,next_token,total_count,is_all_succeed = ots_client.search("document","document_index",
|
|
|
+ SearchQuery(bool_query,sort=Sort(sorters=[FieldSort("docid",SortOrder.DESC)]),limit=100,get_total_count=True),
|
|
|
+ columns_to_get=ColumnsToGet(["product","product_number"],return_type=ColumnReturnType.SPECIFIED))
|
|
|
+ list_data = getRow_ots(rows)
|
|
|
+ print(total_count)
|
|
|
+ _count = len(list_data)
|
|
|
+ for _data in list_data:
|
|
|
+ _document = Document(_data)
|
|
|
+ task_queue.put(_document)
|
|
|
+ while next_token:
|
|
|
+ rows,next_token,total_count,is_all_succeed = ots_client.search("document","document_index",
|
|
|
+ SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
|
|
|
+ columns_to_get=ColumnsToGet(["product"],return_type=ColumnReturnType.SPECIFIED))
|
|
|
+ list_data = getRow_ots(rows)
|
|
|
+ _count += len(list_data)
|
|
|
+ print("%d/%d"%(_count,total_count))
|
|
|
+ for _data in list_data:
|
|
|
+ _document = Document(_data)
|
|
|
+ task_queue.put(_document)
|
|
|
|
|
|
# docids = [223820830,224445409]
|
|
|
# for docid in docids:
|
|
@@ -368,9 +369,9 @@ def turn_document_status():
|
|
|
# document_partitionkey:int(docid)%500+1,
|
|
|
# }
|
|
|
# task_queue.put(Document(_dict))
|
|
|
- import pandas as pd
|
|
|
- df = pd.read_csv(r"C:\Users\Administrator\Desktop\export_241224_6.csv")
|
|
|
- list_docid = df["docid"]
|
|
|
+ # import pandas as pd
|
|
|
+ # df = pd.read_csv(r"C:\Users\Administrator\Desktop\export_241224_6.csv")
|
|
|
+ # list_docid = df["docid"]
|
|
|
# list_docid = [519497468]
|
|
|
|
|
|
# list_docid = []
|
|
@@ -385,13 +386,13 @@ def turn_document_status():
|
|
|
# if re.search("^\d+$",docid) is not None:
|
|
|
# list_docid.append(int(docid))
|
|
|
|
|
|
- for docid,construct_company,recall_flag in zip(list_docid,df["construct_company"],df["recall_flag"]):
|
|
|
- if recall_flag == 1:
|
|
|
- _dict = {document_docid:int(docid),
|
|
|
- document_partitionkey:int(docid)%500+1,
|
|
|
- "construct_company":construct_company
|
|
|
- }
|
|
|
- task_queue.put(Document(_dict))
|
|
|
+ # for docid,construct_company,recall_flag in zip(list_docid,df["construct_company"],df["recall_flag"]):
|
|
|
+ # if recall_flag == 1:
|
|
|
+ # _dict = {document_docid:int(docid),
|
|
|
+ # document_partitionkey:int(docid)%500+1,
|
|
|
+ # "construct_company":construct_company
|
|
|
+ # }
|
|
|
+ # task_queue.put(Document(_dict))
|
|
|
# for docid in df["docid2"]:
|
|
|
# _dict = {document_docid:int(docid),
|
|
|
# document_partitionkey:int(docid)%500+1,
|
|
@@ -434,6 +435,7 @@ def turn_document_status():
|
|
|
# n_product = ",".join(l_product[:500])
|
|
|
# item.setValue(document_product,n_product,True)
|
|
|
# item.fix_columns(ots_client,["extract_json","doctitle",""],True)
|
|
|
+ item.setValue(document_status,1,True)
|
|
|
item.update_row(ots_client)
|
|
|
# log("update %d status done"%(item.getProperties().get(document_docid)))
|
|
|
pass
|