|
@@ -255,22 +255,24 @@ def turn_document_tmp_status():
|
|
|
|
|
|
|
|
|
|
bool_query = BoolQuery(
|
|
bool_query = BoolQuery(
|
|
- # must_queries=[
|
|
|
|
- # BoolQuery(should_queries=[
|
|
|
|
- # # TermQuery("tenderee","山西利民工业有限责任公司"),
|
|
|
|
- # # MatchPhraseQuery("doctitle","中国电信"),
|
|
|
|
- # # MatchPhraseQuery("doctextcon","中国电信"),
|
|
|
|
- # # MatchPhraseQuery("attachmenttextcon","中国电信")]),
|
|
|
|
- # # RangeQuery(document_tmp_status,88,120,True,True),
|
|
|
|
- # RangeQuery("page_time","2022-03-24","2022-03-25",True,False),
|
|
|
|
- # ExistsQuery
|
|
|
|
- # #,TermQuery(document_tmp_docid,171146519)
|
|
|
|
- # ]
|
|
|
|
- # )
|
|
|
|
- # ],
|
|
|
|
- must_not_queries=[ExistsQuery("status"),
|
|
|
|
- ExistsQuery("page_time"),
|
|
|
|
- ]
|
|
|
|
|
|
+ must_queries=[
|
|
|
|
+ RangeQuery("status",66,71),
|
|
|
|
+ # BoolQuery(should_queries=[
|
|
|
|
+ # # TermQuery("tenderee","山西利民工业有限责任公司"),
|
|
|
|
+ # # MatchPhraseQuery("doctitle","中国电信"),
|
|
|
|
+ # # MatchPhraseQuery("doctextcon","中国电信"),
|
|
|
|
+ # # MatchPhraseQuery("attachmenttextcon","中国电信")]),
|
|
|
|
+ # # RangeQuery(document_tmp_status,88,120,True,True),
|
|
|
|
+ # RangeQuery("page_time","2022-03-24","2022-03-25",True,False),
|
|
|
|
+ # ExistsQuery
|
|
|
|
+ # #,TermQuery(document_tmp_docid,171146519)
|
|
|
|
+ # ]
|
|
|
|
+ # )
|
|
|
|
+ ],
|
|
|
|
+ # must_not_queries=[ExistsQuery("status"),
|
|
|
|
+ # ExistsQuery("page_time"),
|
|
|
|
+ #
|
|
|
|
+ # ]
|
|
)
|
|
)
|
|
|
|
|
|
rows,next_token,total_count,is_all_succeed = ots_client.search("document_tmp","document_tmp_index",
|
|
rows,next_token,total_count,is_all_succeed = ots_client.search("document_tmp","document_tmp_index",
|
|
@@ -278,7 +280,7 @@ def turn_document_tmp_status():
|
|
columns_to_get=ColumnsToGet(["extract_json"],return_type=ColumnReturnType.SPECIFIED))
|
|
columns_to_get=ColumnsToGet(["extract_json"],return_type=ColumnReturnType.SPECIFIED))
|
|
list_data = getRow_ots(rows)
|
|
list_data = getRow_ots(rows)
|
|
print(total_count)
|
|
print(total_count)
|
|
- print(list_data)
|
|
|
|
|
|
+ # print(list_data)
|
|
_count = len(list_data)
|
|
_count = len(list_data)
|
|
for _data in list_data:
|
|
for _data in list_data:
|
|
_document = Document_tmp(_data)
|
|
_document = Document_tmp(_data)
|
|
@@ -330,10 +332,15 @@ def turn_document_tmp_status():
|
|
|
|
|
|
#change status
|
|
#change status
|
|
# item.setValue(document_tmp_docchannel,item.getProperties().get(document_tmp_original_docchannel),True)
|
|
# item.setValue(document_tmp_docchannel,item.getProperties().get(document_tmp_original_docchannel),True)
|
|
- # item.setValue(document_tmp_status,random.randint(151,171),True)
|
|
|
|
- # item.update_row(ots_client)
|
|
|
|
|
|
+ _extract_json = item.getProperties().get(document_tmp_extract_json,"")
|
|
|
|
+ _extract_json = _extract_json.replace("\x06", "").replace("\x05", "").replace("\x07", "").replace('\\', '')
|
|
|
|
+ item.setValue(document_tmp_extract_json,_extract_json,True)
|
|
|
|
+ json.loads(_extract_json)
|
|
|
|
+ # item.setValue(document_tmp_status,71,True)
|
|
|
|
+ # item.setValue(document_tmp_save,1,True)
|
|
|
|
+ item.update_row(ots_client)
|
|
# log("update %d status done"%(item.getProperties().get(document_tmp_docid)))
|
|
# log("update %d status done"%(item.getProperties().get(document_tmp_docid)))
|
|
- item.delete_row(ots_client)
|
|
|
|
|
|
+ # item.delete_row(ots_client)
|
|
pass
|
|
pass
|
|
|
|
|
|
|
|
|