|
@@ -395,8 +395,54 @@ def turn_document_tmp_status():
|
|
|
mt = MultiThreadHandler(task_queue,_handle,None,30,ots_client=ots_client)
|
|
|
mt.run()
|
|
|
|
|
|
def export_extract_ai(filename=r"C:\Users\Administrator\Desktop\extract_ai.txt", max_rows=200):
    """Export AI-extracted bidding fields for a list of docids to an Excel file.

    The input text file is read line by line; the text after the last ":" on
    each non-empty line is taken as a docid. Docids are processed in reverse
    file order. For each docid the ``extract_json_ai`` and ``docchannel``
    columns are requested through ``Document.fix_columns``; rows are kept only
    for the whitelisted doc channels with a non-empty ``extract_json_ai``.
    The result is written next to the input file as ``<filename>.xlsx``.

    Args:
        filename: Path of the text file listing the docids. Defaults to the
            path the original script used.
        max_rows: Maximum number of rows to export (default 200).

    Notes:
        ``document_tmp_partitionkey`` and ``document_tmp_docid`` are
        module-level names defined elsewhere in this file.
    """
    # Local imports, matching the original function's style; json is imported
    # here as well so the function does not depend on a module-level import.
    import json

    import pandas as pd

    from BaseDataMaintenance.dataSource.source import getConnect_ots
    from BaseDataMaintenance.model.ots.document import Document

    # Only documents on these channels are exported.
    export_docchannels = (52, 101, 118, 119, 120, 121, 122)

    ots_client = getConnect_ots()

    list_docid = []
    with open(filename, "r", encoding="utf8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                docid = line.split(":")[-1]
                print(docid)
                list_docid.append(int(docid))
            except ValueError:
                # Best effort: skip lines whose trailing field is not an
                # integer (UnicodeEncodeError from print is a ValueError too).
                continue
    list_docid.reverse()

    list_data = []
    for docid in list_docid:
        if len(list_data) >= max_rows:
            break

        _d = {document_tmp_partitionkey: docid % 500 + 1,
              document_tmp_docid: docid}
        dtmp = Document(_d)
        # NOTE(review): fix_columns presumably loads the listed columns from
        # OTS and returns a truthy value on success - confirm against Document.
        if not dtmp.fix_columns(ots_client, ["extract_json_ai", "docchannel"], True):
            continue

        properties = dtmp.getProperties()
        docchannel = properties.get("docchannel", 0)
        extract_json_ai = properties.get("extract_json_ai")
        if docchannel not in export_docchannels:
            continue
        if extract_json_ai is None or extract_json_ai == "":
            continue

        # One malformed document must not abort the whole export.
        try:
            extract_ai = json.loads(extract_json_ai)
        except (TypeError, ValueError) as e:
            print("skip docid %s: invalid extract_json_ai (%s)" % (docid, e))
            continue
        if not isinstance(extract_ai, dict):
            print("skip docid %s: extract_json_ai is not a JSON object" % (docid,))
            continue

        # The tender section may be missing or null; treat both as empty.
        tender_info = extract_ai.get("招标信息")
        if not isinstance(tender_info, dict):
            tender_info = {}

        list_data.append({
            "docid": docid,
            "招标人": tender_info.get("招标人名称", ""),
            "项目预算": tender_info.get("项目预算", ""),
            "招标人联系方式": tender_info.get("招标人联系方式", ""),
            "中标信息": extract_ai.get("中标信息", "[]"),
        })

    df = pd.DataFrame(list_data)
    df.to_excel("%s.xlsx" % (filename,))
|
|
|
+
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Other maintenance entry points, currently disabled:
    # turn_extract_status()
    # turn_document_tmp_status()
    export_extract_ai()
|