|
@@ -395,123 +395,8 @@ def turn_document_tmp_status():
|
|
|
mt = MultiThreadHandler(task_queue,_handle,None,30,ots_client=ots_client)
|
|
|
mt.run()
|
|
|
|
|
|
-def export_extract_ai():
|
|
|
- filename = r"C:\Users\Administrator\Desktop\extract_ai.txt"
|
|
|
- list_docid = []
|
|
|
- from BaseDataMaintenance.dataSource.source import getConnect_ots
|
|
|
- from BaseDataMaintenance.model.ots.document import Document
|
|
|
- ots_client = getConnect_ots()
|
|
|
- with open(filename,"r",encoding="utf8") as f:
|
|
|
- while 1:
|
|
|
- line = f.readline()
|
|
|
- if not line:
|
|
|
- break
|
|
|
- line = line.strip()
|
|
|
- if line!="":
|
|
|
- try:
|
|
|
- docid = line.split(":")[-1]
|
|
|
- print(docid)
|
|
|
- list_docid.append(int(docid))
|
|
|
- except Exception as e:
|
|
|
- pass
|
|
|
- list_docid.reverse()
|
|
|
- import pandas as pd
|
|
|
- list_data = []
|
|
|
- for docid in list_docid:
|
|
|
- partitionkey = docid%500+1
|
|
|
- _d = {document_tmp_partitionkey:partitionkey,
|
|
|
- document_tmp_docid:docid}
|
|
|
- dtmp = Document(_d)
|
|
|
- if dtmp.fix_columns(ots_client,["extract_json_ai","docchannel"],True):
|
|
|
- docchannel = dtmp.getProperties().get("docchannel",0)
|
|
|
- extract_json_ai = dtmp.getProperties().get("extract_json_ai")
|
|
|
- if docchannel in (52,101,118,119,120,121,122) and extract_json_ai is not None and extract_json_ai!="":
|
|
|
- extract_ai = json.loads(extract_json_ai)
|
|
|
- data_d = {
|
|
|
- "docid":docid,
|
|
|
- "招标人":extract_ai.get("招标信息",{}).get("招标人名称",""),
|
|
|
- "项目预算":extract_ai.get("招标信息",{}).get("项目预算",""),
|
|
|
- "招标人联系方式":extract_ai.get("招标信息",{}).get("招标人联系方式",""),
|
|
|
- "中标信息":extract_ai.get("中标信息","[]")
|
|
|
- }
|
|
|
- list_data.append(data_d)
|
|
|
- if len(list_data)>=200:
|
|
|
- break
|
|
|
- df = pd.DataFrame(list_data)
|
|
|
- df.to_excel("%s.xlsx"%(filename))
|
|
|
-
|
|
|
-def export_extract_ai1():
|
|
|
- filename = r"C:\Users\Administrator\Desktop\extract_ai.txt"
|
|
|
- list_docid = []
|
|
|
- from BaseDataMaintenance.dataSource.source import getConnect_ots
|
|
|
- from BaseDataMaintenance.model.ots.document import Document
|
|
|
- ots_client = getConnect_ots()
|
|
|
- with open(filename,"r",encoding="utf8") as f:
|
|
|
- while 1:
|
|
|
- line = f.readline()
|
|
|
- if not line:
|
|
|
- break
|
|
|
- line = line.strip()
|
|
|
- if line!="":
|
|
|
- try:
|
|
|
- docid = line.split(":")[-1]
|
|
|
- print(docid)
|
|
|
- list_docid.append(int(docid))
|
|
|
- except Exception as e:
|
|
|
- pass
|
|
|
- list_docid.reverse()
|
|
|
- import pandas as pd
|
|
|
- list_data = []
|
|
|
- _count = 0
|
|
|
- for docid in list_docid:
|
|
|
- _count += 1
|
|
|
- print("%d/%d"%(_count,len(list_docid)))
|
|
|
- partitionkey = docid%500+1
|
|
|
- _d = {document_tmp_partitionkey:partitionkey,
|
|
|
- document_tmp_docid:docid}
|
|
|
- dtmp = Document(_d)
|
|
|
- if dtmp.fix_columns(ots_client,["extract_json","docchannel"],True):
|
|
|
- docchannel = dtmp.getProperties().get("docchannel",0)
|
|
|
- extract_json = dtmp.getProperties().get("extract_json")
|
|
|
- _extract = json.loads(extract_json)
|
|
|
- if docchannel in (52,101,118,119,120,121,122):
|
|
|
- docchannel_dict = {52:"招标公告",
|
|
|
- 101:"中标公告",
|
|
|
- 118:"废标公告",
|
|
|
- 119:"候选人公示",
|
|
|
- 120:"合同公告",
|
|
|
- 121:"开标记录",
|
|
|
- 122:"验收合同"}
|
|
|
- changed_tenderee = ""
|
|
|
- changed_win_tenderer = ""
|
|
|
- changed_win_price = ""
|
|
|
- prem = _extract.get("prem",{})
|
|
|
- for pack,pack_value in prem.items():
|
|
|
- rolelist = pack_value.get("roleList",[])
|
|
|
- for _role in rolelist:
|
|
|
- if _role.get("address") is None:
|
|
|
- if _role.get("role_name","")=="tenderee":
|
|
|
- changed_tenderee = _role.get("role_text","")
|
|
|
- if _role.get("role_name","")=="win_tenderer":
|
|
|
- changed_win_tenderer = _role.get("role_text","")
|
|
|
- changed_win_price = _role.get("role_money",{}).get("money")
|
|
|
- if changed_tenderee!="" or changed_win_tenderer!="" or changed_win_price!="":
|
|
|
- data_d = {
|
|
|
- "docid":docid,
|
|
|
- "公告类型":docchannel_dict.get(docchannel,""),
|
|
|
- "招标人":changed_tenderee,
|
|
|
- "中标人":changed_win_tenderer,
|
|
|
- "中标金额":changed_win_price,
|
|
|
- }
|
|
|
- list_data.append(data_d)
|
|
|
- # if len(list_data)>=200:
|
|
|
- # break
|
|
|
- df = pd.DataFrame(list_data)
|
|
|
- df.to_excel("%s.xlsx"%(filename))
|
|
|
|
|
|
|
|
|
if __name__=="__main__":
|
|
|
# turn_extract_status()
|
|
|
- # turn_document_tmp_status()
|
|
|
- # export_extract_ai()
|
|
|
- export_extract_ai1()
|
|
|
+ turn_document_tmp_status()
|