|
@@ -440,9 +440,78 @@ def export_extract_ai():
|
|
|
df = pd.DataFrame(list_data)
|
|
|
df.to_excel("%s.xlsx"%(filename))
|
|
|
|
|
|
def _extract_ai1_read_docids(filename):
    """Read candidate docids from a UTF-8 text file.

    Every non-blank line is split on ":" and its last field is taken as the
    docid. The raw field is printed (as before) and kept only when it parses
    as an integer.

    :param filename: path of the text file to read
    :return: list of int docids, in file order
    """
    list_docid = []
    with open(filename,"r",encoding="utf8") as f:
        for line in f:
            line = line.strip()
            if line=="":
                continue
            docid = line.split(":")[-1]
            print(docid)
            try:
                list_docid.append(int(docid))
            except ValueError:
                # Best effort: the last field of this line is not a number,
                # so the line carries no docid and is skipped.
                continue
    return list_docid


def _extract_ai1_changed_roles(_extract):
    """Collect tenderee / win_tenderer values from roles without an address.

    Walks every package under ``prem`` and every entry of its ``roleList``.
    Only roles whose "address" is missing or None are considered. When
    several roles match, the last one seen wins.

    :param _extract: dict parsed from the document's extract_json
    :return: tuple (tenderee text, win_tenderer text, win price); each item
             is "" when nothing was found
    """
    changed_tenderee = ""
    changed_win_tenderer = ""
    changed_win_price = ""
    prem = _extract.get("prem") or {}
    for pack_value in prem.values():
        for _role in pack_value.get("roleList") or []:
            if _role.get("address") is not None:
                continue
            role_name = _role.get("role_name","")
            if role_name=="tenderee":
                changed_tenderee = _role.get("role_text") or ""
            elif role_name=="win_tenderer":
                changed_win_tenderer = _role.get("role_text") or ""
                # "role_money" may be absent or explicitly None.
                money = (_role.get("role_money") or {}).get("money")
                # Normalise None to "" so the caller's emptiness check
                # does not treat a missing price as a value.
                changed_win_price = "" if money is None else money
    return changed_tenderee,changed_win_tenderer,changed_win_price


def export_extract_ai1(filename=r"C:\Users\Administrator\Desktop\extract_ai.txt"):
    """Export extracted tenderee / winner data for listed docids to Excel.

    Reads docids from ``filename``, processes them in reverse file order,
    loads the "extract_json" and "docchannel" columns of each document and,
    for documents whose docchannel is one of the known announcement types,
    writes one row with the values found on roles that have no address.
    The result is saved next to the input as "<filename>.xlsx".

    :param filename: text file with one "...:docid" entry per line; defaults
                     to the path that was previously hard-coded
    :return: None
    """
    # Project imports stay function-local, as in the original code.
    from BaseDataMaintenance.dataSource.source import getConnect_ots
    from BaseDataMaintenance.model.ots.document import Document
    import pandas as pd

    # docchannel code -> label written to the "公告类型" column. Built once
    # here instead of once per document; it is also the channel filter.
    docchannel_dict = {52:"招标公告",
                       101:"中标公告",
                       118:"废标公告",
                       119:"候选人公示",
                       120:"合同公告",
                       121:"开标记录",
                       122:"验收合同"}

    ots_client = getConnect_ots()

    list_docid = _extract_ai1_read_docids(filename)
    list_docid.reverse()

    list_data = []
    total = len(list_docid)
    for _count,docid in enumerate(list_docid,1):
        print("%d/%d"%(_count,total))
        partitionkey = docid%500+1
        # document_tmp_partitionkey / document_tmp_docid are module-level
        # names defined outside this function.
        _d = {document_tmp_partitionkey:partitionkey,
              document_tmp_docid:docid}
        dtmp = Document(_d)
        if not dtmp.fix_columns(ots_client,["extract_json","docchannel"],True):
            continue

        docchannel = dtmp.getProperties().get("docchannel",0)
        if docchannel not in docchannel_dict:
            continue

        extract_json = dtmp.getProperties().get("extract_json")
        if not extract_json:
            # No extraction stored for this document; json.loads(None)
            # would raise and abort the whole export.
            continue
        _extract = json.loads(extract_json)

        changed_tenderee,changed_win_tenderer,changed_win_price = _extract_ai1_changed_roles(_extract)
        if changed_tenderee!="" or changed_win_tenderer!="" or changed_win_price!="":
            data_d = {
                "docid":docid,
                "公告类型":docchannel_dict.get(docchannel,""),
                "招标人":changed_tenderee,
                "中标人":changed_win_tenderer,
                "中标金额":changed_win_price,
            }
            list_data.append(data_d)

    df = pd.DataFrame(list_data)
    df.to_excel("%s.xlsx"%(filename))
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point. Only the last call is active; the other calls
    # are kept here, disabled, so they can be re-enabled by hand:
    #   turn_extract_status()
    #   turn_document_tmp_status()
    #   export_extract_ai()
    export_extract_ai1()
|