1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581 |
- #coding:utf8
- import sys
- import os
- sys.path.append(os.path.join(os.path.dirname(__file__),".."))
- import requests
- from utils.Utils import sendEmail,getCurrent_date,log
- import datetime
- import time
- from export.exportDocument import *
- import pandas as pd
- from apscheduler.schedulers.blocking import BlockingScheduler
- from export.exportUtils import rsa_encrpt
- import urllib.parse
- import base64
- def export_medicine_friday():
- current_date = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())-24*60*60))
- start_time = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())-7*24*60*60))
- if current_date<="2022-04-25":
- if datetime.datetime.now().weekday()==0:
- for i in range(10):
- try:
- df_data = exportDocument_medicine(start_time,current_date)
- df = pd.DataFrame(df_data)
- filename = os.path.dirname(__file__)+"/data/%s年%s至%s医疗数据导出.xlsx"%(start_time[:4],start_time,current_date)
- df.to_excel(filename,columns=list_df_columns)
- host = "smtp.exmail.qq.com"
- username = "vip@bidizhaobiao.com"
- password = "Biaoxun66-"
- receivers = ["1985262186@qq.com","1175730271@qq.com","1265797328@qq.com","1289358902@qq.com"]
- attachs = [filename]
- sendEmail(host,username,password,receivers,attachs=attachs)
- break
- except Exception as e:
- traceback.print_exc()
- log(str(e))
- time.sleep(60)
- def export2():
- def removeData(df_data):
- list_remove_index = []
- list_c = df_data.get("招标人采购系统",[])
- for _c_i in range(len(list_c)):
- if list_c[_c_i]=="企业采购系统":
- list_remove_index.append(_c_i)
- list_remove_index.reverse()
- print(list_remove_index)
- for k,v in df_data.items():
- for _rc in list_remove_index:
- v.pop(_rc)
- for k,v in df_data.items():
- v = v[:500]
- log("start export2:>>>>>>>>>>>>>>>")
- current_date = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())-24*60*60))
- start_time = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())-7*24*60*60))
- if current_date<="2024-02-28":
- if datetime.datetime.now().weekday()==4:
- for i in range(10):
- try:
- # start_time='2022-07-22'
- # current_date = '2022-07-28'
- log("start exporting export2:=================")
- # columns = ["doctitle","doctextcon","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose"]
- columns = ["doctitle","doctextcon","attachmenttextcon","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose","time_bidopen"]
- dict_channel = getDict_docchannel()
- list_query = []
- str_keyword = '''
- 机房建设 多媒体教室建设 数据中心机房 班班通 电子设备
- 视频 LED 监控 视频监控系统 信息安全
- 信息化建设 智能化 网络安全服务 网络安全系统 等级保护测评
- 信息系统 大数据中心 智慧教室建设 数据安全建设 可视化指挥联动平台
- 音频处理器 教学专用仪器 多媒体扩声系统 智慧校园 数字化
- 交换 路由 无线 城域网建设 触控一体机
- 教学设备
-
-
- '''
- list_keyword = splitIntoList(str_keyword,"[\s\n、,,]")
- str_not_keyword = '''
- '''
- list_not_key = splitIntoList(str_not_keyword,"[\s\n、,,]")
- tenderee_keywrod = "医院、大学、高校、高中"
- list_t_key = splitIntoList(tenderee_keywrod,"[\s\n、,,]")
- log(str(list_keyword))
- bool_query = BoolQuery(must_queries=[
- generateBoolShouldQuery(["doctitle","doctextcon","attachmenttextcon"],list_keyword,MatchPhraseQuery),
- generateBoolShouldQuery(["docchannel"],[52,102,114],TermQuery),
- RangeQuery("page_time",start_time,current_date,True,True),
- RangeQuery("status",151,300,True,True),
- # TermQuery("procurement_system","公安系统"),
- generateBoolShouldQuery(["province"],["湖南"],TermQuery),
- # generateBoolShouldQuery(["tenderee"],list_t_key,WildcardQuery)
- # generateBoolShouldQuery(["docchannel"],[101,118,119],TermQuery),
- ],
- # must_not_queries=[
- # generateBoolShouldQuery(["doctitle"],list_not_key,MatchPhraseQuery),
- # ]
- )
- list_query.append({"query":bool_query,"limit":700})
- list_row = getDocument(list_query,columns)
- log("get document %d rows"%len(list_row))
- df_data = {}
- set_line = set()
- # list_row = filterRow(list_row)
- getRowData(df_data,list_row,set_line,list_keyword,dict_channel,True)
- fixContactPerson(df_data,list_df_columns)
- removeData(df_data)
- df = pd.DataFrame(df_data)
- filename = os.path.dirname(__file__)+"/data/%s年%s至%s数据导出.xlsx"%(start_time[:4],start_time,current_date)
- df.to_excel(filename,columns=list_df_columns)
- log(str(filename))
- host = "smtp.exmail.qq.com"
- username = "vip@bidizhaobiao.com"
- password = "Biaoxun66-"
- receivers = ["1175730271@qq.com","493894608@qq.com"]
- # receivers = ["1175730271@qq.com"]
- attachs = [filename]
- sendEmail(host,username,password,receivers,attachs=attachs)
- break
- except Exception as e:
- traceback.print_exc()
- def export5():
- '''
- 客户信息
- 公司名:武汉市浩盛特种建材有限责任公司
- 客户姓名:高诗琴
- 会员号:15392839439
- 会员等级:高级会员
- 成交金额:5900元
- 服务期:12个月
- 截止推送项目时间:2024-02-17
- 申请原因:2.16成交的新客户,客户反馈跟销售沟通是人工每天整理好项目信息推送,现在寻求技术协助支持。
- 客户需求:
- 地区:河北省、北京市、天津市、山东省、江苏省、上海市、浙江省、福建省、广东省、海南省
- 业务关键词:膨胀剂,抗裂剂,防水剂,外加剂,防腐剂,阻锈剂,密实剂,耐久性,氧化镁,镁质,涂料
- (排除词,漆),需要招标信息
- 工作日下午四点文档信息推送到邮箱365531448@qq.com,每天更新最新的项目信息10条(不足10条可以按照更新信息推送)
- :return:
- '''
- def getRowData(df_data,rows,set_line,list_keyword,dict_channel,dumplicate):
- dict_line = {}
- # list_data = getRow_ots(rows)
- _index = 0
- rows.sort(key=lambda x:x.get("docid",0),reverse=True)
- set_col = set()
- df_columns = []
- for row in rows[:10]:
- _index += 1
- item = {}
- _dict = row
- set_dict_item_columns(set_col,df_columns,item,"省份",_dict.get("province",""))
- set_dict_item_columns(set_col,df_columns,item,"城市",_dict.get("city",""))
- set_dict_item_columns(set_col,df_columns,item,"公告类别",dict_channel.get(_dict.get("docchannel",""),""))
- set_dict_item_columns(set_col,df_columns,item,"发布时间",_dict.get("page_time",""))
- set_dict_item_columns(set_col,df_columns,item,"公告标题",_dict.get("doctitle",""))
- set_dict_item_columns(set_col,df_columns,item,"招标单位",_dict.get("tenderee",""))
- set_dict_item_columns(set_col,df_columns,item,"招标联系人",_dict.get("tenderee_contact",""))
- set_dict_item_columns(set_col,df_columns,item,"招标联系人电话",_dict.get("tenderee_phone",""))
- set_dict_item_columns(set_col,df_columns,item,"代理单位",_dict.get("agency",""))
- set_dict_item_columns(set_col,df_columns,item,"代理单位联系人",_dict.get("agency_contact",""))
- set_dict_item_columns(set_col,df_columns,item,"代理单位联系人电话",_dict.get("agency_phone",""))
- set_dict_item_columns(set_col,df_columns,item,"链接","http://www.bidizhaobiao.com/info-%s.html"%str(_dict.get("docid","")))
- for k,v in item.items():
- if k not in df_data:
- df_data[k] = []
- df_data[k].append(v)
- return df_columns
- log("start export5:>>>>>>>>>>>>>>>")
- current_date = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())))
- if current_date<="2024-02-17":
- weekday = datetime.datetime.now().weekday()
- if weekday>=0 and weekday<=4:
- for i in range(10):
- try:
- # start_time='2022-07-22'
- # current_date = '2022-07-28'
- log("start exporting export2:=================")
- # columns = ["doctitle","doctextcon","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose"]
- columns = ["doctitle","doctextcon","attachmenttextcon","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose","time_bidopen"]
- columns = ["doctitle","docchannel","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose","time_bidopen"]
- dict_channel = getDict_docchannel()
- list_query = []
- str_province = '''
- 河北、北京、天津、山东、江苏、上海、浙江、福建、广东、海南
- '''
- str_keyword = '''
- 膨胀剂 抗裂剂 防水剂 外加剂 镁质
- 防腐剂 阻锈剂 密实剂 耐久性 氧化镁
-
- '''
- list_province = splitIntoList(str_province,"[\s\n、,,]")
- list_keyword = splitIntoList(str_keyword,"[\s\n、,,]")
- str_not_keyword = '''
- 漆,复合风管,检测,涂料工程,涂料多乐士
- '''
- list_not_key = splitIntoList(str_not_keyword,"[\s\n、,,]")
- tenderee_keywrod = "医院、大学、高校、高中"
- list_t_key = splitIntoList(tenderee_keywrod,"[\s\n、,,]")
- log(str(list_keyword))
- bool_query = BoolQuery(must_queries=[
- generateBoolShouldQuery(["doctitle"],list_keyword,MatchPhraseQuery),
- generateBoolShouldQuery(["docchannel"],[52,102,114],TermQuery),
- TermQuery("page_time",current_date),
- RangeQuery("status",151,300,True,True),
- # TermQuery("procurement_system","公安系统"),
- generateBoolShouldQuery(["province"],list_province,TermQuery),
- # generateBoolShouldQuery(["tenderee"],list_t_key,WildcardQuery)
- # generateBoolShouldQuery(["docchannel"],[101,118,119],TermQuery),
- ],
- must_not_queries=[
- generateBoolShouldQuery(["doctitle"],list_not_key,MatchPhraseQuery),
- ]
- )
- list_query.append({"query":bool_query,"limit":700})
- list_row = getDocument(list_query,columns)
- log("get document %d rows"%len(list_row))
- df_data = {}
- set_line = set()
- # list_row = filterRow(list_row)
- df_column = getRowData(df_data,list_row,set_line,list_keyword,dict_channel,True)
- df = pd.DataFrame(df_data)
- print(df_data)
- filename = os.path.dirname(__file__)+"/data/%s年%s数据导出.xlsx"%(current_date[:4],current_date)
- df.to_excel(filename,columns=df_column)
- log(str(filename))
- host = "smtp.exmail.qq.com"
- username = "vip@bidizhaobiao.com"
- password = "Biaoxun66-"
- receivers = ["1175730271@qq.com","365531448@qq.com"]
- # receivers = ["1175730271@qq.com"]
- attachs = [filename]
- sendEmail(host,username,password,receivers,attachs=attachs)
- break
- except Exception as e:
- traceback.print_exc()
- def export_document_except():
- def getRowData(df_data,rows,dict_channel,set_columns1,list_df_column1):
- _index = 0
- for row in rows:
- _index += 1
- item = {}
- _dict = row
- _extract = json.loads(_dict.get("extract_json","{}"))
- product_attrs = _extract.get("product_attrs",{"data":[{}]}).get("data")
- docid = _dict.get("docid","")
- win_tenderer = ""
- bidding_budget = ""
- sub_docs_json = json.loads(_dict.get("sub_docs_json","[]"))
- for _doc in sub_docs_json:
- if win_tenderer=="":
- win_tenderer = _doc.get("win_tenderer","")
- if bidding_budget=="":
- bidding_budget= _doc.get("bidding_budget","")
- for _attrs in product_attrs:
- set_dict_item_columns(set_columns1,list_df_column1,item,"公告链接",'=HYPERLINK("http://www.bidizhaobiao.com/info-%s.html","查看公告")'%(str(docid)))
- set_dict_item_columns(set_columns1,list_df_column1,item,"公告时间",_dict.get("page_time"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"省份",_dict.get("province"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"城市",_dict.get("city"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"地区",_dict.get("district"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"项目标题",_dict.get("doctitle"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"采购单位",_dict.get("tenderee"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位",win_tenderer)
- set_dict_item_columns(set_columns1,list_df_column1,item,"产品名称",_dict.get("product"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"规格型号",_attrs.get("specs",""))
- set_dict_item_columns(set_columns1,list_df_column1,item,"品牌",_attrs.get("brand",""))
- _quantity = ""
- _uniPrice = ""
- try:
- _quantity = int(_attrs.get("quantity","0"))
- if _quantity==0:
- _quantity = ""
- except Exception as e:
- pass
- try:
- _uniPrice = float(_attrs.get("_uniPrice","0"))
- if _uniPrice==0:
- _uniPrice = ""
- except Exception as e:
- pass
- set_dict_item_columns(set_columns1,list_df_column1,item,"数量",_quantity)
- set_dict_item_columns(set_columns1,list_df_column1,item,"单价(元)",_uniPrice)
- sumPrice = ""
- if _quantity!="" and _uniPrice!="":
- sumPrice = _quantity*_uniPrice
- set_dict_item_columns(set_columns1,list_df_column1,item,"总价(元)",sumPrice)
- set_dict_item_columns(set_columns1,list_df_column1,item,"项目金额(元)",bidding_budget)
- for k,v in item.items():
- if k not in df_data:
- df_data[k] = []
- df_data[k].append(v)
- current_date = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())))
- current_date = '2022-04-01'
- columns = ["doctitle","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose","web_source_no","web_source_name","service_time","person_review","time_get_file_start","time_get_file_end","time_earnest_money_start","time_earnest_money_end"]
- dict_channel = getDict_docchannel()
- list_query = []
- str_keyword = '''
- 机房建设 多媒体教室建设 数据中心机房 班班通 电子设备
- 视频 LED 监控 视频监控系统 信息安全
- 信息化建设 网络安全 网络安全服务 网络安全系统 等级保护测评
- 信息系统 大数据中心 智慧教室建设 数据安全建设 可视化指挥联动平台
- 音频处理器 教学专用仪器 多媒体扩声系统 智慧校园 数字化
- 交换 路由 无线 信息安全 触控一体机
- 教学设备
-
- '''
- list_keyword = splitIntoList(str_keyword,"[\s\n、,,]")
- str_not_keyword = '''
- '''
- list_not_key = splitIntoList(str_not_keyword,"[\s\n、,,]")
- tenderee_keywrod = "医院、大学、高校、高中"
- list_t_key = splitIntoList(tenderee_keywrod,"[\s\n、,,]")
- s_q = BoolQuery(should_queries=[BoolQuery(must_queries=[TermQuery("docchannel",52)],
- must_not_queries=[WildcardQuery("tenderee","*")]),
- BoolQuery(must_queries=[generateBoolShouldQuery(["docchannel"],[101,119,120],TermQuery)],
- must_not_queries=[NestedQuery("sub_docs_json",WildcardQuery("sub_docs_json.win_tenderer","*"))])])
- log(str(list_keyword))
- set_columns1 = set()
- list_df_columns1 = []
- bool_query = BoolQuery(must_queries=[
- TermQuery("page_time",current_date),
- RangeQuery("status",151,451,True,True),
- s_q
- ],
- # must_not_queries=[
- # generateBoolShouldQuery(["doctitle"],list_not_key,MatchPhraseQuery),
- # ]
- )
- list_query.append({"query":bool_query,"limit":500000})
- list_row = getDocument(list_query,columns)
- log("get document %d rows"%len(list_row))
- df_data = {}
- set_line = set()
- # list_row = filterRow(list_row,list_not_key)
- getRowData(df_data,list_row,set_line,list_keyword,dict_channel,True)
- # fixContactPerson(df_data,list_df_columns)
- df = pd.DataFrame(df_data)
- filename = os.path.dirname(__file__)+"/data/%s异常数据导出.xlsx"%(current_date)
- print(list_df_columns)
- df.to_excel(filename,columns=list_df_columns)
- class Export3():
- def trytimes(self):
- for _ in range(3):
- _succeed = self.export3()
- if _succeed:
- break
- def export3(self,):
- def getRowData(df_data,rows,dict_channel,set_columns1,list_df_column1):
- _index = 0
- for row in rows:
- _index += 1
- item = {}
- _dict = row
- _extract = json.loads(_dict.get("extract_json","{}"))
- demand_info = _extract.get("demand_info",{"data":[]}).get("data")
- docid = _dict.get("docid","")
- win_tenderer = ""
- bidding_budget = ""
- win_bid_price = ""
- sub_docs_json = json.loads(_dict.get("sub_docs_json","[]"))
- for _doc in sub_docs_json:
- if win_tenderer=="":
- win_tenderer = _doc.get("win_tenderer","")
- if bidding_budget=="":
- bidding_budget= _doc.get("bidding_budget","")
- if win_bid_price=="":
- win_bid_price = _doc.get("win_bid_price","")
- if len(demand_info)==0:
- demand_info = [{}]
- demand_info = [{}]
- for _attrs in demand_info:
- set_dict_item_columns(set_columns1,list_df_column1,item,"序号",_index)
- set_dict_item_columns(set_columns1,list_df_column1,item,"项目编号",_dict.get("project_code"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"项目名称",_dict.get("project_name"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"省份",_dict.get("province"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"城市",_dict.get("city"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"信息类型",dict_channel.get(_dict.get("docchannel")))
- set_dict_item_columns(set_columns1,list_df_column1,item,"发布时间",_dict.get("crtime"))
- b_or_w = ""
- if bidding_budget!="":
- b_or_w = float(bidding_budget)/10000
- elif win_bid_price!="":
- b_or_w = float(win_bid_price)/10000
- set_dict_item_columns(set_columns1,list_df_column1,item,"中标金额/预算金额(万元)",b_or_w)
- set_dict_item_columns(set_columns1,list_df_column1,item,"招标单位",_dict.get("tenderee"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"招标项目联系人","%s\n%s"%(_dict.get("tenderee_contact",""),_dict.get("tenderee_phone","")))
- # 中标单位 中标项目联系人 中标单位主要联系人 中标单位联系人 代理单位 代理项目联系人 代理单位联系人 比地招标公告地址
- set_dict_item_columns(set_columns1,list_df_column1,item,"招标单位联系人","")
- set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位",win_tenderer)
- set_dict_item_columns(set_columns1,list_df_column1,item,"中标项目联系人","")
- set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位主要联系人","")
- set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位联系人","")
- set_dict_item_columns(set_columns1,list_df_column1,item,"代理单位",_dict.get("agency",""))
- set_dict_item_columns(set_columns1,list_df_column1,item,"代理项目联系人","%s\n%s"%(_dict.get("agency_contact",""),_dict.get("agency_phone","")))
- set_dict_item_columns(set_columns1,list_df_column1,item,"代理单位联系人","")
- _key ='''MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQC8uZYYV6ls+5KMzUfvsPvv3XRdwkcTBj/ppB03mijUPHYTGvYSE0cQTbQrnIbXFtUYJguakpKLmVyH+T/w6vhxbQNlaykfe8RXEh4i4IJk8s/Qb0E0xODsjKBEr8VdDYeqqduWrtJpttXAvv93SsTPvgZBditRzJAzk0XH56zL1wIDAQAB'''
- _key = base64.b64decode(_key)
- _url_map = {"userId":"001795335",
- "timestamp":"%d"%(time.time()*1000),
- "docid":str(_dict.get("docid",""))}
- _encrpt_text = base64.b64encode(rsa_encrpt(json.dumps(_url_map),_key))
- _encrpt_text = urllib.parse.quote(_encrpt_text)
- _url = "http://www.bidizhaobiao.com/info-%s.html?emailSecret=%s"%(str(_dict.get("docid")),_encrpt_text)
- set_dict_item_columns(set_columns1,list_df_column1,item,"比地招标公告地址",_url)
- # set_dict_item_columns(set_columns1,list_df_column1,item,"项目编号",_dict.get("project_code"))
- # set_dict_item_columns(set_columns1,list_df_column1,item,"项目名称",_attrs.get("project_name"))
- #
- # set_dict_item_columns(set_columns1,list_df_column1,item,"需求",_attrs.get("demand"))
- # set_dict_item_columns(set_columns1,list_df_column1,item,"预算(元)",_attrs.get("budget"))
- # set_dict_item_columns(set_columns1,list_df_column1,item,"开始时间",_attrs.get("order_begin"))
- # set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位",win_tenderer)
- # set_dict_item_columns(set_columns1,list_df_column1,item,"产品名称",_attrs.get("product"))
- # set_dict_item_columns(set_columns1,list_df_column1,item,"规格型号",_attrs.get("specs",""))
- # set_dict_item_columns(set_columns1,list_df_column1,item,"品牌",_attrs.get("brand",""))
- # _quantity = ""
- # _uniPrice = ""
- # try:
- # _quantity = int(_attrs.get("quantity","0"))
- # if _quantity==0:
- # _quantity = ""
- # except Exception as e:
- # pass
- # try:
- # _uniPrice = float(_attrs.get("unitPrice","0"))
- # if _uniPrice==0:
- # _uniPrice = ""
- # except Exception as e:
- # pass
- # set_dict_item_columns(set_columns1,list_df_column1,item,"数量",_quantity)
- # set_dict_item_columns(set_columns1,list_df_column1,item,"单价(元)",_uniPrice)
- # sumPrice = ""
- # if _quantity!="" and _uniPrice!="":
- # sumPrice = _quantity*_uniPrice
- # set_dict_item_columns(set_columns1,list_df_column1,item,"总价(元)",sumPrice)
- # set_dict_item_columns(set_columns1,list_df_column1,item,"项目金额(元)",bidding_budget)
- for k,v in item.items():
- if k not in df_data:
- df_data[k] = []
- df_data[k].append(v)
- def getRowData1(df_data,rows,dict_channel,set_columns1,list_df_column1):
- _index = 0
- for row in rows:
- _index += 1
- item = {}
- _dict = row
- _extract = json.loads(_dict.get("extract_json","{}"))
- demand_info = _extract.get("demand_info",{"data":[]}).get("data")
- docid = _dict.get("docid","")
- win_tenderer = ""
- bidding_budget = ""
- win_bid_price = ""
- sub_docs_json = json.loads(_dict.get("sub_docs_json","[]"))
- for _doc in sub_docs_json:
- if win_tenderer=="":
- win_tenderer = _doc.get("win_tenderer","")
- if bidding_budget=="":
- bidding_budget= _doc.get("bidding_budget","")
- if win_bid_price=="":
- win_bid_price = _doc.get("win_bid_price","")
- if len(demand_info)==0:
- demand_info = [{}]
- demand_info = [{}]
- for _attrs in demand_info:
- # set_dict_item_columns(set_columns1,list_df_column1,item,"序号",_index)
- # set_dict_item_columns(set_columns1,list_df_column1,item,"项目编号",_dict.get("project_code"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"地区",_dict.get("district"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"省份",_dict.get("province"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"项目名称",_dict.get("project_name"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"采购人",_dict.get("tenderee"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"招标时间",_dict.get("page_time"))
- b_or_w = ""
- if bidding_budget!="":
- b_or_w = float(bidding_budget)/10000
- elif win_bid_price!="":
- b_or_w = float(win_bid_price)/10000
- set_dict_item_columns(set_columns1,list_df_column1,item,"招标预算",bidding_budget)
- set_dict_item_columns(set_columns1,list_df_column1,item,"中标时间",_dict.get("page_time"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"中标金额",win_bid_price)
- set_dict_item_columns(set_columns1,list_df_column1,item,"中标人",win_tenderer)
- set_dict_item_columns(set_columns1,list_df_column1,item,"采购模式",_dict.get("bidway"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"命中关键词",_dict.get("keyword"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"采购品牌",'')
- set_dict_item_columns(set_columns1,list_df_column1,item,"采购数量",'')
- for k,v in item.items():
- if k not in df_data:
- df_data[k] = []
- df_data[k].append(v)
- def getContacts(contacts):
- try:
- if contacts is None or contacts=="":
- return ""
- _contacts = json.loads(contacts)
- list_c = []
- _count = 0
- for _c in _contacts:
- _count += 1
- contact_person = _c.get("contact_person","")
- phone_no = _c.get("phone_no","")
- list_c.append("%s(企业联系人%d)\n%s"%(str(contact_person),_count,str(phone_no)))
- if _count>=5:
- break
- return ",\n".join(list_c)
- except Exception as e:
- return ""
- def fixContactPerson(df_data,list_df_columns,get_legal_person=False):
- set_enterprise = set()
- if len(df_data.keys())>0:
- for _tenderee,_agency,_win in zip(df_data["招标单位"],df_data["代理单位"],df_data["中标单位"]):
- set_enterprise.add(_tenderee)
- set_enterprise.add(_agency)
- set_enterprise.add(_win)
- if "" in set_enterprise:
- set_enterprise.remove("")
- if None in set_enterprise:
- set_enterprise.remove(None)
- dict_enterprise = getDictEnterprise(list(set_enterprise),columns_to_get = ["contacts","procurement_system"])
- # conn = getConnection_oracle()
- # cursor = conn.cursor()
- if len(set_enterprise)>0:
- for _i in range(len(df_data["招标单位"])):
- _enterprise_name = df_data["招标单位"][_i]
- df_data["招标单位联系人"][_i] = getContacts(dict_enterprise.get(_enterprise_name,{}).get("contacts"))
- _enterprise_name = df_data["代理单位"][_i]
- df_data["代理单位联系人"][_i] = getContacts(dict_enterprise.get(_enterprise_name,{}).get("contacts"))
- _enterprise_name = df_data["中标单位"][_i]
- df_data["中标单位联系人"][_i] = getContacts(dict_enterprise.get(_enterprise_name,{}).get("contacts"))
- # if "采购系统" not in df_data:
- # df_data["采购系统"] = []
- # df_data["采购系统"].append(dict_enterprise.get(_enterprise_name,{}).get("procurement_system",""))
- # list_df_columns.extend(['采购系统'])
- '''
- export by customer's subscription
- :return:
- '''
- try:
- ots_client = getConnect_ots()
- subscription4 = '''
-
- 触控一体机 交互平板 交互一体机 交互智能平板 交互大屏 教学一体机 智慧屏 智慧黑板 互动黑板 班班通 多媒体教室 多媒体设备 多媒体系统 智慧教室
- '''
- # provinces = "河北 山西 广东 海南 江苏 安徽 山东 河南 湖北 湖南 重庆 黑龙江 陕西 甘肃 青海 宁夏"
- provinces = "海南 江苏 安徽 湖北 湖南 重庆 甘肃 青海 宁夏 河南 北京 天津"
- dict_channel = getDict_docchannel()
- list_province = splitIntoList(provinces,"\s|,|,|、")
- list_subscription4 = splitIntoList(subscription4,"\s|,|,|、")
- current_date = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())))
- last_date = timeAdd(current_date,-1)
- # last_date = "2023-12-08"
- # current_date = "2023-12-10"
- set_columns1 = set()
- list_df_columns1 = []
- bool_query = BoolQuery(must_queries=[
- generateBoolShouldQuery(["docchannel"],[102,114],TermQuery),
- RangeQuery("page_time",last_date),
- RangeQuery("status",201,300,True,True),
- generateBoolShouldQuery(["doctitle","doctextcon","attachmenttextcon"],list_subscription4,MatchPhraseQuery),
- generateBoolShouldQuery(["province"],list_province,WildcardQuery),
- RangeQuery("crtime","%s 21:00:00"%last_date,"%s 21:00:00"%current_date)
- ],
- # must_not_queries=[
- # generateBoolShouldQuery(["doctitle"],list_not_key,MatchPhraseQuery),
- # ]
- )
- columns = ["doctitle","docchannel","original_docchannel","product","crtime","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","extract_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone"]
- list_query = [{"query":bool_query,"limit":50000}]
- list_row = getDocument(list_query,columns)
- filename0 = os.path.dirname(__file__)+"/data/%s订阅4订阅5数据导出数量%d_%s.xlsx"%(current_date,len(list_row),getCurrent_date("%Y-%m-%d_%H%M%S"))
- df_data = {}
- if len(list_row)>0:
- getRowData(df_data,list_row,dict_channel,set_columns1,list_df_columns1)
- fixContactPerson(df_data,list_df_columns1)
- df = pd.DataFrame(df_data)
- df.to_excel(filename0,columns=list_df_columns1,index=False)
- else:
- df = pd.DataFrame(df_data)
- df.to_excel(filename0,index=False)
- host = "smtp.exmail.qq.com"
- username = "vip1@bidizhaobiao.com"
- password = "Biaoxun666+"
- # receivers = ["1175730271@qq.com","995116318@qq.com","huangxiaofang@cvte.com"]
- receivers = ["1175730271@qq.com","1208135584@qq.com","youyuer@cvte.com","chenyuxue@cvte.com"]
- # receivers = ["1175730271@qq.com"]
- attachs = [filename0]
- sendEmail(host,username,password,receivers,attachs=attachs)
- return True
- except Exception as e:
- traceback.print_exc()
- return False
- def export3_1(self,):
- def getRowData(df_data,rows,dict_channel,set_columns1,list_df_column1):
- _index = 0
- for row in rows:
- _index += 1
- item = {}
- _dict = row
- _extract = json.loads(_dict.get("extract_json","{}"))
- demand_info = _extract.get("demand_info",{"data":[]}).get("data")
- docid = _dict.get("docid","")
- win_tenderer = ""
- bidding_budget = ""
- win_bid_price = ""
- sub_docs_json = json.loads(_dict.get("sub_docs_json","[]"))
- for _doc in sub_docs_json:
- if win_tenderer=="":
- win_tenderer = _doc.get("win_tenderer","")
- if bidding_budget=="":
- bidding_budget= _doc.get("bidding_budget","")
- if win_bid_price=="":
- win_bid_price = _doc.get("win_bid_price","")
- if len(demand_info)==0:
- demand_info = [{}]
- demand_info = [{}]
- for _attrs in demand_info:
- set_dict_item_columns(set_columns1,list_df_column1,item,"序号",_index)
- set_dict_item_columns(set_columns1,list_df_column1,item,"项目编号",_dict.get("project_code"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"项目名称",_dict.get("project_name"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"省份",_dict.get("province"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"城市",_dict.get("city"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"信息类型",dict_channel.get(_dict.get("docchannel")))
- set_dict_item_columns(set_columns1,list_df_column1,item,"发布时间",_dict.get("crtime"))
- b_or_w = ""
- if bidding_budget!="":
- b_or_w = float(bidding_budget)/10000
- elif win_bid_price!="":
- b_or_w = float(win_bid_price)/10000
- set_dict_item_columns(set_columns1,list_df_column1,item,"中标金额/预算金额(万元)",b_or_w)
- set_dict_item_columns(set_columns1,list_df_column1,item,"招标单位",_dict.get("tenderee"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"招标项目联系人","%s\n%s"%(_dict.get("tenderee_contact",""),_dict.get("tenderee_phone","")))
- # 中标单位 中标项目联系人 中标单位主要联系人 中标单位联系人 代理单位 代理项目联系人 代理单位联系人 比地招标公告地址
- set_dict_item_columns(set_columns1,list_df_column1,item,"招标单位联系人","")
- set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位",win_tenderer)
- set_dict_item_columns(set_columns1,list_df_column1,item,"中标项目联系人","")
- set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位主要联系人","")
- set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位联系人","")
- set_dict_item_columns(set_columns1,list_df_column1,item,"代理单位",_dict.get("agency",""))
- set_dict_item_columns(set_columns1,list_df_column1,item,"代理项目联系人","%s\n%s"%(_dict.get("agency_contact",""),_dict.get("agency_phone","")))
- set_dict_item_columns(set_columns1,list_df_column1,item,"代理单位联系人","")
- _key ='''MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQC8uZYYV6ls+5KMzUfvsPvv3XRdwkcTBj/ppB03mijUPHYTGvYSE0cQTbQrnIbXFtUYJguakpKLmVyH+T/w6vhxbQNlaykfe8RXEh4i4IJk8s/Qb0E0xODsjKBEr8VdDYeqqduWrtJpttXAvv93SsTPvgZBditRzJAzk0XH56zL1wIDAQAB'''
- _key = base64.b64decode(_key)
- _url_map = {"userId":"001795335",
- "timestamp":"%d"%(time.time()*1000),
- "docid":str(_dict.get("docid",""))}
- _encrpt_text = base64.b64encode(rsa_encrpt(json.dumps(_url_map),_key))
- _encrpt_text = urllib.parse.quote(_encrpt_text)
- _url = "http://www.bidizhaobiao.com/info-%s.html?emailSecret=%s"%(str(_dict.get("docid")),_encrpt_text)
- set_dict_item_columns(set_columns1,list_df_column1,item,"比地招标公告地址",_url)
- # set_dict_item_columns(set_columns1,list_df_column1,item,"项目编号",_dict.get("project_code"))
- # set_dict_item_columns(set_columns1,list_df_column1,item,"项目名称",_attrs.get("project_name"))
- #
- # set_dict_item_columns(set_columns1,list_df_column1,item,"需求",_attrs.get("demand"))
- # set_dict_item_columns(set_columns1,list_df_column1,item,"预算(元)",_attrs.get("budget"))
- # set_dict_item_columns(set_columns1,list_df_column1,item,"开始时间",_attrs.get("order_begin"))
- # set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位",win_tenderer)
- # set_dict_item_columns(set_columns1,list_df_column1,item,"产品名称",_attrs.get("product"))
- # set_dict_item_columns(set_columns1,list_df_column1,item,"规格型号",_attrs.get("specs",""))
- # set_dict_item_columns(set_columns1,list_df_column1,item,"品牌",_attrs.get("brand",""))
- # _quantity = ""
- # _uniPrice = ""
- # try:
- # _quantity = int(_attrs.get("quantity","0"))
- # if _quantity==0:
- # _quantity = ""
- # except Exception as e:
- # pass
- # try:
- # _uniPrice = float(_attrs.get("unitPrice","0"))
- # if _uniPrice==0:
- # _uniPrice = ""
- # except Exception as e:
- # pass
- # set_dict_item_columns(set_columns1,list_df_column1,item,"数量",_quantity)
- # set_dict_item_columns(set_columns1,list_df_column1,item,"单价(元)",_uniPrice)
- # sumPrice = ""
- # if _quantity!="" and _uniPrice!="":
- # sumPrice = _quantity*_uniPrice
- # set_dict_item_columns(set_columns1,list_df_column1,item,"总价(元)",sumPrice)
- # set_dict_item_columns(set_columns1,list_df_column1,item,"项目金额(元)",bidding_budget)
- for k,v in item.items():
- if k not in df_data:
- df_data[k] = []
- df_data[k].append(v)
- def getContacts(contacts):
- try:
- if contacts is None or contacts=="":
- return ""
- if isinstance(contacts,str):
- _contacts = json.loads(contacts)
- else:
- _contacts = contacts
- list_c = []
- _count = 0
- for _c in _contacts:
- _count += 1
- contact_person = _c.get("contact_person","")
- phone_no = _c.get("phone_no","")
- list_c.append("%s(企业联系人%d)\n%s"%(str(contact_person),_count,str(phone_no)))
- if _count>=5:
- break
- print("getContacts",",\n".join(list_c))
- return ",\n".join(list_c)
- except Exception as e:
- traceback.print_exc()
- print("getContacts","")
- return ""
- def fixContactPerson(df_data,list_df_columns,get_legal_person=False):
- set_enterprise = set()
- if len(df_data.keys())>0:
- for _tenderee,_agency,_win in zip(df_data["招标单位"],df_data["代理单位"],df_data["中标单位"]):
- set_enterprise.add(_tenderee)
- set_enterprise.add(_agency)
- set_enterprise.add(_win)
- if "" in set_enterprise:
- set_enterprise.remove("")
- if None in set_enterprise:
- set_enterprise.remove(None)
- dict_enterprise = getDictEnterprise(list(set_enterprise),columns_to_get = ["contacts","procurement_system"])
- # conn = getConnection_oracle()
- # cursor = conn.cursor()
- if len(set_enterprise)>0:
- for _i in range(len(df_data["招标单位"])):
- _enterprise_name = df_data["招标单位"][_i]
- df_data["招标单位联系人"][_i] = getContacts(dict_enterprise.get(_enterprise_name,{}).get("contacts"))
- _enterprise_name = df_data["代理单位"][_i]
- df_data["代理单位联系人"][_i] = getContacts(dict_enterprise.get(_enterprise_name,{}).get("contacts"))
- _enterprise_name = df_data["中标单位"][_i]
- df_data["中标单位联系人"][_i] = getContacts(dict_enterprise.get(_enterprise_name,{}).get("contacts"))
- # if "采购系统" not in df_data:
- # df_data["采购系统"] = []
- # df_data["采购系统"].append(dict_enterprise.get(_enterprise_name,{}).get("procurement_system",""))
- # list_df_columns.extend(['采购系统'])
- '''
- export by customer's subscription
- :return:
- '''
- try:
- ots_client = getConnect_ots()
- subscription4 = '''
-
- 录播、三个课堂、互动课堂、同步课堂、双师课堂、专递课堂、微格教室、远程课堂、督导巡课、推门听课、互动教学
- '''
- # provinces = "河北 山西 广东 海南 江苏 安徽 山东 河南 湖北 湖南 重庆 黑龙江 陕西 甘肃 青海 宁夏"
- provinces = "海南 江苏 安徽 湖北 湖南 重庆 甘肃 青海 宁夏 河南"
- dict_channel = getDict_docchannel()
- list_province = splitIntoList(provinces,"\s|,|,|、")
- list_subscription4 = splitIntoList(subscription4,"\s|,|,|、")
- current_date = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())))
- last_date = timeAdd(current_date,-1)
- # last_date = "2023-08-14"
- # current_date = "2023-08-20"
- set_columns1 = set()
- list_df_columns1 = []
- bool_query = BoolQuery(must_queries=[
- generateBoolShouldQuery(["docchannel"],[102,114],TermQuery),
- RangeQuery("page_time",last_date),
- RangeQuery("status",201,300,True,True),
- generateBoolShouldQuery(["doctitle","doctextcon","attachmenttextcon"],list_subscription4,MatchPhraseQuery),
- # generateBoolShouldQuery(["province"],list_province,WildcardQuery),
- RangeQuery("crtime","%s 21:00:00"%last_date,"%s 21:00:00"%current_date)
- ],
- # must_not_queries=[
- # generateBoolShouldQuery(["doctitle"],list_not_key,MatchPhraseQuery),
- # ]
- )
- columns = ["doctitle","docchannel","original_docchannel","product","crtime","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","extract_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone"]
- list_query = [{"query":bool_query,"limit":50000}]
- list_row = getDocument(list_query,columns)
- filename0 = os.path.dirname(__file__)+"/data/%s订阅4订阅5数据导出数量%d_%s.xlsx"%(current_date,len(list_row),getCurrent_date("%Y-%m-%d_%H%M%S"))
- df_data = {}
- if len(list_row)>0:
- getRowData(df_data,list_row,dict_channel,set_columns1,list_df_columns1)
- fixContactPerson(df_data,list_df_columns1)
- df = pd.DataFrame(df_data)
- df.to_excel(filename0,columns=list_df_columns1,index=False)
- else:
- df = pd.DataFrame(df_data)
- df.to_excel(filename0,index=False)
- host = "smtp.exmail.qq.com"
- username = "vip1@bidizhaobiao.com"
- password = "Biaoxun666+"
- # receivers = ["1175730271@qq.com","995116318@qq.com","huangxiaofang@cvte.com"]
- receivers = ["1175730271@qq.com","1208135584@qq.com","wanghongyan@cvte.com"]
- # receivers = ["1175730271@qq.com"]
- attachs = [filename0]
- sendEmail(host,username,password,receivers,attachs=attachs)
- return True
- except Exception as e:
- traceback.print_exc()
- return False
- def export4(self):
- def getRowData(df_data,rows,dict_channel,set_columns1,list_df_column1):
- dict_filter,dict_update = getFixData()
- _index = 0
- for row in rows:
- _index += 1
- item = {}
- _dict = row
- keys = row["keyword"]
- log("================")
- log(keys)
- log(_dict.get("doctitle","")+_dict.get("doctextcon","")+_dict.get("attachmenttextcon",""))
- _keyword = str(list(set([ i for i in re.findall("|".join([re.escape(i) for i in keys if i!='']),_dict.get("doctitle","")+_dict.get("doctextcon","")+_dict.get("attachmenttextcon","")) if i!='' ])))
- log(_keyword)
- _extract = json.loads(_dict.get("extract_json","{}"))
- if len(set(keys)&set(["AR一体机","一体机","短焦投影仪","教学一体机","云桌面","录播","电子书包","电子班牌","智慧黑板"]))>0:
- product_attrs = _extract.get("product_attrs",{"data":[{}]}).get("data")
- else:
- product_attrs = [{}]
- if len(product_attrs)==0:
- product_attrs = [{}]
- docid = _dict.get("docid","")
- win_tenderer = ""
- bidding_budget = ""
- win_bid_price = ""
- sub_docs_json = json.loads(_dict.get("sub_docs_json","[]"))
- for _doc in sub_docs_json:
- if win_tenderer=="":
- win_tenderer = _doc.get("win_tenderer","")
- if bidding_budget=="":
- bidding_budget= _doc.get("bidding_budget","")
- if win_bid_price=="":
- win_bid_price = _doc.get("win_bid_price","")
- _index1 = 0
- if _dict.get("docid") in dict_filter:
- print("====filter")
- continue
- if _dict.get("docid") in dict_update:
- print("====update")
- win_bid_price = dict_update[_dict.get("docid")].get("win_bid_price","")
- bidding_budget= dict_update[_dict.get("docid")].get("bidding_budget","")
- for _attrs in product_attrs:
- _index1 += 1
- if re.search("运费",_attrs.get("product","")) is not None:
- if _index1==1:
- _attrs = {}
- else:
- continue
- set_dict_item_columns(set_columns1,list_df_column1,item,"序号",_index)
- # set_dict_item_columns(set_columns1,list_df_column1,item,"正文",_dict.get("doctextcon",""))
- # set_dict_item_columns(set_columns1,list_df_column1,item,"附件",_dict.get("attachmenttextcon",""))
- # set_dict_item_columns(set_columns1,list_df_column1,item,"项目名称",_dict.get("project_name",""))
- set_dict_item_columns(set_columns1,list_df_column1,item,"docid",_dict.get("docid"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"网站链接",'=HYPERLINK("http://www.bidizhaobiao.com/info-%s.html","查看公告")'%(str(docid)))
- set_dict_item_columns(set_columns1,list_df_column1,item,"省份",_dict.get("province"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"城市",_dict.get("city"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"地区",_dict.get("district"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"公告时间",_dict.get("page_time"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"招标公告时间",'')
- set_dict_item_columns(set_columns1,list_df_column1,item,"项目标题",_dict.get("doctitle"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"采购单位",_dict.get("tenderee"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"中标供应商",win_tenderer)
- set_dict_item_columns(set_columns1,list_df_column1,item,"采购方式",_dict.get("bidway",""))
- set_dict_item_columns(set_columns1,list_df_column1,item,"招标采购方式","")
- set_dict_item_columns(set_columns1,list_df_column1,item,"采购系统","")
- set_dict_item_columns(set_columns1,list_df_column1,item,"产品名称",_attrs.get("product"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"品牌",_attrs.get("brand",""))
- # set_dict_item_columns(set_columns1,list_df_column1,item,"规格型号",_attrs.get("specs",""))
- set_dict_item_columns(set_columns1,list_df_column1,item,"预算金额(元)",bidding_budget)
- # set_dict_item_columns(set_columns1,list_df_column1,item,"招标预算金额(元)","")
- _quantity = ""
- _uniPrice = ""
- try:
- _quantity = int(re.sub("[^\d]","",_attrs.get("quantity","0")))
- if _quantity==0:
- _quantity = ""
- except Exception as e:
- pass
- try:
- _uniPrice = float(_attrs.get("unitPrice","0"))
- if _uniPrice==0:
- _uniPrice = ""
- except Exception as e:
- pass
- set_dict_item_columns(set_columns1,list_df_column1,item,"数量",_quantity)
- set_dict_item_columns(set_columns1,list_df_column1,item,"单价(元)",_uniPrice)
- sumPrice = ""
- if _quantity!="" and _uniPrice!="":
- sumPrice = _quantity*_uniPrice
- set_dict_item_columns(set_columns1,list_df_column1,item,"总价(元)",sumPrice)
- set_dict_item_columns(set_columns1,list_df_column1,item,"中标金额(元)",win_bid_price)
- set_dict_item_columns(set_columns1,list_df_column1,item,"关键词",_keyword)
- for k,v in item.items():
- if k not in df_data:
- df_data[k] = []
- df_data[k].append(v)
- def filterRow(list_row,key="docid"):
- set_id = set()
- _filter_row = []
- for row in list_row:
- _v = row.get(key,"")
- if _v in set_id:
- continue
- set_id.add(_v)
- _filter_row.append(row)
- return _filter_row
- def fixContactPerson(df_data,list_df_columns,get_legal_person=False):
- set_enterprise = set()
- if len(df_data.keys())>0:
- for _tenderee,_win_tenderer in zip(df_data["采购单位"],df_data["中标供应商"]):
- set_enterprise.add(_tenderee)
- set_enterprise.add(_win_tenderer)
- if "" in set_enterprise:
- set_enterprise.remove("")
- if None in set_enterprise:
- set_enterprise.remove(None)
- dict_enterprise = getDictEnterprise(list(set_enterprise),columns_to_get = ["procurement_system"])
- # conn = getConnection_oracle()
- # cursor = conn.cursor()
- if len(set_enterprise)>0:
- for _i in range(len(df_data["采购单位"])):
- _enterprise_name = df_data["采购单位"][_i]
- # if df_data["招标联系人电话"][_i]=="":
- # contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts")
- # if contacts is not None:
- # _person,_phone = getOneContact(contacts)
- # df_data["招标联系人"][_i] = _person
- # df_data["招标联系人电话"][_i] = _phone
- df_data["采购系统"][_i] = dict_enterprise.get(_enterprise_name,{}).get("procurement_system","")
- def fixZhaobiao_page_time(df_data):
- ots_client = getConnect_ots()
- for i in range(len(df_data["docid"])):
- _docid = df_data["docid"][i]
- bool_query = BoolQuery(must_queries=[TermQuery("docids",_docid)])
- rows,next_token,total_count,is_all_succeed = ots_client.search("project2","project2_index",
- SearchQuery(bool_query,limit=1),
- ColumnsToGet(["zhao_biao_page_time","bidway","bidding_budget"],ColumnReturnType.SPECIFIED))
- list_data = getRow_ots(rows)
- if len(list_data)>0:
- df_data["招标公告时间"][i] = list_data[0].get("zhao_biao_page_time","")
- df_data["招标采购方式"][i] = list_data[0].get("bidway","")
- new_bidding_budget = list_data[0].get("bidding_budget",0)
- if df_data["预算金额(元)"][i]=="" or float(df_data["预算金额(元)"][i])==0:
- if df_data["中标金额(元)"][i]!="" and new_bidding_budget>=float(df_data["中标金额(元)"][i]):
- df_data["预算金额(元)"][i] = new_bidding_budget
- # df_data["招标预算金额(元)"][i] = list_data[0].get("bidding_budget","")
- def popRows(df_data):
- list_pop = []
- set_docid = set()
- for _c in range(len(df_data["采购系统"])):
- if df_data["采购系统"][_c]!="教育系统" or df_data["中标金额(元)"][_c]=="":
- list_pop.append(_c)
- set_docid.add(df_data["docid"][_c])
- list_pop.reverse()
- for k,v in df_data.items():
- for _p in list_pop:
- v.pop(_p)
- _index = 0
- set_docid1 = set()
- for _i in range(len(df_data["序号"])):
- _docid = df_data["docid"][_i]
- if _docid not in set_docid1:
- _index += 1
- set_docid1.add(_docid)
- df_data["序号"][_i] = _index
- return len(set_docid)
- df = pd.read_excel("20220927v1.4.xlsx",sheetname=1)
- list_search_dict = []
- for title_kw,content_kw,title_rm,content_rm in zip(df["标题采集关键词"],df["全文采集关键词"],df["标题排除词"],df["全文排除词"]):
- if not isinstance(title_kw,str):
- title_kw = ""
- if not isinstance(content_kw,str):
- content_kw = ""
- if not isinstance(title_rm,str):
- title_rm = ""
- if not isinstance(content_rm,str):
- content_rm = ""
- _dict = {"title_kw":re.split("\s|,|,",str(title_kw)),
- "content_kw":re.split("\s|,|,",str(content_kw)),
- "title_rm":re.split("\s|,|,",str(title_rm)),
- "content_rm":re.split("\s|,|,",str(content_rm))}
- list_search_dict.append(_dict)
- columns = ["extract_json","doctextcon","attachmenttextcon","doctitle","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose","web_source_no","web_source_name","service_time","person_review","time_get_file_start","time_get_file_end","time_earnest_money_start","time_earnest_money_end"]
- current_date = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())))
- last_date = timeAdd(current_date,-1)
- current_date = "2022-10-14"
- last_date = "2023-05-07"
- dict_channel = getDict_docchannel()
- list_mq = []
- list_query = []
- for _d in list_search_dict:
- print(_d)
- kw_should_q = []
- list_keys = []
- title_kw = _d.get("title_kw",[])
- if len(title_kw)>0:
- kw_should_q.append(generateBoolShouldQuery(["doctitle"],title_kw,MatchPhraseQuery))
- list_keys.extend(title_kw)
- content_kw = _d.get("content_kw",[])
- if len(content_kw)>0:
- kw_should_q.append(generateBoolShouldQuery(["doctextcon","attachmenttextcon"],content_kw,MatchPhraseQuery))
- list_keys.extend(content_kw)
- rm_should_q = []
- title_rm = _d.get("title_rm",[])
- if len(title_rm)>0:
- rm_should_q.append(generateBoolShouldQuery(["doctitle"],title_rm,MatchPhraseQuery))
- content_rm = _d.get("content_rm",[])
- if len(content_rm)>0:
- rm_should_q.append(generateBoolShouldQuery(["doctextcon","attachmenttextcon"],content_rm,MatchPhraseQuery))
- _query = BoolQuery(must_queries=[
- generateBoolShouldQuery(["docchannel"],[101],TermQuery),
- # RangeQuery("page_time",last_date,current_date),
- RangeQuery("page_time",current_date,last_date),
- # RangeQuery("status",201,300,True,True),
- # TermQuery("docid",263568527),
- # generateBoolShouldQuery(["doctitle","doctextcon","attachmenttextcon"],list_keys,MatchPhraseQuery),
- BoolQuery(should_queries=kw_should_q),
- # generateBoolShouldQuery(["province"],list_province,WildcardQuery),
- # RangeQuery("crtime","%s"%last_date,"%s"%current_date)
- ],
- must_not_queries=rm_should_q)
- list_query.append({"query":_query,"limit":50000,"keyword":list_keys})
- break
- def getFixData():
- filename = "C:\\Users\\Administrator\\Desktop\\视源数据清洗.xlsx"
- dict_filter = {}
- dict_update = {}
- df = pd.read_excel(filename,0)
- for _docid,bidding_budget,win_bid_price,is_dup,is_fb in zip(df["docid"],df["检查招标金额"],df["中标金额(元)"],df["是否重复"],df["是否废标"]):
- if str(is_dup) in ("是","1") or str(is_fb) in ("是","1"):
- dict_filter[_docid] = 1
- else:
- dict_update[_docid] = {"docid":_docid,
- "bidding_budget":bidding_budget,
- "win_bid_price":win_bid_price}
- df = pd.read_excel(filename,1)
- for _docid,bidding_budget,win_bid_price,is_dup,is_fb in zip(df["docid"],df["检查招标金额"],df["中标金额(元)"],df["是否重复"],df["是否废标"]):
- if str(is_dup) in ("是","1") or str(is_fb) in ("是","1"):
- dict_filter[_docid] = 1
- else:
- dict_update[_docid] = {"docid":_docid,
- "bidding_budget":bidding_budget,
- "win_bid_price":win_bid_price}
- return dict_filter,dict_update
- list_row = getDocument(list_query,columns)
- list_row = filterRow(list_row)
- print("list_row",len(list_row))
- set_columns1 = set()
- list_df_columns1 = []
- df_data = {}
- getRowData(df_data,list_row,dict_channel,set_columns1,list_df_columns1)
- print("len df_data",len(df_data["docid"]))
- fixContactPerson(df_data,list_df_columns1)
- fixZhaobiao_page_time(df_data)
- size_pop = popRows(df_data)
- print("size_pop",size_pop)
- df = pd.DataFrame(df_data)
- filename = os.path.dirname(__file__)+"/data/%s关键词数据导出数量%d_%s.xlsx"%(current_date,len(list_row)-size_pop,getCurrent_date("%Y-%m-%d_%H%M%S"))
- df.to_excel(filename,columns=list_df_columns1,index=False)
- host = "smtp.exmail.qq.com"
- username = "vip@bidizhaobiao.com"
- password = "Biaoxun66-"
- receivers = ["1175730271@qq.com","747012698@qq.com","995116318@qq.com"]
- receivers = ["1175730271@qq.com"]
- attachs = [filename]
- sendEmail(host,username,password,receivers,attachs=attachs)
- def export4_by_project(self):
- def getRowData(df_data,rows,dict_channel,set_columns1,list_df_column1):
- # dict_filter,dict_update = getFixData()
- ots_client = getConnect_ots()
- _index = 0
- for row in rows:
- _index += 1
- item = {}
- _dict = row
- keys = row["keyword"]
- log("================")
- log(keys)
- docid = row.get("docids","")
- log(docid)
- try:
- docid_0 = docid.split(",")[0]
- except Exception as e:
- continue
- log(docid)
- log(_dict.get("doctitle","")+_dict.get("doctextcon","")+_dict.get("attachmenttextcon",""))
- _keyword = str(list(set([ i for i in re.findall("|".join([re.escape(i) for i in keys if i!='']),_dict.get("doctitle","")+_dict.get("doctextcon","")+_dict.get("attachmenttextcon","")) if i!='' ])))
- log(_keyword)
- product_attrs = [{}]
- # for doc in docid.split(","):
- # _q = BoolQuery(must_queries=[TermQuery("docid",int(doc))])
- # r,n,t,_ = ots_client.search("document","document_index",SearchQuery(_q),columns_to_get=ColumnsToGet(["extract_json",ColumnReturnType.SPECIFIED]))
- # r = getRow_ots(r)
- # if len(r)>0:
- # _extract = json.loads(r[0].get("extract_json","{}"))
- # if len(set(keys)&set(["AR一体机","一体机","短焦投影仪","教学一体机","云桌面","录播","电子书包","电子班牌","智慧黑板"]))>0:
- # product_attrs = _extract.get("product_attrs",{"data":[{}]}).get("data")
- # else:
- # product_attrs = [{}]
- # if len(product_attrs)==0:
- # product_attrs = [{}]
- # win_tenderer = ""
- # bidding_budget = ""
- # win_bid_price = ""
- # sub_docs_json = json.loads(_dict.get("sub_docs_json","[]"))
- # for _doc in sub_docs_json:
- # if win_tenderer=="":
- # win_tenderer = _doc.get("win_tenderer","")
- # if bidding_budget=="":
- # bidding_budget= _doc.get("bidding_budget","")
- # if win_bid_price=="":
- # win_bid_price = _doc.get("win_bid_price","")
- _index1 = 0
- # if _dict.get("docid") in dict_filter:
- # print("====filter")
- # continue
- # if _dict.get("docid") in dict_update:
- # print("====update")
- # win_bid_price = dict_update[_dict.get("docid")].get("win_bid_price","")
- # bidding_budget= dict_update[_dict.get("docid")].get("bidding_budget","")
- for _attrs in product_attrs:
- _index1 += 1
- if re.search("运费",_attrs.get("product","")) is not None:
- if _index1==1:
- _attrs = {}
- else:
- continue
- set_dict_item_columns(set_columns1,list_df_column1,item,"序号",_index)
- # set_dict_item_columns(set_columns1,list_df_column1,item,"正文",_dict.get("doctextcon",""))
- # set_dict_item_columns(set_columns1,list_df_column1,item,"附件",_dict.get("attachmenttextcon",""))
- # set_dict_item_columns(set_columns1,list_df_column1,item,"项目名称",_dict.get("project_name",""))
- set_dict_item_columns(set_columns1,list_df_column1,item,"docid",docid)
- set_dict_item_columns(set_columns1,list_df_column1,item,"网站链接",'=HYPERLINK("http://www.bidizhaobiao.com/info-%s.html","查看公告")'%(str(docid)))
- set_dict_item_columns(set_columns1,list_df_column1,item,"省份",_dict.get("province"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"城市",_dict.get("city"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"地区",_dict.get("district"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"公告时间",_dict.get("page_time"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"招标公告时间",_dict.get("zhao_biao_page_time"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"项目标题",_dict.get("doctitles"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"采购单位",_dict.get("tenderee"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"中标供应商",_dict.get("win_tenderer"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"采购方式",_dict.get("bidway",""))
- set_dict_item_columns(set_columns1,list_df_column1,item,"招标采购方式","")
- set_dict_item_columns(set_columns1,list_df_column1,item,"采购系统","")
- set_dict_item_columns(set_columns1,list_df_column1,item,"产品名称",_attrs.get("product"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"品牌",_attrs.get("brand",""))
- # set_dict_item_columns(set_columns1,list_df_column1,item,"规格型号",_attrs.get("specs",""))
- set_dict_item_columns(set_columns1,list_df_column1,item,"预算金额(元)",_dict.get("bidding_budget"))
- # set_dict_item_columns(set_columns1,list_df_column1,item,"招标预算金额(元)","")
- _quantity = ""
- _uniPrice = ""
- try:
- _quantity = int(re.sub("[^\d]","",_attrs.get("quantity","0")))
- if _quantity==0:
- _quantity = ""
- except Exception as e:
- pass
- try:
- _uniPrice = float(_attrs.get("unitPrice","0"))
- if _uniPrice==0:
- _uniPrice = ""
- except Exception as e:
- pass
- set_dict_item_columns(set_columns1,list_df_column1,item,"数量",_quantity)
- set_dict_item_columns(set_columns1,list_df_column1,item,"单价(元)",_uniPrice)
- sumPrice = ""
- if _quantity!="" and _uniPrice!="":
- sumPrice = _quantity*_uniPrice
- set_dict_item_columns(set_columns1,list_df_column1,item,"总价(元)",sumPrice)
- set_dict_item_columns(set_columns1,list_df_column1,item,"中标金额(元)",_dict.get("win_bid_price"))
- set_dict_item_columns(set_columns1,list_df_column1,item,"关键词",_keyword)
- for k,v in item.items():
- if k not in df_data:
- df_data[k] = []
- df_data[k].append(v)
- def filterRow(list_row,key="docid"):
- set_id = set()
- _filter_row = []
- for row in list_row:
- _v = row.get(key,"")
- if _v in set_id:
- continue
- set_id.add(_v)
- _filter_row.append(row)
- return _filter_row
- def fixContactPerson(df_data,list_df_columns,get_legal_person=False):
- set_enterprise = set()
- if len(df_data.keys())>0:
- for _tenderee,_win_tenderer in zip(df_data["采购单位"],df_data["中标供应商"]):
- set_enterprise.add(_tenderee)
- set_enterprise.add(_win_tenderer)
- if "" in set_enterprise:
- set_enterprise.remove("")
- if None in set_enterprise:
- set_enterprise.remove(None)
- dict_enterprise = getDictEnterprise(list(set_enterprise),columns_to_get = ["procurement_system"])
- # conn = getConnection_oracle()
- # cursor = conn.cursor()
- if len(set_enterprise)>0:
- for _i in range(len(df_data["采购单位"])):
- _enterprise_name = df_data["采购单位"][_i]
- # if df_data["招标联系人电话"][_i]=="":
- # contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts")
- # if contacts is not None:
- # _person,_phone = getOneContact(contacts)
- # df_data["招标联系人"][_i] = _person
- # df_data["招标联系人电话"][_i] = _phone
- df_data["采购系统"][_i] = dict_enterprise.get(_enterprise_name,{}).get("procurement_system","")
- def fixZhaobiao_page_time(df_data):
- ots_client = getConnect_ots()
- for i in range(len(df_data["docid"])):
- _docid = df_data["docid"][i]
- bool_query = BoolQuery(must_queries=[TermQuery("docids",_docid)])
- rows,next_token,total_count,is_all_succeed = ots_client.search("project2","project2_index",
- SearchQuery(bool_query,limit=1),
- ColumnsToGet(["zhao_biao_page_time","bidway","bidding_budget"],ColumnReturnType.SPECIFIED))
- list_data = getRow_ots(rows)
- if len(list_data)>0:
- df_data["招标公告时间"][i] = list_data[0].get("zhao_biao_page_time","")
- df_data["招标采购方式"][i] = list_data[0].get("bidway","")
- new_bidding_budget = list_data[0].get("bidding_budget",0)
- if df_data["预算金额(元)"][i]=="" or float(df_data["预算金额(元)"][i])==0:
- if df_data["中标金额(元)"][i]!="" and new_bidding_budget>=float(df_data["中标金额(元)"][i]):
- df_data["预算金额(元)"][i] = new_bidding_budget
- # df_data["招标预算金额(元)"][i] = list_data[0].get("bidding_budget","")
- def popRows(df_data):
- list_pop = []
- set_docid = set()
- for _c in range(len(df_data["采购系统"])):
- if df_data["采购系统"][_c]!="教育系统" or df_data["中标金额(元)"][_c]=="":
- list_pop.append(_c)
- set_docid.add(df_data["docid"][_c])
- list_pop.reverse()
- for k,v in df_data.items():
- for _p in list_pop:
- v.pop(_p)
- _index = 0
- set_docid1 = set()
- for _i in range(len(df_data["序号"])):
- _docid = df_data["docid"][_i]
- if _docid not in set_docid1:
- _index += 1
- set_docid1.add(_docid)
- df_data["序号"][_i] = _index
- return len(set_docid)
- df = pd.read_excel("20220927v1.4.xlsx",sheetname=1)
- list_search_dict = []
- for title_kw,content_kw,title_rm,content_rm in zip(df["标题采集关键词"],df["全文采集关键词"],df["标题排除词"],df["全文排除词"]):
- if not isinstance(title_kw,str):
- title_kw = ""
- if not isinstance(content_kw,str):
- content_kw = ""
- if not isinstance(title_rm,str):
- title_rm = ""
- if not isinstance(content_rm,str):
- content_rm = ""
- _dict = {"title_kw":re.split("\s|,|,",str(title_kw)),
- "content_kw":re.split("\s|,|,",str(content_kw)),
- "title_rm":re.split("\s|,|,",str(title_rm)),
- "content_rm":re.split("\s|,|,",str(content_rm))}
- list_search_dict.append(_dict)
- columns = ["docids","doctitles","product","province","bidway","city","district","zhao_biao_page_time","page_time","industry","info_type","tenderee","bidding_budget","project_code","project_name","win_tenderer","win_bid_price","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose","web_source_no","web_source_name","service_time","person_review","time_get_file_start","time_get_file_end","time_earnest_money_start","time_earnest_money_end"]
- current_date = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())))
- last_date = timeAdd(current_date,-1)
- current_date = "2022-10-14"
- last_date = "2023-05-07"
- dict_channel = getDict_docchannel()
- list_mq = []
- list_query = []
- for _d in list_search_dict:
- print(_d)
- kw_should_q = []
- list_keys = []
- title_kw = _d.get("title_kw",[])
- if len(title_kw)>0:
- kw_should_q.append(generateBoolShouldQuery(["doctitles"],title_kw,MatchPhraseQuery))
- list_keys.extend(title_kw)
- content_kw = _d.get("content_kw",[])
- if len(content_kw)>0:
- kw_should_q.append(generateBoolShouldQuery(["doctextcon","attachmenttextcon"],content_kw,MatchPhraseQuery))
- list_keys.extend(content_kw)
- rm_should_q = []
- title_rm = _d.get("title_rm",[])
- if len(title_rm)>0:
- rm_should_q.append(generateBoolShouldQuery(["doctitles"],title_rm,MatchPhraseQuery))
- content_rm = _d.get("content_rm",[])
- if len(content_rm)>0:
- rm_should_q.append(generateBoolShouldQuery(["doctextcon","attachmenttextcon"],content_rm,MatchPhraseQuery))
- _query = BoolQuery(must_queries=[
- # generateBoolShouldQuery(["docchannel"],[101],TermQuery),
- # RangeQuery("page_time",last_date,current_date),
- RangeQuery("page_time",current_date,last_date),
- RangeQuery("status",201,300,True,True),
- # TermQuery("docid",263568527),
- # generateBoolShouldQuery(["doctitle","doctextcon","attachmenttextcon"],list_keys,MatchPhraseQuery),
- BoolQuery(should_queries=kw_should_q),
- # # generateBoolShouldQuery(["province"],list_province,WildcardQuery),
- # # RangeQuery("crtime","%s"%last_date,"%s"%current_date)
- ],
- must_not_queries=rm_should_q)
- list_query.append({"query":_query,"limit":50000,"keyword":list_keys})
- def getFixData():
- filename = "C:\\Users\\Administrator\\Desktop\\视源数据清洗.xlsx"
- dict_filter = {}
- dict_update = {}
- df = pd.read_excel(filename,0)
- for _docid,bidding_budget,win_bid_price,is_dup,is_fb in zip(df["docid"],df["检查招标金额"],df["中标金额(元)"],df["是否重复"],df["是否废标"]):
- if str(is_dup) in ("是","1") or str(is_fb) in ("是","1"):
- dict_filter[_docid] = 1
- else:
- dict_update[_docid] = {"docid":_docid,
- "bidding_budget":bidding_budget,
- "win_bid_price":win_bid_price}
- df = pd.read_excel(filename,1)
- for _docid,bidding_budget,win_bid_price,is_dup,is_fb in zip(df["docid"],df["检查招标金额"],df["中标金额(元)"],df["是否重复"],df["是否废标"]):
- if str(is_dup) in ("是","1") or str(is_fb) in ("是","1"):
- dict_filter[_docid] = 1
- else:
- dict_update[_docid] = {"docid":_docid,
- "bidding_budget":bidding_budget,
- "win_bid_price":win_bid_price}
- return dict_filter,dict_update
- list_row = getDocument(list_query,columns,table_name="project2",table_index="project2_index")
- list_row = filterRow(list_row,key="uuid")
- print("list_row",len(list_row))
- set_columns1 = set()
- list_df_columns1 = []
- df_data = {}
- getRowData(df_data,list_row,dict_channel,set_columns1,list_df_columns1)
- print("len df_data",len(df_data["docid"]))
- fixContactPerson(df_data,list_df_columns1)
- # fixZhaobiao_page_time(df_data)
- # size_pop = popRows(df_data)
- size_pop = 0
- print("size_pop",size_pop)
- df = pd.DataFrame(df_data)
- filename = os.path.dirname(__file__)+"/data/%s关键词数据导出数量%d_%s.xlsx"%(current_date,len(list_row)-size_pop,getCurrent_date("%Y-%m-%d_%H%M%S"))
- df.to_excel(filename,columns=list_df_columns1,index=False)
- host = "smtp.exmail.qq.com"
- username = "vip@bidizhaobiao.com"
- password = "Biaoxun66-"
- receivers = ["1175730271@qq.com","747012698@qq.com","995116318@qq.com"]
- receivers = ["1175730271@qq.com"]
- attachs = [filename]
- sendEmail(host,username,password,receivers,attachs=attachs)
- import json
- def getUserInfo(self,code,userid):
- data = {
- "codeStr": code,
- "userId": userid,
- }
- auth = {'Content-Type': 'application/json',"Authorization": "Bearer eyJhbGciOiJSUzI1NiJ9.eyJpc3MiOiJCWEtDX1VOSVRFX0FQSSIsInN1YiI6IjIwIiwiZGF0YSI6eyJkZXZpY2VUeXBlIjoiMjAiLCJyb2xlIjoiMjAiLCJpZCI6ImJ4a2Mtd29ya2JlbmNoIn0sImV4cCI6MTk2NjY5MjE4MSwiaWF0IjoxNjUxMDQ0MTgxfQ.lFurpoMOaVmCH3GKmJ_cqrseuSaEZJBndp8PE4QpjFY6R2mfXh5e96zn_Ma3qVkp-NKAlpA1nRYl1y08xkzj07KEx4HO_Nh6v3sfGwnDyRUz35SW3yr1fhvnbh5hEpnoCVJnFQfoMXFfn780VuOcKd01lNUjABFbSLvypDv8p-gJzkrE7z5YB53tZ_lGm_dbzKihTBjG1sBYEKJT3ekYz5Px_n-lw05IkUdbckE7n6Xj7PRPTaEjzoc3PF_pgESsdcTPSDhJlLR8x63YCqbmWy6ydhwnlEQE5rHIFA2Q5VhIxUcT7fXUBZNGmzRgaqxB4dIg2369IhpeBAJ2TQ63qg"}
- r = requests.patch("https://unite.bidizhaobiao.com/api/v1/authorization/services",json=data,headers=auth)
- if r.status_code==200:
- return r.status_code,json.loads(r.content.decode("utf8"))
- return r.status_code,""
- def export_15824381998():
- def removeData(df_data):
- list_remove_index = []
- list_c = df_data.get("招标人采购系统",[])
- for _c_i in range(len(list_c)):
- if list_c[_c_i]=="企业采购系统":
- list_remove_index.append(_c_i)
- list_remove_index.reverse()
- print(list_remove_index)
- for k,v in df_data.items():
- for _rc in list_remove_index:
- v.pop(_rc)
- for k,v in df_data.items():
- v = v[:500]
- def getRowData_zb(df_data,rows,set_line,list_keyword,dict_channel,dumplicate):
- dict_line = {}
- # list_data = getRow_ots(rows)
- _index = 0
- rows.sort(key=lambda x:x.get("docid",0),reverse=True)
- set_col = set()
- df_columns = []
- for row in rows:
- _index += 1
- item = {}
- _dict = row
- bidding_budget = 0
- sub_docs = json.loads(_dict.get("sub_docs_json","[]"))
- for doc in sub_docs:
- if doc.get("bidding_budget",0)>0:
- bidding_budget = doc.get("bidding_budget",0)
- set_dict_item_columns(set_col,df_columns,item,"省份",_dict.get("province",""))
- set_dict_item_columns(set_col,df_columns,item,"城市",_dict.get("city",""))
- set_dict_item_columns(set_col,df_columns,item,"公告类别",dict_channel.get(_dict.get("docchannel",""),""))
- set_dict_item_columns(set_col,df_columns,item,"发布时间",_dict.get("page_time",""))
- set_dict_item_columns(set_col,df_columns,item,"公告标题",_dict.get("doctitle",""))
- set_dict_item_columns(set_col,df_columns,item,"采购内容",_dict.get("product",""))
- set_dict_item_columns(set_col,df_columns,item,"采购方式",_dict.get("bidway",""))
- set_dict_item_columns(set_col,df_columns,item,"项目金额",bidding_budget)
- set_dict_item_columns(set_col,df_columns,item,"招标单位",_dict.get("tenderee",""))
- set_dict_item_columns(set_col,df_columns,item,"招标联系人",_dict.get("tenderee_contact",""))
- set_dict_item_columns(set_col,df_columns,item,"招标联系人电话",_dict.get("tenderee_phone",""))
- set_dict_item_columns(set_col,df_columns,item,"代理单位",_dict.get("agency",""))
- set_dict_item_columns(set_col,df_columns,item,"代理联系人",_dict.get("agency_contact",""))
- set_dict_item_columns(set_col,df_columns,item,"代理联系人电话",_dict.get("agency_phone",""))
- set_dict_item_columns(set_col,df_columns,item,"获取文件截止时间",_dict.get("time_get_file_end",""))
- set_dict_item_columns(set_col,df_columns,item,"开标时间",_dict.get("time_bidopen",""))
- set_dict_item_columns(set_col,df_columns,item,"链接","http://www.bidizhaobiao.com/info-%s.html"%str(_dict.get("docid","")))
- for k,v in item.items():
- if k not in df_data:
- df_data[k] = []
- df_data[k].append(v)
- return df_columns
- def getRowData_yc(df_data,rows,set_line,list_keyword,dict_channel,dumplicate):
- dict_line = {}
- # list_data = getRow_ots(rows)
- _index = 0
- rows.sort(key=lambda x:x.get("docid",0),reverse=True)
- set_col = set()
- df_columns = []
- for row in rows:
- _index += 1
- item = {}
- _dict = row
- _type = _dict.get("type",0)
- if _type==0:
- _type = "周期预测"
- elif _type==1:
- _type = "采购意向"
- elif _type==2:
- _type = "到期预测"
- else:
- _type = "废标重招"
- set_dict_item_columns(set_col,df_columns,item,"省份",_dict.get("province",""))
- set_dict_item_columns(set_col,df_columns,item,"城市",_dict.get("city",""))
- set_dict_item_columns(set_col,df_columns,item,"预测类型",_type)
- set_dict_item_columns(set_col,df_columns,item,"预计开始时间",_dict.get("may_begin",""))
- set_dict_item_columns(set_col,df_columns,item,"预计结束时间",_dict.get("may_end",""))
- set_dict_item_columns(set_col,df_columns,item,"采购内容",_dict.get("product",""))
- set_dict_item_columns(set_col,df_columns,item,"上次采购时间",_dict.get("last_page_time",""))
- set_dict_item_columns(set_col,df_columns,item,"招标单位",_dict.get("tenderee",""))
- set_dict_item_columns(set_col,df_columns,item,"招标联系人",_dict.get("tenderee_contact",""))
- set_dict_item_columns(set_col,df_columns,item,"招标联系人电话",_dict.get("tenderee_phone",""))
- set_dict_item_columns(set_col,df_columns,item,"代理单位",_dict.get("agency",""))
- set_dict_item_columns(set_col,df_columns,item,"代理联系人",_dict.get("agency_contact",""))
- set_dict_item_columns(set_col,df_columns,item,"代理联系人电话",_dict.get("agency_phone",""))
- for k,v in item.items():
- if k not in df_data:
- df_data[k] = []
- df_data[k].append(v)
- return df_columns
- def getRowData_win(df_data,rows,set_line,list_keyword,dict_channel,dumplicate):
- dict_line = {}
- # list_data = getRow_ots(rows)
- _index = 0
- rows.sort(key=lambda x:x.get("docid",0),reverse=True)
- set_col = set()
- df_columns = []
- for row in rows:
- _index += 1
- item = {}
- _dict = row
- bidding_budget = 0
- sub_docs = json.loads(_dict.get("sub_docs_json","[]"))
- zbr = ""
- zbje = ""
- for doc in sub_docs:
- if doc.get("bidding_budget",0)>0:
- bidding_budget = doc.get("bidding_budget",0)
- if doc.get("win_tenderer") is not None:
- zbr += str(doc.get("win_tenderer"))+"\r\n"
- zbje += str(doc.get("win_bid_price",0))+"\r\n"
- if doc.get("second_tenderer") is not None:
- zbr += str(doc.get("second_tenderer"))+"\r\n"
- zbje += str(doc.get("second_bid_price",0))+"\r\n"
- if doc.get("third_tenderer") is not None:
- zbr += str(doc.get("third_tenderer"))+"\r\n"
- zbje += str(doc.get("third_bid_price",0))+"\r\n"
- set_dict_item_columns(set_col,df_columns,item,"省份",_dict.get("province",""))
- set_dict_item_columns(set_col,df_columns,item,"城市",_dict.get("city",""))
- set_dict_item_columns(set_col,df_columns,item,"公告类别",dict_channel.get(_dict.get("docchannel",""),""))
- set_dict_item_columns(set_col,df_columns,item,"发布时间",_dict.get("page_time",""))
- set_dict_item_columns(set_col,df_columns,item,"公告标题",_dict.get("doctitle",""))
- set_dict_item_columns(set_col,df_columns,item,"采购内容",_dict.get("product",""))
- set_dict_item_columns(set_col,df_columns,item,"采购方式",_dict.get("bidway",""))
- set_dict_item_columns(set_col,df_columns,item,"项目金额",bidding_budget)
- set_dict_item_columns(set_col,df_columns,item,"招标单位",_dict.get("tenderee",""))
- set_dict_item_columns(set_col,df_columns,item,"招标联系人",_dict.get("tenderee_contact",""))
- set_dict_item_columns(set_col,df_columns,item,"招标联系人电话",_dict.get("tenderee_phone",""))
- set_dict_item_columns(set_col,df_columns,item,"代理单位",_dict.get("agency",""))
- set_dict_item_columns(set_col,df_columns,item,"代理联系人",_dict.get("agency_contact",""))
- set_dict_item_columns(set_col,df_columns,item,"代理联系人电话",_dict.get("agency_phone",""))
- set_dict_item_columns(set_col,df_columns,item,"获取文件截止时间",_dict.get("time_get_file_end",""))
- set_dict_item_columns(set_col,df_columns,item,"开标时间",_dict.get("time_bidopen",""))
- set_dict_item_columns(set_col,df_columns,item,"中标公司",zbr)
- set_dict_item_columns(set_col,df_columns,item,"中标金额",zbje)
- set_dict_item_columns(set_col,df_columns,item,"链接","http://www.bidizhaobiao.com/info-%s.html"%str(_dict.get("docid","")))
- for k,v in item.items():
- if k not in df_data:
- df_data[k] = []
- df_data[k].append(v)
- return df_columns
- def fixContactPerson(df_data,list_df_columns,get_legal_person=False):
- set_enterprise = set()
- if len(df_data.keys())>0:
- for _tenderee,_agency in zip(df_data["招标单位"],df_data["代理单位"]):
- set_enterprise.add(_tenderee)
- set_enterprise.add(_agency)
- if "" in set_enterprise:
- set_enterprise.remove("")
- if None in set_enterprise:
- set_enterprise.remove(None)
- dict_enterprise = getDictEnterprise(list(set_enterprise),columns_to_get = ["procurement_system","company_org_type","reg_capital","actual_capital","contacts","estiblish_time","social_staff_num","zhong_biao_number","tou_biao_number","credit_code","legal_person_name","phone_number"])
- # conn = getConnection_oracle()
- # cursor = conn.cursor()
- if len(set_enterprise)>0:
- for _i in range(len(df_data["招标单位"])):
- _enterprise_name = df_data["招标单位"][_i]
- if df_data["招标联系人电话"][_i]=="":
- contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts")
- if contacts is not None:
- _person,_phone = getOneContact(contacts)
- df_data["招标联系人"][_i] = _person
- df_data["招标联系人电话"][_i] = _phone
- _enterprise_name = df_data["代理单位"][_i]
- if df_data["代理联系人电话"][_i]=="":
- contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts")
- if contacts is not None:
- _person,_phone = getOneContact(contacts)
- df_data["代理联系人"][_i] = _person
- df_data["代理联系人电话"][_i] = _phone
- # _enterprise_name = df_data["中标单位"][_i]
- # if get_legal_person:
- # _person = dict_enterprise.get(_enterprise_name,{}).get("legal_person_name","")
- # _phone = dict_enterprise.get(_enterprise_name,{}).get("phone_number","")
- # if len(_phone)==11 and _phone[0]=="1":
- # df_data["中标单位联系人"][_i] = _person
- # df_data["中标单位联系电话"][_i] = _phone
- # else:
- # if df_data["中标单位联系电话"][_i]=="":
- # contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts")
- # if contacts is not None:
- # _person,_phone = getOneContact(contacts,mobile_only=True)
- # df_data["中标单位联系人"][_i] = _person
- # df_data["中标单位联系电话"][_i] = _phone
- log("start export 15824381998:>>>>>>>>>>>>>>>")
- current_date = getCurrent_date(format="%Y-%m-%d")
- last_date = timeAdd(current_date,-2)
- start_time = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())-7*24*60*60))
- if current_date<="2025-05-01":
- for i in range(10):
- try:
- # start_time='2022-07-22'
- # current_date = '2022-07-28'
- log("start exporting export2:=================")
- # columns = ["doctitle","doctextcon","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose"]
- dict_channel = getDict_docchannel()
- list_query = []
- str_keyword = '''
- 光伏组件
- '''
- list_keyword = splitIntoList(str_keyword,"[\s\n、,,]")
- str_not_keyword = '''
- 清洗机器人
- '''
- list_not_key = splitIntoList(str_not_keyword,"[\s\n、,,]")
- tenderee_keywrod = "医院、大学、高校、高中"
- list_t_key = splitIntoList(tenderee_keywrod,"[\s\n、,,]")
- log(str(list_keyword))
- columns = ["doctitle","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_get_file_end","time_bidopen"]
- bool_query = BoolQuery(must_queries=[
- generateBoolShouldQuery(["doctitle"],list_keyword,MatchPhraseQuery),
- generateBoolShouldQuery(["docchannel"],[51,52,102,103,104,105,114],TermQuery),
- RangeQuery("crtime",last_date+" 21:00:00",current_date+" 21:00:00",True,True),
- RangeQuery("status",201,300,True,True),
- # TermQuery("procurement_system","公安系统"),
- # generateBoolShouldQuery(["province"],["湖南"],TermQuery),
- # generateBoolShouldQuery(["tenderee"],list_t_key,WildcardQuery)
- # generateBoolShouldQuery(["docchannel"],[101,118,119],TermQuery),
- ],
- must_not_queries=[
- generateBoolShouldQuery(["doctitle","doctextcon","attachmenttextcon"],list_not_key,MatchPhraseQuery),
- ]
- )
- list_row = getDocument([{"query":bool_query,"limit":500}],columns,thread_count=1)
- log("get document %d rows"%len(list_row))
- df_data = {}
- set_line = set()
- # list_row = filterRow(list_row)
- df_columns = getRowData_zb(df_data,list_row,set_line,list_keyword,dict_channel,True)
- fixContactPerson(df_data,df_columns)
- df = pd.DataFrame(df_data)
-
- df_data_filter = {}
- for c in df_columns:
- if c not in df_data_filter:
- df_data_filter[c] = []
- for tenderee_i in range(len(df_data["招标单位"])):
- if df_data["招标单位"][tenderee_i] in ("国家电力投资集团有限公司","中国大唐集团有限公司"):
- for c in df_columns:
- df_data_filter[c].append(df_data[c][tenderee_i])
- df_f = pd.DataFrame(df_data_filter)
- time_end = timeAdd(current_date,60)
- columns = ["project_name","type","province","city","bidding_budget","city","demand","last_page_time"]
- bool_query = BoolQuery(must_queries=[
- generateBoolShouldQuery(["demand","product"],list_keyword,MatchPhraseQuery),
- RangeQuery("may_end",range_from=current_date),
- RangeQuery("may_end",range_to=time_end),
- # TermQuery("procurement_system","公安系统"),
- # generateBoolShouldQuery(["province"],["湖南"],TermQuery),
- # generateBoolShouldQuery(["tenderee"],list_t_key,WildcardQuery)
- # generateBoolShouldQuery(["docchannel"],[101,118,119],TermQuery),
- ],
- must_not_queries=[
- generateBoolShouldQuery(["demand","product"],list_not_key,MatchPhraseQuery),
- ]
- )
- list_row = getDocument([{"query":bool_query,"limit":500}],columns,table_name="preproject",table_index="preproject_index",thread_count=1,sort_column="may_end")
- log("get document %d rows"%len(list_row))
- df_data = {}
- set_line = set()
- # list_row = filterRow(list_row)
- df_columns_yc = getRowData_yc(df_data,list_row,set_line,list_keyword,dict_channel,True)
- fixContactPerson(df_data,df_columns_yc)
- df1 = pd.DataFrame(df_data)
- df_data_filter = {}
- for c in df_columns_yc:
- if c not in df_data_filter:
- df_data_filter[c] = []
- for tenderee_i in range(len(df_data["招标单位"])):
- if df_data["招标单位"][tenderee_i] in ("国家电力投资集团有限公司","中国大唐集团有限公司"):
- for c in df_columns_yc:
- if c not in df_data_filter:
- df_data_filter[c] = []
- df_data_filter[c].append(df_data[c][tenderee_i])
- df1_f = pd.DataFrame(df_data_filter)
- filename = os.path.dirname(__file__)+"/data/%s年%s招标数据导出%s.xlsx"%(start_time[:4],current_date,getCurrent_date(format="%Y-%m-%d_%H%M%S"))
- with pd.ExcelWriter(filename) as writer:
- df.to_excel(writer,sheet_name="招标数据",columns=df_columns if not df.empty else None)
- df_f.to_excel(writer,sheet_name="特定甲方招标数据",columns=df_columns if not df_f.empty else None)
- df1.to_excel(writer,sheet_name="项目预测",columns=df_columns_yc[:-3] if not df1.empty else None)
- df1_f.to_excel(writer,sheet_name="特定甲方项目预测",columns=df_columns_yc[:-3] if not df1_f.empty else None)
- log(str(filename))
- host = "smtp.exmail.qq.com"
- username = "vip@bidizhaobiao.com"
- password = "Biaoxun66-"
- receivers = ["1175730271@qq.com","565748324@qq.com","1396488964@qq.com"]
- # receivers = ["1175730271@qq.com"]
- attachs = [filename]
- sendEmail(host,username,password,receivers,attachs=attachs)
- df_data_filter = {}
- for c in df_columns:
- if c not in df_data_filter:
- df_data_filter[c] = []
- for tenderee_i in range(len(df_data["招标单位"])):
- if df_data["招标单位"][tenderee_i] in ("国家电力投资集团有限公司","国家能源投资集团有限责任公司","国家能源集团龙源阿里新能源(阿里)有限公司"):
- for c in df_columns:
- df_data_filter[c].append(df_data[c][tenderee_i])
- df_f = pd.DataFrame(df_data_filter)
- df_data_filter1 = {}
- for c in df_columns_yc:
- if c not in df_data_filter1:
- df_data_filter1[c] = []
- for tenderee_i in range(len(df_data["招标单位"])):
- if df_data["招标单位"][tenderee_i] in ("国家电力投资集团有限公司","国家能源投资集团有限责任公司","国家能源集团龙源阿里新能源(阿里)有限公司"):
- for c in df_columns_yc:
- if c not in df_data_filter1:
- df_data_filter1[c] = []
- df_data_filter1[c].append(df_data[c][tenderee_i])
- df1_f1 = pd.DataFrame(df_data_filter1)
- filename = os.path.dirname(__file__)+"/data/%s年%s招标数据导出%s.xlsx"%(start_time[:4],current_date,getCurrent_date(format="%Y-%m-%d_%H%M%S"))
- with pd.ExcelWriter(filename) as writer:
- df_f.to_excel(writer,sheet_name="特定甲方招标数据",columns=df_columns if not df_f.empty else None)
- df1_f1.to_excel(writer,sheet_name="特定甲方项目预测",columns=df_columns_yc[:-3] if not df1_f.empty else None)
- log(str(filename))
- host = "smtp.exmail.qq.com"
- username = "vip@bidizhaobiao.com"
- password = "Biaoxun66-"
- receivers = ["450604061@qq.com"]
- # receivers = ["1175730271@qq.com"]
- attachs = [filename]
- sendEmail(host,username,password,receivers,attachs=attachs)
- columns = ["doctitle","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_get_file_end","time_bidopen"]
- bool_query = BoolQuery(must_queries=[
- generateBoolShouldQuery(["doctitle"],list_keyword,MatchPhraseQuery),
- generateBoolShouldQuery(["docchannel"],[101,118,119,120,121,122],TermQuery),
- RangeQuery("crtime",last_date+" 21:00:00",current_date+" 21:00:00",True,True),
- RangeQuery("status",201,300,True,True),
- # TermQuery("procurement_system","公安系统"),
- # generateBoolShouldQuery(["province"],["湖南"],TermQuery),
- # generateBoolShouldQuery(["tenderee"],list_t_key,WildcardQuery)
- # generateBoolShouldQuery(["docchannel"],[101,118,119],TermQuery),
- ],
- must_not_queries=[
- generateBoolShouldQuery(["doctitle","doctextcon","attachmenttextcon"],list_not_key,MatchPhraseQuery),
- ]
- )
- list_row = getDocument([{"query":bool_query,"limit":500}],columns,thread_count=1)
- log("get document %d rows"%len(list_row))
- df_data = {}
- set_line = set()
- # list_row = filterRow(list_row)
- df_columns = getRowData_win(df_data,list_row,set_line,list_keyword,dict_channel,True)
- fixContactPerson(df_data,df_columns)
- df = pd.DataFrame(df_data)
- # 调整行高
- # for i in range(1, len(df) + 1):
- # df.style.set_properties(max_col=100, min_col=1, max_row=i, min_row=i, h=20, wrap_text=True) # 设置行高为20,自动换行
- styled_df = (df.style
- .set_properties(**{'text-align': 'center', 'white-space': 'pre-line'})
- .set_table_styles([{'selector': 'td', 'props': [('text-align', 'center')]}]))
- filename = os.path.dirname(__file__)+"/data/%s年%s中标数据导出%s.xlsx"%(start_time[:4],current_date,getCurrent_date(format="%Y-%m-%d_%H%M%S"))
- with pd.ExcelWriter(filename,engine="openpyxl") as writer:
- styled_df.to_excel(writer,columns = df_columns,index=False)
- adjust_excel(filename,filename,columns=["A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R"])
- host = "smtp.exmail.qq.com"
- username = "vip@bidizhaobiao.com"
- password = "Biaoxun66-"
- receivers = ["1175730271@qq.com","565748324@qq.com","1396488964@qq.com","1141385052@qq.com","1713739820@qq.com"]
- # receivers = ["1175730271@qq.com"]
- attachs = [filename]
- sendEmail(host,username,password,receivers,attachs=attachs)
- break
- except Exception as e:
- traceback.print_exc()
- def export_13510123669():
- def removeData(df_data):
- list_remove_index = []
- list_c = df_data.get("招标人采购系统",[])
- for _c_i in range(len(list_c)):
- if list_c[_c_i]=="企业采购系统":
- list_remove_index.append(_c_i)
- list_remove_index.reverse()
- print(list_remove_index)
- for k,v in df_data.items():
- for _rc in list_remove_index:
- v.pop(_rc)
- for k,v in df_data.items():
- v = v[:500]
- def getRowData_zb(df_data,rows,set_line,list_keyword,dict_channel,dumplicate):
- dict_line = {}
- # list_data = getRow_ots(rows)
- _index = 0
- rows.sort(key=lambda x:x.get("docid",0),reverse=True)
- set_col = set()
- df_columns = []
- for row in rows:
- _index += 1
- item = {}
- _dict = row
- bidding_budget = 0
- sub_docs = json.loads(_dict.get("sub_docs_json","[]"))
- zbr = ""
- zbje = 0
- win_tenderer_manager = ""
- win_tenderer_phone = ""
- for doc in sub_docs:
- if doc.get("bidding_budget",0)>0:
- bidding_budget = doc.get("bidding_budget",0)
- if doc.get("win_tenderer") is not None:
- zbr = str(doc.get("win_tenderer",""))
- zbje = doc.get("win_bid_price",0)
- win_tenderer_manager = str(doc.get("win_tenderer_manager",""))
- win_tenderer_phone = str(doc.get("win_tenderer_phone",""))
- doc_type = ""
- if dict_channel.get(_dict.get("docchannel",""),"")[:2] in ("中标","合同","候选","废标","开标","验收"):
- doc_type = "中标"
- else:
- doc_type = "招标"
- set_dict_item_columns(set_col,df_columns,item,"序号",_index)
- set_dict_item_columns(set_col,df_columns,item,"省份",_dict.get("province","未知"))
- set_dict_item_columns(set_col,df_columns,item,"城市",_dict.get("city","未知"))
- set_dict_item_columns(set_col,df_columns,item,"公告类型",doc_type)
- set_dict_item_columns(set_col,df_columns,item,"公告名称",_dict.get("doctitle",""))
- set_dict_item_columns(set_col,df_columns,item,"项目类型",'')
- set_dict_item_columns(set_col,df_columns,item,"项目编号",_dict.get("project_code"))
- set_dict_item_columns(set_col,df_columns,item,"发布时间",_dict.get("page_time",""))
- set_dict_item_columns(set_col,df_columns,item,"信息类型","%s-%s"%(dict_channel.get(_dict.get("docchannel",""),""),_dict.get("bidway","")))
- set_dict_item_columns(set_col,df_columns,item,"报名开始时间",_dict.get("time_registration_start",""))
- set_dict_item_columns(set_col,df_columns,item,"报名截止时间",_dict.get("time_registration_end",""))
- set_dict_item_columns(set_col,df_columns,item,"投标开始时间",_dict.get("time_bidstart",""))
- set_dict_item_columns(set_col,df_columns,item,"投标结束时间",_dict.get("time_bidopen",""))
- set_dict_item_columns(set_col,df_columns,item,"预算金额(万元)",bidding_budget/10000)
- set_dict_item_columns(set_col,df_columns,item,"成交金额(万元)",zbje/10000)
- set_dict_item_columns(set_col,df_columns,item,"招标单位",_dict.get("tenderee",""))
- set_dict_item_columns(set_col,df_columns,item,"招标单位所在地","")
- set_dict_item_columns(set_col,df_columns,item,"招标联系人",_dict.get("tenderee_contact",""))
- set_dict_item_columns(set_col,df_columns,item,"招标联系人电话",_dict.get("tenderee_phone",""))
- set_dict_item_columns(set_col,df_columns,item,"招标联系人1","")
- set_dict_item_columns(set_col,df_columns,item,"招标联系人电话1","")
- set_dict_item_columns(set_col,df_columns,item,"招标联系人2","")
- set_dict_item_columns(set_col,df_columns,item,"招标联系人电话2","")
- set_dict_item_columns(set_col,df_columns,item,"招标联系人3","")
- set_dict_item_columns(set_col,df_columns,item,"招标联系人电话3","")
- set_dict_item_columns(set_col,df_columns,item,"代理单位",_dict.get("agency",""))
- set_dict_item_columns(set_col,df_columns,item,"代理单位所在地","")
- set_dict_item_columns(set_col,df_columns,item,"代理联系人",_dict.get("agency_contact",""))
- set_dict_item_columns(set_col,df_columns,item,"代理联系人电话",_dict.get("agency_phone",""))
- set_dict_item_columns(set_col,df_columns,item,"代理联系人1","")
- set_dict_item_columns(set_col,df_columns,item,"代理联系人电话1","")
- set_dict_item_columns(set_col,df_columns,item,"代理联系人2","")
- set_dict_item_columns(set_col,df_columns,item,"代理联系人电话2","")
- set_dict_item_columns(set_col,df_columns,item,"代理联系人3","")
- set_dict_item_columns(set_col,df_columns,item,"代理联系人电话3","")
- set_dict_item_columns(set_col,df_columns,item,"中标单位",zbr)
- set_dict_item_columns(set_col,df_columns,item,"中标单位所在地","")
- set_dict_item_columns(set_col,df_columns,item,"项目中标联系人",win_tenderer_manager)
- set_dict_item_columns(set_col,df_columns,item,"项目中标联系电话",win_tenderer_phone)
- set_dict_item_columns(set_col,df_columns,item,"中标法人","")
- set_dict_item_columns(set_col,df_columns,item,"中标法人电话","")
- set_dict_item_columns(set_col,df_columns,item,"中标股东","")
- set_dict_item_columns(set_col,df_columns,item,"中标股东电话","")
- set_dict_item_columns(set_col,df_columns,item,"中标高管","")
- set_dict_item_columns(set_col,df_columns,item,"中标高管电话","")
- set_dict_item_columns(set_col,df_columns,item,"中标联系人1","")
- set_dict_item_columns(set_col,df_columns,item,"中标联系人电话1","")
- set_dict_item_columns(set_col,df_columns,item,"中标联系人2","")
- set_dict_item_columns(set_col,df_columns,item,"中标联系人电话2","")
- set_dict_item_columns(set_col,df_columns,item,"中标联系人3","")
- set_dict_item_columns(set_col,df_columns,item,"中标联系人电话3","")
- set_dict_item_columns(set_col,df_columns,item,"招标/采购内容",_dict.get("product",""))
- set_dict_item_columns(set_col,df_columns,item,"资质","")
- set_dict_item_columns(set_col,df_columns,item,"公告详情链接","http://www.bidizhaobiao.com/info-%s.html"%str(_dict.get("docid","")))
- set_dict_item_columns(set_col,df_columns,item,"招投标分析(合作商)","")
- set_dict_item_columns(set_col,df_columns,item,"历史中标单位","")
- set_dict_item_columns(set_col,df_columns,item,"应对策略(作为填)","")
- for k,v in item.items():
- if k not in df_data:
- df_data[k] = []
- df_data[k].append(v)
- return df_columns
- def getRowData_yc(df_data,rows,set_line,list_keyword,dict_channel,dumplicate):
- dict_line = {}
- # list_data = getRow_ots(rows)
- _index = 0
- rows.sort(key=lambda x:x.get("docid",0),reverse=True)
- set_col = set()
- df_columns = []
- for row in rows:
- _index += 1
- item = {}
- _dict = row
- _type = _dict.get("type",0)
- if _type==0:
- _type = "周期预测"
- elif _type==1:
- _type = "采购意向"
- elif _type==2:
- _type = "到期预测"
- else:
- _type = "废标重招"
- set_dict_item_columns(set_col,df_columns,item,"省份",_dict.get("province",""))
- set_dict_item_columns(set_col,df_columns,item,"城市",_dict.get("city",""))
- set_dict_item_columns(set_col,df_columns,item,"类型",'')
- set_dict_item_columns(set_col,df_columns,item,"预测类型",_type)
- set_dict_item_columns(set_col,df_columns,item,"项目类型","")
- set_dict_item_columns(set_col,df_columns,item,"预计开始时间",_dict.get("may_begin",""))
- set_dict_item_columns(set_col,df_columns,item,"预计结束时间",_dict.get("may_end",""))
- set_dict_item_columns(set_col,df_columns,item,"采购内容",_dict.get("product",""))
- set_dict_item_columns(set_col,df_columns,item,"上次采购时间",_dict.get("last_page_time",""))
- set_dict_item_columns(set_col,df_columns,item,"招标单位",_dict.get("tenderee",""))
- set_dict_item_columns(set_col,df_columns,item,"招标联系人",_dict.get("tenderee_contact",""))
- set_dict_item_columns(set_col,df_columns,item,"招标联系人电话",_dict.get("tenderee_phone",""))
- set_dict_item_columns(set_col,df_columns,item,"代理单位",_dict.get("agency",""))
- set_dict_item_columns(set_col,df_columns,item,"代理联系人",_dict.get("agency_contact",""))
- set_dict_item_columns(set_col,df_columns,item,"代理联系人电话",_dict.get("agency_phone",""))
- set_dict_item_columns(set_col,df_columns,item,"招投标分析(合作商)","")
- set_dict_item_columns(set_col,df_columns,item,"历史中标单位","")
- set_dict_item_columns(set_col,df_columns,item,"应对策略(作为填)","")
- for k,v in item.items():
- if k not in df_data:
- df_data[k] = []
- df_data[k].append(v)
- return df_columns
- def fixContactPerson(df_data,list_df_columns,get_legal_person=False):
- set_enterprise = set()
- if len(df_data.keys())>0:
- for _tenderee,_agency in zip(df_data["招标单位"],df_data["代理单位"]):
- set_enterprise.add(_tenderee)
- set_enterprise.add(_agency)
- if "" in set_enterprise:
- set_enterprise.remove("")
- if None in set_enterprise:
- set_enterprise.remove(None)
- dict_enterprise = getDictEnterprise(list(set_enterprise),columns_to_get = ["procurement_system","company_org_type","reg_capital","actual_capital","contacts","estiblish_time","social_staff_num","zhong_biao_number","tou_biao_number","credit_code","legal_person_name","phone_number"])
- # conn = getConnection_oracle()
- # cursor = conn.cursor()
- if len(set_enterprise)>0:
- for _i in range(len(df_data["招标单位"])):
- _enterprise_name = df_data["招标单位"][_i]
- if df_data["招标联系人电话"][_i]=="":
- contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts")
- if contacts is not None:
- _person,_phone = getOneContact(contacts)
- df_data["招标联系人"][_i] = _person
- df_data["招标联系人电话"][_i] = _phone
- df_data["项目类型"][_i] = dict_enterprise.get(_enterprise_name,{}).get("procurement_system")
- _enterprise_name = df_data["代理单位"][_i]
- if df_data["代理联系人电话"][_i]=="":
- contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts")
- if contacts is not None:
- _person,_phone = getOneContact(contacts)
- df_data["代理联系人"][_i] = _person
- df_data["代理联系人电话"][_i] = _phone
- # _enterprise_name = df_data["中标单位"][_i]
- # if get_legal_person:
- # _person = dict_enterprise.get(_enterprise_name,{}).get("legal_person_name","")
- # _phone = dict_enterprise.get(_enterprise_name,{}).get("phone_number","")
- # if len(_phone)==11 and _phone[0]=="1":
- # df_data["中标单位联系人"][_i] = _person
- # df_data["中标单位联系电话"][_i] = _phone
- # else:
- # if df_data["中标单位联系电话"][_i]=="":
- # contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts")
- # if contacts is not None:
- # _person,_phone = getOneContact(contacts,mobile_only=True)
- # df_data["中标单位联系人"][_i] = _person
- # df_data["中标单位联系电话"][_i] = _phone
- def fixContactPerson1(df_data,list_df_columns,get_legal_person=False):
- set_enterprise = set()
- if len(df_data.keys())>0:
- for _tenderee,_agency,_win in zip(df_data["招标单位"],df_data["代理单位"],df_data["中标单位"]):
- set_enterprise.add(_tenderee)
- set_enterprise.add(_agency)
- set_enterprise.add(_win)
- if "" in set_enterprise:
- set_enterprise.remove("")
- if None in set_enterprise:
- set_enterprise.remove(None)
- dict_enterprise = getDictEnterprise(list(set_enterprise),columns_to_get = ["procurement_system","company_org_type","reg_capital","actual_capital","contacts","estiblish_time","social_staff_num","zhong_biao_number","tou_biao_number","credit_code","legal_person_name","phone_number","reg_location","province","city"])
- # print("dict_enterprise",dict_enterprise)
- # conn = getConnection_oracle()
- # cursor = conn.cursor()
- if len(set_enterprise)>0:
- for _i in range(len(df_data["招标单位"])):
- _enterprise_name = df_data["招标单位"][_i]
- contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts")
- if contacts is not None:
- for _index in range(min(len(contacts),3)):
- contact_person,phone_no = contacts[_index].get("contact_person"),contacts[_index].get("phone_no")
- is_legal_person,is_manager,is_shareholder = contacts[_index].get("is_legal_person",0),contacts[_index].get("is_manager",0),contacts[_index].get("is_shareholder",0)
- df_data["招标联系人%s"%str(_index+1)][_i] = contact_person
- df_data["招标联系人电话%s"%str(_index+1)][_i] = phone_no
- df_data["招标单位所在地"][_i] = "%s-%s"%(dict_enterprise.get(_enterprise_name,{}).get("province",""),dict_enterprise.get(_enterprise_name,{}).get("city",""))
- df_data["项目类型"][_i] = dict_enterprise.get(_enterprise_name,{}).get("procurement_system")
- _enterprise_name = df_data["代理单位"][_i]
- contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts")
- if contacts is not None:
- for _index in range(min(len(contacts),3)):
- contact_person,phone_no = contacts[_index].get("contact_person"),contacts[_index].get("phone_no")
- is_legal_person,is_manager,is_shareholder = contacts[_index].get("is_legal_person",0),contacts[_index].get("is_manager",0),contacts[_index].get("is_shareholder",0)
- df_data["代理联系人%s"%str(_index+1)][_i] = contact_person
- df_data["代理联系人电话%s"%str(_index+1)][_i] = phone_no
- df_data["代理单位所在地"][_i] = "%s-%s"%(dict_enterprise.get(_enterprise_name,{}).get("province",""),dict_enterprise.get(_enterprise_name,{}).get("city",""))
- _enterprise_name = df_data["中标单位"][_i]
- df_data["中标单位所在地"][_i] = "%s-%s"%(dict_enterprise.get(_enterprise_name,{}).get("province",""),dict_enterprise.get(_enterprise_name,{}).get("city",""))
- _person = dict_enterprise.get(_enterprise_name,{}).get("legal_person_name","")
- _phone = dict_enterprise.get(_enterprise_name,{}).get("phone_number","")
- if len(_phone)==11 and _phone[0]=="1":
- df_data["中标法人"][_i] = _person
- df_data["中标法人电话"][_i] = _phone
- contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts")
- if contacts is not None:
- for _index in range(min(len(contacts),3)):
- contact_person,phone_no = contacts[_index].get("contact_person"),contacts[_index].get("phone_no")
- is_legal_person,is_manager,is_shareholder = contacts[_index].get("is_legal_person",0),contacts[_index].get("is_manager",0),contacts[_index].get("is_shareholder",0)
- if is_manager:
- df_data["中标高管"][_i] = contact_person
- df_data["中标高管电话"][_i] = phone_no
- if is_shareholder:
- df_data["中标股东"][_i] = contact_person
- df_data["中标股东电话"][_i] = phone_no
- df_data["中标联系人%s"%str(_index+1)][_i] = contact_person
- df_data["中标联系人电话%s"%str(_index+1)][_i] = phone_no
- log("start export 15824381998:>>>>>>>>>>>>>>>")
- current_date = getCurrent_date(format="%Y-%m-%d")
- start_time = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())-7*24*60*60))
- if current_date<="2025-02-04" and datetime.datetime.now().weekday() in (1,4):
- for i in range(10):
- try:
- if datetime.datetime.now().weekday()==1:
- last_date = timeAdd(current_date,-4)
- if datetime.datetime.now().weekday() in (4,):
- last_date = timeAdd(current_date,-3)
- # start_time='2022-07-22'
- # current_date = '2022-07-28'
- page_time_start = timeAdd(current_date,-7)
- log("start exporting export2:=================")
- # columns = ["doctitle","doctextcon","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose"]
- dict_channel = getDict_docchannel()
- list_query = []
- str_keyword = '''
- 多功能位移机,失能群体安全转移,助行机器人,行走机器人,护理机器人,大小便护理,洗浴机,洗浴机器人,吃饭机器人,养老护理,老年评估,防摔,智能护理,智慧康养
- '''
- list_keyword = splitIntoList(str_keyword,"[\s\n、,,]")
- str_not_keyword = '''
- 清洗机器人
- '''
- list_not_key = splitIntoList(str_not_keyword,"[\s\n、,,]")
- tenderee_keywrod = "医院、大学、高校、高中"
- list_t_key = splitIntoList(tenderee_keywrod,"[\s\n、,,]")
- log(str(list_keyword))
- columns = ["doctitle","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_get_file_end","time_bidopen","time_bidstart","time_registration_start","time_registration_end"]
- bool_query = BoolQuery(must_queries=[
- generateBoolShouldQuery(["doctitle"],list_keyword,MatchPhraseQuery),
- generateBoolShouldQuery(["docchannel"],[51,52,102,103,105,104,114,121,122,101,119,120,118],TermQuery),
- RangeQuery("crtime",last_date+" 10:00:00",current_date+" 10:00:00",True,True),
- RangeQuery("status",201,300,True,True),
- RangeQuery("page_time",page_time_start)
- # TermQuery("procurement_system","公安系统"),
- # generateBoolShouldQuery(["province"],["湖南"],TermQuery),
- # generateBoolShouldQuery(["tenderee"],list_t_key,WildcardQuery)
- # generateBoolShouldQuery(["docchannel"],[101,118,119],TermQuery),
- ],
- must_not_queries=[
- generateBoolShouldQuery(["doctitle","doctextcon","attachmenttextcon"],list_not_key,MatchPhraseQuery),
- ]
- )
- list_row = getDocument([{"query":bool_query,"limit":500}],columns,thread_count=1)
- log("get document %d rows"%len(list_row))
- df_data = {}
- set_line = set()
- # list_row = filterRow(list_row)
- df_columns = getRowData_zb(df_data,list_row,set_line,list_keyword,dict_channel,True)
- fixContactPerson1(df_data,df_columns)
- df = pd.DataFrame(df_data)
- df_data_filter_11 = {}
- for c in df_columns:
- if c not in df_data_filter_11:
- df_data_filter_11[c] = []
- df_data_filter_12 = {}
- for c in df_columns:
- if c not in df_data_filter_12:
- df_data_filter_12[c] = []
- for _i in range(len(df_data["项目类型"])):
- if df_data["项目类型"][_i] in ("教育系统","科研系统"):
- for c in df_columns:
- if c not in df_data_filter_11:
- df_data_filter_11[c] = []
- df_data_filter_11[c].append(df_data[c][_i])
- else:
- for c in df_columns:
- if c not in df_data_filter_12:
- df_data_filter_12[c] = []
- df_data_filter_12[c].append(df_data[c][_i])
- df_f11 = pd.DataFrame(df_data_filter_11)
- df_f12 = pd.DataFrame(df_data_filter_12)
- time_end = timeAdd(current_date,60)
- columns = ["project_name","type","province","city","bidding_budget","city","demand","last_page_time"]
- bool_query = BoolQuery(must_queries=[
- generateBoolShouldQuery(["demand","product"],list_keyword,MatchPhraseQuery),
- RangeQuery("may_end",range_from=current_date),
- RangeQuery("may_end",range_to=time_end),
- # TermQuery("procurement_system","公安系统"),
- # generateBoolShouldQuery(["province"],["湖南"],TermQuery),
- # generateBoolShouldQuery(["tenderee"],list_t_key,WildcardQuery)
- # generateBoolShouldQuery(["docchannel"],[101,118,119],TermQuery),
- ],
- must_not_queries=[
- generateBoolShouldQuery(["demand","product"],list_not_key,MatchPhraseQuery),
- ]
- )
- list_row = getDocument([{"query":bool_query,"limit":500}],columns,table_name="preproject",table_index="preproject_index",thread_count=1,sort_column="may_end")
- log("get document %d rows"%len(list_row))
- df_data = {}
- set_line = set()
- # list_row = filterRow(list_row)
- df_columns_yc = getRowData_yc(df_data,list_row,set_line,list_keyword,dict_channel,True)
- fixContactPerson(df_data,df_columns_yc)
- for _i in range(len(df_data["项目类型"])):
- if df_data["项目类型"][_i] in ("政府办公室系统","财政系统","民事系统","企业采购系统","卫生系统"):
- df_data["类型"][_i] = "政企"
- elif df_data["项目类型"][_i] in ("教育系统","科研系统"):
- df_data["类型"][_i] = "教育"
- else:
- df_data["类型"][_i] = "其他"
- df1 = pd.DataFrame(df_data)
- filename = os.path.dirname(__file__)+"/data/%s年%s项目数据导出.xlsx"%(start_time[:4],current_date)
- with pd.ExcelWriter(filename) as writer:
- df_f11.to_excel(writer,sheet_name="教育",columns=df_columns if not df_f11.empty else None,index=False)
- df_f12.to_excel(writer,sheet_name="政企",columns=df_columns if not df_f12.empty else None,index=False)
- df1.to_excel(writer,sheet_name="项目预测",columns=df_columns_yc if not df1.empty else None)
- log(str(filename))
- host = "smtp.exmail.qq.com"
- username = "vip@bidizhaobiao.com"
- password = "Biaoxun66-"
- # receivers = ["724949655@qq.com","1396488964@qq.com"]
- receivers = ["md47@zuowei.com"]
- # receivers = ["1175730271@qq.com"]
- attachs = [filename]
- sendEmail(host,username,password,receivers,attachs=attachs)
- break
- except Exception as e:
- traceback.print_exc()
- def job_export():
- _scheduler = BlockingScheduler()
- e3 = Export3()
- # _scheduler.add_job(export_medicine_friday,"cron",hour=8)
- # _scheduler.add_job(export_medicine_friday,"cron",second="*/1")
- _scheduler.add_job(export2,"cron",hour=9)
- _scheduler.add_job(export5,"cron",hour=16)
- # _scheduler.add_job(e3.export4,"cron",hour=7)
- _scheduler.add_job(e3.trytimes,"cron",hour=21)
- _scheduler.add_job(e3.export3_1,"cron",hour=21)
- _scheduler.add_job(export_15824381998,"cron",hour=21)
- _scheduler.add_job(export_13510123669,"cron",hour=10)
- _scheduler.start()
- if __name__=="__main__":
- # job_export()
- # export_medicine_friday()
- # export2()
- # export_document_except()
- # e3 = Export3()
- # e3.export4_by_project()
- # e3.export4()
- # e3.trytimes()
- # e3.export3_1()
- # export5()
- # export_15824381998()
- export_13510123669()
|