exportJobs.py 133 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581
  1. #coding:utf8
  2. import sys
  3. import os
  4. sys.path.append(os.path.join(os.path.dirname(__file__),".."))
  5. import requests
  6. from utils.Utils import sendEmail,getCurrent_date,log
  7. import datetime
  8. import time
  9. from export.exportDocument import *
  10. import pandas as pd
  11. from apscheduler.schedulers.blocking import BlockingScheduler
  12. from export.exportUtils import rsa_encrpt
  13. import urllib.parse
  14. import base64
  15. def export_medicine_friday():
  16. current_date = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())-24*60*60))
  17. start_time = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())-7*24*60*60))
  18. if current_date<="2022-04-25":
  19. if datetime.datetime.now().weekday()==0:
  20. for i in range(10):
  21. try:
  22. df_data = exportDocument_medicine(start_time,current_date)
  23. df = pd.DataFrame(df_data)
  24. filename = os.path.dirname(__file__)+"/data/%s年%s至%s医疗数据导出.xlsx"%(start_time[:4],start_time,current_date)
  25. df.to_excel(filename,columns=list_df_columns)
  26. host = "smtp.exmail.qq.com"
  27. username = "vip@bidizhaobiao.com"
  28. password = "Biaoxun66-"
  29. receivers = ["1985262186@qq.com","1175730271@qq.com","1265797328@qq.com","1289358902@qq.com"]
  30. attachs = [filename]
  31. sendEmail(host,username,password,receivers,attachs=attachs)
  32. break
  33. except Exception as e:
  34. traceback.print_exc()
  35. log(str(e))
  36. time.sleep(60)
  37. def export2():
  38. def removeData(df_data):
  39. list_remove_index = []
  40. list_c = df_data.get("招标人采购系统",[])
  41. for _c_i in range(len(list_c)):
  42. if list_c[_c_i]=="企业采购系统":
  43. list_remove_index.append(_c_i)
  44. list_remove_index.reverse()
  45. print(list_remove_index)
  46. for k,v in df_data.items():
  47. for _rc in list_remove_index:
  48. v.pop(_rc)
  49. for k,v in df_data.items():
  50. v = v[:500]
  51. log("start export2:>>>>>>>>>>>>>>>")
  52. current_date = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())-24*60*60))
  53. start_time = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())-7*24*60*60))
  54. if current_date<="2024-02-28":
  55. if datetime.datetime.now().weekday()==4:
  56. for i in range(10):
  57. try:
  58. # start_time='2022-07-22'
  59. # current_date = '2022-07-28'
  60. log("start exporting export2:=================")
  61. # columns = ["doctitle","doctextcon","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose"]
  62. columns = ["doctitle","doctextcon","attachmenttextcon","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose","time_bidopen"]
  63. dict_channel = getDict_docchannel()
  64. list_query = []
  65. str_keyword = '''
  66. 机房建设 多媒体教室建设 数据中心机房 班班通 电子设备
  67. 视频 LED 监控 视频监控系统 信息安全
  68. 信息化建设 智能化 网络安全服务 网络安全系统 等级保护测评
  69. 信息系统 大数据中心 智慧教室建设 数据安全建设 可视化指挥联动平台
  70. 音频处理器 教学专用仪器 多媒体扩声系统 智慧校园 数字化
  71. 交换 路由 无线 城域网建设 触控一体机
  72. 教学设备
  73. '''
  74. list_keyword = splitIntoList(str_keyword,"[\s\n、,,]")
  75. str_not_keyword = '''
  76. '''
  77. list_not_key = splitIntoList(str_not_keyword,"[\s\n、,,]")
  78. tenderee_keywrod = "医院、大学、高校、高中"
  79. list_t_key = splitIntoList(tenderee_keywrod,"[\s\n、,,]")
  80. log(str(list_keyword))
  81. bool_query = BoolQuery(must_queries=[
  82. generateBoolShouldQuery(["doctitle","doctextcon","attachmenttextcon"],list_keyword,MatchPhraseQuery),
  83. generateBoolShouldQuery(["docchannel"],[52,102,114],TermQuery),
  84. RangeQuery("page_time",start_time,current_date,True,True),
  85. RangeQuery("status",151,300,True,True),
  86. # TermQuery("procurement_system","公安系统"),
  87. generateBoolShouldQuery(["province"],["湖南"],TermQuery),
  88. # generateBoolShouldQuery(["tenderee"],list_t_key,WildcardQuery)
  89. # generateBoolShouldQuery(["docchannel"],[101,118,119],TermQuery),
  90. ],
  91. # must_not_queries=[
  92. # generateBoolShouldQuery(["doctitle"],list_not_key,MatchPhraseQuery),
  93. # ]
  94. )
  95. list_query.append({"query":bool_query,"limit":700})
  96. list_row = getDocument(list_query,columns)
  97. log("get document %d rows"%len(list_row))
  98. df_data = {}
  99. set_line = set()
  100. # list_row = filterRow(list_row)
  101. getRowData(df_data,list_row,set_line,list_keyword,dict_channel,True)
  102. fixContactPerson(df_data,list_df_columns)
  103. removeData(df_data)
  104. df = pd.DataFrame(df_data)
  105. filename = os.path.dirname(__file__)+"/data/%s年%s至%s数据导出.xlsx"%(start_time[:4],start_time,current_date)
  106. df.to_excel(filename,columns=list_df_columns)
  107. log(str(filename))
  108. host = "smtp.exmail.qq.com"
  109. username = "vip@bidizhaobiao.com"
  110. password = "Biaoxun66-"
  111. receivers = ["1175730271@qq.com","493894608@qq.com"]
  112. # receivers = ["1175730271@qq.com"]
  113. attachs = [filename]
  114. sendEmail(host,username,password,receivers,attachs=attachs)
  115. break
  116. except Exception as e:
  117. traceback.print_exc()
  118. def export5():
  119. '''
  120. 客户信息
  121. 公司名:武汉市浩盛特种建材有限责任公司
  122. 客户姓名:高诗琴
  123. 会员号:15392839439
  124. 会员等级:高级会员
  125. 成交金额:5900元
  126. 服务期:12个月
  127. 截止推送项目时间:2024-02-17
  128. 申请原因:2.16成交的新客户,客户反馈跟销售沟通是人工每天整理好项目信息推送,现在寻求技术协助支持。
  129. 客户需求:
  130. 地区:河北省、北京市、天津市、山东省、江苏省、上海市、浙江省、福建省、广东省、海南省
  131. 业务关键词:膨胀剂,抗裂剂,防水剂,外加剂,防腐剂,阻锈剂,密实剂,耐久性,氧化镁,镁质,涂料
  132. (排除词,漆),需要招标信息
  133. 工作日下午四点文档信息推送到邮箱365531448@qq.com,每天更新最新的项目信息10条(不足10条可以按照更新信息推送)
  134. :return:
  135. '''
  136. def getRowData(df_data,rows,set_line,list_keyword,dict_channel,dumplicate):
  137. dict_line = {}
  138. # list_data = getRow_ots(rows)
  139. _index = 0
  140. rows.sort(key=lambda x:x.get("docid",0),reverse=True)
  141. set_col = set()
  142. df_columns = []
  143. for row in rows[:10]:
  144. _index += 1
  145. item = {}
  146. _dict = row
  147. set_dict_item_columns(set_col,df_columns,item,"省份",_dict.get("province",""))
  148. set_dict_item_columns(set_col,df_columns,item,"城市",_dict.get("city",""))
  149. set_dict_item_columns(set_col,df_columns,item,"公告类别",dict_channel.get(_dict.get("docchannel",""),""))
  150. set_dict_item_columns(set_col,df_columns,item,"发布时间",_dict.get("page_time",""))
  151. set_dict_item_columns(set_col,df_columns,item,"公告标题",_dict.get("doctitle",""))
  152. set_dict_item_columns(set_col,df_columns,item,"招标单位",_dict.get("tenderee",""))
  153. set_dict_item_columns(set_col,df_columns,item,"招标联系人",_dict.get("tenderee_contact",""))
  154. set_dict_item_columns(set_col,df_columns,item,"招标联系人电话",_dict.get("tenderee_phone",""))
  155. set_dict_item_columns(set_col,df_columns,item,"代理单位",_dict.get("agency",""))
  156. set_dict_item_columns(set_col,df_columns,item,"代理单位联系人",_dict.get("agency_contact",""))
  157. set_dict_item_columns(set_col,df_columns,item,"代理单位联系人电话",_dict.get("agency_phone",""))
  158. set_dict_item_columns(set_col,df_columns,item,"链接","http://www.bidizhaobiao.com/info-%s.html"%str(_dict.get("docid","")))
  159. for k,v in item.items():
  160. if k not in df_data:
  161. df_data[k] = []
  162. df_data[k].append(v)
  163. return df_columns
  164. log("start export5:>>>>>>>>>>>>>>>")
  165. current_date = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())))
  166. if current_date<="2024-02-17":
  167. weekday = datetime.datetime.now().weekday()
  168. if weekday>=0 and weekday<=4:
  169. for i in range(10):
  170. try:
  171. # start_time='2022-07-22'
  172. # current_date = '2022-07-28'
  173. log("start exporting export2:=================")
  174. # columns = ["doctitle","doctextcon","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose"]
  175. columns = ["doctitle","doctextcon","attachmenttextcon","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose","time_bidopen"]
  176. columns = ["doctitle","docchannel","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose","time_bidopen"]
  177. dict_channel = getDict_docchannel()
  178. list_query = []
  179. str_province = '''
  180. 河北、北京、天津、山东、江苏、上海、浙江、福建、广东、海南
  181. '''
  182. str_keyword = '''
  183. 膨胀剂 抗裂剂 防水剂 外加剂 镁质
  184. 防腐剂 阻锈剂 密实剂 耐久性 氧化镁
  185. '''
  186. list_province = splitIntoList(str_province,"[\s\n、,,]")
  187. list_keyword = splitIntoList(str_keyword,"[\s\n、,,]")
  188. str_not_keyword = '''
  189. 漆,复合风管,检测,涂料工程,涂料多乐士
  190. '''
  191. list_not_key = splitIntoList(str_not_keyword,"[\s\n、,,]")
  192. tenderee_keywrod = "医院、大学、高校、高中"
  193. list_t_key = splitIntoList(tenderee_keywrod,"[\s\n、,,]")
  194. log(str(list_keyword))
  195. bool_query = BoolQuery(must_queries=[
  196. generateBoolShouldQuery(["doctitle"],list_keyword,MatchPhraseQuery),
  197. generateBoolShouldQuery(["docchannel"],[52,102,114],TermQuery),
  198. TermQuery("page_time",current_date),
  199. RangeQuery("status",151,300,True,True),
  200. # TermQuery("procurement_system","公安系统"),
  201. generateBoolShouldQuery(["province"],list_province,TermQuery),
  202. # generateBoolShouldQuery(["tenderee"],list_t_key,WildcardQuery)
  203. # generateBoolShouldQuery(["docchannel"],[101,118,119],TermQuery),
  204. ],
  205. must_not_queries=[
  206. generateBoolShouldQuery(["doctitle"],list_not_key,MatchPhraseQuery),
  207. ]
  208. )
  209. list_query.append({"query":bool_query,"limit":700})
  210. list_row = getDocument(list_query,columns)
  211. log("get document %d rows"%len(list_row))
  212. df_data = {}
  213. set_line = set()
  214. # list_row = filterRow(list_row)
  215. df_column = getRowData(df_data,list_row,set_line,list_keyword,dict_channel,True)
  216. df = pd.DataFrame(df_data)
  217. print(df_data)
  218. filename = os.path.dirname(__file__)+"/data/%s年%s数据导出.xlsx"%(current_date[:4],current_date)
  219. df.to_excel(filename,columns=df_column)
  220. log(str(filename))
  221. host = "smtp.exmail.qq.com"
  222. username = "vip@bidizhaobiao.com"
  223. password = "Biaoxun66-"
  224. receivers = ["1175730271@qq.com","365531448@qq.com"]
  225. # receivers = ["1175730271@qq.com"]
  226. attachs = [filename]
  227. sendEmail(host,username,password,receivers,attachs=attachs)
  228. break
  229. except Exception as e:
  230. traceback.print_exc()
  231. def export_document_except():
  232. def getRowData(df_data,rows,dict_channel,set_columns1,list_df_column1):
  233. _index = 0
  234. for row in rows:
  235. _index += 1
  236. item = {}
  237. _dict = row
  238. _extract = json.loads(_dict.get("extract_json","{}"))
  239. product_attrs = _extract.get("product_attrs",{"data":[{}]}).get("data")
  240. docid = _dict.get("docid","")
  241. win_tenderer = ""
  242. bidding_budget = ""
  243. sub_docs_json = json.loads(_dict.get("sub_docs_json","[]"))
  244. for _doc in sub_docs_json:
  245. if win_tenderer=="":
  246. win_tenderer = _doc.get("win_tenderer","")
  247. if bidding_budget=="":
  248. bidding_budget= _doc.get("bidding_budget","")
  249. for _attrs in product_attrs:
  250. set_dict_item_columns(set_columns1,list_df_column1,item,"公告链接",'=HYPERLINK("http://www.bidizhaobiao.com/info-%s.html","查看公告")'%(str(docid)))
  251. set_dict_item_columns(set_columns1,list_df_column1,item,"公告时间",_dict.get("page_time"))
  252. set_dict_item_columns(set_columns1,list_df_column1,item,"省份",_dict.get("province"))
  253. set_dict_item_columns(set_columns1,list_df_column1,item,"城市",_dict.get("city"))
  254. set_dict_item_columns(set_columns1,list_df_column1,item,"地区",_dict.get("district"))
  255. set_dict_item_columns(set_columns1,list_df_column1,item,"项目标题",_dict.get("doctitle"))
  256. set_dict_item_columns(set_columns1,list_df_column1,item,"采购单位",_dict.get("tenderee"))
  257. set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位",win_tenderer)
  258. set_dict_item_columns(set_columns1,list_df_column1,item,"产品名称",_dict.get("product"))
  259. set_dict_item_columns(set_columns1,list_df_column1,item,"规格型号",_attrs.get("specs",""))
  260. set_dict_item_columns(set_columns1,list_df_column1,item,"品牌",_attrs.get("brand",""))
  261. _quantity = ""
  262. _uniPrice = ""
  263. try:
  264. _quantity = int(_attrs.get("quantity","0"))
  265. if _quantity==0:
  266. _quantity = ""
  267. except Exception as e:
  268. pass
  269. try:
  270. _uniPrice = float(_attrs.get("_uniPrice","0"))
  271. if _uniPrice==0:
  272. _uniPrice = ""
  273. except Exception as e:
  274. pass
  275. set_dict_item_columns(set_columns1,list_df_column1,item,"数量",_quantity)
  276. set_dict_item_columns(set_columns1,list_df_column1,item,"单价(元)",_uniPrice)
  277. sumPrice = ""
  278. if _quantity!="" and _uniPrice!="":
  279. sumPrice = _quantity*_uniPrice
  280. set_dict_item_columns(set_columns1,list_df_column1,item,"总价(元)",sumPrice)
  281. set_dict_item_columns(set_columns1,list_df_column1,item,"项目金额(元)",bidding_budget)
  282. for k,v in item.items():
  283. if k not in df_data:
  284. df_data[k] = []
  285. df_data[k].append(v)
  286. current_date = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())))
  287. current_date = '2022-04-01'
  288. columns = ["doctitle","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose","web_source_no","web_source_name","service_time","person_review","time_get_file_start","time_get_file_end","time_earnest_money_start","time_earnest_money_end"]
  289. dict_channel = getDict_docchannel()
  290. list_query = []
  291. str_keyword = '''
  292. 机房建设 多媒体教室建设 数据中心机房 班班通 电子设备
  293. 视频 LED 监控 视频监控系统 信息安全
  294. 信息化建设 网络安全 网络安全服务 网络安全系统 等级保护测评
  295. 信息系统 大数据中心 智慧教室建设 数据安全建设 可视化指挥联动平台
  296. 音频处理器 教学专用仪器 多媒体扩声系统 智慧校园 数字化
  297. 交换 路由 无线 信息安全 触控一体机
  298. 教学设备
  299. '''
  300. list_keyword = splitIntoList(str_keyword,"[\s\n、,,]")
  301. str_not_keyword = '''
  302. '''
  303. list_not_key = splitIntoList(str_not_keyword,"[\s\n、,,]")
  304. tenderee_keywrod = "医院、大学、高校、高中"
  305. list_t_key = splitIntoList(tenderee_keywrod,"[\s\n、,,]")
  306. s_q = BoolQuery(should_queries=[BoolQuery(must_queries=[TermQuery("docchannel",52)],
  307. must_not_queries=[WildcardQuery("tenderee","*")]),
  308. BoolQuery(must_queries=[generateBoolShouldQuery(["docchannel"],[101,119,120],TermQuery)],
  309. must_not_queries=[NestedQuery("sub_docs_json",WildcardQuery("sub_docs_json.win_tenderer","*"))])])
  310. log(str(list_keyword))
  311. set_columns1 = set()
  312. list_df_columns1 = []
  313. bool_query = BoolQuery(must_queries=[
  314. TermQuery("page_time",current_date),
  315. RangeQuery("status",151,451,True,True),
  316. s_q
  317. ],
  318. # must_not_queries=[
  319. # generateBoolShouldQuery(["doctitle"],list_not_key,MatchPhraseQuery),
  320. # ]
  321. )
  322. list_query.append({"query":bool_query,"limit":500000})
  323. list_row = getDocument(list_query,columns)
  324. log("get document %d rows"%len(list_row))
  325. df_data = {}
  326. set_line = set()
  327. # list_row = filterRow(list_row,list_not_key)
  328. getRowData(df_data,list_row,set_line,list_keyword,dict_channel,True)
  329. # fixContactPerson(df_data,list_df_columns)
  330. df = pd.DataFrame(df_data)
  331. filename = os.path.dirname(__file__)+"/data/%s异常数据导出.xlsx"%(current_date)
  332. print(list_df_columns)
  333. df.to_excel(filename,columns=list_df_columns)
  334. class Export3():
  335. def trytimes(self):
  336. for _ in range(3):
  337. _succeed = self.export3()
  338. if _succeed:
  339. break
  340. def export3(self,):
  341. def getRowData(df_data,rows,dict_channel,set_columns1,list_df_column1):
  342. _index = 0
  343. for row in rows:
  344. _index += 1
  345. item = {}
  346. _dict = row
  347. _extract = json.loads(_dict.get("extract_json","{}"))
  348. demand_info = _extract.get("demand_info",{"data":[]}).get("data")
  349. docid = _dict.get("docid","")
  350. win_tenderer = ""
  351. bidding_budget = ""
  352. win_bid_price = ""
  353. sub_docs_json = json.loads(_dict.get("sub_docs_json","[]"))
  354. for _doc in sub_docs_json:
  355. if win_tenderer=="":
  356. win_tenderer = _doc.get("win_tenderer","")
  357. if bidding_budget=="":
  358. bidding_budget= _doc.get("bidding_budget","")
  359. if win_bid_price=="":
  360. win_bid_price = _doc.get("win_bid_price","")
  361. if len(demand_info)==0:
  362. demand_info = [{}]
  363. demand_info = [{}]
  364. for _attrs in demand_info:
  365. set_dict_item_columns(set_columns1,list_df_column1,item,"序号",_index)
  366. set_dict_item_columns(set_columns1,list_df_column1,item,"项目编号",_dict.get("project_code"))
  367. set_dict_item_columns(set_columns1,list_df_column1,item,"项目名称",_dict.get("project_name"))
  368. set_dict_item_columns(set_columns1,list_df_column1,item,"省份",_dict.get("province"))
  369. set_dict_item_columns(set_columns1,list_df_column1,item,"城市",_dict.get("city"))
  370. set_dict_item_columns(set_columns1,list_df_column1,item,"信息类型",dict_channel.get(_dict.get("docchannel")))
  371. set_dict_item_columns(set_columns1,list_df_column1,item,"发布时间",_dict.get("crtime"))
  372. b_or_w = ""
  373. if bidding_budget!="":
  374. b_or_w = float(bidding_budget)/10000
  375. elif win_bid_price!="":
  376. b_or_w = float(win_bid_price)/10000
  377. set_dict_item_columns(set_columns1,list_df_column1,item,"中标金额/预算金额(万元)",b_or_w)
  378. set_dict_item_columns(set_columns1,list_df_column1,item,"招标单位",_dict.get("tenderee"))
  379. set_dict_item_columns(set_columns1,list_df_column1,item,"招标项目联系人","%s\n%s"%(_dict.get("tenderee_contact",""),_dict.get("tenderee_phone","")))
  380. # 中标单位 中标项目联系人 中标单位主要联系人 中标单位联系人 代理单位 代理项目联系人 代理单位联系人 比地招标公告地址
  381. set_dict_item_columns(set_columns1,list_df_column1,item,"招标单位联系人","")
  382. set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位",win_tenderer)
  383. set_dict_item_columns(set_columns1,list_df_column1,item,"中标项目联系人","")
  384. set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位主要联系人","")
  385. set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位联系人","")
  386. set_dict_item_columns(set_columns1,list_df_column1,item,"代理单位",_dict.get("agency",""))
  387. set_dict_item_columns(set_columns1,list_df_column1,item,"代理项目联系人","%s\n%s"%(_dict.get("agency_contact",""),_dict.get("agency_phone","")))
  388. set_dict_item_columns(set_columns1,list_df_column1,item,"代理单位联系人","")
  389. _key ='''MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQC8uZYYV6ls+5KMzUfvsPvv3XRdwkcTBj/ppB03mijUPHYTGvYSE0cQTbQrnIbXFtUYJguakpKLmVyH+T/w6vhxbQNlaykfe8RXEh4i4IJk8s/Qb0E0xODsjKBEr8VdDYeqqduWrtJpttXAvv93SsTPvgZBditRzJAzk0XH56zL1wIDAQAB'''
  390. _key = base64.b64decode(_key)
  391. _url_map = {"userId":"001795335",
  392. "timestamp":"%d"%(time.time()*1000),
  393. "docid":str(_dict.get("docid",""))}
  394. _encrpt_text = base64.b64encode(rsa_encrpt(json.dumps(_url_map),_key))
  395. _encrpt_text = urllib.parse.quote(_encrpt_text)
  396. _url = "http://www.bidizhaobiao.com/info-%s.html?emailSecret=%s"%(str(_dict.get("docid")),_encrpt_text)
  397. set_dict_item_columns(set_columns1,list_df_column1,item,"比地招标公告地址",_url)
  398. # set_dict_item_columns(set_columns1,list_df_column1,item,"项目编号",_dict.get("project_code"))
  399. # set_dict_item_columns(set_columns1,list_df_column1,item,"项目名称",_attrs.get("project_name"))
  400. #
  401. # set_dict_item_columns(set_columns1,list_df_column1,item,"需求",_attrs.get("demand"))
  402. # set_dict_item_columns(set_columns1,list_df_column1,item,"预算(元)",_attrs.get("budget"))
  403. # set_dict_item_columns(set_columns1,list_df_column1,item,"开始时间",_attrs.get("order_begin"))
  404. # set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位",win_tenderer)
  405. # set_dict_item_columns(set_columns1,list_df_column1,item,"产品名称",_attrs.get("product"))
  406. # set_dict_item_columns(set_columns1,list_df_column1,item,"规格型号",_attrs.get("specs",""))
  407. # set_dict_item_columns(set_columns1,list_df_column1,item,"品牌",_attrs.get("brand",""))
  408. # _quantity = ""
  409. # _uniPrice = ""
  410. # try:
  411. # _quantity = int(_attrs.get("quantity","0"))
  412. # if _quantity==0:
  413. # _quantity = ""
  414. # except Exception as e:
  415. # pass
  416. # try:
  417. # _uniPrice = float(_attrs.get("unitPrice","0"))
  418. # if _uniPrice==0:
  419. # _uniPrice = ""
  420. # except Exception as e:
  421. # pass
  422. # set_dict_item_columns(set_columns1,list_df_column1,item,"数量",_quantity)
  423. # set_dict_item_columns(set_columns1,list_df_column1,item,"单价(元)",_uniPrice)
  424. # sumPrice = ""
  425. # if _quantity!="" and _uniPrice!="":
  426. # sumPrice = _quantity*_uniPrice
  427. # set_dict_item_columns(set_columns1,list_df_column1,item,"总价(元)",sumPrice)
  428. # set_dict_item_columns(set_columns1,list_df_column1,item,"项目金额(元)",bidding_budget)
  429. for k,v in item.items():
  430. if k not in df_data:
  431. df_data[k] = []
  432. df_data[k].append(v)
  433. def getRowData1(df_data,rows,dict_channel,set_columns1,list_df_column1):
  434. _index = 0
  435. for row in rows:
  436. _index += 1
  437. item = {}
  438. _dict = row
  439. _extract = json.loads(_dict.get("extract_json","{}"))
  440. demand_info = _extract.get("demand_info",{"data":[]}).get("data")
  441. docid = _dict.get("docid","")
  442. win_tenderer = ""
  443. bidding_budget = ""
  444. win_bid_price = ""
  445. sub_docs_json = json.loads(_dict.get("sub_docs_json","[]"))
  446. for _doc in sub_docs_json:
  447. if win_tenderer=="":
  448. win_tenderer = _doc.get("win_tenderer","")
  449. if bidding_budget=="":
  450. bidding_budget= _doc.get("bidding_budget","")
  451. if win_bid_price=="":
  452. win_bid_price = _doc.get("win_bid_price","")
  453. if len(demand_info)==0:
  454. demand_info = [{}]
  455. demand_info = [{}]
  456. for _attrs in demand_info:
  457. # set_dict_item_columns(set_columns1,list_df_column1,item,"序号",_index)
  458. # set_dict_item_columns(set_columns1,list_df_column1,item,"项目编号",_dict.get("project_code"))
  459. set_dict_item_columns(set_columns1,list_df_column1,item,"地区",_dict.get("district"))
  460. set_dict_item_columns(set_columns1,list_df_column1,item,"省份",_dict.get("province"))
  461. set_dict_item_columns(set_columns1,list_df_column1,item,"项目名称",_dict.get("project_name"))
  462. set_dict_item_columns(set_columns1,list_df_column1,item,"采购人",_dict.get("tenderee"))
  463. set_dict_item_columns(set_columns1,list_df_column1,item,"招标时间",_dict.get("page_time"))
  464. b_or_w = ""
  465. if bidding_budget!="":
  466. b_or_w = float(bidding_budget)/10000
  467. elif win_bid_price!="":
  468. b_or_w = float(win_bid_price)/10000
  469. set_dict_item_columns(set_columns1,list_df_column1,item,"招标预算",bidding_budget)
  470. set_dict_item_columns(set_columns1,list_df_column1,item,"中标时间",_dict.get("page_time"))
  471. set_dict_item_columns(set_columns1,list_df_column1,item,"中标金额",win_bid_price)
  472. set_dict_item_columns(set_columns1,list_df_column1,item,"中标人",win_tenderer)
  473. set_dict_item_columns(set_columns1,list_df_column1,item,"采购模式",_dict.get("bidway"))
  474. set_dict_item_columns(set_columns1,list_df_column1,item,"命中关键词",_dict.get("keyword"))
  475. set_dict_item_columns(set_columns1,list_df_column1,item,"采购品牌",'')
  476. set_dict_item_columns(set_columns1,list_df_column1,item,"采购数量",'')
  477. for k,v in item.items():
  478. if k not in df_data:
  479. df_data[k] = []
  480. df_data[k].append(v)
  481. def getContacts(contacts):
  482. try:
  483. if contacts is None or contacts=="":
  484. return ""
  485. _contacts = json.loads(contacts)
  486. list_c = []
  487. _count = 0
  488. for _c in _contacts:
  489. _count += 1
  490. contact_person = _c.get("contact_person","")
  491. phone_no = _c.get("phone_no","")
  492. list_c.append("%s(企业联系人%d)\n%s"%(str(contact_person),_count,str(phone_no)))
  493. if _count>=5:
  494. break
  495. return ",\n".join(list_c)
  496. except Exception as e:
  497. return ""
  498. def fixContactPerson(df_data,list_df_columns,get_legal_person=False):
  499. set_enterprise = set()
  500. if len(df_data.keys())>0:
  501. for _tenderee,_agency,_win in zip(df_data["招标单位"],df_data["代理单位"],df_data["中标单位"]):
  502. set_enterprise.add(_tenderee)
  503. set_enterprise.add(_agency)
  504. set_enterprise.add(_win)
  505. if "" in set_enterprise:
  506. set_enterprise.remove("")
  507. if None in set_enterprise:
  508. set_enterprise.remove(None)
  509. dict_enterprise = getDictEnterprise(list(set_enterprise),columns_to_get = ["contacts","procurement_system"])
  510. # conn = getConnection_oracle()
  511. # cursor = conn.cursor()
  512. if len(set_enterprise)>0:
  513. for _i in range(len(df_data["招标单位"])):
  514. _enterprise_name = df_data["招标单位"][_i]
  515. df_data["招标单位联系人"][_i] = getContacts(dict_enterprise.get(_enterprise_name,{}).get("contacts"))
  516. _enterprise_name = df_data["代理单位"][_i]
  517. df_data["代理单位联系人"][_i] = getContacts(dict_enterprise.get(_enterprise_name,{}).get("contacts"))
  518. _enterprise_name = df_data["中标单位"][_i]
  519. df_data["中标单位联系人"][_i] = getContacts(dict_enterprise.get(_enterprise_name,{}).get("contacts"))
  520. # if "采购系统" not in df_data:
  521. # df_data["采购系统"] = []
  522. # df_data["采购系统"].append(dict_enterprise.get(_enterprise_name,{}).get("procurement_system",""))
  523. # list_df_columns.extend(['采购系统'])
  524. '''
  525. export by customer's subscription
  526. :return:
  527. '''
  528. try:
  529. ots_client = getConnect_ots()
  530. subscription4 = '''
  531. 触控一体机 交互平板 交互一体机 交互智能平板 交互大屏 教学一体机 智慧屏 智慧黑板 互动黑板 班班通 多媒体教室 多媒体设备 多媒体系统 智慧教室
  532. '''
  533. # provinces = "河北 山西 广东 海南 江苏 安徽 山东 河南 湖北 湖南 重庆 黑龙江 陕西 甘肃 青海 宁夏"
  534. provinces = "海南 江苏 安徽 湖北 湖南 重庆 甘肃 青海 宁夏 河南 北京 天津"
  535. dict_channel = getDict_docchannel()
  536. list_province = splitIntoList(provinces,"\s|,|,|、")
  537. list_subscription4 = splitIntoList(subscription4,"\s|,|,|、")
  538. current_date = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())))
  539. last_date = timeAdd(current_date,-1)
  540. # last_date = "2023-12-08"
  541. # current_date = "2023-12-10"
  542. set_columns1 = set()
  543. list_df_columns1 = []
  544. bool_query = BoolQuery(must_queries=[
  545. generateBoolShouldQuery(["docchannel"],[102,114],TermQuery),
  546. RangeQuery("page_time",last_date),
  547. RangeQuery("status",201,300,True,True),
  548. generateBoolShouldQuery(["doctitle","doctextcon","attachmenttextcon"],list_subscription4,MatchPhraseQuery),
  549. generateBoolShouldQuery(["province"],list_province,WildcardQuery),
  550. RangeQuery("crtime","%s 21:00:00"%last_date,"%s 21:00:00"%current_date)
  551. ],
  552. # must_not_queries=[
  553. # generateBoolShouldQuery(["doctitle"],list_not_key,MatchPhraseQuery),
  554. # ]
  555. )
  556. columns = ["doctitle","docchannel","original_docchannel","product","crtime","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","extract_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone"]
  557. list_query = [{"query":bool_query,"limit":50000}]
  558. list_row = getDocument(list_query,columns)
  559. filename0 = os.path.dirname(__file__)+"/data/%s订阅4订阅5数据导出数量%d_%s.xlsx"%(current_date,len(list_row),getCurrent_date("%Y-%m-%d_%H%M%S"))
  560. df_data = {}
  561. if len(list_row)>0:
  562. getRowData(df_data,list_row,dict_channel,set_columns1,list_df_columns1)
  563. fixContactPerson(df_data,list_df_columns1)
  564. df = pd.DataFrame(df_data)
  565. df.to_excel(filename0,columns=list_df_columns1,index=False)
  566. else:
  567. df = pd.DataFrame(df_data)
  568. df.to_excel(filename0,index=False)
  569. host = "smtp.exmail.qq.com"
  570. username = "vip1@bidizhaobiao.com"
  571. password = "Biaoxun666+"
  572. # receivers = ["1175730271@qq.com","995116318@qq.com","huangxiaofang@cvte.com"]
  573. receivers = ["1175730271@qq.com","1208135584@qq.com","youyuer@cvte.com","chenyuxue@cvte.com"]
  574. # receivers = ["1175730271@qq.com"]
  575. attachs = [filename0]
  576. sendEmail(host,username,password,receivers,attachs=attachs)
  577. return True
  578. except Exception as e:
  579. traceback.print_exc()
  580. return False
  581. def export3_1(self,):
  582. def getRowData(df_data,rows,dict_channel,set_columns1,list_df_column1):
  583. _index = 0
  584. for row in rows:
  585. _index += 1
  586. item = {}
  587. _dict = row
  588. _extract = json.loads(_dict.get("extract_json","{}"))
  589. demand_info = _extract.get("demand_info",{"data":[]}).get("data")
  590. docid = _dict.get("docid","")
  591. win_tenderer = ""
  592. bidding_budget = ""
  593. win_bid_price = ""
  594. sub_docs_json = json.loads(_dict.get("sub_docs_json","[]"))
  595. for _doc in sub_docs_json:
  596. if win_tenderer=="":
  597. win_tenderer = _doc.get("win_tenderer","")
  598. if bidding_budget=="":
  599. bidding_budget= _doc.get("bidding_budget","")
  600. if win_bid_price=="":
  601. win_bid_price = _doc.get("win_bid_price","")
  602. if len(demand_info)==0:
  603. demand_info = [{}]
  604. demand_info = [{}]
  605. for _attrs in demand_info:
  606. set_dict_item_columns(set_columns1,list_df_column1,item,"序号",_index)
  607. set_dict_item_columns(set_columns1,list_df_column1,item,"项目编号",_dict.get("project_code"))
  608. set_dict_item_columns(set_columns1,list_df_column1,item,"项目名称",_dict.get("project_name"))
  609. set_dict_item_columns(set_columns1,list_df_column1,item,"省份",_dict.get("province"))
  610. set_dict_item_columns(set_columns1,list_df_column1,item,"城市",_dict.get("city"))
  611. set_dict_item_columns(set_columns1,list_df_column1,item,"信息类型",dict_channel.get(_dict.get("docchannel")))
  612. set_dict_item_columns(set_columns1,list_df_column1,item,"发布时间",_dict.get("crtime"))
  613. b_or_w = ""
  614. if bidding_budget!="":
  615. b_or_w = float(bidding_budget)/10000
  616. elif win_bid_price!="":
  617. b_or_w = float(win_bid_price)/10000
  618. set_dict_item_columns(set_columns1,list_df_column1,item,"中标金额/预算金额(万元)",b_or_w)
  619. set_dict_item_columns(set_columns1,list_df_column1,item,"招标单位",_dict.get("tenderee"))
  620. set_dict_item_columns(set_columns1,list_df_column1,item,"招标项目联系人","%s\n%s"%(_dict.get("tenderee_contact",""),_dict.get("tenderee_phone","")))
  621. # 中标单位 中标项目联系人 中标单位主要联系人 中标单位联系人 代理单位 代理项目联系人 代理单位联系人 比地招标公告地址
  622. set_dict_item_columns(set_columns1,list_df_column1,item,"招标单位联系人","")
  623. set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位",win_tenderer)
  624. set_dict_item_columns(set_columns1,list_df_column1,item,"中标项目联系人","")
  625. set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位主要联系人","")
  626. set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位联系人","")
  627. set_dict_item_columns(set_columns1,list_df_column1,item,"代理单位",_dict.get("agency",""))
  628. set_dict_item_columns(set_columns1,list_df_column1,item,"代理项目联系人","%s\n%s"%(_dict.get("agency_contact",""),_dict.get("agency_phone","")))
  629. set_dict_item_columns(set_columns1,list_df_column1,item,"代理单位联系人","")
  630. _key ='''MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQC8uZYYV6ls+5KMzUfvsPvv3XRdwkcTBj/ppB03mijUPHYTGvYSE0cQTbQrnIbXFtUYJguakpKLmVyH+T/w6vhxbQNlaykfe8RXEh4i4IJk8s/Qb0E0xODsjKBEr8VdDYeqqduWrtJpttXAvv93SsTPvgZBditRzJAzk0XH56zL1wIDAQAB'''
  631. _key = base64.b64decode(_key)
  632. _url_map = {"userId":"001795335",
  633. "timestamp":"%d"%(time.time()*1000),
  634. "docid":str(_dict.get("docid",""))}
  635. _encrpt_text = base64.b64encode(rsa_encrpt(json.dumps(_url_map),_key))
  636. _encrpt_text = urllib.parse.quote(_encrpt_text)
  637. _url = "http://www.bidizhaobiao.com/info-%s.html?emailSecret=%s"%(str(_dict.get("docid")),_encrpt_text)
  638. set_dict_item_columns(set_columns1,list_df_column1,item,"比地招标公告地址",_url)
  639. # set_dict_item_columns(set_columns1,list_df_column1,item,"项目编号",_dict.get("project_code"))
  640. # set_dict_item_columns(set_columns1,list_df_column1,item,"项目名称",_attrs.get("project_name"))
  641. #
  642. # set_dict_item_columns(set_columns1,list_df_column1,item,"需求",_attrs.get("demand"))
  643. # set_dict_item_columns(set_columns1,list_df_column1,item,"预算(元)",_attrs.get("budget"))
  644. # set_dict_item_columns(set_columns1,list_df_column1,item,"开始时间",_attrs.get("order_begin"))
  645. # set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位",win_tenderer)
  646. # set_dict_item_columns(set_columns1,list_df_column1,item,"产品名称",_attrs.get("product"))
  647. # set_dict_item_columns(set_columns1,list_df_column1,item,"规格型号",_attrs.get("specs",""))
  648. # set_dict_item_columns(set_columns1,list_df_column1,item,"品牌",_attrs.get("brand",""))
  649. # _quantity = ""
  650. # _uniPrice = ""
  651. # try:
  652. # _quantity = int(_attrs.get("quantity","0"))
  653. # if _quantity==0:
  654. # _quantity = ""
  655. # except Exception as e:
  656. # pass
  657. # try:
  658. # _uniPrice = float(_attrs.get("unitPrice","0"))
  659. # if _uniPrice==0:
  660. # _uniPrice = ""
  661. # except Exception as e:
  662. # pass
  663. # set_dict_item_columns(set_columns1,list_df_column1,item,"数量",_quantity)
  664. # set_dict_item_columns(set_columns1,list_df_column1,item,"单价(元)",_uniPrice)
  665. # sumPrice = ""
  666. # if _quantity!="" and _uniPrice!="":
  667. # sumPrice = _quantity*_uniPrice
  668. # set_dict_item_columns(set_columns1,list_df_column1,item,"总价(元)",sumPrice)
  669. # set_dict_item_columns(set_columns1,list_df_column1,item,"项目金额(元)",bidding_budget)
  670. for k,v in item.items():
  671. if k not in df_data:
  672. df_data[k] = []
  673. df_data[k].append(v)
  674. def getContacts(contacts):
  675. try:
  676. if contacts is None or contacts=="":
  677. return ""
  678. if isinstance(contacts,str):
  679. _contacts = json.loads(contacts)
  680. else:
  681. _contacts = contacts
  682. list_c = []
  683. _count = 0
  684. for _c in _contacts:
  685. _count += 1
  686. contact_person = _c.get("contact_person","")
  687. phone_no = _c.get("phone_no","")
  688. list_c.append("%s(企业联系人%d)\n%s"%(str(contact_person),_count,str(phone_no)))
  689. if _count>=5:
  690. break
  691. print("getContacts",",\n".join(list_c))
  692. return ",\n".join(list_c)
  693. except Exception as e:
  694. traceback.print_exc()
  695. print("getContacts","")
  696. return ""
  697. def fixContactPerson(df_data,list_df_columns,get_legal_person=False):
  698. set_enterprise = set()
  699. if len(df_data.keys())>0:
  700. for _tenderee,_agency,_win in zip(df_data["招标单位"],df_data["代理单位"],df_data["中标单位"]):
  701. set_enterprise.add(_tenderee)
  702. set_enterprise.add(_agency)
  703. set_enterprise.add(_win)
  704. if "" in set_enterprise:
  705. set_enterprise.remove("")
  706. if None in set_enterprise:
  707. set_enterprise.remove(None)
  708. dict_enterprise = getDictEnterprise(list(set_enterprise),columns_to_get = ["contacts","procurement_system"])
  709. # conn = getConnection_oracle()
  710. # cursor = conn.cursor()
  711. if len(set_enterprise)>0:
  712. for _i in range(len(df_data["招标单位"])):
  713. _enterprise_name = df_data["招标单位"][_i]
  714. df_data["招标单位联系人"][_i] = getContacts(dict_enterprise.get(_enterprise_name,{}).get("contacts"))
  715. _enterprise_name = df_data["代理单位"][_i]
  716. df_data["代理单位联系人"][_i] = getContacts(dict_enterprise.get(_enterprise_name,{}).get("contacts"))
  717. _enterprise_name = df_data["中标单位"][_i]
  718. df_data["中标单位联系人"][_i] = getContacts(dict_enterprise.get(_enterprise_name,{}).get("contacts"))
  719. # if "采购系统" not in df_data:
  720. # df_data["采购系统"] = []
  721. # df_data["采购系统"].append(dict_enterprise.get(_enterprise_name,{}).get("procurement_system",""))
  722. # list_df_columns.extend(['采购系统'])
  723. '''
  724. export by customer's subscription
  725. :return:
  726. '''
  727. try:
  728. ots_client = getConnect_ots()
  729. subscription4 = '''
  730. 录播、三个课堂、互动课堂、同步课堂、双师课堂、专递课堂、微格教室、远程课堂、督导巡课、推门听课、互动教学
  731. '''
  732. # provinces = "河北 山西 广东 海南 江苏 安徽 山东 河南 湖北 湖南 重庆 黑龙江 陕西 甘肃 青海 宁夏"
  733. provinces = "海南 江苏 安徽 湖北 湖南 重庆 甘肃 青海 宁夏 河南"
  734. dict_channel = getDict_docchannel()
  735. list_province = splitIntoList(provinces,"\s|,|,|、")
  736. list_subscription4 = splitIntoList(subscription4,"\s|,|,|、")
  737. current_date = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())))
  738. last_date = timeAdd(current_date,-1)
  739. # last_date = "2023-08-14"
  740. # current_date = "2023-08-20"
  741. set_columns1 = set()
  742. list_df_columns1 = []
  743. bool_query = BoolQuery(must_queries=[
  744. generateBoolShouldQuery(["docchannel"],[102,114],TermQuery),
  745. RangeQuery("page_time",last_date),
  746. RangeQuery("status",201,300,True,True),
  747. generateBoolShouldQuery(["doctitle","doctextcon","attachmenttextcon"],list_subscription4,MatchPhraseQuery),
  748. # generateBoolShouldQuery(["province"],list_province,WildcardQuery),
  749. RangeQuery("crtime","%s 21:00:00"%last_date,"%s 21:00:00"%current_date)
  750. ],
  751. # must_not_queries=[
  752. # generateBoolShouldQuery(["doctitle"],list_not_key,MatchPhraseQuery),
  753. # ]
  754. )
  755. columns = ["doctitle","docchannel","original_docchannel","product","crtime","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","extract_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone"]
  756. list_query = [{"query":bool_query,"limit":50000}]
  757. list_row = getDocument(list_query,columns)
  758. filename0 = os.path.dirname(__file__)+"/data/%s订阅4订阅5数据导出数量%d_%s.xlsx"%(current_date,len(list_row),getCurrent_date("%Y-%m-%d_%H%M%S"))
  759. df_data = {}
  760. if len(list_row)>0:
  761. getRowData(df_data,list_row,dict_channel,set_columns1,list_df_columns1)
  762. fixContactPerson(df_data,list_df_columns1)
  763. df = pd.DataFrame(df_data)
  764. df.to_excel(filename0,columns=list_df_columns1,index=False)
  765. else:
  766. df = pd.DataFrame(df_data)
  767. df.to_excel(filename0,index=False)
  768. host = "smtp.exmail.qq.com"
  769. username = "vip1@bidizhaobiao.com"
  770. password = "Biaoxun666+"
  771. # receivers = ["1175730271@qq.com","995116318@qq.com","huangxiaofang@cvte.com"]
  772. receivers = ["1175730271@qq.com","1208135584@qq.com","wanghongyan@cvte.com"]
  773. # receivers = ["1175730271@qq.com"]
  774. attachs = [filename0]
  775. sendEmail(host,username,password,receivers,attachs=attachs)
  776. return True
  777. except Exception as e:
  778. traceback.print_exc()
  779. return False
  780. def export4(self):
  781. def getRowData(df_data,rows,dict_channel,set_columns1,list_df_column1):
  782. dict_filter,dict_update = getFixData()
  783. _index = 0
  784. for row in rows:
  785. _index += 1
  786. item = {}
  787. _dict = row
  788. keys = row["keyword"]
  789. log("================")
  790. log(keys)
  791. log(_dict.get("doctitle","")+_dict.get("doctextcon","")+_dict.get("attachmenttextcon",""))
  792. _keyword = str(list(set([ i for i in re.findall("|".join([re.escape(i) for i in keys if i!='']),_dict.get("doctitle","")+_dict.get("doctextcon","")+_dict.get("attachmenttextcon","")) if i!='' ])))
  793. log(_keyword)
  794. _extract = json.loads(_dict.get("extract_json","{}"))
  795. if len(set(keys)&set(["AR一体机","一体机","短焦投影仪","教学一体机","云桌面","录播","电子书包","电子班牌","智慧黑板"]))>0:
  796. product_attrs = _extract.get("product_attrs",{"data":[{}]}).get("data")
  797. else:
  798. product_attrs = [{}]
  799. if len(product_attrs)==0:
  800. product_attrs = [{}]
  801. docid = _dict.get("docid","")
  802. win_tenderer = ""
  803. bidding_budget = ""
  804. win_bid_price = ""
  805. sub_docs_json = json.loads(_dict.get("sub_docs_json","[]"))
  806. for _doc in sub_docs_json:
  807. if win_tenderer=="":
  808. win_tenderer = _doc.get("win_tenderer","")
  809. if bidding_budget=="":
  810. bidding_budget= _doc.get("bidding_budget","")
  811. if win_bid_price=="":
  812. win_bid_price = _doc.get("win_bid_price","")
  813. _index1 = 0
  814. if _dict.get("docid") in dict_filter:
  815. print("====filter")
  816. continue
  817. if _dict.get("docid") in dict_update:
  818. print("====update")
  819. win_bid_price = dict_update[_dict.get("docid")].get("win_bid_price","")
  820. bidding_budget= dict_update[_dict.get("docid")].get("bidding_budget","")
  821. for _attrs in product_attrs:
  822. _index1 += 1
  823. if re.search("运费",_attrs.get("product","")) is not None:
  824. if _index1==1:
  825. _attrs = {}
  826. else:
  827. continue
  828. set_dict_item_columns(set_columns1,list_df_column1,item,"序号",_index)
  829. # set_dict_item_columns(set_columns1,list_df_column1,item,"正文",_dict.get("doctextcon",""))
  830. # set_dict_item_columns(set_columns1,list_df_column1,item,"附件",_dict.get("attachmenttextcon",""))
  831. # set_dict_item_columns(set_columns1,list_df_column1,item,"项目名称",_dict.get("project_name",""))
  832. set_dict_item_columns(set_columns1,list_df_column1,item,"docid",_dict.get("docid"))
  833. set_dict_item_columns(set_columns1,list_df_column1,item,"网站链接",'=HYPERLINK("http://www.bidizhaobiao.com/info-%s.html","查看公告")'%(str(docid)))
  834. set_dict_item_columns(set_columns1,list_df_column1,item,"省份",_dict.get("province"))
  835. set_dict_item_columns(set_columns1,list_df_column1,item,"城市",_dict.get("city"))
  836. set_dict_item_columns(set_columns1,list_df_column1,item,"地区",_dict.get("district"))
  837. set_dict_item_columns(set_columns1,list_df_column1,item,"公告时间",_dict.get("page_time"))
  838. set_dict_item_columns(set_columns1,list_df_column1,item,"招标公告时间",'')
  839. set_dict_item_columns(set_columns1,list_df_column1,item,"项目标题",_dict.get("doctitle"))
  840. set_dict_item_columns(set_columns1,list_df_column1,item,"采购单位",_dict.get("tenderee"))
  841. set_dict_item_columns(set_columns1,list_df_column1,item,"中标供应商",win_tenderer)
  842. set_dict_item_columns(set_columns1,list_df_column1,item,"采购方式",_dict.get("bidway",""))
  843. set_dict_item_columns(set_columns1,list_df_column1,item,"招标采购方式","")
  844. set_dict_item_columns(set_columns1,list_df_column1,item,"采购系统","")
  845. set_dict_item_columns(set_columns1,list_df_column1,item,"产品名称",_attrs.get("product"))
  846. set_dict_item_columns(set_columns1,list_df_column1,item,"品牌",_attrs.get("brand",""))
  847. # set_dict_item_columns(set_columns1,list_df_column1,item,"规格型号",_attrs.get("specs",""))
  848. set_dict_item_columns(set_columns1,list_df_column1,item,"预算金额(元)",bidding_budget)
  849. # set_dict_item_columns(set_columns1,list_df_column1,item,"招标预算金额(元)","")
  850. _quantity = ""
  851. _uniPrice = ""
  852. try:
  853. _quantity = int(re.sub("[^\d]","",_attrs.get("quantity","0")))
  854. if _quantity==0:
  855. _quantity = ""
  856. except Exception as e:
  857. pass
  858. try:
  859. _uniPrice = float(_attrs.get("unitPrice","0"))
  860. if _uniPrice==0:
  861. _uniPrice = ""
  862. except Exception as e:
  863. pass
  864. set_dict_item_columns(set_columns1,list_df_column1,item,"数量",_quantity)
  865. set_dict_item_columns(set_columns1,list_df_column1,item,"单价(元)",_uniPrice)
  866. sumPrice = ""
  867. if _quantity!="" and _uniPrice!="":
  868. sumPrice = _quantity*_uniPrice
  869. set_dict_item_columns(set_columns1,list_df_column1,item,"总价(元)",sumPrice)
  870. set_dict_item_columns(set_columns1,list_df_column1,item,"中标金额(元)",win_bid_price)
  871. set_dict_item_columns(set_columns1,list_df_column1,item,"关键词",_keyword)
  872. for k,v in item.items():
  873. if k not in df_data:
  874. df_data[k] = []
  875. df_data[k].append(v)
  876. def filterRow(list_row,key="docid"):
  877. set_id = set()
  878. _filter_row = []
  879. for row in list_row:
  880. _v = row.get(key,"")
  881. if _v in set_id:
  882. continue
  883. set_id.add(_v)
  884. _filter_row.append(row)
  885. return _filter_row
  886. def fixContactPerson(df_data,list_df_columns,get_legal_person=False):
  887. set_enterprise = set()
  888. if len(df_data.keys())>0:
  889. for _tenderee,_win_tenderer in zip(df_data["采购单位"],df_data["中标供应商"]):
  890. set_enterprise.add(_tenderee)
  891. set_enterprise.add(_win_tenderer)
  892. if "" in set_enterprise:
  893. set_enterprise.remove("")
  894. if None in set_enterprise:
  895. set_enterprise.remove(None)
  896. dict_enterprise = getDictEnterprise(list(set_enterprise),columns_to_get = ["procurement_system"])
  897. # conn = getConnection_oracle()
  898. # cursor = conn.cursor()
  899. if len(set_enterprise)>0:
  900. for _i in range(len(df_data["采购单位"])):
  901. _enterprise_name = df_data["采购单位"][_i]
  902. # if df_data["招标联系人电话"][_i]=="":
  903. # contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts")
  904. # if contacts is not None:
  905. # _person,_phone = getOneContact(contacts)
  906. # df_data["招标联系人"][_i] = _person
  907. # df_data["招标联系人电话"][_i] = _phone
  908. df_data["采购系统"][_i] = dict_enterprise.get(_enterprise_name,{}).get("procurement_system","")
  909. def fixZhaobiao_page_time(df_data):
  910. ots_client = getConnect_ots()
  911. for i in range(len(df_data["docid"])):
  912. _docid = df_data["docid"][i]
  913. bool_query = BoolQuery(must_queries=[TermQuery("docids",_docid)])
  914. rows,next_token,total_count,is_all_succeed = ots_client.search("project2","project2_index",
  915. SearchQuery(bool_query,limit=1),
  916. ColumnsToGet(["zhao_biao_page_time","bidway","bidding_budget"],ColumnReturnType.SPECIFIED))
  917. list_data = getRow_ots(rows)
  918. if len(list_data)>0:
  919. df_data["招标公告时间"][i] = list_data[0].get("zhao_biao_page_time","")
  920. df_data["招标采购方式"][i] = list_data[0].get("bidway","")
  921. new_bidding_budget = list_data[0].get("bidding_budget",0)
  922. if df_data["预算金额(元)"][i]=="" or float(df_data["预算金额(元)"][i])==0:
  923. if df_data["中标金额(元)"][i]!="" and new_bidding_budget>=float(df_data["中标金额(元)"][i]):
  924. df_data["预算金额(元)"][i] = new_bidding_budget
  925. # df_data["招标预算金额(元)"][i] = list_data[0].get("bidding_budget","")
  926. def popRows(df_data):
  927. list_pop = []
  928. set_docid = set()
  929. for _c in range(len(df_data["采购系统"])):
  930. if df_data["采购系统"][_c]!="教育系统" or df_data["中标金额(元)"][_c]=="":
  931. list_pop.append(_c)
  932. set_docid.add(df_data["docid"][_c])
  933. list_pop.reverse()
  934. for k,v in df_data.items():
  935. for _p in list_pop:
  936. v.pop(_p)
  937. _index = 0
  938. set_docid1 = set()
  939. for _i in range(len(df_data["序号"])):
  940. _docid = df_data["docid"][_i]
  941. if _docid not in set_docid1:
  942. _index += 1
  943. set_docid1.add(_docid)
  944. df_data["序号"][_i] = _index
  945. return len(set_docid)
  946. df = pd.read_excel("20220927v1.4.xlsx",sheetname=1)
  947. list_search_dict = []
  948. for title_kw,content_kw,title_rm,content_rm in zip(df["标题采集关键词"],df["全文采集关键词"],df["标题排除词"],df["全文排除词"]):
  949. if not isinstance(title_kw,str):
  950. title_kw = ""
  951. if not isinstance(content_kw,str):
  952. content_kw = ""
  953. if not isinstance(title_rm,str):
  954. title_rm = ""
  955. if not isinstance(content_rm,str):
  956. content_rm = ""
  957. _dict = {"title_kw":re.split("\s|,|,",str(title_kw)),
  958. "content_kw":re.split("\s|,|,",str(content_kw)),
  959. "title_rm":re.split("\s|,|,",str(title_rm)),
  960. "content_rm":re.split("\s|,|,",str(content_rm))}
  961. list_search_dict.append(_dict)
  962. columns = ["extract_json","doctextcon","attachmenttextcon","doctitle","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose","web_source_no","web_source_name","service_time","person_review","time_get_file_start","time_get_file_end","time_earnest_money_start","time_earnest_money_end"]
  963. current_date = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())))
  964. last_date = timeAdd(current_date,-1)
  965. current_date = "2022-10-14"
  966. last_date = "2023-05-07"
  967. dict_channel = getDict_docchannel()
  968. list_mq = []
  969. list_query = []
  970. for _d in list_search_dict:
  971. print(_d)
  972. kw_should_q = []
  973. list_keys = []
  974. title_kw = _d.get("title_kw",[])
  975. if len(title_kw)>0:
  976. kw_should_q.append(generateBoolShouldQuery(["doctitle"],title_kw,MatchPhraseQuery))
  977. list_keys.extend(title_kw)
  978. content_kw = _d.get("content_kw",[])
  979. if len(content_kw)>0:
  980. kw_should_q.append(generateBoolShouldQuery(["doctextcon","attachmenttextcon"],content_kw,MatchPhraseQuery))
  981. list_keys.extend(content_kw)
  982. rm_should_q = []
  983. title_rm = _d.get("title_rm",[])
  984. if len(title_rm)>0:
  985. rm_should_q.append(generateBoolShouldQuery(["doctitle"],title_rm,MatchPhraseQuery))
  986. content_rm = _d.get("content_rm",[])
  987. if len(content_rm)>0:
  988. rm_should_q.append(generateBoolShouldQuery(["doctextcon","attachmenttextcon"],content_rm,MatchPhraseQuery))
  989. _query = BoolQuery(must_queries=[
  990. generateBoolShouldQuery(["docchannel"],[101],TermQuery),
  991. # RangeQuery("page_time",last_date,current_date),
  992. RangeQuery("page_time",current_date,last_date),
  993. # RangeQuery("status",201,300,True,True),
  994. # TermQuery("docid",263568527),
  995. # generateBoolShouldQuery(["doctitle","doctextcon","attachmenttextcon"],list_keys,MatchPhraseQuery),
  996. BoolQuery(should_queries=kw_should_q),
  997. # generateBoolShouldQuery(["province"],list_province,WildcardQuery),
  998. # RangeQuery("crtime","%s"%last_date,"%s"%current_date)
  999. ],
  1000. must_not_queries=rm_should_q)
  1001. list_query.append({"query":_query,"limit":50000,"keyword":list_keys})
  1002. break
  1003. def getFixData():
  1004. filename = "C:\\Users\\Administrator\\Desktop\\视源数据清洗.xlsx"
  1005. dict_filter = {}
  1006. dict_update = {}
  1007. df = pd.read_excel(filename,0)
  1008. for _docid,bidding_budget,win_bid_price,is_dup,is_fb in zip(df["docid"],df["检查招标金额"],df["中标金额(元)"],df["是否重复"],df["是否废标"]):
  1009. if str(is_dup) in ("是","1") or str(is_fb) in ("是","1"):
  1010. dict_filter[_docid] = 1
  1011. else:
  1012. dict_update[_docid] = {"docid":_docid,
  1013. "bidding_budget":bidding_budget,
  1014. "win_bid_price":win_bid_price}
  1015. df = pd.read_excel(filename,1)
  1016. for _docid,bidding_budget,win_bid_price,is_dup,is_fb in zip(df["docid"],df["检查招标金额"],df["中标金额(元)"],df["是否重复"],df["是否废标"]):
  1017. if str(is_dup) in ("是","1") or str(is_fb) in ("是","1"):
  1018. dict_filter[_docid] = 1
  1019. else:
  1020. dict_update[_docid] = {"docid":_docid,
  1021. "bidding_budget":bidding_budget,
  1022. "win_bid_price":win_bid_price}
  1023. return dict_filter,dict_update
  1024. list_row = getDocument(list_query,columns)
  1025. list_row = filterRow(list_row)
  1026. print("list_row",len(list_row))
  1027. set_columns1 = set()
  1028. list_df_columns1 = []
  1029. df_data = {}
  1030. getRowData(df_data,list_row,dict_channel,set_columns1,list_df_columns1)
  1031. print("len df_data",len(df_data["docid"]))
  1032. fixContactPerson(df_data,list_df_columns1)
  1033. fixZhaobiao_page_time(df_data)
  1034. size_pop = popRows(df_data)
  1035. print("size_pop",size_pop)
  1036. df = pd.DataFrame(df_data)
  1037. filename = os.path.dirname(__file__)+"/data/%s关键词数据导出数量%d_%s.xlsx"%(current_date,len(list_row)-size_pop,getCurrent_date("%Y-%m-%d_%H%M%S"))
  1038. df.to_excel(filename,columns=list_df_columns1,index=False)
  1039. host = "smtp.exmail.qq.com"
  1040. username = "vip@bidizhaobiao.com"
  1041. password = "Biaoxun66-"
  1042. receivers = ["1175730271@qq.com","747012698@qq.com","995116318@qq.com"]
  1043. receivers = ["1175730271@qq.com"]
  1044. attachs = [filename]
  1045. sendEmail(host,username,password,receivers,attachs=attachs)
  1046. def export4_by_project(self):
  1047. def getRowData(df_data,rows,dict_channel,set_columns1,list_df_column1):
  1048. # dict_filter,dict_update = getFixData()
  1049. ots_client = getConnect_ots()
  1050. _index = 0
  1051. for row in rows:
  1052. _index += 1
  1053. item = {}
  1054. _dict = row
  1055. keys = row["keyword"]
  1056. log("================")
  1057. log(keys)
  1058. docid = row.get("docids","")
  1059. log(docid)
  1060. try:
  1061. docid_0 = docid.split(",")[0]
  1062. except Exception as e:
  1063. continue
  1064. log(docid)
  1065. log(_dict.get("doctitle","")+_dict.get("doctextcon","")+_dict.get("attachmenttextcon",""))
  1066. _keyword = str(list(set([ i for i in re.findall("|".join([re.escape(i) for i in keys if i!='']),_dict.get("doctitle","")+_dict.get("doctextcon","")+_dict.get("attachmenttextcon","")) if i!='' ])))
  1067. log(_keyword)
  1068. product_attrs = [{}]
  1069. # for doc in docid.split(","):
  1070. # _q = BoolQuery(must_queries=[TermQuery("docid",int(doc))])
  1071. # r,n,t,_ = ots_client.search("document","document_index",SearchQuery(_q),columns_to_get=ColumnsToGet(["extract_json",ColumnReturnType.SPECIFIED]))
  1072. # r = getRow_ots(r)
  1073. # if len(r)>0:
  1074. # _extract = json.loads(r[0].get("extract_json","{}"))
  1075. # if len(set(keys)&set(["AR一体机","一体机","短焦投影仪","教学一体机","云桌面","录播","电子书包","电子班牌","智慧黑板"]))>0:
  1076. # product_attrs = _extract.get("product_attrs",{"data":[{}]}).get("data")
  1077. # else:
  1078. # product_attrs = [{}]
  1079. # if len(product_attrs)==0:
  1080. # product_attrs = [{}]
  1081. # win_tenderer = ""
  1082. # bidding_budget = ""
  1083. # win_bid_price = ""
  1084. # sub_docs_json = json.loads(_dict.get("sub_docs_json","[]"))
  1085. # for _doc in sub_docs_json:
  1086. # if win_tenderer=="":
  1087. # win_tenderer = _doc.get("win_tenderer","")
  1088. # if bidding_budget=="":
  1089. # bidding_budget= _doc.get("bidding_budget","")
  1090. # if win_bid_price=="":
  1091. # win_bid_price = _doc.get("win_bid_price","")
  1092. _index1 = 0
  1093. # if _dict.get("docid") in dict_filter:
  1094. # print("====filter")
  1095. # continue
  1096. # if _dict.get("docid") in dict_update:
  1097. # print("====update")
  1098. # win_bid_price = dict_update[_dict.get("docid")].get("win_bid_price","")
  1099. # bidding_budget= dict_update[_dict.get("docid")].get("bidding_budget","")
  1100. for _attrs in product_attrs:
  1101. _index1 += 1
  1102. if re.search("运费",_attrs.get("product","")) is not None:
  1103. if _index1==1:
  1104. _attrs = {}
  1105. else:
  1106. continue
  1107. set_dict_item_columns(set_columns1,list_df_column1,item,"序号",_index)
  1108. # set_dict_item_columns(set_columns1,list_df_column1,item,"正文",_dict.get("doctextcon",""))
  1109. # set_dict_item_columns(set_columns1,list_df_column1,item,"附件",_dict.get("attachmenttextcon",""))
  1110. # set_dict_item_columns(set_columns1,list_df_column1,item,"项目名称",_dict.get("project_name",""))
  1111. set_dict_item_columns(set_columns1,list_df_column1,item,"docid",docid)
  1112. set_dict_item_columns(set_columns1,list_df_column1,item,"网站链接",'=HYPERLINK("http://www.bidizhaobiao.com/info-%s.html","查看公告")'%(str(docid)))
  1113. set_dict_item_columns(set_columns1,list_df_column1,item,"省份",_dict.get("province"))
  1114. set_dict_item_columns(set_columns1,list_df_column1,item,"城市",_dict.get("city"))
  1115. set_dict_item_columns(set_columns1,list_df_column1,item,"地区",_dict.get("district"))
  1116. set_dict_item_columns(set_columns1,list_df_column1,item,"公告时间",_dict.get("page_time"))
  1117. set_dict_item_columns(set_columns1,list_df_column1,item,"招标公告时间",_dict.get("zhao_biao_page_time"))
  1118. set_dict_item_columns(set_columns1,list_df_column1,item,"项目标题",_dict.get("doctitles"))
  1119. set_dict_item_columns(set_columns1,list_df_column1,item,"采购单位",_dict.get("tenderee"))
  1120. set_dict_item_columns(set_columns1,list_df_column1,item,"中标供应商",_dict.get("win_tenderer"))
  1121. set_dict_item_columns(set_columns1,list_df_column1,item,"采购方式",_dict.get("bidway",""))
  1122. set_dict_item_columns(set_columns1,list_df_column1,item,"招标采购方式","")
  1123. set_dict_item_columns(set_columns1,list_df_column1,item,"采购系统","")
  1124. set_dict_item_columns(set_columns1,list_df_column1,item,"产品名称",_attrs.get("product"))
  1125. set_dict_item_columns(set_columns1,list_df_column1,item,"品牌",_attrs.get("brand",""))
  1126. # set_dict_item_columns(set_columns1,list_df_column1,item,"规格型号",_attrs.get("specs",""))
  1127. set_dict_item_columns(set_columns1,list_df_column1,item,"预算金额(元)",_dict.get("bidding_budget"))
  1128. # set_dict_item_columns(set_columns1,list_df_column1,item,"招标预算金额(元)","")
  1129. _quantity = ""
  1130. _uniPrice = ""
  1131. try:
  1132. _quantity = int(re.sub("[^\d]","",_attrs.get("quantity","0")))
  1133. if _quantity==0:
  1134. _quantity = ""
  1135. except Exception as e:
  1136. pass
  1137. try:
  1138. _uniPrice = float(_attrs.get("unitPrice","0"))
  1139. if _uniPrice==0:
  1140. _uniPrice = ""
  1141. except Exception as e:
  1142. pass
  1143. set_dict_item_columns(set_columns1,list_df_column1,item,"数量",_quantity)
  1144. set_dict_item_columns(set_columns1,list_df_column1,item,"单价(元)",_uniPrice)
  1145. sumPrice = ""
  1146. if _quantity!="" and _uniPrice!="":
  1147. sumPrice = _quantity*_uniPrice
  1148. set_dict_item_columns(set_columns1,list_df_column1,item,"总价(元)",sumPrice)
  1149. set_dict_item_columns(set_columns1,list_df_column1,item,"中标金额(元)",_dict.get("win_bid_price"))
  1150. set_dict_item_columns(set_columns1,list_df_column1,item,"关键词",_keyword)
  1151. for k,v in item.items():
  1152. if k not in df_data:
  1153. df_data[k] = []
  1154. df_data[k].append(v)
  1155. def filterRow(list_row,key="docid"):
  1156. set_id = set()
  1157. _filter_row = []
  1158. for row in list_row:
  1159. _v = row.get(key,"")
  1160. if _v in set_id:
  1161. continue
  1162. set_id.add(_v)
  1163. _filter_row.append(row)
  1164. return _filter_row
  1165. def fixContactPerson(df_data,list_df_columns,get_legal_person=False):
  1166. set_enterprise = set()
  1167. if len(df_data.keys())>0:
  1168. for _tenderee,_win_tenderer in zip(df_data["采购单位"],df_data["中标供应商"]):
  1169. set_enterprise.add(_tenderee)
  1170. set_enterprise.add(_win_tenderer)
  1171. if "" in set_enterprise:
  1172. set_enterprise.remove("")
  1173. if None in set_enterprise:
  1174. set_enterprise.remove(None)
  1175. dict_enterprise = getDictEnterprise(list(set_enterprise),columns_to_get = ["procurement_system"])
  1176. # conn = getConnection_oracle()
  1177. # cursor = conn.cursor()
  1178. if len(set_enterprise)>0:
  1179. for _i in range(len(df_data["采购单位"])):
  1180. _enterprise_name = df_data["采购单位"][_i]
  1181. # if df_data["招标联系人电话"][_i]=="":
  1182. # contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts")
  1183. # if contacts is not None:
  1184. # _person,_phone = getOneContact(contacts)
  1185. # df_data["招标联系人"][_i] = _person
  1186. # df_data["招标联系人电话"][_i] = _phone
  1187. df_data["采购系统"][_i] = dict_enterprise.get(_enterprise_name,{}).get("procurement_system","")
  1188. def fixZhaobiao_page_time(df_data):
  1189. ots_client = getConnect_ots()
  1190. for i in range(len(df_data["docid"])):
  1191. _docid = df_data["docid"][i]
  1192. bool_query = BoolQuery(must_queries=[TermQuery("docids",_docid)])
  1193. rows,next_token,total_count,is_all_succeed = ots_client.search("project2","project2_index",
  1194. SearchQuery(bool_query,limit=1),
  1195. ColumnsToGet(["zhao_biao_page_time","bidway","bidding_budget"],ColumnReturnType.SPECIFIED))
  1196. list_data = getRow_ots(rows)
  1197. if len(list_data)>0:
  1198. df_data["招标公告时间"][i] = list_data[0].get("zhao_biao_page_time","")
  1199. df_data["招标采购方式"][i] = list_data[0].get("bidway","")
  1200. new_bidding_budget = list_data[0].get("bidding_budget",0)
  1201. if df_data["预算金额(元)"][i]=="" or float(df_data["预算金额(元)"][i])==0:
  1202. if df_data["中标金额(元)"][i]!="" and new_bidding_budget>=float(df_data["中标金额(元)"][i]):
  1203. df_data["预算金额(元)"][i] = new_bidding_budget
  1204. # df_data["招标预算金额(元)"][i] = list_data[0].get("bidding_budget","")
  1205. def popRows(df_data):
  1206. list_pop = []
  1207. set_docid = set()
  1208. for _c in range(len(df_data["采购系统"])):
  1209. if df_data["采购系统"][_c]!="教育系统" or df_data["中标金额(元)"][_c]=="":
  1210. list_pop.append(_c)
  1211. set_docid.add(df_data["docid"][_c])
  1212. list_pop.reverse()
  1213. for k,v in df_data.items():
  1214. for _p in list_pop:
  1215. v.pop(_p)
  1216. _index = 0
  1217. set_docid1 = set()
  1218. for _i in range(len(df_data["序号"])):
  1219. _docid = df_data["docid"][_i]
  1220. if _docid not in set_docid1:
  1221. _index += 1
  1222. set_docid1.add(_docid)
  1223. df_data["序号"][_i] = _index
  1224. return len(set_docid)
  1225. df = pd.read_excel("20220927v1.4.xlsx",sheetname=1)
  1226. list_search_dict = []
  1227. for title_kw,content_kw,title_rm,content_rm in zip(df["标题采集关键词"],df["全文采集关键词"],df["标题排除词"],df["全文排除词"]):
  1228. if not isinstance(title_kw,str):
  1229. title_kw = ""
  1230. if not isinstance(content_kw,str):
  1231. content_kw = ""
  1232. if not isinstance(title_rm,str):
  1233. title_rm = ""
  1234. if not isinstance(content_rm,str):
  1235. content_rm = ""
  1236. _dict = {"title_kw":re.split("\s|,|,",str(title_kw)),
  1237. "content_kw":re.split("\s|,|,",str(content_kw)),
  1238. "title_rm":re.split("\s|,|,",str(title_rm)),
  1239. "content_rm":re.split("\s|,|,",str(content_rm))}
  1240. list_search_dict.append(_dict)
  1241. columns = ["docids","doctitles","product","province","bidway","city","district","zhao_biao_page_time","page_time","industry","info_type","tenderee","bidding_budget","project_code","project_name","win_tenderer","win_bid_price","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose","web_source_no","web_source_name","service_time","person_review","time_get_file_start","time_get_file_end","time_earnest_money_start","time_earnest_money_end"]
  1242. current_date = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())))
  1243. last_date = timeAdd(current_date,-1)
  1244. current_date = "2022-10-14"
  1245. last_date = "2023-05-07"
  1246. dict_channel = getDict_docchannel()
  1247. list_mq = []
  1248. list_query = []
  1249. for _d in list_search_dict:
  1250. print(_d)
  1251. kw_should_q = []
  1252. list_keys = []
  1253. title_kw = _d.get("title_kw",[])
  1254. if len(title_kw)>0:
  1255. kw_should_q.append(generateBoolShouldQuery(["doctitles"],title_kw,MatchPhraseQuery))
  1256. list_keys.extend(title_kw)
  1257. content_kw = _d.get("content_kw",[])
  1258. if len(content_kw)>0:
  1259. kw_should_q.append(generateBoolShouldQuery(["doctextcon","attachmenttextcon"],content_kw,MatchPhraseQuery))
  1260. list_keys.extend(content_kw)
  1261. rm_should_q = []
  1262. title_rm = _d.get("title_rm",[])
  1263. if len(title_rm)>0:
  1264. rm_should_q.append(generateBoolShouldQuery(["doctitles"],title_rm,MatchPhraseQuery))
  1265. content_rm = _d.get("content_rm",[])
  1266. if len(content_rm)>0:
  1267. rm_should_q.append(generateBoolShouldQuery(["doctextcon","attachmenttextcon"],content_rm,MatchPhraseQuery))
  1268. _query = BoolQuery(must_queries=[
  1269. # generateBoolShouldQuery(["docchannel"],[101],TermQuery),
  1270. # RangeQuery("page_time",last_date,current_date),
  1271. RangeQuery("page_time",current_date,last_date),
  1272. RangeQuery("status",201,300,True,True),
  1273. # TermQuery("docid",263568527),
  1274. # generateBoolShouldQuery(["doctitle","doctextcon","attachmenttextcon"],list_keys,MatchPhraseQuery),
  1275. BoolQuery(should_queries=kw_should_q),
  1276. # # generateBoolShouldQuery(["province"],list_province,WildcardQuery),
  1277. # # RangeQuery("crtime","%s"%last_date,"%s"%current_date)
  1278. ],
  1279. must_not_queries=rm_should_q)
  1280. list_query.append({"query":_query,"limit":50000,"keyword":list_keys})
  1281. def getFixData():
  1282. filename = "C:\\Users\\Administrator\\Desktop\\视源数据清洗.xlsx"
  1283. dict_filter = {}
  1284. dict_update = {}
  1285. df = pd.read_excel(filename,0)
  1286. for _docid,bidding_budget,win_bid_price,is_dup,is_fb in zip(df["docid"],df["检查招标金额"],df["中标金额(元)"],df["是否重复"],df["是否废标"]):
  1287. if str(is_dup) in ("是","1") or str(is_fb) in ("是","1"):
  1288. dict_filter[_docid] = 1
  1289. else:
  1290. dict_update[_docid] = {"docid":_docid,
  1291. "bidding_budget":bidding_budget,
  1292. "win_bid_price":win_bid_price}
  1293. df = pd.read_excel(filename,1)
  1294. for _docid,bidding_budget,win_bid_price,is_dup,is_fb in zip(df["docid"],df["检查招标金额"],df["中标金额(元)"],df["是否重复"],df["是否废标"]):
  1295. if str(is_dup) in ("是","1") or str(is_fb) in ("是","1"):
  1296. dict_filter[_docid] = 1
  1297. else:
  1298. dict_update[_docid] = {"docid":_docid,
  1299. "bidding_budget":bidding_budget,
  1300. "win_bid_price":win_bid_price}
  1301. return dict_filter,dict_update
  1302. list_row = getDocument(list_query,columns,table_name="project2",table_index="project2_index")
  1303. list_row = filterRow(list_row,key="uuid")
  1304. print("list_row",len(list_row))
  1305. set_columns1 = set()
  1306. list_df_columns1 = []
  1307. df_data = {}
  1308. getRowData(df_data,list_row,dict_channel,set_columns1,list_df_columns1)
  1309. print("len df_data",len(df_data["docid"]))
  1310. fixContactPerson(df_data,list_df_columns1)
  1311. # fixZhaobiao_page_time(df_data)
  1312. # size_pop = popRows(df_data)
  1313. size_pop = 0
  1314. print("size_pop",size_pop)
  1315. df = pd.DataFrame(df_data)
  1316. filename = os.path.dirname(__file__)+"/data/%s关键词数据导出数量%d_%s.xlsx"%(current_date,len(list_row)-size_pop,getCurrent_date("%Y-%m-%d_%H%M%S"))
  1317. df.to_excel(filename,columns=list_df_columns1,index=False)
  1318. host = "smtp.exmail.qq.com"
  1319. username = "vip@bidizhaobiao.com"
  1320. password = "Biaoxun66-"
  1321. receivers = ["1175730271@qq.com","747012698@qq.com","995116318@qq.com"]
  1322. receivers = ["1175730271@qq.com"]
  1323. attachs = [filename]
  1324. sendEmail(host,username,password,receivers,attachs=attachs)
  1325. import json
  1326. def getUserInfo(self,code,userid):
  1327. data = {
  1328. "codeStr": code,
  1329. "userId": userid,
  1330. }
  1331. auth = {'Content-Type': 'application/json',"Authorization": "Bearer eyJhbGciOiJSUzI1NiJ9.eyJpc3MiOiJCWEtDX1VOSVRFX0FQSSIsInN1YiI6IjIwIiwiZGF0YSI6eyJkZXZpY2VUeXBlIjoiMjAiLCJyb2xlIjoiMjAiLCJpZCI6ImJ4a2Mtd29ya2JlbmNoIn0sImV4cCI6MTk2NjY5MjE4MSwiaWF0IjoxNjUxMDQ0MTgxfQ.lFurpoMOaVmCH3GKmJ_cqrseuSaEZJBndp8PE4QpjFY6R2mfXh5e96zn_Ma3qVkp-NKAlpA1nRYl1y08xkzj07KEx4HO_Nh6v3sfGwnDyRUz35SW3yr1fhvnbh5hEpnoCVJnFQfoMXFfn780VuOcKd01lNUjABFbSLvypDv8p-gJzkrE7z5YB53tZ_lGm_dbzKihTBjG1sBYEKJT3ekYz5Px_n-lw05IkUdbckE7n6Xj7PRPTaEjzoc3PF_pgESsdcTPSDhJlLR8x63YCqbmWy6ydhwnlEQE5rHIFA2Q5VhIxUcT7fXUBZNGmzRgaqxB4dIg2369IhpeBAJ2TQ63qg"}
  1332. r = requests.patch("https://unite.bidizhaobiao.com/api/v1/authorization/services",json=data,headers=auth)
  1333. if r.status_code==200:
  1334. return r.status_code,json.loads(r.content.decode("utf8"))
  1335. return r.status_code,""
  1336. def export_15824381998():
  1337. def removeData(df_data):
  1338. list_remove_index = []
  1339. list_c = df_data.get("招标人采购系统",[])
  1340. for _c_i in range(len(list_c)):
  1341. if list_c[_c_i]=="企业采购系统":
  1342. list_remove_index.append(_c_i)
  1343. list_remove_index.reverse()
  1344. print(list_remove_index)
  1345. for k,v in df_data.items():
  1346. for _rc in list_remove_index:
  1347. v.pop(_rc)
  1348. for k,v in df_data.items():
  1349. v = v[:500]
  1350. def getRowData_zb(df_data,rows,set_line,list_keyword,dict_channel,dumplicate):
  1351. dict_line = {}
  1352. # list_data = getRow_ots(rows)
  1353. _index = 0
  1354. rows.sort(key=lambda x:x.get("docid",0),reverse=True)
  1355. set_col = set()
  1356. df_columns = []
  1357. for row in rows:
  1358. _index += 1
  1359. item = {}
  1360. _dict = row
  1361. bidding_budget = 0
  1362. sub_docs = json.loads(_dict.get("sub_docs_json","[]"))
  1363. for doc in sub_docs:
  1364. if doc.get("bidding_budget",0)>0:
  1365. bidding_budget = doc.get("bidding_budget",0)
  1366. set_dict_item_columns(set_col,df_columns,item,"省份",_dict.get("province",""))
  1367. set_dict_item_columns(set_col,df_columns,item,"城市",_dict.get("city",""))
  1368. set_dict_item_columns(set_col,df_columns,item,"公告类别",dict_channel.get(_dict.get("docchannel",""),""))
  1369. set_dict_item_columns(set_col,df_columns,item,"发布时间",_dict.get("page_time",""))
  1370. set_dict_item_columns(set_col,df_columns,item,"公告标题",_dict.get("doctitle",""))
  1371. set_dict_item_columns(set_col,df_columns,item,"采购内容",_dict.get("product",""))
  1372. set_dict_item_columns(set_col,df_columns,item,"采购方式",_dict.get("bidway",""))
  1373. set_dict_item_columns(set_col,df_columns,item,"项目金额",bidding_budget)
  1374. set_dict_item_columns(set_col,df_columns,item,"招标单位",_dict.get("tenderee",""))
  1375. set_dict_item_columns(set_col,df_columns,item,"招标联系人",_dict.get("tenderee_contact",""))
  1376. set_dict_item_columns(set_col,df_columns,item,"招标联系人电话",_dict.get("tenderee_phone",""))
  1377. set_dict_item_columns(set_col,df_columns,item,"代理单位",_dict.get("agency",""))
  1378. set_dict_item_columns(set_col,df_columns,item,"代理联系人",_dict.get("agency_contact",""))
  1379. set_dict_item_columns(set_col,df_columns,item,"代理联系人电话",_dict.get("agency_phone",""))
  1380. set_dict_item_columns(set_col,df_columns,item,"获取文件截止时间",_dict.get("time_get_file_end",""))
  1381. set_dict_item_columns(set_col,df_columns,item,"开标时间",_dict.get("time_bidopen",""))
  1382. set_dict_item_columns(set_col,df_columns,item,"链接","http://www.bidizhaobiao.com/info-%s.html"%str(_dict.get("docid","")))
  1383. for k,v in item.items():
  1384. if k not in df_data:
  1385. df_data[k] = []
  1386. df_data[k].append(v)
  1387. return df_columns
  1388. def getRowData_yc(df_data,rows,set_line,list_keyword,dict_channel,dumplicate):
  1389. dict_line = {}
  1390. # list_data = getRow_ots(rows)
  1391. _index = 0
  1392. rows.sort(key=lambda x:x.get("docid",0),reverse=True)
  1393. set_col = set()
  1394. df_columns = []
  1395. for row in rows:
  1396. _index += 1
  1397. item = {}
  1398. _dict = row
  1399. _type = _dict.get("type",0)
  1400. if _type==0:
  1401. _type = "周期预测"
  1402. elif _type==1:
  1403. _type = "采购意向"
  1404. elif _type==2:
  1405. _type = "到期预测"
  1406. else:
  1407. _type = "废标重招"
  1408. set_dict_item_columns(set_col,df_columns,item,"省份",_dict.get("province",""))
  1409. set_dict_item_columns(set_col,df_columns,item,"城市",_dict.get("city",""))
  1410. set_dict_item_columns(set_col,df_columns,item,"预测类型",_type)
  1411. set_dict_item_columns(set_col,df_columns,item,"预计开始时间",_dict.get("may_begin",""))
  1412. set_dict_item_columns(set_col,df_columns,item,"预计结束时间",_dict.get("may_end",""))
  1413. set_dict_item_columns(set_col,df_columns,item,"采购内容",_dict.get("product",""))
  1414. set_dict_item_columns(set_col,df_columns,item,"上次采购时间",_dict.get("last_page_time",""))
  1415. set_dict_item_columns(set_col,df_columns,item,"招标单位",_dict.get("tenderee",""))
  1416. set_dict_item_columns(set_col,df_columns,item,"招标联系人",_dict.get("tenderee_contact",""))
  1417. set_dict_item_columns(set_col,df_columns,item,"招标联系人电话",_dict.get("tenderee_phone",""))
  1418. set_dict_item_columns(set_col,df_columns,item,"代理单位",_dict.get("agency",""))
  1419. set_dict_item_columns(set_col,df_columns,item,"代理联系人",_dict.get("agency_contact",""))
  1420. set_dict_item_columns(set_col,df_columns,item,"代理联系人电话",_dict.get("agency_phone",""))
  1421. for k,v in item.items():
  1422. if k not in df_data:
  1423. df_data[k] = []
  1424. df_data[k].append(v)
  1425. return df_columns
  1426. def getRowData_win(df_data,rows,set_line,list_keyword,dict_channel,dumplicate):
  1427. dict_line = {}
  1428. # list_data = getRow_ots(rows)
  1429. _index = 0
  1430. rows.sort(key=lambda x:x.get("docid",0),reverse=True)
  1431. set_col = set()
  1432. df_columns = []
  1433. for row in rows:
  1434. _index += 1
  1435. item = {}
  1436. _dict = row
  1437. bidding_budget = 0
  1438. sub_docs = json.loads(_dict.get("sub_docs_json","[]"))
  1439. zbr = ""
  1440. zbje = ""
  1441. for doc in sub_docs:
  1442. if doc.get("bidding_budget",0)>0:
  1443. bidding_budget = doc.get("bidding_budget",0)
  1444. if doc.get("win_tenderer") is not None:
  1445. zbr += str(doc.get("win_tenderer"))+"\r\n"
  1446. zbje += str(doc.get("win_bid_price",0))+"\r\n"
  1447. if doc.get("second_tenderer") is not None:
  1448. zbr += str(doc.get("second_tenderer"))+"\r\n"
  1449. zbje += str(doc.get("second_bid_price",0))+"\r\n"
  1450. if doc.get("third_tenderer") is not None:
  1451. zbr += str(doc.get("third_tenderer"))+"\r\n"
  1452. zbje += str(doc.get("third_bid_price",0))+"\r\n"
  1453. set_dict_item_columns(set_col,df_columns,item,"省份",_dict.get("province",""))
  1454. set_dict_item_columns(set_col,df_columns,item,"城市",_dict.get("city",""))
  1455. set_dict_item_columns(set_col,df_columns,item,"公告类别",dict_channel.get(_dict.get("docchannel",""),""))
  1456. set_dict_item_columns(set_col,df_columns,item,"发布时间",_dict.get("page_time",""))
  1457. set_dict_item_columns(set_col,df_columns,item,"公告标题",_dict.get("doctitle",""))
  1458. set_dict_item_columns(set_col,df_columns,item,"采购内容",_dict.get("product",""))
  1459. set_dict_item_columns(set_col,df_columns,item,"采购方式",_dict.get("bidway",""))
  1460. set_dict_item_columns(set_col,df_columns,item,"项目金额",bidding_budget)
  1461. set_dict_item_columns(set_col,df_columns,item,"招标单位",_dict.get("tenderee",""))
  1462. set_dict_item_columns(set_col,df_columns,item,"招标联系人",_dict.get("tenderee_contact",""))
  1463. set_dict_item_columns(set_col,df_columns,item,"招标联系人电话",_dict.get("tenderee_phone",""))
  1464. set_dict_item_columns(set_col,df_columns,item,"代理单位",_dict.get("agency",""))
  1465. set_dict_item_columns(set_col,df_columns,item,"代理联系人",_dict.get("agency_contact",""))
  1466. set_dict_item_columns(set_col,df_columns,item,"代理联系人电话",_dict.get("agency_phone",""))
  1467. set_dict_item_columns(set_col,df_columns,item,"获取文件截止时间",_dict.get("time_get_file_end",""))
  1468. set_dict_item_columns(set_col,df_columns,item,"开标时间",_dict.get("time_bidopen",""))
  1469. set_dict_item_columns(set_col,df_columns,item,"中标公司",zbr)
  1470. set_dict_item_columns(set_col,df_columns,item,"中标金额",zbje)
  1471. set_dict_item_columns(set_col,df_columns,item,"链接","http://www.bidizhaobiao.com/info-%s.html"%str(_dict.get("docid","")))
  1472. for k,v in item.items():
  1473. if k not in df_data:
  1474. df_data[k] = []
  1475. df_data[k].append(v)
  1476. return df_columns
  1477. def fixContactPerson(df_data,list_df_columns,get_legal_person=False):
  1478. set_enterprise = set()
  1479. if len(df_data.keys())>0:
  1480. for _tenderee,_agency in zip(df_data["招标单位"],df_data["代理单位"]):
  1481. set_enterprise.add(_tenderee)
  1482. set_enterprise.add(_agency)
  1483. if "" in set_enterprise:
  1484. set_enterprise.remove("")
  1485. if None in set_enterprise:
  1486. set_enterprise.remove(None)
  1487. dict_enterprise = getDictEnterprise(list(set_enterprise),columns_to_get = ["procurement_system","company_org_type","reg_capital","actual_capital","contacts","estiblish_time","social_staff_num","zhong_biao_number","tou_biao_number","credit_code","legal_person_name","phone_number"])
  1488. # conn = getConnection_oracle()
  1489. # cursor = conn.cursor()
  1490. if len(set_enterprise)>0:
  1491. for _i in range(len(df_data["招标单位"])):
  1492. _enterprise_name = df_data["招标单位"][_i]
  1493. if df_data["招标联系人电话"][_i]=="":
  1494. contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts")
  1495. if contacts is not None:
  1496. _person,_phone = getOneContact(contacts)
  1497. df_data["招标联系人"][_i] = _person
  1498. df_data["招标联系人电话"][_i] = _phone
  1499. _enterprise_name = df_data["代理单位"][_i]
  1500. if df_data["代理联系人电话"][_i]=="":
  1501. contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts")
  1502. if contacts is not None:
  1503. _person,_phone = getOneContact(contacts)
  1504. df_data["代理联系人"][_i] = _person
  1505. df_data["代理联系人电话"][_i] = _phone
  1506. # _enterprise_name = df_data["中标单位"][_i]
  1507. # if get_legal_person:
  1508. # _person = dict_enterprise.get(_enterprise_name,{}).get("legal_person_name","")
  1509. # _phone = dict_enterprise.get(_enterprise_name,{}).get("phone_number","")
  1510. # if len(_phone)==11 and _phone[0]=="1":
  1511. # df_data["中标单位联系人"][_i] = _person
  1512. # df_data["中标单位联系电话"][_i] = _phone
  1513. # else:
  1514. # if df_data["中标单位联系电话"][_i]=="":
  1515. # contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts")
  1516. # if contacts is not None:
  1517. # _person,_phone = getOneContact(contacts,mobile_only=True)
  1518. # df_data["中标单位联系人"][_i] = _person
  1519. # df_data["中标单位联系电话"][_i] = _phone
  1520. log("start export 15824381998:>>>>>>>>>>>>>>>")
  1521. current_date = getCurrent_date(format="%Y-%m-%d")
  1522. last_date = timeAdd(current_date,-2)
  1523. start_time = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())-7*24*60*60))
  1524. if current_date<="2025-05-01":
  1525. for i in range(10):
  1526. try:
  1527. # start_time='2022-07-22'
  1528. # current_date = '2022-07-28'
  1529. log("start exporting export2:=================")
  1530. # columns = ["doctitle","doctextcon","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose"]
  1531. dict_channel = getDict_docchannel()
  1532. list_query = []
  1533. str_keyword = '''
  1534. 光伏组件
  1535. '''
  1536. list_keyword = splitIntoList(str_keyword,"[\s\n、,,]")
  1537. str_not_keyword = '''
  1538. 清洗机器人
  1539. '''
  1540. list_not_key = splitIntoList(str_not_keyword,"[\s\n、,,]")
  1541. tenderee_keywrod = "医院、大学、高校、高中"
  1542. list_t_key = splitIntoList(tenderee_keywrod,"[\s\n、,,]")
  1543. log(str(list_keyword))
  1544. columns = ["doctitle","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_get_file_end","time_bidopen"]
  1545. bool_query = BoolQuery(must_queries=[
  1546. generateBoolShouldQuery(["doctitle"],list_keyword,MatchPhraseQuery),
  1547. generateBoolShouldQuery(["docchannel"],[51,52,102,103,104,105,114],TermQuery),
  1548. RangeQuery("crtime",last_date+" 21:00:00",current_date+" 21:00:00",True,True),
  1549. RangeQuery("status",201,300,True,True),
  1550. # TermQuery("procurement_system","公安系统"),
  1551. # generateBoolShouldQuery(["province"],["湖南"],TermQuery),
  1552. # generateBoolShouldQuery(["tenderee"],list_t_key,WildcardQuery)
  1553. # generateBoolShouldQuery(["docchannel"],[101,118,119],TermQuery),
  1554. ],
  1555. must_not_queries=[
  1556. generateBoolShouldQuery(["doctitle","doctextcon","attachmenttextcon"],list_not_key,MatchPhraseQuery),
  1557. ]
  1558. )
  1559. list_row = getDocument([{"query":bool_query,"limit":500}],columns,thread_count=1)
  1560. log("get document %d rows"%len(list_row))
  1561. df_data = {}
  1562. set_line = set()
  1563. # list_row = filterRow(list_row)
  1564. df_columns = getRowData_zb(df_data,list_row,set_line,list_keyword,dict_channel,True)
  1565. fixContactPerson(df_data,df_columns)
  1566. df = pd.DataFrame(df_data)
  1567. df_data_filter = {}
  1568. for c in df_columns:
  1569. if c not in df_data_filter:
  1570. df_data_filter[c] = []
  1571. for tenderee_i in range(len(df_data["招标单位"])):
  1572. if df_data["招标单位"][tenderee_i] in ("国家电力投资集团有限公司","中国大唐集团有限公司"):
  1573. for c in df_columns:
  1574. df_data_filter[c].append(df_data[c][tenderee_i])
  1575. df_f = pd.DataFrame(df_data_filter)
  1576. time_end = timeAdd(current_date,60)
  1577. columns = ["project_name","type","province","city","bidding_budget","city","demand","last_page_time"]
  1578. bool_query = BoolQuery(must_queries=[
  1579. generateBoolShouldQuery(["demand","product"],list_keyword,MatchPhraseQuery),
  1580. RangeQuery("may_end",range_from=current_date),
  1581. RangeQuery("may_end",range_to=time_end),
  1582. # TermQuery("procurement_system","公安系统"),
  1583. # generateBoolShouldQuery(["province"],["湖南"],TermQuery),
  1584. # generateBoolShouldQuery(["tenderee"],list_t_key,WildcardQuery)
  1585. # generateBoolShouldQuery(["docchannel"],[101,118,119],TermQuery),
  1586. ],
  1587. must_not_queries=[
  1588. generateBoolShouldQuery(["demand","product"],list_not_key,MatchPhraseQuery),
  1589. ]
  1590. )
  1591. list_row = getDocument([{"query":bool_query,"limit":500}],columns,table_name="preproject",table_index="preproject_index",thread_count=1,sort_column="may_end")
  1592. log("get document %d rows"%len(list_row))
  1593. df_data = {}
  1594. set_line = set()
  1595. # list_row = filterRow(list_row)
  1596. df_columns_yc = getRowData_yc(df_data,list_row,set_line,list_keyword,dict_channel,True)
  1597. fixContactPerson(df_data,df_columns_yc)
  1598. df1 = pd.DataFrame(df_data)
  1599. df_data_filter = {}
  1600. for c in df_columns_yc:
  1601. if c not in df_data_filter:
  1602. df_data_filter[c] = []
  1603. for tenderee_i in range(len(df_data["招标单位"])):
  1604. if df_data["招标单位"][tenderee_i] in ("国家电力投资集团有限公司","中国大唐集团有限公司"):
  1605. for c in df_columns_yc:
  1606. if c not in df_data_filter:
  1607. df_data_filter[c] = []
  1608. df_data_filter[c].append(df_data[c][tenderee_i])
  1609. df1_f = pd.DataFrame(df_data_filter)
  1610. filename = os.path.dirname(__file__)+"/data/%s年%s招标数据导出%s.xlsx"%(start_time[:4],current_date,getCurrent_date(format="%Y-%m-%d_%H%M%S"))
  1611. with pd.ExcelWriter(filename) as writer:
  1612. df.to_excel(writer,sheet_name="招标数据",columns=df_columns if not df.empty else None)
  1613. df_f.to_excel(writer,sheet_name="特定甲方招标数据",columns=df_columns if not df_f.empty else None)
  1614. df1.to_excel(writer,sheet_name="项目预测",columns=df_columns_yc[:-3] if not df1.empty else None)
  1615. df1_f.to_excel(writer,sheet_name="特定甲方项目预测",columns=df_columns_yc[:-3] if not df1_f.empty else None)
  1616. log(str(filename))
  1617. host = "smtp.exmail.qq.com"
  1618. username = "vip@bidizhaobiao.com"
  1619. password = "Biaoxun66-"
  1620. receivers = ["1175730271@qq.com","565748324@qq.com","1396488964@qq.com"]
  1621. # receivers = ["1175730271@qq.com"]
  1622. attachs = [filename]
  1623. sendEmail(host,username,password,receivers,attachs=attachs)
  1624. df_data_filter = {}
  1625. for c in df_columns:
  1626. if c not in df_data_filter:
  1627. df_data_filter[c] = []
  1628. for tenderee_i in range(len(df_data["招标单位"])):
  1629. if df_data["招标单位"][tenderee_i] in ("国家电力投资集团有限公司","国家能源投资集团有限责任公司","国家能源集团龙源阿里新能源(阿里)有限公司"):
  1630. for c in df_columns:
  1631. df_data_filter[c].append(df_data[c][tenderee_i])
  1632. df_f = pd.DataFrame(df_data_filter)
  1633. df_data_filter1 = {}
  1634. for c in df_columns_yc:
  1635. if c not in df_data_filter1:
  1636. df_data_filter1[c] = []
  1637. for tenderee_i in range(len(df_data["招标单位"])):
  1638. if df_data["招标单位"][tenderee_i] in ("国家电力投资集团有限公司","国家能源投资集团有限责任公司","国家能源集团龙源阿里新能源(阿里)有限公司"):
  1639. for c in df_columns_yc:
  1640. if c not in df_data_filter1:
  1641. df_data_filter1[c] = []
  1642. df_data_filter1[c].append(df_data[c][tenderee_i])
  1643. df1_f1 = pd.DataFrame(df_data_filter1)
  1644. filename = os.path.dirname(__file__)+"/data/%s年%s招标数据导出%s.xlsx"%(start_time[:4],current_date,getCurrent_date(format="%Y-%m-%d_%H%M%S"))
  1645. with pd.ExcelWriter(filename) as writer:
  1646. df_f.to_excel(writer,sheet_name="特定甲方招标数据",columns=df_columns if not df_f.empty else None)
  1647. df1_f1.to_excel(writer,sheet_name="特定甲方项目预测",columns=df_columns_yc[:-3] if not df1_f.empty else None)
  1648. log(str(filename))
  1649. host = "smtp.exmail.qq.com"
  1650. username = "vip@bidizhaobiao.com"
  1651. password = "Biaoxun66-"
  1652. receivers = ["450604061@qq.com"]
  1653. # receivers = ["1175730271@qq.com"]
  1654. attachs = [filename]
  1655. sendEmail(host,username,password,receivers,attachs=attachs)
  1656. columns = ["doctitle","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_get_file_end","time_bidopen"]
  1657. bool_query = BoolQuery(must_queries=[
  1658. generateBoolShouldQuery(["doctitle"],list_keyword,MatchPhraseQuery),
  1659. generateBoolShouldQuery(["docchannel"],[101,118,119,120,121,122],TermQuery),
  1660. RangeQuery("crtime",last_date+" 21:00:00",current_date+" 21:00:00",True,True),
  1661. RangeQuery("status",201,300,True,True),
  1662. # TermQuery("procurement_system","公安系统"),
  1663. # generateBoolShouldQuery(["province"],["湖南"],TermQuery),
  1664. # generateBoolShouldQuery(["tenderee"],list_t_key,WildcardQuery)
  1665. # generateBoolShouldQuery(["docchannel"],[101,118,119],TermQuery),
  1666. ],
  1667. must_not_queries=[
  1668. generateBoolShouldQuery(["doctitle","doctextcon","attachmenttextcon"],list_not_key,MatchPhraseQuery),
  1669. ]
  1670. )
  1671. list_row = getDocument([{"query":bool_query,"limit":500}],columns,thread_count=1)
  1672. log("get document %d rows"%len(list_row))
  1673. df_data = {}
  1674. set_line = set()
  1675. # list_row = filterRow(list_row)
  1676. df_columns = getRowData_win(df_data,list_row,set_line,list_keyword,dict_channel,True)
  1677. fixContactPerson(df_data,df_columns)
  1678. df = pd.DataFrame(df_data)
  1679. # 调整行高
  1680. # for i in range(1, len(df) + 1):
  1681. # df.style.set_properties(max_col=100, min_col=1, max_row=i, min_row=i, h=20, wrap_text=True) # 设置行高为20,自动换行
  1682. styled_df = (df.style
  1683. .set_properties(**{'text-align': 'center', 'white-space': 'pre-line'})
  1684. .set_table_styles([{'selector': 'td', 'props': [('text-align', 'center')]}]))
  1685. filename = os.path.dirname(__file__)+"/data/%s年%s中标数据导出%s.xlsx"%(start_time[:4],current_date,getCurrent_date(format="%Y-%m-%d_%H%M%S"))
  1686. with pd.ExcelWriter(filename,engine="openpyxl") as writer:
  1687. styled_df.to_excel(writer,columns = df_columns,index=False)
  1688. adjust_excel(filename,filename,columns=["A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R"])
  1689. host = "smtp.exmail.qq.com"
  1690. username = "vip@bidizhaobiao.com"
  1691. password = "Biaoxun66-"
  1692. receivers = ["1175730271@qq.com","565748324@qq.com","1396488964@qq.com","1141385052@qq.com","1713739820@qq.com"]
  1693. # receivers = ["1175730271@qq.com"]
  1694. attachs = [filename]
  1695. sendEmail(host,username,password,receivers,attachs=attachs)
  1696. break
  1697. except Exception as e:
  1698. traceback.print_exc()
  1699. def export_13510123669():
  1700. def removeData(df_data):
  1701. list_remove_index = []
  1702. list_c = df_data.get("招标人采购系统",[])
  1703. for _c_i in range(len(list_c)):
  1704. if list_c[_c_i]=="企业采购系统":
  1705. list_remove_index.append(_c_i)
  1706. list_remove_index.reverse()
  1707. print(list_remove_index)
  1708. for k,v in df_data.items():
  1709. for _rc in list_remove_index:
  1710. v.pop(_rc)
  1711. for k,v in df_data.items():
  1712. v = v[:500]
  1713. def getRowData_zb(df_data,rows,set_line,list_keyword,dict_channel,dumplicate):
  1714. dict_line = {}
  1715. # list_data = getRow_ots(rows)
  1716. _index = 0
  1717. rows.sort(key=lambda x:x.get("docid",0),reverse=True)
  1718. set_col = set()
  1719. df_columns = []
  1720. for row in rows:
  1721. _index += 1
  1722. item = {}
  1723. _dict = row
  1724. bidding_budget = 0
  1725. sub_docs = json.loads(_dict.get("sub_docs_json","[]"))
  1726. zbr = ""
  1727. zbje = 0
  1728. win_tenderer_manager = ""
  1729. win_tenderer_phone = ""
  1730. for doc in sub_docs:
  1731. if doc.get("bidding_budget",0)>0:
  1732. bidding_budget = doc.get("bidding_budget",0)
  1733. if doc.get("win_tenderer") is not None:
  1734. zbr = str(doc.get("win_tenderer",""))
  1735. zbje = doc.get("win_bid_price",0)
  1736. win_tenderer_manager = str(doc.get("win_tenderer_manager",""))
  1737. win_tenderer_phone = str(doc.get("win_tenderer_phone",""))
  1738. doc_type = ""
  1739. if dict_channel.get(_dict.get("docchannel",""),"")[:2] in ("中标","合同","候选","废标","开标","验收"):
  1740. doc_type = "中标"
  1741. else:
  1742. doc_type = "招标"
  1743. set_dict_item_columns(set_col,df_columns,item,"序号",_index)
  1744. set_dict_item_columns(set_col,df_columns,item,"省份",_dict.get("province","未知"))
  1745. set_dict_item_columns(set_col,df_columns,item,"城市",_dict.get("city","未知"))
  1746. set_dict_item_columns(set_col,df_columns,item,"公告类型",doc_type)
  1747. set_dict_item_columns(set_col,df_columns,item,"公告名称",_dict.get("doctitle",""))
  1748. set_dict_item_columns(set_col,df_columns,item,"项目类型",'')
  1749. set_dict_item_columns(set_col,df_columns,item,"项目编号",_dict.get("project_code"))
  1750. set_dict_item_columns(set_col,df_columns,item,"发布时间",_dict.get("page_time",""))
  1751. set_dict_item_columns(set_col,df_columns,item,"信息类型","%s-%s"%(dict_channel.get(_dict.get("docchannel",""),""),_dict.get("bidway","")))
  1752. set_dict_item_columns(set_col,df_columns,item,"报名开始时间",_dict.get("time_registration_start",""))
  1753. set_dict_item_columns(set_col,df_columns,item,"报名截止时间",_dict.get("time_registration_end",""))
  1754. set_dict_item_columns(set_col,df_columns,item,"投标开始时间",_dict.get("time_bidstart",""))
  1755. set_dict_item_columns(set_col,df_columns,item,"投标结束时间",_dict.get("time_bidopen",""))
  1756. set_dict_item_columns(set_col,df_columns,item,"预算金额(万元)",bidding_budget/10000)
  1757. set_dict_item_columns(set_col,df_columns,item,"成交金额(万元)",zbje/10000)
  1758. set_dict_item_columns(set_col,df_columns,item,"招标单位",_dict.get("tenderee",""))
  1759. set_dict_item_columns(set_col,df_columns,item,"招标单位所在地","")
  1760. set_dict_item_columns(set_col,df_columns,item,"招标联系人",_dict.get("tenderee_contact",""))
  1761. set_dict_item_columns(set_col,df_columns,item,"招标联系人电话",_dict.get("tenderee_phone",""))
  1762. set_dict_item_columns(set_col,df_columns,item,"招标联系人1","")
  1763. set_dict_item_columns(set_col,df_columns,item,"招标联系人电话1","")
  1764. set_dict_item_columns(set_col,df_columns,item,"招标联系人2","")
  1765. set_dict_item_columns(set_col,df_columns,item,"招标联系人电话2","")
  1766. set_dict_item_columns(set_col,df_columns,item,"招标联系人3","")
  1767. set_dict_item_columns(set_col,df_columns,item,"招标联系人电话3","")
  1768. set_dict_item_columns(set_col,df_columns,item,"代理单位",_dict.get("agency",""))
  1769. set_dict_item_columns(set_col,df_columns,item,"代理单位所在地","")
  1770. set_dict_item_columns(set_col,df_columns,item,"代理联系人",_dict.get("agency_contact",""))
  1771. set_dict_item_columns(set_col,df_columns,item,"代理联系人电话",_dict.get("agency_phone",""))
  1772. set_dict_item_columns(set_col,df_columns,item,"代理联系人1","")
  1773. set_dict_item_columns(set_col,df_columns,item,"代理联系人电话1","")
  1774. set_dict_item_columns(set_col,df_columns,item,"代理联系人2","")
  1775. set_dict_item_columns(set_col,df_columns,item,"代理联系人电话2","")
  1776. set_dict_item_columns(set_col,df_columns,item,"代理联系人3","")
  1777. set_dict_item_columns(set_col,df_columns,item,"代理联系人电话3","")
  1778. set_dict_item_columns(set_col,df_columns,item,"中标单位",zbr)
  1779. set_dict_item_columns(set_col,df_columns,item,"中标单位所在地","")
  1780. set_dict_item_columns(set_col,df_columns,item,"项目中标联系人",win_tenderer_manager)
  1781. set_dict_item_columns(set_col,df_columns,item,"项目中标联系电话",win_tenderer_phone)
  1782. set_dict_item_columns(set_col,df_columns,item,"中标法人","")
  1783. set_dict_item_columns(set_col,df_columns,item,"中标法人电话","")
  1784. set_dict_item_columns(set_col,df_columns,item,"中标股东","")
  1785. set_dict_item_columns(set_col,df_columns,item,"中标股东电话","")
  1786. set_dict_item_columns(set_col,df_columns,item,"中标高管","")
  1787. set_dict_item_columns(set_col,df_columns,item,"中标高管电话","")
  1788. set_dict_item_columns(set_col,df_columns,item,"中标联系人1","")
  1789. set_dict_item_columns(set_col,df_columns,item,"中标联系人电话1","")
  1790. set_dict_item_columns(set_col,df_columns,item,"中标联系人2","")
  1791. set_dict_item_columns(set_col,df_columns,item,"中标联系人电话2","")
  1792. set_dict_item_columns(set_col,df_columns,item,"中标联系人3","")
  1793. set_dict_item_columns(set_col,df_columns,item,"中标联系人电话3","")
  1794. set_dict_item_columns(set_col,df_columns,item,"招标/采购内容",_dict.get("product",""))
  1795. set_dict_item_columns(set_col,df_columns,item,"资质","")
  1796. set_dict_item_columns(set_col,df_columns,item,"公告详情链接","http://www.bidizhaobiao.com/info-%s.html"%str(_dict.get("docid","")))
  1797. set_dict_item_columns(set_col,df_columns,item,"招投标分析(合作商)","")
  1798. set_dict_item_columns(set_col,df_columns,item,"历史中标单位","")
  1799. set_dict_item_columns(set_col,df_columns,item,"应对策略(作为填)","")
  1800. for k,v in item.items():
  1801. if k not in df_data:
  1802. df_data[k] = []
  1803. df_data[k].append(v)
  1804. return df_columns
  1805. def getRowData_yc(df_data,rows,set_line,list_keyword,dict_channel,dumplicate):
  1806. dict_line = {}
  1807. # list_data = getRow_ots(rows)
  1808. _index = 0
  1809. rows.sort(key=lambda x:x.get("docid",0),reverse=True)
  1810. set_col = set()
  1811. df_columns = []
  1812. for row in rows:
  1813. _index += 1
  1814. item = {}
  1815. _dict = row
  1816. _type = _dict.get("type",0)
  1817. if _type==0:
  1818. _type = "周期预测"
  1819. elif _type==1:
  1820. _type = "采购意向"
  1821. elif _type==2:
  1822. _type = "到期预测"
  1823. else:
  1824. _type = "废标重招"
  1825. set_dict_item_columns(set_col,df_columns,item,"省份",_dict.get("province",""))
  1826. set_dict_item_columns(set_col,df_columns,item,"城市",_dict.get("city",""))
  1827. set_dict_item_columns(set_col,df_columns,item,"类型",'')
  1828. set_dict_item_columns(set_col,df_columns,item,"预测类型",_type)
  1829. set_dict_item_columns(set_col,df_columns,item,"项目类型","")
  1830. set_dict_item_columns(set_col,df_columns,item,"预计开始时间",_dict.get("may_begin",""))
  1831. set_dict_item_columns(set_col,df_columns,item,"预计结束时间",_dict.get("may_end",""))
  1832. set_dict_item_columns(set_col,df_columns,item,"采购内容",_dict.get("product",""))
  1833. set_dict_item_columns(set_col,df_columns,item,"上次采购时间",_dict.get("last_page_time",""))
  1834. set_dict_item_columns(set_col,df_columns,item,"招标单位",_dict.get("tenderee",""))
  1835. set_dict_item_columns(set_col,df_columns,item,"招标联系人",_dict.get("tenderee_contact",""))
  1836. set_dict_item_columns(set_col,df_columns,item,"招标联系人电话",_dict.get("tenderee_phone",""))
  1837. set_dict_item_columns(set_col,df_columns,item,"代理单位",_dict.get("agency",""))
  1838. set_dict_item_columns(set_col,df_columns,item,"代理联系人",_dict.get("agency_contact",""))
  1839. set_dict_item_columns(set_col,df_columns,item,"代理联系人电话",_dict.get("agency_phone",""))
  1840. set_dict_item_columns(set_col,df_columns,item,"招投标分析(合作商)","")
  1841. set_dict_item_columns(set_col,df_columns,item,"历史中标单位","")
  1842. set_dict_item_columns(set_col,df_columns,item,"应对策略(作为填)","")
  1843. for k,v in item.items():
  1844. if k not in df_data:
  1845. df_data[k] = []
  1846. df_data[k].append(v)
  1847. return df_columns
  1848. def fixContactPerson(df_data,list_df_columns,get_legal_person=False):
  1849. set_enterprise = set()
  1850. if len(df_data.keys())>0:
  1851. for _tenderee,_agency in zip(df_data["招标单位"],df_data["代理单位"]):
  1852. set_enterprise.add(_tenderee)
  1853. set_enterprise.add(_agency)
  1854. if "" in set_enterprise:
  1855. set_enterprise.remove("")
  1856. if None in set_enterprise:
  1857. set_enterprise.remove(None)
  1858. dict_enterprise = getDictEnterprise(list(set_enterprise),columns_to_get = ["procurement_system","company_org_type","reg_capital","actual_capital","contacts","estiblish_time","social_staff_num","zhong_biao_number","tou_biao_number","credit_code","legal_person_name","phone_number"])
  1859. # conn = getConnection_oracle()
  1860. # cursor = conn.cursor()
  1861. if len(set_enterprise)>0:
  1862. for _i in range(len(df_data["招标单位"])):
  1863. _enterprise_name = df_data["招标单位"][_i]
  1864. if df_data["招标联系人电话"][_i]=="":
  1865. contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts")
  1866. if contacts is not None:
  1867. _person,_phone = getOneContact(contacts)
  1868. df_data["招标联系人"][_i] = _person
  1869. df_data["招标联系人电话"][_i] = _phone
  1870. df_data["项目类型"][_i] = dict_enterprise.get(_enterprise_name,{}).get("procurement_system")
  1871. _enterprise_name = df_data["代理单位"][_i]
  1872. if df_data["代理联系人电话"][_i]=="":
  1873. contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts")
  1874. if contacts is not None:
  1875. _person,_phone = getOneContact(contacts)
  1876. df_data["代理联系人"][_i] = _person
  1877. df_data["代理联系人电话"][_i] = _phone
  1878. # _enterprise_name = df_data["中标单位"][_i]
  1879. # if get_legal_person:
  1880. # _person = dict_enterprise.get(_enterprise_name,{}).get("legal_person_name","")
  1881. # _phone = dict_enterprise.get(_enterprise_name,{}).get("phone_number","")
  1882. # if len(_phone)==11 and _phone[0]=="1":
  1883. # df_data["中标单位联系人"][_i] = _person
  1884. # df_data["中标单位联系电话"][_i] = _phone
  1885. # else:
  1886. # if df_data["中标单位联系电话"][_i]=="":
  1887. # contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts")
  1888. # if contacts is not None:
  1889. # _person,_phone = getOneContact(contacts,mobile_only=True)
  1890. # df_data["中标单位联系人"][_i] = _person
  1891. # df_data["中标单位联系电话"][_i] = _phone
  1892. def fixContactPerson1(df_data,list_df_columns,get_legal_person=False):
  1893. set_enterprise = set()
  1894. if len(df_data.keys())>0:
  1895. for _tenderee,_agency,_win in zip(df_data["招标单位"],df_data["代理单位"],df_data["中标单位"]):
  1896. set_enterprise.add(_tenderee)
  1897. set_enterprise.add(_agency)
  1898. set_enterprise.add(_win)
  1899. if "" in set_enterprise:
  1900. set_enterprise.remove("")
  1901. if None in set_enterprise:
  1902. set_enterprise.remove(None)
  1903. dict_enterprise = getDictEnterprise(list(set_enterprise),columns_to_get = ["procurement_system","company_org_type","reg_capital","actual_capital","contacts","estiblish_time","social_staff_num","zhong_biao_number","tou_biao_number","credit_code","legal_person_name","phone_number","reg_location","province","city"])
  1904. # print("dict_enterprise",dict_enterprise)
  1905. # conn = getConnection_oracle()
  1906. # cursor = conn.cursor()
  1907. if len(set_enterprise)>0:
  1908. for _i in range(len(df_data["招标单位"])):
  1909. _enterprise_name = df_data["招标单位"][_i]
  1910. contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts")
  1911. if contacts is not None:
  1912. for _index in range(min(len(contacts),3)):
  1913. contact_person,phone_no = contacts[_index].get("contact_person"),contacts[_index].get("phone_no")
  1914. is_legal_person,is_manager,is_shareholder = contacts[_index].get("is_legal_person",0),contacts[_index].get("is_manager",0),contacts[_index].get("is_shareholder",0)
  1915. df_data["招标联系人%s"%str(_index+1)][_i] = contact_person
  1916. df_data["招标联系人电话%s"%str(_index+1)][_i] = phone_no
  1917. df_data["招标单位所在地"][_i] = "%s-%s"%(dict_enterprise.get(_enterprise_name,{}).get("province",""),dict_enterprise.get(_enterprise_name,{}).get("city",""))
  1918. df_data["项目类型"][_i] = dict_enterprise.get(_enterprise_name,{}).get("procurement_system")
  1919. _enterprise_name = df_data["代理单位"][_i]
  1920. contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts")
  1921. if contacts is not None:
  1922. for _index in range(min(len(contacts),3)):
  1923. contact_person,phone_no = contacts[_index].get("contact_person"),contacts[_index].get("phone_no")
  1924. is_legal_person,is_manager,is_shareholder = contacts[_index].get("is_legal_person",0),contacts[_index].get("is_manager",0),contacts[_index].get("is_shareholder",0)
  1925. df_data["代理联系人%s"%str(_index+1)][_i] = contact_person
  1926. df_data["代理联系人电话%s"%str(_index+1)][_i] = phone_no
  1927. df_data["代理单位所在地"][_i] = "%s-%s"%(dict_enterprise.get(_enterprise_name,{}).get("province",""),dict_enterprise.get(_enterprise_name,{}).get("city",""))
  1928. _enterprise_name = df_data["中标单位"][_i]
  1929. df_data["中标单位所在地"][_i] = "%s-%s"%(dict_enterprise.get(_enterprise_name,{}).get("province",""),dict_enterprise.get(_enterprise_name,{}).get("city",""))
  1930. _person = dict_enterprise.get(_enterprise_name,{}).get("legal_person_name","")
  1931. _phone = dict_enterprise.get(_enterprise_name,{}).get("phone_number","")
  1932. if len(_phone)==11 and _phone[0]=="1":
  1933. df_data["中标法人"][_i] = _person
  1934. df_data["中标法人电话"][_i] = _phone
  1935. contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts")
  1936. if contacts is not None:
  1937. for _index in range(min(len(contacts),3)):
  1938. contact_person,phone_no = contacts[_index].get("contact_person"),contacts[_index].get("phone_no")
  1939. is_legal_person,is_manager,is_shareholder = contacts[_index].get("is_legal_person",0),contacts[_index].get("is_manager",0),contacts[_index].get("is_shareholder",0)
  1940. if is_manager:
  1941. df_data["中标高管"][_i] = contact_person
  1942. df_data["中标高管电话"][_i] = phone_no
  1943. if is_shareholder:
  1944. df_data["中标股东"][_i] = contact_person
  1945. df_data["中标股东电话"][_i] = phone_no
  1946. df_data["中标联系人%s"%str(_index+1)][_i] = contact_person
  1947. df_data["中标联系人电话%s"%str(_index+1)][_i] = phone_no
  1948. log("start export 15824381998:>>>>>>>>>>>>>>>")
  1949. current_date = getCurrent_date(format="%Y-%m-%d")
  1950. start_time = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())-7*24*60*60))
  1951. if current_date<="2025-02-04" and datetime.datetime.now().weekday() in (1,4):
  1952. for i in range(10):
  1953. try:
  1954. if datetime.datetime.now().weekday()==1:
  1955. last_date = timeAdd(current_date,-4)
  1956. if datetime.datetime.now().weekday() in (4,):
  1957. last_date = timeAdd(current_date,-3)
  1958. # start_time='2022-07-22'
  1959. # current_date = '2022-07-28'
  1960. page_time_start = timeAdd(current_date,-7)
  1961. log("start exporting export2:=================")
  1962. # columns = ["doctitle","doctextcon","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose"]
  1963. dict_channel = getDict_docchannel()
  1964. list_query = []
  1965. str_keyword = '''
  1966. 多功能位移机,失能群体安全转移,助行机器人,行走机器人,护理机器人,大小便护理,洗浴机,洗浴机器人,吃饭机器人,养老护理,老年评估,防摔,智能护理,智慧康养
  1967. '''
  1968. list_keyword = splitIntoList(str_keyword,"[\s\n、,,]")
  1969. str_not_keyword = '''
  1970. 清洗机器人
  1971. '''
  1972. list_not_key = splitIntoList(str_not_keyword,"[\s\n、,,]")
  1973. tenderee_keywrod = "医院、大学、高校、高中"
  1974. list_t_key = splitIntoList(tenderee_keywrod,"[\s\n、,,]")
  1975. log(str(list_keyword))
  1976. columns = ["doctitle","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_get_file_end","time_bidopen","time_bidstart","time_registration_start","time_registration_end"]
  1977. bool_query = BoolQuery(must_queries=[
  1978. generateBoolShouldQuery(["doctitle"],list_keyword,MatchPhraseQuery),
  1979. generateBoolShouldQuery(["docchannel"],[51,52,102,103,105,104,114,121,122,101,119,120,118],TermQuery),
  1980. RangeQuery("crtime",last_date+" 10:00:00",current_date+" 10:00:00",True,True),
  1981. RangeQuery("status",201,300,True,True),
  1982. RangeQuery("page_time",page_time_start)
  1983. # TermQuery("procurement_system","公安系统"),
  1984. # generateBoolShouldQuery(["province"],["湖南"],TermQuery),
  1985. # generateBoolShouldQuery(["tenderee"],list_t_key,WildcardQuery)
  1986. # generateBoolShouldQuery(["docchannel"],[101,118,119],TermQuery),
  1987. ],
  1988. must_not_queries=[
  1989. generateBoolShouldQuery(["doctitle","doctextcon","attachmenttextcon"],list_not_key,MatchPhraseQuery),
  1990. ]
  1991. )
  1992. list_row = getDocument([{"query":bool_query,"limit":500}],columns,thread_count=1)
  1993. log("get document %d rows"%len(list_row))
  1994. df_data = {}
  1995. set_line = set()
  1996. # list_row = filterRow(list_row)
  1997. df_columns = getRowData_zb(df_data,list_row,set_line,list_keyword,dict_channel,True)
  1998. fixContactPerson1(df_data,df_columns)
  1999. df = pd.DataFrame(df_data)
  2000. df_data_filter_11 = {}
  2001. for c in df_columns:
  2002. if c not in df_data_filter_11:
  2003. df_data_filter_11[c] = []
  2004. df_data_filter_12 = {}
  2005. for c in df_columns:
  2006. if c not in df_data_filter_12:
  2007. df_data_filter_12[c] = []
  2008. for _i in range(len(df_data["项目类型"])):
  2009. if df_data["项目类型"][_i] in ("教育系统","科研系统"):
  2010. for c in df_columns:
  2011. if c not in df_data_filter_11:
  2012. df_data_filter_11[c] = []
  2013. df_data_filter_11[c].append(df_data[c][_i])
  2014. else:
  2015. for c in df_columns:
  2016. if c not in df_data_filter_12:
  2017. df_data_filter_12[c] = []
  2018. df_data_filter_12[c].append(df_data[c][_i])
  2019. df_f11 = pd.DataFrame(df_data_filter_11)
  2020. df_f12 = pd.DataFrame(df_data_filter_12)
  2021. time_end = timeAdd(current_date,60)
  2022. columns = ["project_name","type","province","city","bidding_budget","city","demand","last_page_time"]
  2023. bool_query = BoolQuery(must_queries=[
  2024. generateBoolShouldQuery(["demand","product"],list_keyword,MatchPhraseQuery),
  2025. RangeQuery("may_end",range_from=current_date),
  2026. RangeQuery("may_end",range_to=time_end),
  2027. # TermQuery("procurement_system","公安系统"),
  2028. # generateBoolShouldQuery(["province"],["湖南"],TermQuery),
  2029. # generateBoolShouldQuery(["tenderee"],list_t_key,WildcardQuery)
  2030. # generateBoolShouldQuery(["docchannel"],[101,118,119],TermQuery),
  2031. ],
  2032. must_not_queries=[
  2033. generateBoolShouldQuery(["demand","product"],list_not_key,MatchPhraseQuery),
  2034. ]
  2035. )
  2036. list_row = getDocument([{"query":bool_query,"limit":500}],columns,table_name="preproject",table_index="preproject_index",thread_count=1,sort_column="may_end")
  2037. log("get document %d rows"%len(list_row))
  2038. df_data = {}
  2039. set_line = set()
  2040. # list_row = filterRow(list_row)
  2041. df_columns_yc = getRowData_yc(df_data,list_row,set_line,list_keyword,dict_channel,True)
  2042. fixContactPerson(df_data,df_columns_yc)
  2043. for _i in range(len(df_data["项目类型"])):
  2044. if df_data["项目类型"][_i] in ("政府办公室系统","财政系统","民事系统","企业采购系统","卫生系统"):
  2045. df_data["类型"][_i] = "政企"
  2046. elif df_data["项目类型"][_i] in ("教育系统","科研系统"):
  2047. df_data["类型"][_i] = "教育"
  2048. else:
  2049. df_data["类型"][_i] = "其他"
  2050. df1 = pd.DataFrame(df_data)
  2051. filename = os.path.dirname(__file__)+"/data/%s年%s项目数据导出.xlsx"%(start_time[:4],current_date)
  2052. with pd.ExcelWriter(filename) as writer:
  2053. df_f11.to_excel(writer,sheet_name="教育",columns=df_columns if not df_f11.empty else None,index=False)
  2054. df_f12.to_excel(writer,sheet_name="政企",columns=df_columns if not df_f12.empty else None,index=False)
  2055. df1.to_excel(writer,sheet_name="项目预测",columns=df_columns_yc if not df1.empty else None)
  2056. log(str(filename))
  2057. host = "smtp.exmail.qq.com"
  2058. username = "vip@bidizhaobiao.com"
  2059. password = "Biaoxun66-"
  2060. # receivers = ["724949655@qq.com","1396488964@qq.com"]
  2061. receivers = ["md47@zuowei.com"]
  2062. # receivers = ["1175730271@qq.com"]
  2063. attachs = [filename]
  2064. sendEmail(host,username,password,receivers,attachs=attachs)
  2065. break
  2066. except Exception as e:
  2067. traceback.print_exc()
  2068. def job_export():
  2069. _scheduler = BlockingScheduler()
  2070. e3 = Export3()
  2071. # _scheduler.add_job(export_medicine_friday,"cron",hour=8)
  2072. # _scheduler.add_job(export_medicine_friday,"cron",second="*/1")
  2073. _scheduler.add_job(export2,"cron",hour=9)
  2074. _scheduler.add_job(export5,"cron",hour=16)
  2075. # _scheduler.add_job(e3.export4,"cron",hour=7)
  2076. _scheduler.add_job(e3.trytimes,"cron",hour=21)
  2077. _scheduler.add_job(e3.export3_1,"cron",hour=21)
  2078. _scheduler.add_job(export_15824381998,"cron",hour=21)
  2079. _scheduler.add_job(export_13510123669,"cron",hour=10)
  2080. _scheduler.start()
  2081. if __name__=="__main__":
  2082. # job_export()
  2083. # export_medicine_friday()
  2084. # export2()
  2085. # export_document_except()
  2086. # e3 = Export3()
  2087. # e3.export4_by_project()
  2088. # e3.export4()
  2089. # e3.trytimes()
  2090. # e3.export3_1()
  2091. # export5()
  2092. # export_15824381998()
  2093. export_13510123669()