#coding:utf8 import sys import os sys.path.append(os.path.join(os.path.dirname(__file__),"..")) import requests from utils.Utils import sendEmail,getCurrent_date,log import datetime import time from export.exportDocument import * import pandas as pd from apscheduler.schedulers.blocking import BlockingScheduler from export.exportUtils import rsa_encrpt import urllib.parse import base64 def export_medicine_friday(): current_date = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())-24*60*60)) start_time = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())-7*24*60*60)) if current_date<="2022-04-25": if datetime.datetime.now().weekday()==0: for i in range(10): try: df_data = exportDocument_medicine(start_time,current_date) df = pd.DataFrame(df_data) filename = os.path.dirname(__file__)+"/data/%s年%s至%s医疗数据导出.xlsx"%(start_time[:4],start_time,current_date) df.to_excel(filename,columns=list_df_columns) host = "smtp.exmail.qq.com" username = "vip@bidizhaobiao.com" password = "Biaoxun66-" receivers = ["1985262186@qq.com","1175730271@qq.com","1265797328@qq.com","1289358902@qq.com"] attachs = [filename] sendEmail(host,username,password,receivers,attachs=attachs) break except Exception as e: traceback.print_exc() log(str(e)) time.sleep(60) def export2(): def removeData(df_data): list_remove_index = [] list_c = df_data.get("招标人采购系统",[]) for _c_i in range(len(list_c)): if list_c[_c_i]=="企业采购系统": list_remove_index.append(_c_i) list_remove_index.reverse() print(list_remove_index) for k,v in df_data.items(): for _rc in list_remove_index: v.pop(_rc) for k,v in df_data.items(): v = v[:500] log("start export2:>>>>>>>>>>>>>>>") current_date = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())-24*60*60)) start_time = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())-7*24*60*60)) if current_date<="2024-02-28": if datetime.datetime.now().weekday()==4: for i in range(10): try: # start_time='2022-07-22' # current_date = '2022-07-28' log("start exporting export2:=================") # columns = ["doctitle","doctextcon","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose"] columns = ["doctitle","doctextcon","attachmenttextcon","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose","time_bidopen"] dict_channel = getDict_docchannel() list_query = [] str_keyword = ''' 机房建设 多媒体教室建设 数据中心机房 班班通 电子设备 视频 LED 监控 视频监控系统 信息安全 信息化建设 智能化 网络安全服务 网络安全系统 等级保护测评 信息系统 大数据中心 智慧教室建设 数据安全建设 可视化指挥联动平台 音频处理器 教学专用仪器 多媒体扩声系统 智慧校园 数字化 交换 路由 无线 城域网建设 触控一体机 教学设备 ''' list_keyword = splitIntoList(str_keyword,"[\s\n、,,]") str_not_keyword = ''' ''' list_not_key = splitIntoList(str_not_keyword,"[\s\n、,,]") tenderee_keywrod = "医院、大学、高校、高中" list_t_key = splitIntoList(tenderee_keywrod,"[\s\n、,,]") log(str(list_keyword)) bool_query = BoolQuery(must_queries=[ generateBoolShouldQuery(["doctitle","doctextcon","attachmenttextcon"],list_keyword,MatchPhraseQuery), generateBoolShouldQuery(["docchannel"],[52,102,114],TermQuery), RangeQuery("page_time",start_time,current_date,True,True), RangeQuery("status",151,300,True,True), # TermQuery("procurement_system","公安系统"), generateBoolShouldQuery(["province"],["湖南"],TermQuery), # generateBoolShouldQuery(["tenderee"],list_t_key,WildcardQuery) # generateBoolShouldQuery(["docchannel"],[101,118,119],TermQuery), ], # must_not_queries=[ # generateBoolShouldQuery(["doctitle"],list_not_key,MatchPhraseQuery), # ] ) list_query.append({"query":bool_query,"limit":700}) list_row = getDocument(list_query,columns) log("get document %d rows"%len(list_row)) df_data = {} set_line = set() # list_row = filterRow(list_row) getRowData(df_data,list_row,set_line,list_keyword,dict_channel,True) fixContactPerson(df_data,list_df_columns) removeData(df_data) df = pd.DataFrame(df_data) filename = os.path.dirname(__file__)+"/data/%s年%s至%s数据导出.xlsx"%(start_time[:4],start_time,current_date) df.to_excel(filename,columns=list_df_columns) log(str(filename)) host = "smtp.exmail.qq.com" username = "vip@bidizhaobiao.com" password = "Biaoxun66-" receivers = ["1175730271@qq.com","493894608@qq.com"] # receivers = ["1175730271@qq.com"] attachs = [filename] sendEmail(host,username,password,receivers,attachs=attachs) break except Exception as e: traceback.print_exc() def export5(): ''' 客户信息 公司名:武汉市浩盛特种建材有限责任公司 客户姓名:高诗琴 会员号:15392839439 会员等级:高级会员 成交金额:5900元 服务期:12个月 截止推送项目时间:2024-02-17 申请原因:2.16成交的新客户,客户反馈跟销售沟通是人工每天整理好项目信息推送,现在寻求技术协助支持。 客户需求: 地区:河北省、北京市、天津市、山东省、江苏省、上海市、浙江省、福建省、广东省、海南省 业务关键词:膨胀剂,抗裂剂,防水剂,外加剂,防腐剂,阻锈剂,密实剂,耐久性,氧化镁,镁质,涂料 (排除词,漆),需要招标信息 工作日下午四点文档信息推送到邮箱365531448@qq.com,每天更新最新的项目信息10条(不足10条可以按照更新信息推送) :return: ''' def getRowData(df_data,rows,set_line,list_keyword,dict_channel,dumplicate): dict_line = {} # list_data = getRow_ots(rows) _index = 0 rows.sort(key=lambda x:x.get("docid",0),reverse=True) set_col = set() df_columns = [] for row in rows[:10]: _index += 1 item = {} _dict = row set_dict_item_columns(set_col,df_columns,item,"省份",_dict.get("province","")) set_dict_item_columns(set_col,df_columns,item,"城市",_dict.get("city","")) set_dict_item_columns(set_col,df_columns,item,"公告类别",dict_channel.get(_dict.get("docchannel",""),"")) set_dict_item_columns(set_col,df_columns,item,"发布时间",_dict.get("page_time","")) set_dict_item_columns(set_col,df_columns,item,"公告标题",_dict.get("doctitle","")) set_dict_item_columns(set_col,df_columns,item,"招标单位",_dict.get("tenderee","")) set_dict_item_columns(set_col,df_columns,item,"招标联系人",_dict.get("tenderee_contact","")) set_dict_item_columns(set_col,df_columns,item,"招标联系人电话",_dict.get("tenderee_phone","")) set_dict_item_columns(set_col,df_columns,item,"代理单位",_dict.get("agency","")) set_dict_item_columns(set_col,df_columns,item,"代理单位联系人",_dict.get("agency_contact","")) set_dict_item_columns(set_col,df_columns,item,"代理单位联系人电话",_dict.get("agency_phone","")) set_dict_item_columns(set_col,df_columns,item,"链接","http://www.bidizhaobiao.com/info-%s.html"%str(_dict.get("docid",""))) for k,v in item.items(): if k not in df_data: df_data[k] = [] df_data[k].append(v) return df_columns log("start export5:>>>>>>>>>>>>>>>") current_date = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime()))) if current_date<="2024-02-17": weekday = datetime.datetime.now().weekday() if weekday>=0 and weekday<=4: for i in range(10): try: # start_time='2022-07-22' # current_date = '2022-07-28' log("start exporting export2:=================") # columns = ["doctitle","doctextcon","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose"] columns = ["doctitle","doctextcon","attachmenttextcon","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose","time_bidopen"] columns = ["doctitle","docchannel","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose","time_bidopen"] dict_channel = getDict_docchannel() list_query = [] str_province = ''' 河北、北京、天津、山东、江苏、上海、浙江、福建、广东、海南 ''' str_keyword = ''' 膨胀剂 抗裂剂 防水剂 外加剂 镁质 防腐剂 阻锈剂 密实剂 耐久性 氧化镁 ''' list_province = splitIntoList(str_province,"[\s\n、,,]") list_keyword = splitIntoList(str_keyword,"[\s\n、,,]") str_not_keyword = ''' 漆,复合风管,检测,涂料工程,涂料多乐士 ''' list_not_key = splitIntoList(str_not_keyword,"[\s\n、,,]") tenderee_keywrod = "医院、大学、高校、高中" list_t_key = splitIntoList(tenderee_keywrod,"[\s\n、,,]") log(str(list_keyword)) bool_query = BoolQuery(must_queries=[ generateBoolShouldQuery(["doctitle"],list_keyword,MatchPhraseQuery), generateBoolShouldQuery(["docchannel"],[52,102,114],TermQuery), TermQuery("page_time",current_date), RangeQuery("status",151,300,True,True), # TermQuery("procurement_system","公安系统"), generateBoolShouldQuery(["province"],list_province,TermQuery), # generateBoolShouldQuery(["tenderee"],list_t_key,WildcardQuery) # generateBoolShouldQuery(["docchannel"],[101,118,119],TermQuery), ], must_not_queries=[ generateBoolShouldQuery(["doctitle"],list_not_key,MatchPhraseQuery), ] ) list_query.append({"query":bool_query,"limit":700}) list_row = getDocument(list_query,columns) log("get document %d rows"%len(list_row)) df_data = {} set_line = set() # list_row = filterRow(list_row) df_column = getRowData(df_data,list_row,set_line,list_keyword,dict_channel,True) df = pd.DataFrame(df_data) print(df_data) filename = os.path.dirname(__file__)+"/data/%s年%s数据导出.xlsx"%(current_date[:4],current_date) df.to_excel(filename,columns=df_column) log(str(filename)) host = "smtp.exmail.qq.com" username = "vip@bidizhaobiao.com" password = "Biaoxun66-" receivers = ["1175730271@qq.com","365531448@qq.com"] # receivers = ["1175730271@qq.com"] attachs = [filename] sendEmail(host,username,password,receivers,attachs=attachs) break except Exception as e: traceback.print_exc() def export_document_except(): def getRowData(df_data,rows,dict_channel,set_columns1,list_df_column1): _index = 0 for row in rows: _index += 1 item = {} _dict = row _extract = json.loads(_dict.get("extract_json","{}")) product_attrs = _extract.get("product_attrs",{"data":[{}]}).get("data") docid = _dict.get("docid","") win_tenderer = "" bidding_budget = "" sub_docs_json = json.loads(_dict.get("sub_docs_json","[]")) for _doc in sub_docs_json: if win_tenderer=="": win_tenderer = _doc.get("win_tenderer","") if bidding_budget=="": bidding_budget= _doc.get("bidding_budget","") for _attrs in product_attrs: set_dict_item_columns(set_columns1,list_df_column1,item,"公告链接",'=HYPERLINK("http://www.bidizhaobiao.com/info-%s.html","查看公告")'%(str(docid))) set_dict_item_columns(set_columns1,list_df_column1,item,"公告时间",_dict.get("page_time")) set_dict_item_columns(set_columns1,list_df_column1,item,"省份",_dict.get("province")) set_dict_item_columns(set_columns1,list_df_column1,item,"城市",_dict.get("city")) set_dict_item_columns(set_columns1,list_df_column1,item,"地区",_dict.get("district")) set_dict_item_columns(set_columns1,list_df_column1,item,"项目标题",_dict.get("doctitle")) set_dict_item_columns(set_columns1,list_df_column1,item,"采购单位",_dict.get("tenderee")) set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位",win_tenderer) set_dict_item_columns(set_columns1,list_df_column1,item,"产品名称",_dict.get("product")) set_dict_item_columns(set_columns1,list_df_column1,item,"规格型号",_attrs.get("specs","")) set_dict_item_columns(set_columns1,list_df_column1,item,"品牌",_attrs.get("brand","")) _quantity = "" _uniPrice = "" try: _quantity = int(_attrs.get("quantity","0")) if _quantity==0: _quantity = "" except Exception as e: pass try: _uniPrice = float(_attrs.get("_uniPrice","0")) if _uniPrice==0: _uniPrice = "" except Exception as e: pass set_dict_item_columns(set_columns1,list_df_column1,item,"数量",_quantity) set_dict_item_columns(set_columns1,list_df_column1,item,"单价(元)",_uniPrice) sumPrice = "" if _quantity!="" and _uniPrice!="": sumPrice = _quantity*_uniPrice set_dict_item_columns(set_columns1,list_df_column1,item,"总价(元)",sumPrice) set_dict_item_columns(set_columns1,list_df_column1,item,"项目金额(元)",bidding_budget) for k,v in item.items(): if k not in df_data: df_data[k] = [] df_data[k].append(v) current_date = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime()))) current_date = '2022-04-01' columns = ["doctitle","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose","web_source_no","web_source_name","service_time","person_review","time_get_file_start","time_get_file_end","time_earnest_money_start","time_earnest_money_end"] dict_channel = getDict_docchannel() list_query = [] str_keyword = ''' 机房建设 多媒体教室建设 数据中心机房 班班通 电子设备 视频 LED 监控 视频监控系统 信息安全 信息化建设 网络安全 网络安全服务 网络安全系统 等级保护测评 信息系统 大数据中心 智慧教室建设 数据安全建设 可视化指挥联动平台 音频处理器 教学专用仪器 多媒体扩声系统 智慧校园 数字化 交换 路由 无线 信息安全 触控一体机 教学设备 ''' list_keyword = splitIntoList(str_keyword,"[\s\n、,,]") str_not_keyword = ''' ''' list_not_key = splitIntoList(str_not_keyword,"[\s\n、,,]") tenderee_keywrod = "医院、大学、高校、高中" list_t_key = splitIntoList(tenderee_keywrod,"[\s\n、,,]") s_q = BoolQuery(should_queries=[BoolQuery(must_queries=[TermQuery("docchannel",52)], must_not_queries=[WildcardQuery("tenderee","*")]), BoolQuery(must_queries=[generateBoolShouldQuery(["docchannel"],[101,119,120],TermQuery)], must_not_queries=[NestedQuery("sub_docs_json",WildcardQuery("sub_docs_json.win_tenderer","*"))])]) log(str(list_keyword)) set_columns1 = set() list_df_columns1 = [] bool_query = BoolQuery(must_queries=[ TermQuery("page_time",current_date), RangeQuery("status",151,451,True,True), s_q ], # must_not_queries=[ # generateBoolShouldQuery(["doctitle"],list_not_key,MatchPhraseQuery), # ] ) list_query.append({"query":bool_query,"limit":500000}) list_row = getDocument(list_query,columns) log("get document %d rows"%len(list_row)) df_data = {} set_line = set() # list_row = filterRow(list_row,list_not_key) getRowData(df_data,list_row,set_line,list_keyword,dict_channel,True) # fixContactPerson(df_data,list_df_columns) df = pd.DataFrame(df_data) filename = os.path.dirname(__file__)+"/data/%s异常数据导出.xlsx"%(current_date) print(list_df_columns) df.to_excel(filename,columns=list_df_columns) class Export3(): def trytimes(self): for _ in range(3): _succeed = self.export3() if _succeed: break def export3(self,): def getRowData(df_data,rows,dict_channel,set_columns1,list_df_column1): _index = 0 for row in rows: _index += 1 item = {} _dict = row _extract = json.loads(_dict.get("extract_json","{}")) demand_info = _extract.get("demand_info",{"data":[]}).get("data") docid = _dict.get("docid","") win_tenderer = "" bidding_budget = "" win_bid_price = "" sub_docs_json = json.loads(_dict.get("sub_docs_json","[]")) for _doc in sub_docs_json: if win_tenderer=="": win_tenderer = _doc.get("win_tenderer","") if bidding_budget=="": bidding_budget= _doc.get("bidding_budget","") if win_bid_price=="": win_bid_price = _doc.get("win_bid_price","") if len(demand_info)==0: demand_info = [{}] demand_info = [{}] for _attrs in demand_info: set_dict_item_columns(set_columns1,list_df_column1,item,"序号",_index) set_dict_item_columns(set_columns1,list_df_column1,item,"项目编号",_dict.get("project_code")) set_dict_item_columns(set_columns1,list_df_column1,item,"项目名称",_dict.get("project_name")) set_dict_item_columns(set_columns1,list_df_column1,item,"省份",_dict.get("province")) set_dict_item_columns(set_columns1,list_df_column1,item,"城市",_dict.get("city")) set_dict_item_columns(set_columns1,list_df_column1,item,"信息类型",dict_channel.get(_dict.get("docchannel"))) set_dict_item_columns(set_columns1,list_df_column1,item,"发布时间",_dict.get("crtime")) b_or_w = "" if bidding_budget!="": b_or_w = float(bidding_budget)/10000 elif win_bid_price!="": b_or_w = float(win_bid_price)/10000 set_dict_item_columns(set_columns1,list_df_column1,item,"中标金额/预算金额(万元)",b_or_w) set_dict_item_columns(set_columns1,list_df_column1,item,"招标单位",_dict.get("tenderee")) set_dict_item_columns(set_columns1,list_df_column1,item,"招标项目联系人","%s\n%s"%(_dict.get("tenderee_contact",""),_dict.get("tenderee_phone",""))) # 中标单位 中标项目联系人 中标单位主要联系人 中标单位联系人 代理单位 代理项目联系人 代理单位联系人 比地招标公告地址 set_dict_item_columns(set_columns1,list_df_column1,item,"招标单位联系人","") set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位",win_tenderer) set_dict_item_columns(set_columns1,list_df_column1,item,"中标项目联系人","") set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位主要联系人","") set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位联系人","") set_dict_item_columns(set_columns1,list_df_column1,item,"代理单位",_dict.get("agency","")) set_dict_item_columns(set_columns1,list_df_column1,item,"代理项目联系人","%s\n%s"%(_dict.get("agency_contact",""),_dict.get("agency_phone",""))) set_dict_item_columns(set_columns1,list_df_column1,item,"代理单位联系人","") _key ='''MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQC8uZYYV6ls+5KMzUfvsPvv3XRdwkcTBj/ppB03mijUPHYTGvYSE0cQTbQrnIbXFtUYJguakpKLmVyH+T/w6vhxbQNlaykfe8RXEh4i4IJk8s/Qb0E0xODsjKBEr8VdDYeqqduWrtJpttXAvv93SsTPvgZBditRzJAzk0XH56zL1wIDAQAB''' _key = base64.b64decode(_key) _url_map = {"userId":"001795335", "timestamp":"%d"%(time.time()*1000), "docid":str(_dict.get("docid",""))} _encrpt_text = base64.b64encode(rsa_encrpt(json.dumps(_url_map),_key)) _encrpt_text = urllib.parse.quote(_encrpt_text) _url = "http://www.bidizhaobiao.com/info-%s.html?emailSecret=%s"%(str(_dict.get("docid")),_encrpt_text) set_dict_item_columns(set_columns1,list_df_column1,item,"比地招标公告地址",_url) # set_dict_item_columns(set_columns1,list_df_column1,item,"项目编号",_dict.get("project_code")) # set_dict_item_columns(set_columns1,list_df_column1,item,"项目名称",_attrs.get("project_name")) # # set_dict_item_columns(set_columns1,list_df_column1,item,"需求",_attrs.get("demand")) # set_dict_item_columns(set_columns1,list_df_column1,item,"预算(元)",_attrs.get("budget")) # set_dict_item_columns(set_columns1,list_df_column1,item,"开始时间",_attrs.get("order_begin")) # set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位",win_tenderer) # set_dict_item_columns(set_columns1,list_df_column1,item,"产品名称",_attrs.get("product")) # set_dict_item_columns(set_columns1,list_df_column1,item,"规格型号",_attrs.get("specs","")) # set_dict_item_columns(set_columns1,list_df_column1,item,"品牌",_attrs.get("brand","")) # _quantity = "" # _uniPrice = "" # try: # _quantity = int(_attrs.get("quantity","0")) # if _quantity==0: # _quantity = "" # except Exception as e: # pass # try: # _uniPrice = float(_attrs.get("unitPrice","0")) # if _uniPrice==0: # _uniPrice = "" # except Exception as e: # pass # set_dict_item_columns(set_columns1,list_df_column1,item,"数量",_quantity) # set_dict_item_columns(set_columns1,list_df_column1,item,"单价(元)",_uniPrice) # sumPrice = "" # if _quantity!="" and _uniPrice!="": # sumPrice = _quantity*_uniPrice # set_dict_item_columns(set_columns1,list_df_column1,item,"总价(元)",sumPrice) # set_dict_item_columns(set_columns1,list_df_column1,item,"项目金额(元)",bidding_budget) for k,v in item.items(): if k not in df_data: df_data[k] = [] df_data[k].append(v) def getRowData1(df_data,rows,dict_channel,set_columns1,list_df_column1): _index = 0 for row in rows: _index += 1 item = {} _dict = row _extract = json.loads(_dict.get("extract_json","{}")) demand_info = _extract.get("demand_info",{"data":[]}).get("data") docid = _dict.get("docid","") win_tenderer = "" bidding_budget = "" win_bid_price = "" sub_docs_json = json.loads(_dict.get("sub_docs_json","[]")) for _doc in sub_docs_json: if win_tenderer=="": win_tenderer = _doc.get("win_tenderer","") if bidding_budget=="": bidding_budget= _doc.get("bidding_budget","") if win_bid_price=="": win_bid_price = _doc.get("win_bid_price","") if len(demand_info)==0: demand_info = [{}] demand_info = [{}] for _attrs in demand_info: # set_dict_item_columns(set_columns1,list_df_column1,item,"序号",_index) # set_dict_item_columns(set_columns1,list_df_column1,item,"项目编号",_dict.get("project_code")) set_dict_item_columns(set_columns1,list_df_column1,item,"地区",_dict.get("district")) set_dict_item_columns(set_columns1,list_df_column1,item,"省份",_dict.get("province")) set_dict_item_columns(set_columns1,list_df_column1,item,"项目名称",_dict.get("project_name")) set_dict_item_columns(set_columns1,list_df_column1,item,"采购人",_dict.get("tenderee")) set_dict_item_columns(set_columns1,list_df_column1,item,"招标时间",_dict.get("page_time")) b_or_w = "" if bidding_budget!="": b_or_w = float(bidding_budget)/10000 elif win_bid_price!="": b_or_w = float(win_bid_price)/10000 set_dict_item_columns(set_columns1,list_df_column1,item,"招标预算",bidding_budget) set_dict_item_columns(set_columns1,list_df_column1,item,"中标时间",_dict.get("page_time")) set_dict_item_columns(set_columns1,list_df_column1,item,"中标金额",win_bid_price) set_dict_item_columns(set_columns1,list_df_column1,item,"中标人",win_tenderer) set_dict_item_columns(set_columns1,list_df_column1,item,"采购模式",_dict.get("bidway")) set_dict_item_columns(set_columns1,list_df_column1,item,"命中关键词",_dict.get("keyword")) set_dict_item_columns(set_columns1,list_df_column1,item,"采购品牌",'') set_dict_item_columns(set_columns1,list_df_column1,item,"采购数量",'') for k,v in item.items(): if k not in df_data: df_data[k] = [] df_data[k].append(v) def getContacts(contacts): try: if contacts is None or contacts=="": return "" _contacts = json.loads(contacts) list_c = [] _count = 0 for _c in _contacts: _count += 1 contact_person = _c.get("contact_person","") phone_no = _c.get("phone_no","") list_c.append("%s(企业联系人%d)\n%s"%(str(contact_person),_count,str(phone_no))) if _count>=5: break return ",\n".join(list_c) except Exception as e: return "" def fixContactPerson(df_data,list_df_columns,get_legal_person=False): set_enterprise = set() if len(df_data.keys())>0: for _tenderee,_agency,_win in zip(df_data["招标单位"],df_data["代理单位"],df_data["中标单位"]): set_enterprise.add(_tenderee) set_enterprise.add(_agency) set_enterprise.add(_win) if "" in set_enterprise: set_enterprise.remove("") if None in set_enterprise: set_enterprise.remove(None) dict_enterprise = getDictEnterprise(list(set_enterprise),columns_to_get = ["contacts","procurement_system"]) # conn = getConnection_oracle() # cursor = conn.cursor() if len(set_enterprise)>0: for _i in range(len(df_data["招标单位"])): _enterprise_name = df_data["招标单位"][_i] df_data["招标单位联系人"][_i] = getContacts(dict_enterprise.get(_enterprise_name,{}).get("contacts")) _enterprise_name = df_data["代理单位"][_i] df_data["代理单位联系人"][_i] = getContacts(dict_enterprise.get(_enterprise_name,{}).get("contacts")) _enterprise_name = df_data["中标单位"][_i] df_data["中标单位联系人"][_i] = getContacts(dict_enterprise.get(_enterprise_name,{}).get("contacts")) # if "采购系统" not in df_data: # df_data["采购系统"] = [] # df_data["采购系统"].append(dict_enterprise.get(_enterprise_name,{}).get("procurement_system","")) # list_df_columns.extend(['采购系统']) ''' export by customer's subscription :return: ''' try: ots_client = getConnect_ots() subscription4 = ''' 触控一体机 交互平板 交互一体机 交互智能平板 交互大屏 教学一体机 智慧屏 智慧黑板 互动黑板 班班通 多媒体教室 多媒体设备 多媒体系统 智慧教室 ''' # provinces = "河北 山西 广东 海南 江苏 安徽 山东 河南 湖北 湖南 重庆 黑龙江 陕西 甘肃 青海 宁夏" provinces = "海南 江苏 安徽 湖北 湖南 重庆 甘肃 青海 宁夏 河南 北京 天津" dict_channel = getDict_docchannel() list_province = splitIntoList(provinces,"\s|,|,|、") list_subscription4 = splitIntoList(subscription4,"\s|,|,|、") current_date = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime()))) last_date = timeAdd(current_date,-1) # last_date = "2023-12-08" # current_date = "2023-12-10" set_columns1 = set() list_df_columns1 = [] bool_query = BoolQuery(must_queries=[ generateBoolShouldQuery(["docchannel"],[102,114],TermQuery), RangeQuery("page_time",last_date), RangeQuery("status",201,300,True,True), generateBoolShouldQuery(["doctitle","doctextcon","attachmenttextcon"],list_subscription4,MatchPhraseQuery), generateBoolShouldQuery(["province"],list_province,WildcardQuery), RangeQuery("crtime","%s 21:00:00"%last_date,"%s 21:00:00"%current_date) ], # must_not_queries=[ # generateBoolShouldQuery(["doctitle"],list_not_key,MatchPhraseQuery), # ] ) columns = ["doctitle","docchannel","original_docchannel","product","crtime","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","extract_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone"] list_query = [{"query":bool_query,"limit":50000}] list_row = getDocument(list_query,columns) filename0 = os.path.dirname(__file__)+"/data/%s订阅4订阅5数据导出数量%d_%s.xlsx"%(current_date,len(list_row),getCurrent_date("%Y-%m-%d_%H%M%S")) df_data = {} if len(list_row)>0: getRowData(df_data,list_row,dict_channel,set_columns1,list_df_columns1) fixContactPerson(df_data,list_df_columns1) df = pd.DataFrame(df_data) df.to_excel(filename0,columns=list_df_columns1,index=False) else: df = pd.DataFrame(df_data) df.to_excel(filename0,index=False) host = "smtp.exmail.qq.com" username = "vip1@bidizhaobiao.com" password = "Biaoxun666+" # receivers = ["1175730271@qq.com","995116318@qq.com","huangxiaofang@cvte.com"] receivers = ["1175730271@qq.com","1208135584@qq.com","youyuer@cvte.com","chenyuxue@cvte.com"] # receivers = ["1175730271@qq.com"] attachs = [filename0] sendEmail(host,username,password,receivers,attachs=attachs) return True except Exception as e: traceback.print_exc() return False def export3_1(self,): def getRowData(df_data,rows,dict_channel,set_columns1,list_df_column1): _index = 0 for row in rows: _index += 1 item = {} _dict = row _extract = json.loads(_dict.get("extract_json","{}")) demand_info = _extract.get("demand_info",{"data":[]}).get("data") docid = _dict.get("docid","") win_tenderer = "" bidding_budget = "" win_bid_price = "" sub_docs_json = json.loads(_dict.get("sub_docs_json","[]")) for _doc in sub_docs_json: if win_tenderer=="": win_tenderer = _doc.get("win_tenderer","") if bidding_budget=="": bidding_budget= _doc.get("bidding_budget","") if win_bid_price=="": win_bid_price = _doc.get("win_bid_price","") if len(demand_info)==0: demand_info = [{}] demand_info = [{}] for _attrs in demand_info: set_dict_item_columns(set_columns1,list_df_column1,item,"序号",_index) set_dict_item_columns(set_columns1,list_df_column1,item,"项目编号",_dict.get("project_code")) set_dict_item_columns(set_columns1,list_df_column1,item,"项目名称",_dict.get("project_name")) set_dict_item_columns(set_columns1,list_df_column1,item,"省份",_dict.get("province")) set_dict_item_columns(set_columns1,list_df_column1,item,"城市",_dict.get("city")) set_dict_item_columns(set_columns1,list_df_column1,item,"信息类型",dict_channel.get(_dict.get("docchannel"))) set_dict_item_columns(set_columns1,list_df_column1,item,"发布时间",_dict.get("crtime")) b_or_w = "" if bidding_budget!="": b_or_w = float(bidding_budget)/10000 elif win_bid_price!="": b_or_w = float(win_bid_price)/10000 set_dict_item_columns(set_columns1,list_df_column1,item,"中标金额/预算金额(万元)",b_or_w) set_dict_item_columns(set_columns1,list_df_column1,item,"招标单位",_dict.get("tenderee")) set_dict_item_columns(set_columns1,list_df_column1,item,"招标项目联系人","%s\n%s"%(_dict.get("tenderee_contact",""),_dict.get("tenderee_phone",""))) # 中标单位 中标项目联系人 中标单位主要联系人 中标单位联系人 代理单位 代理项目联系人 代理单位联系人 比地招标公告地址 set_dict_item_columns(set_columns1,list_df_column1,item,"招标单位联系人","") set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位",win_tenderer) set_dict_item_columns(set_columns1,list_df_column1,item,"中标项目联系人","") set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位主要联系人","") set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位联系人","") set_dict_item_columns(set_columns1,list_df_column1,item,"代理单位",_dict.get("agency","")) set_dict_item_columns(set_columns1,list_df_column1,item,"代理项目联系人","%s\n%s"%(_dict.get("agency_contact",""),_dict.get("agency_phone",""))) set_dict_item_columns(set_columns1,list_df_column1,item,"代理单位联系人","") _key ='''MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQC8uZYYV6ls+5KMzUfvsPvv3XRdwkcTBj/ppB03mijUPHYTGvYSE0cQTbQrnIbXFtUYJguakpKLmVyH+T/w6vhxbQNlaykfe8RXEh4i4IJk8s/Qb0E0xODsjKBEr8VdDYeqqduWrtJpttXAvv93SsTPvgZBditRzJAzk0XH56zL1wIDAQAB''' _key = base64.b64decode(_key) _url_map = {"userId":"001795335", "timestamp":"%d"%(time.time()*1000), "docid":str(_dict.get("docid",""))} _encrpt_text = base64.b64encode(rsa_encrpt(json.dumps(_url_map),_key)) _encrpt_text = urllib.parse.quote(_encrpt_text) _url = "http://www.bidizhaobiao.com/info-%s.html?emailSecret=%s"%(str(_dict.get("docid")),_encrpt_text) set_dict_item_columns(set_columns1,list_df_column1,item,"比地招标公告地址",_url) # set_dict_item_columns(set_columns1,list_df_column1,item,"项目编号",_dict.get("project_code")) # set_dict_item_columns(set_columns1,list_df_column1,item,"项目名称",_attrs.get("project_name")) # # set_dict_item_columns(set_columns1,list_df_column1,item,"需求",_attrs.get("demand")) # set_dict_item_columns(set_columns1,list_df_column1,item,"预算(元)",_attrs.get("budget")) # set_dict_item_columns(set_columns1,list_df_column1,item,"开始时间",_attrs.get("order_begin")) # set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位",win_tenderer) # set_dict_item_columns(set_columns1,list_df_column1,item,"产品名称",_attrs.get("product")) # set_dict_item_columns(set_columns1,list_df_column1,item,"规格型号",_attrs.get("specs","")) # set_dict_item_columns(set_columns1,list_df_column1,item,"品牌",_attrs.get("brand","")) # _quantity = "" # _uniPrice = "" # try: # _quantity = int(_attrs.get("quantity","0")) # if _quantity==0: # _quantity = "" # except Exception as e: # pass # try: # _uniPrice = float(_attrs.get("unitPrice","0")) # if _uniPrice==0: # _uniPrice = "" # except Exception as e: # pass # set_dict_item_columns(set_columns1,list_df_column1,item,"数量",_quantity) # set_dict_item_columns(set_columns1,list_df_column1,item,"单价(元)",_uniPrice) # sumPrice = "" # if _quantity!="" and _uniPrice!="": # sumPrice = _quantity*_uniPrice # set_dict_item_columns(set_columns1,list_df_column1,item,"总价(元)",sumPrice) # set_dict_item_columns(set_columns1,list_df_column1,item,"项目金额(元)",bidding_budget) for k,v in item.items(): if k not in df_data: df_data[k] = [] df_data[k].append(v) def getContacts(contacts): try: if contacts is None or contacts=="": return "" if isinstance(contacts,str): _contacts = json.loads(contacts) else: _contacts = contacts list_c = [] _count = 0 for _c in _contacts: _count += 1 contact_person = _c.get("contact_person","") phone_no = _c.get("phone_no","") list_c.append("%s(企业联系人%d)\n%s"%(str(contact_person),_count,str(phone_no))) if _count>=5: break print("getContacts",",\n".join(list_c)) return ",\n".join(list_c) except Exception as e: traceback.print_exc() print("getContacts","") return "" def fixContactPerson(df_data,list_df_columns,get_legal_person=False): set_enterprise = set() if len(df_data.keys())>0: for _tenderee,_agency,_win in zip(df_data["招标单位"],df_data["代理单位"],df_data["中标单位"]): set_enterprise.add(_tenderee) set_enterprise.add(_agency) set_enterprise.add(_win) if "" in set_enterprise: set_enterprise.remove("") if None in set_enterprise: set_enterprise.remove(None) dict_enterprise = getDictEnterprise(list(set_enterprise),columns_to_get = ["contacts","procurement_system"]) # conn = getConnection_oracle() # cursor = conn.cursor() if len(set_enterprise)>0: for _i in range(len(df_data["招标单位"])): _enterprise_name = df_data["招标单位"][_i] df_data["招标单位联系人"][_i] = getContacts(dict_enterprise.get(_enterprise_name,{}).get("contacts")) _enterprise_name = df_data["代理单位"][_i] df_data["代理单位联系人"][_i] = getContacts(dict_enterprise.get(_enterprise_name,{}).get("contacts")) _enterprise_name = df_data["中标单位"][_i] df_data["中标单位联系人"][_i] = getContacts(dict_enterprise.get(_enterprise_name,{}).get("contacts")) # if "采购系统" not in df_data: # df_data["采购系统"] = [] # df_data["采购系统"].append(dict_enterprise.get(_enterprise_name,{}).get("procurement_system","")) # list_df_columns.extend(['采购系统']) ''' export by customer's subscription :return: ''' try: ots_client = getConnect_ots() subscription4 = ''' 录播、三个课堂、互动课堂、同步课堂、双师课堂、专递课堂、微格教室、远程课堂、督导巡课、推门听课、互动教学 ''' # provinces = "河北 山西 广东 海南 江苏 安徽 山东 河南 湖北 湖南 重庆 黑龙江 陕西 甘肃 青海 宁夏" provinces = "海南 江苏 安徽 湖北 湖南 重庆 甘肃 青海 宁夏 河南" dict_channel = getDict_docchannel() list_province = splitIntoList(provinces,"\s|,|,|、") list_subscription4 = splitIntoList(subscription4,"\s|,|,|、") current_date = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime()))) last_date = timeAdd(current_date,-1) # last_date = "2023-08-14" # current_date = "2023-08-20" set_columns1 = set() list_df_columns1 = [] bool_query = BoolQuery(must_queries=[ generateBoolShouldQuery(["docchannel"],[102,114],TermQuery), RangeQuery("page_time",last_date), RangeQuery("status",201,300,True,True), generateBoolShouldQuery(["doctitle","doctextcon","attachmenttextcon"],list_subscription4,MatchPhraseQuery), # generateBoolShouldQuery(["province"],list_province,WildcardQuery), RangeQuery("crtime","%s 21:00:00"%last_date,"%s 21:00:00"%current_date) ], # must_not_queries=[ # generateBoolShouldQuery(["doctitle"],list_not_key,MatchPhraseQuery), # ] ) columns = ["doctitle","docchannel","original_docchannel","product","crtime","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","extract_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone"] list_query = [{"query":bool_query,"limit":50000}] list_row = getDocument(list_query,columns) filename0 = os.path.dirname(__file__)+"/data/%s订阅4订阅5数据导出数量%d_%s.xlsx"%(current_date,len(list_row),getCurrent_date("%Y-%m-%d_%H%M%S")) df_data = {} if len(list_row)>0: getRowData(df_data,list_row,dict_channel,set_columns1,list_df_columns1) fixContactPerson(df_data,list_df_columns1) df = pd.DataFrame(df_data) df.to_excel(filename0,columns=list_df_columns1,index=False) else: df = pd.DataFrame(df_data) df.to_excel(filename0,index=False) host = "smtp.exmail.qq.com" username = "vip1@bidizhaobiao.com" password = "Biaoxun666+" # receivers = ["1175730271@qq.com","995116318@qq.com","huangxiaofang@cvte.com"] receivers = ["1175730271@qq.com","1208135584@qq.com","wanghongyan@cvte.com"] # receivers = ["1175730271@qq.com"] attachs = [filename0] sendEmail(host,username,password,receivers,attachs=attachs) return True except Exception as e: traceback.print_exc() return False def export4(self): def getRowData(df_data,rows,dict_channel,set_columns1,list_df_column1): dict_filter,dict_update = getFixData() _index = 0 for row in rows: _index += 1 item = {} _dict = row keys = row["keyword"] log("================") log(keys) log(_dict.get("doctitle","")+_dict.get("doctextcon","")+_dict.get("attachmenttextcon","")) _keyword = str(list(set([ i for i in re.findall("|".join([re.escape(i) for i in keys if i!='']),_dict.get("doctitle","")+_dict.get("doctextcon","")+_dict.get("attachmenttextcon","")) if i!='' ]))) log(_keyword) _extract = json.loads(_dict.get("extract_json","{}")) if len(set(keys)&set(["AR一体机","一体机","短焦投影仪","教学一体机","云桌面","录播","电子书包","电子班牌","智慧黑板"]))>0: product_attrs = _extract.get("product_attrs",{"data":[{}]}).get("data") else: product_attrs = [{}] if len(product_attrs)==0: product_attrs = [{}] docid = _dict.get("docid","") win_tenderer = "" bidding_budget = "" win_bid_price = "" sub_docs_json = json.loads(_dict.get("sub_docs_json","[]")) for _doc in sub_docs_json: if win_tenderer=="": win_tenderer = _doc.get("win_tenderer","") if bidding_budget=="": bidding_budget= _doc.get("bidding_budget","") if win_bid_price=="": win_bid_price = _doc.get("win_bid_price","") _index1 = 0 if _dict.get("docid") in dict_filter: print("====filter") continue if _dict.get("docid") in dict_update: print("====update") win_bid_price = dict_update[_dict.get("docid")].get("win_bid_price","") bidding_budget= dict_update[_dict.get("docid")].get("bidding_budget","") for _attrs in product_attrs: _index1 += 1 if re.search("运费",_attrs.get("product","")) is not None: if _index1==1: _attrs = {} else: continue set_dict_item_columns(set_columns1,list_df_column1,item,"序号",_index) # set_dict_item_columns(set_columns1,list_df_column1,item,"正文",_dict.get("doctextcon","")) # set_dict_item_columns(set_columns1,list_df_column1,item,"附件",_dict.get("attachmenttextcon","")) # set_dict_item_columns(set_columns1,list_df_column1,item,"项目名称",_dict.get("project_name","")) set_dict_item_columns(set_columns1,list_df_column1,item,"docid",_dict.get("docid")) set_dict_item_columns(set_columns1,list_df_column1,item,"网站链接",'=HYPERLINK("http://www.bidizhaobiao.com/info-%s.html","查看公告")'%(str(docid))) set_dict_item_columns(set_columns1,list_df_column1,item,"省份",_dict.get("province")) set_dict_item_columns(set_columns1,list_df_column1,item,"城市",_dict.get("city")) set_dict_item_columns(set_columns1,list_df_column1,item,"地区",_dict.get("district")) set_dict_item_columns(set_columns1,list_df_column1,item,"公告时间",_dict.get("page_time")) set_dict_item_columns(set_columns1,list_df_column1,item,"招标公告时间",'') set_dict_item_columns(set_columns1,list_df_column1,item,"项目标题",_dict.get("doctitle")) set_dict_item_columns(set_columns1,list_df_column1,item,"采购单位",_dict.get("tenderee")) set_dict_item_columns(set_columns1,list_df_column1,item,"中标供应商",win_tenderer) set_dict_item_columns(set_columns1,list_df_column1,item,"采购方式",_dict.get("bidway","")) set_dict_item_columns(set_columns1,list_df_column1,item,"招标采购方式","") set_dict_item_columns(set_columns1,list_df_column1,item,"采购系统","") set_dict_item_columns(set_columns1,list_df_column1,item,"产品名称",_attrs.get("product")) set_dict_item_columns(set_columns1,list_df_column1,item,"品牌",_attrs.get("brand","")) # set_dict_item_columns(set_columns1,list_df_column1,item,"规格型号",_attrs.get("specs","")) set_dict_item_columns(set_columns1,list_df_column1,item,"预算金额(元)",bidding_budget) # set_dict_item_columns(set_columns1,list_df_column1,item,"招标预算金额(元)","") _quantity = "" _uniPrice = "" try: _quantity = int(re.sub("[^\d]","",_attrs.get("quantity","0"))) if _quantity==0: _quantity = "" except Exception as e: pass try: _uniPrice = float(_attrs.get("unitPrice","0")) if _uniPrice==0: _uniPrice = "" except Exception as e: pass set_dict_item_columns(set_columns1,list_df_column1,item,"数量",_quantity) set_dict_item_columns(set_columns1,list_df_column1,item,"单价(元)",_uniPrice) sumPrice = "" if _quantity!="" and _uniPrice!="": sumPrice = _quantity*_uniPrice set_dict_item_columns(set_columns1,list_df_column1,item,"总价(元)",sumPrice) set_dict_item_columns(set_columns1,list_df_column1,item,"中标金额(元)",win_bid_price) set_dict_item_columns(set_columns1,list_df_column1,item,"关键词",_keyword) for k,v in item.items(): if k not in df_data: df_data[k] = [] df_data[k].append(v) def filterRow(list_row,key="docid"): set_id = set() _filter_row = [] for row in list_row: _v = row.get(key,"") if _v in set_id: continue set_id.add(_v) _filter_row.append(row) return _filter_row def fixContactPerson(df_data,list_df_columns,get_legal_person=False): set_enterprise = set() if len(df_data.keys())>0: for _tenderee,_win_tenderer in zip(df_data["采购单位"],df_data["中标供应商"]): set_enterprise.add(_tenderee) set_enterprise.add(_win_tenderer) if "" in set_enterprise: set_enterprise.remove("") if None in set_enterprise: set_enterprise.remove(None) dict_enterprise = getDictEnterprise(list(set_enterprise),columns_to_get = ["procurement_system"]) # conn = getConnection_oracle() # cursor = conn.cursor() if len(set_enterprise)>0: for _i in range(len(df_data["采购单位"])): _enterprise_name = df_data["采购单位"][_i] # if df_data["招标联系人电话"][_i]=="": # contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts") # if contacts is not None: # _person,_phone = getOneContact(contacts) # df_data["招标联系人"][_i] = _person # df_data["招标联系人电话"][_i] = _phone df_data["采购系统"][_i] = dict_enterprise.get(_enterprise_name,{}).get("procurement_system","") def fixZhaobiao_page_time(df_data): ots_client = getConnect_ots() for i in range(len(df_data["docid"])): _docid = df_data["docid"][i] bool_query = BoolQuery(must_queries=[TermQuery("docids",_docid)]) rows,next_token,total_count,is_all_succeed = ots_client.search("project2","project2_index", SearchQuery(bool_query,limit=1), ColumnsToGet(["zhao_biao_page_time","bidway","bidding_budget"],ColumnReturnType.SPECIFIED)) list_data = getRow_ots(rows) if len(list_data)>0: df_data["招标公告时间"][i] = list_data[0].get("zhao_biao_page_time","") df_data["招标采购方式"][i] = list_data[0].get("bidway","") new_bidding_budget = list_data[0].get("bidding_budget",0) if df_data["预算金额(元)"][i]=="" or float(df_data["预算金额(元)"][i])==0: if df_data["中标金额(元)"][i]!="" and new_bidding_budget>=float(df_data["中标金额(元)"][i]): df_data["预算金额(元)"][i] = new_bidding_budget # df_data["招标预算金额(元)"][i] = list_data[0].get("bidding_budget","") def popRows(df_data): list_pop = [] set_docid = set() for _c in range(len(df_data["采购系统"])): if df_data["采购系统"][_c]!="教育系统" or df_data["中标金额(元)"][_c]=="": list_pop.append(_c) set_docid.add(df_data["docid"][_c]) list_pop.reverse() for k,v in df_data.items(): for _p in list_pop: v.pop(_p) _index = 0 set_docid1 = set() for _i in range(len(df_data["序号"])): _docid = df_data["docid"][_i] if _docid not in set_docid1: _index += 1 set_docid1.add(_docid) df_data["序号"][_i] = _index return len(set_docid) df = pd.read_excel("20220927v1.4.xlsx",sheetname=1) list_search_dict = [] for title_kw,content_kw,title_rm,content_rm in zip(df["标题采集关键词"],df["全文采集关键词"],df["标题排除词"],df["全文排除词"]): if not isinstance(title_kw,str): title_kw = "" if not isinstance(content_kw,str): content_kw = "" if not isinstance(title_rm,str): title_rm = "" if not isinstance(content_rm,str): content_rm = "" _dict = {"title_kw":re.split("\s|,|,",str(title_kw)), "content_kw":re.split("\s|,|,",str(content_kw)), "title_rm":re.split("\s|,|,",str(title_rm)), "content_rm":re.split("\s|,|,",str(content_rm))} list_search_dict.append(_dict) columns = ["extract_json","doctextcon","attachmenttextcon","doctitle","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose","web_source_no","web_source_name","service_time","person_review","time_get_file_start","time_get_file_end","time_earnest_money_start","time_earnest_money_end"] current_date = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime()))) last_date = timeAdd(current_date,-1) current_date = "2022-10-14" last_date = "2023-05-07" dict_channel = getDict_docchannel() list_mq = [] list_query = [] for _d in list_search_dict: print(_d) kw_should_q = [] list_keys = [] title_kw = _d.get("title_kw",[]) if len(title_kw)>0: kw_should_q.append(generateBoolShouldQuery(["doctitle"],title_kw,MatchPhraseQuery)) list_keys.extend(title_kw) content_kw = _d.get("content_kw",[]) if len(content_kw)>0: kw_should_q.append(generateBoolShouldQuery(["doctextcon","attachmenttextcon"],content_kw,MatchPhraseQuery)) list_keys.extend(content_kw) rm_should_q = [] title_rm = _d.get("title_rm",[]) if len(title_rm)>0: rm_should_q.append(generateBoolShouldQuery(["doctitle"],title_rm,MatchPhraseQuery)) content_rm = _d.get("content_rm",[]) if len(content_rm)>0: rm_should_q.append(generateBoolShouldQuery(["doctextcon","attachmenttextcon"],content_rm,MatchPhraseQuery)) _query = BoolQuery(must_queries=[ generateBoolShouldQuery(["docchannel"],[101],TermQuery), # RangeQuery("page_time",last_date,current_date), RangeQuery("page_time",current_date,last_date), # RangeQuery("status",201,300,True,True), # TermQuery("docid",263568527), # generateBoolShouldQuery(["doctitle","doctextcon","attachmenttextcon"],list_keys,MatchPhraseQuery), BoolQuery(should_queries=kw_should_q), # generateBoolShouldQuery(["province"],list_province,WildcardQuery), # RangeQuery("crtime","%s"%last_date,"%s"%current_date) ], must_not_queries=rm_should_q) list_query.append({"query":_query,"limit":50000,"keyword":list_keys}) break def getFixData(): filename = "C:\\Users\\Administrator\\Desktop\\视源数据清洗.xlsx" dict_filter = {} dict_update = {} df = pd.read_excel(filename,0) for _docid,bidding_budget,win_bid_price,is_dup,is_fb in zip(df["docid"],df["检查招标金额"],df["中标金额(元)"],df["是否重复"],df["是否废标"]): if str(is_dup) in ("是","1") or str(is_fb) in ("是","1"): dict_filter[_docid] = 1 else: dict_update[_docid] = {"docid":_docid, "bidding_budget":bidding_budget, "win_bid_price":win_bid_price} df = pd.read_excel(filename,1) for _docid,bidding_budget,win_bid_price,is_dup,is_fb in zip(df["docid"],df["检查招标金额"],df["中标金额(元)"],df["是否重复"],df["是否废标"]): if str(is_dup) in ("是","1") or str(is_fb) in ("是","1"): dict_filter[_docid] = 1 else: dict_update[_docid] = {"docid":_docid, "bidding_budget":bidding_budget, "win_bid_price":win_bid_price} return dict_filter,dict_update list_row = getDocument(list_query,columns) list_row = filterRow(list_row) print("list_row",len(list_row)) set_columns1 = set() list_df_columns1 = [] df_data = {} getRowData(df_data,list_row,dict_channel,set_columns1,list_df_columns1) print("len df_data",len(df_data["docid"])) fixContactPerson(df_data,list_df_columns1) fixZhaobiao_page_time(df_data) size_pop = popRows(df_data) print("size_pop",size_pop) df = pd.DataFrame(df_data) filename = os.path.dirname(__file__)+"/data/%s关键词数据导出数量%d_%s.xlsx"%(current_date,len(list_row)-size_pop,getCurrent_date("%Y-%m-%d_%H%M%S")) df.to_excel(filename,columns=list_df_columns1,index=False) host = "smtp.exmail.qq.com" username = "vip@bidizhaobiao.com" password = "Biaoxun66-" receivers = ["1175730271@qq.com","747012698@qq.com","995116318@qq.com"] receivers = ["1175730271@qq.com"] attachs = [filename] sendEmail(host,username,password,receivers,attachs=attachs) def export4_by_project(self): def getRowData(df_data,rows,dict_channel,set_columns1,list_df_column1): # dict_filter,dict_update = getFixData() ots_client = getConnect_ots() _index = 0 for row in rows: _index += 1 item = {} _dict = row keys = row["keyword"] log("================") log(keys) docid = row.get("docids","") log(docid) try: docid_0 = docid.split(",")[0] except Exception as e: continue log(docid) log(_dict.get("doctitle","")+_dict.get("doctextcon","")+_dict.get("attachmenttextcon","")) _keyword = str(list(set([ i for i in re.findall("|".join([re.escape(i) for i in keys if i!='']),_dict.get("doctitle","")+_dict.get("doctextcon","")+_dict.get("attachmenttextcon","")) if i!='' ]))) log(_keyword) product_attrs = [{}] # for doc in docid.split(","): # _q = BoolQuery(must_queries=[TermQuery("docid",int(doc))]) # r,n,t,_ = ots_client.search("document","document_index",SearchQuery(_q),columns_to_get=ColumnsToGet(["extract_json",ColumnReturnType.SPECIFIED])) # r = getRow_ots(r) # if len(r)>0: # _extract = json.loads(r[0].get("extract_json","{}")) # if len(set(keys)&set(["AR一体机","一体机","短焦投影仪","教学一体机","云桌面","录播","电子书包","电子班牌","智慧黑板"]))>0: # product_attrs = _extract.get("product_attrs",{"data":[{}]}).get("data") # else: # product_attrs = [{}] # if len(product_attrs)==0: # product_attrs = [{}] # win_tenderer = "" # bidding_budget = "" # win_bid_price = "" # sub_docs_json = json.loads(_dict.get("sub_docs_json","[]")) # for _doc in sub_docs_json: # if win_tenderer=="": # win_tenderer = _doc.get("win_tenderer","") # if bidding_budget=="": # bidding_budget= _doc.get("bidding_budget","") # if win_bid_price=="": # win_bid_price = _doc.get("win_bid_price","") _index1 = 0 # if _dict.get("docid") in dict_filter: # print("====filter") # continue # if _dict.get("docid") in dict_update: # print("====update") # win_bid_price = dict_update[_dict.get("docid")].get("win_bid_price","") # bidding_budget= dict_update[_dict.get("docid")].get("bidding_budget","") for _attrs in product_attrs: _index1 += 1 if re.search("运费",_attrs.get("product","")) is not None: if _index1==1: _attrs = {} else: continue set_dict_item_columns(set_columns1,list_df_column1,item,"序号",_index) # set_dict_item_columns(set_columns1,list_df_column1,item,"正文",_dict.get("doctextcon","")) # set_dict_item_columns(set_columns1,list_df_column1,item,"附件",_dict.get("attachmenttextcon","")) # set_dict_item_columns(set_columns1,list_df_column1,item,"项目名称",_dict.get("project_name","")) set_dict_item_columns(set_columns1,list_df_column1,item,"docid",docid) set_dict_item_columns(set_columns1,list_df_column1,item,"网站链接",'=HYPERLINK("http://www.bidizhaobiao.com/info-%s.html","查看公告")'%(str(docid))) set_dict_item_columns(set_columns1,list_df_column1,item,"省份",_dict.get("province")) set_dict_item_columns(set_columns1,list_df_column1,item,"城市",_dict.get("city")) set_dict_item_columns(set_columns1,list_df_column1,item,"地区",_dict.get("district")) set_dict_item_columns(set_columns1,list_df_column1,item,"公告时间",_dict.get("page_time")) set_dict_item_columns(set_columns1,list_df_column1,item,"招标公告时间",_dict.get("zhao_biao_page_time")) set_dict_item_columns(set_columns1,list_df_column1,item,"项目标题",_dict.get("doctitles")) set_dict_item_columns(set_columns1,list_df_column1,item,"采购单位",_dict.get("tenderee")) set_dict_item_columns(set_columns1,list_df_column1,item,"中标供应商",_dict.get("win_tenderer")) set_dict_item_columns(set_columns1,list_df_column1,item,"采购方式",_dict.get("bidway","")) set_dict_item_columns(set_columns1,list_df_column1,item,"招标采购方式","") set_dict_item_columns(set_columns1,list_df_column1,item,"采购系统","") set_dict_item_columns(set_columns1,list_df_column1,item,"产品名称",_attrs.get("product")) set_dict_item_columns(set_columns1,list_df_column1,item,"品牌",_attrs.get("brand","")) # set_dict_item_columns(set_columns1,list_df_column1,item,"规格型号",_attrs.get("specs","")) set_dict_item_columns(set_columns1,list_df_column1,item,"预算金额(元)",_dict.get("bidding_budget")) # set_dict_item_columns(set_columns1,list_df_column1,item,"招标预算金额(元)","") _quantity = "" _uniPrice = "" try: _quantity = int(re.sub("[^\d]","",_attrs.get("quantity","0"))) if _quantity==0: _quantity = "" except Exception as e: pass try: _uniPrice = float(_attrs.get("unitPrice","0")) if _uniPrice==0: _uniPrice = "" except Exception as e: pass set_dict_item_columns(set_columns1,list_df_column1,item,"数量",_quantity) set_dict_item_columns(set_columns1,list_df_column1,item,"单价(元)",_uniPrice) sumPrice = "" if _quantity!="" and _uniPrice!="": sumPrice = _quantity*_uniPrice set_dict_item_columns(set_columns1,list_df_column1,item,"总价(元)",sumPrice) set_dict_item_columns(set_columns1,list_df_column1,item,"中标金额(元)",_dict.get("win_bid_price")) set_dict_item_columns(set_columns1,list_df_column1,item,"关键词",_keyword) for k,v in item.items(): if k not in df_data: df_data[k] = [] df_data[k].append(v) def filterRow(list_row,key="docid"): set_id = set() _filter_row = [] for row in list_row: _v = row.get(key,"") if _v in set_id: continue set_id.add(_v) _filter_row.append(row) return _filter_row def fixContactPerson(df_data,list_df_columns,get_legal_person=False): set_enterprise = set() if len(df_data.keys())>0: for _tenderee,_win_tenderer in zip(df_data["采购单位"],df_data["中标供应商"]): set_enterprise.add(_tenderee) set_enterprise.add(_win_tenderer) if "" in set_enterprise: set_enterprise.remove("") if None in set_enterprise: set_enterprise.remove(None) dict_enterprise = getDictEnterprise(list(set_enterprise),columns_to_get = ["procurement_system"]) # conn = getConnection_oracle() # cursor = conn.cursor() if len(set_enterprise)>0: for _i in range(len(df_data["采购单位"])): _enterprise_name = df_data["采购单位"][_i] # if df_data["招标联系人电话"][_i]=="": # contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts") # if contacts is not None: # _person,_phone = getOneContact(contacts) # df_data["招标联系人"][_i] = _person # df_data["招标联系人电话"][_i] = _phone df_data["采购系统"][_i] = dict_enterprise.get(_enterprise_name,{}).get("procurement_system","") def fixZhaobiao_page_time(df_data): ots_client = getConnect_ots() for i in range(len(df_data["docid"])): _docid = df_data["docid"][i] bool_query = BoolQuery(must_queries=[TermQuery("docids",_docid)]) rows,next_token,total_count,is_all_succeed = ots_client.search("project2","project2_index", SearchQuery(bool_query,limit=1), ColumnsToGet(["zhao_biao_page_time","bidway","bidding_budget"],ColumnReturnType.SPECIFIED)) list_data = getRow_ots(rows) if len(list_data)>0: df_data["招标公告时间"][i] = list_data[0].get("zhao_biao_page_time","") df_data["招标采购方式"][i] = list_data[0].get("bidway","") new_bidding_budget = list_data[0].get("bidding_budget",0) if df_data["预算金额(元)"][i]=="" or float(df_data["预算金额(元)"][i])==0: if df_data["中标金额(元)"][i]!="" and new_bidding_budget>=float(df_data["中标金额(元)"][i]): df_data["预算金额(元)"][i] = new_bidding_budget # df_data["招标预算金额(元)"][i] = list_data[0].get("bidding_budget","") def popRows(df_data): list_pop = [] set_docid = set() for _c in range(len(df_data["采购系统"])): if df_data["采购系统"][_c]!="教育系统" or df_data["中标金额(元)"][_c]=="": list_pop.append(_c) set_docid.add(df_data["docid"][_c]) list_pop.reverse() for k,v in df_data.items(): for _p in list_pop: v.pop(_p) _index = 0 set_docid1 = set() for _i in range(len(df_data["序号"])): _docid = df_data["docid"][_i] if _docid not in set_docid1: _index += 1 set_docid1.add(_docid) df_data["序号"][_i] = _index return len(set_docid) df = pd.read_excel("20220927v1.4.xlsx",sheetname=1) list_search_dict = [] for title_kw,content_kw,title_rm,content_rm in zip(df["标题采集关键词"],df["全文采集关键词"],df["标题排除词"],df["全文排除词"]): if not isinstance(title_kw,str): title_kw = "" if not isinstance(content_kw,str): content_kw = "" if not isinstance(title_rm,str): title_rm = "" if not isinstance(content_rm,str): content_rm = "" _dict = {"title_kw":re.split("\s|,|,",str(title_kw)), "content_kw":re.split("\s|,|,",str(content_kw)), "title_rm":re.split("\s|,|,",str(title_rm)), "content_rm":re.split("\s|,|,",str(content_rm))} list_search_dict.append(_dict) columns = ["docids","doctitles","product","province","bidway","city","district","zhao_biao_page_time","page_time","industry","info_type","tenderee","bidding_budget","project_code","project_name","win_tenderer","win_bid_price","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose","web_source_no","web_source_name","service_time","person_review","time_get_file_start","time_get_file_end","time_earnest_money_start","time_earnest_money_end"] current_date = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime()))) last_date = timeAdd(current_date,-1) current_date = "2022-10-14" last_date = "2023-05-07" dict_channel = getDict_docchannel() list_mq = [] list_query = [] for _d in list_search_dict: print(_d) kw_should_q = [] list_keys = [] title_kw = _d.get("title_kw",[]) if len(title_kw)>0: kw_should_q.append(generateBoolShouldQuery(["doctitles"],title_kw,MatchPhraseQuery)) list_keys.extend(title_kw) content_kw = _d.get("content_kw",[]) if len(content_kw)>0: kw_should_q.append(generateBoolShouldQuery(["doctextcon","attachmenttextcon"],content_kw,MatchPhraseQuery)) list_keys.extend(content_kw) rm_should_q = [] title_rm = _d.get("title_rm",[]) if len(title_rm)>0: rm_should_q.append(generateBoolShouldQuery(["doctitles"],title_rm,MatchPhraseQuery)) content_rm = _d.get("content_rm",[]) if len(content_rm)>0: rm_should_q.append(generateBoolShouldQuery(["doctextcon","attachmenttextcon"],content_rm,MatchPhraseQuery)) _query = BoolQuery(must_queries=[ # generateBoolShouldQuery(["docchannel"],[101],TermQuery), # RangeQuery("page_time",last_date,current_date), RangeQuery("page_time",current_date,last_date), RangeQuery("status",201,300,True,True), # TermQuery("docid",263568527), # generateBoolShouldQuery(["doctitle","doctextcon","attachmenttextcon"],list_keys,MatchPhraseQuery), BoolQuery(should_queries=kw_should_q), # # generateBoolShouldQuery(["province"],list_province,WildcardQuery), # # RangeQuery("crtime","%s"%last_date,"%s"%current_date) ], must_not_queries=rm_should_q) list_query.append({"query":_query,"limit":50000,"keyword":list_keys}) def getFixData(): filename = "C:\\Users\\Administrator\\Desktop\\视源数据清洗.xlsx" dict_filter = {} dict_update = {} df = pd.read_excel(filename,0) for _docid,bidding_budget,win_bid_price,is_dup,is_fb in zip(df["docid"],df["检查招标金额"],df["中标金额(元)"],df["是否重复"],df["是否废标"]): if str(is_dup) in ("是","1") or str(is_fb) in ("是","1"): dict_filter[_docid] = 1 else: dict_update[_docid] = {"docid":_docid, "bidding_budget":bidding_budget, "win_bid_price":win_bid_price} df = pd.read_excel(filename,1) for _docid,bidding_budget,win_bid_price,is_dup,is_fb in zip(df["docid"],df["检查招标金额"],df["中标金额(元)"],df["是否重复"],df["是否废标"]): if str(is_dup) in ("是","1") or str(is_fb) in ("是","1"): dict_filter[_docid] = 1 else: dict_update[_docid] = {"docid":_docid, "bidding_budget":bidding_budget, "win_bid_price":win_bid_price} return dict_filter,dict_update list_row = getDocument(list_query,columns,table_name="project2",table_index="project2_index") list_row = filterRow(list_row,key="uuid") print("list_row",len(list_row)) set_columns1 = set() list_df_columns1 = [] df_data = {} getRowData(df_data,list_row,dict_channel,set_columns1,list_df_columns1) print("len df_data",len(df_data["docid"])) fixContactPerson(df_data,list_df_columns1) # fixZhaobiao_page_time(df_data) # size_pop = popRows(df_data) size_pop = 0 print("size_pop",size_pop) df = pd.DataFrame(df_data) filename = os.path.dirname(__file__)+"/data/%s关键词数据导出数量%d_%s.xlsx"%(current_date,len(list_row)-size_pop,getCurrent_date("%Y-%m-%d_%H%M%S")) df.to_excel(filename,columns=list_df_columns1,index=False) host = "smtp.exmail.qq.com" username = "vip@bidizhaobiao.com" password = "Biaoxun66-" receivers = ["1175730271@qq.com","747012698@qq.com","995116318@qq.com"] receivers = ["1175730271@qq.com"] attachs = [filename] sendEmail(host,username,password,receivers,attachs=attachs) import json def getUserInfo(self,code,userid): data = { "codeStr": code, "userId": userid, } auth = {'Content-Type': 'application/json',"Authorization": "Bearer eyJhbGciOiJSUzI1NiJ9.eyJpc3MiOiJCWEtDX1VOSVRFX0FQSSIsInN1YiI6IjIwIiwiZGF0YSI6eyJkZXZpY2VUeXBlIjoiMjAiLCJyb2xlIjoiMjAiLCJpZCI6ImJ4a2Mtd29ya2JlbmNoIn0sImV4cCI6MTk2NjY5MjE4MSwiaWF0IjoxNjUxMDQ0MTgxfQ.lFurpoMOaVmCH3GKmJ_cqrseuSaEZJBndp8PE4QpjFY6R2mfXh5e96zn_Ma3qVkp-NKAlpA1nRYl1y08xkzj07KEx4HO_Nh6v3sfGwnDyRUz35SW3yr1fhvnbh5hEpnoCVJnFQfoMXFfn780VuOcKd01lNUjABFbSLvypDv8p-gJzkrE7z5YB53tZ_lGm_dbzKihTBjG1sBYEKJT3ekYz5Px_n-lw05IkUdbckE7n6Xj7PRPTaEjzoc3PF_pgESsdcTPSDhJlLR8x63YCqbmWy6ydhwnlEQE5rHIFA2Q5VhIxUcT7fXUBZNGmzRgaqxB4dIg2369IhpeBAJ2TQ63qg"} r = requests.patch("https://unite.bidizhaobiao.com/api/v1/authorization/services",json=data,headers=auth) if r.status_code==200: return r.status_code,json.loads(r.content.decode("utf8")) return r.status_code,"" def export_15824381998(): def removeData(df_data): list_remove_index = [] list_c = df_data.get("招标人采购系统",[]) for _c_i in range(len(list_c)): if list_c[_c_i]=="企业采购系统": list_remove_index.append(_c_i) list_remove_index.reverse() print(list_remove_index) for k,v in df_data.items(): for _rc in list_remove_index: v.pop(_rc) for k,v in df_data.items(): v = v[:500] def getRowData_zb(df_data,rows,set_line,list_keyword,dict_channel,dumplicate): dict_line = {} # list_data = getRow_ots(rows) _index = 0 rows.sort(key=lambda x:x.get("docid",0),reverse=True) set_col = set() df_columns = [] for row in rows: _index += 1 item = {} _dict = row bidding_budget = 0 sub_docs = json.loads(_dict.get("sub_docs_json","[]")) for doc in sub_docs: if doc.get("bidding_budget",0)>0: bidding_budget = doc.get("bidding_budget",0) set_dict_item_columns(set_col,df_columns,item,"省份",_dict.get("province","")) set_dict_item_columns(set_col,df_columns,item,"城市",_dict.get("city","")) set_dict_item_columns(set_col,df_columns,item,"公告类别",dict_channel.get(_dict.get("docchannel",""),"")) set_dict_item_columns(set_col,df_columns,item,"发布时间",_dict.get("page_time","")) set_dict_item_columns(set_col,df_columns,item,"公告标题",_dict.get("doctitle","")) set_dict_item_columns(set_col,df_columns,item,"采购内容",_dict.get("product","")) set_dict_item_columns(set_col,df_columns,item,"采购方式",_dict.get("bidway","")) set_dict_item_columns(set_col,df_columns,item,"项目金额",bidding_budget) set_dict_item_columns(set_col,df_columns,item,"招标单位",_dict.get("tenderee","")) set_dict_item_columns(set_col,df_columns,item,"招标联系人",_dict.get("tenderee_contact","")) set_dict_item_columns(set_col,df_columns,item,"招标联系人电话",_dict.get("tenderee_phone","")) set_dict_item_columns(set_col,df_columns,item,"代理单位",_dict.get("agency","")) set_dict_item_columns(set_col,df_columns,item,"代理联系人",_dict.get("agency_contact","")) set_dict_item_columns(set_col,df_columns,item,"代理联系人电话",_dict.get("agency_phone","")) set_dict_item_columns(set_col,df_columns,item,"获取文件截止时间",_dict.get("time_get_file_end","")) set_dict_item_columns(set_col,df_columns,item,"开标时间",_dict.get("time_bidopen","")) set_dict_item_columns(set_col,df_columns,item,"链接","http://www.bidizhaobiao.com/info-%s.html"%str(_dict.get("docid",""))) for k,v in item.items(): if k not in df_data: df_data[k] = [] df_data[k].append(v) return df_columns def getRowData_yc(df_data,rows,set_line,list_keyword,dict_channel,dumplicate): dict_line = {} # list_data = getRow_ots(rows) _index = 0 rows.sort(key=lambda x:x.get("docid",0),reverse=True) set_col = set() df_columns = [] for row in rows: _index += 1 item = {} _dict = row _type = _dict.get("type",0) if _type==0: _type = "周期预测" elif _type==1: _type = "采购意向" elif _type==2: _type = "到期预测" else: _type = "废标重招" set_dict_item_columns(set_col,df_columns,item,"省份",_dict.get("province","")) set_dict_item_columns(set_col,df_columns,item,"城市",_dict.get("city","")) set_dict_item_columns(set_col,df_columns,item,"预测类型",_type) set_dict_item_columns(set_col,df_columns,item,"预计开始时间",_dict.get("may_begin","")) set_dict_item_columns(set_col,df_columns,item,"预计结束时间",_dict.get("may_end","")) set_dict_item_columns(set_col,df_columns,item,"采购内容",_dict.get("product","")) set_dict_item_columns(set_col,df_columns,item,"上次采购时间",_dict.get("last_page_time","")) set_dict_item_columns(set_col,df_columns,item,"招标单位",_dict.get("tenderee","")) set_dict_item_columns(set_col,df_columns,item,"招标联系人",_dict.get("tenderee_contact","")) set_dict_item_columns(set_col,df_columns,item,"招标联系人电话",_dict.get("tenderee_phone","")) set_dict_item_columns(set_col,df_columns,item,"代理单位",_dict.get("agency","")) set_dict_item_columns(set_col,df_columns,item,"代理联系人",_dict.get("agency_contact","")) set_dict_item_columns(set_col,df_columns,item,"代理联系人电话",_dict.get("agency_phone","")) for k,v in item.items(): if k not in df_data: df_data[k] = [] df_data[k].append(v) return df_columns def getRowData_win(df_data,rows,set_line,list_keyword,dict_channel,dumplicate): dict_line = {} # list_data = getRow_ots(rows) _index = 0 rows.sort(key=lambda x:x.get("docid",0),reverse=True) set_col = set() df_columns = [] for row in rows: _index += 1 item = {} _dict = row bidding_budget = 0 sub_docs = json.loads(_dict.get("sub_docs_json","[]")) zbr = "" zbje = "" for doc in sub_docs: if doc.get("bidding_budget",0)>0: bidding_budget = doc.get("bidding_budget",0) if doc.get("win_tenderer") is not None: zbr += str(doc.get("win_tenderer"))+"\r\n" zbje += str(doc.get("win_bid_price",0))+"\r\n" if doc.get("second_tenderer") is not None: zbr += str(doc.get("second_tenderer"))+"\r\n" zbje += str(doc.get("second_bid_price",0))+"\r\n" if doc.get("third_tenderer") is not None: zbr += str(doc.get("third_tenderer"))+"\r\n" zbje += str(doc.get("third_bid_price",0))+"\r\n" set_dict_item_columns(set_col,df_columns,item,"省份",_dict.get("province","")) set_dict_item_columns(set_col,df_columns,item,"城市",_dict.get("city","")) set_dict_item_columns(set_col,df_columns,item,"公告类别",dict_channel.get(_dict.get("docchannel",""),"")) set_dict_item_columns(set_col,df_columns,item,"发布时间",_dict.get("page_time","")) set_dict_item_columns(set_col,df_columns,item,"公告标题",_dict.get("doctitle","")) set_dict_item_columns(set_col,df_columns,item,"采购内容",_dict.get("product","")) set_dict_item_columns(set_col,df_columns,item,"采购方式",_dict.get("bidway","")) set_dict_item_columns(set_col,df_columns,item,"项目金额",bidding_budget) set_dict_item_columns(set_col,df_columns,item,"招标单位",_dict.get("tenderee","")) set_dict_item_columns(set_col,df_columns,item,"招标联系人",_dict.get("tenderee_contact","")) set_dict_item_columns(set_col,df_columns,item,"招标联系人电话",_dict.get("tenderee_phone","")) set_dict_item_columns(set_col,df_columns,item,"代理单位",_dict.get("agency","")) set_dict_item_columns(set_col,df_columns,item,"代理联系人",_dict.get("agency_contact","")) set_dict_item_columns(set_col,df_columns,item,"代理联系人电话",_dict.get("agency_phone","")) set_dict_item_columns(set_col,df_columns,item,"获取文件截止时间",_dict.get("time_get_file_end","")) set_dict_item_columns(set_col,df_columns,item,"开标时间",_dict.get("time_bidopen","")) set_dict_item_columns(set_col,df_columns,item,"中标公司",zbr) set_dict_item_columns(set_col,df_columns,item,"中标金额",zbje) set_dict_item_columns(set_col,df_columns,item,"链接","http://www.bidizhaobiao.com/info-%s.html"%str(_dict.get("docid",""))) for k,v in item.items(): if k not in df_data: df_data[k] = [] df_data[k].append(v) return df_columns def fixContactPerson(df_data,list_df_columns,get_legal_person=False): set_enterprise = set() if len(df_data.keys())>0: for _tenderee,_agency in zip(df_data["招标单位"],df_data["代理单位"]): set_enterprise.add(_tenderee) set_enterprise.add(_agency) if "" in set_enterprise: set_enterprise.remove("") if None in set_enterprise: set_enterprise.remove(None) dict_enterprise = getDictEnterprise(list(set_enterprise),columns_to_get = ["procurement_system","company_org_type","reg_capital","actual_capital","contacts","estiblish_time","social_staff_num","zhong_biao_number","tou_biao_number","credit_code","legal_person_name","phone_number"]) # conn = getConnection_oracle() # cursor = conn.cursor() if len(set_enterprise)>0: for _i in range(len(df_data["招标单位"])): _enterprise_name = df_data["招标单位"][_i] if df_data["招标联系人电话"][_i]=="": contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts") if contacts is not None: _person,_phone = getOneContact(contacts) df_data["招标联系人"][_i] = _person df_data["招标联系人电话"][_i] = _phone _enterprise_name = df_data["代理单位"][_i] if df_data["代理联系人电话"][_i]=="": contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts") if contacts is not None: _person,_phone = getOneContact(contacts) df_data["代理联系人"][_i] = _person df_data["代理联系人电话"][_i] = _phone # _enterprise_name = df_data["中标单位"][_i] # if get_legal_person: # _person = dict_enterprise.get(_enterprise_name,{}).get("legal_person_name","") # _phone = dict_enterprise.get(_enterprise_name,{}).get("phone_number","") # if len(_phone)==11 and _phone[0]=="1": # df_data["中标单位联系人"][_i] = _person # df_data["中标单位联系电话"][_i] = _phone # else: # if df_data["中标单位联系电话"][_i]=="": # contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts") # if contacts is not None: # _person,_phone = getOneContact(contacts,mobile_only=True) # df_data["中标单位联系人"][_i] = _person # df_data["中标单位联系电话"][_i] = _phone log("start export 15824381998:>>>>>>>>>>>>>>>") current_date = getCurrent_date(format="%Y-%m-%d") last_date = timeAdd(current_date,-2) start_time = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())-7*24*60*60)) if current_date<="2025-05-01": for i in range(10): try: # start_time='2022-07-22' # current_date = '2022-07-28' log("start exporting export2:=================") # columns = ["doctitle","doctextcon","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose"] dict_channel = getDict_docchannel() list_query = [] str_keyword = ''' 光伏组件 ''' list_keyword = splitIntoList(str_keyword,"[\s\n、,,]") str_not_keyword = ''' 清洗机器人 ''' list_not_key = splitIntoList(str_not_keyword,"[\s\n、,,]") tenderee_keywrod = "医院、大学、高校、高中" list_t_key = splitIntoList(tenderee_keywrod,"[\s\n、,,]") log(str(list_keyword)) columns = ["doctitle","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_get_file_end","time_bidopen"] bool_query = BoolQuery(must_queries=[ generateBoolShouldQuery(["doctitle"],list_keyword,MatchPhraseQuery), generateBoolShouldQuery(["docchannel"],[51,52,102,103,104,105,114],TermQuery), RangeQuery("crtime",last_date+" 21:00:00",current_date+" 21:00:00",True,True), RangeQuery("status",201,300,True,True), # TermQuery("procurement_system","公安系统"), # generateBoolShouldQuery(["province"],["湖南"],TermQuery), # generateBoolShouldQuery(["tenderee"],list_t_key,WildcardQuery) # generateBoolShouldQuery(["docchannel"],[101,118,119],TermQuery), ], must_not_queries=[ generateBoolShouldQuery(["doctitle","doctextcon","attachmenttextcon"],list_not_key,MatchPhraseQuery), ] ) list_row = getDocument([{"query":bool_query,"limit":500}],columns,thread_count=1) log("get document %d rows"%len(list_row)) df_data = {} set_line = set() # list_row = filterRow(list_row) df_columns = getRowData_zb(df_data,list_row,set_line,list_keyword,dict_channel,True) fixContactPerson(df_data,df_columns) df = pd.DataFrame(df_data) df_data_filter = {} for c in df_columns: if c not in df_data_filter: df_data_filter[c] = [] for tenderee_i in range(len(df_data["招标单位"])): if df_data["招标单位"][tenderee_i] in ("国家电力投资集团有限公司","中国大唐集团有限公司"): for c in df_columns: df_data_filter[c].append(df_data[c][tenderee_i]) df_f = pd.DataFrame(df_data_filter) time_end = timeAdd(current_date,60) columns = ["project_name","type","province","city","bidding_budget","city","demand","last_page_time"] bool_query = BoolQuery(must_queries=[ generateBoolShouldQuery(["demand","product"],list_keyword,MatchPhraseQuery), RangeQuery("may_end",range_from=current_date), RangeQuery("may_end",range_to=time_end), # TermQuery("procurement_system","公安系统"), # generateBoolShouldQuery(["province"],["湖南"],TermQuery), # generateBoolShouldQuery(["tenderee"],list_t_key,WildcardQuery) # generateBoolShouldQuery(["docchannel"],[101,118,119],TermQuery), ], must_not_queries=[ generateBoolShouldQuery(["demand","product"],list_not_key,MatchPhraseQuery), ] ) list_row = getDocument([{"query":bool_query,"limit":500}],columns,table_name="preproject",table_index="preproject_index",thread_count=1,sort_column="may_end") log("get document %d rows"%len(list_row)) df_data = {} set_line = set() # list_row = filterRow(list_row) df_columns_yc = getRowData_yc(df_data,list_row,set_line,list_keyword,dict_channel,True) fixContactPerson(df_data,df_columns_yc) df1 = pd.DataFrame(df_data) df_data_filter = {} for c in df_columns_yc: if c not in df_data_filter: df_data_filter[c] = [] for tenderee_i in range(len(df_data["招标单位"])): if df_data["招标单位"][tenderee_i] in ("国家电力投资集团有限公司","中国大唐集团有限公司"): for c in df_columns_yc: if c not in df_data_filter: df_data_filter[c] = [] df_data_filter[c].append(df_data[c][tenderee_i]) df1_f = pd.DataFrame(df_data_filter) filename = os.path.dirname(__file__)+"/data/%s年%s招标数据导出%s.xlsx"%(start_time[:4],current_date,getCurrent_date(format="%Y-%m-%d_%H%M%S")) with pd.ExcelWriter(filename) as writer: df.to_excel(writer,sheet_name="招标数据",columns=df_columns if not df.empty else None) df_f.to_excel(writer,sheet_name="特定甲方招标数据",columns=df_columns if not df_f.empty else None) df1.to_excel(writer,sheet_name="项目预测",columns=df_columns_yc[:-3] if not df1.empty else None) df1_f.to_excel(writer,sheet_name="特定甲方项目预测",columns=df_columns_yc[:-3] if not df1_f.empty else None) log(str(filename)) host = "smtp.exmail.qq.com" username = "vip@bidizhaobiao.com" password = "Biaoxun66-" receivers = ["1175730271@qq.com","565748324@qq.com","1396488964@qq.com"] # receivers = ["1175730271@qq.com"] attachs = [filename] sendEmail(host,username,password,receivers,attachs=attachs) df_data_filter = {} for c in df_columns: if c not in df_data_filter: df_data_filter[c] = [] for tenderee_i in range(len(df_data["招标单位"])): if df_data["招标单位"][tenderee_i] in ("国家电力投资集团有限公司","国家能源投资集团有限责任公司","国家能源集团龙源阿里新能源(阿里)有限公司"): for c in df_columns: df_data_filter[c].append(df_data[c][tenderee_i]) df_f = pd.DataFrame(df_data_filter) df_data_filter1 = {} for c in df_columns_yc: if c not in df_data_filter1: df_data_filter1[c] = [] for tenderee_i in range(len(df_data["招标单位"])): if df_data["招标单位"][tenderee_i] in ("国家电力投资集团有限公司","国家能源投资集团有限责任公司","国家能源集团龙源阿里新能源(阿里)有限公司"): for c in df_columns_yc: if c not in df_data_filter1: df_data_filter1[c] = [] df_data_filter1[c].append(df_data[c][tenderee_i]) df1_f1 = pd.DataFrame(df_data_filter1) filename = os.path.dirname(__file__)+"/data/%s年%s招标数据导出%s.xlsx"%(start_time[:4],current_date,getCurrent_date(format="%Y-%m-%d_%H%M%S")) with pd.ExcelWriter(filename) as writer: df_f.to_excel(writer,sheet_name="特定甲方招标数据",columns=df_columns if not df_f.empty else None) df1_f1.to_excel(writer,sheet_name="特定甲方项目预测",columns=df_columns_yc[:-3] if not df1_f.empty else None) log(str(filename)) host = "smtp.exmail.qq.com" username = "vip@bidizhaobiao.com" password = "Biaoxun66-" receivers = ["450604061@qq.com"] # receivers = ["1175730271@qq.com"] attachs = [filename] sendEmail(host,username,password,receivers,attachs=attachs) columns = ["doctitle","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_get_file_end","time_bidopen"] bool_query = BoolQuery(must_queries=[ generateBoolShouldQuery(["doctitle"],list_keyword,MatchPhraseQuery), generateBoolShouldQuery(["docchannel"],[101,118,119,120,121,122],TermQuery), RangeQuery("crtime",last_date+" 21:00:00",current_date+" 21:00:00",True,True), RangeQuery("status",201,300,True,True), # TermQuery("procurement_system","公安系统"), # generateBoolShouldQuery(["province"],["湖南"],TermQuery), # generateBoolShouldQuery(["tenderee"],list_t_key,WildcardQuery) # generateBoolShouldQuery(["docchannel"],[101,118,119],TermQuery), ], must_not_queries=[ generateBoolShouldQuery(["doctitle","doctextcon","attachmenttextcon"],list_not_key,MatchPhraseQuery), ] ) list_row = getDocument([{"query":bool_query,"limit":500}],columns,thread_count=1) log("get document %d rows"%len(list_row)) df_data = {} set_line = set() # list_row = filterRow(list_row) df_columns = getRowData_win(df_data,list_row,set_line,list_keyword,dict_channel,True) fixContactPerson(df_data,df_columns) df = pd.DataFrame(df_data) # 调整行高 # for i in range(1, len(df) + 1): # df.style.set_properties(max_col=100, min_col=1, max_row=i, min_row=i, h=20, wrap_text=True) # 设置行高为20,自动换行 styled_df = (df.style .set_properties(**{'text-align': 'center', 'white-space': 'pre-line'}) .set_table_styles([{'selector': 'td', 'props': [('text-align', 'center')]}])) filename = os.path.dirname(__file__)+"/data/%s年%s中标数据导出%s.xlsx"%(start_time[:4],current_date,getCurrent_date(format="%Y-%m-%d_%H%M%S")) with pd.ExcelWriter(filename,engine="openpyxl") as writer: styled_df.to_excel(writer,columns = df_columns,index=False) adjust_excel(filename,filename,columns=["A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R"]) host = "smtp.exmail.qq.com" username = "vip@bidizhaobiao.com" password = "Biaoxun66-" receivers = ["1175730271@qq.com","565748324@qq.com","1396488964@qq.com","1141385052@qq.com","1713739820@qq.com"] # receivers = ["1175730271@qq.com"] attachs = [filename] sendEmail(host,username,password,receivers,attachs=attachs) break except Exception as e: traceback.print_exc() def export_13510123669(): def removeData(df_data): list_remove_index = [] list_c = df_data.get("招标人采购系统",[]) for _c_i in range(len(list_c)): if list_c[_c_i]=="企业采购系统": list_remove_index.append(_c_i) list_remove_index.reverse() print(list_remove_index) for k,v in df_data.items(): for _rc in list_remove_index: v.pop(_rc) for k,v in df_data.items(): v = v[:500] def getRowData_zb(df_data,rows,set_line,list_keyword,dict_channel,dumplicate): dict_line = {} # list_data = getRow_ots(rows) _index = 0 rows.sort(key=lambda x:x.get("docid",0),reverse=True) set_col = set() df_columns = [] for row in rows: _index += 1 item = {} _dict = row bidding_budget = 0 sub_docs = json.loads(_dict.get("sub_docs_json","[]")) zbr = "" zbje = 0 win_tenderer_manager = "" win_tenderer_phone = "" for doc in sub_docs: if doc.get("bidding_budget",0)>0: bidding_budget = doc.get("bidding_budget",0) if doc.get("win_tenderer") is not None: zbr = str(doc.get("win_tenderer","")) zbje = doc.get("win_bid_price",0) win_tenderer_manager = str(doc.get("win_tenderer_manager","")) win_tenderer_phone = str(doc.get("win_tenderer_phone","")) doc_type = "" if dict_channel.get(_dict.get("docchannel",""),"")[:2] in ("中标","合同","候选","废标","开标","验收"): doc_type = "中标" else: doc_type = "招标" set_dict_item_columns(set_col,df_columns,item,"序号",_index) set_dict_item_columns(set_col,df_columns,item,"省份",_dict.get("province","未知")) set_dict_item_columns(set_col,df_columns,item,"城市",_dict.get("city","未知")) set_dict_item_columns(set_col,df_columns,item,"公告类型",doc_type) set_dict_item_columns(set_col,df_columns,item,"公告名称",_dict.get("doctitle","")) set_dict_item_columns(set_col,df_columns,item,"项目类型",'') set_dict_item_columns(set_col,df_columns,item,"项目编号",_dict.get("project_code")) set_dict_item_columns(set_col,df_columns,item,"发布时间",_dict.get("page_time","")) set_dict_item_columns(set_col,df_columns,item,"信息类型","%s-%s"%(dict_channel.get(_dict.get("docchannel",""),""),_dict.get("bidway",""))) set_dict_item_columns(set_col,df_columns,item,"报名开始时间",_dict.get("time_registration_start","")) set_dict_item_columns(set_col,df_columns,item,"报名截止时间",_dict.get("time_registration_end","")) set_dict_item_columns(set_col,df_columns,item,"投标开始时间",_dict.get("time_bidstart","")) set_dict_item_columns(set_col,df_columns,item,"投标结束时间",_dict.get("time_bidopen","")) set_dict_item_columns(set_col,df_columns,item,"预算金额(万元)",bidding_budget/10000) set_dict_item_columns(set_col,df_columns,item,"成交金额(万元)",zbje/10000) set_dict_item_columns(set_col,df_columns,item,"招标单位",_dict.get("tenderee","")) set_dict_item_columns(set_col,df_columns,item,"招标单位所在地","") set_dict_item_columns(set_col,df_columns,item,"招标联系人",_dict.get("tenderee_contact","")) set_dict_item_columns(set_col,df_columns,item,"招标联系人电话",_dict.get("tenderee_phone","")) set_dict_item_columns(set_col,df_columns,item,"招标联系人1","") set_dict_item_columns(set_col,df_columns,item,"招标联系人电话1","") set_dict_item_columns(set_col,df_columns,item,"招标联系人2","") set_dict_item_columns(set_col,df_columns,item,"招标联系人电话2","") set_dict_item_columns(set_col,df_columns,item,"招标联系人3","") set_dict_item_columns(set_col,df_columns,item,"招标联系人电话3","") set_dict_item_columns(set_col,df_columns,item,"代理单位",_dict.get("agency","")) set_dict_item_columns(set_col,df_columns,item,"代理单位所在地","") set_dict_item_columns(set_col,df_columns,item,"代理联系人",_dict.get("agency_contact","")) set_dict_item_columns(set_col,df_columns,item,"代理联系人电话",_dict.get("agency_phone","")) set_dict_item_columns(set_col,df_columns,item,"代理联系人1","") set_dict_item_columns(set_col,df_columns,item,"代理联系人电话1","") set_dict_item_columns(set_col,df_columns,item,"代理联系人2","") set_dict_item_columns(set_col,df_columns,item,"代理联系人电话2","") set_dict_item_columns(set_col,df_columns,item,"代理联系人3","") set_dict_item_columns(set_col,df_columns,item,"代理联系人电话3","") set_dict_item_columns(set_col,df_columns,item,"中标单位",zbr) set_dict_item_columns(set_col,df_columns,item,"中标单位所在地","") set_dict_item_columns(set_col,df_columns,item,"项目中标联系人",win_tenderer_manager) set_dict_item_columns(set_col,df_columns,item,"项目中标联系电话",win_tenderer_phone) set_dict_item_columns(set_col,df_columns,item,"中标法人","") set_dict_item_columns(set_col,df_columns,item,"中标法人电话","") set_dict_item_columns(set_col,df_columns,item,"中标股东","") set_dict_item_columns(set_col,df_columns,item,"中标股东电话","") set_dict_item_columns(set_col,df_columns,item,"中标高管","") set_dict_item_columns(set_col,df_columns,item,"中标高管电话","") set_dict_item_columns(set_col,df_columns,item,"中标联系人1","") set_dict_item_columns(set_col,df_columns,item,"中标联系人电话1","") set_dict_item_columns(set_col,df_columns,item,"中标联系人2","") set_dict_item_columns(set_col,df_columns,item,"中标联系人电话2","") set_dict_item_columns(set_col,df_columns,item,"中标联系人3","") set_dict_item_columns(set_col,df_columns,item,"中标联系人电话3","") set_dict_item_columns(set_col,df_columns,item,"招标/采购内容",_dict.get("product","")) set_dict_item_columns(set_col,df_columns,item,"资质","") set_dict_item_columns(set_col,df_columns,item,"公告详情链接","http://www.bidizhaobiao.com/info-%s.html"%str(_dict.get("docid",""))) set_dict_item_columns(set_col,df_columns,item,"招投标分析(合作商)","") set_dict_item_columns(set_col,df_columns,item,"历史中标单位","") set_dict_item_columns(set_col,df_columns,item,"应对策略(作为填)","") for k,v in item.items(): if k not in df_data: df_data[k] = [] df_data[k].append(v) return df_columns def getRowData_yc(df_data,rows,set_line,list_keyword,dict_channel,dumplicate): dict_line = {} # list_data = getRow_ots(rows) _index = 0 rows.sort(key=lambda x:x.get("docid",0),reverse=True) set_col = set() df_columns = [] for row in rows: _index += 1 item = {} _dict = row _type = _dict.get("type",0) if _type==0: _type = "周期预测" elif _type==1: _type = "采购意向" elif _type==2: _type = "到期预测" else: _type = "废标重招" set_dict_item_columns(set_col,df_columns,item,"省份",_dict.get("province","")) set_dict_item_columns(set_col,df_columns,item,"城市",_dict.get("city","")) set_dict_item_columns(set_col,df_columns,item,"类型",'') set_dict_item_columns(set_col,df_columns,item,"预测类型",_type) set_dict_item_columns(set_col,df_columns,item,"项目类型","") set_dict_item_columns(set_col,df_columns,item,"预计开始时间",_dict.get("may_begin","")) set_dict_item_columns(set_col,df_columns,item,"预计结束时间",_dict.get("may_end","")) set_dict_item_columns(set_col,df_columns,item,"采购内容",_dict.get("product","")) set_dict_item_columns(set_col,df_columns,item,"上次采购时间",_dict.get("last_page_time","")) set_dict_item_columns(set_col,df_columns,item,"招标单位",_dict.get("tenderee","")) set_dict_item_columns(set_col,df_columns,item,"招标联系人",_dict.get("tenderee_contact","")) set_dict_item_columns(set_col,df_columns,item,"招标联系人电话",_dict.get("tenderee_phone","")) set_dict_item_columns(set_col,df_columns,item,"代理单位",_dict.get("agency","")) set_dict_item_columns(set_col,df_columns,item,"代理联系人",_dict.get("agency_contact","")) set_dict_item_columns(set_col,df_columns,item,"代理联系人电话",_dict.get("agency_phone","")) set_dict_item_columns(set_col,df_columns,item,"招投标分析(合作商)","") set_dict_item_columns(set_col,df_columns,item,"历史中标单位","") set_dict_item_columns(set_col,df_columns,item,"应对策略(作为填)","") for k,v in item.items(): if k not in df_data: df_data[k] = [] df_data[k].append(v) return df_columns def fixContactPerson(df_data,list_df_columns,get_legal_person=False): set_enterprise = set() if len(df_data.keys())>0: for _tenderee,_agency in zip(df_data["招标单位"],df_data["代理单位"]): set_enterprise.add(_tenderee) set_enterprise.add(_agency) if "" in set_enterprise: set_enterprise.remove("") if None in set_enterprise: set_enterprise.remove(None) dict_enterprise = getDictEnterprise(list(set_enterprise),columns_to_get = ["procurement_system","company_org_type","reg_capital","actual_capital","contacts","estiblish_time","social_staff_num","zhong_biao_number","tou_biao_number","credit_code","legal_person_name","phone_number"]) # conn = getConnection_oracle() # cursor = conn.cursor() if len(set_enterprise)>0: for _i in range(len(df_data["招标单位"])): _enterprise_name = df_data["招标单位"][_i] if df_data["招标联系人电话"][_i]=="": contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts") if contacts is not None: _person,_phone = getOneContact(contacts) df_data["招标联系人"][_i] = _person df_data["招标联系人电话"][_i] = _phone df_data["项目类型"][_i] = dict_enterprise.get(_enterprise_name,{}).get("procurement_system") _enterprise_name = df_data["代理单位"][_i] if df_data["代理联系人电话"][_i]=="": contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts") if contacts is not None: _person,_phone = getOneContact(contacts) df_data["代理联系人"][_i] = _person df_data["代理联系人电话"][_i] = _phone # _enterprise_name = df_data["中标单位"][_i] # if get_legal_person: # _person = dict_enterprise.get(_enterprise_name,{}).get("legal_person_name","") # _phone = dict_enterprise.get(_enterprise_name,{}).get("phone_number","") # if len(_phone)==11 and _phone[0]=="1": # df_data["中标单位联系人"][_i] = _person # df_data["中标单位联系电话"][_i] = _phone # else: # if df_data["中标单位联系电话"][_i]=="": # contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts") # if contacts is not None: # _person,_phone = getOneContact(contacts,mobile_only=True) # df_data["中标单位联系人"][_i] = _person # df_data["中标单位联系电话"][_i] = _phone def fixContactPerson1(df_data,list_df_columns,get_legal_person=False): set_enterprise = set() if len(df_data.keys())>0: for _tenderee,_agency,_win in zip(df_data["招标单位"],df_data["代理单位"],df_data["中标单位"]): set_enterprise.add(_tenderee) set_enterprise.add(_agency) set_enterprise.add(_win) if "" in set_enterprise: set_enterprise.remove("") if None in set_enterprise: set_enterprise.remove(None) dict_enterprise = getDictEnterprise(list(set_enterprise),columns_to_get = ["procurement_system","company_org_type","reg_capital","actual_capital","contacts","estiblish_time","social_staff_num","zhong_biao_number","tou_biao_number","credit_code","legal_person_name","phone_number","reg_location","province","city"]) # print("dict_enterprise",dict_enterprise) # conn = getConnection_oracle() # cursor = conn.cursor() if len(set_enterprise)>0: for _i in range(len(df_data["招标单位"])): _enterprise_name = df_data["招标单位"][_i] contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts") if contacts is not None: for _index in range(min(len(contacts),3)): contact_person,phone_no = contacts[_index].get("contact_person"),contacts[_index].get("phone_no") is_legal_person,is_manager,is_shareholder = contacts[_index].get("is_legal_person",0),contacts[_index].get("is_manager",0),contacts[_index].get("is_shareholder",0) df_data["招标联系人%s"%str(_index+1)][_i] = contact_person df_data["招标联系人电话%s"%str(_index+1)][_i] = phone_no df_data["招标单位所在地"][_i] = "%s-%s"%(dict_enterprise.get(_enterprise_name,{}).get("province",""),dict_enterprise.get(_enterprise_name,{}).get("city","")) df_data["项目类型"][_i] = dict_enterprise.get(_enterprise_name,{}).get("procurement_system") _enterprise_name = df_data["代理单位"][_i] contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts") if contacts is not None: for _index in range(min(len(contacts),3)): contact_person,phone_no = contacts[_index].get("contact_person"),contacts[_index].get("phone_no") is_legal_person,is_manager,is_shareholder = contacts[_index].get("is_legal_person",0),contacts[_index].get("is_manager",0),contacts[_index].get("is_shareholder",0) df_data["代理联系人%s"%str(_index+1)][_i] = contact_person df_data["代理联系人电话%s"%str(_index+1)][_i] = phone_no df_data["代理单位所在地"][_i] = "%s-%s"%(dict_enterprise.get(_enterprise_name,{}).get("province",""),dict_enterprise.get(_enterprise_name,{}).get("city","")) _enterprise_name = df_data["中标单位"][_i] df_data["中标单位所在地"][_i] = "%s-%s"%(dict_enterprise.get(_enterprise_name,{}).get("province",""),dict_enterprise.get(_enterprise_name,{}).get("city","")) _person = dict_enterprise.get(_enterprise_name,{}).get("legal_person_name","") _phone = dict_enterprise.get(_enterprise_name,{}).get("phone_number","") if len(_phone)==11 and _phone[0]=="1": df_data["中标法人"][_i] = _person df_data["中标法人电话"][_i] = _phone contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts") if contacts is not None: for _index in range(min(len(contacts),3)): contact_person,phone_no = contacts[_index].get("contact_person"),contacts[_index].get("phone_no") is_legal_person,is_manager,is_shareholder = contacts[_index].get("is_legal_person",0),contacts[_index].get("is_manager",0),contacts[_index].get("is_shareholder",0) if is_manager: df_data["中标高管"][_i] = contact_person df_data["中标高管电话"][_i] = phone_no if is_shareholder: df_data["中标股东"][_i] = contact_person df_data["中标股东电话"][_i] = phone_no df_data["中标联系人%s"%str(_index+1)][_i] = contact_person df_data["中标联系人电话%s"%str(_index+1)][_i] = phone_no log("start export 15824381998:>>>>>>>>>>>>>>>") current_date = getCurrent_date(format="%Y-%m-%d") start_time = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())-7*24*60*60)) if current_date<="2025-02-04" and datetime.datetime.now().weekday() in (1,4): for i in range(10): try: if datetime.datetime.now().weekday()==1: last_date = timeAdd(current_date,-4) if datetime.datetime.now().weekday() in (4,): last_date = timeAdd(current_date,-3) # start_time='2022-07-22' # current_date = '2022-07-28' page_time_start = timeAdd(current_date,-7) log("start exporting export2:=================") # columns = ["doctitle","doctextcon","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose"] dict_channel = getDict_docchannel() list_query = [] str_keyword = ''' 多功能位移机,失能群体安全转移,助行机器人,行走机器人,护理机器人,大小便护理,洗浴机,洗浴机器人,吃饭机器人,养老护理,老年评估,防摔,智能护理,智慧康养 ''' list_keyword = splitIntoList(str_keyword,"[\s\n、,,]") str_not_keyword = ''' 清洗机器人 ''' list_not_key = splitIntoList(str_not_keyword,"[\s\n、,,]") tenderee_keywrod = "医院、大学、高校、高中" list_t_key = splitIntoList(tenderee_keywrod,"[\s\n、,,]") log(str(list_keyword)) columns = ["doctitle","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_get_file_end","time_bidopen","time_bidstart","time_registration_start","time_registration_end"] bool_query = BoolQuery(must_queries=[ generateBoolShouldQuery(["doctitle"],list_keyword,MatchPhraseQuery), generateBoolShouldQuery(["docchannel"],[51,52,102,103,105,104,114,121,122,101,119,120,118],TermQuery), RangeQuery("crtime",last_date+" 10:00:00",current_date+" 10:00:00",True,True), RangeQuery("status",201,300,True,True), RangeQuery("page_time",page_time_start) # TermQuery("procurement_system","公安系统"), # generateBoolShouldQuery(["province"],["湖南"],TermQuery), # generateBoolShouldQuery(["tenderee"],list_t_key,WildcardQuery) # generateBoolShouldQuery(["docchannel"],[101,118,119],TermQuery), ], must_not_queries=[ generateBoolShouldQuery(["doctitle","doctextcon","attachmenttextcon"],list_not_key,MatchPhraseQuery), ] ) list_row = getDocument([{"query":bool_query,"limit":500}],columns,thread_count=1) log("get document %d rows"%len(list_row)) df_data = {} set_line = set() # list_row = filterRow(list_row) df_columns = getRowData_zb(df_data,list_row,set_line,list_keyword,dict_channel,True) fixContactPerson1(df_data,df_columns) df = pd.DataFrame(df_data) df_data_filter_11 = {} for c in df_columns: if c not in df_data_filter_11: df_data_filter_11[c] = [] df_data_filter_12 = {} for c in df_columns: if c not in df_data_filter_12: df_data_filter_12[c] = [] for _i in range(len(df_data["项目类型"])): if df_data["项目类型"][_i] in ("教育系统","科研系统"): for c in df_columns: if c not in df_data_filter_11: df_data_filter_11[c] = [] df_data_filter_11[c].append(df_data[c][_i]) else: for c in df_columns: if c not in df_data_filter_12: df_data_filter_12[c] = [] df_data_filter_12[c].append(df_data[c][_i]) df_f11 = pd.DataFrame(df_data_filter_11) df_f12 = pd.DataFrame(df_data_filter_12) time_end = timeAdd(current_date,60) columns = ["project_name","type","province","city","bidding_budget","city","demand","last_page_time"] bool_query = BoolQuery(must_queries=[ generateBoolShouldQuery(["demand","product"],list_keyword,MatchPhraseQuery), RangeQuery("may_end",range_from=current_date), RangeQuery("may_end",range_to=time_end), # TermQuery("procurement_system","公安系统"), # generateBoolShouldQuery(["province"],["湖南"],TermQuery), # generateBoolShouldQuery(["tenderee"],list_t_key,WildcardQuery) # generateBoolShouldQuery(["docchannel"],[101,118,119],TermQuery), ], must_not_queries=[ generateBoolShouldQuery(["demand","product"],list_not_key,MatchPhraseQuery), ] ) list_row = getDocument([{"query":bool_query,"limit":500}],columns,table_name="preproject",table_index="preproject_index",thread_count=1,sort_column="may_end") log("get document %d rows"%len(list_row)) df_data = {} set_line = set() # list_row = filterRow(list_row) df_columns_yc = getRowData_yc(df_data,list_row,set_line,list_keyword,dict_channel,True) fixContactPerson(df_data,df_columns_yc) for _i in range(len(df_data["项目类型"])): if df_data["项目类型"][_i] in ("政府办公室系统","财政系统","民事系统","企业采购系统","卫生系统"): df_data["类型"][_i] = "政企" elif df_data["项目类型"][_i] in ("教育系统","科研系统"): df_data["类型"][_i] = "教育" else: df_data["类型"][_i] = "其他" df1 = pd.DataFrame(df_data) filename = os.path.dirname(__file__)+"/data/%s年%s项目数据导出.xlsx"%(start_time[:4],current_date) with pd.ExcelWriter(filename) as writer: df_f11.to_excel(writer,sheet_name="教育",columns=df_columns if not df_f11.empty else None,index=False) df_f12.to_excel(writer,sheet_name="政企",columns=df_columns if not df_f12.empty else None,index=False) df1.to_excel(writer,sheet_name="项目预测",columns=df_columns_yc if not df1.empty else None) log(str(filename)) host = "smtp.exmail.qq.com" username = "vip@bidizhaobiao.com" password = "Biaoxun66-" # receivers = ["724949655@qq.com","1396488964@qq.com"] receivers = ["md47@zuowei.com"] # receivers = ["1175730271@qq.com"] attachs = [filename] sendEmail(host,username,password,receivers,attachs=attachs) break except Exception as e: traceback.print_exc() def job_export(): _scheduler = BlockingScheduler() e3 = Export3() # _scheduler.add_job(export_medicine_friday,"cron",hour=8) # _scheduler.add_job(export_medicine_friday,"cron",second="*/1") _scheduler.add_job(export2,"cron",hour=9) _scheduler.add_job(export5,"cron",hour=16) # _scheduler.add_job(e3.export4,"cron",hour=7) _scheduler.add_job(e3.trytimes,"cron",hour=21) _scheduler.add_job(e3.export3_1,"cron",hour=21) _scheduler.add_job(export_15824381998,"cron",hour=21) _scheduler.add_job(export_13510123669,"cron",hour=10) _scheduler.start() if __name__=="__main__": # job_export() # export_medicine_friday() # export2() # export_document_except() # e3 = Export3() # e3.export4_by_project() # e3.export4() # e3.trytimes() # e3.export3_1() # export5() # export_15824381998() export_13510123669()