|
|
@@ -4,39 +4,2578 @@ import sys
|
|
|
import os
|
|
|
sys.path.append(os.path.join(os.path.dirname(__file__),".."))
|
|
|
|
|
|
-from utils.Utils import sendEmail,getCurrent_date
|
|
|
+import requests
|
|
|
+
|
|
|
+from utils.Utils import sendEmail,getCurrent_date,log
|
|
|
|
|
|
import datetime
|
|
|
import time
|
|
|
-from export.exportDocument import exportDocument_medicine,list_df_columns
|
|
|
+from export.exportDocument import *
|
|
|
import pandas as pd
|
|
|
|
|
|
from apscheduler.schedulers.blocking import BlockingScheduler
|
|
|
|
|
|
+from export.exportUtils import rsa_encrpt
|
|
|
+
|
|
|
+import urllib.parse
|
|
|
+
|
|
|
+import base64
|
|
|
+
|
|
|
def export_medicine_friday():
|
|
|
- current_date = getCurrent_date("%Y-%m-%d")
|
|
|
- start_time = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())-6*24*60*60))
|
|
|
+ current_date = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())-24*60*60))
|
|
|
+ start_time = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())-7*24*60*60))
|
|
|
if current_date<="2022-04-25":
|
|
|
+ if datetime.datetime.now().weekday()==0:
|
|
|
+ for i in range(10):
|
|
|
+ try:
|
|
|
+ df_data = exportDocument_medicine(start_time,current_date)
|
|
|
+ df = pd.DataFrame(df_data)
|
|
|
+
|
|
|
+ filename = os.path.dirname(__file__)+"/data/%s年%s至%s医疗数据导出.xlsx"%(start_time[:4],start_time,current_date)
|
|
|
+ df.to_excel(filename,columns=list_df_columns)
|
|
|
+
|
|
|
+ host = "smtp.exmail.qq.com"
|
|
|
+ username = "vip@bidizhaobiao.com"
|
|
|
+ password = "Biaoxun66-"
|
|
|
+ receivers = ["1985262186@qq.com","1175730271@qq.com","1265797328@qq.com","1289358902@qq.com"]
|
|
|
+ attachs = [filename]
|
|
|
+
|
|
|
+ sendEmail(host,username,password,receivers,attachs=attachs)
|
|
|
+ break
|
|
|
+ except Exception as e:
|
|
|
+ traceback.print_exc()
|
|
|
+ log(str(e))
|
|
|
+ time.sleep(60)
|
|
|
+
|
|
|
+def export2():
|
|
|
+ def removeData(df_data):
|
|
|
+ list_remove_index = []
|
|
|
+ list_c = df_data.get("招标人采购系统",[])
|
|
|
+ for _c_i in range(len(list_c)):
|
|
|
+ if list_c[_c_i]=="企业采购系统":
|
|
|
+ list_remove_index.append(_c_i)
|
|
|
+ list_remove_index.reverse()
|
|
|
+ print(list_remove_index)
|
|
|
+ for k,v in df_data.items():
|
|
|
+ for _rc in list_remove_index:
|
|
|
+ v.pop(_rc)
|
|
|
+ for k,v in df_data.items():
|
|
|
+ v = v[:500]
|
|
|
+
|
|
|
+ log("start export2:>>>>>>>>>>>>>>>")
|
|
|
+ current_date = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())-24*60*60))
|
|
|
+ start_time = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())-7*24*60*60))
|
|
|
+ if current_date<="2024-02-28":
|
|
|
if datetime.datetime.now().weekday()==4:
|
|
|
- df_data = exportDocument_medicine(start_time,current_date)
|
|
|
- df = pd.DataFrame(df_data)
|
|
|
+ for i in range(10):
|
|
|
+ try:
|
|
|
+ # start_time='2022-07-22'
|
|
|
+ # current_date = '2022-07-28'
|
|
|
+ log("start exporting export2:=================")
|
|
|
+ # columns = ["doctitle","doctextcon","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose"]
|
|
|
+ columns = ["doctitle","doctextcon","attachmenttextcon","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose","time_bidopen"]
|
|
|
+ dict_channel = getDict_docchannel()
|
|
|
+
|
|
|
+ list_query = []
|
|
|
+
|
|
|
+ str_keyword = '''
|
|
|
+
|
|
|
+机房建设 多媒体教室建设 数据中心机房 班班通 电子设备
|
|
|
+视频 LED 监控 视频监控系统 信息安全
|
|
|
+信息化建设 智能化 网络安全服务 网络安全系统 等级保护测评
|
|
|
+信息系统 大数据中心 智慧教室建设 数据安全建设 可视化指挥联动平台
|
|
|
+音频处理器 教学专用仪器 多媒体扩声系统 智慧校园 数字化
|
|
|
+交换 路由 无线 城域网建设 触控一体机
|
|
|
+教学设备
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ '''
|
|
|
+ list_keyword = splitIntoList(str_keyword,"[\s\n、,,]")
|
|
|
+
|
|
|
+ str_not_keyword = '''
|
|
|
+ '''
|
|
|
+ list_not_key = splitIntoList(str_not_keyword,"[\s\n、,,]")
|
|
|
+
|
|
|
+ tenderee_keywrod = "医院、大学、高校、高中"
|
|
|
+ list_t_key = splitIntoList(tenderee_keywrod,"[\s\n、,,]")
|
|
|
+
|
|
|
+ log(str(list_keyword))
|
|
|
+ bool_query = BoolQuery(must_queries=[
|
|
|
+ generateBoolShouldQuery(["doctitle","doctextcon","attachmenttextcon"],list_keyword,MatchPhraseQuery),
|
|
|
+ generateBoolShouldQuery(["docchannel"],[52,102,114],TermQuery),
|
|
|
+ RangeQuery("page_time",start_time,current_date,True,True),
|
|
|
+ RangeQuery("status",151,300,True,True),
|
|
|
+ # TermQuery("procurement_system","公安系统"),
|
|
|
+ generateBoolShouldQuery(["province"],["湖南"],TermQuery),
|
|
|
+ # generateBoolShouldQuery(["tenderee"],list_t_key,WildcardQuery)
|
|
|
+ # generateBoolShouldQuery(["docchannel"],[101,118,119],TermQuery),
|
|
|
+ ],
|
|
|
+ # must_not_queries=[
|
|
|
+ # generateBoolShouldQuery(["doctitle"],list_not_key,MatchPhraseQuery),
|
|
|
+ # ]
|
|
|
+ )
|
|
|
+
|
|
|
+ list_query.append({"query":bool_query,"limit":700})
|
|
|
+ list_row = getDocument(list_query,columns)
|
|
|
+
|
|
|
+ log("get document %d rows"%len(list_row))
|
|
|
+
|
|
|
+ df_data = {}
|
|
|
+ set_line = set()
|
|
|
+ # list_row = filterRow(list_row)
|
|
|
+ getRowData(df_data,list_row,set_line,list_keyword,dict_channel,True)
|
|
|
+
|
|
|
+ fixContactPerson(df_data,list_df_columns)
|
|
|
+
|
|
|
+ removeData(df_data)
|
|
|
+ df = pd.DataFrame(df_data)
|
|
|
+
|
|
|
+
|
|
|
+ filename = os.path.dirname(__file__)+"/data/%s年%s至%s数据导出.xlsx"%(start_time[:4],start_time,current_date)
|
|
|
+ df.to_excel(filename,columns=list_df_columns)
|
|
|
+ log(str(filename))
|
|
|
+
|
|
|
+ host = "smtp.exmail.qq.com"
|
|
|
+ username = "vip@bidizhaobiao.com"
|
|
|
+ password = "Biaoxun66-"
|
|
|
+ receivers = ["1175730271@qq.com","493894608@qq.com"]
|
|
|
+ # receivers = ["1175730271@qq.com"]
|
|
|
+ attachs = [filename]
|
|
|
+
|
|
|
+ sendEmail(host,username,password,receivers,attachs=attachs)
|
|
|
+ break
|
|
|
+ except Exception as e:
|
|
|
+ traceback.print_exc()
|
|
|
+
|
|
|
+def export5():
|
|
|
+ '''
|
|
|
+ 客户信息
|
|
|
+公司名:武汉市浩盛特种建材有限责任公司
|
|
|
+
|
|
|
+客户姓名:高诗琴
|
|
|
+
|
|
|
+会员号:15392839439
|
|
|
+
|
|
|
+会员等级:高级会员
|
|
|
+成交金额:5900元
|
|
|
+服务期:12个月
|
|
|
+截止推送项目时间:2024-02-17
|
|
|
+
|
|
|
+申请原因:2.16成交的新客户,客户反馈跟销售沟通是人工每天整理好项目信息推送,现在寻求技术协助支持。
|
|
|
+客户需求:
|
|
|
+地区:河北省、北京市、天津市、山东省、江苏省、上海市、浙江省、福建省、广东省、海南省
|
|
|
+业务关键词:膨胀剂,抗裂剂,防水剂,外加剂,防腐剂,阻锈剂,密实剂,耐久性,氧化镁,镁质,涂料
|
|
|
+
|
|
|
+(排除词,漆),需要招标信息
|
|
|
+工作日下午四点文档信息推送到邮箱365531448@qq.com,每天更新最新的项目信息10条(不足10条可以按照更新信息推送)
|
|
|
+ :return:
|
|
|
+ '''
|
|
|
+ def getRowData(df_data,rows,set_line,list_keyword,dict_channel,dumplicate):
|
|
|
+
|
|
|
+ dict_line = {}
|
|
|
+ # list_data = getRow_ots(rows)
|
|
|
+ _index = 0
|
|
|
+ rows.sort(key=lambda x:x.get("docid",0),reverse=True)
|
|
|
+ set_col = set()
|
|
|
+ df_columns = []
|
|
|
+ for row in rows[:10]:
|
|
|
+ _index += 1
|
|
|
+ item = {}
|
|
|
+ _dict = row
|
|
|
+
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"省份",_dict.get("province",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"城市",_dict.get("city",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"公告类别",dict_channel.get(_dict.get("docchannel",""),""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"发布时间",_dict.get("page_time",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"公告标题",_dict.get("doctitle",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"招标单位",_dict.get("tenderee",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"招标联系人",_dict.get("tenderee_contact",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"招标联系人电话",_dict.get("tenderee_phone",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"代理单位",_dict.get("agency",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"代理单位联系人",_dict.get("agency_contact",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"代理单位联系人电话",_dict.get("agency_phone",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"链接","http://www.bidizhaobiao.com/info-%s.html"%str(_dict.get("docid","")))
|
|
|
+
|
|
|
+ for k,v in item.items():
|
|
|
+ if k not in df_data:
|
|
|
+ df_data[k] = []
|
|
|
+ df_data[k].append(v)
|
|
|
+ return df_columns
|
|
|
+
|
|
|
+
|
|
|
+ log("start export5:>>>>>>>>>>>>>>>")
|
|
|
+ current_date = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())))
|
|
|
+ if current_date<="2024-02-17":
|
|
|
+ weekday = datetime.datetime.now().weekday()
|
|
|
+ if weekday>=0 and weekday<=4:
|
|
|
+ for i in range(10):
|
|
|
+ try:
|
|
|
+ # start_time='2022-07-22'
|
|
|
+ # current_date = '2022-07-28'
|
|
|
+ log("start exporting export2:=================")
|
|
|
+ # columns = ["doctitle","doctextcon","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose"]
|
|
|
+ columns = ["doctitle","doctextcon","attachmenttextcon","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose","time_bidopen"]
|
|
|
+ columns = ["doctitle","docchannel","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose","time_bidopen"]
|
|
|
+ dict_channel = getDict_docchannel()
|
|
|
+
|
|
|
+ list_query = []
|
|
|
+
|
|
|
+ str_province = '''
|
|
|
+ 河北、北京、天津、山东、江苏、上海、浙江、福建、广东、海南
|
|
|
+ '''
|
|
|
+
|
|
|
+ str_keyword = '''
|
|
|
+
|
|
|
+膨胀剂 抗裂剂 防水剂 外加剂 镁质
|
|
|
+防腐剂 阻锈剂 密实剂 耐久性 氧化镁
|
|
|
+
|
|
|
+ '''
|
|
|
+ list_province = splitIntoList(str_province,"[\s\n、,,]")
|
|
|
+
|
|
|
+ list_keyword = splitIntoList(str_keyword,"[\s\n、,,]")
|
|
|
+
|
|
|
+ str_not_keyword = '''
|
|
|
+ 漆,复合风管,检测,涂料工程,涂料多乐士
|
|
|
+ '''
|
|
|
+ list_not_key = splitIntoList(str_not_keyword,"[\s\n、,,]")
|
|
|
+
|
|
|
+ tenderee_keywrod = "医院、大学、高校、高中"
|
|
|
+ list_t_key = splitIntoList(tenderee_keywrod,"[\s\n、,,]")
|
|
|
+
|
|
|
+ log(str(list_keyword))
|
|
|
+ bool_query = BoolQuery(must_queries=[
|
|
|
+ generateBoolShouldQuery(["doctitle"],list_keyword,MatchPhraseQuery),
|
|
|
+ generateBoolShouldQuery(["docchannel"],[52,102,114],TermQuery),
|
|
|
+ TermQuery("page_time",current_date),
|
|
|
+ RangeQuery("status",151,300,True,True),
|
|
|
+ # TermQuery("procurement_system","公安系统"),
|
|
|
+ generateBoolShouldQuery(["province"],list_province,TermQuery),
|
|
|
+ # generateBoolShouldQuery(["tenderee"],list_t_key,WildcardQuery)
|
|
|
+ # generateBoolShouldQuery(["docchannel"],[101,118,119],TermQuery),
|
|
|
+ ],
|
|
|
+ must_not_queries=[
|
|
|
+ generateBoolShouldQuery(["doctitle"],list_not_key,MatchPhraseQuery),
|
|
|
+ ]
|
|
|
+ )
|
|
|
+
|
|
|
+ list_query.append({"query":bool_query,"limit":700})
|
|
|
+ list_row = getDocument(list_query,columns)
|
|
|
+
|
|
|
+ log("get document %d rows"%len(list_row))
|
|
|
+
|
|
|
+ df_data = {}
|
|
|
+ set_line = set()
|
|
|
+ # list_row = filterRow(list_row)
|
|
|
+ df_column = getRowData(df_data,list_row,set_line,list_keyword,dict_channel,True)
|
|
|
+
|
|
|
+ df = pd.DataFrame(df_data)
|
|
|
+ print(df_data)
|
|
|
+
|
|
|
+
|
|
|
+ filename = os.path.dirname(__file__)+"/data/%s年%s数据导出.xlsx"%(current_date[:4],current_date)
|
|
|
+ df.to_excel(filename,columns=df_column)
|
|
|
+ log(str(filename))
|
|
|
+
|
|
|
+ host = "smtp.exmail.qq.com"
|
|
|
+ username = "vip@bidizhaobiao.com"
|
|
|
+ password = "Biaoxun66-"
|
|
|
+ receivers = ["1175730271@qq.com","365531448@qq.com"]
|
|
|
+ # receivers = ["1175730271@qq.com"]
|
|
|
+ attachs = [filename]
|
|
|
+
|
|
|
+ sendEmail(host,username,password,receivers,attachs=attachs)
|
|
|
+ break
|
|
|
+ except Exception as e:
|
|
|
+ traceback.print_exc()
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+def export_document_except():
|
|
|
+ def getRowData(df_data,rows,dict_channel,set_columns1,list_df_column1):
|
|
|
+
|
|
|
+ _index = 0
|
|
|
+ for row in rows:
|
|
|
+ _index += 1
|
|
|
+ item = {}
|
|
|
+ _dict = row
|
|
|
+ _extract = json.loads(_dict.get("extract_json","{}"))
|
|
|
+ product_attrs = _extract.get("product_attrs",{"data":[{}]}).get("data")
|
|
|
+ docid = _dict.get("docid","")
|
|
|
+ win_tenderer = ""
|
|
|
+ bidding_budget = ""
|
|
|
+ sub_docs_json = json.loads(_dict.get("sub_docs_json","[]"))
|
|
|
+ for _doc in sub_docs_json:
|
|
|
+ if win_tenderer=="":
|
|
|
+ win_tenderer = _doc.get("win_tenderer","")
|
|
|
+ if bidding_budget=="":
|
|
|
+ bidding_budget= _doc.get("bidding_budget","")
|
|
|
+ for _attrs in product_attrs:
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"公告链接",'=HYPERLINK("http://www.bidizhaobiao.com/info-%s.html","查看公告")'%(str(docid)))
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"公告时间",_dict.get("page_time"))
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"省份",_dict.get("province"))
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"城市",_dict.get("city"))
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"地区",_dict.get("district"))
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"项目标题",_dict.get("doctitle"))
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"采购单位",_dict.get("tenderee"))
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位",win_tenderer)
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"产品名称",_dict.get("product"))
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"规格型号",_attrs.get("specs",""))
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"品牌",_attrs.get("brand",""))
|
|
|
+ _quantity = ""
|
|
|
+ _uniPrice = ""
|
|
|
+ try:
|
|
|
+ _quantity = int(_attrs.get("quantity","0"))
|
|
|
+ if _quantity==0:
|
|
|
+ _quantity = ""
|
|
|
+ except Exception as e:
|
|
|
+ pass
|
|
|
+ try:
|
|
|
+ _uniPrice = float(_attrs.get("_uniPrice","0"))
|
|
|
+ if _uniPrice==0:
|
|
|
+ _uniPrice = ""
|
|
|
+ except Exception as e:
|
|
|
+ pass
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"数量",_quantity)
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"单价(元)",_uniPrice)
|
|
|
+ sumPrice = ""
|
|
|
+ if _quantity!="" and _uniPrice!="":
|
|
|
+ sumPrice = _quantity*_uniPrice
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"总价(元)",sumPrice)
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"项目金额(元)",bidding_budget)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ for k,v in item.items():
|
|
|
+ if k not in df_data:
|
|
|
+ df_data[k] = []
|
|
|
+ df_data[k].append(v)
|
|
|
+
|
|
|
+ current_date = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())))
|
|
|
+ current_date = '2022-04-01'
|
|
|
+
|
|
|
+ columns = ["doctitle","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose","web_source_no","web_source_name","service_time","person_review","time_get_file_start","time_get_file_end","time_earnest_money_start","time_earnest_money_end"]
|
|
|
+ dict_channel = getDict_docchannel()
|
|
|
+
|
|
|
+
|
|
|
+ list_query = []
|
|
|
+
|
|
|
+ str_keyword = '''
|
|
|
+ 机房建设 多媒体教室建设 数据中心机房 班班通 电子设备
|
|
|
+视频 LED 监控 视频监控系统 信息安全
|
|
|
+信息化建设 网络安全 网络安全服务 网络安全系统 等级保护测评
|
|
|
+信息系统 大数据中心 智慧教室建设 数据安全建设 可视化指挥联动平台
|
|
|
+音频处理器 教学专用仪器 多媒体扩声系统 智慧校园 数字化
|
|
|
+交换 路由 无线 信息安全 触控一体机
|
|
|
+教学设备
|
|
|
+
|
|
|
+ '''
|
|
|
+ list_keyword = splitIntoList(str_keyword,"[\s\n、,,]")
|
|
|
+
|
|
|
+ str_not_keyword = '''
|
|
|
+ '''
|
|
|
+ list_not_key = splitIntoList(str_not_keyword,"[\s\n、,,]")
|
|
|
+
|
|
|
+ tenderee_keywrod = "医院、大学、高校、高中"
|
|
|
+ list_t_key = splitIntoList(tenderee_keywrod,"[\s\n、,,]")
|
|
|
+ s_q = BoolQuery(should_queries=[BoolQuery(must_queries=[TermQuery("docchannel",52)],
|
|
|
+ must_not_queries=[WildcardQuery("tenderee","*")]),
|
|
|
+ BoolQuery(must_queries=[generateBoolShouldQuery(["docchannel"],[101,119,120],TermQuery)],
|
|
|
+ must_not_queries=[NestedQuery("sub_docs_json",WildcardQuery("sub_docs_json.win_tenderer","*"))])])
|
|
|
+ log(str(list_keyword))
|
|
|
+ set_columns1 = set()
|
|
|
+ list_df_columns1 = []
|
|
|
+ bool_query = BoolQuery(must_queries=[
|
|
|
+ TermQuery("page_time",current_date),
|
|
|
+ RangeQuery("status",151,451,True,True),
|
|
|
+ s_q
|
|
|
+ ],
|
|
|
+ # must_not_queries=[
|
|
|
+ # generateBoolShouldQuery(["doctitle"],list_not_key,MatchPhraseQuery),
|
|
|
+ # ]
|
|
|
+ )
|
|
|
+
|
|
|
+ list_query.append({"query":bool_query,"limit":500000})
|
|
|
+ list_row = getDocument(list_query,columns)
|
|
|
+
|
|
|
+ log("get document %d rows"%len(list_row))
|
|
|
+ df_data = {}
|
|
|
+ set_line = set()
|
|
|
+ # list_row = filterRow(list_row,list_not_key)
|
|
|
+ getRowData(df_data,list_row,set_line,list_keyword,dict_channel,True)
|
|
|
+
|
|
|
+ # fixContactPerson(df_data,list_df_columns)
|
|
|
+
|
|
|
+ df = pd.DataFrame(df_data)
|
|
|
+
|
|
|
+ filename = os.path.dirname(__file__)+"/data/%s异常数据导出.xlsx"%(current_date)
|
|
|
+ print(list_df_columns)
|
|
|
+ df.to_excel(filename,columns=list_df_columns)
|
|
|
+
|
|
|
+class Export3():
|
|
|
+
|
|
|
+ def trytimes(self):
|
|
|
+ for _ in range(3):
|
|
|
+ _succeed = self.export3()
|
|
|
+ if _succeed:
|
|
|
+ break
|
|
|
+
|
|
|
+ def export3(self,):
|
|
|
+
|
|
|
+ def getRowData(df_data,rows,dict_channel,set_columns1,list_df_column1):
|
|
|
+
|
|
|
+ _index = 0
|
|
|
+ for row in rows:
|
|
|
+
|
|
|
+
|
|
|
+ _index += 1
|
|
|
+ item = {}
|
|
|
+ _dict = row
|
|
|
+ _extract = json.loads(_dict.get("extract_json","{}"))
|
|
|
+ demand_info = _extract.get("demand_info",{"data":[]}).get("data")
|
|
|
+ docid = _dict.get("docid","")
|
|
|
+ win_tenderer = ""
|
|
|
+ bidding_budget = ""
|
|
|
+ win_bid_price = ""
|
|
|
+ sub_docs_json = json.loads(_dict.get("sub_docs_json","[]"))
|
|
|
+ for _doc in sub_docs_json:
|
|
|
+ if win_tenderer=="":
|
|
|
+ win_tenderer = _doc.get("win_tenderer","")
|
|
|
+ if bidding_budget=="":
|
|
|
+ bidding_budget= _doc.get("bidding_budget","")
|
|
|
+ if win_bid_price=="":
|
|
|
+ win_bid_price = _doc.get("win_bid_price","")
|
|
|
+ if len(demand_info)==0:
|
|
|
+ demand_info = [{}]
|
|
|
+ demand_info = [{}]
|
|
|
+ for _attrs in demand_info:
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"序号",_index)
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"项目编号",_dict.get("project_code"))
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"项目名称",_dict.get("project_name"))
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"省份",_dict.get("province"))
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"城市",_dict.get("city"))
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"信息类型",dict_channel.get(_dict.get("docchannel")))
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"发布时间",_dict.get("crtime"))
|
|
|
+
|
|
|
+ b_or_w = ""
|
|
|
+ if bidding_budget!="":
|
|
|
+ b_or_w = float(bidding_budget)/10000
|
|
|
+ elif win_bid_price!="":
|
|
|
+ b_or_w = float(win_bid_price)/10000
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"中标金额/预算金额(万元)",b_or_w)
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"招标单位",_dict.get("tenderee"))
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"招标项目联系人","%s\n%s"%(_dict.get("tenderee_contact",""),_dict.get("tenderee_phone","")))
|
|
|
+
|
|
|
+
|
|
|
+# 中标单位 中标项目联系人 中标单位主要联系人 中标单位联系人 代理单位 代理项目联系人 代理单位联系人 比地招标公告地址
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"招标单位联系人","")
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位",win_tenderer)
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"中标项目联系人","")
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位主要联系人","")
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位联系人","")
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"代理单位",_dict.get("agency",""))
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"代理项目联系人","%s\n%s"%(_dict.get("agency_contact",""),_dict.get("agency_phone","")))
|
|
|
+
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"代理单位联系人","")
|
|
|
+
|
|
|
+
|
|
|
+ _key ='''MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQC8uZYYV6ls+5KMzUfvsPvv3XRdwkcTBj/ppB03mijUPHYTGvYSE0cQTbQrnIbXFtUYJguakpKLmVyH+T/w6vhxbQNlaykfe8RXEh4i4IJk8s/Qb0E0xODsjKBEr8VdDYeqqduWrtJpttXAvv93SsTPvgZBditRzJAzk0XH56zL1wIDAQAB'''
|
|
|
+
|
|
|
+ _key = base64.b64decode(_key)
|
|
|
+ _url_map = {"userId":"001795335",
|
|
|
+ "timestamp":"%d"%(time.time()*1000),
|
|
|
+ "docid":str(_dict.get("docid",""))}
|
|
|
+ _encrpt_text = base64.b64encode(rsa_encrpt(json.dumps(_url_map),_key))
|
|
|
+ _encrpt_text = urllib.parse.quote(_encrpt_text)
|
|
|
+ _url = "http://www.bidizhaobiao.com/info-%s.html?emailSecret=%s"%(str(_dict.get("docid")),_encrpt_text)
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"比地招标公告地址",_url)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"项目编号",_dict.get("project_code"))
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"项目名称",_attrs.get("project_name"))
|
|
|
+ #
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"需求",_attrs.get("demand"))
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"预算(元)",_attrs.get("budget"))
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"开始时间",_attrs.get("order_begin"))
|
|
|
+
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位",win_tenderer)
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"产品名称",_attrs.get("product"))
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"规格型号",_attrs.get("specs",""))
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"品牌",_attrs.get("brand",""))
|
|
|
+ # _quantity = ""
|
|
|
+ # _uniPrice = ""
|
|
|
+ # try:
|
|
|
+ # _quantity = int(_attrs.get("quantity","0"))
|
|
|
+ # if _quantity==0:
|
|
|
+ # _quantity = ""
|
|
|
+ # except Exception as e:
|
|
|
+ # pass
|
|
|
+ # try:
|
|
|
+ # _uniPrice = float(_attrs.get("unitPrice","0"))
|
|
|
+ # if _uniPrice==0:
|
|
|
+ # _uniPrice = ""
|
|
|
+ # except Exception as e:
|
|
|
+ # pass
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"数量",_quantity)
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"单价(元)",_uniPrice)
|
|
|
+ # sumPrice = ""
|
|
|
+ # if _quantity!="" and _uniPrice!="":
|
|
|
+ # sumPrice = _quantity*_uniPrice
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"总价(元)",sumPrice)
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"项目金额(元)",bidding_budget)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ for k,v in item.items():
|
|
|
+ if k not in df_data:
|
|
|
+ df_data[k] = []
|
|
|
+ df_data[k].append(v)
|
|
|
+
|
|
|
+ def getRowData1(df_data,rows,dict_channel,set_columns1,list_df_column1):
|
|
|
+
|
|
|
+ _index = 0
|
|
|
+ for row in rows:
|
|
|
+
|
|
|
+
|
|
|
+ _index += 1
|
|
|
+ item = {}
|
|
|
+ _dict = row
|
|
|
+ _extract = json.loads(_dict.get("extract_json","{}"))
|
|
|
+ demand_info = _extract.get("demand_info",{"data":[]}).get("data")
|
|
|
+ docid = _dict.get("docid","")
|
|
|
+ win_tenderer = ""
|
|
|
+ bidding_budget = ""
|
|
|
+ win_bid_price = ""
|
|
|
+ sub_docs_json = json.loads(_dict.get("sub_docs_json","[]"))
|
|
|
+ for _doc in sub_docs_json:
|
|
|
+ if win_tenderer=="":
|
|
|
+ win_tenderer = _doc.get("win_tenderer","")
|
|
|
+ if bidding_budget=="":
|
|
|
+ bidding_budget= _doc.get("bidding_budget","")
|
|
|
+ if win_bid_price=="":
|
|
|
+ win_bid_price = _doc.get("win_bid_price","")
|
|
|
+ if len(demand_info)==0:
|
|
|
+ demand_info = [{}]
|
|
|
+ demand_info = [{}]
|
|
|
+ for _attrs in demand_info:
|
|
|
+
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"序号",_index)
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"项目编号",_dict.get("project_code"))
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"地区",_dict.get("district"))
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"省份",_dict.get("province"))
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"项目名称",_dict.get("project_name"))
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"采购人",_dict.get("tenderee"))
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"招标时间",_dict.get("page_time"))
|
|
|
+
|
|
|
+
|
|
|
+ b_or_w = ""
|
|
|
+ if bidding_budget!="":
|
|
|
+ b_or_w = float(bidding_budget)/10000
|
|
|
+ elif win_bid_price!="":
|
|
|
+ b_or_w = float(win_bid_price)/10000
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"招标预算",bidding_budget)
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"中标时间",_dict.get("page_time"))
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"中标金额",win_bid_price)
|
|
|
+
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"中标人",win_tenderer)
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"采购模式",_dict.get("bidway"))
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"命中关键词",_dict.get("keyword"))
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"采购品牌",'')
|
|
|
+
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"采购数量",'')
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ for k,v in item.items():
|
|
|
+ if k not in df_data:
|
|
|
+ df_data[k] = []
|
|
|
+ df_data[k].append(v)
|
|
|
+
|
|
|
+ def getContacts(contacts):
|
|
|
+
|
|
|
+ try:
|
|
|
+ if contacts is None or contacts=="":
|
|
|
+ return ""
|
|
|
+
|
|
|
+ _contacts = json.loads(contacts)
|
|
|
+
|
|
|
+ list_c = []
|
|
|
+ _count = 0
|
|
|
+ for _c in _contacts:
|
|
|
+ _count += 1
|
|
|
+ contact_person = _c.get("contact_person","")
|
|
|
+ phone_no = _c.get("phone_no","")
|
|
|
+ list_c.append("%s(企业联系人%d)\n%s"%(str(contact_person),_count,str(phone_no)))
|
|
|
+ if _count>=5:
|
|
|
+ break
|
|
|
+ return ",\n".join(list_c)
|
|
|
+ except Exception as e:
|
|
|
+ return ""
|
|
|
+
|
|
|
+
|
|
|
+ def fixContactPerson(df_data,list_df_columns,get_legal_person=False):
|
|
|
+ set_enterprise = set()
|
|
|
+ if len(df_data.keys())>0:
|
|
|
+ for _tenderee,_agency,_win in zip(df_data["招标单位"],df_data["代理单位"],df_data["中标单位"]):
|
|
|
+ set_enterprise.add(_tenderee)
|
|
|
+ set_enterprise.add(_agency)
|
|
|
+ set_enterprise.add(_win)
|
|
|
+ if "" in set_enterprise:
|
|
|
+ set_enterprise.remove("")
|
|
|
+ if None in set_enterprise:
|
|
|
+ set_enterprise.remove(None)
|
|
|
+ dict_enterprise = getDictEnterprise(list(set_enterprise),columns_to_get = ["contacts","procurement_system"])
|
|
|
+
|
|
|
+
|
|
|
+ # conn = getConnection_oracle()
|
|
|
+ # cursor = conn.cursor()
|
|
|
+ if len(set_enterprise)>0:
|
|
|
+ for _i in range(len(df_data["招标单位"])):
|
|
|
+ _enterprise_name = df_data["招标单位"][_i]
|
|
|
+
|
|
|
+ df_data["招标单位联系人"][_i] = getContacts(dict_enterprise.get(_enterprise_name,{}).get("contacts"))
|
|
|
+
|
|
|
+ _enterprise_name = df_data["代理单位"][_i]
|
|
|
+
|
|
|
+ df_data["代理单位联系人"][_i] = getContacts(dict_enterprise.get(_enterprise_name,{}).get("contacts"))
|
|
|
+
|
|
|
+ _enterprise_name = df_data["中标单位"][_i]
|
|
|
+
|
|
|
+ df_data["中标单位联系人"][_i] = getContacts(dict_enterprise.get(_enterprise_name,{}).get("contacts"))
|
|
|
+
|
|
|
+ # if "采购系统" not in df_data:
|
|
|
+ # df_data["采购系统"] = []
|
|
|
+ # df_data["采购系统"].append(dict_enterprise.get(_enterprise_name,{}).get("procurement_system",""))
|
|
|
+
|
|
|
+ # list_df_columns.extend(['采购系统'])
|
|
|
+ '''
|
|
|
+ export by customer's subscription
|
|
|
+ :return:
|
|
|
+ '''
|
|
|
+ try:
|
|
|
+ ots_client = getConnect_ots()
|
|
|
+
|
|
|
+ subscription4 = '''
|
|
|
+
|
|
|
+ 触控一体机 交互平板 交互一体机 交互智能平板 交互大屏 教学一体机 智慧屏 智慧黑板 互动黑板 班班通 多媒体教室 多媒体设备 多媒体系统 智慧教室
|
|
|
+ '''
|
|
|
+ # provinces = "河北 山西 广东 海南 江苏 安徽 山东 河南 湖北 湖南 重庆 黑龙江 陕西 甘肃 青海 宁夏"
|
|
|
+ provinces = "海南 江苏 安徽 湖北 湖南 重庆 甘肃 青海 宁夏 河南 北京 天津"
|
|
|
+
|
|
|
+
|
|
|
+ dict_channel = getDict_docchannel()
|
|
|
+ list_province = splitIntoList(provinces,"\s|,|,|、")
|
|
|
+ list_subscription4 = splitIntoList(subscription4,"\s|,|,|、")
|
|
|
+
|
|
|
+ current_date = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())))
|
|
|
+ last_date = timeAdd(current_date,-1)
|
|
|
+
|
|
|
+ # last_date = "2023-12-08"
|
|
|
+ # current_date = "2023-12-10"
|
|
|
+
|
|
|
+ set_columns1 = set()
|
|
|
+ list_df_columns1 = []
|
|
|
+ bool_query = BoolQuery(must_queries=[
|
|
|
+ generateBoolShouldQuery(["docchannel"],[102,114],TermQuery),
|
|
|
+ RangeQuery("page_time",last_date),
|
|
|
+ RangeQuery("status",201,300,True,True),
|
|
|
+ generateBoolShouldQuery(["doctitle","doctextcon","attachmenttextcon"],list_subscription4,MatchPhraseQuery),
|
|
|
+ generateBoolShouldQuery(["province"],list_province,WildcardQuery),
|
|
|
+ RangeQuery("crtime","%s 21:00:00"%last_date,"%s 21:00:00"%current_date)
|
|
|
+ ],
|
|
|
+ # must_not_queries=[
|
|
|
+ # generateBoolShouldQuery(["doctitle"],list_not_key,MatchPhraseQuery),
|
|
|
+ # ]
|
|
|
+ )
|
|
|
+
|
|
|
+
|
|
|
+ columns = ["doctitle","docchannel","original_docchannel","product","crtime","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","extract_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone"]
|
|
|
+ list_query = [{"query":bool_query,"limit":50000}]
|
|
|
+ list_row = getDocument(list_query,columns)
|
|
|
+
|
|
|
+ filename0 = os.path.dirname(__file__)+"/data/%s订阅4订阅5数据导出数量%d_%s.xlsx"%(current_date,len(list_row),getCurrent_date("%Y-%m-%d_%H%M%S"))
|
|
|
+ df_data = {}
|
|
|
+ if len(list_row)>0:
|
|
|
+ getRowData(df_data,list_row,dict_channel,set_columns1,list_df_columns1)
|
|
|
+
|
|
|
+ fixContactPerson(df_data,list_df_columns1)
|
|
|
+ df = pd.DataFrame(df_data)
|
|
|
+ df.to_excel(filename0,columns=list_df_columns1,index=False)
|
|
|
+ else:
|
|
|
+ df = pd.DataFrame(df_data)
|
|
|
+ df.to_excel(filename0,index=False)
|
|
|
+
|
|
|
+ host = "smtp.exmail.qq.com"
|
|
|
+ username = "vip1@bidizhaobiao.com"
|
|
|
+ password = "Biaoxun666+"
|
|
|
+ # receivers = ["1175730271@qq.com","995116318@qq.com","huangxiaofang@cvte.com"]
|
|
|
+ receivers = ["1175730271@qq.com","1208135584@qq.com","youyuer@cvte.com","chenyuxue@cvte.com"]
|
|
|
+
|
|
|
+ # receivers = ["1175730271@qq.com"]
|
|
|
+
|
|
|
+ attachs = [filename0]
|
|
|
+
|
|
|
+ sendEmail(host,username,password,receivers,attachs=attachs)
|
|
|
+
|
|
|
+ return True
|
|
|
+ except Exception as e:
|
|
|
+ traceback.print_exc()
|
|
|
+ return False
|
|
|
+
|
|
|
+
|
|
|
+ def export3_1(self,):
|
|
|
+
|
|
|
+ def getRowData(df_data,rows,dict_channel,set_columns1,list_df_column1):
|
|
|
+
|
|
|
+ _index = 0
|
|
|
+ for row in rows:
|
|
|
+
|
|
|
+
|
|
|
+ _index += 1
|
|
|
+ item = {}
|
|
|
+ _dict = row
|
|
|
+ _extract = json.loads(_dict.get("extract_json","{}"))
|
|
|
+ demand_info = _extract.get("demand_info",{"data":[]}).get("data")
|
|
|
+ docid = _dict.get("docid","")
|
|
|
+ win_tenderer = ""
|
|
|
+ bidding_budget = ""
|
|
|
+ win_bid_price = ""
|
|
|
+ sub_docs_json = json.loads(_dict.get("sub_docs_json","[]"))
|
|
|
+ for _doc in sub_docs_json:
|
|
|
+ if win_tenderer=="":
|
|
|
+ win_tenderer = _doc.get("win_tenderer","")
|
|
|
+ if bidding_budget=="":
|
|
|
+ bidding_budget= _doc.get("bidding_budget","")
|
|
|
+ if win_bid_price=="":
|
|
|
+ win_bid_price = _doc.get("win_bid_price","")
|
|
|
+ if len(demand_info)==0:
|
|
|
+ demand_info = [{}]
|
|
|
+ demand_info = [{}]
|
|
|
+ for _attrs in demand_info:
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"序号",_index)
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"项目编号",_dict.get("project_code"))
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"项目名称",_dict.get("project_name"))
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"省份",_dict.get("province"))
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"城市",_dict.get("city"))
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"信息类型",dict_channel.get(_dict.get("docchannel")))
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"发布时间",_dict.get("crtime"))
|
|
|
+
|
|
|
+ b_or_w = ""
|
|
|
+ if bidding_budget!="":
|
|
|
+ b_or_w = float(bidding_budget)/10000
|
|
|
+ elif win_bid_price!="":
|
|
|
+ b_or_w = float(win_bid_price)/10000
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"中标金额/预算金额(万元)",b_or_w)
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"招标单位",_dict.get("tenderee"))
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"招标项目联系人","%s\n%s"%(_dict.get("tenderee_contact",""),_dict.get("tenderee_phone","")))
|
|
|
+
|
|
|
+
|
|
|
+ # 中标单位 中标项目联系人 中标单位主要联系人 中标单位联系人 代理单位 代理项目联系人 代理单位联系人 比地招标公告地址
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"招标单位联系人","")
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位",win_tenderer)
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"中标项目联系人","")
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位主要联系人","")
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位联系人","")
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"代理单位",_dict.get("agency",""))
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"代理项目联系人","%s\n%s"%(_dict.get("agency_contact",""),_dict.get("agency_phone","")))
|
|
|
+
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"代理单位联系人","")
|
|
|
+
|
|
|
+
|
|
|
+ _key ='''MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQC8uZYYV6ls+5KMzUfvsPvv3XRdwkcTBj/ppB03mijUPHYTGvYSE0cQTbQrnIbXFtUYJguakpKLmVyH+T/w6vhxbQNlaykfe8RXEh4i4IJk8s/Qb0E0xODsjKBEr8VdDYeqqduWrtJpttXAvv93SsTPvgZBditRzJAzk0XH56zL1wIDAQAB'''
|
|
|
+
|
|
|
+ _key = base64.b64decode(_key)
|
|
|
+ _url_map = {"userId":"001795335",
|
|
|
+ "timestamp":"%d"%(time.time()*1000),
|
|
|
+ "docid":str(_dict.get("docid",""))}
|
|
|
+ _encrpt_text = base64.b64encode(rsa_encrpt(json.dumps(_url_map),_key))
|
|
|
+ _encrpt_text = urllib.parse.quote(_encrpt_text)
|
|
|
+ _url = "http://www.bidizhaobiao.com/info-%s.html?emailSecret=%s"%(str(_dict.get("docid")),_encrpt_text)
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"比地招标公告地址",_url)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"项目编号",_dict.get("project_code"))
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"项目名称",_attrs.get("project_name"))
|
|
|
+ #
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"需求",_attrs.get("demand"))
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"预算(元)",_attrs.get("budget"))
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"开始时间",_attrs.get("order_begin"))
|
|
|
+
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"中标单位",win_tenderer)
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"产品名称",_attrs.get("product"))
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"规格型号",_attrs.get("specs",""))
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"品牌",_attrs.get("brand",""))
|
|
|
+ # _quantity = ""
|
|
|
+ # _uniPrice = ""
|
|
|
+ # try:
|
|
|
+ # _quantity = int(_attrs.get("quantity","0"))
|
|
|
+ # if _quantity==0:
|
|
|
+ # _quantity = ""
|
|
|
+ # except Exception as e:
|
|
|
+ # pass
|
|
|
+ # try:
|
|
|
+ # _uniPrice = float(_attrs.get("unitPrice","0"))
|
|
|
+ # if _uniPrice==0:
|
|
|
+ # _uniPrice = ""
|
|
|
+ # except Exception as e:
|
|
|
+ # pass
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"数量",_quantity)
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"单价(元)",_uniPrice)
|
|
|
+ # sumPrice = ""
|
|
|
+ # if _quantity!="" and _uniPrice!="":
|
|
|
+ # sumPrice = _quantity*_uniPrice
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"总价(元)",sumPrice)
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"项目金额(元)",bidding_budget)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ for k,v in item.items():
|
|
|
+ if k not in df_data:
|
|
|
+ df_data[k] = []
|
|
|
+ df_data[k].append(v)
|
|
|
+
|
|
|
+
|
|
|
+ def getContacts(contacts):
|
|
|
+
|
|
|
+ try:
|
|
|
+ if contacts is None or contacts=="":
|
|
|
+ return ""
|
|
|
+
|
|
|
+ if isinstance(contacts,str):
|
|
|
+ _contacts = json.loads(contacts)
|
|
|
+ else:
|
|
|
+ _contacts = contacts
|
|
|
+
|
|
|
+ list_c = []
|
|
|
+ _count = 0
|
|
|
+ for _c in _contacts:
|
|
|
+ _count += 1
|
|
|
+ contact_person = _c.get("contact_person","")
|
|
|
+ phone_no = _c.get("phone_no","")
|
|
|
+ list_c.append("%s(企业联系人%d)\n%s"%(str(contact_person),_count,str(phone_no)))
|
|
|
+ if _count>=5:
|
|
|
+ break
|
|
|
+ print("getContacts",",\n".join(list_c))
|
|
|
+ return ",\n".join(list_c)
|
|
|
+ except Exception as e:
|
|
|
+ traceback.print_exc()
|
|
|
+ print("getContacts","")
|
|
|
+ return ""
|
|
|
+
|
|
|
+
|
|
|
+ def fixContactPerson(df_data,list_df_columns,get_legal_person=False):
|
|
|
+ set_enterprise = set()
|
|
|
+ if len(df_data.keys())>0:
|
|
|
+ for _tenderee,_agency,_win in zip(df_data["招标单位"],df_data["代理单位"],df_data["中标单位"]):
|
|
|
+ set_enterprise.add(_tenderee)
|
|
|
+ set_enterprise.add(_agency)
|
|
|
+ set_enterprise.add(_win)
|
|
|
+ if "" in set_enterprise:
|
|
|
+ set_enterprise.remove("")
|
|
|
+ if None in set_enterprise:
|
|
|
+ set_enterprise.remove(None)
|
|
|
+ dict_enterprise = getDictEnterprise(list(set_enterprise),columns_to_get = ["contacts","procurement_system"])
|
|
|
+
|
|
|
+
|
|
|
+ # conn = getConnection_oracle()
|
|
|
+ # cursor = conn.cursor()
|
|
|
+ if len(set_enterprise)>0:
|
|
|
+ for _i in range(len(df_data["招标单位"])):
|
|
|
+ _enterprise_name = df_data["招标单位"][_i]
|
|
|
+
|
|
|
+ df_data["招标单位联系人"][_i] = getContacts(dict_enterprise.get(_enterprise_name,{}).get("contacts"))
|
|
|
+
|
|
|
+ _enterprise_name = df_data["代理单位"][_i]
|
|
|
+
|
|
|
+ df_data["代理单位联系人"][_i] = getContacts(dict_enterprise.get(_enterprise_name,{}).get("contacts"))
|
|
|
+
|
|
|
+ _enterprise_name = df_data["中标单位"][_i]
|
|
|
+
|
|
|
+ df_data["中标单位联系人"][_i] = getContacts(dict_enterprise.get(_enterprise_name,{}).get("contacts"))
|
|
|
+
|
|
|
+ # if "采购系统" not in df_data:
|
|
|
+ # df_data["采购系统"] = []
|
|
|
+ # df_data["采购系统"].append(dict_enterprise.get(_enterprise_name,{}).get("procurement_system",""))
|
|
|
+
|
|
|
+ # list_df_columns.extend(['采购系统'])
|
|
|
+ '''
|
|
|
+ export by customer's subscription
|
|
|
+ :return:
|
|
|
+ '''
|
|
|
+ try:
|
|
|
+ ots_client = getConnect_ots()
|
|
|
+
|
|
|
+ subscription4 = '''
|
|
|
+
|
|
|
+ 录播、三个课堂、互动课堂、同步课堂、双师课堂、专递课堂、微格教室、远程课堂、督导巡课、推门听课、互动教学
|
|
|
+ '''
|
|
|
+ # provinces = "河北 山西 广东 海南 江苏 安徽 山东 河南 湖北 湖南 重庆 黑龙江 陕西 甘肃 青海 宁夏"
|
|
|
+ provinces = "海南 江苏 安徽 湖北 湖南 重庆 甘肃 青海 宁夏 河南"
|
|
|
+
|
|
|
+
|
|
|
+ dict_channel = getDict_docchannel()
|
|
|
+ list_province = splitIntoList(provinces,"\s|,|,|、")
|
|
|
+ list_subscription4 = splitIntoList(subscription4,"\s|,|,|、")
|
|
|
+
|
|
|
+ current_date = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())))
|
|
|
+ last_date = timeAdd(current_date,-1)
|
|
|
+
|
|
|
+ # last_date = "2023-08-14"
|
|
|
+ # current_date = "2023-08-20"
|
|
|
+
|
|
|
+ set_columns1 = set()
|
|
|
+ list_df_columns1 = []
|
|
|
+ bool_query = BoolQuery(must_queries=[
|
|
|
+ generateBoolShouldQuery(["docchannel"],[102,114],TermQuery),
|
|
|
+ RangeQuery("page_time",last_date),
|
|
|
+ RangeQuery("status",201,300,True,True),
|
|
|
+ generateBoolShouldQuery(["doctitle","doctextcon","attachmenttextcon"],list_subscription4,MatchPhraseQuery),
|
|
|
+ # generateBoolShouldQuery(["province"],list_province,WildcardQuery),
|
|
|
+ RangeQuery("crtime","%s 21:00:00"%last_date,"%s 21:00:00"%current_date)
|
|
|
+ ],
|
|
|
+ # must_not_queries=[
|
|
|
+ # generateBoolShouldQuery(["doctitle"],list_not_key,MatchPhraseQuery),
|
|
|
+ # ]
|
|
|
+ )
|
|
|
+
|
|
|
+
|
|
|
+ columns = ["doctitle","docchannel","original_docchannel","product","crtime","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","extract_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone"]
|
|
|
+ list_query = [{"query":bool_query,"limit":50000}]
|
|
|
+ list_row = getDocument(list_query,columns)
|
|
|
+
|
|
|
+ filename0 = os.path.dirname(__file__)+"/data/%s订阅4订阅5数据导出数量%d_%s.xlsx"%(current_date,len(list_row),getCurrent_date("%Y-%m-%d_%H%M%S"))
|
|
|
+ df_data = {}
|
|
|
+ if len(list_row)>0:
|
|
|
+ getRowData(df_data,list_row,dict_channel,set_columns1,list_df_columns1)
|
|
|
+
|
|
|
+ fixContactPerson(df_data,list_df_columns1)
|
|
|
+ df = pd.DataFrame(df_data)
|
|
|
+ df.to_excel(filename0,columns=list_df_columns1,index=False)
|
|
|
+ else:
|
|
|
+ df = pd.DataFrame(df_data)
|
|
|
+ df.to_excel(filename0,index=False)
|
|
|
|
|
|
- filename = os.path.dirname(__file__)+"/data/%s年%s至%s医疗数据导出.xlsx"%(start_time[:-4],start_time,current_date)
|
|
|
- df.to_excel(filename,columns=['公告标题', '公告类别', '省份', '城市', '发布时间', '项目编号', '招标单位', '招标联系人', '招标联系人电话', '代理单位', '代理联系人', '代理联系人电话', '比地招标公告地址', '中标单位', '中标金额', '招标金额', '中标单位联系人', '中标单位联系电话'])
|
|
|
host = "smtp.exmail.qq.com"
|
|
|
- username = "vip@bidizhaobiao.com"
|
|
|
- password = "Biaoxun66-"
|
|
|
- receivers = ["1985262186@qq.com","1175730271@qq.com","1265797328@qq.com","1289358902@qq.com"]
|
|
|
- attachs = [filename]
|
|
|
+ username = "vip1@bidizhaobiao.com"
|
|
|
+ password = "Biaoxun666+"
|
|
|
+ # receivers = ["1175730271@qq.com","995116318@qq.com","huangxiaofang@cvte.com"]
|
|
|
+ receivers = ["1175730271@qq.com","1208135584@qq.com","wanghongyan@cvte.com"]
|
|
|
+
|
|
|
+ # receivers = ["1175730271@qq.com"]
|
|
|
+
|
|
|
+ attachs = [filename0]
|
|
|
+
|
|
|
sendEmail(host,username,password,receivers,attachs=attachs)
|
|
|
|
|
|
+ return True
|
|
|
+ except Exception as e:
|
|
|
+ traceback.print_exc()
|
|
|
+ return False
|
|
|
+
|
|
|
+ def export4(self):
|
|
|
+
|
|
|
+ def getRowData(df_data,rows,dict_channel,set_columns1,list_df_column1):
|
|
|
+
|
|
|
+ dict_filter,dict_update = getFixData()
|
|
|
+ _index = 0
|
|
|
+ for row in rows:
|
|
|
+
|
|
|
|
|
|
-def job_medicine_friday():
|
|
|
+ _index += 1
|
|
|
+ item = {}
|
|
|
+ _dict = row
|
|
|
+ keys = row["keyword"]
|
|
|
+ log("================")
|
|
|
+ log(keys)
|
|
|
+ log(_dict.get("doctitle","")+_dict.get("doctextcon","")+_dict.get("attachmenttextcon",""))
|
|
|
+ _keyword = str(list(set([ i for i in re.findall("|".join([re.escape(i) for i in keys if i!='']),_dict.get("doctitle","")+_dict.get("doctextcon","")+_dict.get("attachmenttextcon","")) if i!='' ])))
|
|
|
+ log(_keyword)
|
|
|
+ _extract = json.loads(_dict.get("extract_json","{}"))
|
|
|
+ if len(set(keys)&set(["AR一体机","一体机","短焦投影仪","教学一体机","云桌面","录播","电子书包","电子班牌","智慧黑板"]))>0:
|
|
|
+ product_attrs = _extract.get("product_attrs",{"data":[{}]}).get("data")
|
|
|
+ else:
|
|
|
+ product_attrs = [{}]
|
|
|
+ if len(product_attrs)==0:
|
|
|
+ product_attrs = [{}]
|
|
|
+ docid = _dict.get("docid","")
|
|
|
+ win_tenderer = ""
|
|
|
+ bidding_budget = ""
|
|
|
+ win_bid_price = ""
|
|
|
+ sub_docs_json = json.loads(_dict.get("sub_docs_json","[]"))
|
|
|
+ for _doc in sub_docs_json:
|
|
|
+ if win_tenderer=="":
|
|
|
+ win_tenderer = _doc.get("win_tenderer","")
|
|
|
+ if bidding_budget=="":
|
|
|
+ bidding_budget= _doc.get("bidding_budget","")
|
|
|
+ if win_bid_price=="":
|
|
|
+ win_bid_price = _doc.get("win_bid_price","")
|
|
|
+ _index1 = 0
|
|
|
+ if _dict.get("docid") in dict_filter:
|
|
|
+ print("====filter")
|
|
|
+ continue
|
|
|
+ if _dict.get("docid") in dict_update:
|
|
|
+ print("====update")
|
|
|
+ win_bid_price = dict_update[_dict.get("docid")].get("win_bid_price","")
|
|
|
+ bidding_budget= dict_update[_dict.get("docid")].get("bidding_budget","")
|
|
|
+ for _attrs in product_attrs:
|
|
|
+ _index1 += 1
|
|
|
+ if re.search("运费",_attrs.get("product","")) is not None:
|
|
|
+ if _index1==1:
|
|
|
+ _attrs = {}
|
|
|
+ else:
|
|
|
+ continue
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"序号",_index)
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"正文",_dict.get("doctextcon",""))
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"附件",_dict.get("attachmenttextcon",""))
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"项目名称",_dict.get("project_name",""))
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"docid",_dict.get("docid"))
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"网站链接",'=HYPERLINK("http://www.bidizhaobiao.com/info-%s.html","查看公告")'%(str(docid)))
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"省份",_dict.get("province"))
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"城市",_dict.get("city"))
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"地区",_dict.get("district"))
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"公告时间",_dict.get("page_time"))
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"招标公告时间",'')
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"项目标题",_dict.get("doctitle"))
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"采购单位",_dict.get("tenderee"))
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"中标供应商",win_tenderer)
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"采购方式",_dict.get("bidway",""))
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"招标采购方式","")
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"采购系统","")
|
|
|
+
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"产品名称",_attrs.get("product"))
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"品牌",_attrs.get("brand",""))
|
|
|
+
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"规格型号",_attrs.get("specs",""))
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"预算金额(元)",bidding_budget)
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"招标预算金额(元)","")
|
|
|
+ _quantity = ""
|
|
|
+ _uniPrice = ""
|
|
|
+ try:
|
|
|
+ _quantity = int(re.sub("[^\d]","",_attrs.get("quantity","0")))
|
|
|
+ if _quantity==0:
|
|
|
+ _quantity = ""
|
|
|
+ except Exception as e:
|
|
|
+ pass
|
|
|
+ try:
|
|
|
+ _uniPrice = float(_attrs.get("unitPrice","0"))
|
|
|
+ if _uniPrice==0:
|
|
|
+ _uniPrice = ""
|
|
|
+ except Exception as e:
|
|
|
+ pass
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"数量",_quantity)
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"单价(元)",_uniPrice)
|
|
|
+ sumPrice = ""
|
|
|
+ if _quantity!="" and _uniPrice!="":
|
|
|
+ sumPrice = _quantity*_uniPrice
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"总价(元)",sumPrice)
|
|
|
+
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"中标金额(元)",win_bid_price)
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"关键词",_keyword)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ for k,v in item.items():
|
|
|
+ if k not in df_data:
|
|
|
+ df_data[k] = []
|
|
|
+ df_data[k].append(v)
|
|
|
+ def filterRow(list_row,key="docid"):
|
|
|
+ set_id = set()
|
|
|
+ _filter_row = []
|
|
|
+ for row in list_row:
|
|
|
+ _v = row.get(key,"")
|
|
|
+ if _v in set_id:
|
|
|
+ continue
|
|
|
+ set_id.add(_v)
|
|
|
+ _filter_row.append(row)
|
|
|
+ return _filter_row
|
|
|
+
|
|
|
+ def fixContactPerson(df_data,list_df_columns,get_legal_person=False):
|
|
|
+ set_enterprise = set()
|
|
|
+ if len(df_data.keys())>0:
|
|
|
+ for _tenderee,_win_tenderer in zip(df_data["采购单位"],df_data["中标供应商"]):
|
|
|
+ set_enterprise.add(_tenderee)
|
|
|
+ set_enterprise.add(_win_tenderer)
|
|
|
+ if "" in set_enterprise:
|
|
|
+ set_enterprise.remove("")
|
|
|
+ if None in set_enterprise:
|
|
|
+ set_enterprise.remove(None)
|
|
|
+ dict_enterprise = getDictEnterprise(list(set_enterprise),columns_to_get = ["procurement_system"])
|
|
|
+
|
|
|
+ # conn = getConnection_oracle()
|
|
|
+ # cursor = conn.cursor()
|
|
|
+ if len(set_enterprise)>0:
|
|
|
+ for _i in range(len(df_data["采购单位"])):
|
|
|
+ _enterprise_name = df_data["采购单位"][_i]
|
|
|
+ # if df_data["招标联系人电话"][_i]=="":
|
|
|
+ # contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts")
|
|
|
+ # if contacts is not None:
|
|
|
+ # _person,_phone = getOneContact(contacts)
|
|
|
+ # df_data["招标联系人"][_i] = _person
|
|
|
+ # df_data["招标联系人电话"][_i] = _phone
|
|
|
+
|
|
|
+
|
|
|
+ df_data["采购系统"][_i] = dict_enterprise.get(_enterprise_name,{}).get("procurement_system","")
|
|
|
+
|
|
|
+ def fixZhaobiao_page_time(df_data):
|
|
|
+ ots_client = getConnect_ots()
|
|
|
+
|
|
|
+ for i in range(len(df_data["docid"])):
|
|
|
+ _docid = df_data["docid"][i]
|
|
|
+ bool_query = BoolQuery(must_queries=[TermQuery("docids",_docid)])
|
|
|
+
|
|
|
+ rows,next_token,total_count,is_all_succeed = ots_client.search("project2","project2_index",
|
|
|
+ SearchQuery(bool_query,limit=1),
|
|
|
+ ColumnsToGet(["zhao_biao_page_time","bidway","bidding_budget"],ColumnReturnType.SPECIFIED))
|
|
|
+ list_data = getRow_ots(rows)
|
|
|
+ if len(list_data)>0:
|
|
|
+ df_data["招标公告时间"][i] = list_data[0].get("zhao_biao_page_time","")
|
|
|
+ df_data["招标采购方式"][i] = list_data[0].get("bidway","")
|
|
|
+ new_bidding_budget = list_data[0].get("bidding_budget",0)
|
|
|
+ if df_data["预算金额(元)"][i]=="" or float(df_data["预算金额(元)"][i])==0:
|
|
|
+ if df_data["中标金额(元)"][i]!="" and new_bidding_budget>=float(df_data["中标金额(元)"][i]):
|
|
|
+ df_data["预算金额(元)"][i] = new_bidding_budget
|
|
|
+ # df_data["招标预算金额(元)"][i] = list_data[0].get("bidding_budget","")
|
|
|
+
|
|
|
+ def popRows(df_data):
|
|
|
+ list_pop = []
|
|
|
+ set_docid = set()
|
|
|
+ for _c in range(len(df_data["采购系统"])):
|
|
|
+ if df_data["采购系统"][_c]!="教育系统" or df_data["中标金额(元)"][_c]=="":
|
|
|
+ list_pop.append(_c)
|
|
|
+ set_docid.add(df_data["docid"][_c])
|
|
|
+ list_pop.reverse()
|
|
|
+ for k,v in df_data.items():
|
|
|
+ for _p in list_pop:
|
|
|
+ v.pop(_p)
|
|
|
+ _index = 0
|
|
|
+ set_docid1 = set()
|
|
|
+ for _i in range(len(df_data["序号"])):
|
|
|
+ _docid = df_data["docid"][_i]
|
|
|
+ if _docid not in set_docid1:
|
|
|
+ _index += 1
|
|
|
+ set_docid1.add(_docid)
|
|
|
+ df_data["序号"][_i] = _index
|
|
|
+ return len(set_docid)
|
|
|
+
|
|
|
+ df = pd.read_excel("20220927v1.4.xlsx",sheetname=1)
|
|
|
+
|
|
|
+ list_search_dict = []
|
|
|
+ for title_kw,content_kw,title_rm,content_rm in zip(df["标题采集关键词"],df["全文采集关键词"],df["标题排除词"],df["全文排除词"]):
|
|
|
+ if not isinstance(title_kw,str):
|
|
|
+ title_kw = ""
|
|
|
+ if not isinstance(content_kw,str):
|
|
|
+ content_kw = ""
|
|
|
+ if not isinstance(title_rm,str):
|
|
|
+ title_rm = ""
|
|
|
+ if not isinstance(content_rm,str):
|
|
|
+ content_rm = ""
|
|
|
+ _dict = {"title_kw":re.split("\s|,|,",str(title_kw)),
|
|
|
+ "content_kw":re.split("\s|,|,",str(content_kw)),
|
|
|
+ "title_rm":re.split("\s|,|,",str(title_rm)),
|
|
|
+ "content_rm":re.split("\s|,|,",str(content_rm))}
|
|
|
+ list_search_dict.append(_dict)
|
|
|
+
|
|
|
+
|
|
|
+ columns = ["extract_json","doctextcon","attachmenttextcon","doctitle","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose","web_source_no","web_source_name","service_time","person_review","time_get_file_start","time_get_file_end","time_earnest_money_start","time_earnest_money_end"]
|
|
|
+
|
|
|
+ current_date = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())))
|
|
|
+ last_date = timeAdd(current_date,-1)
|
|
|
+
|
|
|
+ current_date = "2022-10-14"
|
|
|
+ last_date = "2023-05-07"
|
|
|
+
|
|
|
+
|
|
|
+ dict_channel = getDict_docchannel()
|
|
|
+ list_mq = []
|
|
|
+ list_query = []
|
|
|
+ for _d in list_search_dict:
|
|
|
+ print(_d)
|
|
|
+ kw_should_q = []
|
|
|
+ list_keys = []
|
|
|
+ title_kw = _d.get("title_kw",[])
|
|
|
+ if len(title_kw)>0:
|
|
|
+ kw_should_q.append(generateBoolShouldQuery(["doctitle"],title_kw,MatchPhraseQuery))
|
|
|
+ list_keys.extend(title_kw)
|
|
|
+ content_kw = _d.get("content_kw",[])
|
|
|
+ if len(content_kw)>0:
|
|
|
+ kw_should_q.append(generateBoolShouldQuery(["doctextcon","attachmenttextcon"],content_kw,MatchPhraseQuery))
|
|
|
+ list_keys.extend(content_kw)
|
|
|
+
|
|
|
+ rm_should_q = []
|
|
|
+ title_rm = _d.get("title_rm",[])
|
|
|
+ if len(title_rm)>0:
|
|
|
+ rm_should_q.append(generateBoolShouldQuery(["doctitle"],title_rm,MatchPhraseQuery))
|
|
|
+ content_rm = _d.get("content_rm",[])
|
|
|
+ if len(content_rm)>0:
|
|
|
+ rm_should_q.append(generateBoolShouldQuery(["doctextcon","attachmenttextcon"],content_rm,MatchPhraseQuery))
|
|
|
+
|
|
|
+ _query = BoolQuery(must_queries=[
|
|
|
+ generateBoolShouldQuery(["docchannel"],[101],TermQuery),
|
|
|
+ # RangeQuery("page_time",last_date,current_date),
|
|
|
+ RangeQuery("page_time",current_date,last_date),
|
|
|
+ # RangeQuery("status",201,300,True,True),
|
|
|
+ # TermQuery("docid",263568527),
|
|
|
+ # generateBoolShouldQuery(["doctitle","doctextcon","attachmenttextcon"],list_keys,MatchPhraseQuery),
|
|
|
+ BoolQuery(should_queries=kw_should_q),
|
|
|
+ # generateBoolShouldQuery(["province"],list_province,WildcardQuery),
|
|
|
+ # RangeQuery("crtime","%s"%last_date,"%s"%current_date)
|
|
|
+ ],
|
|
|
+ must_not_queries=rm_should_q)
|
|
|
+ list_query.append({"query":_query,"limit":50000,"keyword":list_keys})
|
|
|
+ break
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ def getFixData():
|
|
|
+ filename = "C:\\Users\\Administrator\\Desktop\\视源数据清洗.xlsx"
|
|
|
+ dict_filter = {}
|
|
|
+ dict_update = {}
|
|
|
+ df = pd.read_excel(filename,0)
|
|
|
+ for _docid,bidding_budget,win_bid_price,is_dup,is_fb in zip(df["docid"],df["检查招标金额"],df["中标金额(元)"],df["是否重复"],df["是否废标"]):
|
|
|
+ if str(is_dup) in ("是","1") or str(is_fb) in ("是","1"):
|
|
|
+ dict_filter[_docid] = 1
|
|
|
+ else:
|
|
|
+ dict_update[_docid] = {"docid":_docid,
|
|
|
+ "bidding_budget":bidding_budget,
|
|
|
+ "win_bid_price":win_bid_price}
|
|
|
+ df = pd.read_excel(filename,1)
|
|
|
+ for _docid,bidding_budget,win_bid_price,is_dup,is_fb in zip(df["docid"],df["检查招标金额"],df["中标金额(元)"],df["是否重复"],df["是否废标"]):
|
|
|
+ if str(is_dup) in ("是","1") or str(is_fb) in ("是","1"):
|
|
|
+ dict_filter[_docid] = 1
|
|
|
+ else:
|
|
|
+ dict_update[_docid] = {"docid":_docid,
|
|
|
+ "bidding_budget":bidding_budget,
|
|
|
+ "win_bid_price":win_bid_price}
|
|
|
+ return dict_filter,dict_update
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ list_row = getDocument(list_query,columns)
|
|
|
+ list_row = filterRow(list_row)
|
|
|
+
|
|
|
+ print("list_row",len(list_row))
|
|
|
+
|
|
|
+ set_columns1 = set()
|
|
|
+ list_df_columns1 = []
|
|
|
+
|
|
|
+ df_data = {}
|
|
|
+ getRowData(df_data,list_row,dict_channel,set_columns1,list_df_columns1)
|
|
|
+
|
|
|
+ print("len df_data",len(df_data["docid"]))
|
|
|
+ fixContactPerson(df_data,list_df_columns1)
|
|
|
+
|
|
|
+ fixZhaobiao_page_time(df_data)
|
|
|
+ size_pop = popRows(df_data)
|
|
|
+ print("size_pop",size_pop)
|
|
|
+
|
|
|
+ df = pd.DataFrame(df_data)
|
|
|
+
|
|
|
+ filename = os.path.dirname(__file__)+"/data/%s关键词数据导出数量%d_%s.xlsx"%(current_date,len(list_row)-size_pop,getCurrent_date("%Y-%m-%d_%H%M%S"))
|
|
|
+ df.to_excel(filename,columns=list_df_columns1,index=False)
|
|
|
+
|
|
|
+ host = "smtp.exmail.qq.com"
|
|
|
+ username = "vip@bidizhaobiao.com"
|
|
|
+ password = "Biaoxun66-"
|
|
|
+ receivers = ["1175730271@qq.com","747012698@qq.com","995116318@qq.com"]
|
|
|
+ receivers = ["1175730271@qq.com"]
|
|
|
+ attachs = [filename]
|
|
|
+
|
|
|
+ sendEmail(host,username,password,receivers,attachs=attachs)
|
|
|
+
|
|
|
+ def export4_by_project(self):
|
|
|
+
|
|
|
+ def getRowData(df_data,rows,dict_channel,set_columns1,list_df_column1):
|
|
|
+
|
|
|
+ # dict_filter,dict_update = getFixData()
|
|
|
+ ots_client = getConnect_ots()
|
|
|
+ _index = 0
|
|
|
+ for row in rows:
|
|
|
+
|
|
|
+
|
|
|
+ _index += 1
|
|
|
+ item = {}
|
|
|
+ _dict = row
|
|
|
+ keys = row["keyword"]
|
|
|
+ log("================")
|
|
|
+ log(keys)
|
|
|
+ docid = row.get("docids","")
|
|
|
+ log(docid)
|
|
|
+ try:
|
|
|
+ docid_0 = docid.split(",")[0]
|
|
|
+ except Exception as e:
|
|
|
+ continue
|
|
|
+ log(docid)
|
|
|
+ log(_dict.get("doctitle","")+_dict.get("doctextcon","")+_dict.get("attachmenttextcon",""))
|
|
|
+ _keyword = str(list(set([ i for i in re.findall("|".join([re.escape(i) for i in keys if i!='']),_dict.get("doctitle","")+_dict.get("doctextcon","")+_dict.get("attachmenttextcon","")) if i!='' ])))
|
|
|
+ log(_keyword)
|
|
|
+ product_attrs = [{}]
|
|
|
+ # for doc in docid.split(","):
|
|
|
+ # _q = BoolQuery(must_queries=[TermQuery("docid",int(doc))])
|
|
|
+ # r,n,t,_ = ots_client.search("document","document_index",SearchQuery(_q),columns_to_get=ColumnsToGet(["extract_json",ColumnReturnType.SPECIFIED]))
|
|
|
+ # r = getRow_ots(r)
|
|
|
+ # if len(r)>0:
|
|
|
+ # _extract = json.loads(r[0].get("extract_json","{}"))
|
|
|
+ # if len(set(keys)&set(["AR一体机","一体机","短焦投影仪","教学一体机","云桌面","录播","电子书包","电子班牌","智慧黑板"]))>0:
|
|
|
+ # product_attrs = _extract.get("product_attrs",{"data":[{}]}).get("data")
|
|
|
+ # else:
|
|
|
+ # product_attrs = [{}]
|
|
|
+ # if len(product_attrs)==0:
|
|
|
+ # product_attrs = [{}]
|
|
|
+ # win_tenderer = ""
|
|
|
+ # bidding_budget = ""
|
|
|
+ # win_bid_price = ""
|
|
|
+ # sub_docs_json = json.loads(_dict.get("sub_docs_json","[]"))
|
|
|
+ # for _doc in sub_docs_json:
|
|
|
+ # if win_tenderer=="":
|
|
|
+ # win_tenderer = _doc.get("win_tenderer","")
|
|
|
+ # if bidding_budget=="":
|
|
|
+ # bidding_budget= _doc.get("bidding_budget","")
|
|
|
+ # if win_bid_price=="":
|
|
|
+ # win_bid_price = _doc.get("win_bid_price","")
|
|
|
+ _index1 = 0
|
|
|
+ # if _dict.get("docid") in dict_filter:
|
|
|
+ # print("====filter")
|
|
|
+ # continue
|
|
|
+ # if _dict.get("docid") in dict_update:
|
|
|
+ # print("====update")
|
|
|
+ # win_bid_price = dict_update[_dict.get("docid")].get("win_bid_price","")
|
|
|
+ # bidding_budget= dict_update[_dict.get("docid")].get("bidding_budget","")
|
|
|
+ for _attrs in product_attrs:
|
|
|
+ _index1 += 1
|
|
|
+ if re.search("运费",_attrs.get("product","")) is not None:
|
|
|
+ if _index1==1:
|
|
|
+ _attrs = {}
|
|
|
+ else:
|
|
|
+ continue
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"序号",_index)
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"正文",_dict.get("doctextcon",""))
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"附件",_dict.get("attachmenttextcon",""))
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"项目名称",_dict.get("project_name",""))
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"docid",docid)
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"网站链接",'=HYPERLINK("http://www.bidizhaobiao.com/info-%s.html","查看公告")'%(str(docid)))
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"省份",_dict.get("province"))
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"城市",_dict.get("city"))
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"地区",_dict.get("district"))
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"公告时间",_dict.get("page_time"))
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"招标公告时间",_dict.get("zhao_biao_page_time"))
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"项目标题",_dict.get("doctitles"))
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"采购单位",_dict.get("tenderee"))
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"中标供应商",_dict.get("win_tenderer"))
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"采购方式",_dict.get("bidway",""))
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"招标采购方式","")
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"采购系统","")
|
|
|
+
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"产品名称",_attrs.get("product"))
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"品牌",_attrs.get("brand",""))
|
|
|
+
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"规格型号",_attrs.get("specs",""))
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"预算金额(元)",_dict.get("bidding_budget"))
|
|
|
+ # set_dict_item_columns(set_columns1,list_df_column1,item,"招标预算金额(元)","")
|
|
|
+ _quantity = ""
|
|
|
+ _uniPrice = ""
|
|
|
+ try:
|
|
|
+ _quantity = int(re.sub("[^\d]","",_attrs.get("quantity","0")))
|
|
|
+ if _quantity==0:
|
|
|
+ _quantity = ""
|
|
|
+ except Exception as e:
|
|
|
+ pass
|
|
|
+ try:
|
|
|
+ _uniPrice = float(_attrs.get("unitPrice","0"))
|
|
|
+ if _uniPrice==0:
|
|
|
+ _uniPrice = ""
|
|
|
+ except Exception as e:
|
|
|
+ pass
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"数量",_quantity)
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"单价(元)",_uniPrice)
|
|
|
+ sumPrice = ""
|
|
|
+ if _quantity!="" and _uniPrice!="":
|
|
|
+ sumPrice = _quantity*_uniPrice
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"总价(元)",sumPrice)
|
|
|
+
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"中标金额(元)",_dict.get("win_bid_price"))
|
|
|
+
|
|
|
+ set_dict_item_columns(set_columns1,list_df_column1,item,"关键词",_keyword)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ for k,v in item.items():
|
|
|
+ if k not in df_data:
|
|
|
+ df_data[k] = []
|
|
|
+ df_data[k].append(v)
|
|
|
+ def filterRow(list_row,key="docid"):
|
|
|
+ set_id = set()
|
|
|
+ _filter_row = []
|
|
|
+ for row in list_row:
|
|
|
+ _v = row.get(key,"")
|
|
|
+ if _v in set_id:
|
|
|
+ continue
|
|
|
+ set_id.add(_v)
|
|
|
+ _filter_row.append(row)
|
|
|
+ return _filter_row
|
|
|
+
|
|
|
+ def fixContactPerson(df_data,list_df_columns,get_legal_person=False):
|
|
|
+ set_enterprise = set()
|
|
|
+ if len(df_data.keys())>0:
|
|
|
+ for _tenderee,_win_tenderer in zip(df_data["采购单位"],df_data["中标供应商"]):
|
|
|
+ set_enterprise.add(_tenderee)
|
|
|
+ set_enterprise.add(_win_tenderer)
|
|
|
+ if "" in set_enterprise:
|
|
|
+ set_enterprise.remove("")
|
|
|
+ if None in set_enterprise:
|
|
|
+ set_enterprise.remove(None)
|
|
|
+ dict_enterprise = getDictEnterprise(list(set_enterprise),columns_to_get = ["procurement_system"])
|
|
|
+
|
|
|
+ # conn = getConnection_oracle()
|
|
|
+ # cursor = conn.cursor()
|
|
|
+ if len(set_enterprise)>0:
|
|
|
+ for _i in range(len(df_data["采购单位"])):
|
|
|
+ _enterprise_name = df_data["采购单位"][_i]
|
|
|
+ # if df_data["招标联系人电话"][_i]=="":
|
|
|
+ # contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts")
|
|
|
+ # if contacts is not None:
|
|
|
+ # _person,_phone = getOneContact(contacts)
|
|
|
+ # df_data["招标联系人"][_i] = _person
|
|
|
+ # df_data["招标联系人电话"][_i] = _phone
|
|
|
+
|
|
|
+
|
|
|
+ df_data["采购系统"][_i] = dict_enterprise.get(_enterprise_name,{}).get("procurement_system","")
|
|
|
+
|
|
|
+ def fixZhaobiao_page_time(df_data):
|
|
|
+ ots_client = getConnect_ots()
|
|
|
+
|
|
|
+ for i in range(len(df_data["docid"])):
|
|
|
+ _docid = df_data["docid"][i]
|
|
|
+ bool_query = BoolQuery(must_queries=[TermQuery("docids",_docid)])
|
|
|
+
|
|
|
+ rows,next_token,total_count,is_all_succeed = ots_client.search("project2","project2_index",
|
|
|
+ SearchQuery(bool_query,limit=1),
|
|
|
+ ColumnsToGet(["zhao_biao_page_time","bidway","bidding_budget"],ColumnReturnType.SPECIFIED))
|
|
|
+ list_data = getRow_ots(rows)
|
|
|
+ if len(list_data)>0:
|
|
|
+ df_data["招标公告时间"][i] = list_data[0].get("zhao_biao_page_time","")
|
|
|
+ df_data["招标采购方式"][i] = list_data[0].get("bidway","")
|
|
|
+ new_bidding_budget = list_data[0].get("bidding_budget",0)
|
|
|
+ if df_data["预算金额(元)"][i]=="" or float(df_data["预算金额(元)"][i])==0:
|
|
|
+ if df_data["中标金额(元)"][i]!="" and new_bidding_budget>=float(df_data["中标金额(元)"][i]):
|
|
|
+ df_data["预算金额(元)"][i] = new_bidding_budget
|
|
|
+ # df_data["招标预算金额(元)"][i] = list_data[0].get("bidding_budget","")
|
|
|
+
|
|
|
+ def popRows(df_data):
|
|
|
+ list_pop = []
|
|
|
+ set_docid = set()
|
|
|
+ for _c in range(len(df_data["采购系统"])):
|
|
|
+ if df_data["采购系统"][_c]!="教育系统" or df_data["中标金额(元)"][_c]=="":
|
|
|
+ list_pop.append(_c)
|
|
|
+ set_docid.add(df_data["docid"][_c])
|
|
|
+ list_pop.reverse()
|
|
|
+ for k,v in df_data.items():
|
|
|
+ for _p in list_pop:
|
|
|
+ v.pop(_p)
|
|
|
+ _index = 0
|
|
|
+ set_docid1 = set()
|
|
|
+ for _i in range(len(df_data["序号"])):
|
|
|
+ _docid = df_data["docid"][_i]
|
|
|
+ if _docid not in set_docid1:
|
|
|
+ _index += 1
|
|
|
+ set_docid1.add(_docid)
|
|
|
+ df_data["序号"][_i] = _index
|
|
|
+ return len(set_docid)
|
|
|
+
|
|
|
+ df = pd.read_excel("20220927v1.4.xlsx",sheetname=1)
|
|
|
+
|
|
|
+ list_search_dict = []
|
|
|
+ for title_kw,content_kw,title_rm,content_rm in zip(df["标题采集关键词"],df["全文采集关键词"],df["标题排除词"],df["全文排除词"]):
|
|
|
+ if not isinstance(title_kw,str):
|
|
|
+ title_kw = ""
|
|
|
+ if not isinstance(content_kw,str):
|
|
|
+ content_kw = ""
|
|
|
+ if not isinstance(title_rm,str):
|
|
|
+ title_rm = ""
|
|
|
+ if not isinstance(content_rm,str):
|
|
|
+ content_rm = ""
|
|
|
+ _dict = {"title_kw":re.split("\s|,|,",str(title_kw)),
|
|
|
+ "content_kw":re.split("\s|,|,",str(content_kw)),
|
|
|
+ "title_rm":re.split("\s|,|,",str(title_rm)),
|
|
|
+ "content_rm":re.split("\s|,|,",str(content_rm))}
|
|
|
+ list_search_dict.append(_dict)
|
|
|
+
|
|
|
+
|
|
|
+ columns = ["docids","doctitles","product","province","bidway","city","district","zhao_biao_page_time","page_time","industry","info_type","tenderee","bidding_budget","project_code","project_name","win_tenderer","win_bid_price","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose","web_source_no","web_source_name","service_time","person_review","time_get_file_start","time_get_file_end","time_earnest_money_start","time_earnest_money_end"]
|
|
|
+
|
|
|
+ current_date = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())))
|
|
|
+ last_date = timeAdd(current_date,-1)
|
|
|
+
|
|
|
+ current_date = "2022-10-14"
|
|
|
+ last_date = "2023-05-07"
|
|
|
+
|
|
|
+
|
|
|
+ dict_channel = getDict_docchannel()
|
|
|
+ list_mq = []
|
|
|
+ list_query = []
|
|
|
+ for _d in list_search_dict:
|
|
|
+ print(_d)
|
|
|
+ kw_should_q = []
|
|
|
+ list_keys = []
|
|
|
+ title_kw = _d.get("title_kw",[])
|
|
|
+ if len(title_kw)>0:
|
|
|
+ kw_should_q.append(generateBoolShouldQuery(["doctitles"],title_kw,MatchPhraseQuery))
|
|
|
+ list_keys.extend(title_kw)
|
|
|
+ content_kw = _d.get("content_kw",[])
|
|
|
+ if len(content_kw)>0:
|
|
|
+ kw_should_q.append(generateBoolShouldQuery(["doctextcon","attachmenttextcon"],content_kw,MatchPhraseQuery))
|
|
|
+ list_keys.extend(content_kw)
|
|
|
+
|
|
|
+ rm_should_q = []
|
|
|
+ title_rm = _d.get("title_rm",[])
|
|
|
+ if len(title_rm)>0:
|
|
|
+ rm_should_q.append(generateBoolShouldQuery(["doctitles"],title_rm,MatchPhraseQuery))
|
|
|
+ content_rm = _d.get("content_rm",[])
|
|
|
+ if len(content_rm)>0:
|
|
|
+ rm_should_q.append(generateBoolShouldQuery(["doctextcon","attachmenttextcon"],content_rm,MatchPhraseQuery))
|
|
|
+
|
|
|
+ _query = BoolQuery(must_queries=[
|
|
|
+ # generateBoolShouldQuery(["docchannel"],[101],TermQuery),
|
|
|
+ # RangeQuery("page_time",last_date,current_date),
|
|
|
+ RangeQuery("page_time",current_date,last_date),
|
|
|
+ RangeQuery("status",201,300,True,True),
|
|
|
+ # TermQuery("docid",263568527),
|
|
|
+ # generateBoolShouldQuery(["doctitle","doctextcon","attachmenttextcon"],list_keys,MatchPhraseQuery),
|
|
|
+ BoolQuery(should_queries=kw_should_q),
|
|
|
+ # # generateBoolShouldQuery(["province"],list_province,WildcardQuery),
|
|
|
+ # # RangeQuery("crtime","%s"%last_date,"%s"%current_date)
|
|
|
+ ],
|
|
|
+ must_not_queries=rm_should_q)
|
|
|
+ list_query.append({"query":_query,"limit":50000,"keyword":list_keys})
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ def getFixData():
|
|
|
+ filename = "C:\\Users\\Administrator\\Desktop\\视源数据清洗.xlsx"
|
|
|
+ dict_filter = {}
|
|
|
+ dict_update = {}
|
|
|
+ df = pd.read_excel(filename,0)
|
|
|
+ for _docid,bidding_budget,win_bid_price,is_dup,is_fb in zip(df["docid"],df["检查招标金额"],df["中标金额(元)"],df["是否重复"],df["是否废标"]):
|
|
|
+ if str(is_dup) in ("是","1") or str(is_fb) in ("是","1"):
|
|
|
+ dict_filter[_docid] = 1
|
|
|
+ else:
|
|
|
+ dict_update[_docid] = {"docid":_docid,
|
|
|
+ "bidding_budget":bidding_budget,
|
|
|
+ "win_bid_price":win_bid_price}
|
|
|
+ df = pd.read_excel(filename,1)
|
|
|
+ for _docid,bidding_budget,win_bid_price,is_dup,is_fb in zip(df["docid"],df["检查招标金额"],df["中标金额(元)"],df["是否重复"],df["是否废标"]):
|
|
|
+ if str(is_dup) in ("是","1") or str(is_fb) in ("是","1"):
|
|
|
+ dict_filter[_docid] = 1
|
|
|
+ else:
|
|
|
+ dict_update[_docid] = {"docid":_docid,
|
|
|
+ "bidding_budget":bidding_budget,
|
|
|
+ "win_bid_price":win_bid_price}
|
|
|
+ return dict_filter,dict_update
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ list_row = getDocument(list_query,columns,table_name="project2",table_index="project2_index")
|
|
|
+ list_row = filterRow(list_row,key="uuid")
|
|
|
+
|
|
|
+ print("list_row",len(list_row))
|
|
|
+
|
|
|
+ set_columns1 = set()
|
|
|
+ list_df_columns1 = []
|
|
|
+
|
|
|
+ df_data = {}
|
|
|
+ getRowData(df_data,list_row,dict_channel,set_columns1,list_df_columns1)
|
|
|
+
|
|
|
+ print("len df_data",len(df_data["docid"]))
|
|
|
+ fixContactPerson(df_data,list_df_columns1)
|
|
|
+
|
|
|
+ # fixZhaobiao_page_time(df_data)
|
|
|
+ # size_pop = popRows(df_data)
|
|
|
+ size_pop = 0
|
|
|
+ print("size_pop",size_pop)
|
|
|
+
|
|
|
+ df = pd.DataFrame(df_data)
|
|
|
+
|
|
|
+ filename = os.path.dirname(__file__)+"/data/%s关键词数据导出数量%d_%s.xlsx"%(current_date,len(list_row)-size_pop,getCurrent_date("%Y-%m-%d_%H%M%S"))
|
|
|
+ df.to_excel(filename,columns=list_df_columns1,index=False)
|
|
|
+
|
|
|
+ host = "smtp.exmail.qq.com"
|
|
|
+ username = "vip@bidizhaobiao.com"
|
|
|
+ password = "Biaoxun66-"
|
|
|
+ receivers = ["1175730271@qq.com","747012698@qq.com","995116318@qq.com"]
|
|
|
+ receivers = ["1175730271@qq.com"]
|
|
|
+ attachs = [filename]
|
|
|
+
|
|
|
+ sendEmail(host,username,password,receivers,attachs=attachs)
|
|
|
+
|
|
|
+ import json
|
|
|
+ def getUserInfo(self,code,userid):
|
|
|
+
|
|
|
+ data = {
|
|
|
+ "codeStr": code,
|
|
|
+ "userId": userid,
|
|
|
+ }
|
|
|
+
|
|
|
+ auth = {'Content-Type': 'application/json',"Authorization": "Bearer eyJhbGciOiJSUzI1NiJ9.eyJpc3MiOiJCWEtDX1VOSVRFX0FQSSIsInN1YiI6IjIwIiwiZGF0YSI6eyJkZXZpY2VUeXBlIjoiMjAiLCJyb2xlIjoiMjAiLCJpZCI6ImJ4a2Mtd29ya2JlbmNoIn0sImV4cCI6MTk2NjY5MjE4MSwiaWF0IjoxNjUxMDQ0MTgxfQ.lFurpoMOaVmCH3GKmJ_cqrseuSaEZJBndp8PE4QpjFY6R2mfXh5e96zn_Ma3qVkp-NKAlpA1nRYl1y08xkzj07KEx4HO_Nh6v3sfGwnDyRUz35SW3yr1fhvnbh5hEpnoCVJnFQfoMXFfn780VuOcKd01lNUjABFbSLvypDv8p-gJzkrE7z5YB53tZ_lGm_dbzKihTBjG1sBYEKJT3ekYz5Px_n-lw05IkUdbckE7n6Xj7PRPTaEjzoc3PF_pgESsdcTPSDhJlLR8x63YCqbmWy6ydhwnlEQE5rHIFA2Q5VhIxUcT7fXUBZNGmzRgaqxB4dIg2369IhpeBAJ2TQ63qg"}
|
|
|
+
|
|
|
+ r = requests.patch("https://unite.bidizhaobiao.com/api/v1/authorization/services",json=data,headers=auth)
|
|
|
+ if r.status_code==200:
|
|
|
+ return r.status_code,json.loads(r.content.decode("utf8"))
|
|
|
+ return r.status_code,""
|
|
|
+
|
|
|
+
|
|
|
+def export_15824381998():
|
|
|
+ def removeData(df_data):
|
|
|
+ list_remove_index = []
|
|
|
+ list_c = df_data.get("招标人采购系统",[])
|
|
|
+ for _c_i in range(len(list_c)):
|
|
|
+ if list_c[_c_i]=="企业采购系统":
|
|
|
+ list_remove_index.append(_c_i)
|
|
|
+ list_remove_index.reverse()
|
|
|
+ print(list_remove_index)
|
|
|
+ for k,v in df_data.items():
|
|
|
+ for _rc in list_remove_index:
|
|
|
+ v.pop(_rc)
|
|
|
+ for k,v in df_data.items():
|
|
|
+ v = v[:500]
|
|
|
+
|
|
|
+ def getRowData_zb(df_data,rows,set_line,list_keyword,dict_channel,dumplicate):
|
|
|
+
|
|
|
+ dict_line = {}
|
|
|
+ # list_data = getRow_ots(rows)
|
|
|
+ _index = 0
|
|
|
+ rows.sort(key=lambda x:x.get("docid",0),reverse=True)
|
|
|
+ set_col = set()
|
|
|
+ df_columns = []
|
|
|
+ for row in rows:
|
|
|
+ _index += 1
|
|
|
+ item = {}
|
|
|
+ _dict = row
|
|
|
+ bidding_budget = 0
|
|
|
+ sub_docs = json.loads(_dict.get("sub_docs_json","[]"))
|
|
|
+ for doc in sub_docs:
|
|
|
+ if doc.get("bidding_budget",0)>0:
|
|
|
+ bidding_budget = doc.get("bidding_budget",0)
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"省份",_dict.get("province",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"城市",_dict.get("city",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"公告类别",dict_channel.get(_dict.get("docchannel",""),""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"发布时间",_dict.get("page_time",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"公告标题",_dict.get("doctitle",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"采购内容",_dict.get("product",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"采购方式",_dict.get("bidway",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"项目金额",bidding_budget)
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"招标单位",_dict.get("tenderee",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"招标联系人",_dict.get("tenderee_contact",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"招标联系人电话",_dict.get("tenderee_phone",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"代理单位",_dict.get("agency",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"代理联系人",_dict.get("agency_contact",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"代理联系人电话",_dict.get("agency_phone",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"获取文件截止时间",_dict.get("time_get_file_end",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"开标时间",_dict.get("time_bidopen",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"链接","http://www.bidizhaobiao.com/info-%s.html"%str(_dict.get("docid","")))
|
|
|
+
|
|
|
+ for k,v in item.items():
|
|
|
+ if k not in df_data:
|
|
|
+ df_data[k] = []
|
|
|
+ df_data[k].append(v)
|
|
|
+ return df_columns
|
|
|
+
|
|
|
+ def getRowData_yc(df_data,rows,set_line,list_keyword,dict_channel,dumplicate):
|
|
|
+
|
|
|
+ dict_line = {}
|
|
|
+ # list_data = getRow_ots(rows)
|
|
|
+ _index = 0
|
|
|
+ rows.sort(key=lambda x:x.get("docid",0),reverse=True)
|
|
|
+ set_col = set()
|
|
|
+ df_columns = []
|
|
|
+ for row in rows:
|
|
|
+ _index += 1
|
|
|
+ item = {}
|
|
|
+ _dict = row
|
|
|
+ _type = _dict.get("type",0)
|
|
|
+ if _type==0:
|
|
|
+ _type = "周期预测"
|
|
|
+ elif _type==1:
|
|
|
+ _type = "采购意向"
|
|
|
+ elif _type==2:
|
|
|
+ _type = "到期预测"
|
|
|
+ else:
|
|
|
+ _type = "废标重招"
|
|
|
+
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"省份",_dict.get("province",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"城市",_dict.get("city",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"预测类型",_type)
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"预计开始时间",_dict.get("may_begin",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"预计结束时间",_dict.get("may_end",""))
|
|
|
+
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"采购内容",_dict.get("product",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"上次采购时间",_dict.get("last_page_time",""))
|
|
|
+
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"招标单位",_dict.get("tenderee",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"招标联系人",_dict.get("tenderee_contact",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"招标联系人电话",_dict.get("tenderee_phone",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"代理单位",_dict.get("agency",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"代理联系人",_dict.get("agency_contact",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"代理联系人电话",_dict.get("agency_phone",""))
|
|
|
+
|
|
|
+ for k,v in item.items():
|
|
|
+ if k not in df_data:
|
|
|
+ df_data[k] = []
|
|
|
+ df_data[k].append(v)
|
|
|
+ return df_columns
|
|
|
+
|
|
|
+ def getRowData_win(df_data,rows,set_line,list_keyword,dict_channel,dumplicate):
|
|
|
+
|
|
|
+ dict_line = {}
|
|
|
+ # list_data = getRow_ots(rows)
|
|
|
+ _index = 0
|
|
|
+ rows.sort(key=lambda x:x.get("docid",0),reverse=True)
|
|
|
+ set_col = set()
|
|
|
+ df_columns = []
|
|
|
+ for row in rows:
|
|
|
+ _index += 1
|
|
|
+ item = {}
|
|
|
+ _dict = row
|
|
|
+ bidding_budget = 0
|
|
|
+ sub_docs = json.loads(_dict.get("sub_docs_json","[]"))
|
|
|
+ zbr = ""
|
|
|
+ zbje = ""
|
|
|
+ for doc in sub_docs:
|
|
|
+ if doc.get("bidding_budget",0)>0:
|
|
|
+ bidding_budget = doc.get("bidding_budget",0)
|
|
|
+ if doc.get("win_tenderer") is not None:
|
|
|
+ zbr += str(doc.get("win_tenderer"))+"\r\n"
|
|
|
+ zbje += str(doc.get("win_bid_price",0))+"\r\n"
|
|
|
+ if doc.get("second_tenderer") is not None:
|
|
|
+ zbr += str(doc.get("second_tenderer"))+"\r\n"
|
|
|
+ zbje += str(doc.get("second_bid_price",0))+"\r\n"
|
|
|
+ if doc.get("third_tenderer") is not None:
|
|
|
+ zbr += str(doc.get("third_tenderer"))+"\r\n"
|
|
|
+ zbje += str(doc.get("third_bid_price",0))+"\r\n"
|
|
|
+
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"省份",_dict.get("province",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"城市",_dict.get("city",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"公告类别",dict_channel.get(_dict.get("docchannel",""),""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"发布时间",_dict.get("page_time",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"公告标题",_dict.get("doctitle",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"采购内容",_dict.get("product",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"采购方式",_dict.get("bidway",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"项目金额",bidding_budget)
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"招标单位",_dict.get("tenderee",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"招标联系人",_dict.get("tenderee_contact",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"招标联系人电话",_dict.get("tenderee_phone",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"代理单位",_dict.get("agency",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"代理联系人",_dict.get("agency_contact",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"代理联系人电话",_dict.get("agency_phone",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"获取文件截止时间",_dict.get("time_get_file_end",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"开标时间",_dict.get("time_bidopen",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"中标公司",zbr)
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"中标金额",zbje)
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"链接","http://www.bidizhaobiao.com/info-%s.html"%str(_dict.get("docid","")))
|
|
|
+
|
|
|
+ for k,v in item.items():
|
|
|
+ if k not in df_data:
|
|
|
+ df_data[k] = []
|
|
|
+ df_data[k].append(v)
|
|
|
+ return df_columns
|
|
|
+
|
|
|
+ def fixContactPerson(df_data,list_df_columns,get_legal_person=False):
|
|
|
+ set_enterprise = set()
|
|
|
+ if len(df_data.keys())>0:
|
|
|
+ for _tenderee,_agency in zip(df_data["招标单位"],df_data["代理单位"]):
|
|
|
+ set_enterprise.add(_tenderee)
|
|
|
+ set_enterprise.add(_agency)
|
|
|
+ if "" in set_enterprise:
|
|
|
+ set_enterprise.remove("")
|
|
|
+ if None in set_enterprise:
|
|
|
+ set_enterprise.remove(None)
|
|
|
+ dict_enterprise = getDictEnterprise(list(set_enterprise),columns_to_get = ["procurement_system","company_org_type","reg_capital","actual_capital","contacts","estiblish_time","social_staff_num","zhong_biao_number","tou_biao_number","credit_code","legal_person_name","phone_number"])
|
|
|
+
|
|
|
+ # conn = getConnection_oracle()
|
|
|
+ # cursor = conn.cursor()
|
|
|
+ if len(set_enterprise)>0:
|
|
|
+ for _i in range(len(df_data["招标单位"])):
|
|
|
+ _enterprise_name = df_data["招标单位"][_i]
|
|
|
+ if df_data["招标联系人电话"][_i]=="":
|
|
|
+ contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts")
|
|
|
+ if contacts is not None:
|
|
|
+ _person,_phone = getOneContact(contacts)
|
|
|
+ df_data["招标联系人"][_i] = _person
|
|
|
+ df_data["招标联系人电话"][_i] = _phone
|
|
|
+
|
|
|
+
|
|
|
+ _enterprise_name = df_data["代理单位"][_i]
|
|
|
+ if df_data["代理联系人电话"][_i]=="":
|
|
|
+ contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts")
|
|
|
+ if contacts is not None:
|
|
|
+ _person,_phone = getOneContact(contacts)
|
|
|
+ df_data["代理联系人"][_i] = _person
|
|
|
+ df_data["代理联系人电话"][_i] = _phone
|
|
|
+
|
|
|
+ # _enterprise_name = df_data["中标单位"][_i]
|
|
|
+ # if get_legal_person:
|
|
|
+ # _person = dict_enterprise.get(_enterprise_name,{}).get("legal_person_name","")
|
|
|
+ # _phone = dict_enterprise.get(_enterprise_name,{}).get("phone_number","")
|
|
|
+ # if len(_phone)==11 and _phone[0]=="1":
|
|
|
+ # df_data["中标单位联系人"][_i] = _person
|
|
|
+ # df_data["中标单位联系电话"][_i] = _phone
|
|
|
+ # else:
|
|
|
+ # if df_data["中标单位联系电话"][_i]=="":
|
|
|
+ # contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts")
|
|
|
+ # if contacts is not None:
|
|
|
+ # _person,_phone = getOneContact(contacts,mobile_only=True)
|
|
|
+ # df_data["中标单位联系人"][_i] = _person
|
|
|
+ # df_data["中标单位联系电话"][_i] = _phone
|
|
|
+
|
|
|
+ log("start export 15824381998:>>>>>>>>>>>>>>>")
|
|
|
+ current_date = getCurrent_date(format="%Y-%m-%d")
|
|
|
+ last_date = timeAdd(current_date,-2)
|
|
|
+ start_time = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())-7*24*60*60))
|
|
|
+ if current_date<="2025-05-01":
|
|
|
+ for i in range(10):
|
|
|
+ try:
|
|
|
+ # start_time='2022-07-22'
|
|
|
+ # current_date = '2022-07-28'
|
|
|
+ log("start exporting export2:=================")
|
|
|
+ # columns = ["doctitle","doctextcon","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose"]
|
|
|
+
|
|
|
+ dict_channel = getDict_docchannel()
|
|
|
+
|
|
|
+ list_query = []
|
|
|
+
|
|
|
+ str_keyword = '''
|
|
|
+
|
|
|
+光伏组件
|
|
|
+
|
|
|
+ '''
|
|
|
+ list_keyword = splitIntoList(str_keyword,"[\s\n、,,]")
|
|
|
+
|
|
|
+ str_not_keyword = '''
|
|
|
+ 清洗机器人
|
|
|
+ '''
|
|
|
+ list_not_key = splitIntoList(str_not_keyword,"[\s\n、,,]")
|
|
|
+
|
|
|
+ tenderee_keywrod = "医院、大学、高校、高中"
|
|
|
+ list_t_key = splitIntoList(tenderee_keywrod,"[\s\n、,,]")
|
|
|
+
|
|
|
+ log(str(list_keyword))
|
|
|
+ columns = ["doctitle","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_get_file_end","time_bidopen"]
|
|
|
+ bool_query = BoolQuery(must_queries=[
|
|
|
+ generateBoolShouldQuery(["doctitle"],list_keyword,MatchPhraseQuery),
|
|
|
+ generateBoolShouldQuery(["docchannel"],[51,52,102,103,104,105,114],TermQuery),
|
|
|
+ RangeQuery("crtime",last_date+" 21:00:00",current_date+" 21:00:00",True,True),
|
|
|
+ RangeQuery("status",201,300,True,True),
|
|
|
+ # TermQuery("procurement_system","公安系统"),
|
|
|
+ # generateBoolShouldQuery(["province"],["湖南"],TermQuery),
|
|
|
+ # generateBoolShouldQuery(["tenderee"],list_t_key,WildcardQuery)
|
|
|
+ # generateBoolShouldQuery(["docchannel"],[101,118,119],TermQuery),
|
|
|
+ ],
|
|
|
+ must_not_queries=[
|
|
|
+ generateBoolShouldQuery(["doctitle","doctextcon","attachmenttextcon"],list_not_key,MatchPhraseQuery),
|
|
|
+ ]
|
|
|
+ )
|
|
|
+
|
|
|
+ list_row = getDocument([{"query":bool_query,"limit":500}],columns,thread_count=1)
|
|
|
+
|
|
|
+ log("get document %d rows"%len(list_row))
|
|
|
+
|
|
|
+ df_data = {}
|
|
|
+ set_line = set()
|
|
|
+ # list_row = filterRow(list_row)
|
|
|
+ df_columns = getRowData_zb(df_data,list_row,set_line,list_keyword,dict_channel,True)
|
|
|
+ fixContactPerson(df_data,df_columns)
|
|
|
+ df = pd.DataFrame(df_data)
|
|
|
+
|
|
|
+ df_data_filter = {}
|
|
|
+ for c in df_columns:
|
|
|
+ if c not in df_data_filter:
|
|
|
+ df_data_filter[c] = []
|
|
|
+ for tenderee_i in range(len(df_data["招标单位"])):
|
|
|
+ if df_data["招标单位"][tenderee_i] in ("国家电力投资集团有限公司","中国大唐集团有限公司"):
|
|
|
+ for c in df_columns:
|
|
|
+ df_data_filter[c].append(df_data[c][tenderee_i])
|
|
|
+ df_f = pd.DataFrame(df_data_filter)
|
|
|
+
|
|
|
+ time_end = timeAdd(current_date,60)
|
|
|
+ columns = ["project_name","type","province","city","bidding_budget","city","demand","last_page_time"]
|
|
|
+ bool_query = BoolQuery(must_queries=[
|
|
|
+ generateBoolShouldQuery(["demand","product"],list_keyword,MatchPhraseQuery),
|
|
|
+ RangeQuery("may_end",range_from=current_date),
|
|
|
+ RangeQuery("may_end",range_to=time_end),
|
|
|
+ # TermQuery("procurement_system","公安系统"),
|
|
|
+ # generateBoolShouldQuery(["province"],["湖南"],TermQuery),
|
|
|
+ # generateBoolShouldQuery(["tenderee"],list_t_key,WildcardQuery)
|
|
|
+ # generateBoolShouldQuery(["docchannel"],[101,118,119],TermQuery),
|
|
|
+ ],
|
|
|
+ must_not_queries=[
|
|
|
+ generateBoolShouldQuery(["demand","product"],list_not_key,MatchPhraseQuery),
|
|
|
+ ]
|
|
|
+ )
|
|
|
+
|
|
|
+ list_row = getDocument([{"query":bool_query,"limit":500}],columns,table_name="preproject",table_index="preproject_index",thread_count=1,sort_column="may_end")
|
|
|
+
|
|
|
+ log("get document %d rows"%len(list_row))
|
|
|
+
|
|
|
+ df_data = {}
|
|
|
+ set_line = set()
|
|
|
+ # list_row = filterRow(list_row)
|
|
|
+ df_columns_yc = getRowData_yc(df_data,list_row,set_line,list_keyword,dict_channel,True)
|
|
|
+ fixContactPerson(df_data,df_columns_yc)
|
|
|
+ df1 = pd.DataFrame(df_data)
|
|
|
+
|
|
|
+ df_data_filter = {}
|
|
|
+ for c in df_columns_yc:
|
|
|
+ if c not in df_data_filter:
|
|
|
+ df_data_filter[c] = []
|
|
|
+ for tenderee_i in range(len(df_data["招标单位"])):
|
|
|
+ if df_data["招标单位"][tenderee_i] in ("国家电力投资集团有限公司","中国大唐集团有限公司"):
|
|
|
+ for c in df_columns_yc:
|
|
|
+ if c not in df_data_filter:
|
|
|
+ df_data_filter[c] = []
|
|
|
+ df_data_filter[c].append(df_data[c][tenderee_i])
|
|
|
+ df1_f = pd.DataFrame(df_data_filter)
|
|
|
+
|
|
|
+
|
|
|
+ filename = os.path.dirname(__file__)+"/data/%s年%s招标数据导出%s.xlsx"%(start_time[:4],current_date,getCurrent_date(format="%Y-%m-%d_%H%M%S"))
|
|
|
+ with pd.ExcelWriter(filename) as writer:
|
|
|
+ df.to_excel(writer,sheet_name="招标数据",columns=df_columns if not df.empty else None)
|
|
|
+
|
|
|
+ df_f.to_excel(writer,sheet_name="特定甲方招标数据",columns=df_columns if not df_f.empty else None)
|
|
|
+ df1.to_excel(writer,sheet_name="项目预测",columns=df_columns_yc[:-3] if not df1.empty else None)
|
|
|
+ df1_f.to_excel(writer,sheet_name="特定甲方项目预测",columns=df_columns_yc[:-3] if not df1_f.empty else None)
|
|
|
+ log(str(filename))
|
|
|
+
|
|
|
+ host = "smtp.exmail.qq.com"
|
|
|
+ username = "vip@bidizhaobiao.com"
|
|
|
+ password = "Biaoxun66-"
|
|
|
+ receivers = ["1175730271@qq.com","565748324@qq.com","1396488964@qq.com"]
|
|
|
+ # receivers = ["1175730271@qq.com"]
|
|
|
+ attachs = [filename]
|
|
|
+
|
|
|
+ sendEmail(host,username,password,receivers,attachs=attachs)
|
|
|
+
|
|
|
+
|
|
|
+ df_data_filter = {}
|
|
|
+ for c in df_columns:
|
|
|
+ if c not in df_data_filter:
|
|
|
+ df_data_filter[c] = []
|
|
|
+ for tenderee_i in range(len(df_data["招标单位"])):
|
|
|
+ if df_data["招标单位"][tenderee_i] in ("国家电力投资集团有限公司","国家能源投资集团有限责任公司","国家能源集团龙源阿里新能源(阿里)有限公司"):
|
|
|
+ for c in df_columns:
|
|
|
+ df_data_filter[c].append(df_data[c][tenderee_i])
|
|
|
+ df_f = pd.DataFrame(df_data_filter)
|
|
|
+
|
|
|
+ df_data_filter1 = {}
|
|
|
+ for c in df_columns_yc:
|
|
|
+ if c not in df_data_filter1:
|
|
|
+ df_data_filter1[c] = []
|
|
|
+ for tenderee_i in range(len(df_data["招标单位"])):
|
|
|
+ if df_data["招标单位"][tenderee_i] in ("国家电力投资集团有限公司","国家能源投资集团有限责任公司","国家能源集团龙源阿里新能源(阿里)有限公司"):
|
|
|
+ for c in df_columns_yc:
|
|
|
+ if c not in df_data_filter1:
|
|
|
+ df_data_filter1[c] = []
|
|
|
+ df_data_filter1[c].append(df_data[c][tenderee_i])
|
|
|
+ df1_f1 = pd.DataFrame(df_data_filter1)
|
|
|
+
|
|
|
+ filename = os.path.dirname(__file__)+"/data/%s年%s招标数据导出%s.xlsx"%(start_time[:4],current_date,getCurrent_date(format="%Y-%m-%d_%H%M%S"))
|
|
|
+ with pd.ExcelWriter(filename) as writer:
|
|
|
+ df_f.to_excel(writer,sheet_name="特定甲方招标数据",columns=df_columns if not df_f.empty else None)
|
|
|
+ df1_f1.to_excel(writer,sheet_name="特定甲方项目预测",columns=df_columns_yc[:-3] if not df1_f.empty else None)
|
|
|
+ log(str(filename))
|
|
|
+
|
|
|
+ host = "smtp.exmail.qq.com"
|
|
|
+ username = "vip@bidizhaobiao.com"
|
|
|
+ password = "Biaoxun66-"
|
|
|
+ receivers = ["450604061@qq.com"]
|
|
|
+ # receivers = ["1175730271@qq.com"]
|
|
|
+ attachs = [filename]
|
|
|
+
|
|
|
+ sendEmail(host,username,password,receivers,attachs=attachs)
|
|
|
+
|
|
|
+
|
|
|
+ columns = ["doctitle","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_get_file_end","time_bidopen"]
|
|
|
+ bool_query = BoolQuery(must_queries=[
|
|
|
+ generateBoolShouldQuery(["doctitle"],list_keyword,MatchPhraseQuery),
|
|
|
+ generateBoolShouldQuery(["docchannel"],[101,118,119,120,121,122],TermQuery),
|
|
|
+ RangeQuery("crtime",last_date+" 21:00:00",current_date+" 21:00:00",True,True),
|
|
|
+ RangeQuery("status",201,300,True,True),
|
|
|
+ # TermQuery("procurement_system","公安系统"),
|
|
|
+ # generateBoolShouldQuery(["province"],["湖南"],TermQuery),
|
|
|
+ # generateBoolShouldQuery(["tenderee"],list_t_key,WildcardQuery)
|
|
|
+ # generateBoolShouldQuery(["docchannel"],[101,118,119],TermQuery),
|
|
|
+ ],
|
|
|
+ must_not_queries=[
|
|
|
+ generateBoolShouldQuery(["doctitle","doctextcon","attachmenttextcon"],list_not_key,MatchPhraseQuery),
|
|
|
+ ]
|
|
|
+ )
|
|
|
+
|
|
|
+ list_row = getDocument([{"query":bool_query,"limit":500}],columns,thread_count=1)
|
|
|
+
|
|
|
+ log("get document %d rows"%len(list_row))
|
|
|
+
|
|
|
+ df_data = {}
|
|
|
+ set_line = set()
|
|
|
+ # list_row = filterRow(list_row)
|
|
|
+ df_columns = getRowData_win(df_data,list_row,set_line,list_keyword,dict_channel,True)
|
|
|
+ fixContactPerson(df_data,df_columns)
|
|
|
+ df = pd.DataFrame(df_data)
|
|
|
+ # 调整行高
|
|
|
+ # for i in range(1, len(df) + 1):
|
|
|
+ # df.style.set_properties(max_col=100, min_col=1, max_row=i, min_row=i, h=20, wrap_text=True) # 设置行高为20,自动换行
|
|
|
+ styled_df = (df.style
|
|
|
+ .set_properties(**{'text-align': 'center', 'white-space': 'pre-line'})
|
|
|
+ .set_table_styles([{'selector': 'td', 'props': [('text-align', 'center')]}]))
|
|
|
+
|
|
|
+ filename = os.path.dirname(__file__)+"/data/%s年%s中标数据导出%s.xlsx"%(start_time[:4],current_date,getCurrent_date(format="%Y-%m-%d_%H%M%S"))
|
|
|
+ with pd.ExcelWriter(filename,engine="openpyxl") as writer:
|
|
|
+ styled_df.to_excel(writer,columns = df_columns,index=False)
|
|
|
+
|
|
|
+ adjust_excel(filename,filename,columns=["A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R"])
|
|
|
+
|
|
|
+ host = "smtp.exmail.qq.com"
|
|
|
+ username = "vip@bidizhaobiao.com"
|
|
|
+ password = "Biaoxun66-"
|
|
|
+ receivers = ["1175730271@qq.com","565748324@qq.com","1396488964@qq.com","1141385052@qq.com","1713739820@qq.com"]
|
|
|
+ # receivers = ["1175730271@qq.com"]
|
|
|
+ attachs = [filename]
|
|
|
+
|
|
|
+ sendEmail(host,username,password,receivers,attachs=attachs)
|
|
|
+ break
|
|
|
+ except Exception as e:
|
|
|
+ traceback.print_exc()
|
|
|
+
|
|
|
+def export_13510123669():
|
|
|
+ def removeData(df_data):
|
|
|
+ list_remove_index = []
|
|
|
+ list_c = df_data.get("招标人采购系统",[])
|
|
|
+ for _c_i in range(len(list_c)):
|
|
|
+ if list_c[_c_i]=="企业采购系统":
|
|
|
+ list_remove_index.append(_c_i)
|
|
|
+ list_remove_index.reverse()
|
|
|
+ print(list_remove_index)
|
|
|
+ for k,v in df_data.items():
|
|
|
+ for _rc in list_remove_index:
|
|
|
+ v.pop(_rc)
|
|
|
+ for k,v in df_data.items():
|
|
|
+ v = v[:500]
|
|
|
+
|
|
|
+ def getRowData_zb(df_data,rows,set_line,list_keyword,dict_channel,dumplicate):
|
|
|
+
|
|
|
+ dict_line = {}
|
|
|
+ # list_data = getRow_ots(rows)
|
|
|
+ _index = 0
|
|
|
+ rows.sort(key=lambda x:x.get("docid",0),reverse=True)
|
|
|
+ set_col = set()
|
|
|
+ df_columns = []
|
|
|
+ for row in rows:
|
|
|
+ _index += 1
|
|
|
+ item = {}
|
|
|
+ _dict = row
|
|
|
+ bidding_budget = 0
|
|
|
+ sub_docs = json.loads(_dict.get("sub_docs_json","[]"))
|
|
|
+ zbr = ""
|
|
|
+ zbje = 0
|
|
|
+ win_tenderer_manager = ""
|
|
|
+ win_tenderer_phone = ""
|
|
|
+ for doc in sub_docs:
|
|
|
+ if doc.get("bidding_budget",0)>0:
|
|
|
+ bidding_budget = doc.get("bidding_budget",0)
|
|
|
+ if doc.get("win_tenderer") is not None:
|
|
|
+ zbr = str(doc.get("win_tenderer",""))
|
|
|
+ zbje = doc.get("win_bid_price",0)
|
|
|
+ win_tenderer_manager = str(doc.get("win_tenderer_manager",""))
|
|
|
+ win_tenderer_phone = str(doc.get("win_tenderer_phone",""))
|
|
|
+
|
|
|
+ doc_type = ""
|
|
|
+ if dict_channel.get(_dict.get("docchannel",""),"")[:2] in ("中标","合同","候选","废标","开标","验收"):
|
|
|
+ doc_type = "中标"
|
|
|
+ else:
|
|
|
+ doc_type = "招标"
|
|
|
+
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"序号",_index)
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"省份",_dict.get("province","未知"))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"城市",_dict.get("city","未知"))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"公告类型",doc_type)
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"公告名称",_dict.get("doctitle",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"项目类型",'')
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"项目编号",_dict.get("project_code"))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"发布时间",_dict.get("page_time",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"信息类型","%s-%s"%(dict_channel.get(_dict.get("docchannel",""),""),_dict.get("bidway","")))
|
|
|
+
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"报名开始时间",_dict.get("time_registration_start",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"报名截止时间",_dict.get("time_registration_end",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"投标开始时间",_dict.get("time_bidstart",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"投标结束时间",_dict.get("time_bidopen",""))
|
|
|
+
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"预算金额(万元)",bidding_budget/10000)
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"成交金额(万元)",zbje/10000)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"招标单位",_dict.get("tenderee",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"招标单位所在地","")
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"招标联系人",_dict.get("tenderee_contact",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"招标联系人电话",_dict.get("tenderee_phone",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"招标联系人1","")
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"招标联系人电话1","")
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"招标联系人2","")
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"招标联系人电话2","")
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"招标联系人3","")
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"招标联系人电话3","")
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"代理单位",_dict.get("agency",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"代理单位所在地","")
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"代理联系人",_dict.get("agency_contact",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"代理联系人电话",_dict.get("agency_phone",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"代理联系人1","")
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"代理联系人电话1","")
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"代理联系人2","")
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"代理联系人电话2","")
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"代理联系人3","")
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"代理联系人电话3","")
|
|
|
+
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"中标单位",zbr)
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"中标单位所在地","")
|
|
|
+
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"项目中标联系人",win_tenderer_manager)
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"项目中标联系电话",win_tenderer_phone)
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"中标法人","")
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"中标法人电话","")
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"中标股东","")
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"中标股东电话","")
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"中标高管","")
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"中标高管电话","")
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"中标联系人1","")
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"中标联系人电话1","")
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"中标联系人2","")
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"中标联系人电话2","")
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"中标联系人3","")
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"中标联系人电话3","")
|
|
|
+
|
|
|
+
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"招标/采购内容",_dict.get("product",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"资质","")
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"公告详情链接","http://www.bidizhaobiao.com/info-%s.html"%str(_dict.get("docid","")))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"招投标分析(合作商)","")
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"历史中标单位","")
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"应对策略(作为填)","")
|
|
|
+
|
|
|
+ for k,v in item.items():
|
|
|
+ if k not in df_data:
|
|
|
+ df_data[k] = []
|
|
|
+ df_data[k].append(v)
|
|
|
+ return df_columns
|
|
|
+
|
|
|
+ def getRowData_yc(df_data,rows,set_line,list_keyword,dict_channel,dumplicate):
|
|
|
+
|
|
|
+ dict_line = {}
|
|
|
+ # list_data = getRow_ots(rows)
|
|
|
+ _index = 0
|
|
|
+ rows.sort(key=lambda x:x.get("docid",0),reverse=True)
|
|
|
+ set_col = set()
|
|
|
+ df_columns = []
|
|
|
+ for row in rows:
|
|
|
+ _index += 1
|
|
|
+ item = {}
|
|
|
+ _dict = row
|
|
|
+ _type = _dict.get("type",0)
|
|
|
+ if _type==0:
|
|
|
+ _type = "周期预测"
|
|
|
+ elif _type==1:
|
|
|
+ _type = "采购意向"
|
|
|
+ elif _type==2:
|
|
|
+ _type = "到期预测"
|
|
|
+ else:
|
|
|
+ _type = "废标重招"
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"省份",_dict.get("province",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"城市",_dict.get("city",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"类型",'')
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"预测类型",_type)
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"项目类型","")
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"预计开始时间",_dict.get("may_begin",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"预计结束时间",_dict.get("may_end",""))
|
|
|
+
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"采购内容",_dict.get("product",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"上次采购时间",_dict.get("last_page_time",""))
|
|
|
+
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"招标单位",_dict.get("tenderee",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"招标联系人",_dict.get("tenderee_contact",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"招标联系人电话",_dict.get("tenderee_phone",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"代理单位",_dict.get("agency",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"代理联系人",_dict.get("agency_contact",""))
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"代理联系人电话",_dict.get("agency_phone",""))
|
|
|
+
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"招投标分析(合作商)","")
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"历史中标单位","")
|
|
|
+ set_dict_item_columns(set_col,df_columns,item,"应对策略(作为填)","")
|
|
|
+
|
|
|
+ for k,v in item.items():
|
|
|
+ if k not in df_data:
|
|
|
+ df_data[k] = []
|
|
|
+ df_data[k].append(v)
|
|
|
+ return df_columns
|
|
|
+
|
|
|
+ def fixContactPerson(df_data,list_df_columns,get_legal_person=False):
|
|
|
+ set_enterprise = set()
|
|
|
+ if len(df_data.keys())>0:
|
|
|
+ for _tenderee,_agency in zip(df_data["招标单位"],df_data["代理单位"]):
|
|
|
+ set_enterprise.add(_tenderee)
|
|
|
+ set_enterprise.add(_agency)
|
|
|
+ if "" in set_enterprise:
|
|
|
+ set_enterprise.remove("")
|
|
|
+ if None in set_enterprise:
|
|
|
+ set_enterprise.remove(None)
|
|
|
+ dict_enterprise = getDictEnterprise(list(set_enterprise),columns_to_get = ["procurement_system","company_org_type","reg_capital","actual_capital","contacts","estiblish_time","social_staff_num","zhong_biao_number","tou_biao_number","credit_code","legal_person_name","phone_number"])
|
|
|
+
|
|
|
+ # conn = getConnection_oracle()
|
|
|
+ # cursor = conn.cursor()
|
|
|
+ if len(set_enterprise)>0:
|
|
|
+ for _i in range(len(df_data["招标单位"])):
|
|
|
+ _enterprise_name = df_data["招标单位"][_i]
|
|
|
+ if df_data["招标联系人电话"][_i]=="":
|
|
|
+ contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts")
|
|
|
+ if contacts is not None:
|
|
|
+ _person,_phone = getOneContact(contacts)
|
|
|
+ df_data["招标联系人"][_i] = _person
|
|
|
+ df_data["招标联系人电话"][_i] = _phone
|
|
|
+ df_data["项目类型"][_i] = dict_enterprise.get(_enterprise_name,{}).get("procurement_system")
|
|
|
+
|
|
|
+
|
|
|
+ _enterprise_name = df_data["代理单位"][_i]
|
|
|
+ if df_data["代理联系人电话"][_i]=="":
|
|
|
+ contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts")
|
|
|
+ if contacts is not None:
|
|
|
+ _person,_phone = getOneContact(contacts)
|
|
|
+ df_data["代理联系人"][_i] = _person
|
|
|
+ df_data["代理联系人电话"][_i] = _phone
|
|
|
+
|
|
|
+ # _enterprise_name = df_data["中标单位"][_i]
|
|
|
+ # if get_legal_person:
|
|
|
+ # _person = dict_enterprise.get(_enterprise_name,{}).get("legal_person_name","")
|
|
|
+ # _phone = dict_enterprise.get(_enterprise_name,{}).get("phone_number","")
|
|
|
+ # if len(_phone)==11 and _phone[0]=="1":
|
|
|
+ # df_data["中标单位联系人"][_i] = _person
|
|
|
+ # df_data["中标单位联系电话"][_i] = _phone
|
|
|
+ # else:
|
|
|
+ # if df_data["中标单位联系电话"][_i]=="":
|
|
|
+ # contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts")
|
|
|
+ # if contacts is not None:
|
|
|
+ # _person,_phone = getOneContact(contacts,mobile_only=True)
|
|
|
+ # df_data["中标单位联系人"][_i] = _person
|
|
|
+ # df_data["中标单位联系电话"][_i] = _phone
|
|
|
+
|
|
|
+ def fixContactPerson1(df_data,list_df_columns,get_legal_person=False):
|
|
|
+ set_enterprise = set()
|
|
|
+ if len(df_data.keys())>0:
|
|
|
+ for _tenderee,_agency,_win in zip(df_data["招标单位"],df_data["代理单位"],df_data["中标单位"]):
|
|
|
+ set_enterprise.add(_tenderee)
|
|
|
+ set_enterprise.add(_agency)
|
|
|
+ set_enterprise.add(_win)
|
|
|
+ if "" in set_enterprise:
|
|
|
+ set_enterprise.remove("")
|
|
|
+ if None in set_enterprise:
|
|
|
+ set_enterprise.remove(None)
|
|
|
+ dict_enterprise = getDictEnterprise(list(set_enterprise),columns_to_get = ["procurement_system","company_org_type","reg_capital","actual_capital","contacts","estiblish_time","social_staff_num","zhong_biao_number","tou_biao_number","credit_code","legal_person_name","phone_number","reg_location","province","city"])
|
|
|
+
|
|
|
+ # print("dict_enterprise",dict_enterprise)
|
|
|
+ # conn = getConnection_oracle()
|
|
|
+ # cursor = conn.cursor()
|
|
|
+ if len(set_enterprise)>0:
|
|
|
+ for _i in range(len(df_data["招标单位"])):
|
|
|
+ _enterprise_name = df_data["招标单位"][_i]
|
|
|
+ contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts")
|
|
|
+ if contacts is not None:
|
|
|
+ for _index in range(min(len(contacts),3)):
|
|
|
+ contact_person,phone_no = contacts[_index].get("contact_person"),contacts[_index].get("phone_no")
|
|
|
+ is_legal_person,is_manager,is_shareholder = contacts[_index].get("is_legal_person",0),contacts[_index].get("is_manager",0),contacts[_index].get("is_shareholder",0)
|
|
|
+ df_data["招标联系人%s"%str(_index+1)][_i] = contact_person
|
|
|
+ df_data["招标联系人电话%s"%str(_index+1)][_i] = phone_no
|
|
|
+ df_data["招标单位所在地"][_i] = "%s-%s"%(dict_enterprise.get(_enterprise_name,{}).get("province",""),dict_enterprise.get(_enterprise_name,{}).get("city",""))
|
|
|
+ df_data["项目类型"][_i] = dict_enterprise.get(_enterprise_name,{}).get("procurement_system")
|
|
|
+
|
|
|
+
|
|
|
+ _enterprise_name = df_data["代理单位"][_i]
|
|
|
+ contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts")
|
|
|
+ if contacts is not None:
|
|
|
+ for _index in range(min(len(contacts),3)):
|
|
|
+ contact_person,phone_no = contacts[_index].get("contact_person"),contacts[_index].get("phone_no")
|
|
|
+ is_legal_person,is_manager,is_shareholder = contacts[_index].get("is_legal_person",0),contacts[_index].get("is_manager",0),contacts[_index].get("is_shareholder",0)
|
|
|
+ df_data["代理联系人%s"%str(_index+1)][_i] = contact_person
|
|
|
+ df_data["代理联系人电话%s"%str(_index+1)][_i] = phone_no
|
|
|
+ df_data["代理单位所在地"][_i] = "%s-%s"%(dict_enterprise.get(_enterprise_name,{}).get("province",""),dict_enterprise.get(_enterprise_name,{}).get("city",""))
|
|
|
+
|
|
|
+ _enterprise_name = df_data["中标单位"][_i]
|
|
|
+ df_data["中标单位所在地"][_i] = "%s-%s"%(dict_enterprise.get(_enterprise_name,{}).get("province",""),dict_enterprise.get(_enterprise_name,{}).get("city",""))
|
|
|
+ _person = dict_enterprise.get(_enterprise_name,{}).get("legal_person_name","")
|
|
|
+ _phone = dict_enterprise.get(_enterprise_name,{}).get("phone_number","")
|
|
|
+ if len(_phone)==11 and _phone[0]=="1":
|
|
|
+ df_data["中标法人"][_i] = _person
|
|
|
+ df_data["中标法人电话"][_i] = _phone
|
|
|
+
|
|
|
+ contacts = dict_enterprise.get(_enterprise_name,{}).get("contacts")
|
|
|
+ if contacts is not None:
|
|
|
+ for _index in range(min(len(contacts),3)):
|
|
|
+ contact_person,phone_no = contacts[_index].get("contact_person"),contacts[_index].get("phone_no")
|
|
|
+ is_legal_person,is_manager,is_shareholder = contacts[_index].get("is_legal_person",0),contacts[_index].get("is_manager",0),contacts[_index].get("is_shareholder",0)
|
|
|
+
|
|
|
+ if is_manager:
|
|
|
+ df_data["中标高管"][_i] = contact_person
|
|
|
+ df_data["中标高管电话"][_i] = phone_no
|
|
|
+ if is_shareholder:
|
|
|
+ df_data["中标股东"][_i] = contact_person
|
|
|
+ df_data["中标股东电话"][_i] = phone_no
|
|
|
+ df_data["中标联系人%s"%str(_index+1)][_i] = contact_person
|
|
|
+ df_data["中标联系人电话%s"%str(_index+1)][_i] = phone_no
|
|
|
+
|
|
|
+ log("start export 15824381998:>>>>>>>>>>>>>>>")
|
|
|
+ current_date = getCurrent_date(format="%Y-%m-%d")
|
|
|
+ start_time = time.strftime("%Y-%m-%d",time.localtime(time.mktime(time.localtime())-7*24*60*60))
|
|
|
+ if current_date<="2025-02-04" and datetime.datetime.now().weekday() in (1,4):
|
|
|
+ for i in range(10):
|
|
|
+ try:
|
|
|
+ if datetime.datetime.now().weekday()==1:
|
|
|
+ last_date = timeAdd(current_date,-4)
|
|
|
+ if datetime.datetime.now().weekday() in (4,):
|
|
|
+
|
|
|
+ last_date = timeAdd(current_date,-3)
|
|
|
+ # start_time='2022-07-22'
|
|
|
+ # current_date = '2022-07-28'
|
|
|
+ page_time_start = timeAdd(current_date,-7)
|
|
|
+ log("start exporting export2:=================")
|
|
|
+ # columns = ["doctitle","doctextcon","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_bidclose"]
|
|
|
+
|
|
|
+ dict_channel = getDict_docchannel()
|
|
|
+
|
|
|
+ list_query = []
|
|
|
+
|
|
|
+ str_keyword = '''
|
|
|
+
|
|
|
+多功能位移机,失能群体安全转移,助行机器人,行走机器人,护理机器人,大小便护理,洗浴机,洗浴机器人,吃饭机器人,养老护理,老年评估,防摔,智能护理,智慧康养
|
|
|
+ '''
|
|
|
+ list_keyword = splitIntoList(str_keyword,"[\s\n、,,]")
|
|
|
+
|
|
|
+ str_not_keyword = '''
|
|
|
+ 清洗机器人
|
|
|
+ '''
|
|
|
+ list_not_key = splitIntoList(str_not_keyword,"[\s\n、,,]")
|
|
|
+
|
|
|
+ tenderee_keywrod = "医院、大学、高校、高中"
|
|
|
+ list_t_key = splitIntoList(tenderee_keywrod,"[\s\n、,,]")
|
|
|
+
|
|
|
+ log(str(list_keyword))
|
|
|
+ columns = ["doctitle","docchannel","product","province","bidway","city","district","page_time","industry","info_type","tenderee","project_code","project_name","sub_docs_json","tenderee_contact","tenderee_phone","agency","agency_contact","agency_phone","uuid","time_get_file_end","time_bidopen","time_bidstart","time_registration_start","time_registration_end"]
|
|
|
+ bool_query = BoolQuery(must_queries=[
|
|
|
+ generateBoolShouldQuery(["doctitle"],list_keyword,MatchPhraseQuery),
|
|
|
+ generateBoolShouldQuery(["docchannel"],[51,52,102,103,105,104,114,121,122,101,119,120,118],TermQuery),
|
|
|
+ RangeQuery("crtime",last_date+" 10:00:00",current_date+" 10:00:00",True,True),
|
|
|
+ RangeQuery("status",201,300,True,True),
|
|
|
+ RangeQuery("page_time",page_time_start)
|
|
|
+ # TermQuery("procurement_system","公安系统"),
|
|
|
+ # generateBoolShouldQuery(["province"],["湖南"],TermQuery),
|
|
|
+ # generateBoolShouldQuery(["tenderee"],list_t_key,WildcardQuery)
|
|
|
+ # generateBoolShouldQuery(["docchannel"],[101,118,119],TermQuery),
|
|
|
+ ],
|
|
|
+ must_not_queries=[
|
|
|
+ generateBoolShouldQuery(["doctitle","doctextcon","attachmenttextcon"],list_not_key,MatchPhraseQuery),
|
|
|
+ ]
|
|
|
+ )
|
|
|
+
|
|
|
+ list_row = getDocument([{"query":bool_query,"limit":500}],columns,thread_count=1)
|
|
|
+
|
|
|
+ log("get document %d rows"%len(list_row))
|
|
|
+
|
|
|
+ df_data = {}
|
|
|
+ set_line = set()
|
|
|
+ # list_row = filterRow(list_row)
|
|
|
+ df_columns = getRowData_zb(df_data,list_row,set_line,list_keyword,dict_channel,True)
|
|
|
+ fixContactPerson1(df_data,df_columns)
|
|
|
+ df = pd.DataFrame(df_data)
|
|
|
+
|
|
|
+ df_data_filter_11 = {}
|
|
|
+ for c in df_columns:
|
|
|
+ if c not in df_data_filter_11:
|
|
|
+ df_data_filter_11[c] = []
|
|
|
+ df_data_filter_12 = {}
|
|
|
+ for c in df_columns:
|
|
|
+ if c not in df_data_filter_12:
|
|
|
+ df_data_filter_12[c] = []
|
|
|
+
|
|
|
+ for _i in range(len(df_data["项目类型"])):
|
|
|
+
|
|
|
+ if df_data["项目类型"][_i] in ("教育系统","科研系统"):
|
|
|
+ for c in df_columns:
|
|
|
+ if c not in df_data_filter_11:
|
|
|
+ df_data_filter_11[c] = []
|
|
|
+ df_data_filter_11[c].append(df_data[c][_i])
|
|
|
+
|
|
|
+ else:
|
|
|
+ for c in df_columns:
|
|
|
+ if c not in df_data_filter_12:
|
|
|
+ df_data_filter_12[c] = []
|
|
|
+ df_data_filter_12[c].append(df_data[c][_i])
|
|
|
+
|
|
|
+
|
|
|
+ df_f11 = pd.DataFrame(df_data_filter_11)
|
|
|
+ df_f12 = pd.DataFrame(df_data_filter_12)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ time_end = timeAdd(current_date,60)
|
|
|
+ columns = ["project_name","type","province","city","bidding_budget","city","demand","last_page_time"]
|
|
|
+ bool_query = BoolQuery(must_queries=[
|
|
|
+ generateBoolShouldQuery(["demand","product"],list_keyword,MatchPhraseQuery),
|
|
|
+ RangeQuery("may_end",range_from=current_date),
|
|
|
+ RangeQuery("may_end",range_to=time_end),
|
|
|
+ # TermQuery("procurement_system","公安系统"),
|
|
|
+ # generateBoolShouldQuery(["province"],["湖南"],TermQuery),
|
|
|
+ # generateBoolShouldQuery(["tenderee"],list_t_key,WildcardQuery)
|
|
|
+ # generateBoolShouldQuery(["docchannel"],[101,118,119],TermQuery),
|
|
|
+ ],
|
|
|
+ must_not_queries=[
|
|
|
+ generateBoolShouldQuery(["demand","product"],list_not_key,MatchPhraseQuery),
|
|
|
+ ]
|
|
|
+ )
|
|
|
+
|
|
|
+ list_row = getDocument([{"query":bool_query,"limit":500}],columns,table_name="preproject",table_index="preproject_index",thread_count=1,sort_column="may_end")
|
|
|
+
|
|
|
+ log("get document %d rows"%len(list_row))
|
|
|
+
|
|
|
+ df_data = {}
|
|
|
+ set_line = set()
|
|
|
+ # list_row = filterRow(list_row)
|
|
|
+ df_columns_yc = getRowData_yc(df_data,list_row,set_line,list_keyword,dict_channel,True)
|
|
|
+ fixContactPerson(df_data,df_columns_yc)
|
|
|
+
|
|
|
+
|
|
|
+ for _i in range(len(df_data["项目类型"])):
|
|
|
+ if df_data["项目类型"][_i] in ("政府办公室系统","财政系统","民事系统","企业采购系统","卫生系统"):
|
|
|
+ df_data["类型"][_i] = "政企"
|
|
|
+ elif df_data["项目类型"][_i] in ("教育系统","科研系统"):
|
|
|
+ df_data["类型"][_i] = "教育"
|
|
|
+ else:
|
|
|
+ df_data["类型"][_i] = "其他"
|
|
|
+ df1 = pd.DataFrame(df_data)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ filename = os.path.dirname(__file__)+"/data/%s年%s项目数据导出.xlsx"%(start_time[:4],current_date)
|
|
|
+ with pd.ExcelWriter(filename) as writer:
|
|
|
+ df_f11.to_excel(writer,sheet_name="教育",columns=df_columns if not df_f11.empty else None,index=False)
|
|
|
+ df_f12.to_excel(writer,sheet_name="政企",columns=df_columns if not df_f12.empty else None,index=False)
|
|
|
+ df1.to_excel(writer,sheet_name="项目预测",columns=df_columns_yc if not df1.empty else None)
|
|
|
+ log(str(filename))
|
|
|
+
|
|
|
+ host = "smtp.exmail.qq.com"
|
|
|
+ username = "vip@bidizhaobiao.com"
|
|
|
+ password = "Biaoxun66-"
|
|
|
+ # receivers = ["724949655@qq.com","1396488964@qq.com"]
|
|
|
+ receivers = ["md47@zuowei.com"]
|
|
|
+
|
|
|
+ # receivers = ["1175730271@qq.com"]
|
|
|
+ attachs = [filename]
|
|
|
+
|
|
|
+ sendEmail(host,username,password,receivers,attachs=attachs)
|
|
|
+
|
|
|
+
|
|
|
+ break
|
|
|
+ except Exception as e:
|
|
|
+ traceback.print_exc()
|
|
|
+
|
|
|
+def job_export():
|
|
|
_scheduler = BlockingScheduler()
|
|
|
- _scheduler.add_job(export_medicine_friday,"cron",day_of_week='fri',hour=18)
|
|
|
+
|
|
|
+ e3 = Export3()
|
|
|
+ # _scheduler.add_job(export_medicine_friday,"cron",hour=8)
|
|
|
+ # _scheduler.add_job(export_medicine_friday,"cron",second="*/1")
|
|
|
+ _scheduler.add_job(export2,"cron",hour=9)
|
|
|
+ _scheduler.add_job(export5,"cron",hour=16)
|
|
|
+ # _scheduler.add_job(e3.export4,"cron",hour=7)
|
|
|
+ _scheduler.add_job(e3.trytimes,"cron",hour=21)
|
|
|
+ _scheduler.add_job(e3.export3_1,"cron",hour=21)
|
|
|
+ _scheduler.add_job(export_15824381998,"cron",hour=21)
|
|
|
+ _scheduler.add_job(export_13510123669,"cron",hour=10)
|
|
|
_scheduler.start()
|
|
|
|
|
|
+if __name__=="__main__":
|
|
|
+ # job_export()
|
|
|
+ # export_medicine_friday()
|
|
|
+ # export2()
|
|
|
+ # export_document_except()
|
|
|
+ # e3 = Export3()
|
|
|
+ # e3.export4_by_project()
|
|
|
+ # e3.export4()
|
|
|
+ # e3.trytimes()
|
|
|
+ # e3.export3_1()
|
|
|
+ # export5()
|
|
|
+ # export_15824381998()
|
|
|
+ export_13510123669()
|
|
|
+
|
|
|
|
|
|
|
|
|
-if __name__=="__main__":
|
|
|
- job_medicine_friday()
|