- #coding:UTF8
- import sys
- import os
- sys.path.append("../")
- import pandas as pd
- from dataSource.source import *
- import json
- from utils.multiThread import MultiThreadHandler
- import queue
- from utils.Utils import *
- from dataSource.pool import ConnectorPool
- import re
- from tablestore import *
- import traceback
- from export.exportUtils import generateBoolShouldQuery,splitIntoList
- data_path = "../data/"
- def getCompanys():
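- # Fetch companies from Elasticsearch whose name matches the environment-related keywords below; the province and zhongBiaoNumber filters are left commented out.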
- list_company = []
- keywords = ["环境","生态","再生","回收","环保"]
- provinces = ["广东"]
- for _name in keywords:
- for _prov in provinces:
- data = make_elasticSearch({
- "query": {
- "bool": {
- "must": [
- {
- "wildcard": {
- "name.keyword": "*%s*"%_name
- }
- }
- # ,
- # {
- # "term": {
- # "province.keyword": "%s"%_prov
- # }
- # }
- # ,
- # {
- # "range": {
- # "zhongBiaoNumber": {
- # "gt": "0"
- # }
- # }
- # }
- ],
- "must_not": [ ],
- "should": [ ]
- }
- },
- "from": 0,
- "size": 1000000,
- "sort": [ ],
- "aggs": { }
- })
- print("--",data["hits"]["total"])
- for item in data["hits"]["hits"]:
- _company = {"enterprise_name":"","regCapital":"","legal_person":"","phone":"","industry":"","province":""}
- _company["enterprise_name"] = item["_source"].get("name","")
- _company["regCapital"] = item["_source"].get("regCapital","")
- _company["zhongBiaoNumber"] = item["_source"].get("zhongBiaoNumber","0")
- list_company.append(_company)
- # data = make_elasticSearch({
- # "query": {
- # "bool": {
- # "must": [
- # {
- # "wildcard": {
- # "name.keyword": "*电商*"
- # }
- # }
- # ,
- # {
- # "term": {
- # "province.keyword": "北京"
- # }
- # }
- # ,
- # {
- # "range": {
- # "zhongBiaoNumber": {
- # "gt": "0"
- # }
- # }
- # }
- # ],
- # "must_not": [ ],
- # "should": [ ]
- # }
- # },
- # "from": 0,
- # "size": 10000,
- # "sort": [ ],
- # "aggs": { }
- # })
- #
- # for item in data["hits"]["hits"]:
- # _company = {"enterprise_name":"","regCapital":"","legal_person":"","phone":"","industry":"","province":""}
- # _company["enterprise_name"] = item["_source"].get("name","")
- # _company["regCapital"] = item["_source"].get("regCapital","")
- # list_company.append(_company)
- print(len(list_company))
- return list_company
- def exportFactory():
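- # Read company names from an Excel sheet, enrich each with registration details from MongoDB (enterprise_profile) and its ZhongBiaoRelation count from Neo4j, then write the merged table to "<filename>_export.xlsx".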
- def _handle(item,result_queue,pool_mongo,pool_neo4j):
- company_name = item["enterprise_name"]
- mongo = pool_mongo.getConnector()
- coll_zb = mongo.enterprise_profile
- rows = coll_zb.find({"enterprise_name":item["enterprise_name"]},{"enterprise_name":1, "actualCapital":1,"estiblishTime":1,"legal_person":1,"phone":1 })
- _flag = False
- for row in rows:
- actualCapital = row.get("actualCapital","0")
- estiblishTime = row.get("estiblishTime","2020-01-01")
- _capital = re.match(r"\d+[亿万]+",actualCapital)
- # if _capital is not None:
- # if getUnifyMoney(_capital.group())>getUnifyMoney("5000万"):
- # if estiblishTime<="2015-10-09":
- item["legal_person"] = row.get("legal_person","")
- item["phone"] = row.get("phone","")
- item["actualCapital"] = actualCapital
- item["estiblishTime"] = row.get("estiblishTime","")
- _flag = True
- break
- if _flag:
- result_queue.put(item)
- cql = "MATCH (n:Organization)-[r:ZhongBiaoRelation]->(p:Project) where n.name='%s' RETURN count(p) as _c "%(company_name)
- graph = pool_neo4j.getConnector()
- finded = graph.run(cql)
- data = json.loads(json.dumps(finded.data()))
- _count = data[0]["_c"]
- # list_project = []
- # for _data in data:
- # if _count<=3:
- # if "zhong_biao_page_time" in _data and _data["zhong_biao_page_time"]>"2019-01-01":
- # if _data["project_name"] is not None:
- # list_project.append(_data["project_name"])
- # _count += 1
- item["count"] = _count
- pool_mongo.putConnector(mongo)
- pool_neo4j.putConnector(graph)
- # list_company = getCompanys()
- list_company = []
- filename = "../data/天眼查1(1).xlsx"
- df1 = pd.read_excel(filename)
- for item in df1["公司名称"]:
- list_company.append({"enterprise_name":item,"regCapital":"","legal_person":"","phone":"","industry":"","province":""})
- task_queue = queue.Queue()
- result_queue = queue.Queue()
- for item in list_company:
- task_queue.put(item)
- pool_mongo = ConnectorPool(init_num=10,max_num=50,method_init=getConnect_mongodb)
- pool_neo4j = ConnectorPool(init_num=10,max_num=50,method_init=getConnect_neo4j)
- _mult = MultiThreadHandler(task_queue=task_queue,task_handler=_handle,result_queue=result_queue,thread_count=70,pool_mongo=pool_mongo,pool_neo4j=pool_neo4j)
- _mult.run()
- list_name = []
- list_actualCapital = []
- list_estiblishTime = []
- list_legal_person = []
- list_phone = []
- list_zb = []
- while(True):
- try:
- item = result_queue.get(False)
- list_name.append(item["enterprise_name"])
- list_actualCapital.append(item["actualCapital"])
- list_estiblishTime.append(item["estiblishTime"])
- list_legal_person.append(item["legal_person"])
- list_phone.append(item["phone"])
- list_zb.append(item["count"])
- except:
- break
- df = pd.DataFrame({"公司":list_name,"实缴":list_actualCapital,
- "注册时间":list_estiblishTime,"联系人":list_legal_person,"联系电话":list_phone,
- "中标次数":list_zb})
- df.to_excel("%s"%filename+"_export.xlsx",columns=["公司","实缴","注册时间","联系人","联系电话","中标次数"])
- def deal():
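- # For each company in 北京行业_export.xls, fetch its three most recent post-2019 winning-bid projects from Neo4j and write the enriched table to 北京行业_export1.xls.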
- def _handle(item,result_queue):
- graph = getConnect_neo4j()
- company_name = item["enterprise_name"]
- cql = "MATCH (n:Organization)-[r:ZhongBiaoRelation]->(p:Project) where n.name='%s' RETURN p.zhong_biao_page_time as zhong_biao_page_time,p.project_name as project_name order by p.zhong_biao_page_time desc limit 3"%(company_name)
- finded = graph.run(cql)
- data = json.loads(json.dumps(finded.data()))
- _count = 1
- list_project = []
- for _data in data:
- if _count<=3:
- if "zhong_biao_page_time" in _data and _data["zhong_biao_page_time"]>"2019-01-01":
- list_project.append(_data["project_name"])
- _count += 1
- item["project"] = str(list_project)
- result_queue.put(item)
- file = "../data/北京行业_export.xls"
- df = pd.read_excel(file)
- list_company = []
- for _company,rep,industry,project,count,person,phone in zip(df["公司名字"],df["注册资金"],df["行业"],df["中标项目"],df["中标次数"],df["联系人"],df["联系电话"]):
- list_company.append({"enterprise_name":_company,"regCapital":rep,"legal_person":person,"phone":phone,"industry":industry,"province":"","count":count})
- task_queue = queue.Queue()
- result_queue = queue.Queue()
- for item in list_company:
- task_queue.put(item)
- _mult = MultiThreadHandler(task_queue=task_queue,task_handler=_handle,result_queue=result_queue,thread_count=30)
- _mult.run()
- list_name = []
- list_regCapital = []
- list_industry = []
- list_count = []
- list_person = []
- list_phone = []
- list_project = []
- while(True):
- try:
- _result = result_queue.get(False)
- list_name.append(_result["enterprise_name"])
- list_regCapital.append(_result["regCapital"])
- list_industry.append(_result["industry"])
- list_count.append(_result["count"])
- list_person.append(_result["legal_person"])
- list_phone.append(_result["phone"])
- list_project.append(_result["project"])
- except Exception as e:
- print(e)
- break
- df1 = pd.DataFrame({"公司名字":list_name,"注册资金":list_regCapital,"行业":list_industry,"中标项目":list_project,"中标次数":list_count,"联系人":list_person,"联系电话":list_phone})
- df1.to_excel("%s_export1.xls"%("北京行业"),columns=["公司名字","注册资金","行业","中标项目","中标次数","联系人","联系电话"])
- def deal1():
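- # For each company in the source sheet, collect up to three post-2019 winning projects, the capped win count and the summed award amount from Neo4j; only name and count are exported (the fuller export is commented out).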
- def _handle(item,result_queue):
- graph = getConnect_neo4j()
- company_name = item["enterprise_name"]
- cql = "MATCH (n:Organization)-[r:ZhongBiaoRelation]->(p:Project) where n.name='%s' RETURN p.zhong_biao_page_time as zhong_biao_page_time,p.project_name as project_name order by p.zhong_biao_page_time desc "%(company_name)
- finded = graph.run(cql)
- data = json.loads(json.dumps(finded.data()))
- _count = 0
- list_project = []
- for _data in data:
- if _count<=2:
- if "zhong_biao_page_time" in _data and _data["zhong_biao_page_time"]>"2019-01-01":
- list_project.append(_data["project_name"])
- _count += 1
- item["count"] = _count
- item["project"] = str(list_project)
- cql = "MATCH (n:Organization)-[r:ZhongBiaoRelation]->(p:Project) where n.name='%s' RETURN r.price"%(company_name)
- print(cql)
- finded = graph.run(cql)
- finded_money = json.loads(json.dumps(finded.data()))
- whole_money = 0
- for _item in finded_money:
- if _item["r.price"] is not None:
- whole_money += getUnifyMoney(_item["r.price"])
- item["whole_money"] = str(whole_money)
- result_queue.put(item)
- # filename = "数据导出需求9.11(1)(1).xlsx"
- filename = "../data/新建 XLSX 工作表(1).xlsx"
- df = pd.read_excel(filename)
- list_company = []
- for _key in df.keys():
- print(_key,len(df[_key]))
- for _company in df["公司名称"]:
- list_company.append({"enterprise_name":_company,"regCapital":"","legal_person":"","phone":"","industry":"","province":"","count":0})
- task_queue = queue.Queue()
- result_queue = queue.Queue()
- for item in list_company:
- task_queue.put(item)
- _mult = MultiThreadHandler(task_queue=task_queue,task_handler=_handle,result_queue=result_queue,thread_count=30)
- _mult.run()
- _dict_item = {}
- while(True):
- try:
- item = result_queue.get(False)
- if item["enterprise_name"]!="":
- _dict_item[item["enterprise_name"]] = item
- except Exception as e:
- print(str(e))
- break
- list_count = []
- list_project = []
- list_money = []
- list_zb = []
- for _company in df["公司名称"]:
- if _company in _dict_item:
- list_count.append(_dict_item[_company]["count"])
- list_project.append(_dict_item[_company]["project"])
- list_money.append(_dict_item[_company]["whole_money"])
- list_zb.append("是" if _dict_item[_company]["count"]>0 else "否")
- else:
- print(_company)
- list_count.append(0)
- list_project.append("")
- list_money.append("0")
- list_zb.append("否")
- print(len(list_count),len(list_project),len(list_money),len(list_zb))
- df2 = pd.DataFrame({"公司名称":df["公司名称"],"次数":list_count})
- df2.to_excel("%s_export.xls"%filename)
- # df1 = pd.DataFrame({"月份":df["月份"],"电话":df["电话"],"公司名字":df["公司名字"],"开通时间":df["开通时间"],
- # "到期时间":df["到期时间"],"客户公司注册时间":df["客户公司注册时间"],"客户公司注册资金":df["客户公司注册资金"],
- # "实际缴费资金":df["实际缴费资金"],"天眼查行业分类":df["天眼查行业分类"],"是否中标":list_zb,
- # "中标次数":list_count,"中标项目|3个":list_project,"中标金额":list_money,"客户设置关键词":df["客户设置关键词"],"客户搜索词":df["客户搜索词"].xls})
- # df1.to_excel("%s_补充.xls"%filename,columns=["月份","电话","公司名字", "开通时间" ,"到期时间" ,"客户公司注册时间" ,"客户公司注册资金" ,"实际缴费资金" ,"天眼查行业分类" ,"是否中标" ,"中标次数" ,"中标项目|3个" ,"中标金额" ,"客户设置关键词" ,"客户搜索词"])
- def deal3():
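- # Count how many companies in 导出工厂.xlsx have paid-in capital (实缴) above 5000万.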
- filename = "../data/导出工厂.xlsx"
- df = pd.read_excel(filename)
- count = 0
- for item in df["实缴"]:
- if getUnifyMoney(item)>getUnifyMoney("5000万"):
- count += 1
- print(count)
- def exportEnterpriseByName():
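- # Enrich 中标家具公司.csv with enterprise details read from Tablestore by primary key, matching the contact whose mobile/phone equals the phone in the sheet, and write 中标家具公司1.csv.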
- df = pd.read_csv("../data/中标家具公司.csv",encoding="GBK")
- def _handle(item,result_queue,pool_ots):
- ots_client = pool_ots.getConnector()
- primary_key = [('name',str(item["name"]))]
- columns_to_get = ["reg_capital","actual_capital","contacts","industry","estiblish_time","social_staff_num","business_scope","zhong_biao_number"]
- consumed, return_row, next_token = ots_client.get_row("enterprise",primary_key, columns_to_get, None, 1)
- print(return_row)
- for _item in return_row.attribute_columns:
- if _item[0]=="contacts":
- a = json.loads(_item[1])
- for i in a:
- if i.get("mobile_no","")==item["phone"] or i.get("phone_no","")==item["phone"]:
- item["contact_person"] = i.get("contact_person","")
- else:
- item[_item[0]] = _item[1]
- list_dict = []
- for name,phone in zip(df["name"],df["phone"]):
- list_dict.append({"name":name,"phone":phone})
- task_queue = queue.Queue()
- for item in list_dict:
- task_queue.put(item)
- result_queue = queue.Queue()
- pool_ots = ConnectorPool(init_num=10,max_num=30,method_init=getConnect_ots)
- mt = MultiThreadHandler(task_queue=task_queue,task_handler=_handle,result_queue=result_queue,thread_count=70,pool_ots=pool_ots)
- mt.run()
- columns = ["name","contact_person","phone","reg_capital","actual_capital","industry","estiblish_time","social_staff_num","business_scope","zhong_biao_number"]
- df_data = {}
- for _c in columns:
- df_data[_c] = []
- for item in list_dict:
- for _key in columns:
- df_data[_key].append(item.get(_key,""))
- df1 = pd.DataFrame(df_data)
- df1.to_csv("中标家具公司1.csv")
- def getCompanys():
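- # NOTE: redefines getCompanys() above. Pulls Guangzhou member companies (login, contact, member level, after-sales rep) from MySQL for memberships active on 2020-11-20.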
- conn = getConnection_mysql()
- cursor = conn.cursor()
- sql = '''select C.login_id as 登陆名,B.company ,B.contactname as 联系人,B.phone as 联系电话 ,(select MLEVELNAME from sys_memberlevel where id =A.memberlevelid) as 会员等级,( select name from b2c_mall_staff_basic_info where userid=B.aftermarket) as 售后客服 from bxkc.bxkc_member_term A,bxkc.b2c_mall_staff_basic_info B,bxkc.b2c_user_login_info C
- where A.USERID=B.USERID and B.USERID=C.USERID and B.innerOrg like '广州%'
- and A.memberlevelid!=81 and A.status='01' and str_to_date('2020-11-20','%Y-%m-%d') between A.stime and A.etiem ;
- '''
- cursor.execute(sql)
- vol = cursor.description
- list_company = []
- rows = cursor.fetchall()
- for row in rows:
- _company = {}
- for _vol,_value in zip(vol,row):
- _name = _vol[0]
- _company[_name] = _value
- list_company.append(_company)
- return list_company
- def exportEnterprise_byindustry(page_time,
- columns = ["name","address","business_scope","province","city","district","reg_capital","phone","estiblish_time"],
- keywords = ["钢材","水泥","五金","水电","暖通","暖气","电缆"]):
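- # Export enterprises from Tablestore whose industry/nicknames match the given keywords and that were established no later than 2017-01-01, paging with next_token and writing enterprise_2017_a.csv.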
- list_should_q = []
- for _key in keywords:
- list_should_q.append(WildcardQuery("industry","*%s*"%_key))
- list_should_q.append(WildcardQuery("nicknames","*%s*"%_key))
- key_query = BoolQuery(should_queries=list_should_q)
- #WildcardQuery("industry","*建筑*")
- ots_client = getConnect_ots()
- bool_query = BoolQuery(must_queries=[RangeQuery("bidi_id",0,include_lower=True),
- key_query,
- RangeQuery("estiblish_time",range_to="2017-01-01")])
- rows, next_token, total_count, is_all_succeed = ots_client.search("enterprise", "enterprise_index",
- SearchQuery(bool_query, limit=100, get_total_count=True),
- ColumnsToGet(columns,return_type=ColumnReturnType.SPECIFIED))
- all_rows = 0
- df_data = {}
- for key in columns:
- df_data[key] = []
- for row in rows:
- _dict = dict()
- for part in row:
- for item in part:
- _dict[item[0]] = item[1]
- for key in columns:
- df_data[key].append(_dict.get(key,""))
- # if "reg_capital" in _dict:
- # _money = re.match("\d+[万亿千百十]",_dict["reg_capital"])
- # if _money is not None:
- # if getUnifyMoney(_money.group())>2000000:
- # for key in columns:
- # df_data[key].append(_dict.get(key,""))
- all_rows += len(rows)
- # print(next_token)
- while(next_token):
- rows, next_token, total_count, is_all_succeed = ots_client.search("enterprise", "enterprise_index",
- SearchQuery(bool_query, next_token=next_token,limit=100, get_total_count=True),
- ColumnsToGet(columns,return_type=ColumnReturnType.SPECIFIED))
- for row in rows:
- _dict = dict()
- for part in row:
- for item in part:
- _dict[item[0]] = item[1]
- for key in columns:
- df_data[key].append(_dict.get(key,""))
- # if "reg_capital" in _dict:
- # _money = re.match("\d+[万亿千百十]",_dict["reg_capital"])
- # if _money is not None:
- # if getUnifyMoney(_money.group())>2000000:
- # for key in columns:
- # df_data[key].append(_dict.get(key,""))
- all_rows += len(rows)
- print(all_rows,total_count,len(df_data[columns[0]]))
- df = pd.DataFrame(df_data)
- df.to_csv("../data/enterprise_2017_a.csv",columns=columns)
- def getTyc_company():
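- # Bulk-insert company identifiers from Tianyancha export spreadsheets into the MySQL Enterprise table; the hard-coded single-file list below overrides the directory scan.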
- root_path = ["G:/文档/tyc国企","G:/文档/tyc机构"]
- list_files = []
- for _path in root_path:
- for file in os.listdir(_path):
- list_files.append(os.path.join(_path,file))
- list_files = ["G:/文档/tyc机构\\高级搜索导出数据结果—自定义条件—天眼查(W20011656561610789770227).xlsx"]
- pool_mysql = ConnectorPool(method_init=getConnection_testmysql,init_num=10,max_num=30)
- task_queue = queue.Queue()
- result_queue = queue.Queue()
- for _file in list_files:
- task_queue.put(_file)
- def _handle(_file,task_queue,pool_mysql):
- print("handle",_file)
- conn = pool_mysql.getConnector()
- cursor = conn.cursor()
- df = pd.read_excel(_file,header=2)
- for name,social_credit,identification,regist_num,organization_code in zip(df["公司名称"],df["统一社会信用代码"],df["纳税人识别号"],df["注册号"],df["组织机构代码"]):
- try:
- sql = " insert into Enterprise(name,social_credit,identification,regist_num,organization_code) values ('%s','%s','%s','%s','%s')"%(name,social_credit,identification,regist_num,organization_code)
- cursor.execute(sql)
- except Exception as e:
- print("error")
- conn.commit()
- pool_mysql.putConnector(conn)
- mt = MultiThreadHandler(task_queue,_handle,result_queue,20,pool_mysql=pool_mysql)
- mt.run()
- set_columns = set()
- list_df_columns = []
- def set_dict_item(_dict,name,v):
- _dict[name] = getLegal_str(v)
- if name not in set_columns:
- set_columns.add(name)
- list_df_columns.append(getLegal_str(name))
- def exportEnterprise_by_bidNum():
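- # Export Beijing/Tianjin enterprises whose nicknames mention 地产 or 酒店 and that have a mobile contact: one output row per mobile number, capped at roughly 300 rows per province.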
- columns = ["name","contacts","province","city","address","reg_location"]
- list_data = []
- df_data = {}  # initialised here because getData() below is called with it before the aggregation step
- ots_client = getConnect_ots()
- bool_query = BoolQuery(must_not_queries=[
- ExistsQuery("tyc_id"),
- RangeQuery("bid_number",1),
- RangeQuery("status",401,451),
- BoolQuery(should_queries=[NestedQuery("contacts",ExistsQuery("contacts.phone_no")),
- NestedQuery("contacts",ExistsQuery("contacts.mobile_no"))])
- ])
- for _prov in ["北京","天津"]:
- bool_query = BoolQuery(must_queries=[BoolQuery(should_queries=[TermQuery("province",_prov)]),
- BoolQuery(should_queries=[MatchPhraseQuery("nicknames","地产"),MatchPhraseQuery("nicknames","酒店")]),
- NestedQuery("contacts",WildcardQuery("contacts.mobile_no","1*"))])
- #
- # bool_query = BoolQuery(must_queries=[MatchPhraseQuery("nicknames","物资回收"),
- # TermQuery("province","贵州")]
- # ,must_not_queries=[ExistsQuery("tyc_id"),NestedQuery("contacts",ExistsQuery("contacts"))]
- # )
- rows, next_token, total_count, is_all_succeed = ots_client.search("enterprise", "enterprise_index",
- SearchQuery(bool_query,sort=Sort(sorters=[FieldSort("tyc_id",SortOrder.ASC)]), limit=100, get_total_count=True),
- ColumnsToGet(columns,return_type=ColumnReturnType.SPECIFIED))
- def getData(df_data,rows):
- list_dict = getRow_ots(rows)
- for _dict in list_dict:
- print(_dict)
- for mobile_person,mobile_no in getMobiles(_dict.get("contacts","[]")):
- # for contact_person,mobile_no in getMobiles(_dict.get("contacts","[{}]")):
- _d = {}
- set_dict_item(_d,"名称",_dict.get("name",""))
- set_dict_item(_d,"省份",_dict.get("province",""))
- set_dict_item(_d,"城市",_dict.get("city",""))
- set_dict_item(_d,"联系人",mobile_person)
- set_dict_item(_d,"手机",mobile_no)
- list_data.append(_d)
- # _d = {}
- # set_dict_item(_d,"名称",_dict.get("name",""))
- # set_dict_item(_d,"省份",_dict.get("province",""))
- # set_dict_item(_d,"城市",_dict.get("city",""))
- # list_data.append(_d)
- # mobile_person,mobile_no = getOneContact(_dict.get("contacts"))
- # if mobile_no!="":
- # set_dict_item(_d,"联系人",mobile_person)
- # set_dict_item(_d,"手机",mobile_no)
- # # _address = _dict.get("address","")
- # # reg_location = _dict.get("reg_location","")
- # # if _address=="":
- # # _address = reg_location
- # # set_dict_item(_d,"地址",_address)
- # list_data.append(_d)
- getData(df_data,rows)
- _count = len(rows)
- while(next_token):
- print("%d/%d"%(_count,total_count))
- rows, next_token, total_count, is_all_succeed = ots_client.search("enterprise", "enterprise_index",
- SearchQuery(bool_query, next_token=next_token,limit=100, get_total_count=True),
- ColumnsToGet(columns,return_type=ColumnReturnType.SPECIFIED))
- getData(df_data,rows)
- _count += len(rows)
- if _count>=300:
- break
- df_data = {}
- for item in list_data:
- for k in list_df_columns:
- if k not in df_data:
- df_data[k] = []
- df_data[k].append(item.get(k))
- df = pd.DataFrame(df_data)
- df.to_excel("../data/%s_enterprise_bidinum.xlsx"%getCurrent_date("%Y-%m-%d_%H%M%S"),columns=list_df_columns)
- def make_Legal_enterprise():
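- # Build LEGAL_ENTERPRISE.txt from CSV and MySQL enterprise names, dropping names that are too short, purely alphanumeric, contain punctuation or end with a region suffix (省/市/区/县/乡/镇).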
- import codecs
- def format(_e):
- if _e is None:
- return None
- if not isinstance(_e,str):
- return None
- if re.search("^[a-zA-Z0-9]+$",_e) is not None:
- return None
- if re.search("[<《]>-。\-\.\?]",_e) is not None:
- return None
- _e1 = re.sub("\s+","",_e.replace("(","(").replace(")",")"))
- if re.search("[省市区县乡镇]$",_e) is not None:
- return None
- if len(_e1)>=4:
- return _e1
- return None
- set_enterprise = set()
- df = pd.read_csv("../data/other/enterprise_bidinum.csv", encoding="GBK")
- _count = 0
- for _e in df["name"]:
- _count += 1
- if _count%10000==0:
- print(_count)
- _e1 = format(_e)
- if _e1 is not None:
- set_enterprise.add(_e1)
- conn = getConnection_testmysql()
- cursor = conn.cursor()
- sql = " select name from Enterprise "
- cursor.execute(sql)
- rows = cursor.fetchmany(10000)
- while rows:
- for row in rows:
- _count += 1
- if _count%10000==0:
- print(_count)
- _e = row[0]
- _e1 = format(_e)
- if _e1 is not None:
- set_enterprise.add(_e1)
- rows = cursor.fetchmany(10000)
- with codecs.open("../data/other/LEGAL_ENTERPRISE.txt", "w", encoding="UTF8") as f:
- for _e in list(set_enterprise):
- f.write(_e+"\n")
- def getDictEnterprise(list_enterprise,columns_to_get = ["reg_capital","actual_capital","industry","estiblish_time","social_staff_num","zhong_biao_number","tou_biao_number","credit_code"]):
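- # Look up the requested columns for each enterprise name from Tablestore (multi-threaded get_row) and return a {name: row_dict} mapping.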
- task_queue = queue.Queue()
- result_queue= queue.Queue()
- for _enterprise in list_enterprise:
- task_queue.put(str(_enterprise))
- def _handle(item,result_queue,pool_ots):
- ots_client = pool_ots.getConnector()
- try:
- primary_key = [("name",item)]
- consumed,return_row,next_token = ots_client.get_row("enterprise",primary_key,columns_to_get,None,1)
- dict_data = getRow_ots_primary(return_row)
- if dict_data is not None:
- result_queue.put({item:dict_data})
- except Exception as e:
- traceback.print_exc()
- pool_ots.putConnector(ots_client)
- pool_ots = ConnectorPool(init_num=10,max_num=50,method_init=getConnect_ots)
- mt = MultiThreadHandler(task_queue=task_queue,task_handler=_handle,result_queue=result_queue,thread_count=50,pool_ots=pool_ots)
- mt.run()
- dict_enterprise = {}
- while True:
- try:
- _dict = result_queue.get(False)
- for k,v in _dict.items():
- dict_enterprise[k] = v
- except Exception as e:
- break
- return dict_enterprise
- def getOneContact(contacts,tojson=True,mobile_first=True,mobile_only=True):
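- # Pick a single contact from a contacts JSON list, preferring a mobile number; returns (person, number), falling back to a landline when allowed.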
- mobile_person = ""
- mobile_no = ''
- phone_person = ""
- phone_no = ''
- if contacts is None:
- return "",""
- try:
- if tojson:
- list_contacts = json.loads(contacts)
- else:
- list_contacts = contacts
- for _contact in list_contacts:
- if _contact.get("mobile_no","")!="":
- mobile_person = _contact.get("contact_person","")
- mobile_no = _contact.get("mobile_no","")
- if _contact.get("phone_no","")!="":
- phone_person = _contact.get("contact_person","")
- phone_no = _contact.get("phone_no","")
- if mobile_first:
- if mobile_no!="":
- return mobile_person,mobile_no
- else:
- if mobile_only:
- return mobile_person,mobile_no
- except Exception as e:
- pass
- return phone_person,phone_no
- def getMobiles(contacts,to_json=True):
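- # Return [contact_person, mobile_no] pairs for every contact in the JSON list that has a mobile number.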
- if to_json:
- list_contacts = json.loads(contacts)
- else:
- list_contacts = contacts
- list_result = []
- for _c in list_contacts:
- if _c.get("mobile_no","")!="":
- list_result.append([_c.get("contact_person",""),_c.get("mobile_no")])
- return list_result
- def getEnterpriseData(list_enterprise,df_data):
- for _e in list_enterprise:
- _dict = {}
- set_dict_item(_dict,"公司名称",_e.get("name"))
- set_dict_item(_dict,"省份",_e.get("province"))
- set_dict_item(_dict,"城市",_e.get("city"))
- set_dict_item(_dict,"法人",_e.get("legal_person"))
- set_dict_item(_dict,"法人电话",_e.get("phone"))
- _match = re.search("^1\d{10}",_e.get("phone",""))
- set_dict_item(_dict,"是否手机","是" if _match is not None else "否")
- # set_dict_item(_dict,"企业属性",v.get("business_scope",""))
- # set_dict_item(_dict,"行业",v.get("industry",""))
- # contact_person,mobile_no = getOneContact(v.get("contacts",'[]'))
- # set_dict_item(_dict,"所有联系方式",v.get("contacts"))
- # set_dict_item(_dict,"联系人",contact_person)
- # set_dict_item(_dict,"手机号",mobile_no)
- # set_dict_item(_dict,"注册时间",v.get("estiblish_time",""))
- # set_dict_item(_dict,"注册资金",v.get("reg_capital",""))
- # set_dict_item(_dict,"bid_number",v.get("bid_number",0))
- # set_dict_item(_dict,"招标次数",v.get("zhao_biao_number",0))
- # set_dict_item(_dict,"投标次数",v.get("tou_biao_number",0))
- # set_dict_item(_dict,"中标次数",v.get("zhong_biao_number",0))
- # set_dict_item(_dict,"主营产品",v.get("products",""))
- for k,v in _dict.items():
- if k not in df_data:
- df_data[k] = []
- df_data[k].append(v)
- def exportEnterprise():
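- # Export Sichuan enterprises (ordered by zhong_biao_number, capped at ~200), attaching the top-5 scored contacts from enterprise_contact plus status/industry/capital/address fields, to a timestamped Excel file.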
- def getEnterpriseData(list_enterprise,df_data):
- for _e in list_enterprise:
- _dict = {}
- set_dict_item(_dict,"公司名称",_e.get("name"))
- bool_query = BoolQuery(must_queries=[
- TermQuery("enterprise_name",_e.get("name"))
- ])
- rows,next_token,total_count,is_all_succeed = ots_client.search("enterprise_contact","enterprise_contact_index",
- SearchQuery(bool_query,sort=Sort(sorters=[FieldSort("score",SortOrder.DESC)]),limit=5,get_total_count=False),
- ColumnsToGet(["contact_person","position","phone_no"],ColumnReturnType.SPECIFIED))
- list_row = getRow_ots(rows)
- for _i in range(1,6):
- if _i-1<len(list_row):
- set_dict_item(_dict,"企业联系人%d"%_i,"%s(%s)\n%s"%(list_row[_i-1].get("contact_person",""),list_row[_i-1].get("position",""),list_row[_i-1].get("phone_no","")))
- else:
- set_dict_item(_dict,"企业联系人%d"%_i,"")
- # 企业状态 行业 机构类型 注册资本 成立时间 企业注册地 企业地址
- set_dict_item(_dict,"企业状态",_e.get("reg_status"))
- set_dict_item(_dict,"行业",_e.get("industry"))
- set_dict_item(_dict,"机构类型","公司")
- set_dict_item(_dict,"注册资本",_e.get("reg_capital"))
- set_dict_item(_dict,"成立时间",_e.get("found_date"))
- set_dict_item(_dict,"企业注册地","%s-%s"%(_e.get("province",""),_e.get("city","")))
- set_dict_item(_dict,"企业地址",_e.get("reg_location"))
- # _match = re.search("^1\d{10}",_e.get("phone",""))
- # set_dict_item(_dict,"是否手机","是" if _match is not None else "否")
- # set_dict_item(_dict,"企业属性",v.get("business_scope",""))
- # set_dict_item(_dict,"行业",v.get("industry",""))
- # contact_person,mobile_no = getOneContact(v.get("contacts",'[]'))
- # set_dict_item(_dict,"所有联系方式",v.get("contacts"))
- # set_dict_item(_dict,"联系人",contact_person)
- # set_dict_item(_dict,"手机号",mobile_no)
- # set_dict_item(_dict,"注册时间",v.get("estiblish_time",""))
- # set_dict_item(_dict,"注册资金",v.get("reg_capital",""))
- # set_dict_item(_dict,"bid_number",v.get("bid_number",0))
- # set_dict_item(_dict,"招标次数",v.get("zhao_biao_number",0))
- # set_dict_item(_dict,"投标次数",v.get("tou_biao_number",0))
- # set_dict_item(_dict,"中标次数",v.get("zhong_biao_number",0))
- # set_dict_item(_dict,"主营产品",v.get("products",""))
- for k,v in _dict.items():
- if k not in df_data:
- df_data[k] = []
- df_data[k].append(v)
- a = '''
- '''
- sys_keys = splitIntoList(a,"\s")
- # data = pd.read_excel("../data/用户投标情况导出.xlsx")
- _name_c = "公司名称"
- list_enterprise = []
- columns = ["province","city","legal_person","phone","reg_status","industry","reg_capital","found_date","reg_location"]
- ots_client = getConnect_ots()
- bool_query = BoolQuery(must_queries=[
- # RangeQuery("zhong_biao_number",1000)
- # TermQuery("qualifications_number",0),
- # MatchPhraseQuery("nicknames","公司"),
- generateBoolShouldQuery(["province"],["四川"],WildcardQuery),
- generateBoolShouldQuery([""])
- # generateBoolShouldQuery(["province"],["上海","江苏","浙江","安徽","福建","江西","山东"],WildcardQuery),
- # generateBoolShouldQuery(["nicknames"],["工程","建筑","建设"],MatchPhraseQuery)
- ],
- must_not_queries=[RangeQuery("status",401,451)])
- rows,next_token,total_count,is_all_succeed = ots_client.search("enterprise","enterprise_index",
- SearchQuery(bool_query,sort=Sort(sorters=[FieldSort("zhong_biao_number",SortOrder.ASC)]),limit=100,get_total_count=True),
- columns_to_get=ColumnsToGet(columns,ColumnReturnType.SPECIFIED))
- print("total_count",total_count)
- list_data = getRow_ots(rows)
- list_enterprise.extend(list_data)
- while next_token:
- rows,next_token,total_count,is_all_succeed = ots_client.search("enterprise","enterprise_index",
- SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
- columns_to_get=ColumnsToGet(columns,ColumnReturnType.SPECIFIED))
- print("%d/%d"%(len(list_enterprise),total_count))
- list_data = getRow_ots(rows)
- list_enterprise.extend(list_data)
- if len(list_enterprise)>=200:
- break
- # dict_enterprise = getDictEnterprise(data[_name_c][:1050000],
- df_data = {}
- getEnterpriseData(list_enterprise,df_data)
- df = pd.DataFrame(df_data)
- df.to_excel("../data/%s企业导出.xlsx"%getCurrent_date("%Y-%m-%d_%H%M%S"),columns=list_df_columns)
- import numpy as np
- def exportEnterprise_by_phone():
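- # Check, name by name, how many companies from 用户数据0910.xlsx exist in the Tablestore enterprise table; the batched should-query variant is left commented out.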
- ots_client = getConnect_ots()
- filename = "C:\\Users\\Administrator\\Desktop\\用户数据0910.xlsx"
- df = pd.read_excel(filename)
- astr_phone = df["手机"]
- all_count = 0
- _begin = 0
- int_count = 0
- while _begin<5582:
- # should_q = []
- # print("-=")
- # for str_phone,str_enter,int_throw,int_search in zip(astr_phone[_begin:_begin+100],df["公司名称"][_begin:_begin+100],df["浏览条数"][_begin:_begin+100],df["搜索次数"][_begin:_begin+100]):
- # if str(str_phone) !="nan" and str(str_enter)!="nan" and str(int_search)=="nan" and str(int_throw)!="nan":
- # int_count += 1
- # print(str_phone,str_enter,int_throw,int_search)
- # _phone = str(int(str_phone))
- # # should_q.append(NestedQuery("contacts",TermQuery("contacts.mobile_no",_phone)))
- # should_q.append(MatchPhraseQuery("nicknames",str(str_enter)))
- # _begin += 100
- # if should_q:
- # bool_query = BoolQuery(should_queries=should_q)
- # rows,next_token,total_count,is_all_succeed = ots_client.search("enterprise","enterprise_index",
- # SearchQuery(bool_query,get_total_count=True),
- # columns_to_get=ColumnsToGet(["nicknames"],ColumnReturnType.SPECIFIED))
- try:
- str_enter = str(df["公司名称"][_begin])
- consumed, return_row, next_token = ots_client.get_row("enterprise",[('name',str_enter)], ["nicknames"], None, 1)
- rows = getRow_ots_primary(return_row)
- total_count = len(rows)
- _begin += 1
- int_count += 1
- if total_count>0:
- all_count += total_count
- print("===",str_enter,int_count,all_count)
- except Exception as e:
- pass
- print("===",int_count,all_count)
- def attachColumn():
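- # Attach legal_person and phone from Tablestore to the 中标单位 list and write "<filename>.attach.xlsx".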
- filename = "../data/中标单位.xlsx"
- list_data = {}
- list_enterprise = []
- df1 = pd.read_excel(filename)
- for _name in df1["中标单位"]:
- list_enterprise.append(_name)
- d_e = getDictEnterprise(list_enterprise,["legal_person","phone"])
- df_data = {}
- columns = ["name","legal_person","phone"]
- for _name in list_enterprise:
- for _c in columns:
- if _c not in df_data:
- df_data[_c] = []
- df_data[_c].append(d_e.get(_name,{}).get(_c))
- df = pd.DataFrame(df_data)
- df.to_excel("%s.attach.xlsx"%(filename))
- def transform_enterprise():
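- # Copy enterprise rows from the test MySQL database into Oracle BXKC.COMPANY_NAME_INFO, batching with INSERT ALL and retrying row by row after a failed batch.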
- conn_source = getConnection_testmysql()
- conn_target = getConnection_oracle()
- cursor_source = conn_source.cursor()
- cursor_target = conn_target.cursor()
- sql = " select name,province,city,credit_code,org_number,tax_number from enterprise_build "
- cursor_source.execute(sql)
- rows_source = cursor_source.fetchmany(10)
- excepted = False
- _index = 0
- while True:
- try:
- if excepted:
- print("==")
- for _r in rows_source:
- _sql = " insert into BXKC.COMPANY_NAME_INFO(COMPANY_NAME,PROVINCE,CITY,TAX_NUM,ORG_NUM,CREDIT_CODE) values ('%s','%s','%s','%s','%s','%s')"%(_r[0],_r[1],_r[2],_r[5],_r[4],_r[3])
- _sql = _sql.replace("None","")
- cursor_target.execute(_sql)
- conn_target.commit()
- excepted = False
- else:
- _sql = " INSERT ALL"
- for _r in rows_source:
- _sql += " into BXKC.COMPANY_NAME_INFO(COMPANY_NAME,PROVINCE,CITY,TAX_NUM,ORG_NUM,CREDIT_CODE) values ('%s','%s','%s','%s','%s','%s') "%(_r[0],_r[1],_r[2],_r[5],_r[4],_r[3])
- _sql = _sql +" select 1 from dual "
- _sql = _sql.replace("None","")
- cursor_target.execute(_sql)
- conn_target.commit()
- excepted = False
- except Exception as e:
- excepted = True
- traceback.print_exc()
- rows_source = cursor_source.fetchmany(1000)
- _index += 1
- print(_index,excepted)
- if not rows_source or len(rows_source)==0:
- break
- def exportEnterprise_GMV():
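- # For enterprises with 20-100 winning bids (up to 10000 queued), sum win_bid_price from project2 over the trailing 3/6/12/24-month windows ending 2021-12-31 and export the revenue estimates to 蚂蚁测试数据.xlsx.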
- task_queue = queue.Queue()
- ots_client = getConnect_ots()
- bool_query = BoolQuery(must_queries=[
- RangeQuery("zhong_biao_number",20,100)
- ])
- rows,next_token,total_count,is_all_succeed = ots_client.search("enterprise","enterprise_index",
- SearchQuery(bool_query,sort=Sort(sorters=[FieldSort("zhong_biao_number")]),limit=100,get_total_count=True),
- ColumnsToGet(["zhao_biao_number"],ColumnReturnType.SPECIFIED))
- list_dict = getRow_ots(rows)
- for _dict in list_dict:
- task_queue.put(_dict)
- while next_token:
- rows,next_token,total_count,is_all_succeed = ots_client.search("enterprise","enterprise_index",
- SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
- ColumnsToGet(["zhao_biao_number"],ColumnReturnType.SPECIFIED))
- list_dict = getRow_ots(rows)
- for _dict in list_dict:
- task_queue.put(_dict)
- if task_queue.qsize()>=10000:
- break
- def _handle(_dict,result_queue,ots_client):
- name = _dict.get("name")
- bool_query = BoolQuery(must_queries=[
- RangeQuery("page_time","2020-01-01","2021-12-31",True,True),
- TermQuery("win_tenderer",name)
- ])
- rows,next_token,total_count,is_all_succeed = ots_client.search("project2","project2_index",
- SearchQuery(bool_query,sort=Sort(sorters=[FieldSort("page_time",SortOrder.DESC)]),limit=100,get_total_count=True),
- ColumnsToGet(["page_time","win_bid_price"],ColumnReturnType.SPECIFIED))
- list_rows = getRow_ots(rows)
- _dict["c3"] = 0
- _dict["c6"] = 0
- _dict["c12"] = 0
- _dict["c24"] = 0
- for _row in list_rows:
- page_time = _row.get("page_time")
- win_bid_price = _row.get("win_bid_price",0)
- if page_time>="2021-10-01":
- _dict["c3"] += win_bid_price
- _dict["c6"] += win_bid_price
- _dict["c12"] += win_bid_price
- _dict["c24"] += win_bid_price
- elif page_time>="2021-07-01":
- _dict["c6"] += win_bid_price
- _dict["c12"] += win_bid_price
- _dict["c24"] += win_bid_price
- elif page_time>="2021-01-01":
- _dict["c12"] += win_bid_price
- _dict["c24"] += win_bid_price
- else:
- _dict["c24"] += win_bid_price
- while next_token:
- rows,next_token,total_count,is_all_succeed = ots_client.search("project2","project2_index",
- SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
- ColumnsToGet(["page_time","win_bid_price"],ColumnReturnType.SPECIFIED))
- list_rows = getRow_ots(rows)
- for _row in list_rows:
- page_time = _row.get("page_time")
- win_bid_price = _row.get("win_bid_price",0)
- if page_time>="2021-10-01":
- _dict["c3"] += win_bid_price
- _dict["c6"] += win_bid_price
- _dict["c12"] += win_bid_price
- _dict["c24"] += win_bid_price
- elif page_time>="2021-07-01":
- _dict["c6"] += win_bid_price
- _dict["c12"] += win_bid_price
- _dict["c24"] += win_bid_price
- elif page_time>="2021-01-01":
- _dict["c12"] += win_bid_price
- _dict["c24"] += win_bid_price
- else:
- _dict["c24"] += win_bid_price
- result_queue.put(_dict)
- result_queue = queue.Queue()
- mt = MultiThreadHandler(task_queue,_handle,result_queue,30,ots_client=ots_client)
- mt.run()
- list_item = []
- while True:
- try:
- _dict = result_queue.get(False)
- list_item.append(_dict)
- except Exception as e:
- break
- df_data = {"公司名称":[],
- "近3个月营收":[],
- "近6个月营收":[],
- "近12个月营收":[],
- "近24个月营收":[]}
- for _dict in list_item:
- df_data["公司名称"].append(_dict.get("name"))
- df_data["近3个月营收"].append(_dict.get("c3"))
- df_data["近6个月营收"].append(_dict.get("c6"))
- df_data["近12个月营收"].append(_dict.get("c12"))
- df_data["近24个月营收"].append(_dict.get("c24"))
- df = pd.DataFrame(df_data)
- df.to_excel("蚂蚁测试数据.xlsx",columns=["公司名称","近3个月营收","近6个月营收","近12个月营收","近24个月营收"])
- def attachColumn1():
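- # Add a 是否中标 column to the spreadsheet based on each company's zhong_biao_number in Tablestore.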
- filename = "全国剩下数据16570-1(2).xlsx"
- df = pd.read_excel(filename)
- list_enter = list(set(df["公司名"]))
- dict_en = getDictEnterprise(list_enter)
- list_zhongbiao = []
- for company in df["公司名"]:
- _zb = dict_en.get(company,{}).get("zhong_biao_number",0)
- if _zb>0:
- _c = "是"
- else:
- _c = "否"
- list_zhongbiao.append(_c)
- df["是否中标"] = list_zhongbiao
- df.to_excel("全国剩下数据16570-1(2)11.xlsx")
- def exportContact():
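- # For each enterprise name in the export sheet, fetch its reg_location plus one legal-person/mobile contact from Tablestore, exporting only rows that resolved a phone number.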
- filename = "../data/2023-03-06_190109_to_excel.xlsx"
- df = pd.read_excel(filename)
- list_ename = df["_id"]
- list_dict = []
- for _en in list_ename:
- if isinstance(_en,(str)) and _en!="":
- _dict = {"enterprise_name":_en}
- list_dict.append(_dict)
- task_queue = queue.Queue()
- for _d in list_dict:
- task_queue.put(_d)
- ots_client = getConnect_ots()
- def _handle(_d,result_queue):
- _name = _d["enterprise_name"]
- bool_query = BoolQuery(must_queries=[TermQuery("name",_name)])
- rows,next_token,total_count,is_all_succeed = ots_client.search("enterprise","enterprise_index",
- SearchQuery(bool_query,limit=1),
- columns_to_get=ColumnsToGet(["reg_location"],return_type=ColumnReturnType.SPECIFIED))
- l_data = getRow_ots(rows)
- if len(l_data)>0:
- _d.update(l_data[0])
- bool_query = BoolQuery(must_queries=[TermQuery("enterprise_name",_name),
- BoolQuery(should_queries=[TermQuery("is_legal_person",1),
- TermQuery("is_mobile",1)])])
- rows,next_token,total_count,is_all_succeed = ots_client.search("enterprise_contact","enterprise_contact_index",
- SearchQuery(bool_query,limit=5),
- columns_to_get=ColumnsToGet(["enterprise_name","contact_person","phone_no","position"],return_type=ColumnReturnType.SPECIFIED))
- l_data = getRow_ots(rows)
- if len(l_data)>0:
- _d.update(l_data[0])
- mt = MultiThreadHandler(task_queue,_handle,None,60)
- mt.run()
- df_data= {}
- columns = ["name","contact_person","phone_no","reg_location"]
- for _d in list_dict:
- if "phone_no" in _d:
- for c in columns:
- if c not in df_data:
- df_data[c] = []
- df_data[c].append(getLegal_str(_d.get(c,"")))
- df = pd.DataFrame(df_data)
- df.to_excel("../data/%s_export_enterprise.xlsx"%(getCurrent_date(format="%Y-%m-%d_%H%M%S")),encoding="utf",columns=columns)
- def getTycCompany():
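- # Enrich the 公司地址 spreadsheet with legal person, phone, registration status, announcement count (document table) and top contacts from Tablestore, then write "<filename>.xlsx".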
- filename = "公司地址(1).xlsx"
- df = pd.read_excel(filename)
- list_name = df["name"]
- task_queue = queue.Queue()
- list_data = []
- for _i in range(len(list_name)):
- _name = list_name[_i]
- _d = {"企业名称":_name,
- "地址":df["address"][_i],
- "注册地址":df["reg_location"][_i]}
- task_queue.put(_d)
- list_data.append(_d)
- ots_client = getConnect_ots()
- columns = ["legal_person","phone_number"]
- def _handle(item,result_queue):
- try:
- bool_query = BoolQuery(must_queries=[TermQuery("name",item.get("企业名称"))])
- rows, next_token,total_count,is_all_succeed = ots_client.search("enterprise","enterprise_index",
- SearchQuery(bool_query,limit=1),columns_to_get=ColumnsToGet(column_names=columns,return_type=ColumnReturnType.SPECIFIED))
- item["count"] = len(getRow_ots(rows))
- if item["count"]==1:
- _d = getRow_ots(rows)[0]
- item["法人"] = _d.get("legal_person")
- item["法人电话"] = _d.get("phone_number")
- # item["简称"] = _d.get("alias")
- item["营业状态"] = _d.get("reg_status")
- bool_query = BoolQuery(must_queries=[
- RangeQuery("status",201,301),
- generateBoolShouldQuery(["doctitle","doctextcon","attachmenttextcon"],[item.get("企业名称")],MatchPhraseQuery)
- ])
- rows, next_token,total_count,is_all_succeed = ots_client.search("document","document_index",
- SearchQuery(bool_query,limit=1,get_total_count=True),columns_to_get=ColumnsToGet(return_type=ColumnReturnType.NONE))
- item["公告数量"] = total_count
- bool_query = BoolQuery(must_queries=[
- TermQuery("status",1),
- TermQuery("enterprise_name",item.get("企业名称")),
- ])
- rows, next_token,total_count,is_all_succeed = ots_client.search("enterprise_contact","enterprise_contact_index",
- SearchQuery(bool_query,sort=Sort(sorters=[FieldSort("score",SortOrder.DESC)]),limit=10,get_total_count=True),
- columns_to_get=ColumnsToGet(["contact_person","phone_no"],return_type=ColumnReturnType.SPECIFIED))
- list_concat = getRow_ots(rows)
- concat = ""
- for data_i in range(len(list_concat)):
- data = list_concat[data_i]
- concat += "联系人%d%s(%s)\n"%(data_i+1,data.get("contact_person",""),data.get("phone_no",""))
- item["联系人"] = concat
- except Exception:
- traceback.print_exc()
- mt = MultiThreadHandler(task_queue,_handle,None,30)
- mt.run()
- columns = ["企业名称","法人","法人电话","地址","注册地址","公告数量","联系人"]
- df_data = {}
- for data in list_data:
- for c in columns:
- if c not in df_data:
- df_data[c] = []
- df_data[c].append(data.get(c))
- df = pd.DataFrame(df_data)
- df.to_excel("%s.xlsx"%filename,columns=columns)
- if __name__=="__main__":
- # getTyc_company()
- getTycCompany()
- # exportEnterprise_by_bidNum()
- # print(getDictEnterprise(["南宁宏基建筑工程有限责任公司"],["phone"]))
- # exportEnterprise_by_phone()
- # make_Legal_enterprise()
- # transform_enterprise()
- # exportEnterprise()
- # exportContact()
- # attachColumn()
- # ots_client = getConnect_ots()
- # bool_query = BoolQuery(must_queries=[RangeQuery("tyc_id",1,include_lower=True),
- # RangeQuery("bid_number",4,include_lower=True)
- # ])
- # bool_query = BoolQuery(must_queries=[TermQuery("bid_number",0)],
- # must_not_queries=[ExistsQuery("tyc_id"),NestedQuery("contacts",ExistsQuery("contacts"))])
- #
- #
- # columns = ["name","contacts","province","city","address","reg_location"]
- # rows, next_token, total_count, is_all_succeed = ots_client.search("enterprise", "enterprise_index",
- # SearchQuery(bool_query,sort=Sort(sorters=[FieldSort("tyc_id",SortOrder.ASC)]), limit=100, get_total_count=True),
- # ColumnsToGet(columns,return_type=ColumnReturnType.SPECIFIED))
- # print(total_count)
- # exportEnterprise_GMV()