11 月之前 · 92ca7909f6
--- a/.idea/dataSources.xml
+++ b/.idea/dataSources.xml
@@ -12,7 +12,7 @@
 
				       <synchronize>true</synchronize>
			
 
				       <auto-commit>false</auto-commit>
			
 
				       <jdbc-driver>oracle.jdbc.OracleDriver</jdbc-driver>
			
 
				-      <jdbc-url>jdbc:oracle:thin:@121.46.18.113:10522:yanphone</jdbc-url>
			
 
				+      <jdbc-url>jdbc:oracle:thin:@dianxin.bidizhaobiao.com:10522:yanphone</jdbc-url>
			
 
				     </data-source>
			
 
				     <data-source source="LOCAL" name="mysql_测试" uuid="19d8fc36-28e0-4de8-bed4-c356c7fd53cb">
			
 
				       <driver-ref>mysql.8</driver-ref>
			
@@ -66,5 +66,12 @@
 
				       <jdbc-url>jdbc:postgresql://116.62.141.83:5432/postgres</jdbc-url>
			
 
				       <working-dir>$ProjectFileDir$</working-dir>
			
 
				     </data-source>
			
 
				+    <data-source source="LOCAL" name="mysql@localhost" uuid="8312b206-e701-448e-b945-8e554498c4d2">
			
 
				+      <driver-ref>mysql.8</driver-ref>
			
 
				+      <synchronize>true</synchronize>
			
 
				+      <jdbc-driver>com.mysql.cj.jdbc.Driver</jdbc-driver>
			
 
				+      <jdbc-url>jdbc:mysql://localhost:3306/mysql</jdbc-url>
			
 
				+      <working-dir>$ProjectFileDir$</working-dir>
			
 
				+    </data-source>
			
 
				   </component>
			
 
				 </project>
			
--- a/.idea/encodings.xml
+++ b/.idea/encodings.xml
@@ -11,6 +11,7 @@
 
				     <file url="file://$PROJECT_DIR$/data/exportFind_tenderee1.csv" charset="GBK" />
			
 
				     <file url="file://$PROJECT_DIR$/data/服务型客户.txt" charset="GBK" />
			
 
				     <file url="file://$PROJECT_DIR$/dataSource/ossUtils.py" charset="UTF-8" />
			
 
				+    <file url="file://$PROJECT_DIR$/export/html2text.py" charset="UTF-8" />
			
 
				     <file url="file://$PROJECT_DIR$/test/industry_keyword_expand.py" charset="UTF-8" />
			
 
				     <file url="file://$PROJECT_DIR$/test/拓展关键词.xlsx" charset="UTF-8" />
			
 
				     <file url="file://$PROJECT_DIR$/utils/ERNIE_utils.py" charset="UTF-8" />
			
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,6 +1,6 @@
 
				 <?xml version="1.0" encoding="UTF-8"?>
			
 
				 <project version="4">
			
 
				-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.5 (dl_nlp)" project-jdk-type="Python SDK">
			
 
				+  <component name="ProjectRootManager" version="2" languageLevel="JDK_19" project-jdk-name="Python 3.7 (py37)" project-jdk-type="Python SDK">
			
 
				     <output url="file://$PROJECT_DIR$/out" />
			
 
				   </component>
			
 
				 </project>
			
--- a/.idea/sqldialects.xml
+++ b/.idea/sqldialects.xml
@@ -3,6 +3,5 @@
 
				   <component name="SqlDialectMappings">
			
 
				     <file url="file://$PROJECT_DIR$/ddl" dialect="MySQL" />
			
 
				     <file url="file://$PROJECT_DIR$/ddl_postgres" dialect="PostgreSQL" />
			
 
				-    <file url="file://G:/要素提取标注备份/iepy_public_auth_group.sql" dialect="PostgreSQL" />
			
 
				   </component>
			
 
				 </project>
			
--- a/DataMining.iml
+++ b/DataMining.iml
@@ -3,7 +3,7 @@
 
				   <component name="NewModuleRootManager" inherit-compiler-output="true">
			
 
				     <exclude-output />
			
 
				     <content url="file://$MODULE_DIR$" />
			
 
				-    <orderEntry type="jdk" jdkName="Python 3.5 (dl_nlp)" jdkType="Python SDK" />
			
 
				+    <orderEntry type="jdk" jdkName="Python 3.7 (py37)" jdkType="Python SDK" />
			
 
				     <orderEntry type="sourceFolder" forTests="false" />
			
 
				   </component>
			
 
				 </module>
			
--- a/export/DoubaoUtils.py
+++ b/export/DoubaoUtils.py
--- a/export/cleanAgency.py
+++ b/export/cleanAgency.py
@@ -0,0 +1,31 @@
 
				+#coding:utf8
			
 
				+
			
 
				+import pandas as pd
			
 
				+import re
			
 
				+
			
 
				+def clean():
			
 
				+    filename = "20230227都大于100.xlsx"
			
 
				+    df = pd.read_excel(filename)
			
 
				+
			
 
				+    _count = 0
			
 
				+    df_data = {"name":[],
			
 
				+               "zbn":[],
			
 
				+               "dln":[],
			
 
				+               "sn":[]}
			
 
				+    for name,zbn,dln,sn in zip(df["nicknames"],df["zhao_biao_number"],df["dai_li_number"],df["same_number"]):
			
 
				+        if dln>10000 and re.search("招标|咨询",name) is not None:
			
 
				+            _count += 1
			
 
				+            print(_count,name,zbn,dln,sn)
			
 
				+            df_data["name"].append(name)
			
 
				+            df_data["zbn"].append(zbn)
			
 
				+            df_data["dln"].append(dln)
			
 
				+            df_data["sn"].append(sn)
			
 
				+    df = pd.DataFrame(df_data)
			
 
				+    df.to_excel("daili_check.xlsx")
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    clean()
			
--- a/export/docchannel.pk
+++ b/export/docchannel.pk
--- a/export/exportDocument.py
+++ b/export/exportDocument.py
--- a/export/exportEnterprise.py
+++ b/export/exportEnterprise.py
@@ -1052,12 +1052,22 @@ def attachColumn1():
 
				     df.to_excel("全国剩下数据16570-1(2)11.xlsx")
			
 
				 
			
 
				 def exportContact():
			
 
				-    filename = "../data/2023-03-06_190109_to_excel.xlsx"
			
 
				-    df = pd.read_excel(filename)
			
 
				-    list_ename = df["_id"]
			
 
				+    filename = r"G:\新建文件夹\WeChat Files\wxid_kluerlj8cn3b21\FileStorage\File\2024-09\企业名称名单.txt"
			
 
				+
			
 
				+    list_name = []
			
 
				+    with open(filename,"r",encoding="utf8") as f:
			
 
				+        while 1:
			
 
				+            line = f.readline()
			
 
				+            if not line:
			
 
				+                break
			
 
				+            line = line.strip()
			
 
				+            list_name.append(line)
			
 
				+
			
 
				+    # df = pd.read_excel(filename)
			
 
				+    # list_name = df["name"]
			
 
				 
			
 
				     list_dict = []
			
 
				-    for _en in list_ename:
			
 
				+    for _en in list_name:
			
 
				         if isinstance(_en,(str)) and _en!="":
			
 
				             _dict = {"enterprise_name":_en}
			
 
				             list_dict.append(_dict)
			
@@ -1072,28 +1082,28 @@ def exportContact():
 
				 
			
 
				         rows,next_token,total_count,is_all_succeed = ots_client.search("enterprise","enterprise_index",
			
 
				                                                                        SearchQuery(bool_query,limit=1),
			
 
				-                                                                       columns_to_get=ColumnsToGet(["reg_location"],return_type=ColumnReturnType.SPECIFIED))
			
 
				+                                                                       columns_to_get=ColumnsToGet(["tou_biao_number","zhong_biao_number"],return_type=ColumnReturnType.SPECIFIED))
			
 
				         l_data = getRow_ots(rows)
			
 
				         if len(l_data)>0:
			
 
				             _d.update(l_data[0])
			
 
				 
			
 
				-        bool_query = BoolQuery(must_queries=[TermQuery("enterprise_name",_name),
			
 
				-                                             BoolQuery(should_queries=[TermQuery("is_legal_person",1),
			
 
				-                                                                       TermQuery("is_mobile",1)])])
			
 
				-
			
 
				-        rows,next_token,total_count,is_all_succeed = ots_client.search("enterprise_contact","enterprise_contact_index",
			
 
				-                                                                       SearchQuery(bool_query,limit=5),
			
 
				-                                                                       columns_to_get=ColumnsToGet(["enterprise_name","contact_person","phone_no","position"],return_type=ColumnReturnType.SPECIFIED))
			
 
				-        l_data = getRow_ots(rows)
			
 
				-        if len(l_data)>0:
			
 
				-            _d.update(l_data[0])
			
 
				+        # bool_query = BoolQuery(must_queries=[TermQuery("enterprise_name",_name),
			
 
				+        #                                      BoolQuery(should_queries=[TermQuery("is_legal_person",1),
			
 
				+        #                                                                TermQuery("is_mobile",1)])])
			
 
				+        #
			
 
				+        # rows,next_token,total_count,is_all_succeed = ots_client.search("enterprise_contact","enterprise_contact_index",
			
 
				+        #                                                                SearchQuery(bool_query,limit=5),
			
 
				+        #                                                                columns_to_get=ColumnsToGet(["enterprise_name","contact_person","phone_no","position"],return_type=ColumnReturnType.SPECIFIED))
			
 
				+        # l_data = getRow_ots(rows)
			
 
				+        # if len(l_data)>0:
			
 
				+        #     _d.update(l_data[0])
			
 
				 
			
 
				     mt = MultiThreadHandler(task_queue,_handle,None,60)
			
 
				     mt.run()
			
 
				     df_data= {}
			
 
				-    columns = ["name","contact_person","phone_no","reg_location"]
			
 
				+    columns = ["name","tou_biao_number","zhong_biao_number"]
			
 
				     for _d in list_dict:
			
 
				-        if "phone_no" in _d:
			
 
				+        if _d.get("tou_biao_number",0)>0 or _d.get("zhong_biao_number",0)>0:
			
 
				             for c in columns:
			
 
				                 if c not in df_data:
			
 
				                     df_data[c] = []
			
@@ -1101,7 +1111,7 @@ def exportContact():
 
				 
			
 
				 
			
 
				     df = pd.DataFrame(df_data)
			
 
				-    df.to_excel("../data/%s_export_enterprise.xlsx"%(getCurrent_date(format="%Y-%m-%d_%H%M%S")),encoding="utf",columns=columns)
			
 
				+    df.to_csv("../data/%s_export_enterprise.csv"%(getCurrent_date(format="%Y-%m-%d_%H%M%S")),encoding="utf",columns=columns)
			
 
				 
			
 
				 def getTycCompany():
			
 
				 
			
@@ -1179,15 +1189,16 @@ def getTycCompany():
 
				 
			
 
				 if __name__=="__main__":
			
 
				     # getTyc_company()
			
 
				-    getTycCompany()
			
 
				+    # getTycCompany()
			
 
				     # exportEnterprise_by_bidNum()
			
 
				     # print(getDictEnterprise(["南宁宏基建筑工程有限责任公司"],["phone"]))
			
 
				     # exportEnterprise_by_phone()
			
 
				     # make_Legal_enterprise()
			
 
				     # transform_enterprise()
			
 
				     # exportEnterprise()
			
 
				-    # exportContact()
			
 
				+    exportContact()
			
 
				     # attachColumn()
			
 
				+
			
 
				     # attachColumn()
			
 
				 
			
 
				     # ots_client = getConnect_ots()
			
--- a/export/exportEs.py
+++ b/export/exportEs.py
@@ -645,5 +645,122 @@ if __name__ == '__main__':
 
				         },
			
 
				         "_source":["contacts_enterprise_name","contacts_person_name","contacts_phone_no"]
			
 
				     }
			
 
				+
			
 
				+    body = {
			
 
				+        "_source": "_name",
			
 
				+        'query': {  # 查询命令
			
 
				+            "bool": {
			
 
				+                "must":[
			
 
				+                    # {"has_child":{
			
 
				+                    #     "type":"contacts",
			
 
				+                    #     "query":{
			
 
				+                    #         "bool":{
			
 
				+                    #             'must': [
			
 
				+                    #                 {'match_phrase':
			
 
				+                    #                      {'nicknames':'医院'
			
 
				+                    #                       # {
			
 
				+                    #                       # "query": "医院",  # >= 大于等于
			
 
				+                    #                       # # # "lt": 1650038400000  # < 小于
			
 
				+                    #                       # }
			
 
				+                    #                       },
			
 
				+                    #                  },
			
 
				+                    #                 {'term':
			
 
				+                    #                      {'contacts_is_legal_person':0
			
 
				+                    #                       # {
			
 
				+                    #                       # "query": "医院",  # >= 大于等于
			
 
				+                    #                       # # # "lt": 1650038400000  # < 小于
			
 
				+                    #                       # }
			
 
				+                    #                       },
			
 
				+                    #                  },{'term':
			
 
				+                    #                         {'contacts_is_mobile':1
			
 
				+                    #                          # {
			
 
				+                    #                          # "query": "医院",  # >= 大于等于
			
 
				+                    #                          # # # "lt": 1650038400000  # < 小于
			
 
				+                    #                          # }
			
 
				+                    #                          },
			
 
				+                    #                     }
			
 
				+                    #             ]
			
 
				+                    #         }
			
 
				+                    #     }
			
 
				+                    # }},
			
 
				+                    # {"range":{"zhong_biao_number":{"gt":0}}}
			
 
				+                    {
			
 
				+                        "range": {
			
 
				+                            "status": {
			
 
				+                                "gte": 201,
			
 
				+                                "lte": 300
			
 
				+                            }
			
 
				+                        }
			
 
				+                    },
			
 
				+                    {
			
 
				+                        "bool": {
			
 
				+                            "should": [
			
 
				+                                {
			
 
				+                                    "match_phrase": {
			
 
				+                                        "nicknames": "合同能源管理"
			
 
				+                                    }
			
 
				+                                },{
			
 
				+                                    "match_phrase": {
			
 
				+                                        "history_names": "合同能源管理"
			
 
				+                                    }
			
 
				+                                },{
			
 
				+                                    "match_phrase": {
			
 
				+                                        "alias": "合同能源管理"
			
 
				+                                    }
			
 
				+                                }
			
 
				+                            ]
			
 
				+                        }
			
 
				+                    }
			
 
				+
			
 
				+                    # {'match_phrase':
			
 
				+                    #      {'business_scope':'合同能源管理'
			
 
				+                    #       # {
			
 
				+                    #       # "query": "医院",  # >= 大于等于
			
 
				+                    #       # # # "lt": 1650038400000  # < 小于
			
 
				+                    #       # }
			
 
				+                    #       },
			
 
				+                    #  },
			
 
				+                    # {"bool":{
			
 
				+                    #     "should":[
			
 
				+                    #         {"term":{
			
 
				+                    #             "district":"鹤山"
			
 
				+                    #         }
			
 
				+                    #         }
			
 
				+                    #     ]
			
 
				+                    # }},
			
 
				+                    # {"bool":{
			
 
				+                    #     "should":list_should
			
 
				+                    # }}
			
 
				+
			
 
				+                ]
			
 
				+                # 'must': [
			
 
				+                #
			
 
				+                #
			
 
				+                #     # {"range":{"tyc_id":{"gt":1111}}}
			
 
				+                #     # {'match_phrase':
			
 
				+                #     #     {'contacts_is_legal_person':1
			
 
				+                #     #         # {
			
 
				+                #     #         # "query": "医院",  # >= 大于等于
			
 
				+                #     #         # # # "lt": 1650038400000  # < 小于
			
 
				+                #     #         # }
			
 
				+                #     #     },
			
 
				+                #     # },{'match_phrase':
			
 
				+                #     #                        {'contacts_is_mobile':1
			
 
				+                #     #                         # {
			
 
				+                #     #                         # "query": "医院",  # >= 大于等于
			
 
				+                #     #                         # # # "lt": 1650038400000  # < 小于
			
 
				+                #     #                         # }
			
 
				+                #     #                         },
			
 
				+                #     #    }
			
 
				+                # ]
			
 
				+
			
 
				+            },
			
 
				+
			
 
				+        },
			
 
				+        # "sort": [
			
 
				+        #         {"id": "desc"}
			
 
				+        #     ]
			
 
				+        "_source":"name"
			
 
				+    }
			
 
				     list_result.extend(search_data(es,body,10000000,False))
			
 
				-    data_to_excel(list_result,["contacts_enterprise_name","contacts_person_name","contacts_phone_no"])
			
 
				+    data_to_excel(list_result,["name"])
			
--- a/export/exportProject.py
+++ b/export/exportProject.py
@@ -16,6 +16,7 @@ import traceback
 
				 from utils.hashUtil import aesCipher
			
 
				 from export.exportEnterprise import getDictEnterprise,getOneContact
			
 
				 from export.exportUtils import generateBoolShouldQuery
			
 
				+from queue import Queue
			
 
				 
			
 
				 
			
 
				 data_path = "../data/"
			
@@ -444,8 +445,163 @@ def appendCellphones():
 
				 
			
 
				     file = "../data/"
			
 
				 
			
 
				+from export.exportUtils import *
			
 
				+import jieba
			
 
				+
			
 
				+def export_industry_keywords_by_enterprise(list_enterprise):
			
 
				+
			
 
				+    task_queue = Queue()
			
 
				+    result_queue = Queue()
			
 
				+
			
 
				+    list_query = []
			
 
				+    for _enterprise in list_enterprise:
			
 
				+        _query = BoolQuery(must_queries=[
			
 
				+            TermQuery("win_tenderer",_enterprise),
			
 
				+            RangeQuery("status",201,301)
			
 
				+        ])
			
 
				+        list_query.append({"query":_query,"limit":1000})
			
 
				+
			
 
				+    list_data = getDocument(list_query,columns=["docid","doctitles","project_name","product","win_tenderer"],
			
 
				+                            table_name="project2",
			
 
				+                            table_index="project2_index")
			
 
				+    dict_keywords = {}
			
 
				+    dict_keywords_product = {}
			
 
				+    dict_keywords_product_count = {}
			
 
				+    for _data in list_data:
			
 
				+        doctitles = _data.get("doctitles","")
			
 
				+        project_name = _data.get("project_name","")
			
 
				+        product = _data.get("product","")
			
 
				+        for _keyword in doctitles.split(","):
			
 
				+            for _word in jieba.cut(_keyword):
			
 
				+                if _word in dict_keywords:
			
 
				+                    dict_keywords[_word] += 1
			
 
				+                else:
			
 
				+                    dict_keywords[_word] = 1
			
 
				+        for _keyword in project_name.split(","):
			
 
				+            for _word in jieba.cut(_keyword):
			
 
				+                if _word in dict_keywords:
			
 
				+                    dict_keywords[_word] += 1
			
 
				+                else:
			
 
				+                    dict_keywords[_word] = 1
			
 
				+        for _keyword in product.split(","):
			
 
				+            if _keyword in dict_keywords_product_count:
			
 
				+                dict_keywords_product_count[_keyword] += 1
			
 
				+            else:
			
 
				+                dict_keywords_product_count[_keyword] = 1
			
 
				+            for _word in jieba.cut(_keyword):
			
 
				+                if _word in dict_keywords:
			
 
				+                    dict_keywords[_word] += 1
			
 
				+                else:
			
 
				+                    dict_keywords[_word] = 1
			
 
				+
			
 
				+                if _word in dict_keywords_product:
			
 
				+                    dict_keywords_product[_word] += 1
			
 
				+                else:
			
 
				+                    dict_keywords_product[_word] = 1
			
 
				+
			
 
				+    list_keywords = []
			
 
				+    for _keyword,count in dict_keywords.items():
			
 
				+        list_keywords.append([_keyword,count])
			
 
				+    list_keywords.sort(key=lambda x:x[1],reverse=True)
			
 
				+    list_keywords.insert(0,["关键词","数量"])
			
 
				+    list_keywords = list_keywords[:10000]
			
 
				+
			
 
				+    list_keywords_product = []
			
 
				+    for _keyword,count in dict_keywords_product.items():
			
 
				+        list_keywords_product.append([_keyword,count])
			
 
				+    list_keywords_product.sort(key=lambda x:x[1],reverse=True)
			
 
				+    list_keywords_product.insert(0,["关键词","数量"])
			
 
				+    list_keywords_product = list_keywords_product[:10000]
			
 
				+
			
 
				+    list_keywords_product_count = []
			
 
				+    for _keyword,count in dict_keywords_product_count.items():
			
 
				+        list_keywords_product_count.append([_keyword,count])
			
 
				+    list_keywords_product_count.sort(key=lambda x:x[1],reverse=True)
			
 
				+    list_keywords_product_count.insert(0,["关键词","数量"])
			
 
				+    list_keywords_product_count = list_keywords_product_count[:10000]
			
 
				+
			
 
				+    filename = "../data/%s_行业关键词.xlsx"%(getCurrent_date("%Y-%m-%d_%H%M%S"))
			
 
				+    with pd.ExcelWriter(filename) as writer:
			
 
				+        df_1 = pd.DataFrame(list_data)
			
 
				+        df_1.to_excel(writer,sheet_name="项目数据")
			
 
				+        df_data = pd.DataFrame(list_keywords)
			
 
				+        df_data.to_excel(writer,sheet_name="标题项目名称产品词频统计")
			
 
				+        df_data = pd.DataFrame(list_keywords_product)
			
 
				+        df_data.to_excel(writer,sheet_name="产品词频统计")
			
 
				+        df_data = pd.DataFrame(list_keywords_product_count)
			
 
				+        df_data.to_excel(writer,sheet_name="产品项目词频统计")
			
 
				+
			
 
				+
			
 
				+def turn_structure():
			
 
				+    filename = r'G:\新建文件夹\WeChat Files\wxid_kluerlj8cn3b21\FileStorage\File\2025-06\食堂食材行业关键词.xlsx'
			
 
				+    df = pd.read_excel(filename)
			
 
				+    list_product_exclude = df["产品排除词"]
			
 
				+    list_title_exclude = df["标题排除词"]
			
 
				+    list_keywords = df["标题+正文关键词"]
			
 
				+    list_title_exclude = [a for a in list_title_exclude if isinstance(a,str)]
			
 
				+    list_keywords = [a for a in list_keywords if isinstance(a,str)]
			
 
				+    list_product_exclude = [a for a in list_product_exclude if isinstance(a,str)]
			
 
				+    list_data = [["行业","全文关键词","全文排除词","标题排除词","产品排除词"]]
			
 
				+    list_data.append(["食堂食材","、".join(list_keywords),"","、".join(list_title_exclude),"、".join(list_product_exclude)])
			
 
				+    df1 = pd.DataFrame(list_data)
			
 
				+    df1.to_excel("../data/%s_行业关键词.xlsx"%(getCurrent_date("%Y-%m-%d_%H%M%S")),sheet_name="行业关键词")
			
 
				+
			
 
				 
			
 
				 if __name__=="__main__":
			
 
				-    exportProject_by_pagetime()
			
 
				+    # exportProject_by_pagetime()
			
 
				     # exportProjectWithOneDocid()
			
 
				     # exportCompanyByCycleProduct()
			
 
				+    turn_structure()
			
 
				+#     list_enterprise = splitIntoList('''
			
 
				+#     明喆集团股份有限公司
			
 
				+# 招商积余物业管理有限公司
			
 
				+# 广州粤华物业有限公司
			
 
				+# 广州广电城市服务集团股份有限公司
			
 
				+# 绿城物业服务集团有限公司
			
 
				+# 龙城城市运营服务集团有限公司
			
 
				+# 深业物业运营集团股份有限公司
			
 
				+# 广东宏德科技物业有限公司
			
 
				+# 保利物业服务股份有限公司
			
 
				+# 新大正物业集团股份有限公司
			
 
				+# 山东明德物业管理集团有限公司
			
 
				+# 深圳市金地物业管理有限公司
			
 
				+# 上海复欣物业管理发展有限公司
			
 
				+# 招商局物业管理有限公司
			
 
				+# 东吴服务产业集团（江苏）有限公司
			
 
				+# 碧桂园生活服务集团股份有限公司
			
 
				+# 天津市金玉物业管理有限公司
			
 
				+# 润加物业服务（深圳）有限公司
			
 
				+# 山东宏泰物业发展有限公司
			
 
				+# 爱玛客服务产业（中国）有限公司
			
 
				+# 中海物业管理有限公司
			
 
				+# 浙江亚太酒店物业服务有限公司
			
 
				+# 深圳万物商企物业服务有限公司
			
 
				+# 天津天孚物业管理有限公司
			
 
				+# 上海益中亘泰（集团）股份有限公司
			
 
				+# 上海申勤物业管理服务有限公司
			
 
				+# 广东公诚设备资产服务有限公司
			
 
				+# 上海东湖物业管理有限公司
			
 
				+# 天津市赛驰物业服务有限公司
			
 
				+# 安徽省长城物业管理有限公司
			
 
				+# 深圳市万科物业服务有限公司
			
 
				+# 上海生乐物业管理有限公司
			
 
				+# 金科智慧服务集团股份有限公司
			
 
				+# 山东润华物业管理有限公司
			
 
				+# 国药诺达物业服务有限公司
			
 
				+# 深业置地（深圳）物业管理有限公司
			
 
				+# 广东华信服务集团有限公司
			
 
				+# 天津玉龙源物业管理服务股份有限公司
			
 
				+# 广州珠江城市管理服务集团股份有限公司
			
 
				+# 中航物业管理有限公司
			
 
				+# 深圳市广美隆物业清洁服务有限公司
			
 
				+# 浙江浙大新宇物业集团有限公司
			
 
				+# 金融街物业股份有限公司
			
 
				+# 天津峥嵘物业管理有限公司
			
 
				+# 新疆德泰保安服务有限公司
			
 
				+# 北京住总北宇物业服务有限责任公司
			
 
				+# 广东泰科物业管理有限公司
			
 
				+# 德州市陵城区人才发展集团有限公司
			
 
				+# 乌鲁木齐阳光管道物业服务有限公司
			
 
				+# 深业物业集团有限公司
			
 
				+#     ''',_splitkeys="\n|\s")
			
 
				+#     export_industry_keywords_by_enterprise(list_enterprise)
			
--- a/jobs/exportJobs.py
+++ b/jobs/exportJobs.py
@@ -37,7 +37,7 @@ def export_medicine_friday():
 
				                     host = "smtp.exmail.qq.com"
			
 
				                     username = "vip@bidizhaobiao.com"
			
 
				                     password = "Biaoxun66-"
			
 
				-                    receivers = ["1985262186@qq.com","1175730271@qq.com","1265797328@qq.com","1289358902@qq.com"]
			
 
				+                    receivers = ["1985262186@qq.com","1265797328@qq.com","1289358902@qq.com"]
			
 
				                     attachs = [filename]
			
 
				 
			
 
				                     sendEmail(host,username,password,receivers,attachs=attachs)
			
@@ -139,7 +139,7 @@ def export2():
 
				                     host = "smtp.exmail.qq.com"
			
 
				                     username = "vip@bidizhaobiao.com"
			
 
				                     password = "Biaoxun66-"
			
 
				-                    receivers = ["1175730271@qq.com","493894608@qq.com"]
			
 
				+                    receivers = ["493894608@qq.com"]
			
 
				                     # receivers = ["1175730271@qq.com"]
			
 
				                     attachs = [filename]
			
 
				 
			
@@ -280,7 +280,7 @@ def export5():
 
				                     host = "smtp.exmail.qq.com"
			
 
				                     username = "vip@bidizhaobiao.com"
			
 
				                     password = "Biaoxun66-"
			
 
				-                    receivers = ["1175730271@qq.com","365531448@qq.com"]
			
 
				+                    receivers = ["365531448@qq.com"]
			
 
				                     # receivers = ["1175730271@qq.com"]
			
 
				                     attachs = [filename]
			
 
				 
			
@@ -741,7 +741,7 @@ class Export3():
 
				             username = "vip1@bidizhaobiao.com"
			
 
				             password = "Biaoxun666+"
			
 
				             # receivers = ["1175730271@qq.com","995116318@qq.com","huangxiaofang@cvte.com"]
			
 
				-            receivers = ["1175730271@qq.com","1208135584@qq.com","youyuer@cvte.com","chenyuxue@cvte.com"]
			
 
				+            receivers = ["1208135584@qq.com","544329183@qq.com","youyuer@cvte.com","chenyuxue@cvte.com"]
			
 
				 
			
 
				             # receivers = ["1175730271@qq.com"]
			
 
				 
			
@@ -1009,7 +1009,7 @@ class Export3():
 
				             username = "vip1@bidizhaobiao.com"
			
 
				             password = "Biaoxun666+"
			
 
				             # receivers = ["1175730271@qq.com","995116318@qq.com","huangxiaofang@cvte.com"]
			
 
				-            receivers = ["1175730271@qq.com","1208135584@qq.com","wanghongyan@cvte.com"]
			
 
				+            receivers = ["1208135584@qq.com","544329183@qq.com","wanghongyan@cvte.com"]
			
 
				 
			
 
				             # receivers = ["1175730271@qq.com"]
			
 
				 
			
@@ -1335,8 +1335,8 @@ class Export3():
 
				         host = "smtp.exmail.qq.com"
			
 
				         username = "vip@bidizhaobiao.com"
			
 
				         password = "Biaoxun66-"
			
 
				-        receivers = ["1175730271@qq.com","747012698@qq.com","995116318@qq.com"]
			
 
				-        receivers = ["1175730271@qq.com"]
			
 
				+        receivers = ["747012698@qq.com","995116318@qq.com"]
			
 
				+        # receivers = ["1175730271@qq.com"]
			
 
				         attachs = [filename]
			
 
				 
			
 
				         sendEmail(host,username,password,receivers,attachs=attachs)
			
@@ -1667,8 +1667,8 @@ class Export3():
 
				         host = "smtp.exmail.qq.com"
			
 
				         username = "vip@bidizhaobiao.com"
			
 
				         password = "Biaoxun66-"
			
 
				-        receivers = ["1175730271@qq.com","747012698@qq.com","995116318@qq.com"]
			
 
				-        receivers = ["1175730271@qq.com"]
			
 
				+        receivers = ["747012698@qq.com","995116318@qq.com"]
			
 
				+        # receivers = ["1175730271@qq.com"]
			
 
				         attachs = [filename]
			
 
				 
			
 
				         sendEmail(host,username,password,receivers,attachs=attachs)
			
@@ -2013,7 +2013,7 @@ def export_15824381998():
 
				                 host = "smtp.exmail.qq.com"
			
 
				                 username = "vip@bidizhaobiao.com"
			
 
				                 password = "Biaoxun66-"
			
 
				-                receivers = ["1175730271@qq.com","565748324@qq.com","1396488964@qq.com"]
			
 
				+                receivers = ["565748324@qq.com"]
			
 
				                 # receivers = ["1175730271@qq.com"]
			
 
				                 attachs = [filename]
			
 
				 
			
@@ -2100,7 +2100,7 @@ def export_15824381998():
 
				                 host = "smtp.exmail.qq.com"
			
 
				                 username = "vip@bidizhaobiao.com"
			
 
				                 password = "Biaoxun66-"
			
 
				-                receivers = ["1175730271@qq.com","565748324@qq.com","1396488964@qq.com","1141385052@qq.com","1713739820@qq.com"]
			
 
				+                receivers = ["565748324@qq.com","1141385052@qq.com","1713739820@qq.com"]
			
 
				                 # receivers = ["1175730271@qq.com"]
			
 
				                 attachs = [filename]
			
 
				 
			
@@ -2535,7 +2535,7 @@ def export_13510123669():
 
				                 host = "smtp.exmail.qq.com"
			
 
				                 username = "vip@bidizhaobiao.com"
			
 
				                 password = "Biaoxun66-"
			
 
				-                # receivers = ["724949655@qq.com","1396488964@qq.com"]
			
 
				+                # receivers = ["724949655@qq.com"]
			
 
				                 receivers = ["md47@zuowei.com"]
			
 
				 
			
 
				                 # receivers = ["1175730271@qq.com"]
			
@@ -2557,8 +2557,8 @@ def job_export():
 
				     _scheduler.add_job(export2,"cron",hour=9)
			
 
				     _scheduler.add_job(export5,"cron",hour=16)
			
 
				     # _scheduler.add_job(e3.export4,"cron",hour=7)
			
 
				-    _scheduler.add_job(e3.trytimes,"cron",hour=21)
			
 
				-    _scheduler.add_job(e3.export3_1,"cron",hour=21)
			
 
				+    # _scheduler.add_job(e3.trytimes,"cron",hour=21)
			
 
				+    # _scheduler.add_job(e3.export3_1,"cron",hour=21)
			
 
				     _scheduler.add_job(export_15824381998,"cron",hour=21)
			
 
				     _scheduler.add_job(export_13510123669,"cron",hour=10)
			
 
				     _scheduler.start()
			
@@ -2568,14 +2568,14 @@ if __name__=="__main__":
 
				     # export_medicine_friday()
			
 
				     # export2()
			
 
				     # export_document_except()
			
 
				-    # e3 = Export3()
			
 
				+    e3 = Export3()
			
 
				     # e3.export4_by_project()
			
 
				     # e3.export4()
			
 
				-    # e3.trytimes()
			
 
				+    e3.trytimes()
			
 
				     # e3.export3_1()
			
 
				     # export5()
			
 
				     # export_15824381998()
			
 
				-    export_13510123669()
			
 
				+    # export_13510123669()
			
 
				 
			
 
				 
			
 
				 
			
--- a/mining/ChatAgent.py
+++ b/mining/ChatAgent.py
@@ -0,0 +1,91 @@
 
				+#coding:utf8
			
 
				+
			
 
				+import requests
			
 
				+import time
			
 
				+import traceback
			
 
				+import json
			
 
				+
			
 
				# Connection settings for the Coze chat API, shared by the request helpers in this module.
				import os
				agent_id = "7458183497300492338"
				url = "https://api.coze.cn/v3/chat"
				# SECURITY: a personal access token is committed to source control here. It should be
				# rotated and supplied through the COZE_API_TOKEN environment variable; the literal is
				# kept only as a fallback so existing deployments keep working until that is done.
				Authorization = "Bearer " + (os.environ.get("COZE_API_TOKEN") or "pat_XXlExeHd1loVyHY1kV7Z38GjBERfmvOAvhfxCtXSYLOzNtqYXBqd9Sh3BYwJJzjw")
				user_id = "2103446848"
				# Headers sent with every request: bearer authentication plus a JSON body.
				headers = {"Authorization":Authorization,
				           "Content-Type":"application/json"}
			
 
				+
			
 
				+
			
 
				def chat_agent(msg,retry_time=3):
				    """Send ``msg`` to the configured bot and return the reply text.
				    The request is POSTed to the module-level ``url`` with ``headers``; the whole
				    response body is read and handed to ``decode_msg``.
				    :param msg: user message text sent as a single ``additional_messages`` entry.
				    :param retry_time: maximum number of attempts before giving up.
				    :return: the string produced by ``decode_msg``, or ``None`` when every
				        attempt raised (network error, timeout, non-2xx status, ...).
				    """
				    data_raw = {"bot_id":agent_id,
				                "user_id":user_id,
				                "stream":True,
				                "auto_save_history":True,
				                "additional_messages":[
				                    {
				                        "role":"user",
				                        "content":msg,
				                        "content_type":"text"
				                    }
				                ]}
				    for attempt in range(retry_time):
				        try:
				            resp = requests.post(url,headers=headers,json=data_raw,timeout=60)
				            # An HTTP error body would otherwise decode to "" and be returned as a
				            # successful (empty) answer without any retry.
				            resp.raise_for_status()
				            return decode_msg(resp.content.decode("utf-8"))
				        except Exception:
				            traceback.print_exc()
				            # Pause between attempts only; there is nothing to wait for after the last one.
				            if attempt < retry_time-1:
				                time.sleep(1)
				    return None
			
 
				+
			
 
				+
			
 
				+msg_list_url = "https://api.coze.cn/v3/chat/message/list"
			
 
				+
			
 
				+
			
 
				def get_msg_list(conversation_id,chat_id,retry_time=3):
				    """Request the message list of one chat and return the decoded text.
				    POSTs ``conversation_id`` and ``chat_id`` as a JSON body to the module-level
				    ``msg_list_url`` and passes the response body to ``decode_msg``.
				    :param conversation_id: conversation identifier sent in the request body.
				    :param chat_id: chat identifier sent in the request body.
				    :param retry_time: maximum number of attempts before giving up.
				    :return: the string produced by ``decode_msg``, or ``None`` when every
				        attempt raised.
				    """
				    # NOTE(review): decode_msg only understands "event:/data:" framed streams; confirm
				    # that this endpoint answers in that format (and accepts POST with a JSON body).
				    data_raw = {"conversation_id":conversation_id,
				                "chat_id":chat_id}
				    for attempt in range(retry_time):
				        try:
				            # A timeout keeps a stalled connection from blocking the caller forever.
				            resp = requests.post(msg_list_url,headers=headers,json=data_raw,timeout=60)
				            resp.raise_for_status()
				            # The body must be read from ``resp``; the previous code read an unrelated
				            # ``_result`` name, so every attempt failed and the function returned None.
				            return decode_msg(resp.content.decode("utf-8"))
				        except Exception:
				            traceback.print_exc()
				            if attempt < retry_time-1:
				                time.sleep(1)
				    return None
			
 
				+
			
 
				+
			
 
				def decode_msg(content):
				    """Concatenate the text of all message-delta events in an event stream.
				    ``content`` is split into frames on blank lines. Only frames made of exactly
				    two lines, an ``event:`` line followed by a ``data:`` line, are considered,
				    and only ``conversation.message.delta`` events contribute their JSON
				    ``content`` field to the result.
				    :param content: decoded response body (``str``).
				    :return: the joined delta texts; ``""`` when there are none.
				    """
				    delta_event = "conversation.message.delta"
				    pieces = []
				    # Streams may use CRLF line endings; normalise them so the blank-line split works.
				    for frame in content.replace("\r\n","\n").split("\n\n"):
				        lines = frame.split("\n")
				        if len(lines) != 2:
				            continue
				        event_line, data_line = lines
				        if not event_line.startswith("event:") or not data_line.startswith("data:"):
				            continue
				        if event_line[len("event:"):].strip() != delta_event:
				            continue
				        try:
				            payload = json.loads(data_line[len("data:"):])
				        except ValueError:
				            # Malformed JSON in one frame must not discard the rest of the answer.
				            continue
				        if isinstance(payload,dict):
				            text = payload.get("content")
				            # Skip missing or non-text content instead of failing on concatenation.
				            if isinstance(text,str):
				                pieces.append(text)
				    return "".join(pieces)
			
 
				+
			
 
				+
			
 
				if __name__ == '__main__':
				    # Manual smoke test: send one sample question to the bot and print the reply.
				    question = "\n    上海市青浦区华新镇社区事务受理服务中心的完整名称和统一信用代码是多少，还有对齐原因，以json格式给出准确的结果\n    "
				    print(chat_agent(question))
				    # _result = get_msg_list('7458566549444739098','7458566549444722714')
				    # print(_result.content.decode("utf-8"))
			
--- a/mining/DoubaoUtils.py
+++ b/mining/DoubaoUtils.py
--- a/mining/chatUtil.py
+++ b/mining/chatUtil.py
@@ -0,0 +1,85 @@
 
				+#coding:utf8
			
 
				+
			
 
				+from bs4 import BeautifulSoup
			
 
				+import re
			
 
				+
			
 
				def html2text(_html):
				    """Flatten HTML into plain text, rendering tables as ``|``-separated rows.
				    :param _html: an HTML string (parsed with the ``lxml`` parser) or an
				        already-parsed BeautifulSoup node.
				    :return: the extracted text. A node that contains a ``table``/``tbody``
				        is handled by converting each direct child element and joining the
				        results with newlines; a ``table``/``tbody`` node becomes one line per
				        row (header cells and data cells on separate lines) followed by a blank
				        line; any other node becomes its text with all whitespace removed.
				    Side effect: converted nodes are removed from their tree via
				    ``decompose()``, so a soup passed in by the caller is modified.
				    """
				    if isinstance(_html,str):
				        _soup = BeautifulSoup(_html,"lxml")
				    else:
				        _soup = _html
				    if _soup.find("table") is not None or _soup.find("tbody") is not None:
				        # NOTE(review): only child *elements* are visited here, so text placed
				        # directly inside this node (next to a nested table) is not emitted.
				        return "\n".join([html2text(child) for child in _soup.find_all(recursive=False)])
				    if _soup.name in ("table","tbody"):
				        rows = []
				        for tr in _soup.find_all("tr"):
				            # Header cells first, then data cells, each group on its own line.
				            for cell_tag in ("th","td"):
				                cells = tr.find_all(cell_tag)
				                if len(cells)>0:
				                    rows.append("|".join([re.sub(r'\s','',cell.get_text()) for cell in cells]))
				        if rows:
				            _table_text = "%s\n\n"%"\n".join(rows)
				        else:
				            # No rows found: fall back to the raw text. The previous check compared the
				            # already formatted string (always ending in "\n\n") with "", so it never ran.
				            _table_text = _soup.get_text()
				        _soup.decompose()
				        return _table_text
				    _text = re.sub(r'\s','',_soup.get_text().strip())
				    _soup.decompose()
				    return _text
			
 
				+
			
 
				def table2list(_html):
				    """Convert an HTML table into a list of rows of cell texts.
				    :param _html: an HTML string (parsed with the ``lxml`` parser) or an
				        already-parsed ``table``/``tbody`` node.
				    :return: a list with one entry per non-empty cell group; for every ``tr`` the
				        ``th`` texts (if any) come first and the ``td`` texts (if any) follow as a
				        separate entry, each text with all whitespace removed. ``None`` when no
				        ``table``/``tbody`` is available.
				    """
				    if isinstance(_html,str):
				        # The parsed document root is never itself a table, so string input used to
				        # return None unconditionally; look up the first table element instead.
				        _soup = BeautifulSoup(_html,'lxml').find(["table","tbody"])
				        if _soup is None:
				            return None
				    else:
				        _soup = _html
				    if _soup.name not in ("table","tbody"):
				        return None
				    rows = []
				    for tr in _soup.find_all("tr"):
				        for cell_tag in ("th","td"):
				            cells = [re.sub(r'\s','',cell.get_text()) for cell in tr.find_all(cell_tag)]
				            if len(cells)>0:
				                rows.append(cells)
				    return rows
			
 
				+
			
 
				def tableList2text(table_list):
				    """Render a list of rows as ``|``-separated text lines.
				    :param table_list: iterable of rows, each a sequence of cell strings.
				    :return: one line per non-empty row, cells joined with ``|`` after removing all
				        whitespace inside each cell, followed by a blank line (``"\\n\\n"``).
				    """
				    # Raw string: '\s' in a plain literal is an invalid escape sequence.
				    lines = ["|".join([re.sub(r'\s','',td) for td in tr])
				             for tr in table_list if len(tr)>0]
				    return "%s\n\n"%"\n".join(lines)
			
--- a/utils/ERNIE_utils.py
+++ b/utils/ERNIE_utils.py
@@ -8,6 +8,7 @@ def get_access_token():
 
				     """
			
 
				 
			
 
				     url = "https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=gnwVXv96An9qMYqq9eWbeNqk&client_secret=mDsRQbCPsV4N7x28LbwkhTAaLmrrDnXk"
			
 
				+    url = "https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=Ok8QMe4qIQOAex0F9Gf1uns0&client_secret=6DjGGDdvhnBaEOMdSXAg02KxZnQhWpbd"
			
 
				 
			
 
				     payload = json.dumps("")
			
 
				     headers = {
			
@@ -19,17 +20,18 @@ def get_access_token():
 
				     return response.json().get("access_token")
			
 
				 
			
 
				 def main():
			
 
				-    # _token = get_access_token()
			
 
				-    _token = "24.93c9d66ffc94ffaef6c6c9d35770a5f5.2592000.1701242081.282335-37357318"
			
 
				+    _token = get_access_token()
			
 
				+    # _token = "24.93c9d66ffc94ffaef6c6c9d35770a5f5.2592000.1701242081.282335-37357318"
			
 
				     url = "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/completions?access_token=" + _token
			
 
				 
			
 
				+    # url = "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/xuanyuan_70b_chat?access_token=" + _token
			
 
				+
			
 
				     payload = json.dumps({
			
 
				         "messages": [
			
 
				             {
			
 
				                 "role": "user",
			
 
				                 "content": '''
			
 
				-                假设分类是建筑建材-建筑涂料的相关产品词“面漆”
			
 
				-                请拓展其相关行业产品词，列举30个
			
 
				+               今天是几号
			
 
				                 '''
			
 
				             }
			
 
				         ]
			
@@ -38,30 +40,39 @@ def main():
 
				         'Content-Type': 'application/json'
			
 
				     }
			
 
				 
			
 
				+
			
 
				+
			
 
				+
			
 
				     response = requests.request("POST", url, headers=headers, data=payload)
			
 
				 
			
 
				     print(response.text)
			
 
				 
			
 
				-def chat(message):
			
 
				-    _token = "24.93c9d66ffc94ffaef6c6c9d35770a5f5.2592000.1701242081.282335-37357318"
			
 
				-    url = "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/completions?access_token=" + _token
			
 
				-
			
 
				+def chat(msg,token=None,api_url=None):
			
 
				+    if token is None:
			
 
				+        token = get_access_token()
			
 
				+    # _token = "24.93c9d66ffc94ffaef6c6c9d35770a5f5.2592000.1701242081.282335-37357318"
			
 
				+    if api_url is None:
			
 
				+        api_url = "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/completions"
			
 
				+        # api_url = "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/ernie-3.5-128k"
			
 
				+    url =  api_url+"?access_token="+ token
			
 
				     payload = json.dumps({
			
 
				         "messages": [
			
 
				             {
			
 
				                 "role": "user",
			
 
				                 "content": '''
			
 
				-                %s
			
 
				-                '''%message
			
 
				+               %s
			
 
				+                '''%msg
			
 
				             }
			
 
				-        ]
			
 
				+        ],
			
 
				+        "stream":False
			
 
				     })
			
 
				     headers = {
			
 
				         'Content-Type': 'application/json'
			
 
				     }
			
 
				-
			
 
				     response = requests.request("POST", url, headers=headers, data=payload)
			
 
				+
			
 
				     return response
			
 
				 
			
 
				 if __name__ == '__main__':
			
 
				-    main()
			
 
				+    # main()
			
 
				+    print(chat("今天是几号").text)