'''
Created on 2019-01-03

@author: User
'''
import sys
import os
import json
import pickle
import requests
import codecs
from bs4 import BeautifulSoup
import time
import shutil
from threading import Thread

sys.path.append(os.path.abspath("../.."))


def save(object_to_save, path):
    '''
    Save an object with pickle.

    @Args:
        object_to_save: the object to save
        path: destination file path
    '''
    with open(path, 'wb') as f:
        pickle.dump(object_to_save, f)


def load(path):
    '''
    Load a pickled object.

    @Args:
        path: file path to read from
    @Return:
        the loaded object
    '''
    with open(path, 'rb') as f:
        object1 = pickle.load(f)
    return object1


def test(name, content):
    # Build the request payload and send it to the extraction service.
    user = {
        #"content": "XXXXXXXXXXXXXXXXXXX",
        "content": content,
        "id": name,
        "doc_id": "1234"
    }
    myheaders = {'Content-Type': 'application/json',
                 "Authorization": "NzZmOWZlMmU2MGY3YmQ4MDBjM2E5MDAyZjhjNjQ0MzZlMmE0NTMwZg==",
                 "appKey": "203780894",
                 "appSecret": "3rwyr0b8djsn6l3o4i8mplxe4giiy2ke"}
    try:
        #_resp = requests.post('http://pai-eas-vpc.cn-hangzhou.aliyuncs.com/api/predict/content_extract', json=user, headers=myheaders, verify=True)
        _resp = requests.post("http://192.168.2.101:15015" + '/article_extract', json=user, headers=myheaders, verify=True)
        # _resp = requests.post("http://192.168.2.101:15030" + '/article_extract', json=user, headers=myheaders, verify=True)
        # _resp = requests.post("http://127.0.0.1:15013" + '/content_extract', json=user, headers=myheaders, verify=True)
        resp_json = _resp.content.decode("utf-8")
        print("##", _resp.status_code)
        print("==", resp_json)
        if _resp.status_code == 201:
            print(json.loads(resp_json))
            return resp_json
        else:
            print(resp_json)
            return None
    except Exception as e:
        print(str(e))
        return None


def getFile(filename):
    # Copy a sample file from the data directory into the working directory.
    path = "C:\\Users\\User\\Desktop\\数据20191014\\"
    file = path + filename
    dest_dir = "C:\\Users\\User\\Desktop\\getfile"
    shutil.copy(file, dest_dir)


class MyThread(Thread):
    # These attributes are class-level on purpose: all MyThread instances
    # share them, so the counters aggregate results across every thread.
    # Note the mutations in run() are not synchronized, which is acceptable
    # only for rough benchmarking. The test data is loaded once, at class
    # definition time.
    cost_time = dict()
    list_result = []
    num_200 = []
    num_other = []
    data = load("list_contents.pk")

    def run(self):
        for item in self.data[:100]:
            filename = item[0]
            content = item[1]
            result = test(filename, content)
            if result is not None:
                self.num_200.append("")
                self.list_result.append(result)
                result = json.loads(result)
                _time = result["cost_time"]
                for _key in _time.keys():
                    if _key not in self.cost_time:
                        self.cost_time[_key] = 0
                    self.cost_time[_key] += _time[_key]
            else:
                self.num_other.append("")


def test_highConcurrency():
    thread_num = 10
    list_thread = []
    cost_time = dict()
    _start_time = time.time()
    for i in range(thread_num):
        t = MyThread()
        list_thread.append(t)
    for t in list_thread:
        t.start()
    for t in list_thread:
        t.join()
    # The counters are class-level, so any single instance sees the
    # aggregate over all threads.
    t = list_thread[0]
    _time = t.cost_time
    for _key in _time.keys():
        if _key not in cost_time:
            cost_time[_key] = 0
        cost_time[_key] += _time[_key]
    num_200 = len(t.num_200)
    num_other = len(t.num_other)
    print("==================")
    print("cost:", time.time() - _start_time)
    print("num_200:", num_200, "num_other:", num_other)
    print(cost_time)


if __name__ == "__main__":
    text = codecs.open("C:\\Users\\User\\Desktop\\2.html", "r", encoding="utf8").read()
    start_time = time.time()
    content = str(BeautifulSoup(text, 'lxml').find("div", id="pcontent"))
    print(content)
    test("12", content)
    print("takes %.2f" % (time.time() - start_time))

'''
data = load("list_contents.pk")
for item in data[:100]:
    filename = item[0]
    content = item[1]
    a = time.time()
    #predict("12",content)
    test("12",content)
    print("takes",time.time()-a)
    break
test_highConcurrency()
'''
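
# MyThread above aggregates cost_time/num_200/num_other through unsynchronized
# class attributes. Below is a minimal, optional sketch of doing the same
# bookkeeping under a lock, assuming the same per-key "cost_time" dict parsed
# from each response. "SafeStats" is a hypothetical helper for illustration,
# not an existing API.
import threading


class SafeStats:
    '''Aggregate per-key timings and success/failure counts under a lock.'''

    def __init__(self):
        self._lock = threading.Lock()
        self.cost_time = dict()
        self.num_200 = 0
        self.num_other = 0

    def add_success(self, _time):
        # _time: the "cost_time" dict from a successful (201) response.
        with self._lock:
            self.num_200 += 1
            for _key, _value in _time.items():
                self.cost_time[_key] = self.cost_time.get(_key, 0) + _value

    def add_failure(self):
        with self._lock:
            self.num_other += 1

# Usage sketch: create one SafeStats, pass it to every worker thread, and call
# stats.add_success(json.loads(result)["cost_time"]) or stats.add_failure()
# instead of mutating class attributes.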