'''
Created on 2019-01-03
@author: User
'''
import sys
import os
import json
import re
import pickle
import requests
import codecs
from bs4 import BeautifulSoup
import time
import shutil
from threading import Thread
import jpype

# Make the project root importable when the script is run from its own directory.
sys.path.append(os.path.abspath("../.."))

def save(object_to_save, path):
    '''
    Save an object to disk with pickle.
    @Args:
        object_to_save: the object to save
        path: the file path to write to
    '''
    with open(path, 'wb') as f:
        pickle.dump(object_to_save, f)
def load(path):
    '''
    Load a pickled object from disk.
    @Args:
        path: the file path to read from
    @Return:
        the loaded object
    '''
    with open(path, 'rb') as f:
        return pickle.load(f)
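
# Usage sketch for save()/load() (illustrative only; "example.pk" is a
# hypothetical path, and the (filename, html) tuples mirror the layout that
# MyThread.run expects from "list_contents.pk"):
#
#   save([("1.html", "<div id='pcontent'>...</div>")], "example.pk")
#   items = load("example.pk")   # -> [("1.html", "<div id='pcontent'>...</div>")]
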
def test(name, content):
    '''
    POST one document to the article-extraction service and return the raw
    response body (a JSON string) on HTTP 201, or None on any other status
    or on a request error.
    '''
    user = {
        # "content": "XXXXXXXXXXXXXXXXXXX",
        "content": content,
        "id": name,
        "doc_id": "1234"
    }
    myheaders = {'Content-Type': 'application/json',
                 "Authorization": "NzZmOWZlMmU2MGY3YmQ4MDBjM2E5MDAyZjhjNjQ0MzZlMmE0NTMwZg==",
                 "appKey": "203780894",
                 "appSecret": "3rwyr0b8djsn6l3o4i8mplxe4giiy2ke"}
    try:
        # Alternative endpoints, kept commented for reference:
        # _resp = requests.post('http://pai-eas-vpc.cn-hangzhou.aliyuncs.com/api/predict/content_extract', json=user, headers=myheaders, verify=True)
        # _resp = requests.post("http://192.168.2.101:15030" + '/article_extract', json=user, headers=myheaders, verify=True)
        # _resp = requests.post("http://127.0.0.1:15013" + '/content_extract', json=user, headers=myheaders, verify=True)
        _resp = requests.post("http://192.168.2.101:15015" + '/article_extract', json=user, headers=myheaders, verify=True)
        resp_json = _resp.content.decode("utf-8")
        print("##", _resp.status_code)
        print("==", resp_json)
        if _resp.status_code == 201:
            print(json.loads(resp_json))
            return resp_json
        else:
            print(resp_json)
            return None
    except Exception as e:
        print(str(e))
        return None
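
# Minimal usage sketch for test() (illustrative; the id and HTML snippet are
# made-up values, and "cost_time" is the per-stage timing map that
# MyThread.run reads from the response below):
#
#   resp_json = test("demo-1", "<div id='pcontent'>sample article body</div>")
#   if resp_json is not None:
#       per_stage = json.loads(resp_json)["cost_time"]
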
def getFile(filename):
    # Copy one source file from the dataset folder to the local collection folder.
    path = "C:\\Users\\User\\Desktop\\数据20191014\\"
    file = path + filename
    dest_dir = "C:\\Users\\User\\Desktop\\getfile"
    shutil.copy(file, dest_dir)

class MyThread(Thread):
    '''
    Worker thread that replays the pickled documents against the service.
    '''
    cost_time = dict()
    list_result = []
    num_200 = []
    num_other = []

    data = load("list_contents.pk")

    def run(self):
        for item in self.data[:100]:
            filename = item[0]
            content = item[1]
            result = test(filename, content)
            if result is not None:
                self.num_200.append("")
                self.list_result.append(result)
                result = json.loads(result)
                _time = result["cost_time"]
                # Sum the per-stage timings reported by the service.
                for _key in _time.keys():
                    if _key not in self.cost_time:
                        self.cost_time[_key] = 0
                    self.cost_time[_key] += _time[_key]
            else:
                self.num_other.append("")

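# Note: cost_time / list_result / num_200 / num_other above are class
# attributes, so every MyThread instance shares them and the totals cover all
# threads combined. A per-thread variant would move them into __init__, e.g.
# (a sketch only, not used by test_highConcurrency below):
#
#   class MyThreadPerInstance(Thread):
#       def __init__(self):
#           super().__init__()
#           self.cost_time = dict()
#           self.num_200 = []
#           self.num_other = []
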
def test_highConcurrency():
    # Fire thread_num workers at the service and report aggregate timings.
    thread_num = 10
    list_thread = []
    cost_time = dict()
    _start_time = time.time()
    for i in range(thread_num):
        t = MyThread()
        list_thread.append(t)
    for t in list_thread:
        t.start()
    for t in list_thread:
        t.join()
    # The counters are shared class attributes, so the first thread already
    # holds the totals for all workers.
    t = list_thread[0]
    _time = t.cost_time
    for _key in _time.keys():
        if _key not in cost_time:
            cost_time[_key] = 0
        cost_time[_key] += _time[_key]
    num_200 = len(t.num_200)
    num_other = len(t.num_other)
    print("==================")
    print("cost:", time.time() - _start_time)
    print("num_200:", num_200, "num_other:", num_other)
    print(cost_time)
if __name__ == "__main__":
    # Single-document smoke test: extract the article body from a local HTML
    # file and send it to the service once.
    text = codecs.open("C:\\Users\\User\\Desktop\\2.html", "r", encoding="utf8").read()
    start_time = time.time()
    content = str(BeautifulSoup(text, 'lxml').find("div", id="pcontent"))
    print(content)
    test("12", content)
    print("takes %d" % (time.time() - start_time))

'''
data = load("list_contents.pk")
for item in data[:100]:
    filename = item[0]
    content = item[1]
    a = time.time()
    # predict("12", content)
    test("12", content)
    print("takes", time.time() - a)
    break
test_highConcurrency()
'''