# coding: UTF8
"""
Created on 2019-01-04.

Manual test / pressure-test driver for the BiddingKG content-extract service.

Provides:
  * test()         - POST one document to the HTTP extraction service.
  * presure_test() - flood the service with 300 copies of a sample document.
  * runlocal()     - run the extraction in-process (no HTTP).
  * run_one()      - extract one local document once and print timing.

@author: User
"""
from bs4 import BeautifulSoup, Comment
import copy
import re
import sys
import os
import codecs
import requests
import time
import logging
import json
import random

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# One shared session so repeated requests reuse the HTTP connection pool.
session = requests.Session()


def test(name, content, _url=None):
    """POST one document to the content_extract service and return the raw reply.

    :param name: document id, sent to the service as ``doc_id``.
    :param content: document text/HTML to be extracted.
    :param _url: optional service endpoint. Defaults to the local instance on
        port 15030. (Fix: the original overwrote a caller-supplied ``_url``
        unconditionally, making the parameter dead.)
    :return: the response body decoded as UTF-8 (a JSON string).
    """
    print(len(content))
    user = {
        "content": content,
        "doc_id": name,
        "timeout": 2000,
        "original_docchannel": 101,
    }
    # NOTE(review): these headers are NOT sent with the request below — the
    # original code never passed them to session.post(). Kept for reference
    # in case a remote (authenticated) endpoint is used again.
    myheaders = {
        'Content-Type': 'application/json',
        "Authorization": "NzZmOWZlMmU2MGY3YmQ4MDBjM2E5MDAyZjhjNjQ0MzZlMmE0NTMwZg==",
    }
    # Pool of local service instances, kept for manual endpoint switching.
    list_url = ["http://127.0.0.1:15030/content_extract",
                "http://127.0.0.1:15031/content_extract",
                "http://127.0.0.1:15032/content_extract",
                "http://127.0.0.1:15033/content_extract",
                "http://127.0.0.1:15034/content_extract",
                "http://127.0.0.1:15035/content_extract",
                "http://127.0.0.1:15036/content_extract",
                "http://127.0.0.1:15037/content_extract",
                ]
    if _url is None:
        _url = "http://127.0.0.1:15030/content_extract"
    _resp = session.post(_url, json=user, verify=True, timeout=1000)
    resp_json = _resp.content.decode("utf-8")
    logging.info("%d===%s" % (_resp.status_code, resp_json[:100]))
    return resp_json


def presure_test():
    """Pressure-test the service: 300 copies of 2.html via 3 worker threads."""
    from BiddingKG.dl.common.multiThread import MultiThreadHandler
    from queue import Queue

    # Use a context manager so the file handle is closed deterministically
    # (the original leaked it via codecs.open(...).read()).
    with codecs.open("2.html", "r", encoding="utf8") as f:
        text = f.read()
    content = str(BeautifulSoup(text).find("div", id="pcontent"))
    start_time = time.time()
    task_queue = Queue()
    for _ in range(300):
        task_queue.put(content)

    def _handle(item, result_queue):
        # result_queue is part of MultiThreadHandler's callback signature
        # but is not used here.
        test("", item)

    mt = MultiThreadHandler(task_queue, _handle, None, 3)
    mt.run()
    end_time = time.time()
    print("all takes :%ds" % (end_time - start_time))


def runlocal(content):
    """Run the extraction in-process against a local BiddingKG checkout.

    :param content: document text/HTML passed straight to ``predict``.
    """
    import sys
    import os
    # Make the repository root importable when run from this test directory.
    sys.path.append(os.path.abspath("../.."))
    import fool
    from BiddingKG.dl.interface.extract import predict
    predict("12", content, "打印机", original_docchannel=101)


def run_one():
    """Extract a single local document once, printing the result and timing."""
    from BiddingKG.dl.interface.extract import predict
    # Fix: the original path contained a stray extra backslash
    # ("C:\\Users\\\Administrator\\...") producing a doubled separator.
    with codecs.open("C:\\Users\\Administrator\\Desktop\\test12354.txt",
                     "r", encoding="utf8") as f:
        text = f.read()
    # Parsed but intentionally unused below: predict() is called with the raw
    # text, mirroring the original behavior.
    content = str(BeautifulSoup(text).find("div", id="pcontent"))
    start = time.time()
    print("start")
    print(predict("12", text, "打印机"))
    print("takes", time.time() - start)


if __name__ == "__main__":
    # presure_test()
    run_one()