123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122 |
# coding: UTF8
'''
Created on 2019-01-04
@author: User

Test/benchmark client for the BiddingKG content-extraction HTTP service.
'''
from bs4 import BeautifulSoup, Comment
import copy
import re
import sys
import os
import codecs
import requests
import time
import logging
import json
import random

# Configure the root logger once at import time; all log calls below use this format.
# (Removed: a duplicate `import json` and a no-op module-level `global json,logging`.)
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# One shared HTTP session so repeated requests reuse TCP connections.
session = requests.Session()
def test(name, content, _url=None):
    """POST one document to the content_extract service and return the raw response body.

    Args:
        name: document identifier, sent to the service as ``doc_id``.
        content: document text/HTML to run extraction on.
        _url: optional service endpoint. Bug fix: the original unconditionally
            overwrote this parameter with a hard-coded URL, so a caller-supplied
            endpoint was silently ignored; it is now honored, with the previous
            hard-coded endpoint kept as the default.

    Returns:
        The response body decoded as UTF-8 (the service replies with JSON text).
    """
    print(len(content))
    user = {
        "content": content,
        "doc_id": name,
        "timeout": 2000,
        "original_docchannel": 101
    }
    # Fall back to the default local extraction endpoint when none is given.
    if _url is None:
        _url = "http://192.168.2.102:15030/content_extract"
    _resp = session.post(_url, json=user, verify=True, timeout=1000)
    resp_json = _resp.content.decode("utf-8")
    # Log status plus a 100-char preview of the body.
    logging.info("%d===%s" % (_resp.status_code, resp_json[:100]))
    return resp_json
def presure_test():
    """Pressure test: push 300 copies of a sample document through test() on 3 threads.

    Fix: the sample file is now opened via a ``with`` block so the handle is
    closed (the original leaked it via ``codecs.open(...).read()``).
    """
    from BiddingKG.dl.common.multiThread import MultiThreadHandler
    from queue import Queue

    with codecs.open("2.html", "r", encoding="utf8") as fp:
        text = fp.read()
    # Extract only the payload div the service expects.
    content = str(BeautifulSoup(text).find("div", id="pcontent"))

    start_time = time.time()
    task_queue = Queue()
    for i in range(300):
        task_queue.put(content)

    def _handle(item, result_queue):
        # Each worker sends one copy of the document to the service.
        test("", item)

    mt = MultiThreadHandler(task_queue, _handle, None, 3)
    mt.run()
    end_time = time.time()
    print("all takes :%ds" % (end_time - start_time))
def runlocal(content):
    """Run the extraction pipeline in-process instead of via the HTTP service.

    Args:
        content: document text/HTML passed straight to ``predict``.
    """
    import os
    import sys
    # Make the project root importable before loading project modules.
    sys.path.append(os.path.abspath("../.."))
    import fool
    from BiddingKG.dl.interface.extract import predict
    predict("12", content, "打印机", original_docchannel=101)
def run_one():
    """Run predict() twice on one sample HTML file and print each run's timing.

    The first call includes model warm-up cost; the second shows warm timing.
    Fixes: a stray third backslash in the sample path
    (``C:\\Users\\\\Administrator`` -> ``C:\\Users\\Administrator``), an unclosed
    file handle, and two dead ``_time1`` locals that were assigned but never read.
    Note both prints measure from the same start time, matching the original.
    """
    from BiddingKG.dl.interface.extract import predict

    # NOTE(review): hard-coded Windows path — confirm the sample file location.
    with codecs.open("C:\\Users\\Administrator\\Desktop\\2.html", "r", encoding="utf8") as fp:
        text = fp.read()
    content = str(BeautifulSoup(text).find("div", id="pcontent"))

    a = time.time()
    print("start")
    print(predict("12", content, "打印机", original_docchannel=52))
    print("takes", time.time() - a)

    # Second pass: models are loaded now, so this approximates steady-state cost.
    print("start")
    print(predict("12", content, "打印机", original_docchannel=52))
    print("takes", time.time() - a)
if __name__ == "__main__":
    # Swap in presure_test() to exercise the multithreaded load path instead.
    # presure_test()
    run_one()
|