test4.py

#coding:UTF8
'''
Created on 2019-01-04
@author: User
'''
from bs4 import BeautifulSoup, Comment
import copy
import re
import sys
import os
import codecs
import requests
import time
import logging
import json
import random

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

def test(name, content, _url=None):
    # Post one document to the content_extract service and return the raw JSON reply.
    user = {
        "content": content,
        "doc_id": name,
        "timeout": 200,
        "original_docchannel": 101
    }
    myheaders = {'Content-Type': 'application/json',
                 "Authorization": "NzZmOWZlMmU2MGY3YmQ4MDBjM2E5MDAyZjhjNjQ0MzZlMmE0NTMwZg=="}
    # Local worker pool; the commented-out lines below picked one endpoint at
    # random (see the failover sketch after this function).
    list_url = ["http://127.0.0.1:15030/content_extract",
                "http://127.0.0.1:15031/content_extract",
                "http://127.0.0.1:15032/content_extract",
                "http://127.0.0.1:15033/content_extract",
                "http://127.0.0.1:15034/content_extract",
                "http://127.0.0.1:15035/content_extract",
                "http://127.0.0.1:15036/content_extract",
                "http://127.0.0.1:15037/content_extract",
                ]
    # _i = random.randint(0, len(list_url) - 1)
    # _resp = requests.post(list_url[_i], json=user, headers=myheaders, verify=True)
    # _url = "http://1255640119316927.cn-hangzhou.pai-eas.aliyuncs.com/api/predict/content_extract"
    # _url = "http://127.0.0.1:15030/content_extract"
    if _url is None:
        _url = "http://192.168.2.102:15030/content_extract"
    _resp = requests.post(_url, json=user, headers=myheaders, verify=True)
    # _resp = requests.post("http://192.168.2.102:15000" + '/article_extract', json=user, headers=myheaders, verify=True)
    resp_json = _resp.content.decode("utf-8")
    logging.info("%d===%s" % (_resp.status_code, resp_json[:10]))
    return resp_json
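
# A minimal sketch of the load balancing hinted at by list_url above: try the
# endpoints in random order and fall through on connection errors. This is an
# illustration, not part of the original script; post_with_failover is a
# hypothetical helper name.
def post_with_failover(user, headers, urls):
    for _url in random.sample(urls, len(urls)):
        try:
            return requests.post(_url, json=user, headers=headers, verify=True)
        except requests.exceptions.RequestException:
            continue  # endpoint unreachable, try the next one
    raise RuntimeError("all content_extract endpoints failed")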

def presure_test():
    # Fire 3000 identical requests at the service through a small worker pool.
    from BiddingKG.dl.common.multiThread import MultiThreadHandler
    from queue import Queue
    text = codecs.open("C:\\Users\\Administrator\\Desktop\\2.html", "r", encoding="utf8").read()
    content = str(BeautifulSoup(text, "html.parser").find("div", id="pcontent"))
    start_time = time.time()
    task_queue = Queue()
    for i in range(3000):
        task_queue.put(text)

    def _handle(item, result_queue):
        test("", item)

    mt = MultiThreadHandler(task_queue, _handle, None, 1)
    mt.run()
    end_time = time.time()
    print("all takes :%ds" % (end_time - start_time))
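
# If BiddingKG's MultiThreadHandler is unavailable, the same load test can be
# sketched with the standard library alone (assumption: test() is thread-safe;
# presure_test_stdlib is an illustrative name, not from the original script).
def presure_test_stdlib(text, n=3000, workers=1):
    from concurrent.futures import ThreadPoolExecutor
    start_time = time.time()
    with ThreadPoolExecutor(max_workers=workers) as pool:
        # Consume the iterator so every submitted request actually runs.
        list(pool.map(lambda item: test("", item), [text] * n))
    print("all takes :%ds" % (time.time() - start_time))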

def runlocal(content):
    # Run the extractor in-process instead of over HTTP.
    sys.path.append(os.path.abspath("../.."))
    import fool
    from BiddingKG.dl.interface.extract import predict
    predict("12", content, "打印机", original_docchannel=101)

def run_one():
    # Extract a single local HTML file in-process and print the result.
    from BiddingKG.dl.interface.extract import predict
    # filename = "比地_52_79929693.html"
    # text = codecs.open("C:\\Users\\User\\Desktop\\数据20191014\\" + filename, "r", encoding="utf8").read()
    text = codecs.open("C:\\Users\\Administrator\\Desktop\\2.html", "r", encoding="utf8").read()
    content = str(BeautifulSoup(text, "html.parser").find("div", id="pcontent"))
    # text = '''
    # 购安装工程二标段,第一中标候选人,投标人名称,南阳市宝琛装饰工程有限责任公司,投标报价:147892
    # '''
    print("start")
    a = time.time()
    print(predict("12", text, "打印机", original_docchannel=101))
    # test(12, content)
    # test(12, text)
    print("takes", time.time() - a)
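
# The service replies with a JSON string; a caller could decode it with the
# standard library (assumption: the endpoint returns a JSON object; the field
# names are not verified here):
#   result = json.loads(test("12", content))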

if __name__ == "__main__":
    # presure_test()
    run_one()