# test4.py
# coding: UTF8
'''
Created on 2019-01-04
@author: User
'''
  6. from bs4 import BeautifulSoup, Comment
  7. import copy
  8. import re
  9. import sys
  10. import os
  11. import codecs
  12. import requests
  13. import time
  14. import logging
  15. import json
  16. global json,logging
  17. logging.basicConfig(level = logging.INFO,format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
  18. import json
  19. import random
  20. session = requests.Session()
  21. def test(name,content,_url=None):
  22. # _times = 2
  23. # _content = ""
  24. # for _ in range(_times):
  25. # _content += content
  26. # content = _content
  27. print(len(content))
  28. user = {
  29. "content": content,
  30. "doc_id":name,
  31. "timeout":2000,
  32. "original_docchannel":101
  33. }
  34. # print(user)
  35. myheaders = {'Content-Type': 'application/json',"Authorization":"NzZmOWZlMmU2MGY3YmQ4MDBjM2E5MDAyZjhjNjQ0MzZlMmE0NTMwZg=="}
  36. list_url = ["http://127.0.0.1:15030/content_extract",
  37. "http://127.0.0.1:15031/content_extract",
  38. "http://127.0.0.1:15032/content_extract",
  39. "http://127.0.0.1:15033/content_extract",
  40. "http://127.0.0.1:15034/content_extract",
  41. "http://127.0.0.1:15035/content_extract",
  42. "http://127.0.0.1:15036/content_extract",
  43. "http://127.0.0.1:15037/content_extract",
  44. ]
  45. # _i = random.randint(0,len(list_url)-1)
  46. # _resp = requests.post(list_url[_i], json=user, headers=myheaders, verify=True)
  47. # _url = "http://1255640119316927.cn-hangzhou.pai-eas.aliyuncs.com/api/predict/content_extract"
  48. _url = "http://127.0.0.1:15030/content_extract"
  49. # _url = "http://192.168.2.102:15030/industry_extract"
  50. # _url = "http://192.168.2.102:15030/content_extract"
  51. _resp = session.post(_url, json=user,verify=True,timeout=1000)
  52. # _resp = requests.post("http://192.168.2.102:15000" + '/article_extract', json=user, headers=myheaders, verify=True)
  53. resp_json = _resp.content.decode("utf-8")
  54. logging.info("%d===%s"%(_resp.status_code,resp_json[:100]))
  55. return resp_json
  56. def presure_test():
  57. from BiddingKG.dl.common.multiThread import MultiThreadHandler
  58. from queue import Queue
  59. text = codecs.open("2.html","r",encoding="utf8").read()
  60. content = str(BeautifulSoup(text).find("div",id="pcontent"))
  61. start_time = time.time()
  62. task_queue = Queue()
  63. for i in range(300):
  64. task_queue.put(content)
  65. def _handle(item,result_queue):
  66. test("",item)
  67. mt = MultiThreadHandler(task_queue,_handle,None,3)
  68. mt.run()
  69. end_time = time.time()
  70. print("all takes :%ds"%(end_time-start_time))
  71. def runlocal(content):
  72. import sys
  73. import os
  74. sys.path.append(os.path.abspath("../.."))
  75. import fool
  76. from BiddingKG.dl.interface.extract import predict
  77. predict("12", content,"打印机",original_docchannel=101)
  78. def run_one():
  79. from BiddingKG.dl.interface.extract import predict
  80. # filename = "比地_52_79929693.html"
  81. #text = codecs.open("C:\\Users\\User\\Desktop\\数据20191014\\"+filename,"r",encoding="utf8").read()
  82. text = codecs.open("C:\\Users\\\Administrator\\Desktop\\2.html","r",encoding="utf8").read()
  83. # text = codecs.open("2.html","r",encoding="utf8").read()
  84. content = str(BeautifulSoup(text).find("div",id="pcontent"))
  85. a = time.time()
  86. # text = '''
  87. # 购安装工程二标段,第一中标候选人,投标人名称,南阳市宝琛装饰工程有限责任公司,投标报价:147892
  88. # '''
  89. # print("start")
  90. # _time1 = time.time()
  91. # print(predict("12", content,"打印机",original_docchannel=52))
  92. # # test(12,content)
  93. # # test(12,text)
  94. # print("takes",time.time()-a)
  95. print("start")
  96. _time1 = time.time()
  97. print(predict("12", content,"打印机",original_docchannel=52))
  98. # test(12,content)
  99. # test(12,text)
  100. print("takes",time.time()-a)
  101. pass
  102. if __name__=="__main__":
  103. # presure_test()
  104. run_one()