# test4.py
  1. #coding:UTF8
  2. '''
  3. Created on 2019年1月4日
  4. @author: User
  5. '''
  6. from bs4 import BeautifulSoup, Comment
  7. import copy
  8. import re
  9. import sys
  10. import os
  11. import codecs
  12. import requests
  13. import time
  14. import logging
  15. import json
  16. global json,logging
  17. logging.basicConfig(level = logging.INFO,format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
  18. import json
  19. import random
  20. session = requests.Session()
  21. def test(name,content,_url=None):
  22. # _times = 2
  23. # _content = ""
  24. # for _ in range(_times):
  25. # _content += content
  26. # content = _content
  27. print(len(content))
  28. user = {
  29. "content": content,
  30. "doc_id":name,
  31. "timeout":2000,
  32. "original_docchannel":101
  33. }
  34. # print(user)
  35. myheaders = {'Content-Type': 'application/json',"Authorization":"NzZmOWZlMmU2MGY3YmQ4MDBjM2E5MDAyZjhjNjQ0MzZlMmE0NTMwZg=="}
  36. list_url = ["http://127.0.0.1:15030/content_extract",
  37. "http://127.0.0.1:15031/content_extract",
  38. "http://127.0.0.1:15032/content_extract",
  39. "http://127.0.0.1:15033/content_extract",
  40. "http://127.0.0.1:15034/content_extract",
  41. "http://127.0.0.1:15035/content_extract",
  42. "http://127.0.0.1:15036/content_extract",
  43. "http://127.0.0.1:15037/content_extract",
  44. ]
  45. # _i = random.randint(0,len(list_url)-1)
  46. # _resp = requests.post(list_url[_i], json=user, headers=myheaders, verify=True)
  47. # _url = "http://1255640119316927.cn-hangzhou.pai-eas.aliyuncs.com/api/predict/content_extract"
  48. _url = "http://127.0.0.1:15030/content_extract"
  49. _url = "http://192.168.2.102:15030/content_extract"
  50. # _url = "http://192.168.2.102:15030/industry_extract"
  51. # _url = "http://192.168.2.102:15030/content_extract"
  52. _resp = session.post(_url, json=user,verify=True,timeout=1000)
  53. # _resp = requests.post("http://192.168.2.102:15000" + '/article_extract', json=user, headers=myheaders, verify=True)
  54. resp_json = _resp.content.decode("utf-8")
  55. logging.info("%d===%s"%(_resp.status_code,resp_json[:100]))
  56. return resp_json
  57. def presure_test():
  58. from BiddingKG.dl.common.multiThread import MultiThreadHandler
  59. from queue import Queue
  60. text = codecs.open("2.html","r",encoding="utf8").read()
  61. content = str(BeautifulSoup(text).find("div",id="pcontent"))
  62. start_time = time.time()
  63. task_queue = Queue()
  64. for i in range(300):
  65. task_queue.put(content)
  66. def _handle(item,result_queue):
  67. test("",item)
  68. mt = MultiThreadHandler(task_queue,_handle,None,3)
  69. mt.run()
  70. end_time = time.time()
  71. print("all takes :%ds"%(end_time-start_time))
  72. def runlocal(content):
  73. import sys
  74. import os
  75. sys.path.append(os.path.abspath("../.."))
  76. import fool
  77. from BiddingKG.dl.interface.extract import predict
  78. predict("12", content,"打印机",original_docchannel=101)
  79. def run_one():
  80. from BiddingKG.dl.interface.extract import predict
  81. # filename = "比地_52_79929693.html"
  82. #text = codecs.open("C:\\Users\\User\\Desktop\\数据20191014\\"+filename,"r",encoding="utf8").read()
  83. text = codecs.open("C:\\Users\\Administrator\\Desktop\\test12354.txt","r",encoding="utf8").read()
  84. # text = codecs.open("2.html","r",encoding="utf8").read()
  85. content = str(BeautifulSoup(text).find("div",id="pcontent"))
  86. a = time.time()
  87. # text = '''
  88. # 购安装工程二标段,第一中标候选人,投标人名称,南阳市宝琛装饰工程有限责任公司,投标报价:147892
  89. # '''
  90. print("start")
  91. _time1 = time.time()
  92. print(predict("12", text,"市属公立医院医用耗材及其他设备招标结果每两周公示(10.16-10.31) "))
  93. # test(12,content)
  94. # test(12,text)
  95. print("takes",time.time()-a)
  96. # a = time.time()
  97. # print(predict("12", text,""))
  98. # print("takes", time.time() - a)
  99. pass
  100. if __name__=="__main__":
  101. # presure_test()
  102. run_one()