test4.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133
  1. #coding:UTF8
  '''
  Created on 2019-01-04
  @author: User
  '''
  6. from bs4 import BeautifulSoup, Comment
  7. import copy
  8. import re
  9. import sys
  10. import os
  11. import codecs
  12. import requests
  13. import time
  14. import logging
  15. import json
  # Configure root logging once for the whole script: INFO and above, with
  # timestamp, logger name and level in front of every message.
  # No ``global`` declaration is needed here: names bound at module scope are
  # already global.
  logging.basicConfig(
      level=logging.INFO,
      format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
  )
  import json
  import random
  from ipywidgets import Layout

  # Single requests session shared by every call to test().
  session = requests.Session()
  def test(name, content, _url=None):
      """Post one document to the content-extraction HTTP service.

      Args:
          name: Sent as the ``doc_id`` field of the JSON payload.
          content: Sent as the ``content`` field; its length is printed
              before the request is made.
          _url: Endpoint to post to. When ``None`` (the default),
              ``http://192.168.2.102:15030/content_extract`` is used.

      Returns:
          The response body decoded as UTF-8 text.

      Exceptions raised by ``session.post`` or by the UTF-8 decode are not
      caught here and propagate to the caller.
      """
      print(len(content))

      payload = {
          "content": content,
          "doc_id": name,
          "timeout": 2000,
          "original_docchannel": 101,
      }

      # Only fall back to the built-in endpoint when the caller did not
      # supply one, so the parameter is actually honoured.
      if _url is None:
          _url = "http://192.168.2.102:15030/content_extract"

      # NOTE(review): the request is sent without custom headers; pass
      # ``headers=`` here if the service requires authentication - confirm.
      _resp = session.post(_url, json=payload, verify=True, timeout=1000)

      resp_json = _resp.content.decode("utf-8")
      # Log only the first 100 characters to keep the log line short.
      logging.info("%d===%s", _resp.status_code, resp_json[:100])
      return resp_json
  def presure_test():
      """Send the same extracted HTML block to ``test`` 300 times and time it.

      Reads ``2.html`` from the current directory, takes the ``div`` whose id
      is ``pcontent``, queues 300 copies of its string form, runs them through
      ``MultiThreadHandler`` and prints the elapsed wall-clock seconds.
      """
      from BiddingKG.dl.common.multiThread import MultiThreadHandler
      from queue import Queue

      # Context manager so the file handle is closed even if reading fails.
      with codecs.open("2.html", "r", encoding="utf8") as html_file:
          text = html_file.read()

      # If no matching div exists, find() returns None and ``content``
      # becomes the literal string "None".
      # NOTE(review): no parser is named, so bs4 picks whichever is installed.
      content = str(BeautifulSoup(text).find("div", id="pcontent"))

      start_time = time.time()
      task_queue = Queue()
      for _ in range(300):
          task_queue.put(content)

      def _handle(item, result_queue):
          # result_queue is part of the handler signature but is not used.
          test("", item)

      # presumably the trailing 3 is the worker-thread count - TODO confirm
      # against MultiThreadHandler.
      mt = MultiThreadHandler(task_queue, _handle, None, 3)
      mt.run()

      end_time = time.time()
      print("all takes :%ds" % (end_time - start_time))
  def runlocal(content):
      """Run the in-process ``predict`` on *content*; the result is discarded.

      Args:
          content: Document text passed as the second argument to ``predict``.
      """
      import os
      import sys

      # Make the directory two levels above the current working directory
      # importable before pulling in the project packages.
      two_levels_up = os.path.abspath("../..")
      sys.path.append(two_levels_up)

      import fool  # imported ahead of predict; not referenced directly here
      from BiddingKG.dl.interface.extract import predict

      predict("12", content, "打印机", original_docchannel=101)
  def run_one():
      """Run the in-process ``predict`` on one text file and print the timing.

      Reads the hard-coded file below as UTF-8, prints ``start``, prints the
      value returned by ``predict`` and then the elapsed seconds.
      """
      from BiddingKG.dl.interface.extract import predict

      # Context manager so the file handle is closed even if reading fails.
      with codecs.open("C:\\Users\\Administrator\\Desktop\\test12354.txt", "r", encoding="utf8") as src:
          text = src.read()

      a = time.time()
      print("start")
      print(predict("12", text, ""))
      print("takes", time.time() - a)
  def test_ner():
      """Print what ``fool.ner`` returns for a fixed two-line sample text."""
      import fool

      # The text starts and ends with a newline, as the triple-quoted literal
      # did. NOTE(review): any indentation inside that literal is not
      # recoverable from the listing; none is assumed - confirm.
      sample_lines = (
          "一、 *采购人名称:中共黄山市黄山区委统一战线工作部",
          "二、 *履约供应商名称:黄山区睿智办公设备销售中心",
      )
      _text = "\n" + "\n".join(sample_lines) + "\n"

      entities = fool.ner(_text)
      print(entities)
  if __name__ == "__main__":
      # Script entry point: only the NER check runs by default.
      # presure_test() and run_one() can be called here instead.
      test_ner()