testInterface.py 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109
'''
Created on 2018-12-26

@author: User
'''
  5. import sys
  6. import os
  7. import codecs
  8. import re
  9. sys.path.append(os.path.abspath("../.."))
  10. import requests
  11. from BiddingKG.dl.common.Connection import *
  12. import time
  13. import psycopg2
  14. import glob
  15. from BiddingKG.dl.common.Utils import *
  16. if __name__=="__main__":
  17. #测试接口的代码
  18. #conn = getConnection()
  19. conn = psycopg2.connect(dbname="article_label",user="postgres",password="postgres",host="192.168.2.101")
  20. cursor = conn.cursor()
  21. #validation
  22. #sql = " select content,id from articles where id in(select doc_id from articles_validation where exists(select 1 from articles_processed where id=doc_id)) order by id"
  23. #training
  24. #sql = " select content,id from articles where not exists(select 1 from articles_validation where doc_id=articles.id) order by id limit 5000"
  25. sql = " select id from articles_processed "
  26. cursor.execute(sql)
  27. rows = cursor.fetchall()
  28. ids = []
  29. for row in rows:
  30. ids.append(row[0])
  31. # 添加对应headers 及 tonken 用于数据传参和登录认证使用
  32. myheaders = {'Content-Type': 'application/json'}
  33. # 接口测试数据
  34. #guardian_base = 'http://47.110.128.185:15015'
  35. guardian_base1 = 'http://127.0.0.1:15015'
  36. guardian_base2 = 'http://192.168.2.101:15015'
  37. # 使用requests的post方法进行请求路由
  38. result = []
  39. #content = row[0]
  40. #content = "<div> <div> 110kV龙台变~净 化厂35kV电力线路(外电部分) </div> <a>附件下载,文件请用音频播放器或者360浏览器打开</a> <br> <a>公告来源:http://bulletin.cebpubservice.com/biddingBulletin/2019-02-12/1058505.html</a> </div>"
  41. i = 0
  42. a1 = time.time()
  43. same_flag = None
  44. files = glob.glob("C:\\Users\\User\\Desktop\\20190416要素\\*.html")
  45. for file in files:
  46. name = file.split("\\")[-1]
  47. content = codecs.open(file,"r",encoding="utf8").read()
  48. i += 1
  49. print(i,len(files))
  50. if i>=2000:
  51. break
  52. print(file)
  53. user = {
  54. "content": content,
  55. "title":"XXXXXX",
  56. "doc_id":"1234555"
  57. }
  58. a = time.time()
  59. _resp = requests.post(guardian_base2 + '/article_extract', json=user, headers=myheaders, verify=True)
  60. resp_json = _resp.content.decode("utf-8")
  61. print(resp_json)
  62. #_resp1 = requests.post(guardian_base1 + '/article_extract', json=user, headers=myheaders, verify=True)
  63. #resp_json1 = _resp1.content.decode("utf-8")
  64. resp_json1 = ""
  65. resp_json = str(resp_json).replace("(","(").replace(")",")")
  66. if resp_json==resp_json1:
  67. same_flag = 0
  68. else:
  69. same_flag = 1
  70. result.append([name,same_flag,resp_json,resp_json1])
  71. ''''''
  72. #将结果输出到文件方便查看
  73. result.sort(key=lambda x:x[1],reverse=True)
  74. i = 0
  75. with codecs.open("testInterface.html","w",encoding="utf8") as f:
  76. f.write('<html><head>\
  77. <meta http-equiv="Content-Type"\
  78. content="text/html; charset=UTF-8">\
  79. </head>\
  80. <body bgcolor="#FFFFFF">\
  81. <table border="1">\
  82. <tr>\
  83. <td>序号</td>\
  84. <td>doc_id</td>\
  85. <td>same</td>\
  86. <td width=40%>before</td>\
  87. <td width=40%>重新训练</td>\
  88. </tr>')
  89. for item in result:
  90. i += 1
  91. f.write("<tr>"+"<td>"+str(i)+"</td>"+"<td>"+str(item[0])+"</td>"+"<td>"+str(item[1])+"</td>"+"<td>"+str(item[2])+"</td>"+"<td>"+str(item[3])+"</td>"+"</tr>")
  92. f.write("</table></body>")