testInterface.py 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117
  1. '''
  2. Created on 2018年12月26日
  3. @author: User
  4. '''
  5. import sys
  6. import os
  7. import codecs
  8. import re
  9. sys.path.append(os.path.abspath("../.."))
  10. import requests
  11. from BiddingKG.dl.common.Connection import *
  12. import time
  13. import psycopg2
  14. import glob
  15. from BiddingKG.dl.common.Utils import *
  16. import json
  17. if __name__=="__main__":
  18. #测试接口的代码
  19. #conn = getConnection()
  20. conn = psycopg2.connect(dbname="article_label",user="postgres",password="postgres",host="192.168.2.101")
  21. cursor = conn.cursor()
  22. #validation
  23. #sql = " select content,id from articles where id in(select doc_id from articles_validation where exists(select 1 from articles_processed where id=doc_id)) order by id"
  24. #training
  25. #sql = " select content,id from articles where not exists(select 1 from articles_validation where doc_id=articles.id) order by id limit 5000"
  26. sql = " select id from articles_processed "
  27. cursor.execute(sql)
  28. rows = cursor.fetchall()
  29. ids = []
  30. for row in rows:
  31. ids.append(row[0])
  32. # 添加对应headers 及 tonken 用于数据传参和登录认证使用
  33. myheaders = {'Content-Type': 'application/json'}
  34. # 接口测试数据
  35. #guardian_base = 'http://47.110.128.185:15015'
  36. guardian_base1 = 'http://127.0.0.1:15013'
  37. guardian_base2 = 'http://192.168.2.101:15015'
  38. # 使用requests的post方法进行请求路由
  39. result = []
  40. #content = row[0]
  41. #content = "<div> <div> 110kV龙台变~净 化厂35kV电力线路(外电部分) </div> <a>附件下载,文件请用音频播放器或者360浏览器打开</a> <br> <a>公告来源:http://bulletin.cebpubservice.com/biddingBulletin/2019-02-12/1058505.html</a> </div>"
  42. i = 0
  43. a1 = time.time()
  44. same_flag = None
  45. # files = glob.glob("C:\\Users\\User\\Desktop\\测试数据20200312\\*.html")
  46. files = data = load("../test/label_0_1197.pk")
  47. for file in files:
  48. # name = file.split("\\")[-1]
  49. name = file["filename"]
  50. # content = codecs.open(file,"r",encoding="utf8").read()
  51. content = file["content"]
  52. i += 1
  53. print(i,len(files))
  54. if i>=2000:
  55. break
  56. # print(file)
  57. user = {
  58. "content": content,
  59. "title":"XXXXXX",
  60. "doc_id":"1234555"
  61. }
  62. a = time.time()
  63. # _resp = requests.post(guardian_base2 + '/article_extract', json=user, headers=myheaders, verify=True)
  64. # resp_json = _resp.content.decode("utf-8")
  65. # print(resp_json)
  66. resp_json = {"code":file["code"],"name":file["name"]}
  67. _resp1 = requests.post(guardian_base2 + '/content_extract', json=user, headers=myheaders, verify=True)
  68. resp_json1 = _resp1.content.decode("utf-8")
  69. resp_json1 = json.loads(resp_json1)
  70. resp_json1 = {"code":resp_json1["code"],"name":resp_json1["name"]}
  71. resp_json = str(resp_json).replace("(","(").replace(")",")")
  72. print(resp_json)
  73. print(resp_json1)
  74. if resp_json==resp_json1:
  75. same_flag = 0
  76. else:
  77. same_flag = 1
  78. result.append([name,same_flag,resp_json,resp_json1])
  79. ''''''
  80. #将结果输出到文件方便查看
  81. result.sort(key=lambda x:x[1],reverse=True)
  82. i = 0
  83. with codecs.open("testInterface.html", "w", encoding="utf8") as f:
  84. f.write('<html><head>\
  85. <meta http-equiv="Content-Type"\
  86. content="text/html; charset=UTF-8">\
  87. </head>\
  88. <body bgcolor="#FFFFFF">\
  89. <table border="1">\
  90. <tr>\
  91. <td>序号</td>\
  92. <td>doc_id</td>\
  93. <td>same</td>\
  94. <td width=40%>before</td>\
  95. <td width=40%>重新训练</td>\
  96. </tr>')
  97. for item in result:
  98. i += 1
  99. f.write("<tr>"+"<td>"+str(i)+"</td>"+"<td>"+str(item[0])+"</td>"+"<td>"+str(item[1])+"</td>"+"<td>"+str(item[2])+"</td>"+"<td>"+str(item[3])+"</td>"+"</tr>")
  100. f.write("</table></body>")