123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117 |
- '''
- Created on 2018年12月26日
- @author: User
- '''
- import sys
- import os
- import codecs
- import re
- sys.path.append(os.path.abspath("../.."))
- import requests
- from BiddingKG.dl.common.Connection import *
- import time
- import psycopg2
- import glob
- from BiddingKG.dl.common.Utils import *
- import json
# Maps full-width parentheses to ASCII so labels written with either form compare equal.
# NOTE(review): the original replace() calls had identical source and target
# characters (a no-op), presumably full-width parentheses lost in an encoding
# conversion -- confirm against the labelled data.
_FULLWIDTH_PARENS = str.maketrans({"\uff08": "(", "\uff09": ")"})

# Base URL of the extraction service under test.
SERVICE_BASE_URL = 'http://192.168.2.101:15015'
# Labelled samples; each one is read for "filename", "content", "code" and "name".
SAMPLE_PATH = "../test/label_0_1197.pk"
# The loop stops once the 1-based sample index reaches this value,
# so at most SAMPLE_LIMIT - 1 samples are sent to the service.
SAMPLE_LIMIT = 2000
# Seconds to wait for the service before giving up instead of hanging forever.
REQUEST_TIMEOUT = 300
REPORT_PATH = "testInterface.html"


def normalize_label(value):
    """Return *value* with full-width parentheses replaced by ASCII ones.

    Strings are translated directly; dicts, lists and tuples are rebuilt with
    their items normalized recursively (dict keys are left alone). Any other
    value is returned unchanged.
    """
    if isinstance(value, str):
        return value.translate(_FULLWIDTH_PARENS)
    if isinstance(value, dict):
        return {key: normalize_label(item) for key, item in value.items()}
    if isinstance(value, list):
        return [normalize_label(item) for item in value]
    if isinstance(value, tuple):
        return tuple(normalize_label(item) for item in value)
    return value


def render_report(rows):
    """Build the HTML comparison table for *rows* and return it as a string.

    Each row is ``[name, same_flag, expected, actual]``. Rows are numbered from
    1 in the order given. Every cell is HTML-escaped so that markup inside a
    file name or an extracted value cannot break the table.
    """
    import html

    parts = [
        '<html><head>'
        '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">'
        '</head>'
        '<body bgcolor="#FFFFFF">'
        '<table border="1">'
        '<tr>'
        '<td>序号</td>'
        '<td>doc_id</td>'
        '<td>same</td>'
        '<td width=40%>before</td>'
        '<td width=40%>重新训练</td>'
        '</tr>'
    ]
    for number, row in enumerate(rows, start=1):
        cells = [number] + list(row[:4])
        parts.append(
            "<tr>"
            + "".join("<td>" + html.escape(str(cell)) + "</td>" for cell in cells)
            + "</tr>"
        )
    parts.append("</table></body></html>")
    return "".join(parts)


def fetch_processed_ids():
    """Return the ``id`` column of ``articles_processed`` as a list.

    The cursor and the connection are always closed, even when the query fails.
    """
    conn = psycopg2.connect(dbname="article_label", user="postgres", password="postgres", host="192.168.2.101")
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(" select id from articles_processed ")
            return [row[0] for row in cursor.fetchall()]
        finally:
            cursor.close()
    finally:
        conn.close()


def request_extraction(content):
    """POST *content* to ``/content_extract`` and return its ``code``/``name`` fields.

    Raises ``requests.HTTPError`` on a non-2xx response and ``KeyError`` when
    the JSON body lacks ``code`` or ``name``.
    """
    payload = {
        "content": content,
        "title": "XXXXXX",
        "doc_id": "1234555",
    }
    response = requests.post(
        SERVICE_BASE_URL + '/content_extract',
        json=payload,
        headers={'Content-Type': 'application/json'},
        verify=True,
        timeout=REQUEST_TIMEOUT,
    )
    # Fail with the HTTP status rather than an obscure JSON/KeyError further down.
    response.raise_for_status()
    body = json.loads(response.content.decode("utf-8"))
    return {"code": body["code"], "name": body["name"]}


def main():
    """Compare stored labels with the service's output and write an HTML report.

    For each sample the expected ``code``/``name`` pair is compared with what
    the service extracts from the sample's content. ``same_flag`` is 0 when
    they are equal and 1 otherwise; mismatches are listed first in the report.
    """
    # NOTE(review): the ids are not used anywhere below; the query is kept only
    # because it may be serving as a connectivity check -- drop it if not.
    fetch_processed_ids()

    # `load` comes from the BiddingKG.dl.common.Utils star import.
    samples = load(SAMPLE_PATH)

    results = []
    for index, sample in enumerate(samples, start=1):
        name = sample["filename"]
        content = sample["content"]
        print(index, len(samples))
        if index >= SAMPLE_LIMIT:
            break

        expected = normalize_label({"code": sample["code"], "name": sample["name"]})
        actual = request_extraction(content)
        print(expected)
        print(actual)

        # Compare dict with dict; comparing a stringified dict with a dict is never equal.
        same_flag = 0 if expected == actual else 1
        results.append([name, same_flag, expected, actual])

    # Write the results to a file for easy inspection, mismatches (flag 1) first.
    results.sort(key=lambda item: item[1], reverse=True)
    with codecs.open(REPORT_PATH, "w", encoding="utf8") as report:
        report.write(render_report(results))


if __name__ == "__main__":
    # Code for testing the extraction interface.
    main()
-
|