'''
Created on 2018-12-26

@author: User

Manual smoke test for the ``/article_extract`` HTTP interface.

Every HTML file matched by ``ARTICLE_GLOB`` is posted to the extraction
service and the raw responses are collected into ``testInterface.html`` so
they can be inspected side by side in a browser.
'''
import sys
import os
import codecs
import re

# The BiddingKG package is resolved relative to the working directory, so the
# path must be extended before the project imports below.
sys.path.append(os.path.abspath("../.."))

import requests
from BiddingKG.dl.common.Connection import *
import time
import psycopg2
import glob
from BiddingKG.dl.common.Utils import *
# Imported after the wildcard imports so that they cannot shadow the name.
import html

# Headers sent with every request; the payload is passed as JSON.
myheaders = {'Content-Type': 'application/json'}

# Service endpoints under test.
# guardian_base = 'http://47.110.128.185:15015'
guardian_base1 = 'http://127.0.0.1:15015'
guardian_base2 = 'http://192.168.2.101:15015'

# Input documents and output report.
ARTICLE_GLOB = "C:\\Users\\User\\Desktop\\20190416要素\\*.html"
REPORT_PATH = "testInterface.html"

# The loop stops as soon as the running file counter reaches this value.
FILE_LIMIT = 2000

# (connect, read) timeout in seconds; without one a stuck request would block
# the whole run forever.
REQUEST_TIMEOUT = (10, 600)

# Static parts of the HTML report. The column titles are runtime text and are
# kept exactly as they appear in the original report.
REPORT_HEAD = (
    '<html>'
    '<head>'
    '<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
    '</head>'
    '<body>'
    '<table border="1">'
    '<tr>'
    '<td>序号</td><td>doc_id</td><td>same</td><td>before</td><td>重新训练</td>'
    '</tr>'
)
REPORT_TAIL = '</table></body></html>'


def _fetch_processed_ids():
    '''Return the ``id`` column of every row in ``articles_processed``.

    The connection and cursor are always closed, even when the query fails.
    '''
    # conn = getConnection()
    # NOTE(review): credentials are hard-coded; consider moving them to
    # configuration.
    conn = psycopg2.connect(dbname="article_label", user="postgres",
                            password="postgres", host="192.168.2.101")
    try:
        cursor = conn.cursor()
        try:
            # validation
            # sql = " select content,id from articles where id in(select doc_id from articles_validation where exists(select 1 from articles_processed where id=doc_id)) order by id"
            # training
            # sql = " select content,id from articles where not exists(select 1 from articles_validation where doc_id=articles.id) order by id limit 5000"
            sql = " select id from articles_processed "
            cursor.execute(sql)
            return [row[0] for row in cursor.fetchall()]
        finally:
            cursor.close()
    finally:
        conn.close()


def _extract(base_url, payload):
    '''POST ``payload`` to ``/article_extract`` and return the decoded body.

    A failed or timed-out request is reported on stdout and returned as an
    error string, so one bad document does not discard the results that
    were already collected.
    '''
    try:
        resp = requests.post(base_url + '/article_extract', json=payload,
                             headers=myheaders, verify=True,
                             timeout=REQUEST_TIMEOUT)
    except requests.RequestException as err:
        print("request failed:", err)
        return "request failed: " + str(err)
    return resp.content.decode("utf-8")


def _normalize_parens(text):
    '''Map full-width parentheses to their ASCII counterparts.

    Escapes are used so the literals survive any re-encoding of this file.
    NOTE(review): the replace() calls in the source mapped "(" to "(", which
    is a no-op; full-width input is the presumed intent - confirm.
    '''
    return str(text).replace("\uff08", "(").replace("\uff09", ")")


def _collect_results():
    '''Send every input file to the service and return the result rows.

    Each row is ``[file name, same_flag, response, reference response]``.
    '''
    result = []
    files = glob.glob(ARTICLE_GLOB)
    for i, file in enumerate(files, 1):
        print(i, len(files))
        if i >= FILE_LIMIT:
            break
        print(file)
        name = os.path.basename(file)
        with codecs.open(file, "r", encoding="utf8") as f:
            content = f.read()
        # Alternative inputs used during development (sample text verbatim):
        # content = row[0]
        # content = "110kV龙台变~净 化厂35kV电力线路(外电部分) 附件下载,文件请用音频播放器或者360浏览器打开 公告来源:http://bulletin.cebpubservice.com/biddingBulletin/2019-02-12/1058505.html"
        user = {
            "content": content,
            "title": "XXXXXX",
            "doc_id": "1234555",
        }
        resp_json = _extract(guardian_base2, user)
        print(resp_json)
        # The reference request against guardian_base1 is disabled, so the
        # reference response stays empty:
        # resp_json1 = _extract(guardian_base1, user)
        resp_json1 = ""
        resp_json = _normalize_parens(resp_json)
        same_flag = 0 if resp_json == resp_json1 else 1
        result.append([name, same_flag, resp_json, resp_json1])
    return result


def _write_report(result, path=REPORT_PATH):
    '''Write ``result`` to ``path`` as an HTML table for easy viewing.

    Rows are sorted in place by ``same_flag`` in descending order (differing
    responses first). Cell values are HTML-escaped so markup inside a
    response cannot break the table.
    '''
    result.sort(key=lambda x: x[1], reverse=True)
    with codecs.open(path, "w", encoding="utf8") as f:
        f.write(REPORT_HEAD)
        for i, item in enumerate(result, 1):
            cells = [i, item[0], item[1], item[2], item[3]]
            f.write("<tr>"
                    + "".join("<td>" + html.escape(str(cell)) + "</td>"
                              for cell in cells)
                    + "</tr>")
        f.write(REPORT_TAIL)


def main():
    '''Run the interface test end to end.'''
    # The ids are not used by the rest of this script; the query is kept so
    # the script's database interaction is unchanged.
    ids = _fetch_processed_ids()
    result = _collect_results()
    _write_report(result)


if __name__ == "__main__":
    # Code for testing the interface.
    main()