# -*- coding: utf-8 -*-
"""
HTTP front end for the article-extraction service.

Created on Fri Jun 1 18:03:03 2018

@author: DONG

Exposes ``POST /article_extract``. An accepted request is serialised to JSON,
pushed onto the Redis list ``settings.CONTENT_QUEUE`` and the handler then
polls Redis for a value stored under the request's UUID, which it returns to
the caller as JSON.
"""
import sys
import os

# The parent directory is put on sys.path before the remaining imports run
# (presumably so that ``settings`` can be found there - confirm).
sys.path.append(os.path.abspath(".."))
print(sys.path)

from flask import Flask, jsonify
from flask import abort
from flask import request
import json
import logging
import redis
import time
import uuid
import settings
import pandas as pd
from bs4 import BeautifulSoup
# import tensorflow as tf

logging.basicConfig(level=settings.LOG_LEVEL,
                    format=settings.LOG_FORMAT,
                    datefmt=settings.DATE_FORMAT)
logger = logging.getLogger(__name__)

app = Flask(__name__)
app.config['JSON_AS_ASCII'] = False

# Disabled: reuse gunicorn's error-log handlers and level for app.logger.
# gunicorn_logger = logging.getLogger('gunicorn.error')
# app.logger.handlers = gunicorn_logger.handlers
# app.logger.setLevel(gunicorn_logger.level)

# Create the Redis connection.
db = redis.StrictRedis(host=settings.REDIS_HOST, port=settings.REDIS_PORT,
                       db=settings.REDIS_DB, password=settings.REDIS_PASS)

# Requests whose HTML-stripped text is longer than this are not queued.
MAX_CONTENT = 150000
# Only raw content strictly longer than this many characters is queued.
MIN_CONTENT_LENGTH = 10
# Optional upper bound, in seconds, on how long one request polls Redis for
# its result. None (the value used when settings does not define
# CLIENT_TIMEOUT) means "wait indefinitely", which is the historical behaviour.
RESULT_TIMEOUT = getattr(settings, "CLIENT_TIMEOUT", None)


def _wait_for_result(key):
    """Poll Redis until ``key`` holds a value or the optional deadline passes.

    Returns the raw value read from Redis, or None when ``RESULT_TIMEOUT`` is
    set and expired first. The key is not deleted here; the caller does that
    after it has consumed the value.
    """
    deadline = None
    if RESULT_TIMEOUT is not None:
        # monotonic() so that wall-clock adjustments cannot stretch the wait.
        deadline = time.monotonic() + RESULT_TIMEOUT

    while True:
        # Try to fetch the result from Redis.
        output = db.get(key)
        if output is not None:
            return output
        if deadline is not None and time.monotonic() >= deadline:
            return None
        # Wait briefly, then ask again.
        time.sleep(settings.CLIENT_SLEEP)


@app.route('/article_extract', methods=['POST'])
def text_predict():
    """Queue the posted article for extraction and return the result as JSON.

    The request body must be a JSON object with a string ``content`` field;
    ``doc_id`` and ``title`` are optional and forwarded as-is. Anything else
    is rejected with HTTP 400.

    Returns a ``(response, 201)`` pair whose JSON body has the keys ``code``,
    ``name``, ``prem`` and ``success``. The body keeps its initial empty
    values when the stripped text exceeds ``MAX_CONTENT`` or the raw content
    is not longer than ``MIN_CONTENT_LENGTH``; in those cases nothing is
    queued. If ``RESULT_TIMEOUT`` is set and no result arrives in time,
    ``success`` is set to False.
    """
    start_time = time.time()
    # Initialise the result that will be returned.
    data = {"code": [], "name": "", "prem": [], "success": True}

    # Make sure the request is well formed. silent=True turns an unparsable
    # body into None so every malformed request ends in the same 400. The
    # route only registers POST, so no method check is needed.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or not isinstance(payload.get('content'), str):
        abort(400)

    content = payload['content']
    plain_text = BeautifulSoup(content, "lxml").get_text()
    if len(plain_text) > MAX_CONTENT:
        logger.info("over the limit of content")
        if "doc_id" in payload:
            logger.info("receive request with doc_id=%s", payload['doc_id'])
        return jsonify(data), 201

    # Disabled: reject over-long raw content with success=False.
    # if len(content) > MAX_CONTENT:
    #     data["success"] = False
    #     return jsonify(data), 201

    # Only texts longer than MIN_CONTENT_LENGTH characters are processed.
    if len(content) > MIN_CONTENT_LENGTH:
        # Generate a random UUID as the ID of this text, then push ID + text
        # onto the Redis queue of pending work.
        k = str(uuid.uuid4())
        job = {"id": k, "content": content, "jointime": time.time(),
               "doc_id": "", "title": ""}
        if "doc_id" in payload:
            job["doc_id"] = payload['doc_id']
            logger.info("receive request with doc_id=%s", payload['doc_id'])
        if "title" in payload:
            job["title"] = payload['title']
        db.rpush(settings.CONTENT_QUEUE, json.dumps(job))

        # Keep polling until the model server has handled the task and
        # stored its prediction under the request ID.
        output = _wait_for_result(k)
        if output is None:
            # The queued job is left in place; if a result is written under
            # this key later, nothing in this handler removes it.
            logger.warning("no result for id=%s within %s seconds", k, RESULT_TIMEOUT)
            data['success'] = False
        else:
            # Copy the prediction into the response dictionary.
            result_json = json.loads(output.decode("utf-8"))
            data['code'] = result_json['code']
            data['name'] = result_json['name']
            data['prem'] = result_json['prem']
            if "success" in result_json:
                data['success'] = result_json["success"]
            # Remove the consumed result from Redis.
            db.delete(k)

    # Return the result as JSON.
    logger.info(" time from receive to send: %s", time.time() - start_time)
    return jsonify(data), 201


if __name__ == '__main__':
    # NOTE(review): debug=True turns on the Werkzeug interactive debugger and
    # host='0.0.0.0' listens on every interface; together they let anyone who
    # can reach the port run code on this machine. Confirm this entry point is
    # only used for local development.
    app.run(host='0.0.0.0', port=15014, threaded=True, debug=True)