123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107 |
- # -*- coding: utf-8 -*-
- """
- Created on Fri Jun 1 18:03:03 2018
- @author: DONG
- """
- import sys
- import os
- sys.path.append(os.path.abspath(".."))
- print(sys.path)
- from flask import Flask, jsonify
- from flask import abort
- from flask import request
- import json
- import logging
- import redis
- import time
- import uuid
- import settings
- import pandas as pd
- from bs4 import BeautifulSoup
- # import tensorflow as tf
# Configure process-wide logging from the values in the settings module.
logging.basicConfig(
    level=settings.LOG_LEVEL,
    format=settings.LOG_FORMAT,
    datefmt=settings.DATE_FORMAT,
)
logger = logging.getLogger(__name__)

app = Flask(__name__)
# Keep non-ASCII characters readable in JSON responses instead of escaping them.
app.config.update(JSON_AS_ASCII=False)

# Disabled: route Flask's logger through gunicorn's handlers and level.
# gunicorn_logger = logging.getLogger('gunicorn.error')
# app.logger.handlers = gunicorn_logger.handlers
# app.logger.setLevel(gunicorn_logger.level)

# Create the Redis connection used by the request handlers in this module.
db = redis.StrictRedis(
    host=settings.REDIS_HOST,
    port=settings.REDIS_PORT,
    db=settings.REDIS_DB,
    password=settings.REDIS_PASS,
)
def _await_result(key, timeout):
    """Poll Redis until a result is stored under *key* or *timeout* expires.

    Args:
        key: Redis key (the request id) under which the result is expected.
        timeout: Maximum number of seconds to keep polling.

    Returns:
        The parsed JSON result, or ``None`` if nothing appeared in time.
    """
    deadline = time.monotonic() + timeout
    while True:
        raw = db.get(key)
        if raw is not None:
            # Remove the key before parsing so that a malformed payload
            # cannot leave a stale entry behind in Redis.
            db.delete(key)
            return json.loads(raw.decode("utf-8"))
        if time.monotonic() >= deadline:
            return None
        # Wait briefly before asking Redis again.
        time.sleep(settings.CLIENT_SLEEP)


@app.route('/article_extract', methods=['POST'])
def text_predict():
    """Queue an article for extraction and wait for its result.

    The request body must be a JSON object with a string ``content`` field;
    ``doc_id`` and ``title`` are optional. The content is pushed onto the
    Redis list ``settings.CONTENT_QUEUE`` under a fresh UUID, and the handler
    then polls Redis for a value stored under that UUID (written by whatever
    process consumes the queue).

    Returns:
        A ``(response, status)`` tuple whose JSON body has the keys ``code``,
        ``name``, ``prem`` and ``success``.

        * 201 with the extracted values when a result arrives.
        * 201 with the empty default body when the markup-stripped text
          exceeds ``MAX_CONTENT`` characters, or when the raw content is 10
          characters or shorter (such requests are never queued).
        * 504 with ``success`` set to ``False`` when no result arrives within
          ``settings.CLIENT_TIMEOUT`` seconds (600 if that setting is absent).

    Raises:
        400 (via ``abort``) when the body is not a JSON object, lacks
        ``content``, or ``content`` is not a string.
    """
    start_time = time.time()
    # Default response body, returned as-is when nothing is processed.
    data = {"code": [], "name": "", "prem": [], "success": True}
    MAX_CONTENT = 150000

    # The route only accepts POST, so no explicit method check is needed.
    payload = request.json
    if not isinstance(payload, dict) or 'content' not in payload:
        abort(400)
    content = payload['content']
    if not isinstance(content, str):
        # A non-string would otherwise crash the parser/len() with a 500.
        abort(400)

    has_doc_id = "doc_id" in payload

    # The size limit applies to the text with markup stripped.
    plain_text = BeautifulSoup(content, "lxml").get_text()
    if len(plain_text) > MAX_CONTENT:
        logger.info("over the limit of content")
        if has_doc_id:
            logger.info("receive request with doc_id=%s", payload['doc_id'])
        return jsonify(data), 201

    # Only content longer than 10 characters is worth processing.
    if len(content) > 10:
        # A random UUID identifies this request in the queue and is also the
        # Redis key under which the result is expected.
        request_id = str(uuid.uuid4())
        job = {
            "id": request_id,
            "content": content,
            "jointime": time.time(),
            "doc_id": "",
            "title": "",
        }
        if has_doc_id:
            job["doc_id"] = payload['doc_id']
            logger.info("receive request with doc_id=%s", payload['doc_id'])
        if "title" in payload:
            job["title"] = payload['title']
        db.rpush(settings.CONTENT_QUEUE, json.dumps(job))

        # Bound the wait so a stalled consumer cannot pin this request
        # thread forever.
        timeout = getattr(settings, "CLIENT_TIMEOUT", 600)
        result = _await_result(request_id, timeout)
        if result is None:
            # NOTE(review): if the consumer finishes later, its result key
            # will remain in Redis unless it is written with an expiry.
            logger.warning("no result for id=%s within %s seconds",
                           request_id, timeout)
            data["success"] = False
            return jsonify(data), 504

        # Copy the extraction result into the response body.
        for field in ("code", "name", "prem"):
            data[field] = result[field]
        if "success" in result:
            data["success"] = result["success"]

    logger.info(" time from receive to send: %s", time.time() - start_time)
    return jsonify(data), 201
if __name__ == '__main__':
    # The server listens on all interfaces, and Flask's debug mode enables an
    # interactive debugger that can execute arbitrary code. Debug is therefore
    # off unless explicitly requested, e.g. FLASK_DEBUG=1 for local work.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in (
        "1", "true", "yes", "on")
    app.run(host='0.0.0.0', port=15014, threaded=True, debug=debug_enabled)
|