# run_web_server.py (3.9 KB)
  1. # -*- coding: utf-8 -*-
  2. """
  3. Created on Fri Jun 1 18:03:03 2018
  4. @author: DONG
  5. """
  6. import sys
  7. import os
  8. sys.path.append(os.path.abspath(".."))
  9. print(sys.path)
  10. from flask import Flask, jsonify
  11. from flask import abort
  12. from flask import request
  13. import json
  14. import logging
  15. import redis
  16. import time
  17. import uuid
  18. import settings
  19. import pandas as pd
  20. from bs4 import BeautifulSoup
  21. # import tensorflow as tf
  22. logging.basicConfig(level=settings.LOG_LEVEL,
  23. format=settings.LOG_FORMAT, datefmt=settings.DATE_FORMAT)
  24. logger = logging.getLogger(__name__)
  25. app = Flask(__name__)
  26. app.config['JSON_AS_ASCII'] = False
  27. '''
  28. gunicorn_logger = logging.getLogger('gunicorn.error')
  29. app.logger.handlers = gunicorn_logger.handlers
  30. app.logger.setLevel(gunicorn_logger.level)
  31. '''
  32. # 创建Redis数据库连接
  33. db = redis.StrictRedis(host=settings.REDIS_HOST, port=settings.REDIS_PORT,
  34. db=settings.REDIS_DB,password=settings.REDIS_PASS)
  35. @app.route('/article_extract', methods=['POST'])
  36. def text_predict():
  37. start_time = time.time()
  38. # 初始化待返回结果
  39. data = {"code": [],"name":"","prem":[],"success":True}
  40. MAX_CONTENT = 150000
  41. # 确保请求符合要求
  42. if request.method == "POST":
  43. if (not request.json) or ('content' not in request.json):
  44. abort(400)
  45. else:
  46. # 随机生成UUID作为每段文字的ID,然后把ID+文本添加到Redis待处理队列中
  47. k = str(uuid.uuid4())
  48. #k = request.json['id']
  49. content = request.json['content']
  50. _text = str(BeautifulSoup(content,"lxml").get_text())
  51. if len(_text)>MAX_CONTENT:
  52. logger.info("over the limit of content")
  53. if "doc_id" in request.json:
  54. logger.info("receive request with doc_id="+str(request.json['doc_id']))
  55. return jsonify(data), 201
  56. #print(k)
  57. '''
  58. if len(content)>MAX_CONTENT:
  59. data["success"] = False
  60. return jsonify(data), 201
  61. '''
  62. # 只处理文章长度在10个字以上的文本
  63. if len(content) > 10:
  64. d = {"id": k, "content": content,"jointime":time.time(),"doc_id":"","title":""}
  65. if "doc_id" in request.json:
  66. d["doc_id"] = request.json['doc_id']
  67. logger.info("receive request with doc_id="+str(request.json['doc_id']))
  68. if "title" in request.json:
  69. d["title"] = request.json['title']
  70. db.rpush(settings.CONTENT_QUEUE, json.dumps(d))
  71. # 不停循环直到模型服务器处理完任务并返回分类预测
  72. while True:
  73. # 尝试去redis获取结果
  74. output = db.get(k)
  75. # 验证输出是否已有分类结果
  76. if output is not None:
  77. # 把输出的分类结果添加到返回结果的字典中
  78. output = output.decode("utf-8")
  79. result_json = json.loads(output)
  80. data['code'] = result_json['code']
  81. data['name'] = result_json['name']
  82. data['prem'] = result_json['prem']
  83. if "success" in result_json.keys():
  84. data['success'] = result_json["success"]
  85. # 从redis中移除数据并中断循环
  86. db.delete(k)
  87. break
  88. # 让程序短暂等待,然后重新请求
  89. time.sleep(settings.CLIENT_SLEEP)
  90. # 表示程序能成功返回结果
  91. # 以json形式返回结果
  92. logger.info(" time from receive to send: "+str(time.time()-start_time))
  93. return jsonify(data), 201
  94. if __name__ == '__main__':
  95. app.run(host='0.0.0.0', port=15014, threaded=True, debug=True)