run_single_server.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124
  1. # -*- coding: utf-8 -*-
  2. """
  3. Created on Fri Jun 1 18:03:03 2018
  4. @author: DONG
  5. """
  6. import sys
  7. import os
  8. import json
  9. import re
  10. sys.path.append(os.path.abspath("../.."))
  11. from flask import Flask, jsonify
  12. from flask import abort
  13. from flask import request
  14. import time
  15. import uuid
  16. from BiddingKG.dl.common.Utils import log
  17. from bs4 import BeautifulSoup, Comment
  18. import BiddingKG.dl.interface.predictor as predictor
  19. import BiddingKG.dl.interface.Preprocessing as Preprocessing
  20. import BiddingKG.dl.interface.getAttributes as getAttributes
  21. import BiddingKG.dl.entityLink.entityLink as entityLink
  22. import json
  23. import traceback
  24. app = Flask(__name__)
  25. app.config['JSON_AS_ASCII'] = False
  26. codeNamePredict = predictor.CodeNamePredict()
  27. premPredict = predictor.PREMPredict()
  28. epcPredict = predictor.EPCPredict()
  29. roleRulePredict = predictor.RoleRulePredictor()
  30. @app.route('/content_extract', methods=['POST'])
  31. def text_predict():
  32. start_time = time.time()
  33. # 初始化待返回结果
  34. data = {"code": [],"name":"","prem":[],"success":False}
  35. data = {"listpage_url": "","status_code":201}
  36. MAX_CONTENT = 150000
  37. # 确保请求符合要求
  38. if request.method == "POST":
  39. if (not request.json) or ('content' not in request.json):
  40. abort(400)
  41. else:
  42. if "doc_id" in request.json:
  43. _doc_id = request.json['doc_id']
  44. else:
  45. _doc_id = ""
  46. if "title" in request.json:
  47. _title = request.json["title"]
  48. else:
  49. _title = ""
  50. try:
  51. log("get request of doc_id:%s"%(_doc_id))
  52. k = str(uuid.uuid4())
  53. cost_time = dict()
  54. content = request.json['content']
  55. start_time = time.time()
  56. list_articles,list_sentences,list_entitys,_cost_time = Preprocessing.get_preprocessed([[k,content,"",_doc_id,_title]],useselffool=True)
  57. cost_time["preprocess"] = time.time()-start_time
  58. cost_time.update(_cost_time)
  59. '''
  60. for articles in list_articles:
  61. print(articles.content)
  62. '''
  63. start_time = time.time()
  64. codeName = codeNamePredict.predict(list_sentences,list_entitys)
  65. cost_time["codename"] = time.time()-start_time
  66. start_time = time.time()
  67. premPredict.predict(list_sentences,list_entitys)
  68. cost_time["prem"] = time.time()-start_time
  69. start_time = time.time()
  70. roleRulePredict.predict(list_articles,list_sentences, list_entitys,codeName)
  71. cost_time["rule"] = time.time()-start_time
  72. start_time = time.time()
  73. epcPredict.predict(list_sentences,list_entitys)
  74. cost_time["person"] = time.time()-start_time
  75. start_time = time.time()
  76. entityLink.link_entitys(list_entitys)
  77. '''
  78. for list_entity in list_entitys:
  79. for _entity in list_entity:
  80. for _ent in _entity.linked_entitys:
  81. print(_entity.entity_text,_ent.entity_text)
  82. '''
  83. prem = getAttributes.getPREMs(list_sentences,list_entitys,list_articles)
  84. cost_time["attrs"] = time.time()-start_time
  85. '''
  86. for entitys in list_entitys:
  87. for entity in entitys:
  88. print(entity.entity_text,entity.entity_type,entity.sentence_index,entity.begin_index,entity.label,entity.values)
  89. '''
  90. #print(prem)
  91. data = Preprocessing.union_result(codeName, prem)[0][1]
  92. data["cost_time"] = cost_time
  93. data["success"] = True
  94. #return json.dumps(Preprocessing.union_result(codeName, prem)[0][1],cls=MyEncoder,sort_keys=True,indent=4,ensure_ascii=False)
  95. #except Exception as e:
  96. #log(str(e))
  97. except Exception as e:
  98. traceback.print_exc(file=sys.stdout)
  99. data["success"] = False
  100. # 以json形式返回结果
  101. _resp = jsonify(data)
  102. #log(str(data["flag"])+str(data))
  103. log("done for doc_id:%s with result:%s"%(_doc_id,str(data)))
  104. return _resp, 201
  105. if __name__ == '__main__':
  106. app.run(host='0.0.0.0', port=15013, threaded=True, debug=False)
  107. log("ContentExtractor running")