# -*- coding: utf-8 -*-
"""
Content-extraction HTTP service.

Exposes a single POST endpoint ``/content_extract`` that runs the
BiddingKG preprocessing + prediction pipeline over a posted article and
returns the extracted codes/names/prem attributes as JSON.

Created on Fri Jun 1 18:03:03 2018

@author: DONG
"""
import sys
import os
import json
import re
import time
import uuid
import traceback

# Make the project root importable before pulling in BiddingKG modules.
sys.path.append(os.path.abspath("../.."))

from flask import Flask, jsonify
from flask import abort
from flask import request
from bs4 import BeautifulSoup, Comment

from BiddingKG.dl.common.Utils import log
import BiddingKG.dl.interface.predictor as predictor
import BiddingKG.dl.interface.Preprocessing as Preprocessing
import BiddingKG.dl.interface.getAttributes as getAttributes
import BiddingKG.dl.entityLink.entityLink as entityLink

app = Flask(__name__)
app.config['JSON_AS_ASCII'] = False

# Models are loaded once at import time so every request reuses them.
codeNamePredict = predictor.CodeNamePredict()
premPredict = predictor.PREMPredict()
epcPredict = predictor.EPCPredict()
roleRulePredict = predictor.RoleRulePredictor()


@app.route('/content_extract', methods=['POST'])
def text_predict():
    """Run the extraction pipeline over the posted article content.

    Expects a JSON body containing at least ``content``; ``doc_id`` and
    ``title`` are optional.  Responds with HTTP 400 if the body is not
    JSON or lacks ``content``; otherwise always responds 201 with a JSON
    payload carrying a ``success`` flag and, on success, the extraction
    result plus per-stage timings under ``cost_time``.
    """
    start_time = time.time()
    # Default payload returned when extraction fails.
    # BUG FIX: this dict used to be immediately overwritten by an
    # unrelated leftover ``{"listpage_url": "", "status_code": 201}``,
    # so error responses lacked the documented fields.
    data = {"code": [], "name": "", "prem": [], "success": False}

    # Validate the request: must be a JSON body with a "content" field.
    if request.method == "POST":
        if (not request.json) or ('content' not in request.json):
            abort(400)
        else:
            _doc_id = request.json.get('doc_id', "")
            _title = request.json.get('title', "")
            try:
                log("get request of doc_id:%s" % (_doc_id))
                k = str(uuid.uuid4())
                cost_time = dict()
                content = request.json['content']

                # Stage 1: preprocessing (sentence split, NER, ...).
                start_time = time.time()
                list_articles, list_sentences, list_entitys, _cost_time = \
                    Preprocessing.get_preprocessed(
                        [[k, content, "", _doc_id, _title]],
                        useselffool=True)
                cost_time["preprocess"] = time.time() - start_time
                cost_time.update(_cost_time)

                # Stage 2: project code / project name prediction.
                start_time = time.time()
                codeName = codeNamePredict.predict(list_sentences, list_entitys)
                cost_time["codename"] = time.time() - start_time

                # Stage 3: PREM (tenderer/agency/money...) role prediction.
                start_time = time.time()
                premPredict.predict(list_sentences, list_entitys)
                cost_time["prem"] = time.time() - start_time

                # Stage 4: rule-based role correction (uses codeName output).
                start_time = time.time()
                roleRulePredict.predict(list_articles, list_sentences,
                                        list_entitys, codeName)
                cost_time["rule"] = time.time() - start_time

                # Stage 5: person/contact (EPC) prediction.
                start_time = time.time()
                epcPredict.predict(list_sentences, list_entitys)
                cost_time["person"] = time.time() - start_time

                # Stage 6: entity linking + attribute assembly.
                start_time = time.time()
                entityLink.link_entitys(list_entitys)
                prem = getAttributes.getPREMs(list_sentences, list_entitys,
                                              list_articles)
                cost_time["attrs"] = time.time() - start_time

                # union_result merges codeName and prem per document; we
                # submitted exactly one document, hence [0][1].
                data = Preprocessing.union_result(codeName, prem)[0][1]
                data["cost_time"] = cost_time
                data["success"] = True
            except Exception as e:
                # Best-effort service: report the failure in the payload
                # rather than crashing the request.
                traceback.print_exc(file=sys.stdout)
                data["success"] = False

    # Return the result as JSON (201 even on failure, by contract).
    _resp = jsonify(data)
    log("done for doc_id:%s with result:%s" % (_doc_id, str(data)))
    return _resp, 201


if __name__ == '__main__':
    # BUG FIX: log before app.run() — the original logged after it, so
    # the message only appeared once the server had shut down.
    log("ContentExtractor running")
    app.run(host='0.0.0.0', port=15013, threaded=True, debug=False)