|
@@ -7,15 +7,13 @@ Created on 2019年12月3日
|
|
import allspark
|
|
import allspark
|
|
import sys
|
|
import sys
|
|
import os
|
|
import os
|
|
|
|
+os.environ["KERAS_BACKEND"] = "tensorflow"
|
|
import json
|
|
import json
|
|
import re
|
|
import re
|
|
import time
|
|
import time
|
|
import uuid
|
|
import uuid
|
|
from BiddingKG.dl.common.Utils import log
|
|
from BiddingKG.dl.common.Utils import log
|
|
-import BiddingKG.dl.interface.predictor as predictor
|
|
|
|
-import BiddingKG.dl.interface.Preprocessing as Preprocessing
|
|
|
|
-import BiddingKG.dl.interface.getAttributes as getAttributes
|
|
|
|
-import BiddingKG.dl.entityLink.entityLink as entityLink
|
|
|
|
|
|
+from BiddingKG.dl.interface.extract import predict
|
|
import numpy as np
|
|
import numpy as np
|
|
import ctypes
|
|
import ctypes
|
|
import inspect
|
|
import inspect
|
|
@@ -129,7 +127,7 @@ class MyProcessor(allspark.BaseProcessor):
|
|
print(entity.entity_text,entity.entity_type,entity.sentence_index,entity.begin_index,entity.label,entity.values)
|
|
print(entity.entity_text,entity.entity_type,entity.sentence_index,entity.begin_index,entity.label,entity.values)
|
|
'''
|
|
'''
|
|
#print(prem)
|
|
#print(prem)
|
|
- data_res = Preprocessing.union_result(codeName, prem)[0][1]
|
|
|
|
|
|
+ data_res = predict(docid)
|
|
data_res["cost_time"] = cost_time
|
|
data_res["cost_time"] = cost_time
|
|
data_res["success"] = True
|
|
data_res["success"] = True
|
|
#return json.dumps(Preprocessing.union_result(codeName, prem)[0][1],cls=MyEncoder,sort_keys=True,indent=4,ensure_ascii=False)
|
|
#return json.dumps(Preprocessing.union_result(codeName, prem)[0][1],cls=MyEncoder,sort_keys=True,indent=4,ensure_ascii=False)
|
|
@@ -151,10 +149,6 @@ class MyProcessor(allspark.BaseProcessor):
|
|
do service initialization and load models in this function.
|
|
do service initialization and load models in this function.
|
|
"""'''
|
|
"""'''
|
|
'''
|
|
'''
|
|
- self.codeNamePredict = predictor.CodeNamePredict()
|
|
|
|
- self.premPredict = predictor.PREMPredict()
|
|
|
|
- self.epcPredict = predictor.EPCPredict()
|
|
|
|
- self.roleRulePredict = predictor.RoleRulePredictor()
|
|
|
|
self.timeout = 60
|
|
self.timeout = 60
|
|
self.status_types = 5
|
|
self.status_types = 5
|
|
self.timeOfType = self.timeout//self.status_types
|
|
self.timeOfType = self.timeout//self.status_types
|
|
@@ -176,102 +170,28 @@ class MyProcessor(allspark.BaseProcessor):
|
|
"""
|
|
"""
|
|
data = data.decode("utf8")
|
|
data = data.decode("utf8")
|
|
data = json.loads(data,encoding="utf8")
|
|
data = json.loads(data,encoding="utf8")
|
|
- # k = str(uuid.uuid4())
|
|
|
|
- # cost_time = dict()
|
|
|
|
- # if "doc_id" in data:
|
|
|
|
- # _doc_id = data['doc_id']
|
|
|
|
- # else:
|
|
|
|
- # _doc_id = ""
|
|
|
|
- # if "title" in data:
|
|
|
|
- # _title = data["title"]
|
|
|
|
- # else:
|
|
|
|
- # _title = ""
|
|
|
|
- # data_res = ""
|
|
|
|
- # try:
|
|
|
|
- # if "content" in data:
|
|
|
|
- # log("get request of doc_id:%s"%(_doc_id))
|
|
|
|
- # k = str(uuid.uuid4())
|
|
|
|
- # cost_time = dict()
|
|
|
|
- # content = data['content']
|
|
|
|
- # start_time = time.time()
|
|
|
|
- # list_articles,list_sentences,list_entitys,_cost_time = Preprocessing.get_articles_processed([[k,content,"",_doc_id,_title]],useselffool=True)
|
|
|
|
- # log("get preprocessed done of doc_id%s"%(_doc_id))
|
|
|
|
- # cost_time["preprocess"] = time.time()-start_time
|
|
|
|
- # cost_time.update(_cost_time)
|
|
|
|
- # '''
|
|
|
|
- # for articles in list_articles:
|
|
|
|
- # print(articles.content)
|
|
|
|
- #
|
|
|
|
- # '''
|
|
|
|
- # start_time = time.time()
|
|
|
|
- # codeName = self.codeNamePredict.predict(list_articles,MAX_AREA=2000)
|
|
|
|
- # log("get codename done of doc_id%s"%(_doc_id))
|
|
|
|
- # cost_time["codename"] = time.time()-start_time
|
|
|
|
- #
|
|
|
|
- # start_time = time.time()
|
|
|
|
- # self.premPredict.predict(list_sentences,list_entitys)
|
|
|
|
- # log("get prem done of doc_id%s"%(_doc_id))
|
|
|
|
- # cost_time["prem"] = time.time()-start_time
|
|
|
|
- # start_time = time.time()
|
|
|
|
- # self.roleRulePredict.predict(list_articles,list_sentences, list_entitys,codeName)
|
|
|
|
- # cost_time["rule"] = time.time()-start_time
|
|
|
|
- # start_time = time.time()
|
|
|
|
- # self.epcPredict.predict(list_sentences,list_entitys)
|
|
|
|
- # log("get epc done of doc_id%s"%(_doc_id))
|
|
|
|
- # cost_time["person"] = time.time()-start_time
|
|
|
|
- # start_time = time.time()
|
|
|
|
- # entityLink.link_entitys(list_entitys)
|
|
|
|
- # '''
|
|
|
|
- # for list_entity in list_entitys:
|
|
|
|
- # for _entity in list_entity:
|
|
|
|
- # for _ent in _entity.linked_entitys:
|
|
|
|
- # print(_entity.entity_text,_ent.entity_text)
|
|
|
|
- # '''
|
|
|
|
- # prem = getAttributes.getPREMs(list_sentences,list_entitys,list_articles)
|
|
|
|
- # log("get attributes done of doc_id%s"%(_doc_id))
|
|
|
|
- # cost_time["attrs"] = time.time()-start_time
|
|
|
|
- #
|
|
|
|
- #
|
|
|
|
- # '''
|
|
|
|
- #
|
|
|
|
- #
|
|
|
|
- # for entitys in list_entitys:
|
|
|
|
- # for entity in entitys:
|
|
|
|
- # print(entity.entity_text,entity.entity_type,entity.sentence_index,entity.begin_index,entity.label,entity.values)
|
|
|
|
- # '''
|
|
|
|
- # #print(prem)
|
|
|
|
- # data_res = Preprocessing.union_result(codeName, prem)[0][1]
|
|
|
|
- # data_res["cost_time"] = cost_time
|
|
|
|
- # data_res["success"] = True
|
|
|
|
- # #return json.dumps(Preprocessing.union_result(codeName, prem)[0][1],cls=MyEncoder,sort_keys=True,indent=4,ensure_ascii=False)
|
|
|
|
- # else:
|
|
|
|
- # data_res = {"success":False,"msg":"content not passed"}
|
|
|
|
- #
|
|
|
|
- #
|
|
|
|
- # except Exception as e:
|
|
|
|
- # data_res = {"success":False,"msg":str(e)}
|
|
|
|
- # # 以json形式返回结果
|
|
|
|
- # _resp = json.dumps(data_res,cls=MyEncoder)
|
|
|
|
- # #log(str(data["flag"])+str(data))
|
|
|
|
- # log("done for doc_id:%s with result:%s"%(_doc_id,str(data_res)))
|
|
|
|
- _timeout = self.timeout
|
|
|
|
|
|
+
|
|
|
|
+ _doc_id = data.get("doc_id","")
|
|
|
|
+ _title = data.get("title","")
|
|
|
|
+ _content = data.get("content","")
|
|
|
|
|
|
status_code = 200
|
|
status_code = 200
|
|
- if "timeout" in data:
|
|
|
|
- _timeout = data["timeout"]
|
|
|
|
|
|
+ # if "timeout" in data:
|
|
|
|
+ # _timeout = data["timeout"]
|
|
list_result = []
|
|
list_result = []
|
|
- t = Thread(target=self.run_thread,args=(data,list_result))
|
|
|
|
- start_time = time.time()
|
|
|
|
- t.start()
|
|
|
|
- t.join(_timeout)
|
|
|
|
- if t.is_alive():
|
|
|
|
- stop_thread(t)
|
|
|
|
- status_code = 302#超时被kill
|
|
|
|
- data_res = {"success":False,"msg":"timeout"}
|
|
|
|
- else:
|
|
|
|
- status_code += int((time.time()-start_time)//self.timeOfType+1)
|
|
|
|
- data_res = list_result[0]
|
|
|
|
- _resp = json.dumps(data_res,cls=MyEncoder)
|
|
|
|
|
|
+ # t = Thread(target=self.run_thread,args=(data,list_result))
|
|
|
|
+ # start_time = time.time()
|
|
|
|
+ # t.start()
|
|
|
|
+ # t.join(_timeout)
|
|
|
|
+ # if t.is_alive():
|
|
|
|
+ # stop_thread(t)
|
|
|
|
+ # status_code = 302#超时被kill
|
|
|
|
+ # data_res = {"success":False,"msg":"timeout"}
|
|
|
|
+ # else:
|
|
|
|
+ # status_code += int((time.time()-start_time)//self.timeOfType+1)
|
|
|
|
+ # data_res = list_result[0]
|
|
|
|
+ # _resp = json.dumps(data_res,cls=MyEncoder)
|
|
|
|
+ _resp = predict(doc_id=_doc_id,text=_content,title=_title)
|
|
|
|
|
|
return self.post_process(_resp),status_code
|
|
return self.post_process(_resp),status_code
|
|
|
|
|