|
@@ -1,14 +1,16 @@
|
|
#-*- coding: utf-8 -*-
|
|
#-*- coding: utf-8 -*-
|
|
|
|
+import gc
|
|
import json
|
|
import json
|
|
import sys
|
|
import sys
|
|
import os
|
|
import os
|
|
|
|
+import tracemalloc
|
|
from io import BytesIO
|
|
from io import BytesIO
|
|
import objgraph
|
|
import objgraph
|
|
sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
|
|
sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
|
|
# 强制tf使用cpu
|
|
# 强制tf使用cpu
|
|
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
|
|
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
|
|
from format_convert.utils import judge_error_code, request_post, get_intranet_ip, get_ip_port, get_logger, log, \
|
|
from format_convert.utils import judge_error_code, request_post, get_intranet_ip, get_ip_port, get_logger, log, \
|
|
- set_flask_global, get_md5_from_bytes
|
|
|
|
|
|
+ set_flask_global, get_md5_from_bytes, memory_decorator
|
|
from format_convert.convert_doc import doc2text, DocConvert
|
|
from format_convert.convert_doc import doc2text, DocConvert
|
|
from format_convert.convert_docx import docx2text, DocxConvert
|
|
from format_convert.convert_docx import docx2text, DocxConvert
|
|
from format_convert.convert_image import picture2text, ImageConvert
|
|
from format_convert.convert_image import picture2text, ImageConvert
|
|
@@ -21,7 +23,6 @@ from format_convert.convert_xlsx import xlsx2text, XlsxConvert
|
|
from format_convert.convert_zip import zip2text, ZipConvert
|
|
from format_convert.convert_zip import zip2text, ZipConvert
|
|
|
|
|
|
import hashlib
|
|
import hashlib
|
|
-from format_convert import get_memory_info
|
|
|
|
from format_convert.judge_platform import get_platform
|
|
from format_convert.judge_platform import get_platform
|
|
from ocr import ocr_interface
|
|
from ocr import ocr_interface
|
|
from otr import otr_interface
|
|
from otr import otr_interface
|
|
@@ -37,7 +38,6 @@ import inspect
|
|
logging.getLogger("pdfminer").setLevel(logging.WARNING)
|
|
logging.getLogger("pdfminer").setLevel(logging.WARNING)
|
|
from format_convert.table_correct import *
|
|
from format_convert.table_correct import *
|
|
import logging
|
|
import logging
|
|
-from format_convert import timeout_decorator
|
|
|
|
from format_convert.wrapt_timeout_decorator import *
|
|
from format_convert.wrapt_timeout_decorator import *
|
|
from format_convert import _global
|
|
from format_convert import _global
|
|
|
|
|
|
@@ -58,6 +58,7 @@ def choose_port():
|
|
return _url
|
|
return _url
|
|
|
|
|
|
|
|
|
|
|
|
+@memory_decorator
|
|
def getText(_type, path_or_stream):
|
|
def getText(_type, path_or_stream):
|
|
print("file type - " + _type)
|
|
print("file type - " + _type)
|
|
log("file type - " + _type)
|
|
log("file type - " + _type)
|
|
@@ -349,7 +350,7 @@ def cut_str(text_list, only_text_list, max_bytes_length=2000000):
|
|
return ["-1"]
|
|
return ["-1"]
|
|
|
|
|
|
|
|
|
|
-@get_memory_info.memory_decorator
|
|
|
|
|
|
+@memory_decorator
|
|
def convert(data, ocr_model, otr_model):
|
|
def convert(data, ocr_model, otr_model):
|
|
"""
|
|
"""
|
|
接口返回值:
|
|
接口返回值:
|
|
@@ -460,9 +461,16 @@ def _convert():
|
|
|
|
|
|
# log("growth start" + str(objgraph.growth()))
|
|
# log("growth start" + str(objgraph.growth()))
|
|
# log("most_common_types start" + str(objgraph.most_common_types(20)))
|
|
# log("most_common_types start" + str(objgraph.most_common_types(20)))
|
|
|
|
+ # tracemalloc.start(25)
|
|
|
|
+ # snapshot = tracemalloc.take_snapshot()
|
|
|
|
|
|
log("into convert")
|
|
log("into convert")
|
|
start_time = time.time()
|
|
start_time = time.time()
|
|
|
|
+
|
|
|
|
+ # _global = {}
|
|
|
|
+ # _global.update({"md5": "1"+"0"*15})
|
|
|
|
+ # _global.update({"port": globals().get("port")})
|
|
|
|
+ # set_flask_global()
|
|
_md5 = _global.get("md5")
|
|
_md5 = _global.get("md5")
|
|
try:
|
|
try:
|
|
if not request.form:
|
|
if not request.form:
|
|
@@ -553,8 +561,8 @@ def _convert():
|
|
+ str(len(str(text))) + " is_success: 1 "
|
|
+ str(len(str(text))) + " is_success: 1 "
|
|
+ str(time.time() - start_time))
|
|
+ str(time.time() - start_time))
|
|
|
|
|
|
- log("growth end" + str(objgraph.growth()))
|
|
|
|
- log("most_common_types end" + str(objgraph.most_common_types(20)))
|
|
|
|
|
|
+ # log("growth end" + str(objgraph.growth()))
|
|
|
|
+ # log("most_common_types end" + str(objgraph.most_common_types(20)))
|
|
return json.dumps({"result_html": text, "result_text": only_text,
|
|
return json.dumps({"result_html": text, "result_text": only_text,
|
|
"is_success": 1, "swf_images": str(swf_images)})
|
|
"is_success": 1, "swf_images": str(swf_images)})
|
|
|
|
|
|
@@ -569,6 +577,33 @@ def _convert():
|
|
traceback.print_exc()
|
|
traceback.print_exc()
|
|
return json.dumps({"result_html": ["-1"], "result_text": ["-1"],
|
|
return json.dumps({"result_html": ["-1"], "result_text": ["-1"],
|
|
"is_success": 0, "swf_images": str([])})
|
|
"is_success": 0, "swf_images": str([])})
|
|
|
|
+ finally:
|
|
|
|
+ # _global._del()
|
|
|
|
+ # gc.collect()
|
|
|
|
+ log("finally")
|
|
|
|
+ # snapshot1 = tracemalloc.take_snapshot()
|
|
|
|
+ # top_stats = snapshot1.compare_to(snapshot, 'lineno')
|
|
|
|
+ # log("[ Top 20 differences ]")
|
|
|
|
+ # for stat in top_stats[:20]:
|
|
|
|
+ # if stat.size_diff < 0:
|
|
|
|
+ # continue
|
|
|
|
+ # log(stat)
|
|
|
|
+ # gth = objgraph.growth(limit=10)
|
|
|
|
+ # for gt in gth:
|
|
|
|
+ # log("growth type:%s, count:%s, growth:%s" % (gt[0], gt[1], gt[2]))
|
|
|
|
+ # # if gt[2] > 100 or gt[1] > 300:
|
|
|
|
+ # # continue
|
|
|
|
+ # if gt[2] < 5:
|
|
|
|
+ # continue
|
|
|
|
+ # _p = os.path.dirname(os.path.abspath(__file__))
|
|
|
|
+ # objgraph.show_backrefs(objgraph.by_type(gt[0])[0], max_depth=10, too_many=5,
|
|
|
|
+ # filename=_p + "/dots/%s_%s_backrefs.dot" % (_md5, gt[0]))
|
|
|
|
+ # objgraph.show_refs(objgraph.by_type(gt[0])[0], max_depth=10, too_many=5,
|
|
|
|
+ # filename=_p + "/dots/%s_%s_refs.dot" % (_md5, gt[0]))
|
|
|
|
+ # objgraph.show_chain(
|
|
|
|
+ # objgraph.find_backref_chain(objgraph.by_type(gt[0])[0], objgraph.is_proper_module),
|
|
|
|
+ # filename=_p + "/dots/%s_%s_chain.dot" % (_md5, gt[0])
|
|
|
|
+ # )
|
|
|
|
|
|
|
|
|
|
def test_more(_dir, process_no=None):
|
|
def test_more(_dir, process_no=None):
|
|
@@ -651,6 +686,7 @@ if __name__ == '__main__':
|
|
port = 15010
|
|
port = 15010
|
|
|
|
|
|
globals().update({"md5": "1"+"0"*15})
|
|
globals().update({"md5": "1"+"0"*15})
|
|
|
|
+ globals().update({"port": str(port)})
|
|
_global._init()
|
|
_global._init()
|
|
_global.update({"md5": "1"+"0"*15})
|
|
_global.update({"md5": "1"+"0"*15})
|
|
_global.update({"port": str(port)})
|
|
_global.update({"port": str(port)})
|