# Source repository: fangjiasheng/FORMAT_CONVERSION_MAXCOMPUTE
							import base64
import logging
import os
import sys
sys.path.append(os.path.dirname(__file__) + "/../")
import traceback
import requests
from format_convert import get_memory_info
from format_convert.utils import get_platform, get_sequential_data, judge_error_code
from ocr.ocr_interface import ocr, OcrModels
from otr.otr_interface import otr, OtrModels
from format_convert.libreoffice_interface import office_convert


def from_office_interface(src_path, dest_path, target_format, retry_times=1):
    """Convert an office document via ``office_convert``, mapping failures to error codes.

    Args:
        src_path: Path of the document to convert; passed through to
            ``office_convert``.
        dest_path: Destination argument passed through to ``office_convert``.
        target_format: Target format argument passed through to
            ``office_convert``.
        retry_times: Retry count passed through to ``office_convert``.

    Returns:
        The value returned by ``office_convert`` unchanged (this includes any
        error code it reports), ``[-5]`` if a ``TimeoutError`` is raised, or
        ``[-1]`` if any other exception is raised.
    """
    try:
        # Windows and non-Windows platforms previously took different routes
        # around a timeout decorator; both now call office_convert directly,
        # so no platform check is needed. The result is returned as-is whether
        # or not it is an error code.
        return office_convert(src_path, dest_path, target_format, retry_times)
    except TimeoutError:
        logging.info("from_office_interface timeout error!")
        return [-5]
    except Exception:
        # Catch Exception rather than everything so KeyboardInterrupt and
        # SystemExit still propagate.
        logging.info("from_office_interface error!")
        # format_exc() returns the traceback text; print_exc() returns None.
        print("from_office_interface", traceback.format_exc())
        return [-1]


@get_memory_info.memory_decorator
def from_ocr_interface(image_stream, is_table=False):
    """Run OCR on an image and return its text, or error codes on failure.

    The OCR model is created on first use and cached in the module globals
    under ``global_ocr_model``.

    Args:
        image_stream: Bytes-like image data; it is base64-encoded before being
            handed to ``ocr``.
        is_table: When true, return the raw ``(text_list, bbox_list)`` pair
            instead of the combined text.

    Returns:
        * ``is_table`` true: ``(text_list, bbox_list)``, where a ``None``
          result is replaced by ``[]``.
        * ``is_table`` false: the result of
          ``get_sequential_data(text_list, bbox_list, html=True)`` (which may
          itself be an error code) when both lists are non-empty, otherwise
          ``""``.
        * On failure an error code: ``[-5]`` for ``TimeoutError``, ``[-2]``
          for ``requests.exceptions.ConnectionError``, ``[-1]`` for any other
          exception. In table mode the code is returned twice, as a pair.
    """

    def _error(code):
        # Error codes mirror the shape of the successful return value.
        return ([code], [code]) if is_table else [code]

    logging.info("into from_ocr_interface")
    try:
        base64_stream = base64.b64encode(image_stream)

        # Call the OCR interface, creating the shared model on first use.
        try:
            if globals().get("global_ocr_model") is None:
                globals().update({"global_ocr_model": OcrModels().get_model()})
                print("=========== init ocr model ===========")
            r = ocr(data=base64_stream, ocr_model=globals().get("global_ocr_model"))
        except TimeoutError:
            return _error(-5)
        except requests.exceptions.ConnectionError:
            return _error(-2)

        # NOTE(review): eval() executes whatever string the OCR interface
        # returned. This is only safe while that interface is trusted;
        # consider ast.literal_eval if the payload is always plain literals.
        text_list = eval(r.get("text"))
        bbox_list = eval(r.get("bbox"))
        if text_list is None:
            text_list = []
        if bbox_list is None:
            bbox_list = []

        if is_table:
            return text_list, bbox_list
        if text_list and bbox_list:
            # The result is returned unchanged whether it is text or an error
            # code produced by get_sequential_data.
            return get_sequential_data(text_list, bbox_list, html=True)
        return ""
    except Exception:
        logging.info("from_ocr_interface error!")
        # Emit the traceback so failures are diagnosable, as the sibling
        # interface functions do.
        print("from_ocr_interface", traceback.format_exc())
        return _error(-1)


@get_memory_info.memory_decorator
def from_otr_interface2(image_stream):
    """Run the OTR model on an image and return its five result lists.

    The OTR model is created on first use and cached in the module globals
    under ``global_otr_model``.

    Args:
        image_stream: Bytes-like image data; it is base64-encoded before being
            handed to ``otr``.

    Returns:
        A 5-tuple ``(points, split_lines, bboxes, outline_points, lines)``
        read from the interface result, with any ``None`` entry replaced by
        ``[]``. On failure every element of the tuple is the same error code:
        ``[-5]`` for ``TimeoutError``, ``[-2]`` for
        ``requests.exceptions.ConnectionError``, ``[-1]`` for any other
        exception.
    """

    def _error(code):
        # Five independent single-element lists, matching the success arity.
        return tuple([code] for _ in range(5))

    logging.info("into from_otr_interface")
    try:
        base64_stream = base64.b64encode(image_stream)

        # Call the OTR interface, creating the shared model on first use.
        try:
            if globals().get("global_otr_model") is None:
                globals().update({"global_otr_model": OtrModels().get_model()})
                print("=========== init otr model ===========")
            r = otr(data=base64_stream, otr_model=globals().get("global_otr_model"))
        except TimeoutError:
            return _error(-5)
        except requests.exceptions.ConnectionError:
            logging.info("from_otr_interface")
            # format_exc() returns the traceback text; print_exc() returns None.
            print("from_otr_interface", traceback.format_exc())
            return _error(-2)

        # Process the result.
        # NOTE(review): eval() executes whatever string the OTR interface
        # returned. This is only safe while that interface is trusted;
        # consider ast.literal_eval if the payload is always plain literals.
        parsed = []
        for key in ("points", "split_lines", "bboxes", "outline_points", "lines"):
            value = eval(r.get(key))
            parsed.append([] if value is None else value)
        return tuple(parsed)
    except Exception:
        logging.info("from_otr_interface error!")
        print("from_otr_interface", traceback.format_exc())
        return _error(-1)


def from_otr_interface(image_stream, is_from_pdf=False):
    """Run the OTR model on an image and return its ``list_line`` result.

    The OTR model is created on first use and cached in the module globals
    under ``global_otr_model``.

    Args:
        image_stream: Bytes-like image data; it is base64-encoded before being
            handed to ``otr``.
        is_from_pdf: Flag forwarded unchanged to ``otr``.

    Returns:
        The evaluated ``list_line`` entry of the interface result, or an error
        code: ``[-5]`` for ``TimeoutError``, ``[-2]`` for
        ``requests.exceptions.ConnectionError``, ``[-1]`` for any other
        exception (including input that cannot be base64-encoded).
    """
    logging.info("into from_otr_interface")
    try:
        base64_stream = base64.b64encode(image_stream)

        # Call the OTR interface, creating the shared model on first use.
        try:
            if globals().get("global_otr_model") is None:
                globals().update({"global_otr_model": OtrModels().get_model()})
                print("=========== init otr model ===========")
            r = otr(
                data=base64_stream,
                otr_model=globals().get("global_otr_model"),
                is_from_pdf=is_from_pdf,
            )
        except TimeoutError:
            return [-5]
        except requests.exceptions.ConnectionError:
            logging.info("from_otr_interface")
            # format_exc() returns the traceback text; print_exc() returns None.
            print("from_otr_interface", traceback.format_exc())
            return [-2]

        # Process the result.
        # NOTE(review): eval() executes whatever string the OTR interface
        # returned. This is only safe while that interface is trusted;
        # consider ast.literal_eval if the payload is always plain literals.
        return eval(r.get("list_line"))
    except Exception:
        logging.info("from_otr_interface error!")
        print("from_otr_interface", traceback.format_exc())
        return [-1]