import os
import sys
from format_convert.convert_tree import _Document
sys.path.append(os.path.dirname(__file__) + "/../")
import logging
import traceback
from format_convert import get_memory_info
from format_convert.convert_docx import docx2text, DocxConvert
from format_convert.convert_need_interface import from_office_interface
from format_convert.utils import judge_error_code


@get_memory_info.memory_decorator
def doc2text(path, unique_type_dir):
    """Convert a .doc file to text by first converting it to .docx.

    The file is passed to ``from_office_interface`` with target format
    ``'docx'``; the converted file is then handed to ``docx2text``.

    Args:
        path: Path of the source .doc file.
        unique_type_dir: Working directory passed through to the office
            conversion interface and to ``docx2text``.

    Returns:
        The result of ``docx2text`` on success. If ``judge_error_code``
        flags the office conversion result, that result is returned as-is.
        ``[-1]`` is returned when any exception is raised.
    """
    logging.info("into doc2text")
    try:
        # Call the office format-conversion interface (.doc -> .docx).
        file_path = from_office_interface(path, unique_type_dir, 'docx')
        if judge_error_code(file_path):
            return file_path

        return docx2text(file_path, unique_type_dir)
    except Exception:
        logging.info("doc2text error!")
        # print_exc() writes the traceback itself and returns None, so it
        # must not be wrapped in print().
        traceback.print_exc()
        return [-1]


class DocConvert:
    """Convert a .doc file to HTML via an intermediate .docx conversion."""

    def __init__(self, path, unique_type_dir):
        """Store the source path and working directory.

        Args:
            path: Path of the source .doc file.
            unique_type_dir: Working directory passed to the office
                conversion interface and to ``DocxConvert``.
        """
        self._doc = _Document(path)
        self.path = path
        self.unique_type_dir = unique_type_dir

    def convert(self):
        """Convert the file to .docx and load the resulting document tree.

        On failure the error code is recorded on ``self._doc.error_code``;
        on success ``self._doc`` is replaced by the ``_doc`` built by
        ``DocxConvert``.
        """
        # Call the office format-conversion interface (.doc -> .docx).
        file_path = from_office_interface(self.path, self.unique_type_dir, 'docx')
        if judge_error_code(file_path):
            # Keep self._doc a _Document so get_html() can read
            # .error_code; rebinding _doc to the raw error value would make
            # that attribute access fail.
            # NOTE(review): assumes _Document exposes a writable
            # ``error_code`` attribute, as get_html() reads it - confirm.
            self._doc.error_code = file_path
            return

        logging.info("file_path %s", file_path)
        self._doc = DocxConvert(file_path, self.unique_type_dir)._doc

    def get_html(self):
        """Run the conversion and return the document's HTML.

        Returns:
            ``self._doc.error_code`` when it is not ``None`` after
            conversion, otherwise the result of ``self._doc.get_html()``.
        """
        self.convert()
        if self._doc.error_code is not None:
            return self._doc.error_code
        return self._doc.get_html()