import os
import re
import sys

# Put the parent of this file's directory on sys.path before the
# format_convert imports below (presumably so they resolve when this file
# is run directly as a script -- confirm against the project layout).
sys.path.append(os.path.abspath(os.path.dirname(__file__)) + "/../")
from format_convert.convert_tree import _Document, _Sentence, _Page
import logging
import traceback
from format_convert.convert_doc import DocConvert
from format_convert.utils import judge_error_code, get_logger, log


class WpsConvert:
    """Convert a ``.wps`` file by copying it to ``.doc`` and delegating to DocConvert."""

    def __init__(self, path: str, unique_type_dir: str):
        """Remember the input file and the working directory.

        Args:
            path: Path of the source file to convert.
            unique_type_dir: Directory prefix for the intermediate ``.doc``
                copy. It is concatenated directly with the file name, so it
                must already end with a path separator.
        """
        self._doc = _Document(path)
        self.path = path
        self.unique_type_dir = unique_type_dir

    def convert(self):
        """Copy the source file under a ``.doc`` name and run DocConvert on it.

        After a successful run ``self._doc`` is the document object built by
        the DocConvert instance.
        """
        # Change the suffix, then hand the file to the doc handler.
        print('self.path', self.path)
        # Raw-string pattern: split on either '/' or '\' to get the base name.
        file_name = re.split(r'[/\\]', self.path)[-1]
        with open(self.path, 'rb') as file:
            content = file.read()

        # Strip whatever extension is present instead of assuming it is
        # exactly four characters long.
        new_file_name = os.path.splitext(file_name)[0] + '.doc'
        new_file_path = self.unique_type_dir + new_file_name
        print('new_file_path', new_file_path)
        with open(new_file_path, 'wb') as file:
            file.write(content)
        log('wps file ' + file_name + ' -> ' + new_file_name)

        self._doc_convert = DocConvert(new_file_path, self.unique_type_dir)
        self._doc_convert.convert()
        self._doc = self._doc_convert._doc

    def get_html(self):
        """Run the conversion and return its HTML, or the error code on failure.

        Returns:
            ``self._doc.error_code`` when it is not ``None`` (set to ``[-1]``
            if ``convert`` raised), otherwise ``self._doc.get_html()``.
        """
        try:
            self.convert()
        except Exception:
            # Catch Exception rather than everything so KeyboardInterrupt and
            # SystemExit still propagate.
            traceback.print_exc()
            self._doc.error_code = [-1]

        # Return the HTML produced by the doc conversion directly.
        if self._doc.error_code is not None:
            return self._doc.error_code
        return self._doc.get_html()


if __name__ == '__main__':
    _p = "C:/Users/Administrator/Downloads/1723004790329.wps"
    # _p = "C:/Users/Administrator/Desktop/test_wps/error2.wps"
    save_dir = r"D:\Project\format_conversion_maxcompute\format_convert\temp" + '/'
    c = WpsConvert(_p, save_dir)
    _html = c.get_html()
    with open('../result.html', 'w', encoding='utf-8') as f:
        # NOTE(review): the literal in front of the HTML was split across
        # lines in the file as received; it is reproduced here as a single
        # newline -- confirm that no HTML header string was intended.
        f.write('\n' + _html[0])