12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061 |
- import os
- import re
- import sys
- sys.path.append(os.path.abspath(os.path.dirname(__file__)) + "/../")
- from format_convert.convert_tree import _Document, _Sentence, _Page
- import logging
- import traceback
- from format_convert.convert_doc import DocConvert
- from format_convert.utils import judge_error_code, get_logger, log
class WpsConvert:
    """Convert a ``.wps`` file by copying it to a ``.doc`` name and running DocConvert.

    The file's bytes are written unchanged under a new name ending in
    ``.doc`` inside ``unique_type_dir``; the copy is then handed to
    ``DocConvert`` and its resulting document replaces ``self._doc``.
    """

    def __init__(self, path, unique_type_dir):
        """Store the source path and working directory.

        Args:
            path: Path of the input ``.wps`` file.
            unique_type_dir: Directory in which the ``.doc`` copy is written;
                it is also passed through unchanged to ``DocConvert``.
        """
        self._doc = _Document(path)
        self.path = path
        self.unique_type_dir = unique_type_dir

    @staticmethod
    def _doc_file_name(path):
        """Return ``(file_name, doc_file_name)`` for *path*.

        ``file_name`` is the last component of *path*, split on either ``/``
        or ``\\`` so Windows-style paths are handled on any platform.
        ``doc_file_name`` is that name with its extension (if any) replaced
        by ``.doc``.
        """
        file_name = re.split(r'[/\\]', path)[-1]
        # splitext copes with extensions of any length and with names that
        # have no extension at all, unlike slicing off the last 4 characters.
        stem = os.path.splitext(file_name)[0]
        return file_name, stem + '.doc'

    def convert(self):
        """Copy the input to a ``.doc`` file and convert it with DocConvert.

        On return ``self._doc`` is the document produced by ``DocConvert``.
        Exceptions (e.g. from file I/O) propagate to the caller.
        """
        # Change the suffix, then delegate to the doc handler.
        print('self.path', self.path)
        file_name, new_file_name = self._doc_file_name(self.path)

        with open(self.path, 'rb') as src:
            content = src.read()

        # os.path.join yields the same path as plain concatenation when the
        # directory already ends with a separator, and inserts one when it
        # does not.
        new_file_path = os.path.join(self.unique_type_dir, new_file_name)
        print('new_file_path', new_file_path)
        with open(new_file_path, 'wb') as dst:
            dst.write(content)
        log('wps file ' + file_name + ' -> ' + new_file_name)

        self._doc_convert = DocConvert(new_file_path, self.unique_type_dir)
        self._doc_convert.convert()
        self._doc = self._doc_convert._doc

    def get_html(self):
        """Run the conversion and return its result.

        Returns:
            ``self._doc.error_code`` when it is not ``None`` (``[-1]`` if
            ``convert`` raised), otherwise ``self._doc.get_html()``.
        """
        try:
            self.convert()
        except Exception:
            # Exception (not a bare except) so KeyboardInterrupt/SystemExit
            # are not swallowed and turned into an error code.
            traceback.print_exc()
            self._doc.error_code = [-1]

        # Directly return the html produced by the doc conversion.
        if self._doc.error_code is not None:
            return self._doc.error_code
        return self._doc.get_html()
if __name__ == '__main__':
    # Manual smoke test: convert one local .wps file and dump the html.
    _p = "C:/Users/Administrator/Downloads/1723004790329.wps"
    # _p = "C:/Users/Administrator/Desktop/test_wps/error2.wps"

    save_dir = r"D:\Project\format_conversion_maxcompute\format_convert\temp" + '/'
    c = WpsConvert(_p, save_dir)
    _html = c.get_html()

    # get_html() returns the error-code list (e.g. [-1]) on failure; only
    # write the output file when the first element really is html text,
    # otherwise str + int would raise TypeError after truncating the file.
    if _html and isinstance(_html[0], str):
        with open('../result.html', 'w', encoding='utf-8') as f:
            f.write('<!DOCTYPE HTML><head><meta charset="UTF-8"></head>' + _html[0])
    else:
        print('wps convert failed, error code:', _html)
|