convert_wps.py 1.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061
  1. import os
  2. import re
  3. import sys
  4. sys.path.append(os.path.abspath(os.path.dirname(__file__)) + "/../")
  5. from format_convert.convert_tree import _Document, _Sentence, _Page
  6. import logging
  7. import traceback
  8. from format_convert.convert_doc import DocConvert
  9. from format_convert.utils import judge_error_code, get_logger, log
  10. class WpsConvert:
  11. def __init__(self, path, unique_type_dir):
  12. self._doc = _Document(path)
  13. self.path = path
  14. self.unique_type_dir = unique_type_dir
  15. def convert(self):
  16. # 改后缀,调用doc处理
  17. print('self.path', self.path)
  18. file_name = re.split('[/\\\]', self.path)[-1]
  19. with open(self.path, 'rb') as file:
  20. content = file.read()
  21. new_file_name = file_name[:-4] + '.doc'
  22. new_file_path = self.unique_type_dir + new_file_name
  23. print('new_file_path', new_file_path)
  24. with open(new_file_path, 'wb') as file:
  25. file.write(content)
  26. log('wps file ' + file_name + ' -> ' + new_file_name)
  27. self._doc_convert = DocConvert(new_file_path, self.unique_type_dir)
  28. self._doc_convert.convert()
  29. self._doc = self._doc_convert._doc
  30. def get_html(self):
  31. try:
  32. self.convert()
  33. except:
  34. traceback.print_exc()
  35. self._doc.error_code = [-1]
  36. # 直接返回doc处理的html
  37. if self._doc.error_code is not None:
  38. return self._doc.error_code
  39. else:
  40. return self._doc.get_html()
  41. if __name__ == '__main__':
  42. _p = "C:/Users/Administrator/Downloads/1723004790329.wps"
  43. # _p = "C:/Users/Administrator/Desktop/test_wps/error2.wps"
  44. save_dir = r"D:\Project\format_conversion_maxcompute\format_convert\temp" + '/'
  45. c = WpsConvert(_p, save_dir)
  46. _html = c.get_html()
  47. with open('../result.html', 'w', encoding='utf-8') as f:
  48. f.write('<!DOCTYPE HTML><head><meta charset="UTF-8"></head>' + _html[0])