convert_xls.py 1.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960
  1. import os
  2. import sys
  3. from format_convert.convert_tree import _Document
  4. sys.path.append(os.path.dirname(__file__) + "/../")
  5. import logging
  6. import traceback
  7. from format_convert import get_memory_info
  8. from format_convert.convert_need_interface import from_office_interface
  9. from format_convert.convert_xlsx import xlsx2text, XlsxConvert
  10. from format_convert.utils import judge_error_code
  11. @get_memory_info.memory_decorator
  12. def xls2text(path, unique_type_dir):
  13. logging.info("into xls2text")
  14. try:
  15. # 调用libreoffice格式转换
  16. file_path = from_office_interface(path, unique_type_dir, 'xlsx')
  17. if judge_error_code(file_path):
  18. return file_path
  19. text = xlsx2text(file_path, unique_type_dir)
  20. if judge_error_code(text):
  21. return text
  22. return text
  23. except Exception as e:
  24. logging.info("xls2text error!")
  25. print("xls2text", traceback.print_exc())
  26. return [-1]
  27. class XlsConvert:
  28. def __init__(self, path, unique_type_dir):
  29. self._doc = _Document(path)
  30. self.path = path
  31. self.unique_type_dir = unique_type_dir
  32. def convert(self):
  33. # 调用office格式转换
  34. file_path = from_office_interface(self.path, self.unique_type_dir, 'xlsx')
  35. if judge_error_code(file_path):
  36. self._doc = file_path
  37. return
  38. _xlsx = XlsxConvert(file_path, self.unique_type_dir)
  39. _xlsx.convert()
  40. self._doc = _xlsx._doc
  41. def get_html(self):
  42. try:
  43. self.convert()
  44. except:
  45. traceback.print_exc()
  46. self._doc.error_code = [-1]
  47. if self._doc.error_code is not None:
  48. return self._doc.error_code
  49. print(self._doc.children)
  50. return self._doc.get_html()