# convert_xls.py
  1. import inspect
  2. import os
  3. import sys
  4. sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
  5. from format_convert.convert_tree import _Document
  6. import logging
  7. import traceback
  8. from format_convert import get_memory_info
  9. from format_convert.convert_need_interface import from_office_interface
  10. from format_convert.convert_xlsx import xlsx2text, XlsxConvert
  11. from format_convert.utils import judge_error_code, get_logger, log
  12. @get_memory_info.memory_decorator
  13. def xls2text(path, unique_type_dir):
  14. log("into xls2text")
  15. try:
  16. # 调用libreoffice格式转换
  17. file_path = from_office_interface(path, unique_type_dir, 'xlsx')
  18. if judge_error_code(file_path):
  19. return file_path
  20. text = xlsx2text(file_path, unique_type_dir)
  21. if judge_error_code(text):
  22. return text
  23. return text
  24. except Exception as e:
  25. log("xls2text error!")
  26. traceback.print_exc()
  27. return [-1]
  28. class XlsConvert:
  29. def __init__(self, path, unique_type_dir):
  30. self._doc = _Document(path)
  31. self.path = path
  32. self.unique_type_dir = unique_type_dir
  33. def convert(self):
  34. # 调用office格式转换
  35. file_path = from_office_interface(self.path, self.unique_type_dir, 'xlsx')
  36. if judge_error_code(file_path):
  37. self._doc.error_code = file_path
  38. return
  39. _xlsx = XlsxConvert(file_path, self.unique_type_dir)
  40. _xlsx.convert()
  41. self._doc = _xlsx._doc
  42. def get_html(self):
  43. try:
  44. self.convert()
  45. except:
  46. traceback.print_exc()
  47. self._doc.error_code = [-1]
  48. print("xls ", self._doc)
  49. if self._doc.error_code is not None:
  50. return self._doc.error_code
  51. print(self._doc.children)
  52. return self._doc.get_html()