convert_doc.py 822 B

1234567891011121314151617181920212223242526
  1. import os
  2. import sys
  3. sys.path.append(os.path.dirname(__file__) + "/../")
  4. import logging
  5. import traceback
  6. from format_convert import get_memory_info
  7. from format_convert.convert_docx import docx2text
  8. from format_convert.convert_need_interface import from_office_interface
  9. from format_convert.utils import judge_error_code
  10. @get_memory_info.memory_decorator
  11. def doc2text(path, unique_type_dir):
  12. logging.info("into doc2text")
  13. try:
  14. # 调用office格式转换
  15. file_path = from_office_interface(path, unique_type_dir, 'docx')
  16. if judge_error_code(file_path):
  17. return file_path
  18. text = docx2text(file_path, unique_type_dir)
  19. return text
  20. except Exception as e:
  21. logging.info("doc2text error!")
  22. print("doc2text", traceback.print_exc())
  23. return [-1]