convert_txt.py 971 B

123456789101112131415161718192021222324252627282930313233
  1. import os
  2. import sys
  3. sys.path.append(os.path.dirname(__file__) + "/../")
  4. import logging
  5. import traceback
  6. import chardet
  7. from format_convert import get_memory_info
  8. @get_memory_info.memory_decorator
  9. def txt2text(path):
  10. logging.info("into txt2text")
  11. try:
  12. # 判断字符编码
  13. with open(path, "rb") as ff:
  14. data = ff.read()
  15. encode = chardet.detect(data).get("encoding")
  16. print("txt2text judge code is", encode)
  17. try:
  18. if encode is None:
  19. logging.info("txt2text cannot judge file code!")
  20. return [-3]
  21. with open(path, "r", encoding=encode) as ff:
  22. txt_text = ff.read()
  23. return [txt_text]
  24. except:
  25. logging.info("txt2text cannot open file with code " + encode)
  26. return [-3]
  27. except Exception as e:
  28. print("txt2text", traceback.print_exc())
  29. logging.info("txt2text error!")
  30. return [-1]