convert_xlsx.py 1.2 KB

1234567891011121314151617181920212223242526272829303132333435363738
  1. import os
  2. import sys
  3. sys.path.append(os.path.dirname(__file__) + "/../")
  4. import logging
  5. import traceback
  6. import pandas
  7. from format_convert import get_memory_info
  @get_memory_info.memory_decorator
  def xlsx2text(path, unique_type_dir):
      """Render every sheet of an Excel workbook as HTML ``<table>`` markup.

      Args:
          path: Workbook location, handed directly to ``pandas.read_excel``.
          unique_type_dir: Not used by this function; kept so the signature
              stays compatible with existing callers.

      Returns:
          A one-element list. On success the element is a string holding one
          ``<table border="1">`` per sheet, concatenated in the order the
          sheets are returned by pandas. ``[-3]`` is returned when the
          workbook cannot be read, and ``[-1]`` when any later step fails.
      """
      logging.info("into xlsx2text")
      try:
          try:
              # sheet_name=None loads all sheets and returns them as a dict.
              df_dict = pandas.read_excel(
                  path, header=None, keep_default_na=False, sheet_name=None
              )
          except Exception:
              logging.info("xlsx format error!")
              return [-3]

          # Collect fragments and join once instead of repeatedly
          # concatenating onto an ever-growing string.
          parts = []
          for df in df_dict.values():
              parts.append('<table border="1">' + "\n")
              # iterrows() is kept on purpose so each cell is stringified
              # exactly as before (pandas documents that iterrows may change
              # dtypes across a row, which affects str(cell)).
              for _, row in df.iterrows():
                  parts.append("<tr>")
                  # NOTE(review): cell text is inserted without HTML escaping;
                  # left unchanged so the output stays identical -- confirm
                  # whether consumers need html.escape() here.
                  parts.extend(
                      "<td>" + str(cell) + "</td>" + "\n" for cell in row
                  )
                  parts.append("</tr>" + "\n")
              parts.append("</table>" + "\n")
          return ["".join(parts)]
      except Exception:
          logging.info("xlsx2text error!")
          # print_exc() writes the traceback itself and returns None, so its
          # result must not be passed to print().
          traceback.print_exc()
          return [-1]