libreoffice_interface.py 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207
  1. import base64
  2. import json
  3. import os
  4. import re
  5. import shutil
  6. import subprocess
  7. import sys
  8. import traceback
  9. import psutil
  10. sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
  11. from format_convert.judge_platform import get_platform
  12. import logging
  13. from format_convert.utils import my_subprocess_call
  14. from flask import Flask, request
  15. logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
  16. def monitor_libreoffice():
  17. try:
  18. # logging.info("=========================================")
  19. logging.info("into monitor_libreoffice")
  20. # logging.info("------------------------------MEM top 10")
  21. os.system("ps aux|head -1;ps aux|grep -v PID|sort -rn -k +4|head")
  22. #
  23. # logging.info("--------------------------soffice process")
  24. # os.system("ps -ef | grep soffice")
  25. pids = psutil.pids()
  26. for pid in pids:
  27. try:
  28. process = psutil.Process(pid)
  29. # if process.username() == "appuser":
  30. if re.search("soffice", process.exe()):
  31. if str(pid) == str(globals().get("soffice_pid")):
  32. logging.info("process " + str(pid) + str(process.exe()))
  33. comm = "kill -9 " + str(pid)
  34. os.system(comm)
  35. logging.info("killed soffice" + str(pid))
  36. elif re.search("unrar", process.exe()):
  37. logging.info("process " + str(pid) + str(process.exe()))
  38. comm = "kill -9 " + str(pid)
  39. os.system(comm)
  40. logging.info("killed unrar" + str(pid))
  41. except TimeoutError:
  42. raise TimeoutError
  43. except:
  44. continue
  45. # logging.info("=========================================")
  46. except TimeoutError:
  47. raise TimeoutError
  48. # @timeout_decorator.timeout(120, timeout_exception=TimeoutError, use_signals=False)
  49. def office_convert(src_path, dest_path, target_format, retry_times=1):
  50. try:
  51. logging.info("into office_convert")
  52. # print("src_path", src_path)
  53. uid1 = src_path.split(os.sep)[-1].split(".")[0]
  54. dest_file_path = dest_path + uid1 + "." + target_format
  55. src_format = src_path.split(".")[-1]
  56. # 重试转换
  57. for i in range(retry_times):
  58. # 调用Win下的libreoffice子进程
  59. if get_platform() == "Windows":
  60. soffice = 'C:\\Program Files\\LibreOfficeDev 5\\program\\soffice.exe'
  61. comm_list = [soffice, '--headless', '--convert-to', target_format, src_path,
  62. '--outdir', dest_path+os.sep]
  63. try:
  64. p = subprocess.call(comm_list, timeout=30*(i+2))
  65. except:
  66. continue
  67. # 调用Linux下的libreoffice子进程
  68. else:
  69. # 先杀libreoffice进程
  70. # monitor_libreoffice()
  71. # 再调用转换
  72. libreoffice_dir = 'soffice'
  73. comm_list = [libreoffice_dir, '--headless', '--convert-to', target_format, src_path,
  74. '--outdir', dest_path+os.sep]
  75. comm = ''
  76. for c in comm_list:
  77. comm += c + ' '
  78. # logging.info("office_convert command" + comm)
  79. try:
  80. # p = subprocess.call(comm_list, timeout=30*(i+2))
  81. # os.system(comm)
  82. pid, p_code = my_subprocess_call(comm_list, timeout=30*(i+1))
  83. logging.info("subprocess code " + str(p_code))
  84. globals().update({"soffice_pid": pid})
  85. except TimeoutError:
  86. return [-5]
  87. except Exception as e:
  88. print(1, src_format + ' to ' + target_format + ' Failed! Retry...', i, 'times')
  89. traceback.print_exc()
  90. continue
  91. # 执行失败,重试
  92. if not os.path.exists(dest_file_path):
  93. print(2, src_format + ' to ' + target_format + ' Failed! Retry...', i, 'times')
  94. continue
  95. # 执行成功,跳出循环
  96. else:
  97. break
  98. # 重试后还未成功
  99. if not os.path.exists(dest_file_path):
  100. logging.info(str(3) + src_format + ' to ' + target_format + " failed!")
  101. return [-3]
  102. logging.info("out office_convert")
  103. return dest_file_path
  104. except TimeoutError:
  105. return [-5]
  106. # 接口配置
  107. app = Flask(__name__)
  108. @app.route('/soffice', methods=['POST'])
  109. def _office_convert():
  110. src_path = None
  111. try:
  112. logging.info("into office_convert")
  113. if not request.form:
  114. logging.info("office_convert no data!")
  115. return {"data": []}
  116. src_path = request.form.get("src_path")
  117. dest_path = request.form.get("dest_path")
  118. file_b64 = request.form.get("file")
  119. file_bytes = base64.b64decode(file_b64)
  120. target_format = request.form.get("target_format")
  121. # retry_times = int(request.form.get("retry_times"))
  122. uid1 = src_path.split(os.sep)[-1].split(".")[0]
  123. dest_file_path = dest_path + uid1 + "." + target_format
  124. src_format = src_path.split(".")[-1]
  125. if not os.path.exists(os.path.dirname(src_path)):
  126. os.makedirs(os.path.dirname(src_path), mode=0o777)
  127. with open(src_path, "wb") as f:
  128. f.write(file_bytes)
  129. # 调用Win下的libreoffice子进程
  130. if get_platform() == "Windows":
  131. soffice = 'C:\\Program Files\\LibreOfficeDev 5\\program\\soffice.exe'
  132. comm_list = [soffice, '--headless', '--convert-to', target_format, src_path,
  133. '--outdir', dest_path+os.sep]
  134. p = subprocess.call(comm_list, timeout=10)
  135. # 调用Linux下的libreoffice子进程
  136. else:
  137. # 再调用转换
  138. libreoffice_dir = 'soffice'
  139. comm_list = [libreoffice_dir, '--headless', '--convert-to', target_format, src_path,
  140. '--outdir', dest_path+os.sep]
  141. comm = ''
  142. for c in comm_list:
  143. comm += c + ' '
  144. logging.info("office_convert command" + comm)
  145. # p = subprocess.call(comm_list, timeout=30*(i+2))
  146. # os.system(comm)
  147. pid, p_code = my_subprocess_call(comm_list, timeout=10)
  148. logging.info("subprocess code " + str(p_code))
  149. # 重试后还未成功
  150. if not os.path.exists(dest_file_path):
  151. logging.info(str(3) + src_format + ' to ' + target_format + " failed!")
  152. return {"data": [-3]}
  153. logging.info("out office_convert")
  154. with open(dest_file_path, "rb") as f:
  155. file_bytes = f.read()
  156. base64_stream = base64.b64encode(file_bytes)
  157. # temp_dir = "/data/fangjiasheng/format_conversion_maxcompute/format_convert/temp/"
  158. # if os.path.exists(temp_dir):
  159. # shutil.rmtree(temp_dir)
  160. print("base64_stream", type(base64_stream))
  161. return {"data": str(file_bytes)}
  162. except TimeoutError:
  163. return {"data": [-5]}
  164. except:
  165. traceback.print_exc()
  166. return {"data": [-1]}
  167. finally:
  168. if src_path is not None:
  169. file_dir = os.path.dirname(src_path)
  170. if os.path.exists(file_dir):
  171. logging.info("delete " + str(file_dir))
  172. shutil.rmtree(file_dir)
  173. if __name__ == "__main__":
  174. port = 16000
  175. os.system("service cron start")
  176. app.run(host='0.0.0.0', port=port, threaded=False, debug=False)