"""Small Flask service that converts office documents with LibreOffice.

The module exposes:

* ``monitor_libreoffice`` - kills the tracked ``soffice`` process and any
  ``unrar`` process found on the machine.
* ``office_convert`` - converts a file that already exists on disk, with
  retries.
* ``POST /soffice`` - receives a base64 encoded file, converts it and returns
  the converted bytes.

Error codes used in the returned payloads (taken from the code below):
``-1`` unexpected error, ``-3`` the converted file was not produced,
``-5`` the conversion timed out.
"""
import base64
import json
import os
import re
import shutil
import subprocess
import sys
import traceback
import psutil
# The project root must be on sys.path before the project-local imports below.
sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
from format_convert.judge_platform import get_platform
import logging
from format_convert.utils import my_subprocess_call
from flask import Flask, request


logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Location of the LibreOffice binary on each platform.
_WINDOWS_SOFFICE = 'C:\\Program Files\\LibreOfficeDev 5\\program\\soffice.exe'
_LINUX_SOFFICE = 'soffice'

# PID of the soffice process started by the latest office_convert() call on
# Linux; monitor_libreoffice() only kills the soffice process with this PID.
soffice_pid = None


def _expected_output_path(src_path, dest_path, target_format):
    """Return the path LibreOffice is expected to write the converted file to.

    Only the last extension of the source name is replaced, so ``a.b.doc``
    maps to ``a.b.<target_format>``; ``os.path.join`` makes the result
    independent of whether ``dest_path`` ends with a separator.
    """
    stem = os.path.splitext(os.path.basename(src_path))[0]
    return os.path.join(dest_path, stem + "." + target_format)


def _build_command(soffice, src_path, dest_path, target_format):
    """Return the argument list for a headless ``soffice`` conversion."""
    return [soffice, '--headless', '--convert-to', target_format,
            src_path, '--outdir', dest_path + os.sep]


def monitor_libreoffice():
    """Kill the tracked ``soffice`` process and every ``unrar`` process.

    A ``soffice`` process is killed only when its PID equals the module-level
    ``soffice_pid``. Processes that cannot be inspected or killed are skipped.

    Raises:
        TimeoutError: re-raised unchanged if one occurs while scanning.
    """
    logging.info("into monitor_libreoffice")
    # Dump the top memory consumers to the console for diagnostics.
    os.system("ps aux|head -1;ps aux|grep -v PID|sort -rn -k +4|head")

    for pid in psutil.pids():
        try:
            process = psutil.Process(pid)
            exe = process.exe()
            if "soffice" in exe:
                if str(pid) == str(soffice_pid):
                    logging.info("process " + str(pid) + str(exe))
                    process.kill()
                    logging.info("killed soffice" + str(pid))
            elif "unrar" in exe:
                logging.info("process " + str(pid) + str(exe))
                process.kill()
                logging.info("killed unrar" + str(pid))
        except TimeoutError:
            # Must stay ahead of the generic handler so it is not swallowed.
            raise
        except Exception:
            # Process vanished or access was denied: best effort, move on.
            continue


# NOTE(review): a timeout decorator raising TimeoutError used to wrap this
# function (it was commented out in the original); the TimeoutError handlers
# below are kept so re-enabling it still yields [-5].
def office_convert(src_path, dest_path, target_format, retry_times=1):
    """Convert ``src_path`` to ``target_format`` inside ``dest_path``.

    Args:
        src_path: path of the existing source document.
        dest_path: output directory.
        target_format: value passed to ``soffice --convert-to``; also used as
            the extension of the expected output file.
        retry_times: maximum number of conversion attempts.

    Returns:
        The path of the converted file (``str``) on success, ``[-3]`` when no
        output file exists after all attempts, ``[-5]`` on ``TimeoutError``.
    """
    global soffice_pid
    try:
        logging.info("into office_convert")
        dest_file_path = _expected_output_path(src_path, dest_path, target_format)
        src_format = src_path.split(".")[-1]
        on_windows = get_platform() == "Windows"

        # Retry the conversion; the timeout grows with each attempt.
        for i in range(retry_times):
            if on_windows:
                # Run the LibreOffice subprocess on Windows.
                comm_list = _build_command(_WINDOWS_SOFFICE, src_path,
                                           dest_path, target_format)
                try:
                    subprocess.call(comm_list, timeout=30 * (i + 2))
                except Exception:
                    continue
            else:
                # Run the LibreOffice subprocess on Linux.
                comm_list = _build_command(_LINUX_SOFFICE, src_path,
                                           dest_path, target_format)
                try:
                    # my_subprocess_call yields (pid, return code); it is
                    # presumably what raises TimeoutError - confirm against
                    # format_convert.utils.
                    pid, p_code = my_subprocess_call(comm_list, timeout=30 * (i + 1))
                    logging.info("subprocess code " + str(p_code))
                    soffice_pid = pid
                except TimeoutError:
                    return [-5]
                except Exception:
                    logging.info("%s %s to %s Failed! Retry... %s times",
                                 1, src_format, target_format, i)
                    traceback.print_exc()
                    continue

            if os.path.exists(dest_file_path):
                # Conversion succeeded, stop retrying.
                break
            # Conversion produced nothing, try again.
            logging.info("%s %s to %s Failed! Retry... %s times",
                         2, src_format, target_format, i)

        # Still no output after all attempts.
        if not os.path.exists(dest_file_path):
            logging.info(str(3) + src_format + ' to ' + target_format + " failed!")
            return [-3]

        logging.info("out office_convert")
        return dest_file_path
    except TimeoutError:
        return [-5]


# API configuration
app = Flask(__name__)


@app.route('/soffice', methods=['POST'])
def _office_convert():
    """Handle ``POST /soffice``: store the uploaded file, convert it, return it.

    Form fields read: ``src_path`` (where the upload is written), ``dest_path``
    (output directory), ``file`` (base64 encoded content), ``target_format``.

    Returns:
        ``{"data": <str>}`` with ``str()`` of the converted bytes on success,
        ``{"data": []}`` when the form is empty, ``{"data": [-3]}`` when no
        output was produced, ``{"data": [-5]}`` on ``TimeoutError`` and
        ``{"data": [-1]}`` on any other error.

    SECURITY NOTE(review): ``src_path`` and ``dest_path`` come straight from
    the request. The handler writes to ``src_path`` and always recursively
    deletes its parent directory afterwards, so this endpoint must only be
    reachable by trusted callers.
    """
    src_path = None
    try:
        logging.info("into office_convert")
        if not request.form:
            logging.info("office_convert no data!")
            return {"data": []}

        src_path = request.form.get("src_path")
        dest_path = request.form.get("dest_path")
        file_bytes = base64.b64decode(request.form.get("file"))
        target_format = request.form.get("target_format")

        dest_file_path = _expected_output_path(src_path, dest_path, target_format)
        src_format = src_path.split(".")[-1]

        # Persist the upload where soffice can read it.
        os.makedirs(os.path.dirname(src_path), mode=0o777, exist_ok=True)
        with open(src_path, "wb") as f:
            f.write(file_bytes)

        if get_platform() == "Windows":
            # Run the LibreOffice subprocess on Windows.
            comm_list = _build_command(_WINDOWS_SOFFICE, src_path,
                                       dest_path, target_format)
            subprocess.call(comm_list, timeout=10)
        else:
            # Run the LibreOffice subprocess on Linux.
            comm_list = _build_command(_LINUX_SOFFICE, src_path,
                                       dest_path, target_format)
            comm = "".join(part + " " for part in comm_list)
            logging.info("office_convert command" + comm)
            pid, p_code = my_subprocess_call(comm_list, timeout=22)
            logging.info("subprocess code " + str(p_code))

        # No output file means the conversion failed.
        if not os.path.exists(dest_file_path):
            logging.info(str(3) + src_format + ' to ' + target_format + " failed!")
            return {"data": [-3]}

        logging.info("out office_convert")
        with open(dest_file_path, "rb") as f:
            converted_bytes = f.read()

        # NOTE: the payload is the str() of a bytes object ("b'...'"), not
        # base64. It is kept as-is because existing clients decode this form.
        return {"data": str(converted_bytes)}
    except TimeoutError:
        return {"data": [-5]}
    except Exception:
        traceback.print_exc()
        return {"data": [-1]}
    finally:
        # Remove the directory that held the uploaded source file.
        if src_path is not None:
            file_dir = os.path.dirname(src_path)
            if os.path.exists(file_dir):
                logging.info("delete " + str(file_dir))
                try:
                    shutil.rmtree(file_dir)
                except OSError:
                    # A cleanup failure must not replace the response.
                    traceback.print_exc()


if __name__ == "__main__":
    port = 16000
    os.system("service cron start")
    app.run(host='0.0.0.0', port=port, threaded=False, debug=False)