123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207 |
- import base64
- import json
- import os
- import re
- import shutil
- import subprocess
- import sys
- import traceback
- import psutil
- sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
- from format_convert.judge_platform import get_platform
- import logging
- from format_convert.utils import my_subprocess_call
- from flask import Flask, request
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
def monitor_libreoffice():
    """Kill the tracked soffice process and any running unrar processes.

    First prints a process listing via ``ps`` for diagnostics, then walks
    every PID reported by psutil:

    * a process whose executable path contains ``soffice`` is killed only
      when its PID equals the module-global ``soffice_pid`` (the value
      ``office_convert`` stores after launching a conversion);
    * a process whose executable path contains ``unrar`` is always killed.

    Processes that cannot be inspected or killed (already exited, access
    denied, ...) are skipped.

    Raises:
        TimeoutError: re-raised unchanged if it occurs while scanning
            (presumably from an external timeout wrapper -- confirm with
            callers).
    """
    logging.info("into monitor_libreoffice")
    # Diagnostic only: ps header line plus the top entries sorted on column 4.
    os.system("ps aux|head -1;ps aux|grep -v PID|sort -rn -k +4|head")

    tracked_pid = str(globals().get("soffice_pid"))
    for pid in psutil.pids():
        try:
            process = psutil.Process(pid)
            exe_path = process.exe()
            if re.search("soffice", exe_path):
                # Only the soffice instance this module started is a target.
                if str(pid) != tracked_pid:
                    continue
                label = "soffice"
            elif re.search("unrar", exe_path):
                label = "unrar"
            else:
                continue

            logging.info("process " + str(pid) + str(exe_path))
            # Use the psutil handle instead of shelling out to `kill -9`;
            # a failed kill now raises and is skipped rather than being
            # logged as a success.
            process.kill()
            logging.info("killed " + label + str(pid))
        except TimeoutError:
            # Keep the original exception (message and traceback) intact.
            raise
        except Exception:
            # Best effort: the process vanished or is not accessible.
            continue
- # @timeout_decorator.timeout(120, timeout_exception=TimeoutError, use_signals=False)
def office_convert(src_path, dest_path, target_format, retry_times=1):
    """Convert a document with headless LibreOffice, retrying on failure.

    Args:
        src_path: Path of the file to convert.
        dest_path: Output directory handed to ``soffice --outdir``.
        target_format: Extension passed to ``soffice --convert-to``; also the
            extension of the expected output file.
        retry_times: Maximum number of conversion attempts.

    Returns:
        The path of the converted file (``str``) on success, ``[-3]`` when no
        output file exists after all attempts, or ``[-5]`` when a
        ``TimeoutError`` is raised during conversion.
    """
    try:
        logging.info("into office_convert")
        # The output keeps the source base name with only the final extension
        # replaced, so strip just that one extension ("a.b.docx" -> "a.b").
        base_name = os.path.splitext(os.path.basename(src_path))[0]
        # os.path.join copes with dest_path both with and without a trailing
        # separator, matching the directory passed to --outdir below.
        dest_file_path = os.path.join(dest_path, base_name + "." + target_format)
        src_format = src_path.split(".")[-1]

        for attempt in range(retry_times):
            if get_platform() == "Windows":
                # Windows: call the LibreOffice binary directly.
                soffice = 'C:\\Program Files\\LibreOfficeDev 5\\program\\soffice.exe'
                comm_list = [soffice, '--headless', '--convert-to', target_format, src_path,
                             '--outdir', dest_path + os.sep]
                try:
                    subprocess.call(comm_list, timeout=30 * (attempt + 2))
                except Exception:
                    logging.exception("%s to %s failed on attempt %d, retrying",
                                      src_format, target_format, attempt)
                    continue
            else:
                # Linux: run soffice through the project's subprocess wrapper.
                comm_list = ['soffice', '--headless', '--convert-to', target_format, src_path,
                             '--outdir', dest_path + os.sep]
                try:
                    pid, p_code = my_subprocess_call(comm_list, timeout=30 * (attempt + 1))
                    logging.info("subprocess code " + str(p_code))
                    # Remember the PID so monitor_libreoffice can target it.
                    globals().update({"soffice_pid": pid})
                except TimeoutError:
                    return [-5]
                except Exception:
                    logging.exception("%s to %s failed on attempt %d, retrying",
                                      src_format, target_format, attempt)
                    continue

            if os.path.exists(dest_file_path):
                # Conversion produced the expected file; stop retrying.
                break
            logging.warning("%s to %s produced no output on attempt %d, retrying",
                            src_format, target_format, attempt)

        # Still nothing after every attempt.
        if not os.path.exists(dest_file_path):
            logging.info(str(3) + src_format + ' to ' + target_format + " failed!")
            return [-3]

        logging.info("out office_convert")
        return dest_file_path
    except TimeoutError:
        return [-5]
# API setup: the Flask application object that the route handlers register on.
app = Flask(__name__)
@app.route('/soffice', methods=['POST'])
def _office_convert():
    """HTTP endpoint: convert an uploaded document with headless LibreOffice.

    Form fields read from the request:
        src_path: where the uploaded bytes are written before conversion.
        dest_path: output directory handed to ``soffice --outdir``.
        file: base64-encoded content of the source document.
        target_format: extension passed to ``soffice --convert-to``.

    Returns:
        A dict with a single ``"data"`` entry:
        ``[]`` when the request carries no form data, ``[-3]`` when no output
        file was produced, ``[-5]`` on ``TimeoutError``, ``[-1]`` on any other
        error, and ``str(<converted file bytes>)`` on success.

    Side effects:
        Whenever ``src_path`` was read from the form, the directory containing
        it is removed (recursively) before the function returns.
    """
    # NOTE(review): src_path and dest_path come straight from the client and
    # are used unchecked for writing a file and for shutil.rmtree below.
    # Expose this endpoint to trusted callers only, or validate the paths
    # against an allowed base directory.
    src_path = None
    try:
        logging.info("into office_convert")
        if not request.form:
            logging.info("office_convert no data!")
            return {"data": []}

        src_path = request.form.get("src_path")
        dest_path = request.form.get("dest_path")
        file_bytes = base64.b64decode(request.form.get("file"))
        target_format = request.form.get("target_format")

        # The output keeps the source base name with only the final extension
        # replaced, so strip just that one extension ("a.b.docx" -> "a.b").
        base_name = os.path.splitext(os.path.basename(src_path))[0]
        # os.path.join copes with dest_path both with and without a trailing
        # separator, matching the directory passed to --outdir below.
        dest_file_path = os.path.join(dest_path, base_name + "." + target_format)
        src_format = src_path.split(".")[-1]

        # Materialize the upload on disk so soffice can read it.
        src_dir = os.path.dirname(src_path)
        if not os.path.exists(src_dir):
            os.makedirs(src_dir, mode=0o777)
        with open(src_path, "wb") as f:
            f.write(file_bytes)

        if get_platform() == "Windows":
            # Windows: call the LibreOffice binary directly.
            soffice = 'C:\\Program Files\\LibreOfficeDev 5\\program\\soffice.exe'
            comm_list = [soffice, '--headless', '--convert-to', target_format, src_path,
                         '--outdir', dest_path + os.sep]
            subprocess.call(comm_list, timeout=10)
        else:
            # Linux: run soffice through the project's subprocess wrapper.
            comm_list = ['soffice', '--headless', '--convert-to', target_format, src_path,
                         '--outdir', dest_path + os.sep]
            logging.info("office_convert command" + "".join(c + " " for c in comm_list))
            _, p_code = my_subprocess_call(comm_list, timeout=22)
            logging.info("subprocess code " + str(p_code))

        if not os.path.exists(dest_file_path):
            logging.info(str(3) + src_format + ' to ' + target_format + " failed!")
            return {"data": [-3]}

        logging.info("out office_convert")
        with open(dest_file_path, "rb") as f:
            converted_bytes = f.read()

        # NOTE(review): the payload is the Python repr of the bytes
        # ("b'...'"), not base64. It is kept as-is because existing clients
        # presumably parse this exact format -- confirm before switching to
        # base64.b64encode(...).decode().
        return {"data": str(converted_bytes)}
    except TimeoutError:
        return {"data": [-5]}
    except Exception:
        logging.exception("office_convert request failed")
        return {"data": [-1]}
    finally:
        # Always clean up the directory that held the uploaded source file.
        if src_path is not None:
            file_dir = os.path.dirname(src_path)
            if os.path.exists(file_dir):
                logging.info("delete " + str(file_dir))
                shutil.rmtree(file_dir)
if __name__ == "__main__":
    # Start the cron service first, then serve the Flask app on all
    # interfaces with threading and debug mode both switched off.
    os.system("service cron start")
    port = 16000
    app.run(host='0.0.0.0', port=port, threaded=False, debug=False)
|