"""Watchdog for the format-convert interfaces.

At import time the start command of every configured interface is built from
the ip/port configuration.  ``monitor()`` then checks which of the configured
ports are currently open on this machine, re-runs the start command of any
interface whose port is missing, and kills stale soffice / orphaned convert
worker processes.
"""
import datetime
import logging
import os
import re
import sys
import time

import psutil

sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
from format_convert.utils import get_ip_port, get_intranet_ip, get_args_from_config, get_all_ip, get_using_ip

# Parse the configuration file.
ip_port_dict = get_ip_port()
ip = get_using_ip()
print("local ip:", ip)

# Output redirection appended to the generated shell commands.
std_out = " >>/convert.out 2>&1 &"
std_out_gpu = " >>/gpu.out 2>&1 &"
std_out_schedule = " >>/schedule.out 2>&1 &"

# Read the per-host settings; the start commands are generated up front.
python_path = get_args_from_config(ip_port_dict, ip, "python_path")[0]
project_path = get_args_from_config(ip_port_dict, ip, "project_path")[0]
gunicorn_path = get_args_from_config(ip_port_dict, ip, "gunicorn_path")[0]
interface_list = ['convert', 'ocr', 'otr', 'idc', 'isr', 'atc', 'yolo', 'office']

# interface name -> list of shell commands that start it.
comm_dict = {}
# interface name -> [port_list, num_list, gpu_list] as read from the config.
interface_port_dict = {}


def _gunicorn_comm(workers, timeout, port, chdir, app, redirect):
    """Return the background gunicorn start command for one interface port."""
    return ("nohup " + gunicorn_path + " -w " + str(workers)
            + " -t " + str(timeout) + " --keep-alive 600 -b 0.0.0.0:" + str(port)
            + " --chdir " + chdir + ' ' + app + redirect)


def _monitored_ports(name, port, num):
    """Return every host port one config entry (port, num) is expected to open.

    For 'office', ``num`` is the count of containers published on consecutive
    host ports starting at ``port``; for every other interface ``num`` is the
    gunicorn worker count behind the single ``port``.
    """
    if name == 'office':
        return range(int(port), int(port) + int(num))
    return [port]


def _comm_binds_port(comm, port):
    """Return True if ``comm`` publishes exactly ``port``.

    Only the gunicorn bind flag (``-b 0.0.0.0:<port>``) and the docker publish
    flag (``-p <port>:``) are inspected, so timeouts such as ``-t 6000``, the
    container port ``:16000`` or a longer port number sharing the same prefix
    do not produce false matches.
    """
    pattern = r"(?:-b 0\.0\.0\.0:|-p )" + re.escape(str(port)) + r"(?!\d)"
    return re.search(pattern, comm) is not None


def _cmdline_text(process):
    """Return the process command line as one string, each arg followed by a space."""
    return "".join(c + " " for c in process.cmdline())


for name in interface_list:
    # Use the 'MASTER' entry when the config has one, else the plain entry.
    interface_args = (get_args_from_config(ip_port_dict, ip, name, 'MASTER')
                      or get_args_from_config(ip_port_dict, ip, name))
    port_list, num_list, gpu_list = interface_args[0]
    interface_port_dict[name] = [port_list, num_list, gpu_list]

    for i, port in enumerate(port_list):
        port_num = num_list[i]
        # A count of 0 means this port is disabled: no command is generated.
        if int(port_num) == 0:
            continue

        # Select the gpu (-1 when the config lists none).
        gpu = gpu_list[i] if gpu_list else -1
        # NOTE(review): this also changes the monitor's own environment and
        # ends up holding the value of the last processed entry.
        os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)
        gpu_comm = 'export CUDA_VISIBLE_DEVICES=' + str(gpu) + ' && '

        # Build the command.
        if name == 'office':
            comm = "docker run --init -itd --log-opt max-size=10m --log-opt max-file=3 -p #:16000 soffice:v2 bash"
            # One container per consecutive host port; accumulate across
            # config entries instead of overwriting earlier ones.
            comm_dict.setdefault(name, []).extend(
                re.sub("#", str(office_port), comm)
                for office_port in _monitored_ports(name, port, port_num)
            )
            continue

        if name == 'convert':
            # The "/" separator keeps the path valid whether or not
            # project_path ends with a slash.
            comm = _gunicorn_comm(port_num, 6000, port,
                                  project_path + "/format_convert",
                                  name + ":app", std_out)
        elif name == 'yolo':
            comm = _gunicorn_comm(port_num, 300, port,
                                  project_path + "/botr/yolov8",
                                  name + "_interface:app", std_out_gpu)
        else:
            comm = _gunicorn_comm(port_num, 300, port,
                                  project_path + "/" + name,
                                  name + "_interface:app", std_out_gpu)

        comm_dict.setdefault(name, []).append(gpu_comm + comm)


def get_port():
    """Return the sorted, de-duplicated local ports of all current connections.

    Ports are returned as strings (so the order is lexicographic), taken from
    the local address of every entry of ``psutil.net_connections()``.
    """
    # laddr can be an empty tuple for some sockets; skip those.
    ports = {str(conn.laddr.port) for conn in psutil.net_connections() if conn.laddr}
    return sorted(ports)


def restart(interface_type, port, index=0):
    """Run the start command of ``interface_type`` that publishes ``port``.

    :param interface_type: key of ``comm_dict`` (one of ``interface_list``).
    :param port: port whose start command should be executed.
    :param index: unused; kept so existing callers keep working.
    :raises RuntimeError: if no command was generated for ``interface_type``.
    """
    _comm_list = comm_dict.get(interface_type)
    if not _comm_list:
        print('monitor_process_config restart command error! check config!')
        # A bare ``raise`` here had no active exception and surfaced as an
        # uninformative RuntimeError; raise the same type with a message.
        raise RuntimeError(
            "no restart command configured for interface " + repr(interface_type)
        )

    for _comm in _comm_list:
        if _comm_binds_port(_comm, port):
            print(datetime.datetime.now(), "restart comm", _comm)
            os.system(_comm)


def kill_soffice(limit_sec=30):
    """Kill every process whose executable path contains "soffice" and that
    has been running for at least ``limit_sec`` seconds.

    Best effort: a process that disappears or cannot be inspected is skipped
    without stopping the scan of the remaining processes.
    """
    for pid in psutil.pids():
        try:
            process = psutil.Process(pid)
            process_cmd = _cmdline_text(process)
            if process_cmd.strip() == "":
                continue
            if process.status() == "zombie":
                print("zombie cmd", process_cmd)
            exe_path = process.exe()
            if not re.search("soffice", exe_path):
                continue
            run_time = time.time() - process.create_time()
        except (psutil.Error, OSError):
            # Process exited meanwhile or access was denied: nothing to do.
            continue

        if run_time >= limit_sec:
            comm = "kill -9 " + str(pid)
            print(datetime.datetime.now(), "kill process ", str(pid), str(exe_path),
                  str(run_time), ">", limit_sec)
            os.system(comm)


def kill_nested_timeout_process():
    """Kill orphaned "convert:app" processes.

    Collects the processes whose command line contains "convert:app" and whose
    parent pid is 1.  The longest-running one is kept, because it is the
    interface's master process; all the others are killed.

    Best effort: a process that disappears or cannot be inspected is skipped
    without stopping the scan of the remaining processes.
    """
    # Entries are (pid, run_time_seconds, command_line).
    suspect_pid_list = []
    for pid in psutil.pids():
        try:
            process = psutil.Process(pid)
            process_cmd = _cmdline_text(process)
            if process_cmd.strip() == "":
                continue
            if not re.search("convert:app", process_cmd):
                continue
            if str(process.ppid()) != "1":
                continue
            run_time = time.time() - process.create_time()
        except (psutil.Error, OSError):
            # Process exited meanwhile or access was denied: nothing to do.
            continue
        suspect_pid_list.append((str(pid), float(run_time), process_cmd))

    # The longest-running process with parent 1 must not be killed.
    if len(suspect_pid_list) <= 1:
        return

    suspect_pid_list.sort(key=lambda x: x[1], reverse=True)
    for pid, run_time, process_cmd in suspect_pid_list[1:]:
        comm = "kill -9 " + str(pid)
        # Log the command line of the process actually being killed.
        print(datetime.datetime.now(), "kill process ", str(pid), "father is 1", process_cmd)
        os.system(comm)


def monitor():
    """Run one monitoring pass.

    Restarts every configured interface port that is not currently open, then
    cleans up stale soffice processes and orphaned convert workers.
    """
    for _name in interface_list:
        port_conf = interface_port_dict.get(_name)
        if not port_conf:
            continue
        _port_list, _num_list, _gpu_list = port_conf
        current_ports = set(get_port())
        for p, num in zip(_port_list, _num_list):
            # Disabled ports (count 0) have no start command; skip them.
            if int(num) == 0:
                continue
            for expected_port in _monitored_ports(_name, p, num):
                if str(expected_port) not in current_ports:
                    restart(_name, expected_port)

    kill_soffice()
    kill_nested_timeout_process()


if __name__ == "__main__":
    # Three passes, each followed by a 10 second pause.
    for _ in range(3):
        monitor()
        time.sleep(10)