# start_all.py
  1. import datetime
  2. import logging
  3. import os
  4. import re
  5. import sys
  6. import time
  7. import psutil
  8. sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
  9. from format_convert.utils import get_ip_port, get_intranet_ip, get_args_from_config, get_all_ip, get_using_ip
  10. from config.interface_list import INTERFACES
  11. # 解析配置文件
  12. ip_port_dict = get_ip_port()
  13. ip = get_using_ip()
  14. print("local ip:", ip)
  15. # 自定义输出
  16. std_out = " >>/convert.out 2>&1 &"
  17. std_out_gpu = " >>/gpu.out 2>&1 &"
  18. std_out_schedule = " >>/schedule.out 2>&1 &"
  19. # 获取接口各个参数,提前生成命令
  20. python_path = get_args_from_config(ip_port_dict, ip, "python_path")[0]
  21. project_path = get_args_from_config(ip_port_dict, ip, "project_path")[0]
  22. gunicorn_path = get_args_from_config(ip_port_dict, ip, "gunicorn_path")[0]
  23. # interface_list = ['convert', 'ocr', 'otr', 'idc', 'isr', 'atc', 'yolo', 'office', 'tika']
  24. interface_list = INTERFACES
  25. comm_dict = {}
  26. interface_port_dict = {}
  27. for name in interface_list:
  28. if get_args_from_config(ip_port_dict, ip, name, 'MASTER'):
  29. port_list, num_list, gpu_list = get_args_from_config(ip_port_dict, ip, name, 'MASTER')[0]
  30. elif get_args_from_config(ip_port_dict, ip, name, 'SLAVE'):
  31. port_list, num_list, gpu_list = get_args_from_config(ip_port_dict, ip, name, 'SLAVE')[0]
  32. else:
  33. if get_args_from_config(ip_port_dict, ip, name):
  34. port_list, num_list, gpu_list = get_args_from_config(ip_port_dict, ip, name)[0]
  35. else:
  36. continue
  37. interface_port_dict[name] = [port_list, num_list, gpu_list]
  38. # print('interface_port_dict', interface_port_dict)
  39. for i, port in enumerate(port_list):
  40. port_num = num_list[i]
  41. if int(port_num) == 0:
  42. continue
  43. # 设置gpu
  44. if gpu_list:
  45. gpu = gpu_list[i]
  46. else:
  47. gpu = -1
  48. os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)
  49. gpu_comm = 'export CUDA_VISIBLE_DEVICES=' + str(gpu) + ' && '
  50. # 设置命令
  51. if name == 'convert':
  52. comm = "nohup " + gunicorn_path + " -w " + str(port_num) + " -t 6000 --keep-alive 600 -b 0.0.0.0:" + str(port) + " --chdir " + project_path + "format_convert" + ' ' + name + ":app" + std_out
  53. elif name == 'yolo':
  54. comm = "nohup " + gunicorn_path + " -w " + str(port_num) + " -t 300 --keep-alive 600 -b 0.0.0.0:" + str(port) + " --chdir " + project_path + "/botr/yolov8" + ' ' + name + "_interface:app" + std_out_gpu
  55. elif name == 'office':
  56. comm = "docker run --init -itd --log-opt max-size=10m --log-opt max-file=3 -p #:16000 soffice:v2 bash"
  57. office_port_comm_list = []
  58. for office_port in range(port, port + port_num):
  59. office_port_comm_list.append(re.sub("#", str(office_port), comm))
  60. comm_dict[name] = office_port_comm_list
  61. elif name == 'tika':
  62. comm = "nohup " + gunicorn_path + " -w " + str(port_num) + " -t 300 --keep-alive 600 -b 0.0.0.0:" + str(port) + " --chdir " + project_path + "/" + name + '_ ' + name + "_interface:app" + std_out_gpu
  63. else:
  64. comm = "nohup " + gunicorn_path + " -w " + str(port_num) + " -t 300 --keep-alive 600 -b 0.0.0.0:" + str(port) + " --chdir " + project_path + "/" + name + ' ' + name + "_interface:app" + std_out_gpu
  65. if name == 'office':
  66. continue
  67. if name in comm_dict.keys():
  68. comm_dict[name] += [gpu_comm + comm]
  69. else:
  70. comm_dict[name] = [gpu_comm + comm]
  71. # print(name, port_list, num_list, gpu_list)
  72. # print('comm_dict', comm_dict)
  73. # print('interface_port_dict', interface_port_dict)
  74. # convert_port_list = get_args_from_config(ip_port_dict, ip, "convert", "MASTER")
  75. # if convert_port_list:
  76. # convert_port_list = convert_port_list[0]
  77. # ocr_port_list = get_args_from_config(ip_port_dict, ip, "ocr")
  78. # otr_port_list = get_args_from_config(ip_port_dict, ip, "otr")
  79. # idc_port_list = get_args_from_config(ip_port_dict, ip, "idc")
  80. # isr_port_list = get_args_from_config(ip_port_dict, ip, "isr")
  81. # atc_port_list = get_args_from_config(ip_port_dict, ip, "atc")
  82. # yolo_port_list = get_args_from_config(ip_port_dict, ip, "yolo")
  83. # soffice_port_list = get_args_from_config(ip_port_dict, ip, "office", "MASTER")
  84. # if soffice_port_list:
  85. # soffice_port_list = soffice_port_list[0]
  86. # python_path_list = get_args_from_config(ip_port_dict, ip, "python_path")
  87. # project_path_list = get_args_from_config(ip_port_dict, ip, "project_path")
  88. # gunicorn_path_list = get_args_from_config(ip_port_dict, ip, "gunicorn_path")
  89. # std_out = " >>/convert.out 2>&1 &"
  90. # std_out_gpu = " >>/gpu.out 2>&1 &"
  91. # std_out_schedule = " >>/schedule.out 2>&1 &"
  92. #
  93. # print("convert_port_list", convert_port_list)
  94. # print("ocr_port_list", ocr_port_list)
  95. # print("otr_port_list", otr_port_list)
  96. # print("idc_port_list", idc_port_list)
  97. # print("isr_port_list", isr_port_list)
  98. # print("atc_port_list", atc_port_list)
  99. # print("yolo_port_list", yolo_port_list)
  100. # print("soffice_port_list", soffice_port_list)
  101. #
  102. # # 根据port生成gunicorn语句
  103. # ocr_comm_list = []
  104. # otr_comm_list = []
  105. # isr_comm_list = []
  106. # idc_comm_list = []
  107. # atc_comm_list = []
  108. # yolo_comm_list = []
  109. # for i in range(len(ocr_port_list)):
  110. # ocr_comm_list.append("nohup " + gunicorn_path_list[i] + " -w " + str(len(ocr_port_list[i]))
  111. # + " -t 300 --keep-alive 600 -b 0.0.0.0:# --chdir "
  112. # + project_path_list[i] + "/ocr ocr_interface:app" + std_out_gpu)
  113. # for i in range(len(otr_port_list)):
  114. # otr_comm_list.append("nohup " + gunicorn_path_list[i] + " -w " + str(len(otr_port_list[i]))
  115. # + " -t 300 --keep-alive 600 -b 0.0.0.0:# --chdir "
  116. # + project_path_list[i] + "/otr otr_interface:app" + std_out_gpu)
  117. # for i in range(len(idc_port_list)):
  118. # idc_comm_list.append("nohup " + gunicorn_path_list[i] + " -w " + str(len(idc_port_list[i]))
  119. # + " -t 300 --keep-alive 600 -b 0.0.0.0:# --chdir "
  120. # + project_path_list[i] + "/idc idc_interface:app" + std_out_gpu)
  121. # for i in range(len(isr_port_list)):
  122. # isr_comm_list.append("nohup " + gunicorn_path_list[i] + " -w " + str(len(isr_port_list[i]))
  123. # + " -t 300 --keep-alive 600 -b 0.0.0.0:# --chdir "
  124. # + project_path_list[i] + "/isr isr_interface:app" + std_out_gpu)
  125. # for i in range(len(atc_port_list)):
  126. # atc_comm_list.append("nohup " + gunicorn_path_list[i] + " -w " + str(len(atc_port_list[i]))
  127. # + " -t 300 --keep-alive 600 -b 0.0.0.0:# --chdir "
  128. # + project_path_list[i] + "/atc atc_interface:app" + std_out_gpu)
  129. # for i in range(len(yolo_port_list)):
  130. # yolo_comm_list.append("nohup " + gunicorn_path_list[i] + " -w " + str(len(yolo_port_list[i]))
  131. # + " -t 300 --keep-alive 600 -b 0.0.0.0:# --chdir "
  132. # + project_path_list[i] + "/botr/yolov8 yolo_interface:app" + std_out_gpu)
  133. #
  134. # convert_comm = "nohup " + gunicorn_path_list[0] + " -w " + str(len(convert_port_list)) + " -t 300 -b 0.0.0.0:# --chdir " \
  135. # + project_path_list[0] + "/format_convert convert:app" + std_out
  136. # soffice_comm = "docker run --init -itd --log-opt max-size=10m --log-opt max-file=3 -p #:16000 soffice:v2 bash"
  137. def get_port():
  138. net_conn = psutil.net_connections()
  139. current_port_list = []
  140. for conn in net_conn:
  141. current_port_list.append(str(conn.laddr.port))
  142. current_port_list = list(set(current_port_list))
  143. current_port_list.sort(key=lambda x: x)
  144. return current_port_list
  145. def restart(interface_type, port, index=0):
  146. # if process_type == "convert":
  147. # _comm = re.sub("#", port, convert_comm)
  148. # elif process_type == "ocr":
  149. # _comm = re.sub("#", port, ocr_comm_list[index])
  150. # elif process_type == "otr":
  151. # _comm = re.sub("#", port, otr_comm_list[index])
  152. # elif process_type == "soffice":
  153. # _comm = re.sub("#", port, soffice_comm)
  154. # elif process_type == "idc":
  155. # _comm = re.sub("#", port, idc_comm_list[index])
  156. # elif process_type == "isr":
  157. # _comm = re.sub("#", port, isr_comm_list[index])
  158. # elif process_type == "atc":
  159. # _comm = re.sub("#", port, atc_comm_list[index])
  160. # elif process_type == "yolo":
  161. # _comm = re.sub("#", port, yolo_comm_list[index])
  162. # else:
  163. # _comm = "netstat -nltp"
  164. # print("no process_type", process_type)
  165. #
  166. _comm_list = comm_dict.get(interface_type)
  167. if not _comm_list:
  168. print('monitor_process_config restart command error! check config!')
  169. raise
  170. for _comm in _comm_list:
  171. if str(port) in _comm:
  172. print(datetime.datetime.now(), "restart comm", _comm)
  173. os.system(_comm)
  174. def kill_soffice(limit_sec=30):
  175. try:
  176. pid_list = psutil.pids()
  177. for pid in pid_list:
  178. process = psutil.Process(pid)
  179. process_cmd = ''
  180. for c in process.cmdline():
  181. process_cmd += c + " "
  182. if process_cmd.strip() == "":
  183. continue
  184. if process.status() == "zombie":
  185. print("zombie cmd", process_cmd)
  186. if re.search("soffice", process.exe()):
  187. start_time = process.create_time()
  188. now_time = time.time()
  189. run_time = now_time-start_time
  190. if run_time >= limit_sec:
  191. comm = "kill -9 " + str(pid)
  192. print(datetime.datetime.now(), "kill process ", str(pid), str(process.exe()), str(run_time), ">", limit_sec)
  193. os.system(comm)
  194. except:
  195. pass
  196. def kill_nested_timeout_process():
  197. try:
  198. pid_list = psutil.pids()
  199. suspect_pid_list = []
  200. for pid in pid_list:
  201. process = psutil.Process(pid)
  202. process_cmd = ''
  203. for c in process.cmdline():
  204. process_cmd += c + " "
  205. if process_cmd.strip() == "":
  206. continue
  207. if re.search("convert:app", process_cmd):
  208. ppid = process.ppid()
  209. start_time = process.create_time()
  210. now_time = time.time()
  211. run_time = now_time-start_time
  212. if str(ppid) == "1":
  213. suspect_pid_list.append([str(pid), float(run_time)])
  214. # 时间最久的父进程为1的不能杀,是接口主进程
  215. if len(suspect_pid_list) <= 1:
  216. return
  217. else:
  218. suspect_pid_list.sort(key=lambda x: x[1], reverse=True)
  219. for pid, run_time in suspect_pid_list[1:]:
  220. # print("pid", pid, run_time)
  221. comm = "kill -9 " + str(pid)
  222. print(datetime.datetime.now(), "kill process ", str(pid), "father is 1", process_cmd)
  223. os.system(comm)
  224. except:
  225. pass
  226. def monitor():
  227. for _name in interface_list:
  228. if interface_port_dict.get(_name):
  229. _port_list, _num_list, _gpu_list = interface_port_dict.get(_name)
  230. current_port_list = get_port()
  231. for j, p in enumerate(_port_list):
  232. if str(p) not in current_port_list:
  233. restart(_name, p)
  234. # if convert_port_list:
  235. # for p in convert_port_list[:1]:
  236. # if p not in current_port_list:
  237. # restart("convert", p)
  238. #
  239. # if ocr_port_list:
  240. # for j in range(len(ocr_port_list)):
  241. # for p in ocr_port_list[j][:1]:
  242. # if p not in current_port_list:
  243. # restart("ocr", p, index=j)
  244. #
  245. # if otr_port_list:
  246. # for j in range(len(otr_port_list)):
  247. # for p in otr_port_list[j][:1]:
  248. # if p not in current_port_list:
  249. # restart("otr", p, index=j)
  250. #
  251. # if idc_port_list:
  252. # for j in range(len(idc_port_list)):
  253. # for p in idc_port_list[j][:1]:
  254. # if p not in current_port_list:
  255. # restart("idc", p, index=j)
  256. #
  257. # if isr_port_list:
  258. # for j in range(len(isr_port_list)):
  259. # for p in isr_port_list[j][:1]:
  260. # if p not in current_port_list:
  261. # restart("isr", p, index=j)
  262. #
  263. # if atc_port_list:
  264. # for j in range(len(atc_port_list)):
  265. # for p in atc_port_list[j][:1]:
  266. # if p not in current_port_list:
  267. # restart("atc", p, index=j)
  268. #
  269. # if yolo_port_list:
  270. # for j in range(len(yolo_port_list)):
  271. # for p in yolo_port_list[j][:1]:
  272. # if p not in current_port_list:
  273. # restart("yolo", p, index=j)
  274. #
  275. # if soffice_port_list:
  276. # for p in soffice_port_list:
  277. # if p not in current_port_list:
  278. # restart("soffice", p)
  279. kill_soffice()
  280. kill_nested_timeout_process()
  281. # if schedule_port_list:
  282. # for p in schedule_port_list:
  283. # if p not in current_port_list:
  284. # restart("schedule", p)
  285. if __name__ == "__main__":
  286. for i in range(3):
  287. # os.system("echo $(date +%F%n%T)")
  288. monitor()
  289. time.sleep(10)