monitor_process_config.py 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245
  1. import datetime
  2. import logging
  3. import os
  4. import re
  5. import sys
  6. import time
  7. import psutil
  8. sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
  9. from format_convert.utils import get_ip_port, get_intranet_ip, get_args_from_config, get_all_ip, get_using_ip
  10. ip_port_dict = get_ip_port()
  11. ip = get_using_ip()
  12. print("local ip:", ip)
  13. if ip == 'http://127.0.0.1':
  14. ip = 'http://0.0.0.0'
  15. # 获取各个参数
  16. convert_port_list = get_args_from_config(ip_port_dict, ip, "convert", "MASTER")
  17. if convert_port_list:
  18. convert_port_list = convert_port_list[0]
  19. ocr_port_list = get_args_from_config(ip_port_dict, ip, "ocr")
  20. otr_port_list = get_args_from_config(ip_port_dict, ip, "otr")
  21. idc_port_list = get_args_from_config(ip_port_dict, ip, "idc")
  22. isr_port_list = get_args_from_config(ip_port_dict, ip, "isr")
  23. atc_port_list = get_args_from_config(ip_port_dict, ip, "atc")
  24. yolo_port_list = get_args_from_config(ip_port_dict, ip, "yolo")
  25. soffice_port_list = get_args_from_config(ip_port_dict, ip, "office", "MASTER")
  26. if soffice_port_list:
  27. soffice_port_list = soffice_port_list[0]
  28. python_path_list = get_args_from_config(ip_port_dict, ip, "python_path")
  29. project_path_list = get_args_from_config(ip_port_dict, ip, "project_path")
  30. gunicorn_path_list = get_args_from_config(ip_port_dict, ip, "gunicorn_path")
  31. std_out = " >>/convert.out 2>&1 &"
  32. std_out_gpu = " >>/gpu.out 2>&1 &"
  33. std_out_schedule = " >>/schedule.out 2>&1 &"
  34. print("convert_port_list", convert_port_list)
  35. print("ocr_port_list", ocr_port_list)
  36. print("otr_port_list", otr_port_list)
  37. print("idc_port_list", idc_port_list)
  38. print("isr_port_list", isr_port_list)
  39. print("atc_port_list", atc_port_list)
  40. print("yolo_port_list", yolo_port_list)
  41. print("soffice_port_list", soffice_port_list)
  42. # 根据port生成gunicorn语句
  43. ocr_comm_list = []
  44. otr_comm_list = []
  45. isr_comm_list = []
  46. idc_comm_list = []
  47. atc_comm_list = []
  48. yolo_comm_list = []
  49. for i in range(len(ocr_port_list)):
  50. ocr_comm_list.append("nohup " + gunicorn_path_list[i] + " -w " + str(len(ocr_port_list[i]))
  51. + " -t 300 --keep-alive 600 -b 0.0.0.0:# --chdir "
  52. + project_path_list[i] + "/ocr ocr_interface:app" + std_out_gpu)
  53. for i in range(len(otr_port_list)):
  54. otr_comm_list.append("nohup " + gunicorn_path_list[i] + " -w " + str(len(otr_port_list[i]))
  55. + " -t 300 --keep-alive 600 -b 0.0.0.0:# --chdir "
  56. + project_path_list[i] + "/otr otr_interface:app" + std_out_gpu)
  57. for i in range(len(idc_port_list)):
  58. idc_comm_list.append("nohup " + gunicorn_path_list[i] + " -w " + str(len(idc_port_list[i]))
  59. + " -t 300 --keep-alive 600 -b 0.0.0.0:# --chdir "
  60. + project_path_list[i] + "/idc idc_interface:app" + std_out_gpu)
  61. for i in range(len(isr_port_list)):
  62. isr_comm_list.append("nohup " + gunicorn_path_list[i] + " -w " + str(len(isr_port_list[i]))
  63. + " -t 300 --keep-alive 600 -b 0.0.0.0:# --chdir "
  64. + project_path_list[i] + "/isr isr_interface:app" + std_out_gpu)
  65. for i in range(len(atc_port_list)):
  66. atc_comm_list.append("nohup " + gunicorn_path_list[i] + " -w " + str(len(atc_port_list[i]))
  67. + " -t 300 --keep-alive 600 -b 0.0.0.0:# --chdir "
  68. + project_path_list[i] + "/atc atc_interface:app" + std_out_gpu)
  69. for i in range(len(yolo_port_list)):
  70. yolo_comm_list.append("nohup " + gunicorn_path_list[i] + " -w " + str(len(yolo_port_list[i]))
  71. + " -t 300 --keep-alive 600 -b 0.0.0.0:# --chdir "
  72. + project_path_list[i] + "/botr/yolov8 yolo_interface:app" + std_out_gpu)
  73. convert_comm = "nohup " + gunicorn_path_list[0] + " -w " + str(len(convert_port_list)) + " -t 300 -b 0.0.0.0:# --chdir " \
  74. + project_path_list[0] + "/format_convert convert:app" + std_out
  75. soffice_comm = "docker run --init -itd --log-opt max-size=10m --log-opt max-file=3 -p #:16000 soffice:v2 bash"
  76. def get_port():
  77. net_conn = psutil.net_connections()
  78. current_port_list = []
  79. for conn in net_conn:
  80. current_port_list.append(str(conn.laddr.port))
  81. current_port_list = list(set(current_port_list))
  82. current_port_list.sort(key=lambda x: x)
  83. return current_port_list
  84. def restart(process_type, port, index=0):
  85. if process_type == "convert":
  86. _comm = re.sub("#", port, convert_comm)
  87. elif process_type == "ocr":
  88. _comm = re.sub("#", port, ocr_comm_list[index])
  89. elif process_type == "otr":
  90. _comm = re.sub("#", port, otr_comm_list[index])
  91. elif process_type == "soffice":
  92. _comm = re.sub("#", port, soffice_comm)
  93. elif process_type == "idc":
  94. _comm = re.sub("#", port, idc_comm_list[index])
  95. elif process_type == "isr":
  96. _comm = re.sub("#", port, isr_comm_list[index])
  97. elif process_type == "atc":
  98. _comm = re.sub("#", port, atc_comm_list[index])
  99. elif process_type == "yolo":
  100. _comm = re.sub("#", port, yolo_comm_list[index])
  101. else:
  102. _comm = "netstat -nltp"
  103. print("no process_type", process_type)
  104. # os.system("echo $(date +%F%n%T)")
  105. print(datetime.datetime.now(), "restart comm", _comm)
  106. os.system(_comm)
  107. def kill_soffice(limit_sec=30):
  108. try:
  109. pid_list = psutil.pids()
  110. for pid in pid_list:
  111. process = psutil.Process(pid)
  112. process_cmd = ''
  113. for c in process.cmdline():
  114. process_cmd += c + " "
  115. if process_cmd.strip() == "":
  116. continue
  117. if process.status() == "zombie":
  118. print("zombie cmd", process_cmd)
  119. if re.search("soffice", process.exe()):
  120. start_time = process.create_time()
  121. now_time = time.time()
  122. run_time = now_time-start_time
  123. if run_time >= limit_sec:
  124. comm = "kill -9 " + str(pid)
  125. print(datetime.datetime.now(), "kill process ", str(pid), str(process.exe()), str(run_time), ">", limit_sec)
  126. os.system(comm)
  127. except:
  128. pass
  129. def kill_nested_timeout_process():
  130. try:
  131. pid_list = psutil.pids()
  132. suspect_pid_list = []
  133. for pid in pid_list:
  134. process = psutil.Process(pid)
  135. process_cmd = ''
  136. for c in process.cmdline():
  137. process_cmd += c + " "
  138. if process_cmd.strip() == "":
  139. continue
  140. if re.search("convert:app", process_cmd):
  141. ppid = process.ppid()
  142. start_time = process.create_time()
  143. now_time = time.time()
  144. run_time = now_time-start_time
  145. if str(ppid) == "1":
  146. suspect_pid_list.append([str(pid), float(run_time)])
  147. # 时间最久的父进程为1的不能杀,是接口主进程
  148. if len(suspect_pid_list) <= 1:
  149. return
  150. else:
  151. suspect_pid_list.sort(key=lambda x: x[1], reverse=True)
  152. for pid, run_time in suspect_pid_list[1:]:
  153. # print("pid", pid, run_time)
  154. comm = "kill -9 " + str(pid)
  155. print(datetime.datetime.now(), "kill process ", str(pid), "father is 1", process_cmd)
  156. os.system(comm)
  157. except:
  158. pass
  159. def monitor():
  160. current_port_list = get_port()
  161. if convert_port_list:
  162. for p in convert_port_list[:1]:
  163. if p not in current_port_list:
  164. restart("convert", p)
  165. if ocr_port_list:
  166. for j in range(len(ocr_port_list)):
  167. for p in ocr_port_list[j][:1]:
  168. if p not in current_port_list:
  169. restart("ocr", p, index=j)
  170. if otr_port_list:
  171. for j in range(len(otr_port_list)):
  172. for p in otr_port_list[j][:1]:
  173. if p not in current_port_list:
  174. restart("otr", p, index=j)
  175. if idc_port_list:
  176. for j in range(len(idc_port_list)):
  177. for p in idc_port_list[j][:1]:
  178. if p not in current_port_list:
  179. restart("idc", p, index=j)
  180. if isr_port_list:
  181. for j in range(len(isr_port_list)):
  182. for p in isr_port_list[j][:1]:
  183. if p not in current_port_list:
  184. restart("isr", p, index=j)
  185. if atc_port_list:
  186. for j in range(len(atc_port_list)):
  187. for p in atc_port_list[j][:1]:
  188. if p not in current_port_list:
  189. restart("atc", p, index=j)
  190. if yolo_port_list:
  191. for j in range(len(yolo_port_list)):
  192. for p in yolo_port_list[j][:1]:
  193. if p not in current_port_list:
  194. restart("yolo", p, index=j)
  195. if soffice_port_list:
  196. for p in soffice_port_list:
  197. if p not in current_port_list:
  198. restart("soffice", p)
  199. kill_soffice()
  200. kill_nested_timeout_process()
  201. # if schedule_port_list:
  202. # for p in schedule_port_list:
  203. # if p not in current_port_list:
  204. # restart("schedule", p)
  205. if __name__ == "__main__":
  206. for i in range(6):
  207. # os.system("echo $(date +%F%n%T)")
  208. monitor()
  209. time.sleep(10)