monitor_process_config.py 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217
  1. import datetime
  2. import logging
  3. import os
  4. import re
  5. import sys
  6. import time
  7. import psutil
  8. sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
  9. from format_convert.utils import get_ip_port, get_intranet_ip, get_args_from_config, get_all_ip, get_using_ip
  10. # ip_port_dict = get_ip_port()
  11. # ip = "http://" + get_intranet_ip()
  12. # ip = "http://127.0.0.1"
  13. # convert_port_list = ip_port_dict.get(ip).get("convert")
  14. # ocr_port_list = ip_port_dict.get(ip).get("ocr")
  15. # otr_port_list = ip_port_dict.get(ip).get("otr")
  16. # soffice_port_list = ip_port_dict.get(ip).get("office")
  17. # if not convert_port_list:
  18. # convert_port_list = []
  19. # if not ocr_port_list:
  20. # ocr_port_list = []
  21. # if not otr_port_list:
  22. # otr_port_list = []
  23. # if not soffice_port_list:
  24. # soffice_port_list = []
  25. #
  26. # # schedule_port_list = ip_port_dict.get(ip).get("schedule")
  27. # # python_path = ip_port_dict.get(ip).get("python_path")
  28. # # project_path = ip_port_dict.get(ip).get("project_path")
  29. #
  30. # interface_path = project_path[:-1]
  31. # std_out = " >>/convert.out 2>&1 &"
  32. # std_out_gpu = " >>/gpu.out 2>&1 &"
  33. # std_out_schedule = " >>/schedule.out 2>&1 &"
  34. # # convert_comm = "nohup " + python_path + " " + interface_path + "/format_convert/convert.py #" + std_out
  35. # # ocr_comm = "nohup " + python_path + " " + interface_path + "/ocr/ocr_gpu_interface.py # 0" + std_out_gpu
  36. # # otr_comm = "nohup " + python_path + " " + interface_path + "/otr/otr_gpu_interface.py # 0" + std_out_gpu
  37. # schedule_comm = "nohup " + python_path + " " + interface_path + "/format_convert/schedule_interface.py #" + std_out_schedule
  38. # soffice_comm = "docker run --init -itd --log-opt max-size=10m --log-opt max-file=3 -p #:16000 soffice:v2 bash"
  39. #
  40. #
  41. # gunicorn_path = python_path
  42. # # print("convert_port_list", len(convert_port_list))
  43. # convert_comm = "nohup " + gunicorn_path + " -w " + str(len(convert_port_list)) + " -t 300 -b 0.0.0.0:# --chdir " \
  44. # + interface_path + "/format_convert convert:app" + std_out
  45. # ocr_comm = "nohup " + gunicorn_path + " -w " + str(len(ocr_port_list)) + " -t 300 --keep-alive 600 -b 0.0.0.0:# --chdir " \
  46. # + interface_path + "/ocr ocr_gpu_interface:app" + std_out_gpu
  47. # otr_comm = "nohup " + gunicorn_path + " -w " + str(len(otr_port_list)) + " -t 300 --keep-alive 600 -b 0.0.0.0:# --chdir " \
  48. # + interface_path + "/otr otr_gpu_interface:app" + std_out_gpu
  49. ip_port_dict = get_ip_port()
  50. ip = get_using_ip()
  51. print("local ip:", ip)
  52. convert_port_list = get_args_from_config(ip_port_dict, ip, "convert", "MASTER")
  53. if convert_port_list:
  54. convert_port_list = convert_port_list[0]
  55. ocr_port_list = get_args_from_config(ip_port_dict, ip, "ocr")
  56. otr_port_list = get_args_from_config(ip_port_dict, ip, "otr")
  57. soffice_port_list = get_args_from_config(ip_port_dict, ip, "office", "MASTER")
  58. if soffice_port_list:
  59. soffice_port_list = soffice_port_list[0]
  60. python_path_list = get_args_from_config(ip_port_dict, ip, "python_path")
  61. project_path_list = get_args_from_config(ip_port_dict, ip, "project_path")
  62. gunicorn_path_list = get_args_from_config(ip_port_dict, ip, "gunicorn_path")
  63. std_out = " >>/convert.out 2>&1 &"
  64. std_out_gpu = " >>/gpu.out 2>&1 &"
  65. std_out_schedule = " >>/schedule.out 2>&1 &"
  66. print("convert_port_list", convert_port_list)
  67. print("ocr_port_list", ocr_port_list)
  68. print("otr_port_list", otr_port_list)
  69. print("soffice_port_list", soffice_port_list)
  70. ocr_comm_list = []
  71. otr_comm_list = []
  72. for i in range(len(ocr_port_list)):
  73. ocr_comm_list.append("nohup " + gunicorn_path_list[i] + " -w " + str(len(ocr_port_list[i]))
  74. + " -t 300 --keep-alive 600 -b 0.0.0.0:# --chdir "
  75. + project_path_list[i] + "/ocr ocr_interface:app" + std_out_gpu)
  76. for i in range(len(otr_port_list)):
  77. otr_comm_list.append("nohup " + gunicorn_path_list[i] + " -w " + str(len(otr_port_list[i]))
  78. + " -t 300 --keep-alive 600 -b 0.0.0.0:# --chdir "
  79. + project_path_list[i] + "/otr otr_interface:app" + std_out_gpu)
  80. convert_comm = "nohup " + gunicorn_path_list[0] + " -w " + str(len(convert_port_list)) + " -t 300 -b 0.0.0.0:# --chdir " \
  81. + project_path_list[0] + "/format_convert convert:app" + std_out
  82. schedule_comm = "nohup " + python_path_list[0] + " " + project_path_list[0] + "/format_convert/schedule_interface.py #" + std_out_schedule
  83. soffice_comm = "docker run --init -itd --log-opt max-size=10m --log-opt max-file=3 -p #:16000 soffice:v2 bash"
  84. def get_port():
  85. net_conn = psutil.net_connections()
  86. current_port_list = []
  87. for conn in net_conn:
  88. current_port_list.append(str(conn.laddr.port))
  89. current_port_list = list(set(current_port_list))
  90. current_port_list.sort(key=lambda x: x)
  91. return current_port_list
  92. def restart(process_type, port, index=0):
  93. if process_type == "convert":
  94. _comm = re.sub("#", port, convert_comm)
  95. elif process_type == "ocr":
  96. _comm = re.sub("#", port, ocr_comm_list[index])
  97. elif process_type == "otr":
  98. _comm = re.sub("#", port, otr_comm_list[index])
  99. elif process_type == "soffice":
  100. _comm = re.sub("#", port, soffice_comm)
  101. elif process_type == "schedule":
  102. _comm = re.sub("#", port, schedule_comm)
  103. else:
  104. _comm = "netstat -nltp"
  105. print("no process_type", process_type)
  106. # os.system("echo $(date +%F%n%T)")
  107. print(datetime.datetime.now(), "restart comm", _comm)
  108. os.system(_comm)
  109. def kill_soffice(limit_sec=30):
  110. pid_list = psutil.pids()
  111. for pid in pid_list:
  112. process = psutil.Process(pid)
  113. process_cmd = ''
  114. for c in process.cmdline():
  115. process_cmd += c + " "
  116. if process_cmd.strip() == "":
  117. continue
  118. if process.status() == "zombie":
  119. print("zombie cmd", process_cmd)
  120. if re.search("soffice", process.exe()):
  121. start_time = process.create_time()
  122. now_time = time.time()
  123. run_time = now_time-start_time
  124. if run_time >= limit_sec:
  125. comm = "kill -9 " + str(pid)
  126. print(datetime.datetime.now(), "kill process ", str(pid), str(process.exe()), str(run_time), ">", limit_sec)
  127. os.system(comm)
  128. def kill_nested_timeout_process():
  129. pid_list = psutil.pids()
  130. suspect_pid_list = []
  131. for pid in pid_list:
  132. process = psutil.Process(pid)
  133. process_cmd = ''
  134. for c in process.cmdline():
  135. process_cmd += c + " "
  136. if process_cmd.strip() == "":
  137. continue
  138. if re.search("convert:app", process_cmd):
  139. ppid = process.ppid()
  140. start_time = process.create_time()
  141. now_time = time.time()
  142. run_time = now_time-start_time
  143. if str(ppid) == "1":
  144. suspect_pid_list.append([str(pid), float(run_time)])
  145. # 时间最久的父进程为1的不能杀,是接口主进程
  146. if len(suspect_pid_list) <= 1:
  147. return
  148. else:
  149. suspect_pid_list.sort(key=lambda x: x[1], reverse=True)
  150. for pid, run_time in suspect_pid_list[1:]:
  151. # print("pid", pid, run_time)
  152. comm = "kill -9 " + str(pid)
  153. print(datetime.datetime.now(), "kill process ", str(pid), "father is 1", process_cmd)
  154. os.system(comm)
  155. def monitor():
  156. current_port_list = get_port()
  157. if convert_port_list:
  158. for p in convert_port_list[:1]:
  159. if p not in current_port_list:
  160. restart("convert", p)
  161. if ocr_port_list:
  162. for j in range(len(ocr_port_list)):
  163. for p in ocr_port_list[j][:1]:
  164. if p not in current_port_list:
  165. restart("ocr", p, index=j)
  166. if otr_port_list:
  167. for j in range(len(otr_port_list)):
  168. for p in otr_port_list[j][:1]:
  169. if p not in current_port_list:
  170. restart("otr", p, index=j)
  171. if soffice_port_list:
  172. for p in soffice_port_list:
  173. if p not in current_port_list:
  174. restart("soffice", p)
  175. kill_soffice()
  176. kill_nested_timeout_process()
  177. # if schedule_port_list:
  178. # for p in schedule_port_list:
  179. # if p not in current_port_list:
  180. # restart("schedule", p)
  181. if __name__ == "__main__":
  182. for i in range(6):
  183. # os.system("echo $(date +%F%n%T)")
  184. monitor()
  185. time.sleep(10)