# monitor_process_config.py
  1. import datetime
  2. import logging
  3. import os
  4. import re
  5. import sys
  6. import time
  7. import psutil
  8. sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
  9. from format_convert.utils import get_ip_port, get_intranet_ip
  10. ip_port_dict = get_ip_port()
  11. # ip = "http://" + get_intranet_ip()
  12. ip = "http://127.0.0.1"
  13. convert_port_list = ip_port_dict.get(ip).get("convert")
  14. ocr_port_list = ip_port_dict.get(ip).get("ocr")
  15. otr_port_list = ip_port_dict.get(ip).get("otr")
  16. soffice_port_list = ip_port_dict.get(ip).get("office")
  17. if not convert_port_list:
  18. convert_port_list = []
  19. if not ocr_port_list:
  20. ocr_port_list = []
  21. if not otr_port_list:
  22. otr_port_list = []
  23. if not soffice_port_list:
  24. soffice_port_list = []
  25. # schedule_port_list = ip_port_dict.get(ip).get("schedule")
  26. python_path = ip_port_dict.get(ip).get("python_path")
  27. project_path = ip_port_dict.get(ip).get("project_path")
  28. interface_path = project_path[:-1]
  29. std_out = " >>/convert.out 2>&1 &"
  30. std_out_gpu = " >>/gpu.out 2>&1 &"
  31. std_out_schedule = " >>/schedule.out 2>&1 &"
  32. # convert_comm = "nohup " + python_path + " " + interface_path + "/format_convert/convert.py #" + std_out
  33. # ocr_comm = "nohup " + python_path + " " + interface_path + "/ocr/ocr_gpu_interface.py # 0" + std_out_gpu
  34. # otr_comm = "nohup " + python_path + " " + interface_path + "/otr/otr_gpu_interface.py # 0" + std_out_gpu
  35. schedule_comm = "nohup " + python_path + " " + interface_path + "/format_convert/schedule_interface.py #" + std_out_schedule
  36. soffice_comm = "docker run --init -itd --log-opt max-size=10m --log-opt max-file=3 -p #:16000 soffice:v2 bash"
  37. gunicorn_path = python_path
  38. print("convert_port_list", len(convert_port_list))
  39. convert_comm = "nohup " + gunicorn_path + " -w " + str(len(convert_port_list)) + " -t 300 -b 0.0.0.0:# --chdir " \
  40. + interface_path + "/format_convert convert:app" + std_out
  41. ocr_comm = "nohup " + gunicorn_path + " -w " + str(len(ocr_port_list)) + " -t 300 --keep-alive 600 -b 0.0.0.0:# --chdir " \
  42. + interface_path + "/ocr ocr_interface:app" + std_out_gpu
  43. otr_comm = "nohup " + gunicorn_path + " -w " + str(len(otr_port_list)) + " -t 300 --keep-alive 600 -b 0.0.0.0:# --chdir " \
  44. + interface_path + "/otr otr_interface:app" + std_out_gpu
  45. def get_port():
  46. net_conn = psutil.net_connections()
  47. current_port_list = []
  48. for conn in net_conn:
  49. current_port_list.append(str(conn.laddr.port))
  50. current_port_list = list(set(current_port_list))
  51. current_port_list.sort(key=lambda x: x)
  52. return current_port_list
  53. def restart(process_type, port):
  54. if process_type == "convert":
  55. _comm = re.sub("#", port, convert_comm)
  56. elif process_type == "ocr":
  57. _comm = re.sub("#", port, ocr_comm)
  58. elif process_type == "otr":
  59. _comm = re.sub("#", port, otr_comm)
  60. elif process_type == "soffice":
  61. _comm = re.sub("#", port, soffice_comm)
  62. elif process_type == "schedule":
  63. _comm = re.sub("#", port, schedule_comm)
  64. else:
  65. _comm = "netstat -nltp"
  66. print("no process_type", process_type)
  67. # os.system("echo $(date +%F%n%T)")
  68. print(datetime.datetime.now(), "restart comm", _comm)
  69. os.system(_comm)
  70. def kill_soffice(limit_sec=30):
  71. pid_list = psutil.pids()
  72. for pid in pid_list:
  73. process = psutil.Process(pid)
  74. process_cmd = ''
  75. for c in process.cmdline():
  76. process_cmd += c + " "
  77. if process_cmd.strip() == "":
  78. continue
  79. if process.status() == "zombie":
  80. print("zombie cmd", process_cmd)
  81. if re.search("soffice", process.exe()):
  82. start_time = process.create_time()
  83. now_time = time.time()
  84. run_time = now_time-start_time
  85. if run_time >= limit_sec:
  86. comm = "kill -9 " + str(pid)
  87. print(datetime.datetime.now(), "kill process ", str(pid), str(process.exe()), str(run_time), ">", limit_sec)
  88. os.system(comm)
  89. def kill_nested_timeout_process():
  90. pid_list = psutil.pids()
  91. suspect_pid_list = []
  92. for pid in pid_list:
  93. process = psutil.Process(pid)
  94. process_cmd = ''
  95. for c in process.cmdline():
  96. process_cmd += c + " "
  97. if process_cmd.strip() == "":
  98. continue
  99. if re.search("convert:app", process_cmd):
  100. ppid = process.ppid()
  101. start_time = process.create_time()
  102. now_time = time.time()
  103. run_time = now_time-start_time
  104. if str(ppid) == "1":
  105. suspect_pid_list.append([str(pid), float(run_time)])
  106. # 时间最久的父进程为1的不能杀,是接口主进程
  107. if len(suspect_pid_list) <= 1:
  108. return
  109. else:
  110. suspect_pid_list.sort(key=lambda x: x[1], reverse=True)
  111. for pid, run_time in suspect_pid_list[1:]:
  112. # print("pid", pid, run_time)
  113. comm = "kill -9 " + str(pid)
  114. print(datetime.datetime.now(), "kill process ", str(pid), "father is 1", process_cmd)
  115. os.system(comm)
  116. def monitor():
  117. current_port_list = get_port()
  118. if convert_port_list:
  119. for p in convert_port_list[:1]:
  120. if p not in current_port_list:
  121. restart("convert", p)
  122. if ocr_port_list:
  123. for p in ocr_port_list[:1]:
  124. if p not in current_port_list:
  125. restart("ocr", p)
  126. if otr_port_list:
  127. for p in otr_port_list[:1]:
  128. if p not in current_port_list:
  129. restart("otr", p)
  130. if soffice_port_list:
  131. for p in soffice_port_list:
  132. if p not in current_port_list:
  133. restart("soffice", p)
  134. kill_soffice()
  135. kill_nested_timeout_process()
  136. # if schedule_port_list:
  137. # for p in schedule_port_list:
  138. # if p not in current_port_list:
  139. # restart("schedule", p)
  140. if __name__ == "__main__":
  141. for i in range(6):
  142. # os.system("echo $(date +%F%n%T)")
  143. monitor()
  144. time.sleep(10)