# log_process_convert.py
import re
from datetime import datetime
  3. log_path = '/convert.out'
  4. def process(_str=''):
  5. if not _str:
  6. with open(log_path, 'r') as f:
  7. line_list = f.readlines()
  8. else:
  9. line_list = _str.split('\n')
  10. md5_list = []
  11. md5_finished_list = []
  12. md5_larger_time_list = []
  13. for line in line_list:
  14. line_split = line.split(' ')
  15. if len(line_split) <= 8:
  16. continue
  17. _date = line_split[0]
  18. _time = line_split[1].split(',')[0]
  19. md5 = None
  20. for col in line_split:
  21. if len(col) == len('066ad08d38dd9fa8c37d03f7c67359c9') and re.search('^[0-9a-z]+$', col):
  22. md5 = col
  23. break
  24. if md5 is None:
  25. continue
  26. md5_list.append(md5)
  27. if 'is_success' in line:
  28. md5_finished_list.append(md5)
  29. try:
  30. _time = datetime.strptime(_date + ' ' + _time, "%Y-%m-%d %H:%M:%S")
  31. search_time = '2024-05-16 13:38:08'
  32. search_time = datetime.strptime(search_time, "%Y-%m-%d %H:%M:%S")
  33. except:
  34. continue
  35. if _time > search_time:
  36. md5_larger_time_list.append(md5)
  37. md5_list = list(set(md5_list))
  38. md5_not_list = list(set(md5_finished_list + md5_larger_time_list))
  39. for md5 in md5_list:
  40. if md5 not in md5_not_list:
  41. print('md5', md5)
  42. if __name__ == '__main__':
  43. _str = '''
  44. 2024-05-16 17:08:25,765 - get_table - INFO - 42aee7658c696ab9d44f49d632aa9239 - None - yolo detect cost: 0.07653164863586426
  45. 2024-05-16 17:08:25,765 - get_table - INFO - 42aee7658c696ab9d44f49d632aa9239 - None - detect not b_table_list
  46. 2024-05-16 17:08:25,765 - image_process - INFO - 42aee7658c696ab9d44f49d632aa9239 - None - botr process cost: 0.07787585258483887
  47. 2024-05-16 17:08:25,834 - from_atc_interface - INFO - 42aee7658c696ab9d44f49d632aa9239 - None - into from_atc_interface
  48. 2024-05-16 17:08:25,834 - interface_pool_gunicorn - INFO - 42aee7658c696ab9d44f49d632aa9239 - None - atc
  49. 2024-05-16 17:08:25,834 - interface_pool_gunicorn - INFO - 42aee7658c696ab9d44f49d632aa9239 - None - http://192.168.0.115:18061
  50. 2024-05-16 17:08:25,919 - from_atc_interface - INFO - 42aee7658c696ab9d44f49d632aa9239 - None - get interface return
  51. 2024-05-16 17:08:25,919 - from_atc_interface - INFO - 42aee7658c696ab9d44f49d632aa9239 - None - from_atc_interface cost time 0.0849916934967041
  52. 2024-05-16 17:08:25,919 - cut_str - INFO - 42aee7658c696ab9d44f49d632aa9239 - None - into cut_str
  53. 2024-05-16 17:08:25,920 - cut_str - INFO - 42aee7658c696ab9d44f49d632aa9239 - None - into cut_str
  54. 2024-05-16 17:08:25,920 - _convert - INFO - 42aee7658c696ab9d44f49d632aa9239 - None - md5: 42aee7658c696ab9d44f49d632aa9239 finished result: ['光大水务(莒县)有限公司(城北厂)双 1034 is_success: 1 pdf 其他 0.8948788642883301
  55. 2024-05-16 12:38:41,339 - from_atc_interface - INFO - 65ba62da6b5f4f998133a5203f8f9e1f - None - from_atc_interface cost time 0.0882723331451416
  56. 2024-05-16 12:38:41,339 - cut_str - INFO - 65ba62da6b5f4f998133a5203f8f9e1f - None - into cut_str
  57. 2024-05-16 12:38:41,339 - cut_str - INFO - 65ba62da6b5f4f998133a5203f8f9e1f - None - into cut_str
  58. 98%2024-05-16 12:38:41,340 - _convert - INFO - 65ba62da6b5f4f998133a5203f8f9e1f - None - md5: 65ba62da6b5f4f998133a5203f8f9e1f finished result: ['合同编号:11N2020160092 2710 is_success: 1 pdf 其他 1.9107580184936523
  59. 2024-05-16 12:38:41,340 - _convert - INFO - 65ba62da6b5f4f998133a5203f8f9e1f - None - finally
  60. '''
  61. process()