"""Report conversion tasks that appear in a log but never finished."""
import re
from datetime import datetime

log_path = '/convert.out'

# Cut-off used when the caller does not supply one.
_DEFAULT_SEARCH_TIME = '2024-05-16 13:38:08'
# Layout of the "<date> <time>" prefix once the ",<millis>" part is dropped.
_TIME_FORMAT = "%Y-%m-%d %H:%M:%S"
# A task id is a 32-character token of digits / lowercase letters
# (same length as '066ad08d38dd9fa8c37d03f7c67359c9').
_MD5_RE = re.compile(r'[0-9a-z]{32}')
# Lines with fewer space-separated columns than this are ignored.
_MIN_COLUMNS = 9


def _find_stale_md5s(lines, cutoff):
    """Return the sorted md5s that never finished and never logged after *cutoff*."""
    seen = set()
    excluded = set()

    for raw_line in lines:
        # Lines read from a file keep their newline; drop it so the last
        # column is tokenised the same way as in the in-memory string mode.
        line = raw_line.rstrip('\n')
        columns = line.split(' ')
        if len(columns) < _MIN_COLUMNS:
            continue

        # The first md5-shaped column identifies the task the line belongs to.
        md5 = next((col for col in columns if _MD5_RE.fullmatch(col)), None)
        if md5 is None:
            continue
        seen.add(md5)

        if 'is_success' in line:
            excluded.add(md5)

        timestamp = columns[0] + ' ' + columns[1].split(',')[0]
        try:
            logged_at = datetime.strptime(timestamp, _TIME_FORMAT)
        except ValueError:
            # Unparseable prefix (e.g. "98%2024-05-16"): the md5 is still
            # tracked, but the line carries no usable time information.
            continue
        if logged_at > cutoff:
            excluded.add(md5)

    return sorted(seen - excluded)


def process(_str='', search_time=_DEFAULT_SEARCH_TIME):
    """Print and return the md5s of tasks that look stuck.

    A task is reported when its md5 occurs in the log, no line mentioning it
    contains ``is_success``, and none of its lines is time-stamped later than
    ``search_time``.

    Args:
        _str: Log text to analyse, one record per ``'\\n'``-separated line.
            When empty, the file at ``log_path`` is read instead.
        search_time: Cut-off as a ``"%Y-%m-%d %H:%M:%S"`` string, or a naive
            ``datetime``. Tasks with any line after it are not reported.

    Returns:
        The reported md5s as a sorted list (each is also printed as
        ``md5 <value>``).

    Raises:
        ValueError: If ``search_time`` is a string in the wrong format.
        OSError: If ``_str`` is empty and ``log_path`` cannot be opened.
    """
    # Parse the cut-off once instead of once per log line.
    if isinstance(search_time, datetime):
        cutoff = search_time
    else:
        cutoff = datetime.strptime(search_time, _TIME_FORMAT)

    if _str:
        stale = _find_stale_md5s(_str.split('\n'), cutoff)
    else:
        # The log mixes ASCII fields with non-ASCII message text; only the
        # ASCII parts matter here, so undecodable bytes are replaced rather
        # than aborting the whole scan.
        with open(log_path, 'r', encoding='utf-8', errors='replace') as f:
            stale = _find_stale_md5s(f, cutoff)

    for md5 in stale:
        print('md5', md5)
    return stale


if __name__ == '__main__':
    # Sample log excerpt kept for ad-hoc debugging. It is NOT passed to
    # process() below, so running the script analyses the file at log_path;
    # call process(_str) instead to analyse the sample.
    _str = ''' 2024-05-16 17:08:25,765 - get_table - INFO - 42aee7658c696ab9d44f49d632aa9239 - None - yolo detect cost: 0.07653164863586426 2024-05-16 17:08:25,765 - get_table - INFO - 42aee7658c696ab9d44f49d632aa9239 - None - detect not b_table_list 2024-05-16 17:08:25,765 - image_process - INFO - 42aee7658c696ab9d44f49d632aa9239 - None - botr process cost: 0.07787585258483887 2024-05-16 17:08:25,834 - from_atc_interface - INFO - 42aee7658c696ab9d44f49d632aa9239 - None - into from_atc_interface 2024-05-16 17:08:25,834 - interface_pool_gunicorn - INFO - 42aee7658c696ab9d44f49d632aa9239 - None - atc 2024-05-16 17:08:25,834 - interface_pool_gunicorn - INFO - 42aee7658c696ab9d44f49d632aa9239 - None - http://192.168.0.115:18061 2024-05-16 17:08:25,919 - from_atc_interface - INFO - 42aee7658c696ab9d44f49d632aa9239 - None - get interface return 2024-05-16 17:08:25,919 - from_atc_interface - INFO - 
42aee7658c696ab9d44f49d632aa9239 - None - from_atc_interface cost time 0.0849916934967041 2024-05-16 17:08:25,919 - cut_str - INFO - 42aee7658c696ab9d44f49d632aa9239 - None - into cut_str 2024-05-16 17:08:25,920 - cut_str - INFO - 42aee7658c696ab9d44f49d632aa9239 - None - into cut_str 2024-05-16 17:08:25,920 - _convert - INFO - 42aee7658c696ab9d44f49d632aa9239 - None - md5: 42aee7658c696ab9d44f49d632aa9239 finished result: ['光大水务(莒县)有限公司(城北厂)双 1034 is_success: 1 pdf 其他 0.8948788642883301 2024-05-16 12:38:41,339 - from_atc_interface - INFO - 65ba62da6b5f4f998133a5203f8f9e1f - None - from_atc_interface cost time 0.0882723331451416 2024-05-16 12:38:41,339 - cut_str - INFO - 65ba62da6b5f4f998133a5203f8f9e1f - None - into cut_str 2024-05-16 12:38:41,339 - cut_str - INFO - 65ba62da6b5f4f998133a5203f8f9e1f - None - into cut_str 98%2024-05-16 12:38:41,340 - _convert - INFO - 65ba62da6b5f4f998133a5203f8f9e1f - None - md5: 65ba62da6b5f4f998133a5203f8f9e1f finished result: ['合同编号:11N2020160092 2710 is_success: 1 pdf 其他 1.9107580184936523 2024-05-16 12:38:41,340 - _convert - INFO - 65ba62da6b5f4f998133a5203f8f9e1f - None - finally '''
    process()