import copy
import os
import random
import re
import sys
import time
from datetime import datetime
from multiprocessing import Process
import datetime as dt

from bs4 import BeautifulSoup

# The project root must be on sys.path before the project-local import below.
sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
from format_convert.utils import file_lock

# NOTE(review): in the reviewed copy of this file the HTML tag text inside the
# string literals had been stripped (the finditer pattern was empty, `match2`
# was never assigned and the re.sub pattern was reduced to "]*>|").  The
# patterns below are reconstructed from those fragments -- confirm them
# against the intended markup.
_TABLE_OPEN_RE = re.compile(r'<table[^>]*>')
_TABLE_CLOSE_RE = re.compile(r'</table>')
_TABLE_TAG_RE = re.compile(r'<table[^>]*>|</table>')


def run():
    """Acquire the ``19022.lock`` file lock and print ten progress lines.

    The lock file lives next to this script.  Between two progress lines the
    process sleeps for a random 0 or 1 seconds.  The lock object is closed in
    a ``finally`` clause so that it is not left open when the loop raises.
    """
    lock_path = os.path.abspath(os.path.dirname(__file__)) + '/19022.lock'
    # presumably file_lock blocks until the lock is available -- verify
    # against format_convert.utils.
    f = file_lock(lock_path)
    try:
        print("acquire file_lock! process " + str(os.getpid()))
        for i in range(10):
            print("process " + str(os.getpid()) + " " + str(i))
            time.sleep(random.randint(0, 1))
    finally:
        f.close()


def _group_mergeable_tables(table_index_list, table_td_cnt_list):
    """Return runs of consecutive table indices that may be merged.

    Table ``i`` joins the run of table ``i - 1`` when its opening tag starts
    exactly where the previous closing tag ends and both tables have the same
    number of ``td`` cells in their first row.  Every run is an ascending list
    of at least two indices, so ``run[0]`` and ``run[-1]`` are always the
    first and last table of the run.
    """
    runs = []
    current = []
    for i in range(1, len(table_index_list)):
        touching = table_index_list[i][0] == table_index_list[i - 1][-1]
        same_width = table_td_cnt_list[i] == table_td_cnt_list[i - 1]
        if touching and same_width:
            if not current:
                current.append(i - 1)
            current.append(i)
        else:
            if current:
                runs.append(current)
            current = []
    if current:
        runs.append(current)
    return runs


def _merge_adjacent_tables(html_str):
    """Merge directly adjacent, equally wide tables inside *html_str*.

    Returns the rewritten string, or ``None`` when the number of tables found
    by the regular expressions differs from the number found by the HTML
    parser (the offsets cannot be trusted in that case).
    """
    # The i-th opening tag is paired with the i-th closing tag; this is only
    # a correct pairing when tables are not nested.
    table_index_list = [
        [m1.start(), m1.end(), m2.start(), m2.end()]
        for m1, m2 in zip(_TABLE_OPEN_RE.finditer(html_str),
                          _TABLE_CLOSE_RE.finditer(html_str))
    ]
    print(table_index_list)

    # The parser is named explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(html_str, 'html.parser')
    table_td_cnt_list = []
    for table in soup.find_all('table'):
        first_row = table.tr
        # A table without any row counts as zero cells.
        td_count = len(first_row.find_all('td')) if first_row is not None else 0
        table_td_cnt_list.append(td_count)
    print(table_td_cnt_list)

    if len(table_index_list) != len(table_td_cnt_list):
        return None

    merge_index_list = _group_mergeable_tables(table_index_list,
                                               table_td_cnt_list)
    print(merge_index_list)

    print('before len(html_str)', len(html_str))
    for merge in merge_index_list:
        start_index = table_index_list[merge[0]][0]
        end_index = table_index_list[merge[-1]][-1]
        # NOTE(review): the original wrapper literals were lost; the first
        # table's own opening tag is reused so its attributes are preserved.
        opening_tag = html_str[start_index:table_index_list[merge[0]][1]]
        inner = _TABLE_TAG_RE.sub('', html_str[start_index:end_index])
        table_replace = opening_tag + inner + '</table>'
        # Pad with spaces so every offset after this run stays valid.
        table_replace = table_replace.ljust(end_index - start_index)
        html_str = html_str[:start_index] + table_replace + html_str[end_index:]
    print('after len(html_str)', len(html_str))
    return html_str


def merge_table(src_path=r'C:\Users\Administrator\Desktop\2.html',
                dst_path=r'C:\Users\Administrator\Desktop\3.html'):
    """Merge adjacent tables of an HTML file and write the result.

    Args:
        src_path: HTML file to read.
        dst_path: File that receives the rewritten HTML.

    Returns:
        The rewritten HTML when merging succeeded and the text kept its
        original length (``dst_path`` is written in that case); otherwise the
        unmodified HTML read from ``src_path``.

    Raises:
        OSError: If ``src_path`` cannot be read.  Failures after the read are
            reported on stdout and the original HTML is returned.
    """
    with open(src_path, 'r') as f:
        html_str = f.read()
    # str is immutable, so keeping a second reference is enough.
    html_str_origin = html_str

    try:
        merged = _merge_adjacent_tables(html_str)
        if merged is None:
            return html_str_origin
        # Same length means every padded replacement fitted its slot.
        if len(merged) != len(html_str_origin):
            return html_str_origin
        with open(dst_path, 'w') as f:
            f.write(merged)
        return merged
    except Exception as exc:
        # Best effort: never fail the caller, but do not hide the reason.
        print('merge_table failed, returning original html: ' + repr(exc))
        return html_str_origin


if __name__ == '__main__':
    # Multi-process demo for run(), currently disabled:
    # process_list = []
    # for j in range(10):
    #     p1 = Process(target=run,)
    #     p1.start()
    #     process_list.append(p1)
    #
    # for p in process_list:
    #     p.join()

    print('|'.join(['a', 'n']))

    _t = datetime.strptime('2023-04-26', '%Y-%m-%d')
    _t2 = datetime.strptime('2023-04-02', '%Y-%m-%d')
    print(abs((_t2-_t).days))
    print(datetime.strftime(_t + dt.timedelta(days=10), '%Y-%m-%d'))

    # merge_table()
    print(datetime.now())