import copy
import os
import random
import re
import sys
import time
from bs4 import BeautifulSoup
from datetime import datetime
from multiprocessing import Process
import datetime as dt
sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
from format_convert.utils import file_lock
def run():
    """Hold the ``19022.lock`` file lock while printing ten progress lines.

    The lock file lives next to this script.  Between progress lines the
    process sleeps for a random 0 or 1 seconds.

    NOTE(review): ``file_lock`` comes from ``format_convert.utils``; it
    presumably blocks until the lock is acquired and releases it when the
    returned handle is closed -- confirm against that helper.
    """
    f = file_lock(os.path.abspath(os.path.dirname(__file__)) + '/19022.lock')
    try:
        pid = str(os.getpid())
        print("acquire file_lock! process " + pid)
        for i in range(10):
            print("process " + pid + " " + str(i))
            time.sleep(random.randint(0, 1))
    finally:
        # Close the handle even if printing/sleeping is interrupted, so the
        # lock handle is never left open by a failed worker.
        f.close()
def _group_adjacent_tables(table_index_list, table_td_cnt_list):
    """Group indexes of consecutive tables that can be merged into one.

    Two neighbouring tables are mergeable when the second one starts exactly
    where the first one ends (no characters in between) and both have the
    same first-row cell count.

    :param table_index_list: per table ``[open_start, open_end, close_start,
        close_end]`` character offsets.
    :param table_td_cnt_list: per table number of ``td`` cells in its first row.
    :return: list of groups, each an ascending list of table indexes.
    """
    merge_index_list = []
    temp_index = []
    for i in range(1, len(table_index_list)):
        adjacent = table_index_list[i][0] - table_index_list[i - 1][-1] == 0
        same_width = table_td_cnt_list[i - 1] == table_td_cnt_list[i]
        if adjacent and same_width:
            # Build the group in ascending order explicitly; de-duplicating
            # through a set does not guarantee order, and the caller relies
            # on group[0] / group[-1] being the first / last table.
            if not temp_index:
                temp_index.append(i - 1)
            temp_index.append(i)
        elif temp_index:
            merge_index_list.append(temp_index)
            temp_index = []
    if temp_index:
        merge_index_list.append(temp_index)
    return merge_index_list


def merge_table(input_path=r'C:\Users\Administrator\Desktop\2.html',
                output_path=r'C:\Users\Administrator\Desktop\3.html'):
    """Merge directly adjacent HTML tables that have the same column count.

    The document is read from ``input_path``.  Runs of tables that touch each
    other (closing tag immediately followed by the next opening tag) and whose
    first rows contain the same number of ``td`` cells are collapsed into a
    single table.  The replacement is padded with spaces so the overall
    document length does not change; the result is only accepted (and written
    to ``output_path``) when that length check holds.

    NOTE(review): the tag patterns were missing from this function as
    received (they look like they were stripped as HTML markup).  They have
    been reconstructed as "opening ``<table ...>`` tag" and "``</table>``";
    the merged table reuses the first table's own opening tag.  Confirm
    against the original source.

    :param input_path: HTML file to read (defaults to the original
        hard-coded path).
    :param output_path: file the merged HTML is written to on success.
    :return: the merged HTML string, or the unmodified input when the table
        counts found by the regex and by the parser disagree, when the length
        check fails, or when any error occurs during merging.
    """
    with open(input_path, 'r') as f:
        html_str = f.read()

    # Strings are immutable, so keeping a reference is a sufficient backup.
    html_str_origin = html_str
    try:
        open_tag_pattern = r'<table\b[^>]*>'
        close_tag_pattern = r'</table>'

        match1 = re.finditer(open_tag_pattern, html_str)
        match2 = re.finditer(close_tag_pattern, html_str)
        table_index_list = [
            [m1.span()[0], m1.span()[1], m2.span()[0], m2.span()[1]]
            for m1, m2 in zip(match1, match2)
        ]
        print(table_index_list)

        # Parser is left unspecified, as in the original call.
        soup = BeautifulSoup(html_str)
        table_td_cnt_list = [
            len(list(table.tr.find_all('td')))
            for table in soup.find_all('table')
        ]
        print(table_td_cnt_list)

        # The regex spans and the parsed tables must describe the same tables.
        if len(table_index_list) != len(table_td_cnt_list):
            return html_str_origin

        merge_index_list = _group_adjacent_tables(table_index_list,
                                                  table_td_cnt_list)
        print(merge_index_list)

        print('before len(html_str)', len(html_str))
        for merge in merge_index_list:
            first_table = table_index_list[merge[0]]
            last_table = table_index_list[merge[-1]]
            start_index = first_table[0]
            end_index = last_table[-1]

            open_tag = html_str[first_table[0]:first_table[1]]
            close_tag = html_str[last_table[2]:last_table[3]]
            # Drop every table tag inside the run, then wrap the remaining
            # rows in a single table.
            inner = re.sub(open_tag_pattern + '|' + close_tag_pattern, '',
                           html_str[start_index:end_index])
            table_replace = open_tag + inner + close_tag
            # Pad to the original span length so the offsets collected above
            # stay valid for the following groups.
            table_replace += ' ' * (end_index - start_index - len(table_replace))
            html_str = html_str[:start_index] + table_replace + html_str[end_index:]
        print('after len(html_str)', len(html_str))

        if len(html_str_origin) != len(html_str):
            return html_str_origin

        with open(output_path, 'w') as f:
            f.write(html_str)
        return html_str
    except Exception as e:
        # Best effort: on any failure hand back the untouched document, but
        # report the reason instead of swallowing it silently.
        print('merge_table failed:', repr(e))
        return html_str_origin
if __name__ == '__main__':
    # Disabled multi-process demo: start ten workers running `run` and wait
    # for all of them.
    # process_list = []
    # for j in range(10):
    #     p1 = Process(target=run,)
    #     p1.start()
    #     process_list.append(p1)
    #
    # for p in process_list:
    #     p.join()

    # Quick string-join check.
    print('|'.join(['a', 'n']))

    # Absolute day difference between two dates.
    later = datetime.strptime('2023-04-26', '%Y-%m-%d')
    earlier = datetime.strptime('2023-04-02', '%Y-%m-%d')
    day_gap = (earlier - later).days
    print(abs(day_gap))

    # Shift the first date forward by ten days and format it back.
    shifted = later + dt.timedelta(days=10)
    print(shifted.strftime('%Y-%m-%d'))

    # merge_table()
    print(datetime.now())