import os
import sys
import time
import pandas as pd
import numpy as np
import psycopg2

sys.path.append(os.path.abspath("../../.."))
from BiddingKG.dl.table_head.predict import predict


def eval_text_list(table_text):
    """Turn the stored textual form of a table back into a Python object.

    The stored value is either the literal text of a Python object, or that
    same text wrapped once more as a double-quoted string literal. In the
    latter case the outer quoting is evaluated away first. Backslashes are
    then replaced with forward slashes before the final evaluation.

    :param table_text: string read from the ``table_text`` column.
    :return: whatever object the text evaluates to.
    :raises Exception: any error raised while indexing or evaluating the
        text (e.g. ``IndexError`` for an empty string, ``SyntaxError`` for
        malformed text).
    """
    # NOTE(review): eval() executes arbitrary expressions coming from the
    # database. If the stored values are always plain literals, consider
    # ast.literal_eval instead -- confirm against real data first.
    if table_text[0] == '"':
        table_text = eval(table_text)

    table_text = table_text.replace('\\', '/')
    return eval(table_text)


def _fetch_rows(conn, txt_name):
    """Fetch label rows as plain lists, either in bulk or by ids from a file."""
    row_list = []
    if txt_name == "":
        sql = """
            select * from "label_table_head_info"
            where status = 1 and update_time >= '2022-01-17';
        """
        df = pd.read_sql(sql=sql, con=conn)
        for _, row in df.iterrows():
            row_list.append(list(row))
        return row_list

    with open('check_user_result/' + txt_name, "r") as f:
        id_list = f.readlines()

    # The id is passed as a bound parameter, never spliced into the SQL text.
    sql = 'select * from label_table_head_info where id = %s'
    for _id in id_list:
        # strip() rather than [:-1]: the last line may lack a trailing
        # newline, and blank lines must not be sent as queries.
        _id = _id.strip()
        if not _id:
            continue
        df = pd.read_sql(sql=sql, con=conn, params=(_id,))
        for _, row in df.iterrows():
            row_list.append(list(row))
    return row_list


def read_postgresql(txt_name, start_id, _time):
    """Re-label rows of ``label_table_head_info`` with the model and dump a CSV.

    Rows are read from PostgreSQL, their table text is parsed, the
    table-head model is run on it, and the rewritten rows are saved to
    ``data_new.csv`` in the current working directory.

    :param txt_name: name of a file under ``check_user_result/`` holding one
        row id per line. An empty string selects every row with
        ``status = 1`` and ``update_time >= '2022-01-17'`` instead.
    :param start_id: value written to column 0 of the first exported row;
        following rows get consecutive values.
    :param _time: value written to column 7. An empty string means "use the
        current local time" formatted as ``%Y-%m-%d %H:%M:%S``.
    :return: path of the CSV file that was written.
    """
    conn = psycopg2.connect(database="table_head_label", user="postgres",
                            password="postgres", host="192.168.2.103",
                            port="5432")
    try:
        row_list = _fetch_rows(conn, txt_name)
    finally:
        # Release the connection even if a query or the id file fails.
        conn.close()

    cnt = 0
    new_list = []
    for line in row_list:
        try:
            table_text = eval_text_list(line[2])
        except Exception:
            # Unparseable rows are skipped and do not consume an id.
            print("无法识别")
            continue

        if cnt % 1000 == 0:
            print("Loop", cnt)

        line[0] = start_id + cnt
        cnt += 1
        line[6] = 'test'
        line[9] = 1
        # Compare the *argument*; the original compared the ``time`` module
        # to '' so the current-time branch could never run.
        if _time == '':
            line[7] = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        else:
            line[7] = _time

        # Run the model to recognise the table heads.
        label_list = predict(table_text)
        line[3] = str(label_list)

        new_list.append(line)

    df = pd.DataFrame(new_list)
    new_csv_path = "data_new.csv"
    df.to_csv(new_csv_path, index=False)
    return new_csv_path


if __name__ == '__main__':
    new_csv_path = read_postgresql('test11_error.txt', 206863,
                                   '2021-12-31 00:00:00')
    # new_csv_path = read_postgresql('test20_right.txt', 203995, '')

    # df = pd.read_csv('data_new.csv')
    # print(df.iloc[:, 4])