123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172 |
- import os
- import sys
- import time
- import pandas as pd
- import numpy as np
- import psycopg2
- sys.path.append(os.path.abspath("../../.."))
- from BiddingKG.dl.table_head.predict import predict
def eval_text_list(table_text):
    """Turn a serialised table-text string into the Python object it spells.

    When the text starts with a double quote it is evaluated once and the
    result of that evaluation is used as the text. Every backslash in the
    text is then replaced with a forward slash, and the text is evaluated
    to produce the return value.

    NOTE(review): this relies on ``eval``, which executes arbitrary
    expressions. If the text can come from an untrusted source, consider
    ``ast.literal_eval`` instead -- confirm that the stored values are plain
    literals before switching.

    Args:
        table_text: Non-empty string holding a Python expression.

    Returns:
        Whatever object the (slash-normalised) expression evaluates to.
    """
    starts_with_quote = table_text[0] == '"'
    inner_text = eval(table_text) if starts_with_quote else table_text
    # Backslashes would otherwise be read as escape sequences by eval().
    slash_normalised = inner_text.replace('\\', '/')
    return eval(slash_normalised)
def read_postgresql(txt_name, start_id, _time):
    """Re-label selected ``label_table_head_info`` rows and dump them to CSV.

    Reads one record id per line from ``check_user_result/<txt_name>``,
    fetches every matching row from the ``table_head_label`` PostgreSQL
    database, parses column 2 of each row with ``eval_text_list``, runs
    ``predict`` on the parsed text and writes the updated rows to
    ``data_new.csv`` in the current working directory.

    For each row whose column 2 can be parsed, these positional columns are
    overwritten before the row is exported:

    * column 0 -- ``start_id`` plus the number of rows accepted so far;
    * column 3 -- ``str()`` of the value returned by ``predict``;
    * column 6 -- the literal ``'test'``;
    * column 7 -- ``_time``, or the current local time when ``_time`` is ``''``;
    * column 9 -- ``1``.

    Rows whose column 2 cannot be parsed are reported on stdout and left out
    of the CSV.

    NOTE(review): the connection settings (host, user, password) are
    hard-coded here; consider moving them to configuration.

    Args:
        txt_name: File name, relative to ``check_user_result/``, that holds
            one record id per line. Blank lines are ignored.
        start_id: Id written into column 0 of the first accepted row.
        _time: Timestamp string written into column 7. Pass ``''`` to stamp
            each row with the current local time instead.

    Returns:
        The path of the CSV file that was written (``"data_new.csv"``).
    """
    # Read the ids before connecting so a missing file cannot leak a
    # connection. strip() is used instead of chopping the last character so
    # that a final line without a trailing newline keeps its last digit.
    with open('check_user_result/' + txt_name, "r") as f:
        id_list = [raw.strip() for raw in f if raw.strip()]

    conn = psycopg2.connect(database="table_head_label", user="postgres",
                            password="postgres", host="192.168.2.103", port="5432")
    try:
        # Let the driver bind the id rather than splicing it into the SQL.
        sql = 'select * from label_table_head_info where id = %s'
        rows = []
        for _id in id_list:
            df = pd.read_sql(sql=sql, con=conn, params=(_id,))
            rows.extend(list(row) for _, row in df.iterrows())
    finally:
        # Always release the connection, even if a query fails.
        conn.close()

    new_list = []
    for line in rows:
        try:
            table_text = eval_text_list(line[2])
        except Exception:
            # Best effort: report the unparseable row and skip it.
            print("无法识别")
            continue

        # Only accepted rows are counted, so the new ids stay contiguous.
        cnt = len(new_list)
        if cnt % 1000 == 0:
            print("Loop", cnt)

        line[0] = start_id + cnt
        line[6] = 'test'
        line[9] = 1
        # An empty `_time` means "stamp with the current local time".
        if _time == '':
            line[7] = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        else:
            line[7] = _time

        # Let the model recognise the table head.
        label_list = predict(table_text)
        line[3] = str(label_list)
        new_list.append(line)

    # Export only the rows that were successfully re-labelled.
    df = pd.DataFrame(new_list)
    new_csv_path = "data_new.csv"
    df.to_csv(new_csv_path, index=False)
    return new_csv_path
if __name__ == '__main__':
    # Re-label the ids listed in test20_error.txt, numbering the new rows
    # from 203995 and stamping them with a fixed timestamp.
    new_csv_path = read_postgresql(
        txt_name='test20_error.txt',
        start_id=203995,
        _time='2022-01-01 00:00:00',
    )
    # Alternative run over the "right" list, passing an empty timestamp:
    # new_csv_path = read_postgresql('test20_right.txt', 203995, '')
    # Quick inspection of the generated file:
    # df = pd.read_csv('data_new.csv')
    # print(df.iloc[:, 4])
|