# postgresql2csv.py
  1. import os
  2. import sys
  3. import time
  4. import pandas as pd
  5. import numpy as np
  6. import psycopg2
  7. sys.path.append(os.path.abspath("../../.."))
  8. from BiddingKG.dl.table_head.predict import predict
  9. def eval_text_list(table_text):
  10. if table_text[0] == '"':
  11. table_text = eval(table_text)
  12. else:
  13. table_text = table_text
  14. table_text = table_text.replace('\\', '/')
  15. table_text = eval(table_text)
  16. return table_text
  17. def read_postgresql(txt_name, start_id, _time):
  18. conn = psycopg2.connect(database="table_head_label", user="postgres",
  19. password="postgres", host="192.168.2.103", port="5432")
  20. with open('check_user_result/' + txt_name, "r") as f:
  21. id_list = f.readlines()
  22. # with open('check_user_result/test27.txt', "r") as f:
  23. # id_list += f.readlines()
  24. _list = []
  25. for _id in id_list:
  26. _id = _id[:-1]
  27. sql = 'select * from label_table_head_info where id =' + _id
  28. df = pd.read_sql(sql=sql, con=conn)
  29. # df = df[0]
  30. for index, row in df.iterrows():
  31. _list.append([x for x in row])
  32. cnt = 0
  33. new_list = []
  34. for line in _list:
  35. try:
  36. table_text = eval_text_list(line[2])
  37. except:
  38. print("无法识别")
  39. continue
  40. if cnt % 1000 == 0:
  41. print("Loop", cnt)
  42. line[0] = start_id + cnt
  43. cnt += 1
  44. line[6] = 'test'
  45. line[9] = 1
  46. if time == '':
  47. line[7] = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
  48. else:
  49. line[7] = _time
  50. # 模型识别table_head
  51. label_list = predict(table_text)
  52. line[3] = str(label_list)
  53. new_list.append(line)
  54. df = pd.DataFrame(_list)
  55. new_csv_path = "data_new.csv"
  56. df.to_csv(new_csv_path, index=False)
  57. conn.close()
  58. return new_csv_path
  59. if __name__ == '__main__':
  60. new_csv_path = read_postgresql('test20_error.txt', 203995, '2022-01-01 00:00:00')
  61. # new_csv_path = read_postgresql('test20_right.txt', 203995, '')
  62. # df = pd.read_csv('data_new.csv')
  63. # print(df.iloc[:, 4])