postgresql2csv.py 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778
  1. import os
  2. import sys
  3. import time
  4. import pandas as pd
  5. import numpy as np
  6. import psycopg2
  7. sys.path.append(os.path.abspath("../../.."))
  8. from BiddingKG.dl.table_head.predict import predict
  9. def eval_text_list(table_text):
  10. if table_text[0] == '"':
  11. table_text = eval(table_text)
  12. else:
  13. table_text = table_text
  14. table_text = table_text.replace('\\', '/')
  15. table_text = eval(table_text)
  16. return table_text
  17. def read_postgresql(txt_name, start_id, _time):
  18. conn = psycopg2.connect(database="table_head_label", user="postgres",
  19. password="postgres", host="192.168.2.103", port="5432")
  20. row_list = []
  21. if txt_name == "":
  22. sql = """
  23. select * from "label_table_head_info"
  24. where status = 1 and update_time >= '2022-01-17';
  25. """
  26. df = pd.read_sql(sql=sql, con=conn)
  27. for index, row in df.iterrows():
  28. row_list.append([x for x in row])
  29. else:
  30. with open('check_user_result/' + txt_name, "r") as f:
  31. id_list = f.readlines()
  32. for _id in id_list:
  33. _id = _id[:-1]
  34. sql = 'select * from label_table_head_info where id =' + _id
  35. df = pd.read_sql(sql=sql, con=conn)
  36. # df = df[0]
  37. for index, row in df.iterrows():
  38. row_list.append([x for x in row])
  39. cnt = 0
  40. new_list = []
  41. for line in row_list:
  42. try:
  43. table_text = eval_text_list(line[2])
  44. except:
  45. print("无法识别")
  46. continue
  47. if cnt % 1000 == 0:
  48. print("Loop", cnt)
  49. line[0] = start_id + cnt
  50. cnt += 1
  51. line[6] = 'test'
  52. line[9] = 1
  53. if time == '':
  54. line[7] = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
  55. else:
  56. line[7] = _time
  57. # 模型识别table_head
  58. label_list = predict(table_text)
  59. line[3] = str(label_list)
  60. new_list.append(line)
  61. df = pd.DataFrame(new_list)
  62. new_csv_path = "data_new.csv"
  63. df.to_csv(new_csv_path, index=False)
  64. conn.close()
  65. return new_csv_path
  66. if __name__ == '__main__':
  67. new_csv_path = read_postgresql('test11_error.txt', 206863, '2021-12-31 00:00:00')
  68. # new_csv_path = read_postgresql('test20_right.txt', 203995, '')
  69. # df = pd.read_csv('data_new.csv')
  70. # print(df.iloc[:, 4])