test_re_ratio.py 1.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162
  1. import json
  2. import sys, os
  3. import time
  4. import pandas as pd
  5. sys.path.append(os.path.abspath("../../.."))
  6. print("sys.path[-1]", sys.path[-1])
  7. from BiddingKG.dl.interface.extract import predict
  8. def bidi_predict(html_str):
  9. content = html_str
  10. result_dict = json.loads(predict("1", content))
  11. return result_dict
  12. def test_csv(_path):
  13. start_time = time.time()
  14. df = pd.read_csv(_path)
  15. # ratio, total_money, unit_money
  16. predict_list_1 = []
  17. predict_list_2 = []
  18. predict_list_3 = []
  19. for index, row in df.iterrows():
  20. # if index >= 1000:
  21. # break
  22. if index % 50 == 0:
  23. print("="*30, "Loop", index, time.time()-start_time, "="*30)
  24. html_str = row["dochtmlcon"]
  25. # 先经过模型处理
  26. result_dict = bidi_predict(html_str)
  27. # 获取比率总价单价
  28. word_list_1 = result_dict.get("total_money")
  29. word_list_2 = result_dict.get("unit_money")
  30. word_list_3 = result_dict.get("ratio")
  31. # print("predict ratio", word_list_3)
  32. predict_list_3.append(str(word_list_3))
  33. # print("predict total money", word_list_1)
  34. predict_list_1.append(str(word_list_1))
  35. # print("predict unit money", word_list_2)
  36. predict_list_2.append(str(word_list_2))
  37. predict_df_1 = pd.DataFrame(predict_list_1)
  38. predict_df_2 = pd.DataFrame(predict_list_2)
  39. predict_df_3 = pd.DataFrame(predict_list_3)
  40. df = pd.concat([df, predict_df_3, predict_df_1, predict_df_2], axis=1)
  41. df.to_csv(_path)
  42. print("finish write!", time.time()-start_time)
  43. if __name__ == "__main__":
  44. # path = "D:\\BIDI_DOC\\比地_文档\\比率_result.csv"
  45. path = '比率_result.csv'
  46. # path = '总价单价_result.csv'
  47. test_csv(path)