"""Run the BiddingKG extraction model over a CSV of HTML documents.

Each row's ``dochtmlcon`` cell is passed to the model, and the ``ratio``,
``total_money`` and ``unit_money`` entries of the model output are written
back into the same CSV file as extra columns.
"""
import json
import os
import sys
import time

import pandas as pd

# Extend the import path (relative to the current working directory) before
# importing BiddingKG below; presumably this points at the repository root.
sys.path.append(os.path.abspath("../../.."))
print("sys.path[-1]", sys.path[-1])

from BiddingKG.dl.interface.extract import predict  # noqa: E402

# (output column name, key in the model's result dict), in output order.
_PREDICTION_COLUMNS = (
    ("predict_ratio", "ratio"),
    ("predict_total_money", "total_money"),
    ("predict_unit_money", "unit_money"),
)


def bidi_predict(html_str):
    """Run the extraction model on one HTML document.

    Args:
        html_str: The document content handed to ``predict`` as its second
            argument (the first argument is the fixed string ``"1"``).

    Returns:
        The value obtained by JSON-decoding the string returned by
        ``predict``; the rest of this script treats it as a dict.
    """
    return json.loads(predict("1", html_str))


def test_csv(_path):
    """Predict ratio / total money / unit money for every row of a CSV.

    The file at ``_path`` is read, the model is run on each ``dochtmlcon``
    cell, and the file is then overwritten in place with three additional
    columns: ``predict_ratio``, ``predict_total_money`` and
    ``predict_unit_money``. Each value is ``str()`` of the corresponding
    entry of the model output (``"None"`` when the key is absent).

    The prediction columns are named explicitly and the DataFrame index is
    not written, so the output has unambiguous headers and re-running on an
    already processed file replaces the earlier predictions instead of
    accumulating duplicate ``0`` / ``Unnamed: 0`` columns.

    Args:
        _path: Path of the CSV file to read and overwrite.

    Raises:
        KeyError: If the CSV has no ``dochtmlcon`` column.
    """
    start_time = time.time()
    df = pd.read_csv(_path)

    predictions = {column: [] for column, _ in _PREDICTION_COLUMNS}
    for index, html_str in enumerate(df["dochtmlcon"]):
        # Progress report every 50 documents, with elapsed seconds.
        if index % 50 == 0:
            print("="*30, "Loop", index, time.time()-start_time, "="*30)

        # Run the document through the model first ...
        result_dict = bidi_predict(html_str)

        # ... then pick out the ratio, total price and unit price entries.
        for column, key in _PREDICTION_COLUMNS:
            predictions[column].append(str(result_dict.get(key)))

    # Plain column assignment overwrites columns left over from an earlier run.
    for column, values in predictions.items():
        df[column] = values

    # index=False keeps the row index out of the file so it round-trips cleanly.
    df.to_csv(_path, index=False)
    print("finish write!", time.time()-start_time)


if __name__ == "__main__":
    # Other inputs used with this script:
    #   "D:\\BIDI_DOC\\比地_文档\\比率_result.csv"
    #   '总价单价_result.csv'
    path = '比率_result.csv'
    test_csv(path)