1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162 |
- import json
- import sys, os
- import time
- import pandas as pd
- sys.path.append(os.path.abspath("../../.."))
- print("sys.path[-1]", sys.path[-1])
- from BiddingKG.dl.interface.extract import predict
def bidi_predict(html_str):
    """Run the extraction model on one HTML string and return the decoded result.

    Passes ``html_str`` to ``predict`` (with the fixed first argument ``"1"``)
    and parses the returned JSON text with ``json.loads``.

    Args:
        html_str: HTML content of a single document.

    Returns:
        The object decoded from the JSON text produced by ``predict``.
    """
    return json.loads(predict("1", html_str))
def test_csv(_path):
    """Run the extraction model over every row of a CSV and append its predictions.

    Reads the CSV at ``_path``, feeds each row's ``dochtmlcon`` value to
    ``bidi_predict``, and writes the table back to the same path with three
    extra columns holding the stringified ``ratio``, ``total_money`` and
    ``unit_money`` entries of each prediction result.

    Args:
        _path: Path of the CSV file to read. The file is overwritten with the
            augmented table.
    """
    start_time = time.time()
    df = pd.read_csv(_path)

    # (output column name, key in the prediction result), in output order:
    # ratio, total_money, unit_money.
    # Distinct, prefixed names keep the three appended columns distinguishable
    # from each other and from any same-named columns already in the input.
    column_keys = (
        ("predict_ratio", "ratio"),
        ("predict_total_money", "total_money"),
        ("predict_unit_money", "unit_money"),
    )
    predictions = {column: [] for column, _ in column_keys}

    for index, row in df.iterrows():
        # Progress report every 50 rows.
        if index % 50 == 0:
            print("="*30, "Loop", index, time.time()-start_time, "="*30)

        # Run the document through the model first.
        result_dict = bidi_predict(row["dochtmlcon"])

        # Collect ratio / total money / unit money; str() turns a missing key
        # (None) into the text "None" so every cell is a plain string.
        for column, key in column_keys:
            predictions[column].append(str(result_dict.get(key)))

    # Build the prediction columns on the same index as the input rows so the
    # column-wise concat lines up row for row.
    predict_df = pd.DataFrame(predictions, index=df.index)
    df = pd.concat([df, predict_df], axis=1)

    # NOTE(review): to_csv also writes the index column, and the input file is
    # overwritten, so re-running on the output adds further columns -- confirm
    # whether that is intended.
    df.to_csv(_path)
    print("finish write!", time.time()-start_time)
if __name__ == "__main__":
    # Process the ratio result file in place. Other inputs used previously:
    #   "D:\\BIDI_DOC\\比地_文档\\比率_result.csv"
    #   '总价单价_result.csv'
    test_csv('比率_result.csv')
|