"""Train a tabular RL value table from recorded matches and query it.

`train` replays the recorded matches through `RL.learn` and persists the
result with `RL.save_label`; `predict` reads the saved table back and scores
a single transition.
"""
import numpy as np
import pandas as pd

# NOTE(review): `load` is not defined in this file; it presumably comes from
# this wildcard import -- confirm before narrowing the import.
from entity import *
from didi_RL.RL_learning import RL

n_actions = 2
n_features = 3
train_step = 10
max_x = 50
max_y = 50
max_time = 144
gamma = 0.9  # per-step discount factor used by both train() and predict()


def train():
    """Replay every recorded match through the learner and save the table.

    Loads '../train_data/train_data02.pkl', prints the total number of
    matches, then walks the outer sequence from its last entry to its first,
    calling `rl.learn` once per match. When all matches have been replayed it
    prints "ok" and calls `rl.save_label()`.

    NOTE(review): the outer entries are presumably time slots, replayed
    latest-first so later values exist before earlier ones are updated --
    confirm against the code that builds the data file.
    """
    data = load('../train_data/train_data02.pkl')
    print("数据量:", sum(len(i) for i in data))

    rl = RL()
    # Iterate last-to-first, same order as the original index loop.
    for matches in reversed(data):
        for match in matches:
            order = match.order
            travel_time = order.travel_time
            # Split the fare evenly over the trip's steps and discount step k
            # by gamma ** k.
            # NOTE(review): travel_time == 0 raises ZeroDivisionError here;
            # confirm the data never contains zero-length trips.
            discount_sum = sum(gamma ** k for k in range(travel_time))
            rl.learn(
                s=(match.driver.x, match.driver.y),
                s_=(order.to_x, order.to_y),
                t=order.order_time,
                t_=order.arrive_time,
                r=(match.money / travel_time) * discount_sum,
                detal_t=travel_time,
            )
    print("ok")
    rl.save_label()


def predict(s, s_, t, t_, r, detal_t):
    """Score one transition against the saved table.

    Args:
        s: key of the starting entry, looked up in `q_label[t-1]`.
        s_: key of the destination entry, looked up in `q_label[t_]`.
        t: start time; the table is indexed with `t - 1`.
        t_: arrival time; the table is indexed with `t_` directly.
        r: reward of the transition.
        detal_t: number of steps used as the exponent of `gamma`.

    Returns:
        `gamma ** detal_t * q_next - q_now + r`, where `q_now` and `q_next`
        are element 1 of the two table entries selected above.

    NOTE(review): the table is reloaded from 'RL_q_label.pkl' on every call,
    which is costly if this runs in a loop. Also, `t == 0` makes `t - 1`
    equal to -1, which silently selects the last entry of a list-like table;
    confirm the valid range of `t`.
    """
    q_label = load('RL_q_label.pkl')
    q_now = q_label[t - 1][s][1]
    q_next = q_label[t_][s_][1]
    return gamma ** detal_t * q_next - q_now + r


if __name__ == '__main__':
    # Training is disabled here; uncomment to rebuild the table first.
    # train()
    a = load('RL_q_label.pkl')
    print(a[0])