123456789101112131415161718192021222324252627282930313233343536373839404142434445 |
- import numpy as np
- import pandas as pd
- from entity import *
- from didi_RL.RL_learning import RL
# Module-level configuration constants.
# NOTE(review): only `gamma` is referenced in the visible part of this file;
# the other names are presumably consumed by other modules — confirm.
n_actions = 2
n_features = 3
train_step = 10

# presumably the extent of the spatial grid and the number of time slots
# in one day — TODO confirm against the data preparation code
max_x, max_y = 50, 50
max_time = 144

# Discount factor: raised to a number of time steps wherever rewards or
# future values are discounted in this file.
gamma = 0.9
def train():
    """Feed every recorded match to an ``RL`` learner, then save its table.

    The training data is loaded from ``../train_data/train_data02.pkl`` and
    is indexed from the last position down to position 0. Each match in a
    bucket is passed to ``RL.learn`` as one transition:

    * ``s``  -- the driver's ``(x, y)`` position,
    * ``s_`` -- the order's destination ``(to_x, to_y)``,
    * ``t`` / ``t_`` -- the order time and the arrival time,
    * ``r``  -- the per-step fare ``money / travel_time`` multiplied by
      ``sum(gamma ** k for k in range(travel_time))``,
    * ``detal_t`` -- the travel time.

    NOTE(review): the indentation of the source was lost; this version
    assumes ``print("ok")`` and ``rl.save_label()`` run once, after all
    matches have been processed — confirm against the original file.
    NOTE(review): a match with ``travel_time == 0`` raises
    ``ZeroDivisionError`` in the reward computation.
    """
    data = load('../train_data/train_data02.pkl')
    print("数据量:", sum(len(bucket) for bucket in data))

    rl = RL()
    # Walk the buckets backwards (last index first).
    for idx in reversed(range(len(data))):
        for match in data[idx]:
            travel_time = match.order.travel_time
            origin = (match.driver.x, match.driver.y)
            destination = (match.order.to_x, match.order.to_y)
            depart_at = match.order.order_time
            arrive_at = match.order.arrive_time
            # Spread the fare evenly over the trip and discount each step.
            per_step_fare = match.money / travel_time
            discount_total = sum(gamma ** k for k in range(travel_time))
            rl.learn(
                s=origin,
                s_=destination,
                t=depart_at,
                t_=arrive_at,
                r=per_step_fare * discount_total,
                detal_t=travel_time,
            )

    print("ok")
    rl.save_label()
def predict(s, s_, t, t_, r, detal_t):
    """Score one transition against the saved table ``RL_q_label.pkl``.

    Returns ``gamma ** detal_t * Q[t_][s_][1] - Q[t - 1][s][1] + r``, where
    ``Q`` is the object loaded from ``RL_q_label.pkl``.

    Args:
        s: key of the current state inside ``Q[t - 1]``.
        s_: key of the next state inside ``Q[t_]``.
        t: current time; the table is read at index ``t - 1``.
        t_: time of the next state; the table is read at index ``t_``.
        r: immediate reward added to the result.
        detal_t: exponent applied to ``gamma`` for the next-state value.

    NOTE(review): the table is reloaded from disk on every call.
    NOTE(review): the current state uses ``t - 1`` while the next state uses
    ``t_`` unchanged; with ``t == 0`` the index becomes ``-1`` — confirm
    this is intended.
    """
    table = load('RL_q_label.pkl')
    current_value = table[t - 1][s][1]
    future_value = table[t_][s_][1]
    return gamma ** detal_t * future_value - current_value + r
if __name__ == '__main__':
    # Training is currently disabled; re-enable the call below to run it.
    # train()
    # Load the saved table and show its first entry.
    q_table = load('RL_q_label.pkl')
    print(q_table[0])
|