from myDQN.DQN import DQN
import tensorflow as tf
import numpy as np
import pandas as pd
from entity import *

# Environment / training hyper-parameters.
n_actions = 2    # 0 = order cancelled, 1 = order served
n_features = 3   # state = (x, y, time), each normalised to [0, 1]
train_step = 10  # run one learning step every `train_step` stored transitions
max_x = 50       # grid width, used to normalise x coordinates
max_y = 50       # grid height, used to normalise y coordinates
max_time = 144   # time slots per day, used to normalise times
gamma = 0.9      # per-time-slot discount factor for reward shaping


def train():
    """Train the DQN on recorded driver/order match data.

    Loads pickled match records via ``load`` (from ``entity``), turns each
    match into a (state, action, reward, duration, next_state) transition and
    stores it in the DQN's replay memory.  After a 200-transition warm-up,
    one learning step runs every ``train_step`` transitions; the outer loop
    stops once more than 20000 learning steps have been performed.  Finally
    the cost curve is plotted and the DQN's self-test result is printed.
    """
    data = load('../train_data/train_data.pkl')
    print("数据量:", sum(len(day) for day in data))

    step = 0       # transitions stored so far
    learn_num = 0  # learning steps performed so far
    RL = DQN(n_actions, n_features)

    for day in data:
        for match in day:
            # Current state: driver position and order time, normalised.
            s_x = match.driver.x / max_x
            s_y = match.driver.y / max_y
            s_time = match.order.order_time / max_time
            # Next state: order destination and arrival time, normalised.
            _s_x = match.order.to_x / max_x
            _s_y = match.order.to_y / max_y
            _s_time = match.order.arrive_time / max_time

            travel_time = match.order.travel_time
            # Reward shaping from the Didi dispatching paper: spread the fare
            # evenly over the trip and discount each time slot by gamma.
            # NOTE(review): raises ZeroDivisionError if travel_time == 0 —
            # confirm upstream data guarantees travel_time >= 1.
            reward = (match.money / travel_time) * sum(
                gamma ** i for i in range(travel_time)
            )

            # Action 0 = order was cancelled, 1 = order was served.
            action = 0 if match.is_cancel else 1

            RL.store_transition(
                [s_x, s_y, s_time], action, reward, travel_time,
                [_s_x, _s_y, _s_time],
            )

            # Learn every `train_step` transitions, after a 200-step warm-up
            # so the replay memory holds some data before the first update.
            if step > 200 and step % train_step == 0:
                RL.learn()
                learn_num += 1
            step += 1

        if learn_num > 20000:
            break

    RL.plot_cost()
    print(RL.test())


if __name__ == '__main__':
    train()