"""Offline training script: replay recorded driver/order matches into a DQN agent."""
from myDQN.DQN import DQN
import tensorflow as tf
import numpy as np
import pandas as pd
from entity import *

# Dimensions handed to the DQN constructor.
n_actions = 2
n_features = 3

# Divisors used to scale raw coordinates and times before they are stored.
# NOTE(review): presumably the grid extents and the number of time slots per
# day -- confirm against the data generator.
max_x = 50
max_y = 50
max_time = 144


def _scaled_states(match):
    """Return the scaled ``(state, next_state)`` tuples for one match record.

    ``state`` is built from the driver's position and the order time;
    ``next_state`` from the order's destination and arrival time. Each
    component is divided by the matching module-level ``max_*`` constant.
    """
    order = match.order
    state = (
        match.driver.x / max_x,
        match.driver.y / max_y,
        order.order_time / max_time,
    )
    next_state = (
        order.to_x / max_x,
        order.to_y / max_y,
        order.arrive_time / max_time,
    )
    return state, next_state


def train(data_path='../train_data/train_data.pkl', warmup_steps=200,
          learn_interval=10):
    """Feed every recorded match into a DQN agent and plot its cost curve.

    Args:
        data_path: File passed to ``load``. Defaults to the path that was
            previously hard-coded.
        warmup_steps: ``learn()`` is only called once the step counter is
            strictly greater than this value.
        learn_interval: After warm-up, ``learn()`` is called on every step
            whose index is a multiple of this value.

    The loaded data is iterated as a collection of collections of match
    records. One transition is stored per match.
    """
    # ``load`` is not defined in this file; presumably it is provided by the
    # star import from ``entity`` -- confirm.
    data = load(data_path)
    print("数据量:", sum(len(i) for i in data))

    agent = DQN(n_actions, n_features)
    step = 0
    for group in data:
        for match in group:
            state, next_state = _scaled_states(match)
            # NOTE(review): the stored action is always 0 although
            # n_actions is 2 -- confirm this is intended.
            agent.store_transition(
                state,
                0,
                match.money,
                match.order.travel_time,
                next_state,
            )
            # Let the agent's memory collect some transitions first, then
            # learn periodically rather than on every step.
            if step > warmup_steps and step % learn_interval == 0:
                agent.learn()
            step += 1
    agent.plot_cost()


if __name__ == '__main__':
    train()