from myDQN.DQN import DQN
import numpy as np
from entity import *  # assumed to provide load() plus the match/driver/order entities
n_actions = 2    # 0 = order cancelled, 1 = order served
n_features = 3   # state is (x, y, time), each normalised to [0, 1]
train_step = 10  # run one learning step every `train_step` transitions
max_x = 50       # grid width, for normalising x coordinates
max_y = 50       # grid height, for normalising y coordinates
max_time = 144   # time slots per day (presumably 24 h in 10-minute slots)
gamma = 0.9      # discount factor


def train():
    data = load('../train_data/train_data.pkl')
    print("number of samples:", sum(len(i) for i in data))
    step = 0       # transitions processed so far
    learn_num = 0  # learning steps performed so far
    done = False   # set once enough learning steps have been taken
    RL = DQN(n_actions, n_features)
    for d in data:
        for match in d:
            # Current state: the driver's position at the order's start time.
            s_x = match.driver.x / max_x
            s_y = match.driver.y / max_y
            s_time = match.order.order_time / max_time
            # Next state: the order's destination at its arrival time.
            _s_x = match.order.to_x / max_x
            _s_y = match.order.to_y / max_y
            _s_time = match.order.arrive_time / max_time
            travel_time = match.order.travel_time
            reward = match.money
            # Reward shaping from the Didi paper: spread the fare evenly over
            # the trip's time steps and discount each step by gamma.
            reward = (reward / travel_time) * sum(gamma ** i for i in range(travel_time))
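            # Equivalent closed form of the geometric series above (assuming
            # travel_time is a positive integer number of time steps):
            #   reward = (match.money / travel_time) * (1 - gamma ** travel_time) / (1 - gamma)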
            # Action taken in the logged data: 0 = cancelled, 1 = served.
            action = 0 if match.is_cancel else 1
            RL.store_transition([s_x, s_y, s_time], action, reward, travel_time,
                                [_s_x, _s_y, _s_time])
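            # travel_time is stored with each transition, presumably so the DQN
            # can bootstrap with gamma ** travel_time instead of a single-step
            # discount (an assumption about myDQN.DQN's internals).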
            # Wait until 200 transitions are buffered, then learn every
            # `train_step` transitions.
            if (step > 200) and (step % train_step == 0):
                RL.learn()
                learn_num += 1
            step += 1
            # Stop once 20000 learning steps have been taken; the flag breaks
            # out of both loops.
            if learn_num > 20000:
                done = True
                break
        if done:
            break
    RL.plot_cost()
    print(RL.test())
    # test_data = np.array([[1/50, 2/50, 10/144],
    #                       [25/50, 46/50, 141/144],
    #                       [45/50, 2/50, 65/144]])
    # print(RL.predict([[16/50, 30/50, 120/144]], [[25/50, 46/50, 141/144]], 200, 21))
    # for test in test_data:
    #     q = RL.predict(test)
    #     print(q)


if __name__ == '__main__':
    train()
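
# Interface this script assumes of myDQN.DQN (inferred from the calls above;
# a sketch, not the actual class definition):
#   DQN(n_actions, n_features)                  -- construct the agent
#   store_transition(s, a, r, travel_time, s_)  -- buffer one transition
#   learn()                                     -- one training step on a batch
#   plot_cost()                                 -- plot the loss curve
#   test(), predict(...)                        -- evaluation helpers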