@@ -1,9 +1,10 @@
 import numpy as np
 import pandas as pd
 import tensorflow as tf
+from entity import *
-np.random.seed(1)
-tf.set_random_seed(1)
+# np.random.seed(1)
+# tf.set_random_seed(1)


 class DQN():
     def __init__(self,
@@ -11,9 +12,9 @@ class DQN():
                  n_features,
                  learning_rate=0.001,
                  reward_decay=0.9,
-                 e_greedy=0.9,
+                 e_greedy=1,
                  replace_target_iter=300,
-                 memory_size=800,
+                 memory_size=600,
                  batch_size=64,
                  e_greedy_increment=None,
                  output_graph=False
@@ -33,7 +34,7 @@ class DQN():
         self.learn_step_counter = 0

         # initialize zero memory [s, a, r, s_]
-        self.memory = np.zeros((self.memory_size, n_features * 2 + 2))
+        self.memory = np.zeros((self.memory_size, n_features * 2 + 3))

         # consist of [target_net, evaluate_net]
         self._build_net()
@@ -55,7 +56,7 @@ class DQN():
         # ------------------ build evaluate_net ------------------
         self.s = tf.placeholder(tf.float32, [None, self.n_features], name='s')  # input
         self.q_target = tf.placeholder(tf.float32, [None, self.n_actions], name='Q_target')  # for calculating loss
-        # print(self.s)
+
         with tf.variable_scope('eval_net'):
             # c_names(collections_names) are the collections to store variables
             c_names, n_l1, w_initializer, b_initializer = \
@@ -97,11 +98,11 @@ class DQN():
             b2 = tf.get_variable('b2', [1, self.n_actions], initializer=b_initializer, collections=c_names)
             self.q_next = tf.matmul(l1, w2) + b2

-    def store_transition(self, s, a, r, s_):
+    def store_transition(self, s, a, r, travel_time, s_):
         if not hasattr(self, 'memory_counter'):
             self.memory_counter = 0

-        transition = np.hstack((s, [a, r], s_))
+        transition = np.hstack((s, [a, r, travel_time], s_))

         # replace the old memory with new memory
         index = self.memory_counter % self.memory_size
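With the extra travel_time column, each memory row is laid out as [s (n_features), a, r, travel_time, s_ (n_features)], which is why the buffer is widened to n_features * 2 + 3 and why learn() later reads columns n_features, n_features + 1 and n_features + 2. A minimal sketch of that layout (illustrative only, not part of the patch; n_features = 2 is an arbitrary example value):

import numpy as np

n_features = 2
s, a, r, travel_time, s_ = [0.1, 0.2], 3, 1.0, 5, [0.3, 0.4]

# same hstack as store_transition(): [s, a, r, travel_time, s_]
row = np.hstack((s, [a, r, travel_time], s_))
assert row.shape == (n_features * 2 + 3,)

# column layout consumed later in learn():
#   row[:n_features]     -> s
#   row[n_features]      -> a
#   row[n_features + 1]  -> r
#   row[n_features + 2]  -> travel_time
#   row[-n_features:]    -> s_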
@@ -147,11 +148,16 @@ class DQN():
         batch_index = np.arange(self.batch_size, dtype=np.int32)
         eval_act_index = batch_memory[:, self.n_features].astype(int)
         reward = batch_memory[:, self.n_features + 1]
+        # elapsed time interval (travel time) between s and s_
+        travel_time = batch_memory[:, self.n_features + 2]
+        gamma = np.array([self.gamma ** t for t in travel_time])
+        # gamma = gamma.reshape((self.batch_size, 1))

-        q_target[batch_index, eval_act_index] = reward + self.gamma * np.max(q_next, axis=1)
+        # q_target[batch_index, eval_act_index] = reward + self.gamma * np.max(q_next, axis=1)
+        q_target[batch_index, eval_act_index] = reward + gamma * np.max(q_next, axis=1)

         # train eval network
-        _, self.cost,a = self.sess.run([self._train_op, self.loss,self.s],
+        _, self.cost = self.sess.run([self._train_op, self.loss],
                                        feed_dict={self.s: batch_memory[:, :self.n_features],
                                                   self.q_target: q_target})

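The learn() change above replaces the fixed one-step discount with a per-transition discount of gamma ** travel_time, so transitions separated by longer intervals contribute less of the bootstrapped value. A small self-contained sketch of that target computation (illustrative values, not taken from the patch); the vectorized power is equivalent to the list comprehension used above:

import numpy as np

gamma_base = 0.9                            # self.gamma (reward_decay)
reward = np.array([1.0, 0.5, 2.0])          # batch_memory[:, n_features + 1]
travel_time = np.array([1.0, 3.0, 5.0])     # batch_memory[:, n_features + 2]
q_next_max = np.array([4.0, 4.0, 4.0])      # np.max(q_next, axis=1)

gamma = gamma_base ** travel_time           # per-transition discount
q_target_values = reward + gamma * q_next_max
print(q_target_values)                      # longer intervals are discounted more heavily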
@@ -163,7 +169,12 @@ class DQN():

     def plot_cost(self):
         import matplotlib.pyplot as plt
+        print("min_loss:", min(self.cost_his))
         plt.plot(np.arange(len(self.cost_his)), self.cost_his)
         plt.ylabel('Cost')
         plt.xlabel('training steps')
-        plt.show()
+        plt.show()
+
+    def predict(self, model_path=None):
+        if model_path:
+            model = load(model_path)
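The new predict() stub calls load(), which is not defined anywhere in this patch. If the intent is to restore the TF1 graph's weights from a checkpoint, one possible completion is sketched below. This is an assumption, not the author's confirmed design: it uses tf.train.Saver, adds an observation argument, and assumes the eval net exposes self.q_eval and a live self.sess, as in standard DQN implementations.

    def predict(self, observation, model_path=None):
        # hedged sketch: restore previously saved variables, then act greedily
        if model_path:
            saver = tf.train.Saver()
            saver.restore(self.sess, model_path)    # load checkpointed weights
        observation = observation[np.newaxis, :]    # add batch dimension
        actions_value = self.sess.run(self.q_eval,  # self.q_eval assumed from the eval net
                                      feed_dict={self.s: observation})
        return np.argmax(actions_value)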
|