@@ -1,9 +1,10 @@
 import numpy as np
 import pandas as pd
 import tensorflow as tf
+from entity import *
-np.random.seed(1)
-tf.set_random_seed(1)
+# np.random.seed(1)
+# tf.set_random_seed(1)


 class DQN():
     def __init__(self,
@@ -11,9 +12,9 @@ class DQN():
                  n_features,
                  learning_rate=0.001,
                  reward_decay=0.9,
-                 e_greedy=0.9,
+                 e_greedy=1,
                  replace_target_iter=300,
-                 memory_size=800,
+                 memory_size=600,
                  batch_size=64,
                  e_greedy_increment=None,
                  output_graph=False
@@ -33,7 +34,7 @@ class DQN():
         self.learn_step_counter = 0

         # initialize zero memory [s, a, r, s_]
-        self.memory = np.zeros((self.memory_size, n_features * 2 + 2))
+        self.memory = np.zeros((self.memory_size, n_features * 2 + 3))

         # consist of [target_net, evaluate_net]
         self._build_net()
@@ -55,7 +56,7 @@ class DQN():
         # ------------------ build evaluate_net ------------------
         self.s = tf.placeholder(tf.float32, [None, self.n_features], name='s')  # input
         self.q_target = tf.placeholder(tf.float32, [None, self.n_actions], name='Q_target')  # for calculating loss
-        # print(self.s)
+
         with tf.variable_scope('eval_net'):
             # c_names(collections_names) are the collections to store variables
             c_names, n_l1, w_initializer, b_initializer = \
@@ -97,11 +98,11 @@ class DQN():
             b2 = tf.get_variable('b2', [1, self.n_actions], initializer=b_initializer, collections=c_names)
             self.q_next = tf.matmul(l1, w2) + b2

-    def store_transition(self, s, a, r, s_):
+    def store_transition(self, s, a, r, travel_time, s_):
         if not hasattr(self, 'memory_counter'):
             self.memory_counter = 0

-        transition = np.hstack((s, [a, r], s_))
+        transition = np.hstack((s, [a, r, travel_time], s_))

         # replace the old memory with new memory
         index = self.memory_counter % self.memory_size
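With the extra travel_time column, each memory row is laid out as [s (n_features), a, r, travel_time, s_ (n_features)], which is why the buffer is widened to n_features * 2 + 3 and why learn() later reads columns n_features, n_features + 1 and n_features + 2. A minimal sketch of that layout (illustrative only, not part of the patch; n_features = 2 is an arbitrary example value):

import numpy as np

n_features = 2
s, a, r, travel_time, s_ = [0.1, 0.2], 3, 1.0, 5, [0.3, 0.4]

# same hstack as store_transition(): [s, a, r, travel_time, s_]
row = np.hstack((s, [a, r, travel_time], s_))
assert row.shape == (n_features * 2 + 3,)

# column layout consumed later in learn():
#   row[:n_features]     -> s
#   row[n_features]      -> a
#   row[n_features + 1]  -> r
#   row[n_features + 2]  -> travel_time
#   row[-n_features:]    -> s_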
@@ -147,11 +148,16 @@ class DQN():
         batch_index = np.arange(self.batch_size, dtype=np.int32)
         eval_act_index = batch_memory[:, self.n_features].astype(int)
         reward = batch_memory[:, self.n_features + 1]
+        # elapsed time interval (travel time) between s and s_
+        travel_time = batch_memory[:, self.n_features + 2]
+        gamma = np.array([self.gamma ** t for t in travel_time])
+        # gamma = gamma.reshape((self.batch_size, 1))

-        q_target[batch_index, eval_act_index] = reward + self.gamma * np.max(q_next, axis=1)
+        # q_target[batch_index, eval_act_index] = reward + self.gamma * np.max(q_next, axis=1)
+        q_target[batch_index, eval_act_index] = reward + gamma * np.max(q_next, axis=1)

         # train eval network
-        _, self.cost,a = self.sess.run([self._train_op, self.loss,self.s],
+        _, self.cost = self.sess.run([self._train_op, self.loss],
                                        feed_dict={self.s: batch_memory[:, :self.n_features],
                                                   self.q_target: q_target})

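The learn() change above replaces the fixed one-step discount with a per-transition discount of gamma ** travel_time, so transitions separated by longer intervals contribute less of the bootstrapped value. A small self-contained sketch of that target computation (illustrative values, not taken from the patch); the vectorized power is equivalent to the list comprehension used above:

import numpy as np

gamma_base = 0.9                            # self.gamma (reward_decay)
reward = np.array([1.0, 0.5, 2.0])          # batch_memory[:, n_features + 1]
travel_time = np.array([1.0, 3.0, 5.0])     # batch_memory[:, n_features + 2]
q_next_max = np.array([4.0, 4.0, 4.0])      # np.max(q_next, axis=1)

gamma = gamma_base ** travel_time           # per-transition discount
q_target_values = reward + gamma * q_next_max
print(q_target_values)                      # longer intervals are discounted more heavily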
@@ -163,7 +169,12 @@ class DQN():

     def plot_cost(self):
         import matplotlib.pyplot as plt
+        print("min_loss:", min(self.cost_his))
         plt.plot(np.arange(len(self.cost_his)), self.cost_his)
         plt.ylabel('Cost')
         plt.xlabel('training steps')
-        plt.show()
+        plt.show()
+
+    def predict(self, model_path=None):
+        if model_path:
+            model = load(model_path)
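The new predict() stub calls load(), which is not defined anywhere in this patch. If the intent is to restore the TF1 graph's weights from a checkpoint, one possible completion is sketched below. This is an assumption, not the author's confirmed design: it uses tf.train.Saver, adds an observation argument, and assumes the eval net exposes self.q_eval and a live self.sess, as in standard DQN implementations.

    def predict(self, observation, model_path=None):
        # hedged sketch: restore previously saved variables, then act greedily
        if model_path:
            saver = tf.train.Saver()
            saver.restore(self.sess, model_path)    # load checkpointed weights
        observation = observation[np.newaxis, :]    # add batch dimension
        actions_value = self.sess.run(self.q_eval,  # self.q_eval assumed from the eval net
                                      feed_dict={self.s: observation})
        return np.argmax(actions_value)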
|