znj · 4 years ago
commit 3685b2ac2a
8 changed files with 342 additions and 0 deletions
  1. KM.py (+32 -0)
  2. entity.py (+45 -0)
  3. env.py (+16 -0)
  4. get_data.py (+39 -0)
  5. myDQN/DQN.py (+169 -0)
  6. myDQN/train.py (+17 -0)
  7. train_data/train_data.pkl (BIN)
  8. train_data/train_data.py (+24 -0)

+ 32 - 0
KM.py

@@ -0,0 +1,32 @@
+import numpy as np
+from scipy.optimize import linear_sum_assignment
+
+class KM():
+    def __init__(self, graph):
+        a = len(graph)
+        b = len(graph[0])
+        self.n = max(a, b)
+        # pad the weight matrix with zero rows/columns so it is square
+        if a > b:
+            graph = [row + [0] * (a - b) for row in graph]
+        elif a < b:
+            graph = graph + [[0] * b for _ in range(b - a)]
+        self.graph = np.array(graph)
+
+    def compute(self):
+        # linear_sum_assignment minimizes cost, so negate the weights to maximize
+        row, col = linear_sum_assignment(-self.graph)
+        print("row indices:", row, "column indices:", col, "matched weights:", self.graph[row, col])
+        # drop assignments that landed on the zero padding
+        return [(i, j) for i, j, value in zip(row, col, self.graph[row, col]) if value]
+
+
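A minimal usage sketch: a 2x3 weight matrix gets padded to 3x3 internally, and compute() returns the maximum-weight pairs while skipping the padding:

    from KM import KM

    graph = [[3, 1, 2],
             [2, 4, 6]]
    km = KM(graph)
    print(km.compute())   # [(0, 0), (1, 2)] -> total weight 3 + 6 = 9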

+ 45 - 0
entity.py

@@ -0,0 +1,45 @@
+import numpy as np
+import random
+import pickle
+
+# Driver
+class Driver():
+    def __init__(self,id,x,y):
+        self.id = id
+        self.x = x
+        self.y = y
+
+# Order
+class Order():
+    def __init__(self,id,x,y,to_x,to_y,order_time):
+        self.id = id
+        self.x = x
+        self.y = y
+        self.to_x = to_x
+        self.to_y = to_y
+        # time slot within the day, 1 ~ 144 (the day wraps at 144)
+        self.order_time = order_time
+
+        # Manhattan trip distance with multiplicative noise
+        self.travel_distance = (abs(self.to_x - self.x) + abs(self.to_y - self.y)) * random.uniform(0.96, 1.04)
+        # fare: a base of 12 covers the first 8 distance units, then 1.8 per extra unit, with noise
+        self.order_money = (12 + (max(self.travel_distance, 8) - 8) * 1.8) * random.uniform(0.92, 1.08)
+        self.travel_time = int(self.travel_distance / 5 + random.randint(0, 1))
+        self.arrive_time = self.order_time + self.travel_time
+        # wrap the arrival slot past the end of the day
+        self.arrive_time = self.arrive_time if self.arrive_time <= 144 else self.arrive_time - 144
+
+# Match: a candidate driver-order pairing
+class Match():
+    def __init__(self,order,driver):
+        self.order = order
+        self.driver = driver
+        # Manhattan pickup distance from driver to order origin, with noise
+        self.distance = (abs(order.x - driver.x) + abs(order.y - driver.y)) * random.uniform(0.96, 1.03)
+        self.money = order.order_money
+
+
+
+def save(object_to_save, path):
+    with open(path, 'wb') as f:
+        pickle.dump(object_to_save, f)
+
+def load(path):
+    with open(path, 'rb') as f:
+        return pickle.load(f)
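A quick worked example of the fare formula: a trip from (0, 0) to (10, 10) has Manhattan length 20, so before noise the fare is 12 + (20 - 8) * 1.8 = 33.6, and the printed values land within the noise bands:

    from entity import Order

    order = Order(1, 0, 0, 10, 10, 30)
    print(order.travel_distance)   # roughly 20 (within +/-4%)
    print(order.order_money)       # roughly 33.6 (distance and fare noise combined)
    print(order.arrive_time)       # order_time + travel_time, wrapped at 144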

+ 16 - 0
env.py

@@ -0,0 +1,16 @@
+import numpy as np
+
+class env():
+    def __init__(self, x=50, y=50):
+        # square x-by-y grid of cells
+        self.ori = np.zeros(shape=(x, y))
+
+    def update(self, _x, _y):
+        # return a copy of the empty grid with cell (_x, _y) marked
+        s = self.ori.copy()
+        s[_x, _y] = 1
+        return s
+
+if __name__ == '__main__':
+    e = env()   # avoid shadowing the class name with the instance
+    s = e.update(1, 1)
+    print(s)

+ 39 - 0
get_data.py

@@ -0,0 +1,39 @@
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from entity import *
+
+
+def create_data():
+    # number of drivers currently in the region
+    driver_num = np.random.randint(3, 100)
+    # number of pending orders in the region
+    order_num = np.random.randint(3, 100)
+    # if driver_num > order_num:
+    #     # serve higher-value orders first
+    #     pass
+    # elif driver_num <= order_num:
+    #     # take the top order_num drivers by value and solve the bipartite matching with KM
+    #     pass
+
+    # driver-to-passenger pickup distances, one row per driver
+    distances = []
+    for i in range(driver_num):
+        distance = np.random.normal(3, 1, order_num)
+        # print(distance)
+        distances.append(distance)
+    # plt.plot(sorted(distance))
+    # plt.show()
+    # trip distances
+    travel_distance = np.random.normal(15, 6, order_num)
+    # order prices, derived from trip distance via a piecewise base-fare rule
+    rand = np.random.uniform(0.75, 0.95, order_num)
+    price = travel_distance * rand
+    # base fare 10 covers the first 8 units, then 2 per extra unit
+    price = [2 * (i - 8) + 10 if i > 8 else 10 for i in price]
+    price = np.array(price) * np.random.uniform(0.88, 0.94, order_num)
+    # print(price)
+    return list(distances), list(travel_distance), list(price)
+
+
+if __name__ == '__main__':
+    create_data()
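One plausible way to consume create_data() and feed it to the KM matcher from KM.py (the price-minus-pickup-distance scoring here is an illustrative assumption, not something the source specifies):

    from KM import KM
    from get_data import create_data

    distances, travel_distance, price = create_data()
    # score each driver-order pair by order price minus its pickup distance
    weights = [[p - d for d, p in zip(row, price)] for row in distances]
    pairs = KM(weights).compute()
    print(pairs[:5])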

+ 169 - 0
myDQN/DQN.py

@@ -0,0 +1,169 @@
+import numpy as np
+import pandas as pd
+import tensorflow as tf
+
+np.random.seed(1)
+tf.set_random_seed(1)
+
+class DQN():
+    def __init__(self,
+            n_actions,
+            n_features,
+            learning_rate=0.001,
+            reward_decay=0.9,
+            e_greedy=0.9,
+            replace_target_iter=300,
+            memory_size=800,
+            batch_size=64,
+            e_greedy_increment=None,
+            output_graph=False
+            ):
+        self.n_actions = n_actions
+        self.n_features = n_features
+        self.lr = learning_rate
+        self.gamma = reward_decay
+        self.epsilon_max = e_greedy
+        self.replace_target_iter = replace_target_iter
+        self.memory_size = memory_size
+        self.batch_size = batch_size
+        self.epsilon_increment = e_greedy_increment
+        self.epsilon = 0 if e_greedy_increment is not None else self.epsilon_max
+
+        # total learning step
+        self.learn_step_counter = 0
+
+        # initialize zero memory [s, a, r, s_]
+        self.memory = np.zeros((self.memory_size, n_features * 2 + 2))
+
+        # consist of [target_net, evaluate_net]
+        self._build_net()
+        t_params = tf.get_collection('target_net_params')
+        e_params = tf.get_collection('eval_net_params')
+        self.replace_target_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)]
+
+        self.sess = tf.Session()
+
+        if output_graph:
+            # $ tensorboard --logdir=logs
+            # tf.train.SummaryWriter will soon be deprecated; use FileWriter instead
+            tf.summary.FileWriter("logs/", self.sess.graph)
+
+        self.sess.run(tf.global_variables_initializer())
+        self.cost_his = []
+
+    def _build_net(self):
+        # ------------------ build evaluate_net ------------------
+        self.s = tf.placeholder(tf.float32, [None, self.n_features], name='s')  # input
+        self.q_target = tf.placeholder(tf.float32, [None, self.n_actions], name='Q_target')  # for calculating loss
+        # print(self.s)
+        with tf.variable_scope('eval_net'):
+            # c_names (collection names) are the collections used to store variables
+            c_names, n_l1, w_initializer, b_initializer = \
+                ['eval_net_params', tf.GraphKeys.GLOBAL_VARIABLES], 10, \
+                tf.random_normal_initializer(0., 0.3), tf.constant_initializer(0.1)  # config of layers
+
+            # first layer. collections is used later when assign to target net
+            with tf.variable_scope('l1'):
+                w1 = tf.get_variable('w1', [self.n_features, n_l1], initializer=w_initializer, collections=c_names)
+                b1 = tf.get_variable('b1', [1, n_l1], initializer=b_initializer, collections=c_names)
+                l1 = tf.nn.relu(tf.matmul(self.s, w1) + b1)
+
+            # second layer. collections is used later when assign to target net
+            with tf.variable_scope('l2'):
+                w2 = tf.get_variable('w2', [n_l1, self.n_actions], initializer=w_initializer, collections=c_names)
+                b2 = tf.get_variable('b2', [1, self.n_actions], initializer=b_initializer, collections=c_names)
+                self.q_eval = tf.matmul(l1, w2) + b2
+
+        with tf.variable_scope('loss'):
+            self.loss = tf.reduce_mean(tf.squared_difference(self.q_target, self.q_eval))
+        with tf.variable_scope('train'):
+            self._train_op = tf.train.RMSPropOptimizer(self.lr).minimize(self.loss)
+
+        # ------------------ build target_net ------------------
+        self.s_ = tf.placeholder(tf.float32, [None, self.n_features], name='s_')    # input
+        with tf.variable_scope('target_net'):
+            # c_names (collection names) are the collections used to store variables
+            c_names = ['target_net_params', tf.GraphKeys.GLOBAL_VARIABLES]
+
+            # first layer. collections is used later when assign to target net
+            with tf.variable_scope('l1'):
+                w1 = tf.get_variable('w1', [self.n_features, n_l1], initializer=w_initializer, collections=c_names)
+                b1 = tf.get_variable('b1', [1, n_l1], initializer=b_initializer, collections=c_names)
+                l1 = tf.nn.relu(tf.matmul(self.s_, w1) + b1)
+
+            # second layer. collections is used later when assign to target net
+            with tf.variable_scope('l2'):
+                w2 = tf.get_variable('w2', [n_l1, self.n_actions], initializer=w_initializer, collections=c_names)
+                b2 = tf.get_variable('b2', [1, self.n_actions], initializer=b_initializer, collections=c_names)
+                self.q_next = tf.matmul(l1, w2) + b2
+
+    def store_transition(self, s, a, r, s_):
+        if not hasattr(self, 'memory_counter'):
+            self.memory_counter = 0
+
+        transition = np.hstack((s, [a, r], s_))
+
+        # replace the old memory with new memory
+        index = self.memory_counter % self.memory_size
+        self.memory[index, :] = transition
+
+        self.memory_counter += 1
+
+    def choose_action(self, observation):
+        # to have batch dimension when feed into tf placeholder
+        observation = observation[np.newaxis, :]
+
+        if np.random.uniform() < self.epsilon:
+            # forward the observation through the eval net to get Q values for every action
+            actions_value = self.sess.run(self.q_eval, feed_dict={self.s: observation})
+            action = np.argmax(actions_value)
+        else:
+            action = np.random.randint(0, self.n_actions)
+        return action
+
+    def learn(self):
+        # check to replace target parameters
+        if self.learn_step_counter % self.replace_target_iter == 0:
+            self.sess.run(self.replace_target_op)
+            print('target_params_replaced\n')
+
+        # sample batch memory from all memory
+        if self.memory_counter > self.memory_size:
+            sample_index = np.random.choice(self.memory_size, size=self.batch_size)
+        else:
+            sample_index = np.random.choice(self.memory_counter, size=self.batch_size)
+        batch_memory = self.memory[sample_index, :]
+
+        q_next, q_eval = self.sess.run(
+            [self.q_next, self.q_eval],
+            feed_dict={
+                self.s_: batch_memory[:, -self.n_features:],  # next states, fed to the target net
+                self.s: batch_memory[:, :self.n_features],  # current states, fed to the eval net
+            })
+
+        # change q_target w.r.t q_eval's action
+        q_target = q_eval.copy()
+
+        batch_index = np.arange(self.batch_size, dtype=np.int32)
+        eval_act_index = batch_memory[:, self.n_features].astype(int)
+        reward = batch_memory[:, self.n_features + 1]
+
+        q_target[batch_index, eval_act_index] = reward + self.gamma * np.max(q_next, axis=1)
+
+        # train eval network
+        _, self.cost = self.sess.run([self._train_op, self.loss],
+                                     feed_dict={self.s: batch_memory[:, :self.n_features],
+                                                self.q_target: q_target})
+
+        self.cost_his.append(self.cost)
+
+        # increasing epsilon
+        self.epsilon = self.epsilon + self.epsilon_increment if self.epsilon < self.epsilon_max else self.epsilon_max
+        self.learn_step_counter += 1
+
+    def plot_cost(self):
+        import matplotlib.pyplot as plt
+        plt.plot(np.arange(len(self.cost_his)), self.cost_his)
+        plt.ylabel('Cost')
+        plt.xlabel('training steps')
+        plt.show()
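A minimal usage sketch in the same TF1 session style; the sizes are hypothetical (n_features=2500 assumes the 50x50 grid from env.py flattened), and the reward and next state are placeholders:

    import numpy as np
    from myDQN.DQN import DQN

    net = DQN(n_actions=4, n_features=2500)   # hypothetical action/state sizes
    s = np.zeros(2500, dtype=np.float32)
    for step in range(1000):
        a = net.choose_action(s)
        r, s_ = 0.0, s                         # placeholder reward and next state
        net.store_transition(s, a, r, s_)
        if step > 200 and step % 5 == 0:       # fill some memory before learning
            net.learn()
        s = s_
    net.plot_cost()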

+ 17 - 0
myDQN/train.py

@@ -0,0 +1,17 @@
+from myDQN.DQN import DQN
+import tensorflow as tf
+import numpy as np
+import pandas as pd
+from entity import *
+
+
+def train():
+    # training loop not implemented yet; for now just load and inspect the data
+    data = load('train_data/train_data.pkl')
+    print(data[0])
+
+
+
+if __name__ == '__main__':
+    train()
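Given how train_data/train_data.py (below) builds the pickle, data is a list of simulated days and each day is a list of Match objects, so print(data[0]) shows one day's candidate pairings. A quick inspection sketch:

    from entity import load

    data = load('train_data/train_data.pkl')
    day = data[0]                 # one day: 5-25 Match objects
    m = day[0]
    print(m.money, m.distance, m.order.order_time, m.driver.id)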

BIN
train_data/train_data.pkl


+ 24 - 0
train_data/train_data.py

@@ -0,0 +1,24 @@
+from entity import *
+import random
+import numpy as np
+
+
+def get_data():
+    # build 20000 synthetic days, each with 5-25 candidate driver-order matches
+    all_data = []
+    for day in range(20000):
+        day_data = []
+        for i in range(random.randint(5, 25)):
+            driver = Driver(day, random.randint(0, 49), random.randint(0, 49))
+            to_x = random.randint(0, 49)
+            to_y = random.randint(0, 49)
+            order = Order(0, random.randint(0, 49), random.randint(0, 49), to_x, to_y, random.randint(1, 144))
+            match = Match(order, driver)
+            day_data.append(match)
+
+        all_data.append(day_data)
+    save(all_data, "train_data.pkl")
+
+
+if __name__ == '__main__':
+    get_data()
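Note that save(all_data, "train_data.pkl") writes to the current working directory, while myDQN/train.py reads 'train_data/train_data.pkl', so this script is presumably run from inside train_data/. One way to make the target explicit when running from the repo root (a path assumption, not in the source):

    save(all_data, "train_data/train_data.pkl")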