@@ -0,0 +1,169 @@
+import numpy as np
+import pandas as pd
+import tensorflow as tf
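+# NOTE: this module targets the TensorFlow 1.x graph API (tf.placeholder,
+# tf.Session, tf.get_collection); running it under TensorFlow 2.x would
+# require the tf.compat.v1 shims.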
+
+np.random.seed(1)
+tf.set_random_seed(1)
+
+class DQN():
+    def __init__(self,
+                 n_actions,
+                 n_features,
+                 learning_rate=0.001,
+                 reward_decay=0.9,
+                 e_greedy=0.9,
+                 replace_target_iter=300,
+                 memory_size=800,
+                 batch_size=64,
+                 e_greedy_increment=None,
+                 output_graph=False
+                 ):
+        self.n_actions = n_actions
+        self.n_features = n_features
+        self.lr = learning_rate
+        self.gamma = reward_decay
+        self.epsilon_max = e_greedy
+        self.replace_target_iter = replace_target_iter
+        self.memory_size = memory_size
+        self.batch_size = batch_size
+        self.epsilon_increment = e_greedy_increment
+        self.epsilon = 0 if e_greedy_increment is not None else self.epsilon_max
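+        # epsilon starts at 0 and grows by e_greedy_increment after each call to
+        # learn(), up to epsilon_max; when no increment is given, actions are
+        # greedy with probability epsilon_max from the start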
+
+        # total learning step
+        self.learn_step_counter = 0
+
+        # initialize zero memory [s, a, r, s_]
+        self.memory = np.zeros((self.memory_size, n_features * 2 + 2))
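+        # each row holds one transition: n_features values for s, one for a,
+        # one for r, and n_features values for s_, hence the width n_features * 2 + 2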
+
+        # consists of [target_net, evaluate_net]
+        self._build_net()
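+        # gather the variables of both networks so the eval net's parameters can
+        # be copied into the target net (a hard update, performed every
+        # replace_target_iter learning steps inside learn())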
+        t_params = tf.get_collection('target_net_params')
+        e_params = tf.get_collection('eval_net_params')
+        self.replace_target_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)]
+
+        self.sess = tf.Session()
+
+        if output_graph:
+            # $ tensorboard --logdir=logs
+            # tf.train.SummaryWriter is deprecated; use tf.summary.FileWriter instead
+            tf.summary.FileWriter("logs/", self.sess.graph)
+
+        self.sess.run(tf.global_variables_initializer())
+        self.cost_his = []
+
+    def _build_net(self):
+        # ------------------ build evaluate_net ------------------
+        self.s = tf.placeholder(tf.float32, [None, self.n_features], name='s')  # input
+        self.q_target = tf.placeholder(tf.float32, [None, self.n_actions], name='Q_target')  # for calculating loss
+        with tf.variable_scope('eval_net'):
+            # c_names (collection names) are the collections used to store variables
+            c_names, n_l1, w_initializer, b_initializer = \
+                ['eval_net_params', tf.GraphKeys.GLOBAL_VARIABLES], 10, \
+                tf.random_normal_initializer(0., 0.3), tf.constant_initializer(0.1)  # config of layers
+
+            # first layer. collections is used later when assigning to the target net
+            with tf.variable_scope('l1'):
+                w1 = tf.get_variable('w1', [self.n_features, n_l1], initializer=w_initializer, collections=c_names)
+                b1 = tf.get_variable('b1', [1, n_l1], initializer=b_initializer, collections=c_names)
+                l1 = tf.nn.relu(tf.matmul(self.s, w1) + b1)
+
+            # second layer. collections is used later when assigning to the target net
+            with tf.variable_scope('l2'):
+                w2 = tf.get_variable('w2', [n_l1, self.n_actions], initializer=w_initializer, collections=c_names)
+                b2 = tf.get_variable('b2', [1, self.n_actions], initializer=b_initializer, collections=c_names)
+                self.q_eval = tf.matmul(l1, w2) + b2
+
+        with tf.variable_scope('loss'):
+            self.loss = tf.reduce_mean(tf.squared_difference(self.q_target, self.q_eval))
+        with tf.variable_scope('train'):
+            self._train_op = tf.train.RMSPropOptimizer(self.lr).minimize(self.loss)
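+        # the loss depends only on the eval net's parameters, so this op updates
+        # only the eval net; the target net changes solely via replace_target_op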
+
+        # ------------------ build target_net ------------------
+        self.s_ = tf.placeholder(tf.float32, [None, self.n_features], name='s_')  # input
+        with tf.variable_scope('target_net'):
+            # c_names (collection names) are the collections used to store variables
+            c_names = ['target_net_params', tf.GraphKeys.GLOBAL_VARIABLES]
+
+            # first layer. same structure as the eval net's first layer
+            with tf.variable_scope('l1'):
+                w1 = tf.get_variable('w1', [self.n_features, n_l1], initializer=w_initializer, collections=c_names)
+                b1 = tf.get_variable('b1', [1, n_l1], initializer=b_initializer, collections=c_names)
+                l1 = tf.nn.relu(tf.matmul(self.s_, w1) + b1)
+
+            # second layer. same structure as the eval net's second layer
+            with tf.variable_scope('l2'):
+                w2 = tf.get_variable('w2', [n_l1, self.n_actions], initializer=w_initializer, collections=c_names)
+                b2 = tf.get_variable('b2', [1, self.n_actions], initializer=b_initializer, collections=c_names)
+                self.q_next = tf.matmul(l1, w2) + b2
+
+    def store_transition(self, s, a, r, s_):
+        if not hasattr(self, 'memory_counter'):
+            self.memory_counter = 0
+
+        transition = np.hstack((s, [a, r], s_))
+
+        # replace the old memory with new memory
+        index = self.memory_counter % self.memory_size
+        self.memory[index, :] = transition
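+        # the index wraps around, so the memory acts as a ring buffer: once it is
+        # full, the oldest transitions are overwritten first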
+
+        self.memory_counter += 1
+
+    def choose_action(self, observation):
+        # add a batch dimension before feeding into the tf placeholder
+        observation = observation[np.newaxis, :]
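+        # epsilon-greedy exploration: with probability epsilon act greedily on the
+        # eval net's Q values, otherwise pick a uniformly random action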
+
+        if np.random.uniform() < self.epsilon:
+            # forward the observation through the eval net and get the Q value of every action
+            actions_value = self.sess.run(self.q_eval, feed_dict={self.s: observation})
+            action = np.argmax(actions_value)
+        else:
+            action = np.random.randint(0, self.n_actions)
+        return action
+
+    def learn(self):
+        # check whether to replace the target net's parameters
+        if self.learn_step_counter % self.replace_target_iter == 0:
+            self.sess.run(self.replace_target_op)
+            print('target_params_replaced\n')
+
+        # sample a batch of transitions from memory
+        if self.memory_counter > self.memory_size:
+            sample_index = np.random.choice(self.memory_size, size=self.batch_size)
+        else:
+            sample_index = np.random.choice(self.memory_counter, size=self.batch_size)
+        batch_memory = self.memory[sample_index, :]
+
+        q_next, q_eval = self.sess.run(
+            [self.q_next, self.q_eval],
+            feed_dict={
+                self.s_: batch_memory[:, -self.n_features:],  # next states -> target net (fixed params)
+                self.s: batch_memory[:, :self.n_features],  # current states -> eval net (newest params)
+            })
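+        # q_next: the target net's Q values for the next states, used for the
+        # bootstrap target below; q_eval: the eval net's predictions for the
+        # current states, used as the starting point for q_target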
+
+        # change q_target w.r.t. q_eval's action
+        q_target = q_eval.copy()
+
+        batch_index = np.arange(self.batch_size, dtype=np.int32)
+        eval_act_index = batch_memory[:, self.n_features].astype(int)
+        reward = batch_memory[:, self.n_features + 1]
+
+        q_target[batch_index, eval_act_index] = reward + self.gamma * np.max(q_next, axis=1)
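+        # q_target starts as a copy of q_eval, and only the entry of the action
+        # actually taken is overwritten with the Q-learning target
+        # r + gamma * max_a' Q_target(s', a'); all other entries keep
+        # q_target == q_eval, so they produce zero error and zero gradient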
+
+        # train the eval network
+        _, self.cost = self.sess.run([self._train_op, self.loss],
+                                     feed_dict={self.s: batch_memory[:, :self.n_features],
+                                                self.q_target: q_target})
+
+        self.cost_his.append(self.cost)
+
+        # increase epsilon (when an increment is configured) to reduce exploration over time
+        self.epsilon = self.epsilon + self.epsilon_increment if self.epsilon < self.epsilon_max else self.epsilon_max
+        self.learn_step_counter += 1
+
+    def plot_cost(self):
+        import matplotlib.pyplot as plt
+        plt.plot(np.arange(len(self.cost_his)), self.cost_his)
+        plt.ylabel('Cost')
+        plt.xlabel('Training steps')
+        plt.show()
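+
+# Illustrative usage sketch (not part of this file's API): `env`, `env.reset()`
+# and `env.step(action)` below assume a Gym-style environment and are only an
+# assumption for illustration.
+#
+#   RL = DQN(n_actions=env.action_space.n, n_features=env.observation_space.shape[0])
+#   for episode in range(400):
+#       s = env.reset()
+#       while True:
+#           a = RL.choose_action(s)
+#           s_, r, done, _ = env.step(a)
+#           RL.store_transition(s, a, r, s_)
+#           if RL.memory_counter > RL.batch_size:
+#               RL.learn()
+#           s = s_
+#           if done:
+#               break
+#   RL.plot_cost()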