"""Train, query and visualise a tabular RL value table for driver/order matches.

* ``train()`` feeds recorded matches to ``RL.learn`` and saves the table.
* ``predict()`` scores one transition against a saved table.
* Running the file as a script scatter-plots one time slice of a saved table.

``load`` is not defined in this file; it is presumably provided by the
``from entity import *`` star import -- TODO confirm.
"""
import functools

import numpy as np
import pandas as pd
from entity import *
from didi_RL.RL_learning import RL

n_actions = 2
n_features = 3
train_step = 10
max_x = 30
max_y = 30
max_time = 144
gamma = 0.9
# max_x = 10
# max_y = 10
# max_time = 20
# gamma = 0.9


def train():
    """Fit the RL value table on the recorded matches and save it to disk.

    The match data is read from ``'../train_data111.pkl'`` and walked from the
    last outer bucket to the first. Each match is passed to ``RL.learn`` with
    the reward ``(money / travel_time) * sum(gamma ** k for k in
    range(travel_time))``. The result is written with
    ``rl.save_label('RL_q_label03.pkl')``.

    Matches whose ``travel_time`` is not positive are skipped: a value of 0
    would raise ``ZeroDivisionError`` in the reward, and a negative value gives
    an empty discount sum. The number of skipped matches is printed.
    """
    # data = load('../train_data/train_data03.pkl')
    data = load('../train_data111.pkl')
    print("数据量:", sum(len(bucket) for bucket in data))

    rl = RL(time_step=max_time)
    skipped = 0
    # Iterate the outer buckets back to front, as the original index loop did.
    for bucket in reversed(data):
        for match in bucket:
            travel_time = match.travel_time
            if travel_time <= 0:
                skipped += 1
                continue
            # Per-step reward spread over the trip, discounted step by step.
            discount_sum = sum(gamma ** k for k in range(travel_time))
            rl.learn(
                s=(match.driver.x, match.driver.y),
                s_=(match.order.to_x, match.order.to_y),
                t=match.order.order_time,
                t_=match.arrive_time,
                r=(match.money / travel_time) * discount_sum,
                detal_t=travel_time,
            )
    if skipped:
        print("skipped matches with non-positive travel_time:", skipped)
    print("ok")
    # rl.save_label('RL_q_label02.pkl')
    rl.save_label('RL_q_label03.pkl')


# NOTE(review): train() saves to 'RL_q_label03.pkl' while predict() reads this
# path -- confirm which file is the intended one.
q_label_path = "RL_q_label.pkl"


@functools.lru_cache(maxsize=4)
def _load_q_label(path):
    """Load the value table stored at *path*, keeping it cached per path.

    The cache is not refreshed if the file changes on disk during the
    process; call ``_load_q_label.cache_clear()`` to force a reload.
    """
    return load(path)


def predict(s, s_, t, t_, r, detal_t):
    """Score one transition against the saved value table.

    The table at ``q_label_path`` is indexed as ``table[time - 1][state][1]``.
    It is loaded once per path and reused on later calls instead of being
    re-read from disk every time.

    Args:
        s: key of the current state (presumably an ``(x, y)`` tuple as passed
            by ``train`` -- TODO confirm).
        s_: key of the next state.
        t: time index of the current state; ``t - 1`` is used as the index.
        t_: time index of the next state; ``t_ - 1`` is used as the index.
        r: reward of the transition.
        detal_t: number of time steps the transition takes; exponent of
            ``gamma``.

    Returns:
        ``gamma ** detal_t * q_next - q_now + r``.
    """
    q_label = _load_q_label(q_label_path)
    q_now = q_label[t - 1][s][1]
    q_next = q_label[t_ - 1][s_][1]
    return gamma ** detal_t * q_next - q_now + r


def main():
    """Scatter-plot time slice 108 of ``'RL_q_label03.pkl'``.

    Points whose value (index 1 of each entry) exceeds 60 are drawn in red and
    the rest in turquoise. Each colour class is drawn by one labelled
    ``scatter`` call so that ``plt.legend()`` has entries to show.
    """
    import matplotlib.pyplot as plt

    # train()
    q_label = load('RL_q_label03.pkl')

    time_index = 108
    threshold = 60
    low_color = '#00CED1'   # point colour for values at or below the threshold
    high_color = '#DC143C'  # point colour for values above the threshold
    area = np.pi * 8        # point area

    # xx / yy / Z also feed the optional 3-D plot kept in the comments below.
    xx = []
    yy = []
    Z = []
    for k, v in q_label[time_index].items():
        # if 16<=k[0]<=24 and 16<=k[1]<=24:
        print(k, v)
        xx.append(k[0])
        yy.append(k[1])
        Z.append(v[1])

    plt.xlabel('X')
    plt.ylabel('Y')
    plt.xlim(0, max_x)
    plt.ylim(0, max_y)

    # Draw the scatter plot: one call per colour class.
    classes = (
        ('value <= {}'.format(threshold), low_color, False),
        ('value > {}'.format(threshold), high_color, True),
    )
    for label, color, is_high in classes:
        xs = [x for x, z in zip(xx, Z) if (z > threshold) == is_high]
        ys = [y for y, z in zip(yy, Z) if (z > threshold) == is_high]
        plt.scatter(xs, ys, s=area, c=color, alpha=0.4, label=label)

    # plt.plot([0, 9.5], [9.5, 0], linewidth='0.5', color='#000000')
    plt.legend()

    # 3-D plot
    # from mpl_toolkits.mplot3d import Axes3D
    # fig = plt.figure()  # define new 3-D axes
    # ax3 = plt.axes(projection='3d')
    # ax3.plot_trisurf(xx, yy, Z, cmap='rainbow')
    # # plot the surface
    # ax3.plot_surface(X, Y, np.array(Z), cmap='rainbow')
    # ax3.contour(X, Y, Z, zdim='z', offset=-2, cmap='rainbow')  # contour plot; offset must be set to the minimum of Z
    plt.show()


if __name__ == '__main__':
    main()