import numpy as np
import random
import pickle
import os

# Number of discrete time slots in one simulated day; slot indices run 1..144.
SLOTS_PER_DAY = 144

# Slot bounds of the higher-tariff windows used when pricing an order.
# NOTE(review): the original condition survived only as ``if 0137:`` -- the
# text between a ``<`` and a ``>`` was lost, and ``0137`` is not a valid
# Python 3 literal.  The remnants fix the outer shape of the test as
# ``0 < ... or ... > 137``; the upper bound of the early window is NOT
# recoverable from this file.  36 is a placeholder assumption -- confirm it
# against the original source before trusting simulated fares.
HIGH_TARIFF_EARLY_END_SLOT = 36
HIGH_TARIFF_LATE_START_SLOT = 137


def _wrap_slot(slot):
    """Map a slot number that ran past the end of the day back into 1..144."""
    return (slot - 1) % SLOTS_PER_DAY + 1


def _is_high_tariff_slot(order_time):
    """Return True when ``order_time`` falls in a higher-tariff window.

    See the NOTE(review) on the HIGH_TARIFF_* constants: the early-window
    bound is an unconfirmed reconstruction.
    """
    return (0 < order_time <= HIGH_TARIFF_EARLY_END_SLOT
            or order_time > HIGH_TARIFF_LATE_START_SLOT)


# Driver
class Driver():
    """A driver at a grid position.

    Attributes:
        id: identifier supplied by the caller.
        x, y: coordinates of the driver.
        atime: value of the ``time`` argument (defaults to 1); presumably the
            slot at which the driver is available -- confirm with callers.
        is_busy: busy flag supplied by the caller (defaults to False).
    """

    def __init__(self, id, x, y, time=1, is_busy=False):
        self.id = id
        self.x = x
        self.y = y
        self.atime = time
        self.is_busy = is_busy


# Order
class Order():
    """A ride request from (x, y) to (to_x, to_y) placed at ``order_time``.

    On construction the order derives, with random noise from ``random``:
        travel_distance: Manhattan distance scaled by a factor in [0.93, 1.08].
        travel_time: ``travel_distance / 5`` plus 0 or 1, truncated to int and
            never less than 1.
        arrive_time: ``order_time + travel_time`` wrapped into 1..144.
        order_money: fare from one of two tariffs, chosen by ``order_time``.
    """

    def __init__(self, id, x, y, to_x, to_y, order_time):
        self.id = id
        self.x = x
        self.y = y
        self.to_x = to_x
        self.to_y = to_y
        # time range: 1 ~ 144
        self.order_time = order_time

        # The three random draws below are kept in their original order so
        # that seeded runs consume the random stream the same way.
        manhattan = abs(self.to_x - self.x) + abs(self.to_y - self.y)
        self.travel_distance = manhattan * random.uniform(0.93, 1.08)
        self.travel_time = int(self.travel_distance / 5 + random.randint(0, 1))
        if self.travel_time == 0:
            self.travel_time = 1
        self.arrive_time = _wrap_slot(self.order_time + self.travel_time)

        # Only the distance beyond the first 8 units is charged per unit.
        billable_distance = max(self.travel_distance, 8) - 8
        if _is_high_tariff_slot(self.order_time):
            self.order_money = (15 + billable_distance * 2.3) * random.uniform(0.96, 1.15)
        else:
            self.order_money = (12 + billable_distance * 1.8) * random.uniform(0.90, 1.10)


# Match (order/driver pairing)
# Probability that a match is turned into a cancellation; 0.20 was used
# previously, 0 disables random cancellation.
cancel_prob = 0


class Match():
    """Pairing of one order with one driver.

    Args:
        order: the ``Order`` being served.  It is MUTATED when the match is
            cancelled (fare set to 0, destination set to the driver position,
            trip shortened to one slot).
        driver: the ``Driver`` serving it.
        get_value: when True, ``value`` is filled in via ``compute_value()``;
            otherwise it stays -1.
        is_cancel: force the match to be treated as cancelled.  It may also
            become True at random with probability ``cancel_prob``.

    Attributes:
        distance: Manhattan distance from the driver to the order origin.
        travel_time: slots from ``order.order_time`` until drop-off
            (pick-up leg plus ``order.travel_time``); always >= 1.
        arrive_time: drop-off slot, wrapped into 1..144.
        money: the order fare (0 when cancelled).
        is_cancel: final cancellation flag.
        value: result of ``compute_value()`` or -1.
    """

    def __init__(self, order, driver, get_value=False, is_cancel=False):
        self.order = order
        self.driver = driver

        # Always draw, even when cancel_prob is 0, so the random stream is
        # consumed exactly as before.
        if random.random() < cancel_prob:
            is_cancel = True
        if is_cancel:
            self.order.order_money = 0
            self.order.to_x = self.driver.x
            self.order.to_y = self.driver.y
            self.order.arrive_time = self.order.order_time + 1 if self.order.order_time < 144 else 1
            self.order.travel_time = 1

        self.distance = abs(order.x - driver.x) + abs(order.y - driver.y)
        pickup_time = int(self.distance / 5)
        # Derive the duration from the trip length rather than from
        # ``arrive_time - order_time``: the order's arrive_time is already
        # wrapped past slot 144, so that subtraction went negative for any
        # trip crossing the day boundary (which zeroed the reward in
        # compute_value).  For trips that do not wrap the result is unchanged.
        self.travel_time = self.order.travel_time + pickup_time
        self.arrive_time = _wrap_slot(self.order.order_time + self.travel_time)

        self.money = order.order_money
        self.is_cancel = is_cancel
        self.value = -1
        if get_value:
            self.value = self.compute_value()

    def compute_value(self):
        """Return the value of this match as computed by ``predict``.

        The fare is spread evenly over ``travel_time`` slots and discounted
        by ``gamma`` per slot to form the reward ``r`` passed to ``predict``.
        """
        discount_sum = sum(gamma ** i for i in range(self.travel_time))
        r = (self.money / self.travel_time) * discount_sum
        value = predict(s=(self.driver.x, self.driver.y),
                        s_=(self.order.to_x, self.order.to_y),
                        t=self.order.order_time,
                        t_=self.arrive_time,
                        r=r,
                        detal_t=self.travel_time)
        return value


def save(object_to_save, path):
    """Pickle ``object_to_save`` to the file at ``path`` (overwrites it)."""
    with open(path, 'wb') as f:
        pickle.dump(object_to_save, f)


def load(path):
    """Unpickle and return the object stored in the file at ``path``.

    NOTE(security): pickle can execute arbitrary code while loading; only
    use this on files from a trusted source.
    """
    with open(path, 'rb') as f:
        object1 = pickle.load(f)
    return object1


# Per-slot discount factor.
gamma = 0.9
# Value table loaded once at import time.  An alternative table,
# "/didi_RL/RL_q_label02.pkl", was used previously.
q_label_path = os.path.dirname(__file__)+"/didi_RL/RL_q_label03.pkl"
q_label = load(q_label_path)


def predict(s, s_, t, t_, r, detal_t):
    """Return ``gamma ** detal_t * q_next - q_now + r`` from the value table.

    Args:
        s: current state key, looked up in ``q_label[t - 1]``.
        s_: next state key, looked up in ``q_label[t_ - 1]``.
        t: current time slot (1-based index into ``q_label``).
        t_: arrival time slot (1-based index into ``q_label``).
        r: reward collected over the transition.
        detal_t: number of slots the transition takes.

    A state that is missing from the table (or whose entry is falsy)
    contributes a value of 0.  Element ``[1]`` of a table entry is used as
    the state's value -- the meaning of the other elements is not visible
    here.
    """
    now_s = q_label[t-1].get(s)
    q_now = now_s[1] if now_s else 0

    next_s = q_label[t_-1].get(s_)
    q_next = next_s[1] if next_s else 0

    V = gamma ** detal_t * q_next - q_now + r
    return V