123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108 |
- import numpy as np
- import random
- import pickle
- import os
# Driver
class Driver:
    """A driver located at a grid cell.

    Attributes:
        id: Identifier supplied by the caller.
        x, y: Current grid coordinates.
        atime: Value of the ``time`` argument (defaults to 1); presumably the
            time slot from which the driver is available -- confirm with callers.
        is_busy: Busy flag supplied by the caller (defaults to False).
    """

    def __init__(self, id, x, y, time=1, is_busy=False):
        self.id = id
        self.x, self.y = x, y
        # Note the rename: the ``time`` argument is stored as ``atime``.
        self.atime = time
        self.is_busy = is_busy
# Order
class Order():
    """A ride request with a randomly perturbed trip length, duration and fare.

    Everything is derived in the constructor, drawing from the global
    ``random`` generator in a fixed order: one ``uniform`` for the distance
    noise, one ``randint`` for the duration jitter, one ``uniform`` for the
    fare noise.

    Attributes:
        id: Identifier supplied by the caller.
        x, y: Pickup grid coordinates.
        to_x, to_y: Drop-off grid coordinates.
        order_time: Time slot of the request (the file documents 1..144).
        travel_distance: Manhattan distance scaled by a factor in [0.93, 1.08].
        travel_time: Trip duration in slots, never less than 1.
        arrive_time: ``order_time + travel_time``, reduced by 144 once if it
            exceeds 144.
        order_money: Fare for the trip.
    """

    def __init__(self, id, x, y, to_x, to_y, order_time):
        self.id = id
        self.x, self.y = x, y
        self.to_x, self.to_y = to_x, to_y
        # time range: 1 ~ 144
        self.order_time = order_time

        # Manhattan distance between pickup and drop-off, with multiplicative noise.
        grid_distance = abs(self.to_x - self.x) + abs(self.to_y - self.y)
        self.travel_distance = grid_distance * random.uniform(0.93, 1.08)

        # Five distance units per slot plus a 0/1 jitter; a zero result is
        # bumped to one slot.
        self.travel_time = int(self.travel_distance / 5 + random.randint(0, 1)) or 1

        # Wrap past the last slot of the day back to the start.
        arrival = self.order_time + self.travel_time
        if arrival > 144:
            arrival -= 144
        self.arrive_time = arrival

        # Two tariffs: slots 1..33, 85..109 and 138+ use the higher one.
        in_high_tariff = 0 < order_time < 34 or 84 < order_time < 110 or order_time > 137
        if in_high_tariff:
            base, per_unit, low, high = 15, 2.3, 0.96, 1.15
        else:
            base, per_unit, low, high = 12, 1.8, 0.90, 1.10

        # Only the distance beyond the first 8 units is charged per unit.
        billable = max(self.travel_distance, 8) - 8
        self.order_money = (base + billable * per_unit) * random.uniform(low, high)
# Match (pairing of one order with one driver)
# Probability that a freshly created match is turned into a cancellation.
# A value of 0.20 was used previously; 0 disables random cancellation.
cancel_prob = 0


class Match():
    """Pairing of an order with a driver, including pickup time and fare.

    Creating a match always draws one number from the global ``random``
    generator; if it falls below the module-level ``cancel_prob`` the match is
    treated as cancelled regardless of the ``is_cancel`` argument.

    A cancelled match MUTATES the order it was given: the fare becomes 0, the
    destination becomes the driver's position, and the order's duration is
    reset to a single slot.

    Args:
        order: Object exposing ``x``, ``y``, ``to_x``, ``to_y``, ``order_time``,
            ``travel_time``, ``arrive_time`` and ``order_money``.
        driver: Object exposing ``x`` and ``y``.
        get_value: When true, ``compute_value()`` is called and its result is
            stored in ``value``; otherwise ``value`` stays -1.
        is_cancel: Force the match to be a cancellation.

    Attributes:
        distance: Manhattan distance from the driver to the pickup point.
        travel_time: Slots the driver is occupied: pickup time plus the
            order's own ``travel_time``. Always derived from durations, so it
            stays positive even when the trip crosses the end of the day.
        arrive_time: Slot at which the driver becomes free, wrapped into the
            1..144 range by subtracting 144 once.
        money: The order's fare (0 for a cancelled match).
        is_cancel: Whether the match ended up cancelled.
        value: Result of ``compute_value()`` or -1.
    """

    def __init__(self, order, driver, get_value=False, is_cancel=False):
        self.order = order
        self.driver = driver

        if random.random() < cancel_prob:
            is_cancel = True

        if is_cancel:
            # A cancelled order pays nothing and leaves the driver where they
            # are, occupying a single slot.
            self.order.order_money = 0
            self.order.to_x = self.driver.x
            self.order.to_y = self.driver.y
            self.order.arrive_time = (
                self.order.order_time + 1 if self.order.order_time < 144 else 1
            )
            self.order.travel_time = 1

        self.distance = abs(order.x - driver.x) + abs(order.y - driver.y)
        # Five distance units per slot, rounded down.
        pickup_time = int(self.distance / 5)

        # The order's arrive_time may already have wrapped past slot 144, so
        # subtracting order_time from it can go negative. Add durations instead.
        self.travel_time = self.order.travel_time + pickup_time

        arrival = self.order.arrive_time + pickup_time
        self.arrive_time = arrival if arrival <= 144 else arrival - 144

        # The fare is taken from the order as-is; it is not re-priced here.
        self.money = order.order_money
        self.is_cancel = is_cancel
        self.value = -1
        if get_value:
            self.value = self.compute_value()

    def compute_value(self):
        """Return the value of this match according to the module's ``predict``.

        The fare is spread evenly over ``travel_time`` slots and discounted
        with the module-level ``gamma``; the discounted sum is passed to
        ``predict`` together with the driver's current cell, the order's
        destination cell and the corresponding time slots.
        """
        slots = self.travel_time
        r = (self.money / slots) * sum(gamma ** i for i in range(slots))
        return predict(
            s=(self.driver.x, self.driver.y),
            s_=(self.order.to_x, self.order.to_y),
            t=self.order.order_time,
            t_=self.arrive_time,
            r=r,
            detal_t=self.travel_time,
        )
def save(object_to_save, path):
    """Pickle ``object_to_save`` into the file at ``path``.

    The file is opened in binary write mode, so any existing content is
    replaced.
    """
    with open(path, 'wb') as sink:
        pickle.dump(object_to_save, sink)
def load(path):
    """Return the object unpickled from the file at ``path``.

    NOTE(review): ``pickle`` can execute arbitrary code while loading, so this
    should only be pointed at files from a trusted source.
    """
    with open(path, 'rb') as source:
        return pickle.load(source)
# Discount factor applied per time slot when valuing future rewards.
gamma = 0.9

# Location of the pickled value table, next to this file under didi_RL/.
# The directory is taken from the absolute path of this file: with a bare
# relative __file__ the dirname is "" and plain concatenation would yield the
# root-absolute path "/didi_RL/...".
# (A commented-out alternative previously pointed at RL_q_label02.pkl.)
q_label_path = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), "didi_RL", "RL_q_label03.pkl"
)

# Loaded once at import time; predict() indexes it as q_label[slot - 1][(x, y)].
q_label = load(q_label_path)
def predict(s, s_, t, t_, r, detal_t):
    """Return ``gamma ** detal_t * q_next - q_now + r`` from the value table.

    Args:
        s: Key of the current state, looked up in ``q_label[t - 1]``.
        s_: Key of the next state, looked up in ``q_label[t_ - 1]``.
        t: Time slot of the current state (1-based index into ``q_label``).
        t_: Time slot of the next state (1-based index into ``q_label``).
        r: Reward collected during the transition.
        detal_t: Number of slots the transition takes; exponent for ``gamma``.

    A state that is missing from the table, or whose entry is falsy,
    contributes a value of 0.
    """
    def stored_value(slot, state):
        # Element 1 of a table entry holds the value used here.
        entry = q_label[slot - 1].get(state)
        return entry[1] if entry else 0

    q_now = stored_value(t, s)
    q_next = stored_value(t_, s_)
    return gamma ** detal_t * q_next - q_now + r
|