train.py 1.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445
  1. import numpy as np
  2. import pandas as pd
  3. from entity import *
  4. from didi_RL.RL_learning import RL
  5. n_actions = 2
  6. n_features = 3
  7. train_step = 10
  8. max_x = 50
  9. max_y = 50
  10. max_time = 144
  11. gamma = 0.9
  12. def train():
  13. data = load('../train_data/train_data02.pkl')
  14. print("数据量:",sum(len(i) for i in data))
  15. step = 0
  16. learn_num = 0
  17. rl = RL()
  18. for i in range(len(data)-1,-1,-1):
  19. for match in data[i]:
  20. travel_time = match.order.travel_time
  21. rl.learn(s = (match.driver.x,match.driver.y),
  22. s_ = (match.order.to_x,match.order.to_y),
  23. t = match.order.order_time,
  24. t_ = match.order.arrive_time,
  25. r = (match.money / travel_time) * sum([gamma ** i for i in range(travel_time)]),
  26. detal_t = travel_time
  27. )
  28. print("ok")
  29. rl.save_label()
  30. def predict(s,s_,t,t_,r,detal_t):
  31. q_label = load('RL_q_label.pkl')
  32. q_now = q_label[t-1][s][1]
  33. q_next = q_label[t_][s_][1]
  34. V = gamma ** detal_t * q_next - q_now + r
  35. return V
  36. if __name__ == '__main__':
  37. # train()
  38. a = load('RL_q_label.pkl')
  39. print(a[0])