Guarantees on Robot System Performance Using Stochastic Simulation Rollouts

% arXiv preprint (listed as under review, see the note field).
% The arXiv identifier is given in eprint/archiveprefix so arXiv-aware styles
% can format and link it; url is kept as a fallback for styles that ignore eprint.
% primaryclass is inferred from the first keyword (Computer Science - Robotics);
% confirm against the arXiv listing.
% Title words are left unbraced: none is a proper noun or acronym, so the
% bibliography style is free to apply its own casing.
% copyright and month_numeric are non-standard fields that standard styles ignore;
% they are kept for whatever tool exported or consumes this file.
@misc{vincent_guarantees_2023,
  author = {Vincent, Joseph A. and Feldman, Aaron O. and Schwager, Mac},
  title = {Guarantees on Robot System Performance Using Stochastic Simulation Rollouts},
  year = {2023},
  month = sep,
  eprint = {2309.10874},
  archiveprefix = {arXiv},
  primaryclass = {cs.RO},
  url = {https://arxiv.org/abs/2309.10874},
  urldate = {2023-09-29},
  note = {Under Review},
  abstract = {We provide finite-sample performance guarantees for control policies executed on stochastic robotic systems. Given an open- or closed-loop policy and a finite set of trajectory rollouts under the policy, we bound the expected value, value-at-risk, and conditional-value-at-risk of the trajectory cost, and the probability of failure in a sparse rewards setting. The bounds hold, with user-specified probability, for any policy synthesis technique and can be seen as a post-design safety certification. Generating the bounds only requires sampling simulation rollouts, without assumptions on the distribution or complexity of the underlying stochastic system. We adapt these bounds to also give a constraint satisfaction test to verify safety of the robot system. Furthermore, we extend our method to apply when selecting the best policy from a set of candidates, requiring a multi-hypothesis correction. We show the statistical validity of our bounds in the Ant, Half-cheetah, and Swimmer MuJoCo environments and demonstrate our constraint satisfaction test with the Ant. Finally, using the 20 degree-of-freedom MuJoCo Shadow Hand, we show the necessity of the multi-hypothesis correction.},
  keywords = {Computer Science - Robotics, Electrical Engineering and Systems Science - Systems and Control, stoch\_guaran},
  copyright = {All rights reserved},
  month_numeric = {9}
}