How Generalizable Is My Behavior Cloning Policy? A Statistical Evaluation Approach to Trustworthy Performance Evaluation

@comment{
  vincent_how_2024: journal article in IEEE Robotics and Automation Letters.
  The title is stored in Title Case with only the subtitle-initial A
  brace-protected, so sentence-casing styles can downcase the remaining words
  while keeping the capital that follows the question mark.
  copyright and month_numeric are non-standard fields ignored by standard
  styles; they are kept for whatever tooling produced them.
}
@article{vincent_how_2024,
  author        = {Vincent, Joseph A. and Nishimura, Haruki and Itkina, Masha and Shah, Paarth and Schwager, Mac and Kollar, Thomas},
  title         = {How Generalizable Is My Behavior Cloning Policy? {A} Statistical Evaluation Approach to Trustworthy Performance Evaluation},
  journal       = {IEEE Robotics and Automation Letters},
  volume        = {9},
  number        = {10},
  pages         = {8619--8626},
  month         = aug,
  year          = {2024},
  url           = {https://ieeexplore.ieee.org/document/10638686},
  language      = {en},
  keywords      = {stat\_testing},
  copyright     = {All rights reserved},
  month_numeric = {8}
}