NeRF-Loc: Transformer-Based Object Localization Within Neural Radiance Fields

% Journal article: NeRF-Loc, IEEE Robotics and Automation Letters, vol. 8, no. 8, July 2023.
% Only the method name "NeRF-Loc" is brace-protected in the title; the rest is left to the style's casing rules.
% month_numeric is a non-standard field carried over from the export; standard styles ignore unknown fields.
% NOTE(review): urldate is present but the entry has no url or doi field -- consider adding the DOI.
@article{sun_nerf-loc_2023,
  author        = {Sun, Jiankai and Xu, Yan and Ding, Mingyu and Yi, Hongwei and Wang, Chen and Wang, Jingdong and Zhang, Liangjun and Schwager, Mac},
  title         = {{NeRF-Loc}: Transformer-Based Object Localization Within Neural Radiance Fields},
  shorttitle    = {{NeRF-Loc}},
  journal       = {{IEEE} Robotics and Automation Letters},
  volume        = {8},
  number        = {8},
  pages         = {5244--5250},
  month         = jul,
  year          = {2023},
  language      = {en},
  urldate       = {2022-10-07},
  keywords      = {Computer Science - Computer Vision and Pattern Recognition, Computer Science - Robotics, localization},
  abstract      = {Neural Radiance Fields (NeRFs) have been successfully used for scene representation. Recent works have also developed robotic navigation and manipulation systems using NeRF-based environment representations. As object localization is the foundation for many robotic applications, to further unleash the potential of NeRFs in robotic systems, we study object localization within a NeRF scene. We propose a transformer-based framework NeRF-Loc to extract 3D bounding boxes of objects in NeRF scenes. NeRF-Loc takes a pre-trained NeRF model and camera view as input, and produces labeled 3D bounding boxes of objects as output. Concretely, we design a pair of paralleled transformer encoder branches, namely the coarse stream and the fine stream, to encode both the context and details of target objects. The encoded features are then fused together with attention layers to alleviate ambiguities for accurate object localization. We have compared our method with the conventional transformer-based method and our method achieves better performance. In addition, we also present the first NeRF samples-based object localization benchmark NeRFLocBench.},
  month_numeric = {7}
}