Learning Large Graph-based MDPs with Historical Data

@comment{Journal article record. No volume, issue, or page fields are present; the note field carries the early-access status. In the title only the acronym MDPs is brace-protected so that bibliography styles remain free to recase the other words.}
@article{haksar_learning_2021,
  author   = {Haksar, Ravi N. and Schwager, Mac},
  title    = {Learning Large Graph-based {MDPs} with Historical Data},
  journal  = {IEEE Transactions on Control of Network Systems},
  year     = {2021},
  issn     = {2325-5870},
  note     = {Available Online, Early Access},
  language = {en},
  keywords = {learning\_dynamical\_systems},
  abstract = {We consider learning the dynamics and measurement model parameters of a graph-based Markov decision process (GMDP) given a history of measurements. Graph-based models have been used in modeling many data-based applications, such as recognition tasks, disease epidemics, forest wildfires, freeway traffic, and social networks. We leverage the Expectation-Maximization framework and develop an algorithm that optimizes the measurement likelihood and has favorable complexity for large models. In contrast to prior work, we directly consider GMDPs with significantly large state and measurement spaces, arbitrary coupling structure, and long measurement sequences. We also consider a special structural property called Anonymous Influence, which we use to test hypotheses and gain insights into the data. We demonstrate the effectiveness of our learning algorithm by considering two real-world data sets, on the 2020 Novel Coronavirus (COVID-19) pandemic in California and on user interactions on Twitter. Our results show that the learned GMDP models better explain the data compared to an uncoupled model assumption.},
}