main.aux

\relax 
\providecommand\hyper@newdestlabel[2]{}
\providecommand\HyperFirstAtBeginDocument{\AtBeginDocument}
\HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined
\global\let\oldcontentsline\contentsline
\gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}}
\global\let\oldnewlabel\newlabel
\gdef\newlabel#1#2{\newlabelxx{#1}#2}
\gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
\AtEndDocument{\ifx\hyper@anchor\@undefined
\let\contentsline\oldcontentsline
\let\newlabel\oldnewlabel
\fi}
\fi}
\global\let\hyper@last\relax 
\gdef\HyperFirstAtBeginDocument#1{#1}
\providecommand\HyField@AuxAddToFields[1]{}
\providecommand\HyField@AuxAddToCoFields[2]{}
\citation{Kain2020,Scheinker2018}
\citation{Huang2013,Bruchon2017,Scheinker2020,Hirlaender2019,Welsch2015,Albright2019}
\citation{Hanuka2020,Roussel2020}
\citation{Bruchon2020,Bruchon2019,Kain2020,Pang2020,John2020}
\citation{Sutton2018,DulacArnold2019}
\citation{Brochon2020}
\citation{Brochon2020}
\newlabel{FirstPage}{{}{1}{}{section*.1}{}}
\newlabel{FirstPage@cref}{{}{[1][1][]1}}
\@writefile{toc}{\contentsline {title}{Model-free and Bayesian Ensembling Model-based Deep Reinforcement Learning for Particle Accelerator Control Demonstrated on the FERMI FEL}{1}{section*.2}\protected@file@percent }
\@writefile{toc}{\contentsline {abstract}{Abstract}{1}{section*.1}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {I}Introduction and Motivation}{1}{section*.3}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces The Elettra research centre hosting the FERMI free electron laser \cite  {Brochon2020}.\relax }}{1}{figure.caption.4}\protected@file@percent }
\providecommand*\caption@xref[2]{\@setref\relax\@undefined{#1}}
\newlabel{fig:elletra_research}{{1}{1}{The Elettra research centre hosting the FERMI free electron laser \cite {Brochon2020}.\relax }{figure.caption.4}{}}
\newlabel{fig:elletra_research@cref}{{[figure][1][]1}{[1][1][]1}}
\citation{Brockman2016}
\@writefile{toc}{\contentsline {subsection}{\numberline {A}An Overview of the Main Results}{2}{section*.5}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {II}The set-up of the studied problem}{2}{section*.6}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {A}The Physical Set-up}{2}{section*.7}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces A schematic view on the set-up of the FERMI FEL.\relax }}{2}{figure.caption.9}\protected@file@percent }
\newlabel{fig:schematic_FEL}{{2}{2}{A schematic view on the set-up of the FERMI FEL.\relax }{figure.caption.9}{}}
\newlabel{fig:schematic_FEL@cref}{{[figure][2][]2}{[1][2][]2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {B}The Training Environment}{2}{section*.8}\protected@file@percent }
\citation{Heess2017,Schulman2017,Silver2014,Lillicrap2015,OpenAI2018}
\citation{Deisenroth2011}
\citation{Sutton2018,Williams1992,Baxter2011,pmlr-v28-levine13,Schulman2015,Schulman2017}
\citation{Szepesvari2010,Lillicrap2015,Silver2014}
\citation{Sutton2018,Levine2020}
\citation{Gu2016}
\citation{Kain2020,Hirlaender2020a}
\citation{Sutton2018}
\@writefile{toc}{\contentsline {section}{\numberline {III}Deep Reinforcement learning}{3}{section*.10}\protected@file@percent }
\newlabel{eq:cumulative_reward}{{2}{3}{}{equation.3.2}{}}
\newlabel{eq:cumulative_reward@cref}{{[equation][2][]2}{[1][3][]3}}
\newlabel{eq:trajectory_distribution}{{3}{3}{}{equation.3.3}{}}
\newlabel{eq:trajectory_distribution@cref}{{[equation][3][]3}{[1][3][]3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {A}Model-free Reinforcement Learning}{3}{section*.11}\protected@file@percent }
\newlabel{s:Model-free reinforcement learning}{{III\,A}{3}{}{section*.11}{}}
\newlabel{s:Model-free reinforcement learning@cref}{{[subsection][1][3]III\,A}{[1][3][]3}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {1}Approximate dynamic programming}{3}{section*.12}\protected@file@percent }
\newlabel{eq:state-value-function}{{4}{3}{}{equation.3.4}{}}
\newlabel{eq:state-value-function@cref}{{[equation][4][]4}{[1][3][]3}}
\newlabel{eq:minimize_bellmann_optimality}{{6}{3}{}{equation.3.6}{}}
\newlabel{eq:minimize_bellmann_optimality@cref}{{[equation][6][]6}{[1][3][]3}}
\citation{Hasselt2015,Mnih2013,Lillicrap2015,Gu2016,Wang2015}
\citation{Gu2016}
\citation{Gu2016}
\citation{NIPS2010_091d584f,Hasselt2015,fujimoto2018addressing}
\citation{Lillicrap2015,Gu2016,Silver2014}
\citation{fujimoto2018addressing}
\citation{Gu2016}
\citation{Hirlaender2020a}
\citation{Gu2016}
\citation{Wang2019}
\citation{Gal2016,6654139}
\citation{Chua2018,Wang2019a}
\citation{Boer2005}
\citation{Sutton1991}
\citation{Pearce2018}
\citation{Kurutach2018}
\citation{Wang2019}
\citation{Chua2018,Janner2019,Wang2019a}
\citation{Kurutach2018}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {2}Design decisions for MFRL}{4}{section*.13}\protected@file@percent }
\newlabel{ss:Normalized advantage function}{{III\,A\,2}{4}{}{section*.13}{}}
\newlabel{ss:Normalized advantage function@cref}{{[subsubsection][2][3,1]III\,A\,2}{[1][4][]4}}
\newlabel{eq:state-action-value-approxiation}{{9}{4}{}{equation.3.9}{}}
\newlabel{eq:state-action-value-approxiation@cref}{{[equation][9][]9}{[1][4][]4}}
\@writefile{toc}{\contentsline {subsection}{\numberline {B}Uncertainty Aware DYNA-style Reinforcement Learning}{4}{section*.14}\protected@file@percent }
\newlabel{ss:Uncertainty aware DYNA-style reinforcement learning}{{III\,B}{4}{}{section*.14}{}}
\newlabel{ss:Uncertainty aware DYNA-style reinforcement learning@cref}{{[subsection][2][3]III\,B}{[1][4][]4}}
\newlabel{eq:dynamics_model}{{11}{4}{}{equation.3.11}{}}
\newlabel{eq:dynamics_model@cref}{{[equation][11][]11}{[1][4][]4}}
\citation{Goodfellow2016}
\citation{Sutton2018}
\citation{Schulman2015}
\citation{Kurutach2018}
\citation{Kurutach2018}
\citation{Schulman2017}
\citation{fujimoto2018addressing,Hill2018}
\citation{Haarnoja2018a}
\citation{Kurutach2018}
\citation{kidambi2020morel}
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces A schematic overview of the \emph  {AE-DYNA} approach used in this paper.\relax }}{5}{figure.caption.15}\protected@file@percent }
\newlabel{fig:MBRL_overview}{{3}{5}{A schematic overview of the \emph {AE-DYNA} approach used in this paper.\relax }{figure.caption.15}{}}
\newlabel{fig:MBRL_overview@cref}{{[figure][3][]3}{[1][4][]5}}
\@writefile{toc}{\contentsline {subsection}{\numberline {C}Critical Design Decisions in MBRL}{5}{section*.16}\protected@file@percent }
\newlabel{ss:critical_design}{{III\,C}{5}{}{section*.16}{}}
\newlabel{ss:critical_design@cref}{{[subsection][3][3]III\,C}{[1][4][]5}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {1}The uncertainty aware dynamics model}{5}{section*.17}\protected@file@percent }
\newlabel{ss:The uncertainty aware dynamics model}{{III\,C\,1}{5}{}{section*.17}{}}
\newlabel{ss:The uncertainty aware dynamics model@cref}{{[subsubsection][1][3,3]III\,C\,1}{[1][5][]5}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {2}The controller algorithm}{5}{section*.18}\protected@file@percent }
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3}Handling of the uncertainty}{5}{section*.19}\protected@file@percent }
\citation{Janner2019}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {4}The data acquisition}{6}{section*.20}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {IV}Experimental results from FERMI RL online tests}{6}{section*.21}\protected@file@percent }
\newlabel{sec:Experimental results from FERMI RL online tests}{{IV}{6}{}{section*.21}{}}
\newlabel{sec:Experimental results from FERMI RL online tests@cref}{{[section][4][]IV}{[1][6][]6}}
\@writefile{lot}{\contentsline {table}{\numberline {I}{\ignorespaces Overview of the algorithms. \relax }}{6}{table.caption.22}\protected@file@percent }
\newlabel{tab:overview_algorithms}{{I}{6}{Overview of the algorithms. \relax }{table.caption.22}{}}
\newlabel{tab:overview_algorithms@cref}{{[table][1][]I}{[1][6][]6}}
\@writefile{toc}{\contentsline {subsection}{\numberline {A}MFRL Tests}{6}{section*.23}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces The training of different variants of the \emph  {NAF2} algorithm on the FERMI FEL, averaged over two complete trainings (the standard-deviations are indicated by the shaded areas). The number of iterations (blue) shows the steps until the intensity is optimised, starting from a random initial position.\relax }}{6}{figure.caption.24}\protected@file@percent }
\newlabel{fig:NAF_training}{{4}{6}{The training of different variants of the \emph {NAF2} algorithm on the FERMI FEL, averaged over two complete trainings (the standard-deviations are indicated by the shaded areas). The number of iterations (blue) shows the steps until the intensity is optimised, starting from a random initial position.\relax }{figure.caption.24}{}}
\newlabel{fig:NAF_training@cref}{{[figure][4][]4}{[1][6][]6}}
\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces The evolution of the states of the \emph  {NAF2} algorithm on the FERMI FEL using a double network during the training.\relax }}{6}{figure.caption.25}\protected@file@percent }
\newlabel{fig:NAF_evolution_double}{{5}{6}{The evolution of the states of the \emph {NAF2} algorithm on the FERMI FEL using a double network during the training.\relax }{figure.caption.25}{}}
\newlabel{fig:NAF_evolution_double@cref}{{[figure][5][]5}{[1][6][]6}}
\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces The evolution of the states of the \emph  {NAF2} algorithm on the FERMI FEL using a single network during the training.\relax }}{7}{figure.caption.26}\protected@file@percent }
\newlabel{fig:NAF_evolution_single}{{6}{7}{The evolution of the states of the \emph {NAF2} algorithm on the FERMI FEL using a single network during the training.\relax }{figure.caption.26}{}}
\newlabel{fig:NAF_evolution_single@cref}{{[figure][6][]6}{[1][6][]7}}
\@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces The verification episodes of the variants of the trained model-free \emph  {NAF2} algorithm on the FERMI FEL. The number of iterations (blue) shows the steps until the intensity is optimised, starting from a random initial position.\relax }}{7}{figure.caption.27}\protected@file@percent }
\newlabel{fig:NAF_verification}{{7}{7}{The verification episodes of the variants of the trained model-free \emph {NAF2} algorithm on the FERMI FEL. The number of iterations (blue) shows the steps until the intensity is optimised, starting from a random initial position.\relax }{figure.caption.27}{}}
\newlabel{fig:NAF_verification@cref}{{[figure][7][]7}{[1][6][]7}}
\@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces The training metrics of the \emph  {AE-DYNA-SAC} on the FERMI FEL using a single network (dashed) and a double network (solid). The Bellman error (\cref  {eq:minimize_bellmann_optimality}) and the state-value function (\cref  {eq:state-value-function}) are shown. \relax }}{7}{figure.caption.28}\protected@file@percent }
\newlabel{fig:NAF_convergence}{{8}{7}{The training metrics of the \emph {AE-DYNA-SAC} on the FERMI FEL using a single network (dashed) and a double network (solid). The Bellman error (\cref {eq:minimize_bellmann_optimality}) and the state-value function (\cref {eq:state-value-function}) are shown. \relax }{figure.caption.28}{}}
\newlabel{fig:NAF_convergence@cref}{{[figure][8][]8}{[1][6][]7}}
\@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces The training observables of the \emph  {AE-DYNA-SAC} on the FERMI FEL. Detail are provided in the text.\relax }}{7}{figure.caption.30}\protected@file@percent }
\newlabel{fig:AE-DYNA_observables}{{9}{7}{The training observables of the \emph {AE-DYNA-SAC} on the FERMI FEL. Detail are provided in the text.\relax }{figure.caption.30}{}}
\newlabel{fig:AE-DYNA_observables@cref}{{[figure][9][]9}{[1][7][]7}}
\@writefile{lof}{\contentsline {figure}{\numberline {10}{\ignorespaces The verification episodes of the trained model-based methods: \emph  {ME-TRPO} and \emph  {AE-DYNA-SAC} on the FERMI FEL. The number of iterations (blue) shows the steps until the intensity is optimised, starting from a random initial position.\relax }}{7}{figure.caption.31}\protected@file@percent }
\newlabel{fig:AE-DYNA_verification}{{10}{7}{The verification episodes of the trained model-based methods: \emph {ME-TRPO} and \emph {AE-DYNA-SAC} on the FERMI FEL. The number of iterations (blue) shows the steps until the intensity is optimised, starting from a random initial position.\relax }{figure.caption.31}{}}
\newlabel{fig:AE-DYNA_verification@cref}{{[figure][10][]10}{[1][7][]7}}
\@writefile{toc}{\contentsline {subsection}{\numberline {B}MBRL Tests}{7}{section*.29}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {11}{\ignorespaces The training observables of the \emph  {ME-TRPO} on the FERMI FEL. Detail are provided in the text.\relax }}{8}{figure.caption.32}\protected@file@percent }
\newlabel{fig:ME-TRPO_observables}{{11}{8}{The training observables of the \emph {ME-TRPO} on the FERMI FEL. Detail are provided in the text.\relax }{figure.caption.32}{}}
\newlabel{fig:ME-TRPO_observables@cref}{{[figure][11][]11}{[1][7][]8}}
\@writefile{lof}{\contentsline {figure}{\numberline {12}{\ignorespaces The evolution of the states during the worst verification episodes of the trained \emph  {ME-TRPO} and the \emph  {AE-DYNA-SAC} on the FERMI FEL.\relax }}{8}{figure.caption.33}\protected@file@percent }
\newlabel{fig:Worst_episode_MBRL}{{12}{8}{The evolution of the states during the worst verification episodes of the trained \emph {ME-TRPO} and the \emph {AE-DYNA-SAC} on the FERMI FEL.\relax }{figure.caption.33}{}}
\newlabel{fig:Worst_episode_MBRL@cref}{{[figure][12][]12}{[1][7][]8}}
\@writefile{toc}{\contentsline {section}{\numberline {V}Discussion and outlook}{8}{section*.34}\protected@file@percent }
\citation{Hirlaender2020b}
\citation{Furutaa}
\citation{fujimoto2018addressing}
\citation{Silver2014}
\citation{fujimoto2018addressing,Haarnoja2018a}
\@writefile{lot}{\contentsline {table}{\numberline {II}{\ignorespaces An overview over the verification performance of the different trained algorithms on the FERMI FEL including their standard deviation. \relax }}{9}{table.caption.35}\protected@file@percent }
\newlabel{tab:overview_verification}{{II}{9}{An overview over the verification performance of the different trained algorithms on the FERMI FEL including their standard deviation. \relax }{table.caption.35}{}}
\newlabel{tab:overview_verification@cref}{{[table][2][]II}{[1][8][]9}}
\@writefile{toc}{\contentsline {section}{\numberline {VI}Conclusions}{9}{section*.36}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {VII}Acknowledgements}{9}{section*.37}\protected@file@percent }
\@writefile{toc}{\appendix }
\@writefile{toc}{\contentsline {section}{\numberline {A}A Non-linear Standard Control Problem}{9}{section*.38}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {1}NAF2 Details}{9}{section*.39}\protected@file@percent }
\newlabel{appendix:naf2}{{A\,1}{9}{}{section*.39}{}}
\newlabel{appendix:naf2@cref}{{[subappendix][1][2147483647,1]A\,1}{[1][9][]9}}
\citation{BarthMaron2018}
\citation{Brockman2016}
\citation{Gu2007,Chen2011,Bardsley2012,Pearce2018}
\citation{Pearce2018}
\@writefile{lof}{\contentsline {figure}{\numberline {13}{\ignorespaces Cumulative reward of different \emph  {NAF} implementations as discussed in the text on the \emph  {inverted pendulum} without noise.\relax }}{10}{figure.caption.40}\protected@file@percent }
\newlabel{fig:comparsion_smoothing_small}{{13}{10}{Cumulative reward of different \emph {NAF} implementations as discussed in the text on the \emph {inverted pendulum} without noise.\relax }{figure.caption.40}{}}
\newlabel{fig:comparsion_smoothing_small@cref}{{[figure][13][2147483647]13}{[1][10][]10}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2}The Impact of Noise}{10}{section*.41}\protected@file@percent }
\newlabel{appendix:The impact of noise}{{A\,2}{10}{}{section*.41}{}}
\newlabel{appendix:The impact of noise@cref}{{[subappendix][2][2147483647,1]A\,2}{[1][10][]10}}
\@writefile{lof}{\contentsline {figure}{\numberline {14}{\ignorespaces Cumulative reward of different \emph  {NAF} implementations on the \emph  {inverted pendulum} with artificial noise as discussed in the text.\relax }}{10}{figure.caption.42}\protected@file@percent }
\newlabel{fig:comparsion_noise}{{14}{10}{Cumulative reward of different \emph {NAF} implementations on the \emph {inverted pendulum} with artificial noise as discussed in the text.\relax }{figure.caption.42}{}}
\newlabel{fig:comparsion_noise@cref}{{[figure][14][2147483647]14}{[1][10][]10}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {a}Regression assuming homoskedastic Gaussian noise using `anchored ensembling'}{10}{section*.43}\protected@file@percent }
\newlabel{eq_MAP_loglike_anc}{{A4}{10}{}{equation.A.4}{}}
\newlabel{eq_MAP_loglike_anc@cref}{{[equation][4][2147483647,1]A4}{[1][10][]10}}
\newlabel{eqn_anch_loss_matrix}{{A5}{10}{}{equation.A.5}{}}
\newlabel{eqn_anch_loss_matrix@cref}{{[equation][5][2147483647,1]A5}{[1][10][]10}}
\newlabel{eqn_anch_loss_init}{{A6}{11}{}{equation.A.6}{}}
\newlabel{eqn_anch_loss_init@cref}{{[equation][6][2147483647,1]A6}{[1][11][]11}}
\@writefile{lof}{\contentsline {figure}{\numberline {15}{\ignorespaces Cumulative reward of \emph  {AE-DYNA-SAC} on the \emph  {inverted pendulum} with artificial noise using the `anchor ensembling'.\relax }}{11}{figure.caption.44}\protected@file@percent }
\newlabel{fig:comparsion_noise_ae_dyna}{{15}{11}{Cumulative reward of \emph {AE-DYNA-SAC} on the \emph {inverted pendulum} with artificial noise using the `anchor ensembling'.\relax }{figure.caption.44}{}}
\newlabel{fig:comparsion_noise_ae_dyna@cref}{{[figure][15][2147483647]15}{[1][11][]11}}
\@writefile{lof}{\contentsline {figure}{\numberline {16}{\ignorespaces Varying number of models in the ensemble of the \emph  {AE-DYNA-SAC} on the \emph  {inverted pendulum}.\relax }}{11}{figure.caption.45}\protected@file@percent }
\newlabel{fig:Compare_models_sizes}{{16}{11}{Varying number of models in the ensemble of the \emph {AE-DYNA-SAC} on the \emph {inverted pendulum}.\relax }{figure.caption.45}{}}
\newlabel{fig:Compare_models_sizes@cref}{{[figure][16][2147483647]16}{[1][11][]11}}
\@writefile{lof}{\contentsline {figure}{\numberline {17}{\ignorespaces The comparison of the \emph  {NAF2} and the \emph  {AE-DYNA-SAC} on the noisy \emph  {inverted pendulum}.\relax }}{11}{figure.caption.47}\protected@file@percent }
\newlabel{fig:comparsion_NAF_AE-DYNA}{{17}{11}{The comparison of the \emph {NAF2} and the \emph {AE-DYNA-SAC} on the noisy \emph {inverted pendulum}.\relax }{figure.caption.47}{}}
\newlabel{fig:comparsion_NAF_AE-DYNA@cref}{{[figure][17][2147483647]17}{[1][11][]11}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3}NAF versus AE-DYNA}{11}{section*.46}\protected@file@percent }
\bibdata{mainNotes,tex/Bibliography}
\bibcite{Kain2020}{{1}{2020}{{Kain\ \emph  {et~al.}}}{{Kain, Hirlander, Goddard, Velotti, Porta, Bruchon,\ and\ Valentino}}}
\bibcite{Scheinker2018}{{2}{2018}{{Scheinker\ \emph  {et~al.}}}{{Scheinker, Edelen, Bohler, Emma,\ and\ Lutman}}}
\bibcite{Huang2013}{{3}{2013}{{Huang\ \emph  {et~al.}}}{{Huang, Corbett, Safranek,\ and\ Wu}}}
\bibcite{Bruchon2017}{{4}{2017}{{Bruchon\ \emph  {et~al.}}}{{Bruchon, Fenu, Gaio, Lonza, Pellegrino,\ and\ Saule}}}
\bibcite{Scheinker2020}{{5}{2020}{{Scheinker\ \emph  {et~al.}}}{{Scheinker, Hirlaender, Velotti, Gessner, Porta, Kain, Goddard,\ and\ Ramjiawan}}}
\bibcite{Hirlaender2019}{{6}{2019}{{Hirlaender\ \emph  {et~al.}}}{{Hirlaender, Fraser, Goddard, Kain, Prieto, Stoel, Szakaly,\ and\ Velotti}}}
\bibcite{Welsch2015}{{7}{2015}{{Welsch}}{{}}}
\bibcite{Albright2019}{{8}{2019}{{Albright\ \emph  {et~al.}}}{{Albright, Alemany~Fernandez, Angoletta, Bartosik, Beaumont, Bellodi, Biancacci, Bozzolan, Buzio, Di~Lorenzo, Frassier, Gamba, Hirlander, Huschauer, Kain, Kotzian, Kuchler, Latina, Levens, Mahner, Manosperti, Marqversen, Moreno~Garcia, Nicosia, O'Neil, Ozturk, Saa~Hernandez, Scrivens, Jensen, Tranquille, Wetton,\ and\ Zampetakis}}}
\bibcite{Hanuka2020}{{9}{2020}{{Hanuka\ \emph  {et~al.}}}{{Hanuka, Huang, Shtalenkova, Kennedy, Edelen, Lalchand, Ratner,\ and\ Duris}}}
\bibcite{Roussel2020}{{10}{2020}{{Roussel\ \emph  {et~al.}}}{{Roussel, Hanuka,\ and\ Edelen}}}
\bibcite{Bruchon2020}{{11}{2020}{{Bruchon\ \emph  {et~al.}}}{{Bruchon, Fenu, Gaio, Lonza, O'Shea, Pellegrino,\ and\ Salvato}}}
\bibcite{Bruchon2019}{{12}{2019}{{Bruchon\ \emph  {et~al.}}}{{Bruchon, Fenu, Gaio, Lonza, Pellegrino,\ and\ Salvato}}}
\bibcite{Pang2020}{{13}{2020}{{Pang\ \emph  {et~al.}}}{{Pang, Thulasidasan,\ and\ Rybarcyk}}}
\bibcite{John2020}{{14}{2020}{{John\ \emph  {et~al.}}}{{John, Herwig, Kafkes, Pellico, Perdue, Quintero-Parra, Schupbach, Seiya, Tran, Duarte, Huang, Schram,\ and\ Keller}}}
\bibcite{Sutton2018}{{15}{2018}{{Sutton\ and\ Barto}}{{}}}
\bibcite{DulacArnold2019}{{16}{2019}{{Dulac-Arnold\ \emph  {et~al.}}}{{Dulac-Arnold, Mankowitz,\ and\ Hester}}}
\bibcite{Brochon2020}{{17}{2020}{{Bruchon}}{{}}}
\bibcite{Brockman2016}{{18}{2016}{{Brockman\ \emph  {et~al.}}}{{Brockman, Cheung, Pettersson, Schneider, Schulman, Tang,\ and\ Zaremba}}}
\bibcite{Heess2017}{{19}{2017}{{Heess\ \emph  {et~al.}}}{{Heess, TB, Sriram, Lemmon, Merel, Wayne, Tassa, Erez, Wang, Eslami, Riedmiller,\ and\ Silver}}}
\bibcite{Schulman2017}{{20}{2017}{{Schulman\ \emph  {et~al.}}}{{Schulman, Wolski, Dhariwal, Radford,\ and\ Klimov}}}
\bibcite{Silver2014}{{21}{2014}{{Silver\ \emph  {et~al.}}}{{Silver, Lever, Heess, Degris, Wierstra,\ and\ Riedmiller}}}
\bibcite{Lillicrap2015}{{22}{2015}{{Lillicrap\ \emph  {et~al.}}}{{Lillicrap, Hunt, Pritzel, Heess, Erez, Tassa, Silver,\ and\ Wierstra}}}
\bibcite{OpenAI2018}{{23}{2018}{{OpenAI\ \emph  {et~al.}}}{{OpenAI, Andrychowicz, Baker, Chociej, Jozefowicz, McGrew, Pachocki, Petron, Plappert, Powell, Ray, Schneider, Sidor, Tobin, Welinder, Weng,\ and\ Zaremba}}}
\bibcite{Deisenroth2011}{{24}{2011}{{Deisenroth\ and\ Rasmussen}}{{}}}
\bibcite{Williams1992}{{25}{1992}{{Williams}}{{}}}
\bibcite{Baxter2011}{{26}{2011}{{Baxter\ and\ Bartlett}}{{}}}
\bibcite{pmlr-v28-levine13}{{27}{2013}{{Levine\ and\ Koltun}}{{}}}
\bibcite{Schulman2015}{{28}{2015}{{Schulman\ \emph  {et~al.}}}{{Schulman, Levine, Moritz, Jordan,\ and\ Abbeel}}}
\bibcite{Szepesvari2010}{{29}{2010}{{Szepesv{\'{a}}ri}}{{}}}
\@writefile{toc}{\contentsline {section}{\numberline {}References}{12}{section*.48}\protected@file@percent }
\bibcite{Levine2020}{{30}{2020}{{Levine\ \emph  {et~al.}}}{{Levine, Kumar, Tucker,\ and\ Fu}}}
\bibcite{Gu2016}{{31}{2016}{{Gu\ \emph  {et~al.}}}{{Gu, Lillicrap, Sutskever,\ and\ Levine}}}
\bibcite{Hirlaender2020a}{{32}{2020}{{Hirlaender}}{{}}}
\bibcite{Hasselt2015}{{33}{2015}{{van Hasselt\ \emph  {et~al.}}}{{van Hasselt, Guez,\ and\ Silver}}}
\bibcite{Mnih2013}{{34}{2013}{{Mnih\ \emph  {et~al.}}}{{Mnih, Kavukcuoglu, Silver, Graves, Antonoglou, Wierstra,\ and\ Riedmiller}}}
\bibcite{Wang2015}{{35}{2015}{{Wang\ \emph  {et~al.}}}{{Wang, Schaul, Hessel, van Hasselt, Lanctot,\ and\ de~Freitas}}}
\bibcite{NIPS2010_091d584f}{{36}{2010}{{Hasselt}}{{}}}
\bibcite{fujimoto2018addressing}{{37}{2018}{{Fujimoto\ \emph  {et~al.}}}{{Fujimoto, van Hoof,\ and\ Meger}}}
\bibcite{Wang2019}{{38}{2019}{{Wang\ \emph  {et~al.}}}{{Wang, Bao, Clavera, Hoang, Wen, Langlois, Zhang, Zhang, Abbeel,\ and\ Ba}}}
\bibcite{Gal2016}{{39}{2016}{{Gal\ \emph  {et~al.}}}{{Gal, McAllister,\ and\ Rasmussen}}}
\bibcite{6654139}{{40}{2015}{{{Deisenroth}\ \emph  {et~al.}}}{{{Deisenroth}, {Fox},\ and\ {Rasmussen}}}}
\bibcite{Chua2018}{{41}{2018}{{Chua\ \emph  {et~al.}}}{{Chua, Calandra, McAllister,\ and\ Levine}}}
\bibcite{Wang2019a}{{42}{2019}{{Wang\ and\ Ba}}{{}}}
\bibcite{Boer2005}{{43}{2005}{{de~Boer\ \emph  {et~al.}}}{{de~Boer, Kroese, Mannor,\ and\ Rubinstein}}}
\bibcite{Sutton1991}{{44}{1991}{{Sutton}}{{}}}
\bibcite{Pearce2018}{{45}{2018}{{Pearce\ \emph  {et~al.}}}{{Pearce, Leibfried, Brintrup, Zaki,\ and\ Neely}}}
\bibcite{Kurutach2018}{{46}{2018}{{Kurutach\ \emph  {et~al.}}}{{Kurutach, Clavera, Duan, Tamar,\ and\ Abbeel}}}
\bibcite{Janner2019}{{47}{2019}{{Janner\ \emph  {et~al.}}}{{Janner, Fu, Zhang,\ and\ Levine}}}
\bibcite{Goodfellow2016}{{48}{2016}{{Goodfellow\ \emph  {et~al.}}}{{Goodfellow, Bengio,\ and\ Courville}}}
\bibcite{Hill2018}{{49}{2018}{{Hill\ \emph  {et~al.}}}{{Hill, Raffin, Ernestus, Gleave, Kanervisto, Traore, Dhariwal, Hesse, Klimov, Nichol, Plappert, Radford, Schulman, Sidor,\ and\ Wu}}}
\bibcite{Haarnoja2018a}{{50}{2018}{{Haarnoja\ \emph  {et~al.}}}{{Haarnoja, Zhou, Hartikainen, Tucker, Ha, Tan, Kumar, Zhu, Gupta, Abbeel,\ and\ Levine}}}
\bibcite{kidambi2020morel}{{51}{2020}{{Kidambi\ \emph  {et~al.}}}{{Kidambi, Rajeswaran, Netrapalli,\ and\ Joachims}}}
\bibcite{Hirlaender2020b}{{52}{2020}{{Hirlaender\ and\ Bruchon}}{{}}}
\bibcite{Furutaa}{{53}{1991}{{Furuta\ \emph  {et~al.}}}{{Furuta, Yamakita,\ and\ Kobayashi}}}
\bibcite{BarthMaron2018}{{54}{2018}{{Barth-Maron\ \emph  {et~al.}}}{{Barth-Maron, Hoffman, Budden, Dabney, Horgan, TB, Muldal, Heess,\ and\ Lillicrap}}}
\bibcite{Gu2007}{{55}{2007}{{Gu\ and\ Oliver}}{{}}}
\bibcite{Chen2011}{{56}{2011}{{Chen\ and\ Oliver}}{{}}}
\bibcite{Bardsley2012}{{57}{2012}{{Bardsley}}{{}}}
\newlabel{LastBibItem}{{57}{13}{}{section*.48}{}}
\newlabel{LastBibItem@cref}{{[subappendix][3][2147483647,1]A\,3}{[1][13][]13}}
\bibstyle{apsrev4-2}
\citation{REVTEX42Control}
\citation{apsrev42Control}
\newlabel{LastPage}{{}{13}{}{}{}}
\gdef \@abspage@last{13}