-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmain.aux
262 lines (262 loc) · 25.3 KB
/
main.aux
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
\relax
\providecommand\hyper@newdestlabel[2]{}
\providecommand\HyperFirstAtBeginDocument{\AtBeginDocument}
\HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined
\global\let\oldcontentsline\contentsline
\gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}}
\global\let\oldnewlabel\newlabel
\gdef\newlabel#1#2{\newlabelxx{#1}#2}
\gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
\AtEndDocument{\ifx\hyper@anchor\@undefined
\let\contentsline\oldcontentsline
\let\newlabel\oldnewlabel
\fi}
\fi}
\global\let\hyper@last\relax
\gdef\HyperFirstAtBeginDocument#1{#1}
\providecommand\HyField@AuxAddToFields[1]{}
\providecommand\HyField@AuxAddToCoFields[2]{}
\citation{Kain2020,Scheinker2018}
\citation{Huang2013,Bruchon2017,Scheinker2020,Hirlaender2019,Welsch2015,Albright2019}
\citation{Hanuka2020,Roussel2020}
\citation{Bruchon2020,Bruchon2019,Kain2020,Pang2020,John2020}
\citation{Sutton2018,DulacArnold2019}
\citation{Brochon2020}
\citation{Brochon2020}
\newlabel{FirstPage}{{}{1}{}{section*.1}{}}
\newlabel{FirstPage@cref}{{}{[1][1][]1}}
\@writefile{toc}{\contentsline {title}{Model-free and Bayesian Ensembling Model-based Deep Reinforcement Learning for Particle Accelerator Control Demonstrated on the FERMI FEL}{1}{section*.2}\protected@file@percent }
\@writefile{toc}{\contentsline {abstract}{Abstract}{1}{section*.1}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {I}Introduction and Motivation}{1}{section*.3}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces The Elettra research centre hosting the FERMI free electron laser \cite {Brochon2020}.\relax }}{1}{figure.caption.4}\protected@file@percent }
\providecommand*\caption@xref[2]{\@setref\relax\@undefined{#1}}
\newlabel{fig:elletra_research}{{1}{1}{The Elettra research centre hosting the FERMI free electron laser \cite {Brochon2020}.\relax }{figure.caption.4}{}}
\newlabel{fig:elletra_research@cref}{{[figure][1][]1}{[1][1][]1}}
\citation{Brockman2016}
\@writefile{toc}{\contentsline {subsection}{\numberline {A}An Overview of the Main Results}{2}{section*.5}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {II}The set-up of the studied problem}{2}{section*.6}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {A}The Physical Set-up}{2}{section*.7}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces A schematic view on the set-up of the FERMI FEL.\relax }}{2}{figure.caption.9}\protected@file@percent }
\newlabel{fig:schematic_FEL}{{2}{2}{A schematic view on the set-up of the FERMI FEL.\relax }{figure.caption.9}{}}
\newlabel{fig:schematic_FEL@cref}{{[figure][2][]2}{[1][2][]2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {B}The Training Environment}{2}{section*.8}\protected@file@percent }
\citation{Heess2017,Schulman2017,Silver2014,Lillicrap2015,OpenAI2018}
\citation{Deisenroth2011}
\citation{Sutton2018,Williams1992,Baxter2011,pmlr-v28-levine13,Schulman2015,Schulman2017}
\citation{Szepesvari2010,Lillicrap2015,Silver2014}
\citation{Sutton2018,Levine2020}
\citation{Gu2016}
\citation{Kain2020,Hirlaender2020a}
\citation{Sutton2018}
\@writefile{toc}{\contentsline {section}{\numberline {III}Deep Reinforcement learning}{3}{section*.10}\protected@file@percent }
\newlabel{eq:cumulative_reward}{{2}{3}{}{equation.3.2}{}}
\newlabel{eq:cumulative_reward@cref}{{[equation][2][]2}{[1][3][]3}}
\newlabel{eq:trajectory_distribution}{{3}{3}{}{equation.3.3}{}}
\newlabel{eq:trajectory_distribution@cref}{{[equation][3][]3}{[1][3][]3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {A}Model-free Reinforcement Learning}{3}{section*.11}\protected@file@percent }
\newlabel{s:Model-free reinforcement learning}{{III\,A}{3}{}{section*.11}{}}
\newlabel{s:Model-free reinforcement learning@cref}{{[subsection][1][3]III\,A}{[1][3][]3}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {1}Approximate dynamic programming}{3}{section*.12}\protected@file@percent }
\newlabel{eq:state-value-function}{{4}{3}{}{equation.3.4}{}}
\newlabel{eq:state-value-function@cref}{{[equation][4][]4}{[1][3][]3}}
\newlabel{eq:minimize_bellmann_optimality}{{6}{3}{}{equation.3.6}{}}
\newlabel{eq:minimize_bellmann_optimality@cref}{{[equation][6][]6}{[1][3][]3}}
\citation{Hasselt2015,Mnih2013,Lillicrap2015,Gu2016,Wang2015}
\citation{Gu2016}
\citation{Gu2016}
\citation{NIPS2010_091d584f,Hasselt2015,fujimoto2018addressing}
\citation{Lillicrap2015,Gu2016,Silver2014}
\citation{fujimoto2018addressing}
\citation{Gu2016}
\citation{Hirlaender2020a}
\citation{Gu2016}
\citation{Wang2019}
\citation{Gal2016,6654139}
\citation{Chua2018,Wang2019a}
\citation{Boer2005}
\citation{Sutton1991}
\citation{Pearce2018}
\citation{Kurutach2018}
\citation{Wang2019}
\citation{Chua2018,Janner2019,Wang2019a}
\citation{Kurutach2018}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {2}Design decisions for MFRL}{4}{section*.13}\protected@file@percent }
\newlabel{ss:Normalized advantage function}{{III\,A\,2}{4}{}{section*.13}{}}
\newlabel{ss:Normalized advantage function@cref}{{[subsubsection][2][3,1]III\,A\,2}{[1][4][]4}}
\newlabel{eq:state-action-value-approxiation}{{9}{4}{}{equation.3.9}{}}
\newlabel{eq:state-action-value-approxiation@cref}{{[equation][9][]9}{[1][4][]4}}
\@writefile{toc}{\contentsline {subsection}{\numberline {B}Uncertainty Aware DYNA-style Reinforcement Learning}{4}{section*.14}\protected@file@percent }
\newlabel{ss:Uncertainty aware DYNA-style reinforcement learning}{{III\,B}{4}{}{section*.14}{}}
\newlabel{ss:Uncertainty aware DYNA-style reinforcement learning@cref}{{[subsection][2][3]III\,B}{[1][4][]4}}
\newlabel{eq:dynamics_model}{{11}{4}{}{equation.3.11}{}}
\newlabel{eq:dynamics_model@cref}{{[equation][11][]11}{[1][4][]4}}
\citation{Goodfellow2016}
\citation{Sutton2018}
\citation{Schulman2015}
\citation{Kurutach2018}
\citation{Kurutach2018}
\citation{Schulman2017}
\citation{fujimoto2018addressing,Hill2018}
\citation{Haarnoja2018a}
\citation{Kurutach2018}
\citation{kidambi2020morel}
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces A schematic overview of the \emph {AE-DYNA} approach used in this paper.\relax }}{5}{figure.caption.15}\protected@file@percent }
\newlabel{fig:MBRL_overview}{{3}{5}{A schematic overview of the \emph {AE-DYNA} approach used in this paper.\relax }{figure.caption.15}{}}
\newlabel{fig:MBRL_overview@cref}{{[figure][3][]3}{[1][4][]5}}
\@writefile{toc}{\contentsline {subsection}{\numberline {C}Critical Design Decisions in MBRL}{5}{section*.16}\protected@file@percent }
\newlabel{ss:critical_design}{{III\,C}{5}{}{section*.16}{}}
\newlabel{ss:critical_design@cref}{{[subsection][3][3]III\,C}{[1][4][]5}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {1}The uncertainty aware dynamics model}{5}{section*.17}\protected@file@percent }
\newlabel{ss:The uncertainty aware dynamics model}{{III\,C\,1}{5}{}{section*.17}{}}
\newlabel{ss:The uncertainty aware dynamics model@cref}{{[subsubsection][1][3,3]III\,C\,1}{[1][5][]5}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {2}The controller algorithm}{5}{section*.18}\protected@file@percent }
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3}Handling of the uncertainty}{5}{section*.19}\protected@file@percent }
\citation{Janner2019}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {4}The data acquisition}{6}{section*.20}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {IV}Experimental results from FERMI RL online tests}{6}{section*.21}\protected@file@percent }
\newlabel{sec:Experimental results from FERMI RL online tests}{{IV}{6}{}{section*.21}{}}
\newlabel{sec:Experimental results from FERMI RL online tests@cref}{{[section][4][]IV}{[1][6][]6}}
\@writefile{lot}{\contentsline {table}{\numberline {I}{\ignorespaces Overview of the algorithms. \relax }}{6}{table.caption.22}\protected@file@percent }
\newlabel{tab:overview_algorithms}{{I}{6}{Overview of the algorithms. \relax }{table.caption.22}{}}
\newlabel{tab:overview_algorithms@cref}{{[table][1][]I}{[1][6][]6}}
\@writefile{toc}{\contentsline {subsection}{\numberline {A}MFRL Tests}{6}{section*.23}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces The training of different variants of the \emph {NAF2} algorithm on the FERMI FEL, averaged over two complete trainings (the standard-deviations are indicated by the shaded areas). The number of iterations (blue) shows the steps until the intensity is optimised, starting from a random initial position.\relax }}{6}{figure.caption.24}\protected@file@percent }
\newlabel{fig:NAF_training}{{4}{6}{The training of different variants of the \emph {NAF2} algorithm on the FERMI FEL, averaged over two complete trainings (the standard-deviations are indicated by the shaded areas). The number of iterations (blue) shows the steps until the intensity is optimised, starting from a random initial position.\relax }{figure.caption.24}{}}
\newlabel{fig:NAF_training@cref}{{[figure][4][]4}{[1][6][]6}}
\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces The evolution of the states of the \emph {NAF2} algorithm on the FERMI FEL using a double network during the training.\relax }}{6}{figure.caption.25}\protected@file@percent }
\newlabel{fig:NAF_evolution_double}{{5}{6}{The evolution of the states of the \emph {NAF2} algorithm on the FERMI FEL using a double network during the training.\relax }{figure.caption.25}{}}
\newlabel{fig:NAF_evolution_double@cref}{{[figure][5][]5}{[1][6][]6}}
\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces The evolution of the states of the \emph {NAF2} algorithm on the FERMI FEL using a single network during the training.\relax }}{7}{figure.caption.26}\protected@file@percent }
\newlabel{fig:NAF_evolution_single}{{6}{7}{The evolution of the states of the \emph {NAF2} algorithm on the FERMI FEL using a single network during the training.\relax }{figure.caption.26}{}}
\newlabel{fig:NAF_evolution_single@cref}{{[figure][6][]6}{[1][6][]7}}
\@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces The verification episodes of the variants of the trained model-free \emph {NAF2} algorithm on the FERMI FEL. The number of iterations (blue) shows the steps until the intensity is optimised, starting from a random initial position.\relax }}{7}{figure.caption.27}\protected@file@percent }
\newlabel{fig:NAF_verification}{{7}{7}{The verification episodes of the variants of the trained model-free \emph {NAF2} algorithm on the FERMI FEL. The number of iterations (blue) shows the steps until the intensity is optimised, starting from a random initial position.\relax }{figure.caption.27}{}}
\newlabel{fig:NAF_verification@cref}{{[figure][7][]7}{[1][6][]7}}
\@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces The training metrics of the \emph {AE-DYNA-SAC} on the FERMI FEL using a single network (dashed) and a double network (solid). The Bellman error (\cref {eq:minimize_bellmann_optimality}) and the state-value function (\cref {eq:state-value-function}) are shown. \relax }}{7}{figure.caption.28}\protected@file@percent }
\newlabel{fig:NAF_convergence}{{8}{7}{The training metrics of the \emph {AE-DYNA-SAC} on the FERMI FEL using a single network (dashed) and a double network (solid). The Bellman error (\cref {eq:minimize_bellmann_optimality}) and the state-value function (\cref {eq:state-value-function}) are shown. \relax }{figure.caption.28}{}}
\newlabel{fig:NAF_convergence@cref}{{[figure][8][]8}{[1][6][]7}}
\@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces The training observables of the \emph {AE-DYNA-SAC} on the FERMI FEL. Detail are provided in the text.\relax }}{7}{figure.caption.30}\protected@file@percent }
\newlabel{fig:AE-DYNA_observables}{{9}{7}{The training observables of the \emph {AE-DYNA-SAC} on the FERMI FEL. Detail are provided in the text.\relax }{figure.caption.30}{}}
\newlabel{fig:AE-DYNA_observables@cref}{{[figure][9][]9}{[1][7][]7}}
\@writefile{lof}{\contentsline {figure}{\numberline {10}{\ignorespaces The verification episodes of the trained model-based methods: \emph {ME-TRPO} and \emph {AE-DYNA-SAC} on the FERMI FEL. The number of iterations (blue) shows the steps until the intensity is optimised, starting from a random initial position.\relax }}{7}{figure.caption.31}\protected@file@percent }
\newlabel{fig:AE-DYNA_verification}{{10}{7}{The verification episodes of the trained model-based methods: \emph {ME-TRPO} and \emph {AE-DYNA-SAC} on the FERMI FEL. The number of iterations (blue) shows the steps until the intensity is optimised, starting from a random initial position.\relax }{figure.caption.31}{}}
\newlabel{fig:AE-DYNA_verification@cref}{{[figure][10][]10}{[1][7][]7}}
\@writefile{toc}{\contentsline {subsection}{\numberline {B}MBRL Tests}{7}{section*.29}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {11}{\ignorespaces The training observables of the \emph {ME-TRPO} on the FERMI FEL. Detail are provided in the text.\relax }}{8}{figure.caption.32}\protected@file@percent }
\newlabel{fig:ME-TRPO_observables}{{11}{8}{The training observables of the \emph {ME-TRPO} on the FERMI FEL. Detail are provided in the text.\relax }{figure.caption.32}{}}
\newlabel{fig:ME-TRPO_observables@cref}{{[figure][11][]11}{[1][7][]8}}
\@writefile{lof}{\contentsline {figure}{\numberline {12}{\ignorespaces The evolution of the states during the worst verification episodes of the trained \emph {ME-TRPO} and the \emph {AE-DYNA-SAC} on the FERMI FEL.\relax }}{8}{figure.caption.33}\protected@file@percent }
\newlabel{fig:Worst_episode_MBRL}{{12}{8}{The evolution of the states during the worst verification episodes of the trained \emph {ME-TRPO} and the \emph {AE-DYNA-SAC} on the FERMI FEL.\relax }{figure.caption.33}{}}
\newlabel{fig:Worst_episode_MBRL@cref}{{[figure][12][]12}{[1][7][]8}}
\@writefile{toc}{\contentsline {section}{\numberline {V}Discussion and outlook}{8}{section*.34}\protected@file@percent }
\citation{Hirlaender2020b}
\citation{Furutaa}
\citation{fujimoto2018addressing}
\citation{Silver2014}
\citation{fujimoto2018addressing,Haarnoja2018a}
\@writefile{lot}{\contentsline {table}{\numberline {II}{\ignorespaces An overview over the verification performance of the different trained algorithms on the FERMI FEL including their standard deviation. \relax }}{9}{table.caption.35}\protected@file@percent }
\newlabel{tab:overview_verification}{{II}{9}{An overview over the verification performance of the different trained algorithms on the FERMI FEL including their standard deviation. \relax }{table.caption.35}{}}
\newlabel{tab:overview_verification@cref}{{[table][2][]II}{[1][8][]9}}
\@writefile{toc}{\contentsline {section}{\numberline {VI}Conclusions}{9}{section*.36}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {VII}Acknowledgements}{9}{section*.37}\protected@file@percent }
\@writefile{toc}{\appendix }
\@writefile{toc}{\contentsline {section}{\numberline {A}A Non-linear Standard Control Problem}{9}{section*.38}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {1}NAF2 Details}{9}{section*.39}\protected@file@percent }
\newlabel{appendix:naf2}{{A\,1}{9}{}{section*.39}{}}
\newlabel{appendix:naf2@cref}{{[subappendix][1][2147483647,1]A\,1}{[1][9][]9}}
\citation{BarthMaron2018}
\citation{Brockman2016}
\citation{Gu2007,Chen2011,Bardsley2012,Pearce2018}
\citation{Pearce2018}
\@writefile{lof}{\contentsline {figure}{\numberline {13}{\ignorespaces Cumulative reward of different \emph {NAF} implementations as discussed in the text on the \emph {inverted pendulum} without noise.\relax }}{10}{figure.caption.40}\protected@file@percent }
\newlabel{fig:comparsion_smoothing_small}{{13}{10}{Cumulative reward of different \emph {NAF} implementations as discussed in the text on the \emph {inverted pendulum} without noise.\relax }{figure.caption.40}{}}
\newlabel{fig:comparsion_smoothing_small@cref}{{[figure][13][2147483647]13}{[1][10][]10}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2}The Impact of Noise}{10}{section*.41}\protected@file@percent }
\newlabel{appendix:The impact of noise}{{A\,2}{10}{}{section*.41}{}}
\newlabel{appendix:The impact of noise@cref}{{[subappendix][2][2147483647,1]A\,2}{[1][10][]10}}
\@writefile{lof}{\contentsline {figure}{\numberline {14}{\ignorespaces Cumulative reward of different \emph {NAF} implementations on the \emph {inverted pendulum} with artificial noise as discussed in the text.\relax }}{10}{figure.caption.42}\protected@file@percent }
\newlabel{fig:comparsion_noise}{{14}{10}{Cumulative reward of different \emph {NAF} implementations on the \emph {inverted pendulum} with artificial noise as discussed in the text.\relax }{figure.caption.42}{}}
\newlabel{fig:comparsion_noise@cref}{{[figure][14][2147483647]14}{[1][10][]10}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {a}Regression assuming homoskedastic Gaussian noise using `anchored ensembling'}{10}{section*.43}\protected@file@percent }
\newlabel{eq_MAP_loglike_anc}{{A4}{10}{}{equation.A.4}{}}
\newlabel{eq_MAP_loglike_anc@cref}{{[equation][4][2147483647,1]A4}{[1][10][]10}}
\newlabel{eqn_anch_loss_matrix}{{A5}{10}{}{equation.A.5}{}}
\newlabel{eqn_anch_loss_matrix@cref}{{[equation][5][2147483647,1]A5}{[1][10][]10}}
\newlabel{eqn_anch_loss_init}{{A6}{11}{}{equation.A.6}{}}
\newlabel{eqn_anch_loss_init@cref}{{[equation][6][2147483647,1]A6}{[1][11][]11}}
\@writefile{lof}{\contentsline {figure}{\numberline {15}{\ignorespaces Cumulative reward of \emph {AE-DYNA-SAC} on the \emph {inverted pendulum} with artificial noise using the `anchor ensembling'.\relax }}{11}{figure.caption.44}\protected@file@percent }
\newlabel{fig:comparsion_noise_ae_dyna}{{15}{11}{Cumulative reward of \emph {AE-DYNA-SAC} on the \emph {inverted pendulum} with artificial noise using the `anchor ensembling'.\relax }{figure.caption.44}{}}
\newlabel{fig:comparsion_noise_ae_dyna@cref}{{[figure][15][2147483647]15}{[1][11][]11}}
\@writefile{lof}{\contentsline {figure}{\numberline {16}{\ignorespaces Varying number of models in the ensemble of the \emph {AE-DYNA-SAC} on the \emph {inverted pendulum}.\relax }}{11}{figure.caption.45}\protected@file@percent }
\newlabel{fig:Compare_models_sizes}{{16}{11}{Varying number of models in the ensemble of the \emph {AE-DYNA-SAC} on the \emph {inverted pendulum}.\relax }{figure.caption.45}{}}
\newlabel{fig:Compare_models_sizes@cref}{{[figure][16][2147483647]16}{[1][11][]11}}
\@writefile{lof}{\contentsline {figure}{\numberline {17}{\ignorespaces The comparison of the \emph {NAF2} and the \emph {AE-DYNA-SAC} on the noisy \emph {inverted pendulum}.\relax }}{11}{figure.caption.47}\protected@file@percent }
\newlabel{fig:comparsion_NAF_AE-DYNA}{{17}{11}{The comparison of the \emph {NAF2} and the \emph {AE-DYNA-SAC} on the noisy \emph {inverted pendulum}.\relax }{figure.caption.47}{}}
\newlabel{fig:comparsion_NAF_AE-DYNA@cref}{{[figure][17][2147483647]17}{[1][11][]11}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3}NAF versus AE-DYNA}{11}{section*.46}\protected@file@percent }
\bibdata{mainNotes,tex/Bibliography}
\bibcite{Kain2020}{{1}{2020}{{Kain\ \emph {et~al.}}}{{Kain, Hirlander, Goddard, Velotti, Porta, Bruchon,\ and\ Valentino}}}
\bibcite{Scheinker2018}{{2}{2018}{{Scheinker\ \emph {et~al.}}}{{Scheinker, Edelen, Bohler, Emma,\ and\ Lutman}}}
\bibcite{Huang2013}{{3}{2013}{{Huang\ \emph {et~al.}}}{{Huang, Corbett, Safranek,\ and\ Wu}}}
\bibcite{Bruchon2017}{{4}{2017}{{Bruchon\ \emph {et~al.}}}{{Bruchon, Fenu, Gaio, Lonza, Pellegrino,\ and\ Saule}}}
\bibcite{Scheinker2020}{{5}{2020}{{Scheinker\ \emph {et~al.}}}{{Scheinker, Hirlaender, Velotti, Gessner, Porta, Kain, Goddard,\ and\ Ramjiawan}}}
\bibcite{Hirlaender2019}{{6}{2019}{{Hirlaender\ \emph {et~al.}}}{{Hirlaender, Fraser, Goddard, Kain, Prieto, Stoel, Szakaly,\ and\ Velotti}}}
\bibcite{Welsch2015}{{7}{2015}{{Welsch}}{{}}}
\bibcite{Albright2019}{{8}{2019}{{Albright\ \emph {et~al.}}}{{Albright, Alemany~Fernandez, Angoletta, Bartosik, Beaumont, Bellodi, Biancacci, Bozzolan, Buzio, Di~Lorenzo, Frassier, Gamba, Hirlander, Huschauer, Kain, Kotzian, Kuchler, Latina, Levens, Mahner, Manosperti, Marqversen, Moreno~Garcia, Nicosia, O'Neil, Ozturk, Saa~Hernandez, Scrivens, Jensen, Tranquille, Wetton,\ and\ Zampetakis}}}
\bibcite{Hanuka2020}{{9}{2020}{{Hanuka\ \emph {et~al.}}}{{Hanuka, Huang, Shtalenkova, Kennedy, Edelen, Lalchand, Ratner,\ and\ Duris}}}
\bibcite{Roussel2020}{{10}{2020}{{Roussel\ \emph {et~al.}}}{{Roussel, Hanuka,\ and\ Edelen}}}
\bibcite{Bruchon2020}{{11}{2020}{{Bruchon\ \emph {et~al.}}}{{Bruchon, Fenu, Gaio, Lonza, O'Shea, Pellegrino,\ and\ Salvato}}}
\bibcite{Bruchon2019}{{12}{2019}{{Bruchon\ \emph {et~al.}}}{{Bruchon, Fenu, Gaio, Lonza, Pellegrino,\ and\ Salvato}}}
\bibcite{Pang2020}{{13}{2020}{{Pang\ \emph {et~al.}}}{{Pang, Thulasidasan,\ and\ Rybarcyk}}}
\bibcite{John2020}{{14}{2020}{{John\ \emph {et~al.}}}{{John, Herwig, Kafkes, Pellico, Perdue, Quintero-Parra, Schupbach, Seiya, Tran, Duarte, Huang, Schram,\ and\ Keller}}}
\bibcite{Sutton2018}{{15}{2018}{{Sutton\ and\ Barto}}{{}}}
\bibcite{DulacArnold2019}{{16}{2019}{{Dulac-Arnold\ \emph {et~al.}}}{{Dulac-Arnold, Mankowitz,\ and\ Hester}}}
\bibcite{Brochon2020}{{17}{2020}{{Bruchon}}{{}}}
\bibcite{Brockman2016}{{18}{2016}{{Brockman\ \emph {et~al.}}}{{Brockman, Cheung, Pettersson, Schneider, Schulman, Tang,\ and\ Zaremba}}}
\bibcite{Heess2017}{{19}{2017}{{Heess\ \emph {et~al.}}}{{Heess, TB, Sriram, Lemmon, Merel, Wayne, Tassa, Erez, Wang, Eslami, Riedmiller,\ and\ Silver}}}
\bibcite{Schulman2017}{{20}{2017}{{Schulman\ \emph {et~al.}}}{{Schulman, Wolski, Dhariwal, Radford,\ and\ Klimov}}}
\bibcite{Silver2014}{{21}{2014}{{Silver\ \emph {et~al.}}}{{Silver, Lever, Heess, Degris, Wierstra,\ and\ Riedmiller}}}
\bibcite{Lillicrap2015}{{22}{2015}{{Lillicrap\ \emph {et~al.}}}{{Lillicrap, Hunt, Pritzel, Heess, Erez, Tassa, Silver,\ and\ Wierstra}}}
\bibcite{OpenAI2018}{{23}{2018}{{OpenAI\ \emph {et~al.}}}{{OpenAI, Andrychowicz, Baker, Chociej, Jozefowicz, McGrew, Pachocki, Petron, Plappert, Powell, Ray, Schneider, Sidor, Tobin, Welinder, Weng,\ and\ Zaremba}}}
\bibcite{Deisenroth2011}{{24}{2011}{{Deisenroth\ and\ Rasmussen}}{{}}}
\bibcite{Williams1992}{{25}{1992}{{Williams}}{{}}}
\bibcite{Baxter2011}{{26}{2011}{{Baxter\ and\ Bartlett}}{{}}}
\bibcite{pmlr-v28-levine13}{{27}{2013}{{Levine\ and\ Koltun}}{{}}}
\bibcite{Schulman2015}{{28}{2015}{{Schulman\ \emph {et~al.}}}{{Schulman, Levine, Moritz, Jordan,\ and\ Abbeel}}}
\bibcite{Szepesvari2010}{{29}{2010}{{Szepesv{\'{a}}ri}}{{}}}
\@writefile{toc}{\contentsline {section}{\numberline {}References}{12}{section*.48}\protected@file@percent }
\bibcite{Levine2020}{{30}{2020}{{Levine\ \emph {et~al.}}}{{Levine, Kumar, Tucker,\ and\ Fu}}}
\bibcite{Gu2016}{{31}{2016}{{Gu\ \emph {et~al.}}}{{Gu, Lillicrap, Sutskever,\ and\ Levine}}}
\bibcite{Hirlaender2020a}{{32}{2020}{{Hirlaender}}{{}}}
\bibcite{Hasselt2015}{{33}{2015}{{van Hasselt\ \emph {et~al.}}}{{van Hasselt, Guez,\ and\ Silver}}}
\bibcite{Mnih2013}{{34}{2013}{{Mnih\ \emph {et~al.}}}{{Mnih, Kavukcuoglu, Silver, Graves, Antonoglou, Wierstra,\ and\ Riedmiller}}}
\bibcite{Wang2015}{{35}{2015}{{Wang\ \emph {et~al.}}}{{Wang, Schaul, Hessel, van Hasselt, Lanctot,\ and\ de~Freitas}}}
\bibcite{NIPS2010_091d584f}{{36}{2010}{{Hasselt}}{{}}}
\bibcite{fujimoto2018addressing}{{37}{2018}{{Fujimoto\ \emph {et~al.}}}{{Fujimoto, van Hoof,\ and\ Meger}}}
\bibcite{Wang2019}{{38}{2019}{{Wang\ \emph {et~al.}}}{{Wang, Bao, Clavera, Hoang, Wen, Langlois, Zhang, Zhang, Abbeel,\ and\ Ba}}}
\bibcite{Gal2016}{{39}{2016}{{Gal\ \emph {et~al.}}}{{Gal, McAllister,\ and\ Rasmussen}}}
\bibcite{6654139}{{40}{2015}{{{Deisenroth}\ \emph {et~al.}}}{{{Deisenroth}, {Fox},\ and\ {Rasmussen}}}}
\bibcite{Chua2018}{{41}{2018}{{Chua\ \emph {et~al.}}}{{Chua, Calandra, McAllister,\ and\ Levine}}}
\bibcite{Wang2019a}{{42}{2019}{{Wang\ and\ Ba}}{{}}}
\bibcite{Boer2005}{{43}{2005}{{de~Boer\ \emph {et~al.}}}{{de~Boer, Kroese, Mannor,\ and\ Rubinstein}}}
\bibcite{Sutton1991}{{44}{1991}{{Sutton}}{{}}}
\bibcite{Pearce2018}{{45}{2018}{{Pearce\ \emph {et~al.}}}{{Pearce, Leibfried, Brintrup, Zaki,\ and\ Neely}}}
\bibcite{Kurutach2018}{{46}{2018}{{Kurutach\ \emph {et~al.}}}{{Kurutach, Clavera, Duan, Tamar,\ and\ Abbeel}}}
\bibcite{Janner2019}{{47}{2019}{{Janner\ \emph {et~al.}}}{{Janner, Fu, Zhang,\ and\ Levine}}}
\bibcite{Goodfellow2016}{{48}{2016}{{Goodfellow\ \emph {et~al.}}}{{Goodfellow, Bengio,\ and\ Courville}}}
\bibcite{Hill2018}{{49}{2018}{{Hill\ \emph {et~al.}}}{{Hill, Raffin, Ernestus, Gleave, Kanervisto, Traore, Dhariwal, Hesse, Klimov, Nichol, Plappert, Radford, Schulman, Sidor,\ and\ Wu}}}
\bibcite{Haarnoja2018a}{{50}{2018}{{Haarnoja\ \emph {et~al.}}}{{Haarnoja, Zhou, Hartikainen, Tucker, Ha, Tan, Kumar, Zhu, Gupta, Abbeel,\ and\ Levine}}}
\bibcite{kidambi2020morel}{{51}{2020}{{Kidambi\ \emph {et~al.}}}{{Kidambi, Rajeswaran, Netrapalli,\ and\ Joachims}}}
\bibcite{Hirlaender2020b}{{52}{2020}{{Hirlaender\ and\ Bruchon}}{{}}}
\bibcite{Furutaa}{{53}{1991}{{Furuta\ \emph {et~al.}}}{{Furuta, Yamakita,\ and\ Kobayashi}}}
\bibcite{BarthMaron2018}{{54}{2018}{{Barth-Maron\ \emph {et~al.}}}{{Barth-Maron, Hoffman, Budden, Dabney, Horgan, TB, Muldal, Heess,\ and\ Lillicrap}}}
\bibcite{Gu2007}{{55}{2007}{{Gu\ and\ Oliver}}{{}}}
\bibcite{Chen2011}{{56}{2011}{{Chen\ and\ Oliver}}{{}}}
\bibcite{Bardsley2012}{{57}{2012}{{Bardsley}}{{}}}
\newlabel{LastBibItem}{{57}{13}{}{section*.48}{}}
\newlabel{LastBibItem@cref}{{[subappendix][3][2147483647,1]A\,3}{[1][13][]13}}
\bibstyle{apsrev4-2}
\citation{REVTEX42Control}
\citation{apsrev42Control}
\newlabel{LastPage}{{}{13}{}{}{}}
\gdef \@abspage@last{13}