docs/index.html

<html lang="en">

<head>
	<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
	<script async="" src="./js/analytics.js"></script>
	<script src="./js/jsapi" type="text/javascript"></script>
	<script type="text/javascript">google.load("jquery", "1.3.2");</script>
	<!-- Google Tag Manager -->
	<script>
		// Google Tag Manager bootstrap: records a 'gtm.start' event on the data
		// layer array, then inserts an async loader for gtm.js ahead of the first
		// script element found in the document.
		(function (win, doc, tagName, layerName, containerId) {
			win[layerName] = win[layerName] || [];
			win[layerName].push({
				'gtm.start': new Date().getTime(),
				event: 'gtm.js'
			});

			var firstScript = doc.getElementsByTagName(tagName)[0];
			var loader = doc.createElement(tagName);
			// A data layer name other than the default is passed on via the "l" query parameter.
			var layerParam = layerName != 'dataLayer' ? '&l=' + layerName : '';

			loader.async = true;
			loader.src = 'https://www.googletagmanager.com/gtm.js?id=' + containerId + layerParam;
			firstScript.parentNode.insertBefore(loader, firstScript);
		})(window, document, 'script', 'dataLayer', 'GTM-WLCRH4G');
	</script>
	<!-- End Google Tag Manager -->
	<!-- Required meta tags -->
	<meta charset="utf-8">
	<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
	<!-- Bootstrap CSS -->
	<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.1.3/css/bootstrap.min.css"
		integrity="sha384-MCw98/SFnGE8fJT3GXwEOngsV7Zt27NXFoaoApmYm81iuXoPkFOJwJ8ERdknLPMO" crossorigin="anonymous">
	<link href="style.css" rel="stylesheet">
	<link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro" rel="stylesheet">
	<link href="https://fonts.googleapis.com/css?family=Open+Sans:300,300i,400,400i,600,600i,700,700i,800,800i"
		rel="stylesheet">
	<link href="https://fonts.googleapis.com/css?family=Source+Sans+Pro:300,400,700,300italic,400italic,700italic"
		rel="stylesheet" type="text/css">

	 <!-- HTML Meta Tags -->
  <title>IQ-Learn: Inverse soft-Q Learning for Imitation</title>
  <meta name="description" content="Introducing Inverse Q-Learning, a novel SOTA framework for simple, scalable and stable Imitation Learning.">

  <!-- Facebook Meta Tags -->
  <meta property="og:url" content="https://div99.github.io/IQ-Learn/">
  <meta property="og:type" content="website">
  <meta property="og:title" content="IQ-Learn: Inverse soft-Q Learning for Imitation">
  <meta property="og:description" content="Introducing Inverse Q-Learning, a novel SOTA framework for simple, scalable and stable Imitation Learning.">
  <meta property="og:image" content="https://div99.github.io/IQ-Learn/approach2.png">

  <!-- Twitter Meta Tags -->
  <meta name="twitter:card" content="summary_large_image">
  <meta property="twitter:domain" content="div99.github.io">
  <meta property="twitter:url" content="https://div99.github.io/IQ-Learn/">
  <meta name="twitter:title" content="IQ-Learn: Inverse soft-Q Learning for Imitation">
  <meta name="twitter:description" content="Introducing Inverse Q-Learning, a novel SOTA framework for simple, scalable and stable Imitation Learning.">
  <meta name="twitter:image" content="https://div99.github.io/IQ-Learn/approach2.png">
</head>

<body>
	<!-- Google Tag Manager (noscript) -->
	<!-- The script bootstrap for this container already runs in the head;
	     repeating it here pushed a second 'gtm.start' event and requested
	     gtm.js a second time. This slot carries the no-JavaScript fallback. -->
	<noscript><iframe src="https://www.googletagmanager.com/ns.html?id=GTM-WLCRH4G"
			height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>
	<!-- End Google Tag Manager (noscript) -->
	<br>
	<center>
		<div id="hero">
			<h1>IQ-Learn: Inverse soft-Q Learning for Imitation</h1>
			<div class="authors">
				<table align="center" width="1030px">
					<tbody>
						<tr>
							<td align="center" width="300px">
								<center>
									<span><a href="https://divyanshgarg.com/">Divyansh
											Garg</a><sup>1</sup></span>
								</center>
							</td>
							<td align="center" width="300px">
								<center>
									<span><a href="https://www.linkedin.com/in/shuvam-chakraborty-458631121/">Shuvam
											Chakraborty</a><sup>1</sup></span>
								</center>
							</td>
							<td align="center" width="300px">
								<center>
									<span><a href="https://cundy.me/">Chris
											Cundy</a><sup>1</sup></span>
								</center>
							</td>
							<td align="center" width="300px">
								<center>
									<span><a href="https://tsong.me/">Jiaming
											Song</a><sup>1</sup></span>
								</center>
							</td>
							<td align="center" width="300px">
								<center>
									<span><a href="https://cs.stanford.edu/~ermon/">Stefano
											Ermon</a><sup>1</sup></span>
								</center>
							</td>
						</tr>
					</tbody>
				</table>
			</div>
			<table align="center" width="700px">
				<tbody>
					<tr>
						<td align="center" width="200px">
							<center>
								<span style="font-size:20px">Stanford University<sup>1</sup></span>
							</center>
						</td>
					</tr>
				</tbody>
			</table>
			<table align="center" width="800px">
				<tbody>
					<tr>
						<td align="center" width="150px">
							<center>
								<span style="font-size:25px">In NeurIPS 2021 <b><em>(Spotlight)</em></b></span>
							</center>
						</td>
					</tr>
				</tbody>
			</table>
		</div>
	</center>
	<center>
		<table style="margin-top: 20px">
			<tbody>
				<tr>
					<td>
						<center><a href="https://arxiv.org/abs/2106.12142" target="_blank" class="nav-link link"><img
									class="filter-blue" src="icons/paper_icon.svg" width="48" height="48"><br>Paper</a>
						</center>
					</td>
					<td>
						<center><a href="https://github.com/Div99/IQ-Learn" target="_blank" class="nav-link link"><img
									class="filter-blue" src="icons/github.svg" width="48" height="48"><br>Code<br></a>
						</center>
					</td>
					<td>
						<center><a
								href="https://slideslive.com/embed/presentation/38967041?embed_parent_url=https%3A%2F%2Fneurips.cc%2Fvirtual%2F2021%2Fposter%2F26537&embed_container_origin=https%3A%2F%2Fneurips.cc&embed_container_id=presentation-embed-38967041&auto_load=true&auto_play=false&zoom_ratio=&disable_fullscreen=false&locale=en&vertical_enabled=true&vertical_enabled_on_mobile=false&allow_hidden_controls_when_paused=true&fit_to_viewport=true&user_uuid=2f7f8b9e-d23a-478f-ad00-f0905aa4836d"
								target="_blank" class="nav-link link"><img class="filter-blue" src="icons/youtube.svg"
									width="48" height="48"><br>Talk<br></a></center>
					</td>
				</tr>
			</tbody>
		</table>
	</center><br>
	<!-- <table align="center" width="650px">
		<tbody>
			<tr>
				<td align="center" width="150px">
					<center>
						<span style="font-size:20px"><a href="/~https://github.com/Div99/W-Stereo-Disp">
								[GitHub]</a></span>
					</center>
				</td>
				<td align="center" width="150px">
					<center>
						<span style="font-size:20px"><a href="https://slideslive.com/38937842"> [Talk]</a></span>
					</center>
				</td>
				<td align="center" width="150px">
					<center>
						<span style="font-size:20px"><a href="https://arxiv.org/abs/2007.03085"> [Paper]</a></span>
					</center>
				</td>
				<td align="center" width="150px">
					<center>
						<span style="font-size:20px"><a href="poster.pdf"> [Poster]</a></span>
					</center>
				</td>
			</tr>
			<tr>
			</tr>
		</tbody>
	</table> -->
	<!--   		  <br><br>
<hr> -->

	<table align="center" width="750px">
		<tbody>
			<tr>
				<td width="400px">
					<center>
						<img class="img-banner" src="teaser.gif"><br>
					</center>
				</td>
			</tr>
			<tr>
				<td width=" 300px">
					<center>
						<div style="font-size:17px; padding-bottom: 10px">
							<i>IQ-Learn reaching human performance on Atari through
								pure imitation<br></i>
						</div>
						<span style="font-size:15px;"><i>Showing <span class=" text-primary bold">Pong</span> (Top
								Left),
								<span class="text-danger bold">Breakout</span> (Top Right), <span
									class="text-success bold">Space
									Invaders</span> (Bottom Left),
								<span class="text-info bold">QBert</span> (Bottom Right).</i>
						</span>
					</center>
				</td>
			</tr>
		</tbody>
	</table>
	<br>
	<hr>
	<center>
		<h1>Abstract</h1>
	</center>
	<table align="center" width="850px">
		<tbody>
			<tr>
				<td>
				</td>
			</tr>
		</tbody>
	</table>
	<p class="mt-3">
		In many sequential decision-making problems (e.g., robotics control, game playing, sequential prediction), human
		or expert data is available containing useful information about the task. However, imitation learning (IL) from
		a small amount of expert data can be challenging in high-dimensional environments with complex dynamics.
		Behavioral cloning is a simple method that is widely used due to its simplicity of implementation and stable
		convergence but doesn't utilize any information involving the environment's dynamics. Many existing methods that
		exploit dynamics information are difficult to train in practice due to an adversarial optimization process over
		reward and policy approximators or biased, high variance gradient estimators. <br><br> We introduce a method for
		dynamics-aware IL which avoids adversarial training by <b><em>learning a single Q-function</em></b>, implicitly
		representing
		both reward and policy. On standard benchmarks, the implicitly learned rewards show a high positive correlation
		with the ground-truth rewards, illustrating our method can also be used for inverse reinforcement learning
		(IRL). Our method, <span class=" text-primary"><strong>Inverse soft-Q learning (IQ-Learn)</strong></span>
		obtains
		<strong>state-of-the-art results</strong> in
		offline and
		online
		imitation learning settings, significantly outperforming existing methods both in the number of required
		environment interactions and scalability in high-dimensional spaces, often by more than <strong>3X</strong>.
	</p>
	<br><br>
	<hr>
	<center>
		<h1>Video</h1>
	</center>
	<table align="center" width="1100px">
		<tbody>
			<tr>
			</tr>
		</tbody>
	</table>

	<table align="center" width="800px">
		<tbody>
			<tr>
				<td align="center" width="800px">
					<div id="presentation-embed-38967041" class="slp my-auto" style="width: 100%;">
						<iframe
							src="https://slideslive.com/embed/presentation/38967041?embed_parent_url=https%3A%2F%2Fneurips.cc%2Fvirtual%2F2021%2Fposter%2F26537&embed_container_origin=https%3A%2F%2Fneurips.cc&embed_container_id=presentation-embed-38967041&auto_load=true&auto_play=false&zoom_ratio=&disable_fullscreen=false&locale=en&vertical_enabled=true&vertical_enabled_on_mobile=false&allow_hidden_controls_when_paused=true&fit_to_viewport=true&user_uuid=2f7f8b9e-d23a-478f-ad00-f0905aa4836d"
							height="564" scrolling="no" frameborder="0"
							sandbox="allow-forms allow-pointer-lock allow-popups allow-same-origin allow-scripts allow-top-navigation"
							allow="autoplay; fullscreen" allowfullscreen="" webkitallowfullscreen=""
							mozallowfullscreen="" style="margin: 0px auto; display: block; width: 100%;"></iframe>
					</div>
				</td>
			</tr>

		</tbody>
	</table>
	<hr>
	<center>
		<h1>Approach</h1>
	</center>
	<table align="center" width="600px">
		<tbody>
			<tr>
				<td align="center"><a href="https://github.com/Div99/IQ-Learn"><img class="round"
							style="height:1000px; margin-left: 60px" src="approach.png"></a></td>
				<!-- </br> -->
			</tr>
		</tbody>
	</table>
	<center> <br>
		<!-- <span style="font-size:28px">Code coming soon!</span></i>			  	 -->
		<span style="font-size:24px">&nbsp;<a href="/~https://github.com/Div99/IQ-Learn">[GitHub]</a>
		</span><i></i>
		<span style="font-size:28px"></span>
		<br>
	</center>
	<table align="center" width="800px">
		<tbody>
			<tr></tr>
		</tbody>
	</table>
	<br>
	<hr>
	<!-- <table align=center width=550px> -->
	<center>
		<h1>Recovering Rewards</h1>
	</center>
	<br>
	<table align="center" width="600px">
		<tbody>
			<tr>
				<td align="center"><a href="grid.pdf"><img class="round" style="height:300px" src="grid.png"></a></td>
				<!-- </br> -->
				<!-- </br> -->
			</tr>
		</tbody>
	</table>
	<br>
	<center>
		<span style="font-size:14pt">
			Recovering environment rewards on a discrete GridWorld environment with 5 possible actions: <span
				class="text-success">up, down,
				left, right, stay</span>
		</span>
	</center>
	<br>
	<hr>
	<center>
		<h1>Paper</h1>
	</center>
	<table align="center" width="600px">
		<tbody>
			<tr>
				<td align="center"><a href="https://arxiv.org/abs/2106.12142"><img style="height:350px"
							src="paper_thumb.png"></a></td>
			</tr>
		</tbody>
	</table>
	<br>
	<table align="center" width="500px">
		<tbody>
			<tr>
				<td><span style="font-size:24px">
						<center>
							<a
								href="https://proceedings.neurips.cc/paper/2021/file/210f760a89db30aa72ca258a3483cc7f-Paper.pdf">[Paper]</a>
						</center>
					</span></td>
				<td><span style="font-size:24px">
						<center>
							<a
								href="https://proceedings.neurips.cc/paper/2021/file/210f760a89db30aa72ca258a3483cc7f-Supplemental.pdf">[Suppl]</a>
						</center>
					</span></td>
				<td><span style="font-size:24px">
						<center>
							<a href="neurips_2021_iq.txt">[Bibtex]</a>
						</center>
					</span></td>
			</tr>
		</tbody>
	</table>
	<br>
	<hr>
	<center>
		<h1>Poster</h1>
	</center>
	<br>
	<table align="center" width="600px">
		<tbody>
			<tr>
				<td align="center"><a href="poster.pdf"><img class="paper-big" style="height:650px"
							src="poster.jpg"></a></td>
			</tr>
		</tbody>
	</table>
	<br>
	<hr>
	<center>
		<h1>Citation</h1>
	</center>
	<table align="center" width="1000px">
		<tbody>
			<tr>
				<td><span style="font-size:14pt">
					</span>
				</td>
			</tr>
		</tbody>
	</table>
	<pre>
@inproceedings{
	garg2021iqlearn,
	title={IQ-Learn: Inverse soft-Q Learning for Imitation},
	author={Divyansh Garg and Shuvam Chakraborty and Chris Cundy and Jiaming Song and Stefano Ermon},
	booktitle={Thirty-Fifth Conference on Neural Information Processing Systems},
	year={2021},
	url={https://openreview.net/forum?id=Aeo-xqtb5p}
	}
</pre>
	<br><br>
	<br><br>
	<script>
		// Google Analytics (analytics.js) loader.
		// Registers a global command function under the name passed as `r` ('ga')
		// and injects the library script asynchronously ahead of the first script
		// element in the document. It then queues the tracker creation and a
		// pageview hit.
		(function (i, s, o, g, r, a, m) {
			i['GoogleAnalyticsObject'] = r;
			// If no function is registered yet, install a stub that stores each
			// call's arguments in i[r].q.
			i[r] = i[r] || function () {
				(i[r].q = i[r].q || []).push(arguments);
			};
			// Numeric timestamp taken when this snippet runs.
			i[r].l = 1 * new Date();
			a = s.createElement(o);
			m = s.getElementsByTagName(o)[0];
			a.async = 1;
			a.src = g;
			m.parentNode.insertBefore(a, m);
			// The library URL uses an explicit https: scheme; a protocol-relative
			// URL inherits the page's scheme and cannot load from file://.
		})(window, document, 'script', 'https://www.google-analytics.com/analytics.js', 'ga');
		ga('create', 'UA-75863369-1', 'auto');
		ga('send', 'pageview');
	</script>
</body>

</html>