forked from nerfies/nerfies.github.io
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.html
401 lines (371 loc) · 17.3 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="description"
        content="CREStE: Scalable Mapless Navigation with Internet Scale Priors and Counterfactual Guidance">
  <meta name="keywords" content="CREStE, creste, mapless navigation, internet scale priors, counterfactual guidance">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>CREStE: Scalable Mapless Navigation with Internet Scale Priors and Counterfactual Guidance</title>

  <!-- Global site tag (gtag.js) - Google Analytics -->
  <script async src="https://www.googletagmanager.com/gtag/js?id=G-PYVRSFMDRL"></script>
  <script>
    window.dataLayer = window.dataLayer || [];
    function gtag() {
      dataLayer.push(arguments);
    }
    gtag('js', new Date());
    gtag('config', 'G-PYVRSFMDRL');
  </script>

  <link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro" rel="stylesheet">
  <link rel="stylesheet" href="./static/css/bulma.min.css">
  <link rel="stylesheet" href="./static/css/bulma-carousel.min.css">
  <link rel="stylesheet" href="./static/css/bulma-slider.min.css">
  <!-- <link rel="stylesheet" href="./static/css/fontawesome.all.min.css"> -->
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
  <link rel="stylesheet" href="./static/css/index.css">
  <link rel="icon" href="./static/images/favicon.svg">

  <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
  <!-- <script defer src="./static/js/fontawesome.all.min.js"></script> -->
  <!-- <script src="./static/js/bulma-carousel.min.js"></script>
  <script src="./static/js/bulma-slider.min.js"></script> -->
  <script src="./static/js/index.js"></script>
  <!-- NOTE(review): this is a static page, so the previous PHP cache-busting
       query string ("?v=<?php echo time(); ?") was never executed and reached
       the browser verbatim; dropped. Substitute a build-time version string if
       cache busting is needed. -->
  <script src="./static/js/map.js"></script>
  <script src="./static/js/bibtex.js"></script>
</head>
<body>
<!-- Hero: title, author list, affiliation, and resource link buttons. -->
<section class="hero">
  <div class="hero-body">
    <div class="container is-max-desktop">
      <div class="columns is-centered">
        <div class="column has-text-centered">
          <h1 class="title is-2 publication-title">CREStE: Scalable Mapless Navigation with Internet Scale Priors and
            Counterfactual Guidance</h1>
          <div class="is-size-5 publication-authors">
            <span class="author-block">
              <a href="https://www.arthurkzhang.com">Arthur Zhang</a>,</span>
            <span class="author-block">
              <a href="https://hari-sikchi.github.io">Harshit Sikchi</a>,</span>
            <span class="author-block">
              <a href="https://amyzhang.github.io">Amy Zhang</a>,
            </span>
            <span class="author-block">
              <a href="https://www.joydeepb.com">Joydeep Biswas</a>
            </span>
          </div>
          <div class="is-size-5 publication-authors">
            <span class="author-block">University of Texas at Austin</span>
          </div>
          <div class="column has-text-centered">
            <img class="responsive-logo" style="width: 35%" src="./static/images/logos.jpg" alt="Lab Logos">
            <div class="publication-links">
              <!-- Paper PDF. -->
              <span class="link-block">
                <a href="./static/data/creste_paper.pdf" class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                    <i class="fas fa-file-pdf"></i>
                  </span>
                  <span>Paper</span>
                </a>
              </span>
              <!-- arXiv: no URL yet, so this is an inert placeholder. The previous
                   href="" reloaded the current page when clicked. -->
              <span class="link-block">
                <a class="external-link button is-normal is-rounded is-dark" aria-disabled="true">
                  <span class="icon">
                    <i class="ai ai-arxiv"></i>
                  </span>
                  <span>arXiv (Coming Soon!)</span>
                </a>
              </span>
              <!-- Deployment video. -->
              <span class="link-block">
                <a href="https://youtu.be/bC8vUrO3VmA" class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                    <i class="fab fa-youtube"></i>
                  </span>
                  <span>Downtown Deployment</span>
                </a>
              </span>
              <!-- Code: inert placeholder until the repository is public. -->
              <span class="link-block">
                <a class="external-link button is-normal is-rounded is-dark" aria-disabled="true">
                  <span class="icon">
                    <i class="fab fa-github"></i>
                  </span>
                  <span>Code (Coming Soon!)</span>
                </a>
              </span>
              <!-- Dataset link (disabled until data release).
              <span class="link-block">
                <a href="https://github.com/google/nerfies/releases/tag/0.1"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                    <i class="far fa-images"></i>
                  </span>
                  <span>Data</span>
                </a>
              </span> -->
            </div>
          </div>
        </div>
      </div>
    </div>
  </div><!-- hero-body: this closing div was missing, leaving the section unbalanced. -->
</section>
<!-- TODO: Implement Mosaic Code Here -->
<!-- Teaser: one-line summary plus the main method video. -->
<section class="hero teaser">
  <!-- Caption for the teaser video. -->
  <div class="container is-max-desktop is-size-5 has-text-justified">
    <b>
      CREStE learns representations and rewards for mapless navigation by distilling priors from visual
      foundation models trained on internet scale data and learning from counterfactual demonstrations.
    </b>
  </div>
  <div class="container is-full-width">
    <!-- The height attribute accepts pixel values only, so the previous
         height="50%" was invalid and ignored by browsers; removed. -->
    <video id="dollyzoom" style="border: 1px solid darkgrey; border-radius: 8px;" autoplay controls muted playsinline>
      <source src="./static/videos/publicready_mainmethod.mp4" type="video/mp4">
      Your browser does not support the video tag.
    </video>
  </div>
</section>
<!-- Abstract. -->
<section class="section">
  <div class="container is-max-desktop">
    <div class="columns is-centered has-text-centered">
      <div class="column is-four-fifths">
        <h2 class="title is-3">Abstract</h2>
        <div class="content has-text-justified">
          <p>
            <b>CREStE (Counterfactuals for Reward Enhancement with Structured Embeddings)</b> is the first approach to
            learn representations that address the full mapless navigation problem. CREStE learns generalizable
            bird's eye view (BEV) scene representations for urban environments by distilling priors from visual
            foundation models trained on internet-scale data. Using this representation, we predict BEV reward maps
            for navigation that are aligned with expert and counterfactual demonstrations. CREStE outperforms all
            state-of-the-art approaches in mapless urban navigation, traversing a
            <b>2 kilometer mission with just 1 intervention</b>, demonstrating our generalizability to unseen
            semantic entities and terrains, challenging scenarios with little room for error, and fine-grained
            human preferences.
          </p>
          <p>
            <!-- Typo fix: "acheives"/"acheive" -> "achieves"/"achieve". -->
            Our approach achieves this without an exhaustive list of semantic classes, large-scale robot datasets,
            or carefully designed reward functions. We achieve this with the following contributions: 1) A novel
            model architecture and learning objective that leverages visual foundation models to learn geometrically
            grounded semantic, geometric, and instance-aware representations 2) A counterfactual-based inverse
            reinforcement learning objective and framework for learning reward functions that attend to the most
            important features for navigation.
          </p>
        </div>
      </div>
    </div>
  </div>
</section>
<!-- Method overview: two columns (foundation-model priors, counterfactual rewards). -->
<section class="section">
  <div class="container is-max-desktop">
    <div class="columns is-centered is-full-width">
      <!-- Foundation model priors. -->
      <div class="column">
        <div class="content">
          <h2 class="title is-3">Learning Priors from Visual Foundation Models</h2>
          <p>
            CREStE proposes a novel architecture and distillation objective for synergizing semantic and instance
            priors from Dinov2 and SegmentAnythingv2, resulting in a lightweight perceptual encoder that predicts a
            generalizable BEV representation from a single RGB-D image.
          </p>
          <!-- id renamed: "dollyzoom" was duplicated from the teaser video, and ids
               must be document-unique. The invalid percentage height attribute is
               also dropped (pixel values only). -->
          <video id="perceptual-encoder-video" autoplay controls muted playsinline>
            <source src="./static/videos/perceptualencoder.mp4" type="video/mp4">
            Your browser does not support the video tag.
          </video>
        </div>
      </div>
      <!-- Counterfactual rewards. -->
      <div class="column">
        <div class="columns is-centered is-full-width">
          <div class="column content">
            <h2 class="title is-3">Learning Rewards from Counterfactuals</h2>
            <p>
              CREStE introduces a principled counterfactual-based inverse reinforcement learning objective and active
              learning framework that queries humans for counterfactual annotations to align rewards with human
              preferences.
            </p>
            <video id="reward-framework-video" autoplay controls muted playsinline>
              <source src="./static/videos/rewardframework.mp4" type="video/mp4">
              Your browser does not support the video tag.
            </video>
          </div>
        </div>
      </div>
    </div><!-- columns: closing div was missing. -->
  </div><!-- container: closing div was missing. -->
</section>
<!-- Kilometer-scale deployment: interactive map with clip markers (driven by map.js). -->
<section class="section">
  <div class="container is-max-desktop">
    <div class="columns is-centered">
      <div class="column is-full-width">
        <h2 class="title is-3">Kilometer Scale Mapless Navigation Deployment</h2>
        <div class="content has-text-justified">
          <p>
            We deploy CREStE on a 2 kilometer unseen urban loop to evaluate it on the task of long-horizon mapless
            navigation. Trained with only 2.5 hours of real-world demonstrations, CREStE is able to complete the
            entire mission with just a single intervention, demonstrating its robustness and generalizability to
            diverse urban environments. We include short clips from this deployment below, including the sole
            failure, and link the full uncut video externally for viewing.
          </p>
          <!-- Container for the interactive map. -->
          <div class="interactive-map-container">
            <!-- Left column: static map image; markers are injected by map.js. -->
            <div class="map-container">
              <img src="./static/images/map.jpg" alt="Map" class="map-image">
              <!-- Markers injected by JS -->
            </div>
            <!-- Right column: video player for the selected marker. -->
            <div class="video-container" id="videoContent">
              <p>Select a location to expand a video.</p>
            </div>
          </div>
          <!-- Preview popup shown when hovering over markers (content injected by JS). -->
          <div id="previewPopup" class="preview-popup"></div>
        </div>
      </div>
    </div>
  </div><!-- container: closing div was missing. -->
</section>
<!-- Quantitative studies: results table plus three side-by-side baseline videos. -->
<section class="section">
  <div class="container is-max-desktop">
    <div class="columns is-vcentered">
      <div class="column is-centered">
        <div class="content is-full-width">
          <h2 class="title is-3">Additional Quantitative Studies</h2>
          <p>
            <!-- <text> is an SVG-only element and invalid in HTML; replaced with <span>. -->
            We evaluate CREStE in 5 different urban environments across Austin, Texas with a variety of challenging
            terrains, dynamic obstacles, and diverse semantic entities. We denote the
            <span style="color:red"><b>unseen environments below in red</b></span> and
            <span style="color:green"><b>seen environments below in green</b></span>. We compare CREStE against
            SOTA mapless navigation approaches, and measure the average time to reach subgoal <b>(AST)</b>,
            percentage of subgoals reached per mission <b>(%S)</b>, and the number of interventions required per
            100 meters <b>(NIR)</b>.
          </p>
          <img class="image" src="./static/images/shorthorizonexperiments.jpg" alt="Additional Quantitative Studies">
        </div>
        <div class="content is-full-width">
          Below, we compare CREStE against two SOTA baselines that perform geometric obstacle avoidance and follow
          terrain-preferences. While these approaches consider important factors for navigation, they are unable to
          generalize to diverse urban scenes with uneven elevation, unseen semantic classes and terrains, and novel
          lighting and viewpoint conditions. See our paper for full details on our quantitative experiments.
        </div>
        <!-- The three captions previously mixed <h4>, a mismatched <h3>…</h4> pair,
             and a <p>; normalized to matching <h4 class="title is-4"> headings. -->
        <div class="columns is-full-width">
          <!-- Video 1: geometric-only baseline. -->
          <div class="column">
            <h4 class="title is-4">Geometric Only</h4>
            <div class="video-wrapper">
              <video src="./static/videos/paperexperimentsprocessed/hemphillpark_geometric.mp4" autoplay controls
                     muted loop playsinline>
              </video>
            </div>
          </div>
          <!-- Video 2: terrain + geometric baseline. -->
          <div class="column">
            <h4 class="title is-4">Terrain + Geometric (PACER+G)</h4>
            <div class="video-wrapper">
              <video src="./static/videos/paperexperimentsprocessed/hemphillpark_pacer.mp4" autoplay controls
                     muted loop playsinline>
              </video>
            </div>
          </div>
          <!-- Video 3: our approach. -->
          <div class="column">
            <h4 class="title is-4">CREStE (Ours)</h4>
            <div class="video-wrapper">
              <video src="./static/videos/paperexperimentsprocessed/hemphillpark_creste.mp4" autoplay controls
                     muted loop playsinline>
              </video>
            </div>
          </div>
        </div>
      </div>
    </div>
  </div>
  <!-- A stray extra </div> that followed the container close has been removed. -->
</section>
<!-- Acknowledgements: funding sources for the AMRL and MIDI labs. -->
<section class="section">
  <div class="container is-max-desktop">
    <div class="columns is-vcentered">
      <!-- Left Column: Text -->
      <div class="column is-centered">
        <div class="content is-full-width">
          <h2 class="title is-3">Acknowledgements</h2>
          <p>
            This work has taken place in the Autonomous Mobile Robotics Laboratory (AMRL) and Machine Decision-making
            through Interaction Laboratory (MIDI) at UT Austin. AMRL research is supported in part by NSF
            (CAREER-2046955, PARTNER-2402650) and ARO (W911NF-24-2-0025). MIDI research is supported in part by NSF
            (CAREER-2340651, PARTNER-2402650), DARPA (HR00112490431), and ARO (W911NF-24-1-0193). Any opinions,
            findings, and conclusions expressed in this material are those of the authors and do not necessarily
            reflect the views of the sponsors.
          </p>
        </div>
      </div>
    </div>
  </div>
</section>
<!-- BibTeX citation with a copy-to-clipboard button (handled by bibtex.js). -->
<section class="section" id="BibTeX">
  <div class="container is-max-desktop content">
    <h2 class="title">BibTeX</h2>
    <div class="bibtex-container" style="position: relative;">
<!-- NOTE(review): author fixed from "Sikchi, Harsh" to "Sikchi, Harshit" to match
     the author list in the page header — confirm against the published record.
     Lines inside <pre> intentionally keep no extra indentation. -->
<pre><code id="bibtexCode">@article{zhang2025creste,
author = {Zhang, Arthur and Sikchi, Harshit and Zhang, Amy and Biswas, Joydeep},
title = {CREStE: Scalable Mapless Navigation with Internet Scale Priors and Counterfactual Guidance},
journal = {arXiv},
year = {2025},
}</code></pre>
      <!-- type="button" prevents the default submit behavior of <button>. -->
      <button id="copyBibtexBtn" class="copy-btn" type="button" title="Copy BibTeX" aria-label="Copy BibTeX">
        <i class="fas fa-copy" aria-hidden="true"></i>
      </button>
    </div>
  </div>
</section>
<!-- Footer: icon links plus template attribution. -->
<footer class="footer">
  <div class="container">
    <div class="content has-text-centered">
      <a class="icon-link" href="./static/data/creste_paper.pdf" aria-label="Paper PDF">
        <i class="fas fa-file-pdf" aria-hidden="true"></i>
      </a>
      <!-- `disabled` is not a valid attribute on <a> and had no effect; removed. -->
      <a class="icon-link external-link" href="https://github.com/artzha" aria-label="GitHub">
        <i class="fab fa-github" aria-hidden="true"></i>
      </a>
    </div>
    <div class="columns is-centered">
      <div class="column is-8">
        <div class="content">
          <p>
            Website source code from <a href="https://github.com/nerfies/nerfies.github.io">
              <span class="dnerf">Nerfies</span>
            </a>
          </p>
        </div>
      </div>
    </div>
  </div>
</footer>
</body>
</html>