forked from nerfies/nerfies.github.io
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.html
401 lines (371 loc) · 17.3 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="description"
        content="CREStE: Scalable Mapless Navigation with Internet Scale Priors and Counterfactual Guidance">
  <meta name="keywords" content="CREStE, creste, mapless navigation, internet scale priors, counterfactual guidance">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>CREStE: Scalable Mapless Navigation with Internet Scale Priors and Counterfactual Guidance</title>

  <!-- Global site tag (gtag.js) - Google Analytics -->
  <script async src="https://www.googletagmanager.com/gtag/js?id=G-PYVRSFMDRL"></script>
  <script>
    window.dataLayer = window.dataLayer || [];
    function gtag() {
      dataLayer.push(arguments);
    }
    gtag('js', new Date());
    gtag('config', 'G-PYVRSFMDRL');
  </script>

  <link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro" rel="stylesheet">
  <link rel="stylesheet" href="./static/css/bulma.min.css">
  <link rel="stylesheet" href="./static/css/bulma-carousel.min.css">
  <link rel="stylesheet" href="./static/css/bulma-slider.min.css">
  <!-- <link rel="stylesheet" href="./static/css/fontawesome.all.min.css"> -->
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
  <link rel="stylesheet" href="./static/css/index.css">
  <link rel="icon" href="./static/images/favicon.svg">

  <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
  <!-- <script defer src="./static/js/fontawesome.all.min.js"></script> -->
  <!-- <script src="./static/js/bulma-carousel.min.js"></script>
  <script src="./static/js/bulma-slider.min.js"></script> -->
  <script src="./static/js/index.js"></script>
  <!-- NOTE(review): this is a static page, so the previous PHP cache-busting
       query string ("?v=<?php echo time(); ?") was never executed and reached
       the browser verbatim; dropped. Substitute a build-time version string if
       cache busting is needed. -->
  <script src="./static/js/map.js"></script>
  <script src="./static/js/bibtex.js"></script>
</head>
<body>
<!-- Hero: title, author list, affiliation, and resource link buttons. -->
<section class="hero">
  <div class="hero-body">
    <div class="container is-max-desktop">
      <div class="columns is-centered">
        <div class="column has-text-centered">
          <h1 class="title is-2 publication-title">CREStE: Scalable Mapless Navigation with Internet Scale Priors and
            Counterfactual Guidance</h1>
          <div class="is-size-5 publication-authors">
            <span class="author-block">
              <a href="https://www.arthurkzhang.com">Arthur Zhang</a>,</span>
            <span class="author-block">
              <a href="https://hari-sikchi.github.io">Harshit Sikchi</a>,</span>
            <span class="author-block">
              <a href="https://amyzhang.github.io">Amy Zhang</a>,
            </span>
            <span class="author-block">
              <a href="https://www.joydeepb.com">Joydeep Biswas</a>
            </span>
          </div>
          <div class="is-size-5 publication-authors">
            <span class="author-block">University of Texas at Austin</span>
          </div>
          <div class="column has-text-centered">
            <img class="responsive-logo" style="width: 35%" src="./static/images/logos.jpg" alt="Lab Logos">
            <div class="publication-links">
              <!-- Paper PDF. -->
              <span class="link-block">
                <a href="./static/data/creste_paper.pdf" class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                    <i class="fas fa-file-pdf"></i>
                  </span>
                  <span>Paper</span>
                </a>
              </span>
              <!-- arXiv: no URL yet, so this is an inert placeholder. The previous
                   href="" reloaded the current page when clicked. -->
              <span class="link-block">
                <a class="external-link button is-normal is-rounded is-dark" aria-disabled="true">
                  <span class="icon">
                    <i class="ai ai-arxiv"></i>
                  </span>
                  <span>arXiv (Coming Soon!)</span>
                </a>
              </span>
              <!-- Deployment video. -->
              <span class="link-block">
                <a href="https://youtu.be/bC8vUrO3VmA" class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                    <i class="fab fa-youtube"></i>
                  </span>
                  <span>Downtown Deployment</span>
                </a>
              </span>
              <!-- Code: inert placeholder until the repository is public. -->
              <span class="link-block">
                <a class="external-link button is-normal is-rounded is-dark" aria-disabled="true">
                  <span class="icon">
                    <i class="fab fa-github"></i>
                  </span>
                  <span>Code (Coming Soon!)</span>
                </a>
              </span>
              <!-- Dataset link (disabled until data release).
              <span class="link-block">
                <a href="https://github.com/google/nerfies/releases/tag/0.1"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                    <i class="far fa-images"></i>
                  </span>
                  <span>Data</span>
                </a>
              </span> -->
            </div>
          </div>
        </div>
      </div>
    </div>
  </div><!-- hero-body: this closing div was missing, leaving the section unbalanced. -->
</section>
<!-- TODO: Implement Mosaic Code Here -->
<!-- Teaser: one-line summary plus the main method video. -->
<section class="hero teaser">
  <!-- Caption for the teaser video. -->
  <div class="container is-max-desktop is-size-5 has-text-justified">
    <b>
      CREStE learns representations and rewards for mapless navigation by distilling priors from visual
      foundation models trained on internet scale data and learning from counterfactual demonstrations.
    </b>
  </div>
  <div class="container is-full-width">
    <!-- The height attribute accepts pixel values only, so the previous
         height="50%" was invalid and ignored by browsers; removed. -->
    <video id="dollyzoom" style="border: 1px solid darkgrey; border-radius: 8px;" autoplay controls muted playsinline>
      <source src="./static/videos/publicready_mainmethod.mp4" type="video/mp4">
      Your browser does not support the video tag.
    </video>
  </div>
</section>
<!-- Abstract. -->
<section class="section">
  <div class="container is-max-desktop">
    <div class="columns is-centered has-text-centered">
      <div class="column is-four-fifths">
        <h2 class="title is-3">Abstract</h2>
        <div class="content has-text-justified">
          <p>
            <b>CREStE (Counterfactuals for Reward Enhancement with Structured Embeddings)</b> is the first approach to
            learn representations that address the full mapless navigation problem. CREStE learns generalizable
            bird's eye view (BEV) scene representations for urban environments by distilling priors from visual
            foundation models trained on internet-scale data. Using this representation, we predict BEV reward maps
            for navigation that are aligned with expert and counterfactual demonstrations. CREStE outperforms all
            state-of-the-art approaches in mapless urban navigation, traversing a
            <b>2 kilometer mission with just 1 intervention</b>, demonstrating our generalizability to unseen
            semantic entities and terrains, challenging scenarios with little room for error, and fine-grained
            human preferences.
          </p>
          <p>
            <!-- Typo fix: "acheives"/"acheive" -> "achieves"/"achieve". -->
            Our approach achieves this without an exhaustive list of semantic classes, large-scale robot datasets,
            or carefully designed reward functions. We achieve this with the following contributions: 1) A novel
            model architecture and learning objective that leverages visual foundation models to learn geometrically
            grounded semantic, geometric, and instance-aware representations 2) A counterfactual-based inverse
            reinforcement learning objective and framework for learning reward functions that attend to the most
            important features for navigation.
          </p>
        </div>
      </div>
    </div>
  </div>
</section>
<!-- Method overview: two columns (foundation-model priors, counterfactual rewards). -->
<section class="section">
  <div class="container is-max-desktop">
    <div class="columns is-centered is-full-width">
      <!-- Foundation model priors. -->
      <div class="column">
        <div class="content">
          <h2 class="title is-3">Learning Priors from Visual Foundation Models</h2>
          <p>
            CREStE proposes a novel architecture and distillation objective for synergizing semantic and instance
            priors from Dinov2 and SegmentAnythingv2, resulting in a lightweight perceptual encoder that predicts a
            generalizable BEV representation from a single RGB-D image.
          </p>
          <!-- id renamed: "dollyzoom" was duplicated from the teaser video, and ids
               must be document-unique. The invalid percentage height attribute is
               also dropped (pixel values only). -->
          <video id="perceptual-encoder-video" autoplay controls muted playsinline>
            <source src="./static/videos/perceptualencoder.mp4" type="video/mp4">
            Your browser does not support the video tag.
          </video>
        </div>
      </div>
      <!-- Counterfactual rewards. -->
      <div class="column">
        <div class="columns is-centered is-full-width">
          <div class="column content">
            <h2 class="title is-3">Learning Rewards from Counterfactuals</h2>
            <p>
              CREStE introduces a principled counterfactual-based inverse reinforcement learning objective and active
              learning framework that queries humans for counterfactual annotations to align rewards with human
              preferences.
            </p>
            <video id="reward-framework-video" autoplay controls muted playsinline>
              <source src="./static/videos/rewardframework.mp4" type="video/mp4">
              Your browser does not support the video tag.
            </video>
          </div>
        </div>
      </div>
    </div><!-- columns: closing div was missing. -->
  </div><!-- container: closing div was missing. -->
</section>
<!-- Kilometer-scale deployment: interactive map with clip markers (driven by map.js). -->
<section class="section">
  <div class="container is-max-desktop">
    <div class="columns is-centered">
      <div class="column is-full-width">
        <h2 class="title is-3">Kilometer Scale Mapless Navigation Deployment</h2>
        <div class="content has-text-justified">
          <p>
            We deploy CREStE on a 2 kilometer unseen urban loop to evaluate it on the task of long-horizon mapless
            navigation. Trained with only 2.5 hours of real-world demonstrations, CREStE is able to complete the
            entire mission with just a single intervention, demonstrating its robustness and generalizability to
            diverse urban environments. We include short clips from this deployment below, including the sole
            failure, and link the full uncut video externally for viewing.
          </p>
          <!-- Container for the interactive map. -->
          <div class="interactive-map-container">
            <!-- Left column: static map image; markers are injected by map.js. -->
            <div class="map-container">
              <img src="./static/images/map.jpg" alt="Map" class="map-image">
              <!-- Markers injected by JS -->
            </div>
            <!-- Right column: video player for the selected marker. -->
            <div class="video-container" id="videoContent">
              <p>Select a location to expand a video.</p>
            </div>
          </div>
          <!-- Preview popup shown when hovering over markers (content injected by JS). -->
          <div id="previewPopup" class="preview-popup"></div>
        </div>
      </div>
    </div>
  </div><!-- container: closing div was missing. -->
</section>
<!-- Quantitative studies: results table plus three side-by-side baseline videos. -->
<section class="section">
  <div class="container is-max-desktop">
    <div class="columns is-vcentered">
      <div class="column is-centered">
        <div class="content is-full-width">
          <h2 class="title is-3">Additional Quantitative Studies</h2>
          <p>
            <!-- <text> is an SVG-only element and invalid in HTML; replaced with <span>. -->
            We evaluate CREStE in 5 different urban environments across Austin, Texas with a variety of challenging
            terrains, dynamic obstacles, and diverse semantic entities. We denote the
            <span style="color:red"><b>unseen environments below in red</b></span> and
            <span style="color:green"><b>seen environments below in green</b></span>. We compare CREStE against
            SOTA mapless navigation approaches, and measure the average time to reach subgoal <b>(AST)</b>,
            percentage of subgoals reached per mission <b>(%S)</b>, and the number of interventions required per
            100 meters <b>(NIR)</b>.
          </p>
          <img class="image" src="./static/images/shorthorizonexperiments.jpg" alt="Additional Quantitative Studies">
        </div>
        <div class="content is-full-width">
          Below, we compare CREStE against two SOTA baselines that perform geometric obstacle avoidance and follow
          terrain-preferences. While these approaches consider important factors for navigation, they are unable to
          generalize to diverse urban scenes with uneven elevation, unseen semantic classes and terrains, and novel
          lighting and viewpoint conditions. See our paper for full details on our quantitative experiments.
        </div>
        <!-- The three captions previously mixed <h4>, a mismatched <h3>…</h4> pair,
             and a <p>; normalized to matching <h4 class="title is-4"> headings. -->
        <div class="columns is-full-width">
          <!-- Video 1: geometric-only baseline. -->
          <div class="column">
            <h4 class="title is-4">Geometric Only</h4>
            <div class="video-wrapper">
              <video src="./static/videos/paperexperimentsprocessed/hemphillpark_geometric.mp4" autoplay controls
                     muted loop playsinline>
              </video>
            </div>
          </div>
          <!-- Video 2: terrain + geometric baseline. -->
          <div class="column">
            <h4 class="title is-4">Terrain + Geometric (PACER+G)</h4>
            <div class="video-wrapper">
              <video src="./static/videos/paperexperimentsprocessed/hemphillpark_pacer.mp4" autoplay controls
                     muted loop playsinline>
              </video>
            </div>
          </div>
          <!-- Video 3: our approach. -->
          <div class="column">
            <h4 class="title is-4">CREStE (Ours)</h4>
            <div class="video-wrapper">
              <video src="./static/videos/paperexperimentsprocessed/hemphillpark_creste.mp4" autoplay controls
                     muted loop playsinline>
              </video>
            </div>
          </div>
        </div>
      </div>
    </div>
  </div>
  <!-- A stray extra </div> that followed the container close has been removed. -->
</section>
<!-- Acknowledgements: funding sources for the AMRL and MIDI labs. -->
<section class="section">
  <div class="container is-max-desktop">
    <div class="columns is-vcentered">
      <!-- Left Column: Text -->
      <div class="column is-centered">
        <div class="content is-full-width">
          <h2 class="title is-3">Acknowledgements</h2>
          <p>
            This work has taken place in the Autonomous Mobile Robotics Laboratory (AMRL) and Machine Decision-making
            through Interaction Laboratory (MIDI) at UT Austin. AMRL research is supported in part by NSF
            (CAREER-2046955, PARTNER-2402650) and ARO (W911NF-24-2-0025). MIDI research is supported in part by NSF
            (CAREER-2340651, PARTNER-2402650), DARPA (HR00112490431), and ARO (W911NF-24-1-0193). Any opinions,
            findings, and conclusions expressed in this material are those of the authors and do not necessarily
            reflect the views of the sponsors.
          </p>
        </div>
      </div>
    </div>
  </div>
</section>
<!-- BibTeX citation with a copy-to-clipboard button (handled by bibtex.js). -->
<section class="section" id="BibTeX">
  <div class="container is-max-desktop content">
    <h2 class="title">BibTeX</h2>
    <div class="bibtex-container" style="position: relative;">
<!-- NOTE(review): author fixed from "Sikchi, Harsh" to "Sikchi, Harshit" to match
     the author list in the page header — confirm against the published record.
     Lines inside <pre> intentionally keep no extra indentation. -->
<pre><code id="bibtexCode">@article{zhang2025creste,
author = {Zhang, Arthur and Sikchi, Harshit and Zhang, Amy and Biswas, Joydeep},
title = {CREStE: Scalable Mapless Navigation with Internet Scale Priors and Counterfactual Guidance},
journal = {arXiv},
year = {2025},
}</code></pre>
      <!-- type="button" prevents the default submit behavior of <button>. -->
      <button id="copyBibtexBtn" class="copy-btn" type="button" title="Copy BibTeX" aria-label="Copy BibTeX">
        <i class="fas fa-copy" aria-hidden="true"></i>
      </button>
    </div>
  </div>
</section>
<!-- Footer: icon links plus template attribution. -->
<footer class="footer">
  <div class="container">
    <div class="content has-text-centered">
      <a class="icon-link" href="./static/data/creste_paper.pdf" aria-label="Paper PDF">
        <i class="fas fa-file-pdf" aria-hidden="true"></i>
      </a>
      <!-- `disabled` is not a valid attribute on <a> and had no effect; removed. -->
      <a class="icon-link external-link" href="https://github.com/artzha" aria-label="GitHub">
        <i class="fab fa-github" aria-hidden="true"></i>
      </a>
    </div>
    <div class="columns is-centered">
      <div class="column is-8">
        <div class="content">
          <p>
            Website source code from <a href="https://github.com/nerfies/nerfies.github.io">
              <span class="dnerf">Nerfies</span>
            </a>
          </p>
        </div>
      </div>
    </div>
  </div>
</footer>
</body>
</html>