<!-- index.html -->

<!DOCTYPE html>
<html lang="en">
<head>
  <!-- Page metadata: encoding, responsive viewport, title, and description. -->
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>BoDiffusion: Diffusing Sparse Observations for Full-Body Human Motion Synthesis</title>
  <meta name="description"
        content="BoDiffusion: Diffusing Sparse Observations for Full-Body Human Motion Synthesis.">

  <!-- Web fonts. -->
  <link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro"
        rel="stylesheet">

  <!-- Stylesheets: Bulma and its extensions, icon sets, then index.css last. -->
  <link rel="stylesheet" href="./static/css/bulma.min.css">
  <link rel="stylesheet" href="./static/css/bulma-carousel.min.css">
  <link rel="stylesheet" href="./static/css/bulma-slider.min.css">
  <link rel="stylesheet" href="./static/css/fontawesome.all.min.css">
  <link rel="stylesheet"
        href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
  <link rel="stylesheet" href="./static/css/index.css">
  <link rel="icon" href="./static/images/favicon.svg">

  <!-- Every script is deferred so none of them blocks HTML parsing. Deferred
       scripts still execute in document order, so jQuery is loaded before the
       Bulma plugins and index.js run. -->
  <script defer src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
  <script defer src="./static/js/fontawesome.all.min.js"></script>
  <script defer src="./static/js/bulma-carousel.min.js"></script>
  <script defer src="./static/js/bulma-slider.min.js"></script>
  <script defer src="./static/js/index.js"></script>
</head>
<body>

<!-- Top navigation bar. Only the burger toggle is present; this file contains
     no navbar-menu markup for it to open. -->
<nav class="navbar" aria-label="main navigation">
  <div class="navbar-brand">
    <a role="button" class="navbar-burger" aria-label="menu" aria-expanded="false">
      <!-- The three spans are the decorative bars of the burger icon. -->
      <span aria-hidden="true"></span>
      <span aria-hidden="true"></span>
      <span aria-hidden="true"></span>
    </a>
  </div>
</nav>


<!-- Title block: paper title, authors, affiliations, and resource links. -->
<section class="hero">
  <div class="hero-body">
    <div class="container is-max-desktop">
      <div class="columns is-centered">
        <div class="column has-text-centered">
          <h1 class="title is-1 publication-title">BoDiffusion: Diffusing Sparse Observations for Full-Body Human Motion Synthesis</h1>

          <!-- Authors. Superscript numbers refer to the affiliations below;
               the asterisk marks equal contribution. -->
          <div class="is-size-5 publication-authors">
            <span class="author-block">
              <a href="https://angelacast135.github.io">Angela Castillo</a><sup>*1</sup>,
            </span>
            <span class="author-block">
              <a href="https://mc-escobar11.github.io">Maria Escobar</a><sup>*1</sup>,
            </span>
            <span class="author-block">
              <a href="https://guillaumejs2403.github.io">Guillaume Jeanneret</a><sup>2</sup>,
            </span>
            <span class="author-block">
              <a href="https://www.albertpumarola.com">Albert Pumarola</a><sup>3</sup>,
            </span>
            <br>
            <span class="author-block">
              <!-- Ampersands in query strings are escaped as &amp; inside attribute values. -->
              <a href="https://scholar.google.com/citations?user=k0nZO90AAAAJ&amp;hl=en&amp;oi=ao">Pablo Arbeláez</a><sup>1</sup>,
            </span>
            <span class="author-block">
              <a href="https://scholar.google.com/citations?user=7T0CPEkAAAAJ&amp;hl=en">Ali Thabet</a><sup>3</sup>,
            </span>
            <span class="author-block">
              <a href="https://gdude.de">Artsiom Sanakoyeu</a><sup>3</sup>
            </span>
          </div>

          <!-- Affiliations. -->
          <div class="is-size-5 publication-authors">
            <span class="author-block"><sup>1</sup>CINFONIA, Universidad de Los Andes, </span>
            <span class="author-block"><sup>2</sup>University of Caen Normandie, ENSICAEN, CNRS, France, </span>
            <br>
            <span class="author-block"><sup>3</sup>Meta AI</span>
          </div>
          <div class="is-size-5 publication-authors">
            <span class="author-block"><sup>*</sup>Denotes equal contribution.</span>
          </div>

          <!-- Resource buttons: paper PDF, arXiv abstract page, and code repository. -->
          <div class="column has-text-centered">
            <div class="publication-links">
              <!-- PDF Link. -->
              <span class="link-block">
                <a href="https://arxiv.org/pdf/2304.11118.pdf"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                    <i class="fas fa-file-pdf"></i>
                  </span>
                  <span>Paper</span>
                </a>
              </span>
              <!-- arXiv Link. -->
              <span class="link-block">
                <a href="https://arxiv.org/abs/2304.11118"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                    <i class="ai ai-arxiv"></i>
                  </span>
                  <span>arXiv</span>
                </a>
              </span>
              <!-- Code Link. -->
              <span class="link-block">
                <a href="https://github.com/BCV-Uniandes/BoDiffusion"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                    <i class="fab fa-github"></i>
                  </span>
                  <span>Code</span>
                </a>
              </span>
            </div>
          </div>
        </div>
      </div>
    </div>
  </div>
</section>

<!-- Teaser video with its one-line caption. -->
<section class="hero teaser">
  <div class="container is-max-desktop">
    <div class="hero-body">
      <!-- The video autoplays muted and loops; `controls` gives viewers a way
           to pause the motion. The text and link inside <video> are fallback
           content shown only when the browser cannot play the video. -->
      <video id="teaser" autoplay controls muted loop playsinline height="100%">
        <source src="./static/videos/BoDiffusion_final1.mp4"
                type="video/mp4">
        Your browser does not support embedded video.
        <a href="./static/videos/BoDiffusion_final1.mp4">Download the teaser video</a>.
      </video>
      <h2 class="subtitle has-text-centered">
        <span class="dnerf">BoDiffusion</span> synthesizes more accurate motions with substantially less jitter than AvatarPoser.
      </h2>
    </div>
  </div>
</section>


<!-- <section class="hero is-light is-small">
  <div class="hero-body">
    <div class="container">
      <div id="results-carousel" class="carousel results-carousel">
        <div class="item item-steve">
          <video poster="" id="steve" autoplay controls muted loop playsinline height="100%">
            <source src="./static/videos/steve.mp4"
                    type="video/mp4">
          </video>
        </div>
        <div class="item item-chair-tp">
          <video poster="" id="chair-tp" autoplay controls muted loop playsinline height="100%">
            <source src="./static/videos/chair-tp.mp4"
                    type="video/mp4">
          </video>
        </div>
        <div class="item item-shiba">
          <video poster="" id="shiba" autoplay controls muted loop playsinline height="100%">
            <source src="./static/videos/shiba.mp4"
                    type="video/mp4">
          </video>
        </div>
        <div class="item item-fullbody">
          <video poster="" id="fullbody" autoplay controls muted loop playsinline height="100%">
            <source src="./static/videos/fullbody.mp4"
                    type="video/mp4">
          </video>
        </div>
        <div class="item item-blueshirt">
          <video poster="" id="blueshirt" autoplay controls muted loop playsinline height="100%">
            <source src="./static/videos/blueshirt.mp4"
                    type="video/mp4">
          </video>
        </div>
        <div class="item item-mask">
          <video poster="" id="mask" autoplay controls muted loop playsinline height="100%">
            <source src="./static/videos/mask.mp4"
                    type="video/mp4">
          </video>
        </div>
        <div class="item item-coffee">
          <video poster="" id="coffee" autoplay controls muted loop playsinline height="100%">
            <source src="./static/videos/coffee.mp4"
                    type="video/mp4">
          </video>
        </div>
        <div class="item item-toby">
          <video poster="" id="toby" autoplay controls muted loop playsinline height="100%">
            <source src="./static/videos/toby2.mp4"
                    type="video/mp4">
          </video>
        </div>
      </div>
    </div>
  </div>
</section> -->


<!-- Abstract: two justified paragraphs under a centered heading. -->
<section class="section">
  <div class="container is-max-desktop">
    <!-- Abstract. -->
    <div class="columns is-centered has-text-centered">
      <div class="column is-four-fifths">
        <h2 class="title is-3">Abstract</h2>
        <div class="content has-text-justified">
          <p>
            Mixed reality applications require tracking the user's full-body motion to enable an
            immersive experience. However, typical head-mounted devices can only track head
            and hand movements, leading to a limited reconstruction of full-body motion due
            to variability in lower body configurations.
          </p>

          <p>
            We propose <span class="dnerf">BoDiffusion</span> — a generative diffusion model for motion synthesis to
            tackle this under-constrained reconstruction problem. We present a time and
            space conditioning scheme that allows <span class="dnerf">BoDiffusion</span> to leverage sparse tracking
            inputs while generating smooth and realistic full-body motion sequences.
            To the best of our knowledge, this is the first approach that uses the reverse
            diffusion process to model full-body tracking as a conditional sequence generation task.
            We conduct experiments on the large-scale motion-capture dataset AMASS and show
            that our approach outperforms the state-of-the-art by a significant margin in
            terms of full-body motion realism and joint reconstruction error.
          </p>
        </div>
      </div>
    </div>
    <!--/ Abstract. -->
  </div>
</section>


<!-- Method overview and additional qualitative examples. Each figure's alt
     text summarizes what the adjacent heading and paragraph say it shows. -->
<section class="section">
  <div class="container is-max-desktop">

    <!-- Method. -->
    <div class="columns is-centered">
      <div class="column is-full-width">
        <h2 class="title is-3"><span class="dnerf">BoDiffusion</span></h2>

        <!-- Architecture. -->
        <h3 class="title is-4">Architecture</h3>
        <div class="column content">
          <p>
            <span class="dnerf">BoDiffusion</span> is a diffusion process synthesizing
            full-body motion using sparse tracking signals as conditioning.
          </p>
          <div class="content has-text-centered">
            <img src="./static/images/Bodiffusion.png"
                 class="interpolation-image"
                 alt="Overview of the BoDiffusion architecture.">
          </div>
        </div>

        <!-- Denoising steps. -->
        <h3 class="title is-4">Denoising Steps</h3>
        <div class="column content">
          <p>
            During inference, we start from random
            Gaussian noise and perform T denoising steps until we reach a clean
            output motion.
          </p>
          <div class="content has-text-centered">
            <img src="./static/images/denoising_steps.png"
                 class="interpolation-image"
                 alt="Denoising steps from random Gaussian noise to a clean output motion.">
          </div>
        </div>

        <!-- Qualitative result (no heading of its own). -->
        <div class="column content">
          <p>
            <span class="dnerf">BoDiffusion</span> synthesizes substantially more accurate
            and plausible full-body poses, particularly in the lower body where no
            IMU data are captured.
          </p>
          <div class="content has-text-centered">
            <img src="./static/images/seq_color_dit.png"
                 class="interpolation-image"
                 alt="Example full-body pose sequences synthesized by BoDiffusion.">
          </div>
        </div>
      </div>
    </div>
    <!--/ Method. -->

    <!-- More examples. -->
    <div class="columns is-centered">
      <div class="column is-full-width">
        <h2 class="title is-3">More Examples</h2>

        <!-- Unconventional poses. -->
        <h3 class="title is-4">Unconventional Poses</h3>
        <div class="column content">
          <p>
            <span class="dnerf">BoDiffusion</span> predicts plausible poses even for uncommon
            movements like crouching or lying down.
          </p>
          <div class="content has-text-centered">
            <img src="./static/images/supp_mat_indv_AP.png"
                 class="interpolation-image"
                 alt="BoDiffusion pose predictions for uncommon movements such as crouching or lying down.">
          </div>
        </div>

        <!-- Error on individual poses. -->
        <h3 class="title is-4">Error on Individual Poses</h3>
        <div class="column content">
          <p>
            <span class="dnerf">BoDiffusion</span> predicts poses with higher fidelity
            to the ground truth. In contrast, AvatarPoser struggles
            to predict accurate lower-body configurations.
          </p>
          <div class="content has-text-centered">
            <img src="./static/images/supp_mat_indv_flag.png"
                 class="interpolation-image"
                 alt="Individual poses predicted by BoDiffusion and AvatarPoser compared with the ground truth.">
          </div>
        </div>
        <br>

      </div>
    </div>
    <!--/ More examples. -->

  </div>
</section>


<!-- Citation block. The entry type is @inproceedings because the entry uses
     `booktitle`, a field that BibTeX's @article type does not use. Whitespace
     inside <pre> is rendered verbatim, so the entry is not indented with the
     surrounding markup. -->
<section class="section" id="BibTeX">
  <div class="container is-max-desktop content">
    <h2 class="title">BibTeX</h2>
    <pre><code>@inproceedings{castillo2023bodiffusion,
  author    = {Castillo, Angela and Escobar, Maria and Jeanneret, Guillaume and Pumarola, Albert and Arbeláez, Pablo and Thabet, Ali and Sanakoyeu, Artsiom},
  title     = {BoDiffusion: Diffusing Sparse Observations for Full-Body Human Motion Synthesis},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision},
  year      = {2023},
}</code></pre>
  </div>
</section>


<!-- Page footer: icon links and template attribution. -->
<footer class="footer">
  <div class="container">
    <!-- Icon-only links. aria-label supplies the accessible name that the
         icons alone do not provide. -->
    <div class="content has-text-centered">
      <a class="icon-link"
         href="./static/images/2304.11118.pdf"
         aria-label="Paper (PDF)">
        <i class="fas fa-file-pdf"></i>
      </a>
      <a class="icon-link"
         href="https://github.com/angelacast135"
         aria-label="GitHub">
        <i class="fab fa-github"></i>
      </a>
    </div>
    <div class="columns is-centered">
      <div class="column is-8">
        <div class="content">
          <!-- <p>
            This website is licensed under a <a rel="license"
                                                href="http://creativecommons.org/licenses/by-sa/4.0/">Creative
            Commons Attribution-ShareAlike 4.0 International License</a>.
          </p> -->
          <p>
            This website uses the template from this <a
              href="https://github.com/nerfies/nerfies.github.io">source code</a>.
          </p>
        </div>
      </div>
    </div>
  </div>
</footer>

</body>
</html>