<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<title>LION: Latent Point Diffusion Models for 3D Shape Generation</title>
<meta name="viewport" content="width=device-width, initial-scale=1">

<script type="text/javascript" charset="utf-8" src="https://ajax.googleapis.com/ajax/libs/jquery/1.3.2/jquery.min.js"></script>
<script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
<script type="text/javascript" src="../js/hidebib.js"></script>
<link href='https://fonts.googleapis.com/css?family=Titillium+Web:400,600,400italic,600italic,300,300italic' rel='stylesheet' type='text/css'>
<link href="https://fonts.googleapis.com/css2?family=Material+Icons" rel="stylesheet">

<style type="text/css">
body {
  font-family: "Titillium Web", "HelveticaNeue-Light", "Helvetica Neue Light", "Helvetica Neue", Helvetica, Arial, "Lucida Grande", sans-serif;
  font-weight: 300;
  font-size: 17px;
  margin-left: auto;
  margin-right: auto;
}

@media screen and (min-width: 980px) {
  body {
    width: 980px;
  }
}

h1 {
  font-size: 40px;
  font-weight: 500;
  line-height: 1.15em;
  text-align: center;
}

h2 {
  font-size: 1.75em;
  font-weight: 400;
  text-align: left;
  margin: 16px 0px 4px 0px;
}

h3 {
  font-weight: 600;
  text-align: left;
  margin: 16px 0px 4px 0px;
}

a:link, a:visited {
  color: #5364cc;
  text-decoration: none;
}

a:hover {
  color: #208799;
}

.paper-title {
  padding: 1px 0px 1px 0px;
}

section {
  margin: 32px 0px 32px 0px;
  text-align: justify;
  clear: both;
}

.col-5 {
  width: 20%;
  float: left;
}

.col-4 {
  width: 25%;
  float: left;
}

.col-3 {
  width: 33%;
  float: left;
}

.col-2 {
  width: 50%;
  float: left;
}

.col-1 {
  width: 100%;
  float: left;
}

.author-row, .affil-row {
  font-size: 26px;
}

.author-row-new {
  text-align: center;
}

.author-row-new a {
  display: inline-block;
  font-size: 20px;
  padding: 4px;
}

.author-row-new sup {
  color: #313436;
  font-size: 12px;
}

.affiliations-new {
  font-size: 18px;
  text-align: center;
  width: 80%;
  margin: 0 auto;
  margin-bottom: 20px;
}

.row {
  margin: 16px 0px 16px 0px;
}

.authors {
  font-size: 26px;
}

.affiliations {
  font-size: 18px;
}

.affil-row {
  margin-top: 18px;
}

.teaser {
  max-width: 100%;
}

.text-center {
  text-align: center;
}

.screenshot {
  width: 256px;
  border: 1px solid #ddd;
}

.screenshot-el {
  margin-bottom: 16px;
}

hr {
  height: 1px;
  border: 0;
  border-top: 1px solid #ddd;
  margin: 0;
}

.material-icons {
  vertical-align: -6px;
}

p {
  line-height: 1.25em;
}

.caption {
  font-size: 16px;
  color: #666;
  margin-top: 4px;
  margin-bottom: 10px;
}

video {
  display: block;
  margin: auto;
}

figure {
  display: block;
  margin: auto;
  margin-top: 10px;
  margin-bottom: 10px;
}

#bibtex pre {
  font-size: 14px;
  background-color: #eee;
  padding: 16px;
}

.blue {
  color: #2c82c9;
  font-weight: bold;
}

.orange {
  color: #d35400;
  font-weight: bold;
}

.flex-row {
  display: flex;
  flex-flow: row wrap;
  padding: 0;
  margin: 0;
  list-style: none;
}

.paper-btn-coming-soon {
  position: relative;
  top: 0;
  left: 0;
}

.coming-soon {
  position: absolute;
  top: -15px;
  right: -15px;
}

.paper-btn {
  position: relative;
  text-align: center;

  display: inline-block;
  margin: 8px;
  padding: 8px 8px;

  border-width: 0;
  outline: none;
  border-radius: 2px;

  background-color: #5364cc;
  color: white !important;
  font-size: 20px;
  width: 100px;
  font-weight: 600;
}

.paper-btn-parent {
  display: flex;
  justify-content: center;
  margin: 16px 0px;
}

.paper-btn:hover {
  opacity: 0.85;
}

.container {
  margin-left: auto;
  margin-right: auto;
  padding-left: 16px;
  padding-right: 16px;
}

.venue {
  font-size: 23px;
}

.topnav {
  background-color: #EEEEEE;
  overflow: hidden;
}

.topnav div {
  max-width: 1070px;
  margin: 0 auto;
}

.topnav a {
  display: inline-block;
  color: black;
  text-align: center;
  vertical-align: middle;
  padding: 16px 16px;
  text-decoration: none;
  font-size: 18px;
}

.topnav img {
  padding: 2px 0px;
  width: 100%;
  margin: 0.2em 0px 0.3em 0px;
  vertical-align: middle;
}

pre {
  font-size: 0.9em;
  padding: 3px 7px;
  border-radius: 3px;
  background-color: rgb(235, 235, 235);
  overflow-x: auto;
}

.download-thumb {
  display: flex;
}

@media only screen and (max-width: 620px) {
  .download-thumb {
    display: none;
  }
}

.paper-stuff {
  width: 50%;
  font-size: 20px;
}

@media only screen and (max-width: 620px) {
  .paper-stuff {
    width: 100%;
  }
}

* {
  box-sizing: border-box;
}

.column {
  text-align: center;
  float: left;
  width: 16.666%;
  padding: 5px;
}

/* Clearfix (clear floats) */
.row::after {
  content: "";
  clear: both;
  display: table;
}

/* Responsive layout - stack the columns on narrow screens */
@media screen and (max-width: 500px) {
  .column {
    width: 100%;
  }
}
</style>
</head>

<body>
<div class="topnav" id="myTopnav">
  <div>
    <a href="https://www.nvidia.com/"><img width="100%" src="assets/nvidia.svg"></a>
    <a href="https://nv-tlabs.github.io/"><strong>Toronto AI Lab</strong></a>
  </div>
</div>
<div class="container">
<div class="paper-title">
  <h1>
    <font color="#5364cc">LION</font>:
    <font color="#5364cc">L</font>atent Point Diffus<font color="#5364cc">ion</font> Models <br> for 3D Shape Generation
  </h1>
</div>

<div id="authors">
  <center>
    <div class="author-row-new">
      <a href="https://www.cs.utoronto.ca/~xiaohui/">Xiaohui Zeng<sup>1,2,3</sup></a>,
      <a href="http://latentspace.cc/">Arash Vahdat<sup>1</sup></a>,
      <a href="https://www.fwilliams.info/">Francis Williams<sup>1</sup></a>,
      <a href="https://zgojcic.github.io/">Zan Gojcic<sup>1</sup></a>,
      <a href="https://orlitany.github.io/">Or Litany<sup>1</sup></a>,
      <a href="https://www.cs.utoronto.ca/~fidler/">Sanja Fidler<sup>1,2,3</sup></a>,
      <a href="https://karstenkreis.github.io/">Karsten Kreis<sup>1</sup></a>
    </div>
  </center>
  <center>
    <div class="affiliations">
      <span><sup>1</sup> NVIDIA</span>
      <span><sup>2</sup> University of Toronto</span>
      <span><sup>3</sup> Vector Institute</span> <br/>
    </div>
    <div class="affil-row">
      <div class="venue text-center"><b>NeurIPS 2022</b></div>
    </div>
  </center>

  <div style="clear: both">
    <div class="paper-btn-parent">
<a class="paper-btn" href="https://arxiv.org/abs/2112.07068">
        <span class="material-icons"> description </span>
        Paper
      </a>
      <div class="paper-btn-coming-soon">
        <a class="paper-btn" href="https://github.com/nv-tlabs/LION">
          <span class="material-icons"> code </span>
          Code
        </a>
      </div>
    </div>
  </div>
</div>

<section id="teaser-image">
  <center>
    <figure>
      <video class="centered video-background" width="80%" controls autoplay loop muted playsinline>
        <source src="assets/LION_video_v8.mp4#t=0.001" type="video/mp4">
        Your browser does not support the video tag.
      </video>
    </figure>
  </center>
</section>

<section id="news">
  <hr>
  <h2>News</h2>
  <div class="row">
    <div><span class="material-icons"> event </span> [Sept 2022] Project page released! Code is available on <a href="https://github.com/nv-tlabs/LION">GitHub</a>.</div>
  </div>
</section>

<section id="abstract">
  <hr>
  <h2>Abstract</h2>
  <div class="flex-row">
    <p>
    Denoising diffusion models (DDMs) have shown promising results in 3D point cloud synthesis. To advance 3D DDMs and make them useful
    for digital artists, we require (i) high generation quality, (ii) flexibility for manipulation and applications such as conditional
    synthesis and shape interpolation, and (iii) the ability to output smooth surfaces or meshes. To this end, we introduce the
    hierarchical Latent Point Diffusion Model (LION) for 3D shape generation. LION is set up as a variational autoencoder (VAE) with
    a hierarchical latent space that combines a global shape latent representation with a point-structured latent space. For generation,
    we train two hierarchical DDMs in these latent spaces. The hierarchical VAE approach boosts performance compared to DDMs that operate
    on point clouds directly, while the point-structured latents are still ideally suited for DDM-based modeling. Experimentally, LION
    achieves state-of-the-art generation performance on multiple ShapeNet benchmarks. Furthermore, our VAE framework allows us to easily
    use LION for different relevant tasks without re-training the latent DDMs: We show that LION excels at multimodal shape denoising and
    voxel-conditioned synthesis. We also demonstrate shape autoencoding and latent shape interpolation, and we augment LION with modern
    surface reconstruction techniques to generate smooth 3D meshes. We hope that LION provides a powerful tool for artists working with
    3D shapes due to its high-quality generation, flexibility, and surface reconstruction.
    </p>
  </div>
</section>

<section id="method">
  <hr>
  <h2>Method</h2>
  <div class="flex-row">
    <p>
    LION is set up as a hierarchical point cloud VAE with denoising diffusion models over the shape latent and latent point distributions.
    Point-Voxel CNNs (PVCNNs) with adaptive Group Normalization (Ada. GN) are used as neural networks.
    The latent points can be interpreted as a smoothed version of the input point cloud.
    Shape As Points (SAP) is optionally used for mesh reconstruction.
    </p>
  </div>
  <center>
    <figure style="width: 100%;">
      <a>
        <img width="80%" src="assets/pipeline.jpg">
      </a>
      <p class="caption" style="margin-bottom: 24px;">
      Architecture of LION.
      </p>
    </figure>
  </center>
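  <p>
  To make the two-stage generation concrete, below is a minimal PyTorch-style sketch of the hierarchical sampling procedure. Note that <code>shape_ddm</code>, <code>point_ddm</code>, <code>decoder</code>, and their <code>denoise_step</code> method are hypothetical stand-ins for the trained shape-latent DDM, the latent-point DDM, and the VAE decoder, not LION's actual API, and the latent dimensions are illustrative.
  </p>
  <pre><code>import torch

@torch.no_grad()
def sample_lion(shape_ddm, point_ddm, decoder, num_steps=1000,
                shape_dim=128, num_points=2048, point_dim=4):
    """Hierarchical sampling sketch: first the global shape latent,
    then the point-structured latent conditioned on it, then decoding."""
    # Stage 1: ancestral sampling of the global shape latent.
    z_shape = torch.randn(1, shape_dim)
    for t in reversed(range(num_steps)):
        z_shape = shape_ddm.denoise_step(z_shape, t)  # hypothetical API

    # Stage 2: sample the latent points, conditioned on the shape latent.
    z_points = torch.randn(1, num_points, point_dim)
    for t in reversed(range(num_steps)):
        z_points = point_ddm.denoise_step(z_points, t, cond=z_shape)

    # Decode the latents into an output point cloud (xyz per point).
    return decoder(z_points, cond=z_shape)  # (1, num_points, 3)</code></pre>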
</section>

<section id="novelties"/>
|
||
<hr>
|
||
<h2>Technical Contributions</h2>
|
||
<div class="flex-row">
|
||
<p>We make the following technical contributions:
|
||
<ul style="list-style-type:disc;">
|
||
<li>We explore the training of multiple denoising diffusion models (DDMs) in a latent space..</li>
|
||
<li>We train latent DDMs in 3D generation.</li>
|
||
<li>We outperform all baselines and demonstrate that LION scale to extremely diverse shape datasets, like modeling 13 or even 55 ShapeNet categories jointly without conditioning. </li>
|
||
</ul>
|
||
</p>
|
||
</div>
|
||
</section>

<section id="results">
  <hr>
  <h2>Generation (Single Category)</h2>
  <div class="flex-row">
    <p>Samples from LION trained on a single category.</p>
  </div>

  <center>
    <figure>
      <video class="centered video-background" width="100%" controls autoplay muted playsinline>
        <source src="assets/gen_airplane.mp4#t=0.001" type="video/mp4">
        Your browser does not support the video tag.
      </video>
      <p class="caption">
      Generated point clouds and reconstructed meshes of airplanes.
      </p> <br>
    </figure>
    <figure>
      <video class="centered video-background" width="100%" controls autoplay muted playsinline>
        <source src="assets/gen_chair.mp4#t=0.001" type="video/mp4">
        Your browser does not support the video tag.
      </video>
      <p class="caption">
      Generated point clouds and reconstructed meshes of chairs.
      </p> <br>
    </figure>
    <figure>
      <video class="centered video-background" width="100%" controls autoplay muted playsinline>
        <source src="assets/gen_car.mp4#t=0.001" type="video/mp4">
        Your browser does not support the video tag.
      </video>
      <p class="caption">
      Generated point clouds and reconstructed meshes of cars.
      </p> <br>
    </figure>
    <figure style="width: 100%;">
      <video class="centered video-background" width="100%" controls autoplay muted playsinline>
        <source src="assets/gen_animal553_v2.mp4#t=0.001" type="video/mp4">
        Your browser does not support the video tag.
      </video>
      <p class="caption" style="margin-bottom: 24px;">
      Generated point clouds and reconstructed meshes of animals.
      </p> <br>
    </figure>
  </center>
  <center>
    <figure>
      <video class="centered video-background" width="100%" controls autoplay muted playsinline>
        <source src="assets/gen_bottle.mp4#t=11" type="video/mp4">
        Your browser does not support the video tag.
      </video>
      <p class="caption">
      Generated point clouds and reconstructed meshes of bottles.
      </p> <br>
    </figure>
  </center>
  <center>
    <figure>
      <video class="centered video-background" width="100%" controls autoplay muted playsinline>
        <source src="assets/gen_mug.mp4#t=11" type="video/mp4">
        Your browser does not support the video tag.
      </video>
      <p class="caption">
      Generated point clouds and reconstructed meshes of mugs.
      </p> <br>
    </figure>
  </center>

  <hr>
  <h2>Generation (Multiple Categories)</h2>
  <div class="flex-row">
    <p>Samples from LION trained on multiple ShapeNet categories jointly, without conditioning.</p>
  </div>
  <center>
    <figure>
      <video class="centered video-background" width="100%" controls autoplay muted playsinline>
        <source src="assets/gen_all_v13.mp4#t=0.001" type="video/mp4">
        Your browser does not support the video tag.
      </video>
      <p class="caption">
      Generated point clouds and reconstructed meshes. LION model trained on 13 ShapeNet categories jointly without conditioning.
      </p>
      <br>
    </figure>
  </center>
  <center>
    <figure>
      <video class="centered video-background" width="100%" controls muted playsinline>
        <source src="assets/gen_all_55.mp4#t=0.001" type="video/mp4">
        Your browser does not support the video tag.
      </video>
      <p class="caption">
      Generated point clouds and reconstructed meshes. LION model trained on 55 ShapeNet categories jointly without conditioning.
      </p>
      <br>
    </figure>
  </center>
</section>

<section id="more_results">
  <hr>
  <h2>More Results</h2>
  <h3>Interpolation</h3>
  <div class="flex-row">
    <p>LION can interpolate between two shapes by traversing the latent space. The generated shapes are clean and semantically plausible along the entire interpolation path.</p>
  </div>
  <figure>
    <video class="centered video-background" width="100%" controls muted playsinline>
      <source src="assets/LION_interp.mp4#t=0.001" type="video/mp4">
      Your browser does not support the video tag.
    </video>
    <p class="caption">
    Leftmost shape: the source shape. Rightmost shape: the target shape. The shapes in the middle are interpolated results between the source and target shapes.
    </p>
  </figure>
  <center>
    <figure>
      <video class="centered video-background" width="50%" controls loop autoplay muted playsinline>
        <source src="assets/LION_interp_seq.mp4#t=0.001" type="video/mp4">
        Your browser does not support the video tag.
      </video>
      <p class="caption">
      LION traverses the latent space and interpolates many different shapes.
      </p>
    </figure>
  </center>
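  <p>
  The sketch below illustrates one way such a latent-space interpolation can be implemented, assuming a deterministic DDIM-style encoding into the latent DDM's Gaussian prior; <code>encoder</code>, <code>decoder</code>, <code>ddim_encode</code>, and <code>ddim_decode</code> are hypothetical helpers, not LION's actual API.
  </p>
  <pre><code>import torch

def slerp(a, b, w):
    """Spherical interpolation between two Gaussian noise tensors."""
    omega = torch.arccos((a * b).sum() / (a.norm() * b.norm()))
    return (torch.sin((1 - w) * omega) * a + torch.sin(w * omega) * b) / torch.sin(omega)

@torch.no_grad()
def interpolate(encoder, decoder, ddim_encode, ddim_decode, pc_a, pc_b, w=0.5):
    # Encode both point clouds into the latent space.
    z_a, z_b = encoder(pc_a), encoder(pc_b)
    # Deterministically map each latent to Gaussian noise via reverse DDIM.
    eps_a, eps_b = ddim_encode(z_a), ddim_encode(z_b)
    # Interpolate in noise space, map back to a latent, and decode.
    z_w = ddim_decode(slerp(eps_a, eps_b, w))
    return decoder(z_w)</code></pre>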
  <br>
  <h3>Fast Sampling with DDIM</h3>
  <div class="flex-row">
    <p>LION's sampling time can be reduced by using a DDIM sampler. A DDIM sampler with as few as 25 steps already generates high-quality shapes and takes less than 1 sec per shape.</p>
  </div>
  <center>
    <figure style="width: 100%;">
      <a>
        <img width="100%" src="assets/ddim_sample.png">
      </a>
      <p class="caption" style="margin-bottom: 24px;">
      DDIM samples from LION trained on different data. The top two rows show the number of steps and the wall-clock time used when drawing one sample.
      With DDIM sampling, we can reduce the sampling time from 27.09 sec (1000 steps) to less than 1 sec (25 steps) to generate an object.
      </p>
    </figure>
  </center>
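  <p>
  For reference, here is a minimal sketch of the deterministic DDIM update (eta = 0) that such a sampler applies at each of its few steps; <code>eps_model</code> and the <code>alpha_bar</code> schedule are assumed stand-ins for LION's trained latent DDM and its noise schedule.
  </p>
  <pre><code>import torch

@torch.no_grad()
def ddim_sample(eps_model, alpha_bar, x, timesteps):
    """Deterministic DDIM sampling over a short subsequence of timesteps.

    eps_model: predicts the noise eps(x_t, t).
    alpha_bar: 1D tensor of cumulative noise-schedule products.
    timesteps: decreasing step indices, e.g. 25 out of the 1000.
    """
    for t, t_prev in zip(timesteps[:-1], timesteps[1:]):
        eps = eps_model(x, t)
        # Predict the clean sample x_0 from the current noisy x_t.
        x0 = (x - torch.sqrt(1 - alpha_bar[t]) * eps) / torch.sqrt(alpha_bar[t])
        # Deterministic (eta = 0) jump to the previous timestep.
        x = torch.sqrt(alpha_bar[t_prev]) * x0 + torch.sqrt(1 - alpha_bar[t_prev]) * eps
    return x</code></pre>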

  <br>

  <h3>Voxel-Conditioned Synthesis</h3>
  <div class="flex-row">
    <p>Given a coarse voxel grid, LION can generate different plausible detailed shapes.</p>
    <p>In practice, an artist using a 3D generative model may have a rough idea of the desired shape. For instance, they may be able to quickly construct a coarse voxelized shape, to which the generative model then adds realistic details.</p>
  </div>

  <center>
    <figure style="width: 80%;">
      <video class="centered video-background" width="80%" controls muted playsinline>
        <source src="assets/airplane_voxel.mp4#t=14.8" type="video/mp4">
        Your browser does not support the video tag.
      </video>
      <p class="caption" style="margin-bottom: 24px;">
      Left: input voxel grid. Right: two point clouds generated by LION and the reconstructed mesh.
      </p>
    </figure>
  </center>
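  <p>
  Conceptually, this runs a diffuse-denoise procedure in latent space: the voxelized shape is encoded, its latent is partially diffused for a few hundred steps, and the latent DDM then denoises it back into a detailed, plausible shape. A minimal sketch under our assumptions, where <code>encoder</code>, <code>decoder</code>, <code>ddm</code>, and <code>alpha_bar</code> are hypothetical stand-ins:
  </p>
  <pre><code>import torch

@torch.no_grad()
def diffuse_denoise(encoder, decoder, ddm, alpha_bar, coarse_pc, tau=250):
    """Regenerate a coarse shape with realistic details.

    coarse_pc: point cloud sampled from the coarse voxel input.
    tau: how far to diffuse; larger tau allows more deviation from
         the input, smaller tau stays closer to it.
    """
    z = encoder(coarse_pc)
    # Diffuse the latent tau steps toward the Gaussian prior.
    z_tau = torch.sqrt(alpha_bar[tau]) * z + torch.sqrt(1 - alpha_bar[tau]) * torch.randn_like(z)
    # Denoise back with the latent DDM (ancestral sampling from step tau).
    for t in reversed(range(tau)):
        z_tau = ddm.denoise_step(z_tau, t)  # hypothetical API
    return decoder(z_tau)</code></pre>
  <p>
  Different noise draws in the diffuse step yield different plausible detailed shapes for the same voxel input.
  </p>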
  <br>

  <h3>Single View Reconstruction</h3>
  <div class="flex-row">
    <p>
    We extend LION to also allow for single view reconstruction (SVR) from RGB data. We rendered 2D images from the 3D ShapeNet shapes, extracted the images’ CLIP image embeddings, and trained LION’s latent diffusion models while conditioning on the shapes’ CLIP image embeddings. At test time, we then take a single view 2D image, extract the CLIP image embedding, and generate corresponding 3D shapes, thereby effectively performing SVR. We show SVR results from real RGB data.
    </p>
  </div>
  <center>
    <figure style="width: 100%;">
      <a>
        <img width="49%" src="assets/svr/img2shape_mitsuba_full.jpg">
        <img width="49%" src="assets/svr/img2shape_cari2s_mm_mitsuba_full.jpg">
      </a>
      <p class="caption" style="margin-bottom: 24px;">
      Single view reconstruction from RGB images of chairs. For each input image, LION can generate multi-modal outputs.
      </p>
    </figure>
  </center>
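  <p>
  A minimal sketch of this conditioning pipeline, assuming the open-source CLIP package and the same hypothetical <code>shape_ddm</code>/<code>point_ddm</code>/<code>decoder</code> handles as above; only the CLIP calls are real API, and how the embedding enters each DDM is an assumption.
  </p>
  <pre><code>import clip
import torch
from PIL import Image

@torch.no_grad()
def svr_from_image(image_path, shape_ddm, point_ddm, decoder, num_steps=1000):
    """Generate 3D shapes conditioned on a single RGB view via CLIP."""
    model, preprocess = clip.load("ViT-B/32")
    image = preprocess(Image.open(image_path)).unsqueeze(0)
    emb = model.encode_image(image)  # CLIP image embedding

    # Sample the latent DDMs, conditioned on the CLIP embedding
    # (hypothetical sampler APIs, as in the earlier sketches).
    z_shape = torch.randn(1, 128)
    for t in reversed(range(num_steps)):
        z_shape = shape_ddm.denoise_step(z_shape, t, cond=emb)
    z_points = torch.randn(1, 2048, 4)
    for t in reversed(range(num_steps)):
        z_points = point_ddm.denoise_step(z_points, t, cond=(z_shape, emb))
    return decoder(z_points, cond=z_shape)</code></pre>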
  <center>
    <figure style="width: 100%;">
      <a>
        <img width="100%" src="assets/svr/img2shape_cari2s_mitsuba_full.jpg">
      </a>
      <p class="caption" style="margin-bottom: 24px;">
      More single view reconstructions from RGB images of cars.
      </p>
    </figure>
  </center>
  <br>
  <h3>Text-Guided Generation</h3>
  <div class="flex-row">
    <p>
    Using CLIP’s text encoder, our method additionally allows for text-guided generation.
    </p>
  </div>
  <center>
    <figure style="width: 100%;">
      <a>
        <img width="35%" src="assets/clipforge_chair.png">
        <img width="60%" src="assets/clipforge_car.png">
      </a>
      <p class="caption" style="margin-bottom: 24px;">
      Text-driven shape generation with LION. The bottom row shows the text input.
      </p>
    </figure>
  </center>
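  <p>
  Since the latent DDMs are conditioned on CLIP embeddings, swapping the image encoder for CLIP’s text encoder is the only change needed. A brief sketch (the CLIP calls are real API; the prompt and downstream sampler are assumptions as above):
  </p>
  <pre><code>import clip
import torch

@torch.no_grad()
def text_embedding(prompt):
    """Encode a text prompt into the same CLIP embedding space used
    for image conditioning; feed the result to the samplers above."""
    model, _ = clip.load("ViT-B/32")
    tokens = clip.tokenize([prompt])
    return model.encode_text(tokens)

emb = text_embedding("a round chair with armrests")  # hypothetical prompt</code></pre>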
  <h3>Per-Sample Text-Driven Texture Synthesis</h3>
  <div class="flex-row">
    <p>
    We apply Text2Mesh to meshes generated by LION to additionally synthesize textures in a text-driven manner, leveraging CLIP.
    </p>
  </div>
  <div class="row">
    <div class="column">
      <img width="100%" src="assets/text2mesh/strawberries_airplane-rec_3.jpg">
      <figcaption align="center">An airplane made of strawberry</figcaption>
    </div>
    <div class="column">
      <img width="100%" src="assets/text2mesh/fabric_leather_airplane-rec_3.jpg">
      <figcaption align="center">An airplane made of fabric leather</figcaption>
    </div>
    <div class="column">
      <img width="100%" src="assets/text2mesh/wood_chair-rec_421_norm1.jpg">
      <figcaption align="center">A chair made of wood</figcaption>
    </div>
    <div class="column">
      <img width="100%" src="assets/text2mesh/wrong_copied1-rec_293_norm0.jpg">
      <figcaption align="center">A car made of rusty metal</figcaption>
    </div>
    <div class="column">
      <img width="100%" src="assets/text2mesh/brick_car-rec_67_norm1.jpg">
      <figcaption align="center">A car made of brick</figcaption>
    </div>
    <div class="column">
      <img width="100%" src="assets/text2mesh/wrong_copied1-rec_12_norm1.jpg">
      <figcaption align="center">A denim fabric animal</figcaption>
    </div>
  </div>
  <br>
</section>
<section id="paper">
  <h2>Paper</h2>
  <hr>
  <div class="flex-row">
    <div class="download-thumb">
      <div style="box-sizing: border-box; padding: 16px; margin: auto;">
        <a href="https://arxiv.org/abs/2210.06978"><img class="screenshot" src="assets/cld_paper_preview.png"></a>
      </div>
    </div>
    <div class="paper-stuff">
      <p><b>LION: Latent Point Diffusion Models for 3D Shape Generation</b></p>
      <p>Xiaohui Zeng, Arash Vahdat, Francis Williams, Zan Gojcic, Or Litany, Sanja Fidler, Karsten Kreis</p>
      <p><i>Advances in Neural Information Processing Systems (NeurIPS), 2022</i></p>
    </div>
  </div>
</section>

<section id="bibtex">
  <h2>Citation</h2>
  <hr>
  <pre><code>@inproceedings{
  zeng2022lion,
  title={LION: Latent Point Diffusion Models for 3D Shape Generation},
  author={Xiaohui Zeng and Arash Vahdat and Francis Williams and Zan Gojcic and Or Litany and Sanja Fidler and Karsten Kreis},
  booktitle={Advances in Neural Information Processing Systems (NeurIPS)},
  year={2022}
}</code></pre>
</section>
</div>
</body>
</html>