From da8d22deba45478ed547f64ed83347437f170a5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Laure=CE=B7t?= Date: Tue, 6 Jun 2023 20:47:56 +0200 Subject: [PATCH] init --- .editorconfig | 12 + .envrc | 1 + .gitattributes | 2 + .gitignore | 305 ++++++++++++++ .vscode/extensions.json | 5 + .vscode/settings.json | 6 + assets/aube.jpg | 3 + assets/generative-overview.png | 3 + assets/inp_n7.jpg | 3 + assets/safran_logo.png | 3 + assets/safran_logo.svg | 3 + flake.lock | 43 ++ flake.nix | 17 + src/paper.tex | 309 +++++++++++++++ src/paper.xmpdata | 14 + src/zotero.bib | 704 +++++++++++++++++++++++++++++++++ 16 files changed, 1433 insertions(+) create mode 100644 .editorconfig create mode 100644 .envrc create mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 .vscode/extensions.json create mode 100644 .vscode/settings.json create mode 100644 assets/aube.jpg create mode 100644 assets/generative-overview.png create mode 100644 assets/inp_n7.jpg create mode 100644 assets/safran_logo.png create mode 100644 assets/safran_logo.svg create mode 100644 flake.lock create mode 100644 flake.nix create mode 100644 src/paper.tex create mode 100644 src/paper.xmpdata create mode 100644 src/zotero.bib diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..5d47c21 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,12 @@ +# EditorConfig is awesome: https://EditorConfig.org + +# top-most EditorConfig file +root = true + +[*] +indent_style = space +indent_size = 2 +end_of_line = lf +charset = utf-8 +trim_trailing_whitespace = true +insert_final_newline = true diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..8392d15 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use flake \ No newline at end of file diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..30ef200 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +assets/** filter=lfs diff=lfs merge=lfs -text +**/*.pdf filter=lfs diff=lfs merge=lfs -text diff --git 
a/.gitignore b/.gitignore new file mode 100644 index 0000000..11f6813 --- /dev/null +++ b/.gitignore @@ -0,0 +1,305 @@ +.direnv +*.ist + +# https://github.com/github/gitignore/blob/main/TeX.gitignore +## Core latex/pdflatex auxiliary files: +*.aux +*.lof +*.log +*.lot +*.fls +*.out +*.toc +*.fmt +*.fot +*.cb +*.cb2 +.*.lb + +## Intermediate documents: +*.dvi +*.xdv +*-converted-to.* +# these rules might exclude image files for figures etc. +# *.ps +# *.eps +# *.pdf + +## Generated if empty string is given at "Please type another file name for output:" +.pdf + +## Bibliography auxiliary files (bibtex/biblatex/biber): +*.bbl +*.bcf +*.blg +*-blx.aux +*-blx.bib +*.run.xml + +## Build tool auxiliary files: +*.fdb_latexmk +*.synctex +*.synctex(busy) +*.synctex.gz +*.synctex.gz(busy) +*.pdfsync + +## Build tool directories for auxiliary files +# latexrun +latex.out/ + +## Auxiliary and intermediate files from other packages: +# algorithms +*.alg +*.loa + +# achemso +acs-*.bib + +# amsthm +*.thm + +# beamer +*.nav +*.pre +*.snm +*.vrb + +# changes +*.soc + +# comment +*.cut + +# cprotect +*.cpt + +# elsarticle (documentclass of Elsevier journals) +*.spl + +# endnotes +*.ent + +# fixme +*.lox + +# feynmf/feynmp +*.mf +*.mp +*.t[1-9] +*.t[1-9][0-9] +*.tfm + +#(r)(e)ledmac/(r)(e)ledpar +*.end +*.?end +*.[1-9] +*.[1-9][0-9] +*.[1-9][0-9][0-9] +*.[1-9]R +*.[1-9][0-9]R +*.[1-9][0-9][0-9]R +*.eledsec[1-9] +*.eledsec[1-9]R +*.eledsec[1-9][0-9] +*.eledsec[1-9][0-9]R +*.eledsec[1-9][0-9][0-9] +*.eledsec[1-9][0-9][0-9]R + +# glossaries +*.acn +*.acr +*.glg +*.glo +*.gls +*.glsdefs +*.lzo +*.lzs +*.slg +*.slo +*.sls + +# uncomment this for glossaries-extra (will ignore makeindex's style files!) 
+# *.ist + +# gnuplot +*.gnuplot +*.table + +# gnuplottex +*-gnuplottex-* + +# gregoriotex +*.gaux +*.glog +*.gtex + +# htlatex +*.4ct +*.4tc +*.idv +*.lg +*.trc +*.xref + +# hyperref +*.brf + +# knitr +*-concordance.tex +# TODO Uncomment the next line if you use knitr and want to ignore its generated tikz files +# *.tikz +*-tikzDictionary + +# listings +*.lol + +# luatexja-ruby +*.ltjruby + +# makeidx +*.idx +*.ilg +*.ind + +# minitoc +*.maf +*.mlf +*.mlt +*.mtc[0-9]* +*.slf[0-9]* +*.slt[0-9]* +*.stc[0-9]* + +# minted +_minted* +*.pyg + +# morewrites +*.mw + +# newpax +*.newpax + +# nomencl +*.nlg +*.nlo +*.nls + +# pax +*.pax + +# pdfpcnotes +*.pdfpc + +# sagetex +*.sagetex.sage +*.sagetex.py +*.sagetex.scmd + +# scrwfile +*.wrt + +# svg +svg-inkscape/ + +# sympy +*.sout +*.sympy +sympy-plots-for-*.tex/ + +# pdfcomment +*.upa +*.upb + +# pythontex +*.pytxcode +pythontex-files-*/ + +# tcolorbox +*.listing + +# thmtools +*.loe + +# TikZ & PGF +*.dpth +*.md5 +*.auxlock + +# titletoc +*.ptc + +# todonotes +*.tdo + +# vhistory +*.hst +*.ver + +# easy-todo +*.lod + +# xcolor +*.xcp + +# xmpincl +*.xmpi + +# xindy +*.xdy + +# xypic precompiled matrices and outlines +*.xyc +*.xyd + +# endfloat +*.ttt +*.fff + +# Latexian +TSWLatexianTemp* + +## Editors: +# WinEdt +*.bak +*.sav + +# Texpad +.texpadtmp + +# LyX +*.lyx~ + +# Kile +*.backup + +# gummi +.*.swp + +# KBibTeX +*~[0-9]* + +# TeXnicCenter +*.tps + +# auto folder when using emacs and auctex +./auto/* +*.el + +# expex forward references with \gathertags +*-tags.tex + +# standalone packages +*.sta + +# Makeindex log files +*.lpz + +# xwatermark package +*.xwm + +# REVTeX puts footnotes in the bibliography by default, unless the nofootinbib +# option is specified. Footnotes are the stored in a file with suffix Notes.bib. +# Uncomment the next line to have this generated file ignored. 
+#*Notes.bib diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 0000000..ae4d31e --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,5 @@ +{ + "recommendations": [ + "james-yu.latex-workshop" + ] +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..7d26a4c --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,6 @@ +{ + "explorer.excludeGitIgnore": true, + "latex-workshop.latex.recipe.default": "latexmk (lualatex)", + "gitlens.codeLens.authors.enabled": false, + "gitlens.codeLens.recentChange.enabled": false, +} \ No newline at end of file diff --git a/assets/aube.jpg b/assets/aube.jpg new file mode 100644 index 0000000..c82a390 --- /dev/null +++ b/assets/aube.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5faa7d8d97790f3acc9ee509baec25d83e034cc0513c58c55d7775a5bc7d396 +size 305958 diff --git a/assets/generative-overview.png b/assets/generative-overview.png new file mode 100644 index 0000000..f59bd41 --- /dev/null +++ b/assets/generative-overview.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de7308b1725eeda2a144fb34dbd95b19387380a9ee5a381651485bbed8c27a30 +size 525595 diff --git a/assets/inp_n7.jpg b/assets/inp_n7.jpg new file mode 100644 index 0000000..b02ed97 --- /dev/null +++ b/assets/inp_n7.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79eaccdc9b7cb688cc5525d5fabe290663e4409b07b8c24b31d7e4bd6bcda386 +size 64325 diff --git a/assets/safran_logo.png b/assets/safran_logo.png new file mode 100644 index 0000000..98eb6ad --- /dev/null +++ b/assets/safran_logo.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a2abbdbc9ae5439d9477bfc897f8bdc5c1beac0adcbfafc0470c2062f967d42 +size 36912 diff --git a/assets/safran_logo.svg b/assets/safran_logo.svg new file mode 100644 index 0000000..9ee4d11 --- /dev/null +++ b/assets/safran_logo.svg @@ -0,0 +1,3 
@@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d2eda63c1e38b0ce72d6621dd0ffe4b9b9c2e1743f628c712f46b47323c9c71 +size 354775 diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..da7649b --- /dev/null +++ b/flake.lock @@ -0,0 +1,43 @@ +{ + "nodes": { + "flake-utils": { + "locked": { + "lastModified": 1667395993, + "narHash": "sha256-nuEHfE/LcWyuSWnS8t12N1wc105Qtau+/OdUAjtQ0rA=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "5aed5285a952e0b949eb3ba02c12fa4fcfef535f", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1673796341, + "narHash": "sha256-1kZi9OkukpNmOaPY7S5/+SlCDOuYnP3HkXHvNDyLQcc=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "6dccdc458512abce8d19f74195bb20fdb067df50", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..12be1ce --- /dev/null +++ b/flake.nix @@ -0,0 +1,17 @@ +{ + description = "Biblio proj long"; + + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; + flake-utils.url = "github:numtide/flake-utils"; + }; + + outputs = { self, nixpkgs, flake-utils }: + flake-utils.lib.eachDefaultSystem (system: + let pkgs = nixpkgs.legacyPackages.${system}; + in { + devShell = pkgs.mkShell { + buildInputs = with pkgs; [ texlive.combined.scheme-full ]; + }; + }); +} diff --git a/src/paper.tex b/src/paper.tex new file mode 100644 index 0000000..73979d3 --- /dev/null +++ b/src/paper.tex @@ -0,0 +1,309 @@ +\documentclass[ + 11pt, + a4paper, + twoside, + openany +]{book} + +% Packages +\usepackage{fontspec} +\usepackage{libertinus-otf} +\usepackage[a4paper, hmargin=2cm, vmargin=3cm]{geometry} 
+\usepackage{graphicx} +\usepackage{microtype} +\usepackage{amsmath} +\usepackage[numbers]{natbib} +\usepackage[french]{babel} +\usepackage{glossaries} +\usepackage{nomencl} +\usepackage{svg} + +% pdfx loads both hyperref and xcolor internally +% \usepackage{hyperref} +% \usepackage{xcolor} +\usepackage[a-3u]{pdfx} + +% We use \hypersetup to pass options to hyperref +\hypersetup{ + colorlinks = true, + breaklinks = true, +} + +% paragraph settings +\setlength{\parindent}{0cm} +\setlength{\parskip}{7pt}% + +% assets path +\graphicspath{{../assets/}} + +% header and footer settings +\usepackage{lastpage} +\usepackage{fancyhdr} +\pagestyle{fancy} +\renewcommand{\headrulewidth}{0pt} +\fancyhf{} +\cfoot{} +\rfoot{\hypersetup{hidelinks}\thepage/\pageref{LastPage}} + +\title{ + \huge \textbf{Rapport de Projet de Fin d'Études} +} +\author{ + Laurent Fainsin \\ + {\tt laurent@fainsin.bzh} +} +\date{ + \vspace{10.5cm} + Département Sciences du Numérique \\ + Troisième année \\ + 2022 — 2023 +} + +\newacronym{ssa}{Safran S.A.}{Safran Société Anonyme} +\newacronym{n7}{ENSEEIHT}{École nationale supérieure d'électrotechnique, d'électronique, d'informatique, d'hydraulique et des télécommunications} +\newacronym{pfe}{PFE}{Projet de Fin d'Études} + +\newacronym{gnn}{GNN}{Graph Neural Networks} +\newacronym{cnn}{CNN}{Convolutional Neural Network} +\newacronym{arm}{ARM}{Auto-Regressive Model} +\newacronym{nf}{NF}{Normalizing Flows} +\newacronym{ldm}{LDM}{Latent Diffusion Models} +\newacronym{ddpm}{DDPM}{Denoising Diffusion Probabilistic Models} +\newacronym{vae}{VAE}{Variational Auto-Encoder} +\newacronym{gan}{GAN}{Generative Adversarial Network} +\newacronym{nerf}{NeRF}{Neural Radiance Fields} + +\newacronym{ml}{ML}{Machine Learning} +\newacronym{dl}{DL}{Deep Learning} +\newacronym{ai}{AI}{Artificial Intelligence} + +\newacronym{pca}{PCA}{Principal Component Analysis} +\newacronym{pde}{PDE}{Partial Differential Equation} +\newacronym{cfd}{CFD}{Computational Fluid Dynamics} 
+\newacronym{ann}{ANN}{Artificial Neural Network} +\newacronym{mlp}{MLP}{Multi-Layer Perceptron} +\newacronym{relu}{ReLU}{Rectified Linear Unit} +\newacronym{mse}{MSE}{Mean Squared Error} +\newacronym{mae}{MAE}{Mean Absolute Error} +\newacronym{rmse}{RMSE}{Root Mean Squared Error} +\newacronym{mape}{MAPE}{Mean Absolute Percentage Error} +\makenoidxglossaries + +% \nomenclature{DDPM}{test} +% \makenomenclature + +\begin{document} + +\frontmatter + +\vbox{ + \centering + \includegraphics[width=5cm]{inp_n7.jpg} + \hspace{1cm} + \includegraphics[width=5cm]{safran_logo.png} + \vspace{2cm} + \maketitle +} + +{ + \thispagestyle{empty} + \chapter*{Remerciements} + \addcontentsline{toc}{chapter}{Remerciements} +} + +Je tiens à remercier Xavier Roynard, Michel Alessandro Bucci et Brian Staber, mes tuteurs de stage, ainsi que les équipes de \gls{ssa} pour leur accueil et leur accompagnement tout au long de ce stage. + +J'aimerais également remercier l'ensemble de mes professeurs de l'\gls{n7}, pour m'avoir permis d'acquérir les connaissances nécessaires à la réalisation de ce projet. 
+ +\clearpage + +{ + \hypersetup{hidelinks} + \addcontentsline{toc}{chapter}{Table des matières} + \tableofcontents +} + +\clearpage + +{ + \hypersetup{hidelinks} + \addcontentsline{toc}{chapter}{Table des figures} + \listoffigures +} + +\clearpage + +{ + \hypersetup{hidelinks} + \addcontentsline{toc}{chapter}{Glossaire} + \printnoidxglossaries +} + +% { +% \hypersetup{hidelinks} +% \addcontentsline{toc}{chapter}{Nomenclature} +% \printnomenclature +% } + +% \listoftables +% \addcontentsline{toc}{chapter}{Liste des tableaux} + +\mainmatter + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\chapter{Introduction} + +\begin{figure}[h] + \centering + \includegraphics[width=16cm]{aube.jpg} + \caption{Aubes du moteur Leap-1A} +\end{figure} + +Dans le domaine industriel, les codes de simulation numérique sont désormais un outil indispensable pour la conception de systèmes complexes, en particulier pour les modules de réacteurs d'avions ou d'hélicoptères. + +De telles simulations sont par exemple utilisées pour évaluer les performances aérodynamiques d'un composant tel qu'une aube de turbine. En partant d'une géométrie nominale, dans la phase d'optimisation, la pièce est progressivement modifiée afin d'optimiser certaines quantités d'intérêt. + +Malheureusement, ce processus de conception itératif présente deux limites: +\begin{itemize} + \item Le coût de calcul d'une simulation numérique de type \gls{cfd} est lourd, plusieurs heures sont nécessaires pour un unique calcul. + \item Le nombre de degrés de liberté pour la géométrie d'un profil complexe discrétisée avec un maillage non structuré est important, ce qui rend impossible l'exploration complète de l'espace de recherche de la solution optimale. +\end{itemize} + +\smallskip +Les approches d'optimisation assistées par surfaces de réponse permettent de répondre partiellement à ces difficultés. 
Cependant, ces approches admettent deux limitations intrinsèques: +\begin{itemize} + \item Elles nécessitent un long travail de paramétrisation. + \item Elles souffrent grandement du fléau de la dimension, i.e. la taille des problèmes considérés est généralement limitée. +\end{itemize} + +\smallskip +En particulier, une représentation latente parcimonieuse de la géométrie faciliterait l'exploration de l'espace de recherche et l'utilisation de métamodèles classiques pour la prédiction des quantités d'intérêt. + +Récemment, les modèles génératifs profonds comme les \gls{vae} ou les \gls{gan} ont été appliqués avec succès à des données structurées (typiquement des images). +Ceux-ci permettent de construire un espace latent représentatif d'un jeu de données donné et de générer de nouveaux échantillons qui partagent des caractéristiques importantes du jeu de données d'entraînement. + +Cependant, dans le cas de la simulation numérique, les données prennent souvent la forme de graphes en raison de l'utilisation de maillages pour représenter les surfaces des pièces à concevoir. Dans le contexte d'une application industrielle, il est donc crucial d'adapter les modèles susmentionnés afin de pouvoir utiliser des données non structurées en entrée. Les \gls{gnn} permettent de traiter des données non structurées telles que des maillages ou des nuages de points. + +Différentes solutions ont été proposées dans la littérature pour réaliser des convolutions et agrégations sur graphes ou nuages de points. Cependant, peu d'entre elles conviennent à l'application des réseaux sur graphes sur des données générées par des simulations numériques. + +Le but de ce stage est d'évaluer le potentiel de ces nouvelles méthodes sur des jeux de données réalisés en interne et représentatifs pour \gls{ssa}, et éventuellement de proposer des améliorations spécifiques aux maillages utilisés en simulations numériques. 
+ +L'étude vise tout d'abord à étudier la bibliographie disponible d'un côté sur les modèles génératifs et d'un autre sur les réseaux convolutionnels sur graphes. L'objectif est, dans une première phase, de faire un benchmark des différentes solutions de modèles génératifs sur graphe de type \gls{vae} et \gls{gan} afin de créer une représentation latente des géométries d'aubes 3D. Pour cela, un dataset avec quelques milliers d'échantillons d'aubes 3D et leurs performances aérodynamiques est disponible à \gls{ssa}. Le modèle résultant sera ensuite testé pour générer de nouvelles géométries et pour prédire les quantités d'intérêt par le biais de métamodèles classiques. +Enfin, si l'avancement sur les premières tâches le permet, d'autres modèles génératifs peuvent être considérés comme les \gls{nf} ou les \gls{ldm}. + +\chapter{État de l'art} + +Ce chapitre présente les différents concepts et méthodes nécessaires à la compréhension du travail réalisé durant ce stage. + +Dans le cadre de cette étude, nous nous intéressons à la génération de géométries d'aubes de turbines. Ces géométries font partie de certaines modalités de données, les maillages (qui sont un type de graphes) et les nuages de points. Ces modalités sont relativement peu étudiées dans la littérature de l'apprentissage automatique comparées aux modalités plus classiques comme les images, le texte ou encore l'audio. En effet, ces données sont non structurées et il est donc nécessaire d'utiliser des méthodes spécifiques pour les traiter. + +Il reste pertinent de noter que les méthodes présentées dans ce chapitre sont récentes et que la littérature évolue très rapidement. De plus, les méthodes existantes sont très nombreuses et il est impossible de toutes les présenter. Nous avons donc choisi de présenter les méthodes les plus pertinentes pour permettre une bonne compréhension globale du travail réalisé durant ce stage. 
+ +\cite{peng_shape_2021} +\cite{sulzer_deep_2022} + + +\glsreset{gnn} +\section{\gls{gnn}} + +\cite{velickovic_graph_2018} +\cite{gao_graph_2019} +\cite{brody_how_2022} + +\section{Point Cloud stuff new modality} + +\cite{qi_pointnet_2017} +\cite{qi_pointnet_2017-1} + +\section{Modèles génératifs} + +\cite{faez_deep_2020} +\cite{guo_systematic_2022} +\cite{zhu_survey_2022} + +\cite{kipf_graph_nodate} +\cite{kipf_semi-supervised_2017} +\cite{simonovsky_graphvae_2018} +\cite{burgess_understanding_2018} + +\glsreset{vae} +\subsection{\gls{vae}} + +\cite{kingma_auto-encoding_2022} +\cite{kipf_variational_2016} +\cite{alemi_deep_2019} +\cite{shah_auto-decoding_2020} +\cite{doersch_tutorial_2021} +\cite{yacoby_failure_2021} +\cite{kim_setvae_2021} +\cite{salha-galvan_contributions_2022} +\cite{higgins_beta-vae_2022} + +\glsreset{gan} +\subsection{\gls{gan}} + +\cite{goodfellow_generative_2014} +\cite{salimans_improved_2016} +\cite{arjovsky_towards_2017} +\cite{arjovsky_wasserstein_2017} + +\glsreset{arm} +\subsection{\gls{arm}} + +\cite{nash_polygen_2020} +\cite{liao_efficient_2020} + +\glsreset{nf} +\subsection{\gls{nf}} + +\cite{su_f-vaes_2018} +\cite{yang_pointflow_2019} + +\glsreset{ldm} +\subsection{\gls{ldm}} + +\cite{song_generative_2020} +\cite{thomas_kpconv_2019} +\cite{tang_searching_2020} +\cite{liu_point-voxel_2019} +\cite{zhou_3d_2021} +\cite{nguyen_point-set_2021} +\cite{zeng_lion_2022} +\cite{nichol_point-e_2022} + +\glsreset{nerf} +\subsection{\gls{nerf}} + +\cite{takikawa_neural_2021} +\cite{nam_3d-ldm_2022} + + +\chapter{Déroulement du stage} + +\section{Lecture de la littérature} + +\section{Prise en main des données} + +\section{Test de l'état de l'art} + +\section{Réimplementation de l'état de l'art} + +\section{Struggle, send help pls} + +\chapter{Conclusion} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\appendix + +%% ajouter les annexes ici + +\backmatter + +\nocite{*} +\addcontentsline{toc}{chapter}{Bibliographie} +\bibliography{zotero} 
+\bibliographystyle{plainnat} + +\end{document} diff --git a/src/paper.xmpdata b/src/paper.xmpdata new file mode 100644 index 0000000..2ce13d7 --- /dev/null +++ b/src/paper.xmpdata @@ -0,0 +1,14 @@ +\Author{Laurent Fainsin} +\Title{ + Rapport de Projet de Fin d'Études +} +\Language{French} +\Keywords{Stage, PFE, ENSEEIHT, Master} +\Publisher{Self-Published} +\Subject{ + Rapport de Projet de Fin d'Études +} +\Date{2023-09-07} +\PublicationType{} +\Source{} +\URLlink{} diff --git a/src/zotero.bib b/src/zotero.bib new file mode 100644 index 0000000..b79c542 --- /dev/null +++ b/src/zotero.bib @@ -0,0 +1,704 @@ + +@misc{goodfellow_generative_2014, + title = {Generative {Adversarial} {Networks}}, + url = {http://arxiv.org/abs/1406.2661}, + doi = {10.48550/arXiv.1406.2661}, + abstract = {We propose a new framework for estimating generative models via an adversarial process, in which we simultaneously train two models: a generative model G that captures the data distribution, and a discriminative model D that estimates the probability that a sample came from the training data rather than G. The training procedure for G is to maximize the probability of D making a mistake. This framework corresponds to a minimax two-player game. In the space of arbitrary functions G and D, a unique solution exists, with G recovering the training data distribution and D equal to 1/2 everywhere. In the case where G and D are defined by multilayer perceptrons, the entire system can be trained with backpropagation. There is no need for any Markov chains or unrolled approximate inference networks during either training or generation of samples. Experiments demonstrate the potential of the framework through qualitative and quantitative evaluation of the generated samples.}, + urldate = {2023-01-29}, + publisher = {arXiv}, + author = {Goodfellow, Ian J. 
and Pouget-Abadie, Jean and Mirza, Mehdi and Xu, Bing and Warde-Farley, David and Ozair, Sherjil and Courville, Aaron and Bengio, Yoshua}, + month = jun, + year = {2014}, + note = {arXiv:1406.2661 [cs, stat]}, + keywords = {Computer Science - Machine Learning, Statistics - Machine Learning}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/5STMX2XJ/Goodfellow et al. - 2014 - Generative Adversarial Networks.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/MYEGE7IK/1406.html:text/html}, +} + +@misc{salimans_improved_2016, + title = {Improved {Techniques} for {Training} {GANs}}, + url = {http://arxiv.org/abs/1606.03498}, + doi = {10.48550/arXiv.1606.03498}, + abstract = {We present a variety of new architectural features and training procedures that we apply to the generative adversarial networks (GANs) framework. We focus on two applications of GANs: semi-supervised learning, and the generation of images that humans find visually realistic. Unlike most work on generative models, our primary goal is not to train a model that assigns high likelihood to test data, nor do we require the model to be able to learn well without using any labels. Using our new techniques, we achieve state-of-the-art results in semi-supervised classification on MNIST, CIFAR-10 and SVHN. The generated images are of high quality as confirmed by a visual Turing test: our model generates MNIST samples that humans cannot distinguish from real data, and CIFAR-10 samples that yield a human error rate of 21.3\%. 
We also present ImageNet samples with unprecedented resolution and show that our methods enable the model to learn recognizable features of ImageNet classes.}, + urldate = {2023-01-29}, + publisher = {arXiv}, + author = {Salimans, Tim and Goodfellow, Ian and Zaremba, Wojciech and Cheung, Vicki and Radford, Alec and Chen, Xi}, + month = jun, + year = {2016}, + note = {arXiv:1606.03498 [cs]}, + keywords = {Computer Science - Computer Vision and Pattern Recognition, Computer Science - Machine Learning, Computer Science - Neural and Evolutionary Computing}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/PEYM38ZW/Salimans et al. - 2016 - Improved Techniques for Training GANs.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/5XMXB7WV/1606.html:text/html}, +} + +@misc{arjovsky_towards_2017, + title = {Towards {Principled} {Methods} for {Training} {Generative} {Adversarial} {Networks}}, + url = {http://arxiv.org/abs/1701.04862}, + doi = {10.48550/arXiv.1701.04862}, + abstract = {The goal of this paper is not to introduce a single algorithm or method, but to make theoretical steps towards fully understanding the training dynamics of generative adversarial networks. In order to substantiate our theoretical analysis, we perform targeted experiments to verify our assumptions, illustrate our claims, and quantify the phenomena. This paper is divided into three sections. The first section introduces the problem at hand. The second section is dedicated to studying and proving rigorously the problems including instability and saturation that arize when training generative adversarial networks. 
The third section examines a practical and theoretically grounded direction towards solving these problems, while introducing new tools to study them.}, + urldate = {2023-01-29}, + publisher = {arXiv}, + author = {Arjovsky, Martin and Bottou, Léon}, + month = jan, + year = {2017}, + note = {arXiv:1701.04862 [cs, stat]}, + keywords = {Computer Science - Machine Learning, Statistics - Machine Learning}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/AEE2NPN4/Arjovsky and Bottou - 2017 - Towards Principled Methods for Training Generative.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/QEE7N7KP/1701.html:text/html}, +} + +@misc{arjovsky_wasserstein_2017, + title = {Wasserstein {GAN}}, + url = {http://arxiv.org/abs/1701.07875}, + doi = {10.48550/arXiv.1701.07875}, + abstract = {We introduce a new algorithm named WGAN, an alternative to traditional GAN training. In this new model, we show that we can improve the stability of learning, get rid of problems like mode collapse, and provide meaningful learning curves useful for debugging and hyperparameter searches. Furthermore, we show that the corresponding optimization problem is sound, and provide extensive theoretical work highlighting the deep connections to other distances between distributions.}, + urldate = {2023-01-29}, + publisher = {arXiv}, + author = {Arjovsky, Martin and Chintala, Soumith and Bottou, Léon}, + month = dec, + year = {2017}, + note = {arXiv:1701.07875 [cs, stat]}, + keywords = {Computer Science - Machine Learning, Statistics - Machine Learning}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/KW83LJBX/Arjovsky et al. 
- 2017 - Wasserstein GAN.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/YA9DTUVB/1701.html:text/html}, +} + +@misc{song_generative_2020, + title = {Generative {Modeling} by {Estimating} {Gradients} of the {Data} {Distribution}}, + url = {http://arxiv.org/abs/1907.05600}, + doi = {10.48550/arXiv.1907.05600}, + abstract = {We introduce a new generative model where samples are produced via Langevin dynamics using gradients of the data distribution estimated with score matching. Because gradients can be ill-defined and hard to estimate when the data resides on low-dimensional manifolds, we perturb the data with different levels of Gaussian noise, and jointly estimate the corresponding scores, i.e., the vector fields of gradients of the perturbed data distribution for all noise levels. For sampling, we propose an annealed Langevin dynamics where we use gradients corresponding to gradually decreasing noise levels as the sampling process gets closer to the data manifold. Our framework allows flexible model architectures, requires no sampling during training or the use of adversarial methods, and provides a learning objective that can be used for principled model comparisons. Our models produce samples comparable to GANs on MNIST, CelebA and CIFAR-10 datasets, achieving a new state-of-the-art inception score of 8.87 on CIFAR-10. 
Additionally, we demonstrate that our models learn effective representations via image inpainting experiments.}, + urldate = {2023-01-29}, + publisher = {arXiv}, + author = {Song, Yang and Ermon, Stefano}, + month = oct, + year = {2020}, + note = {arXiv:1907.05600 [cs, stat]}, + keywords = {Computer Science - Machine Learning, Statistics - Machine Learning}, + annote = {Comment: NeurIPS 2019 (Oral)}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/NDB8ZJRC/Song and Ermon - 2020 - Generative Modeling by Estimating Gradients of the.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/KG2SAQFI/1907.html:text/html}, +} + +@article{yacoby_failure_2021, + title = {Failure {Modes} of {Variational} {Autoencoders} and {Their} {Effects} on {Downstream} {Tasks}}, + url = {https://openreview.net/forum?id=5Spjp0zDYt}, + abstract = {Variational Auto-encoders (VAEs) are deep generative latent variable models that are widely used for a number of downstream tasks. While it has been demonstrated that VAE training can suffer from a number of pathologies, existing literature lacks characterizations of exactly when these pathologies occur and how they impact down-stream task performance. In this paper we concretely characterize conditions under which VAE training exhibits pathologies and connect these failure modes to undesirable effects on specific downstream tasks, such as learning compressed and disentangled representations, adversarial robustness and semi-supervised learning.}, + language = {en}, + urldate = {2023-01-29}, + author = {Yacoby, Yaniv and Pan, Weiwei and Doshi-Velez, Finale}, + month = mar, + year = {2021}, + file = {Full Text PDF:/home/laurent/Zotero/storage/J37MD8SR/Yacoby et al. 
- 2021 - Failure Modes of Variational Autoencoders and Thei.pdf:application/pdf}, +} + +@inproceedings{higgins_beta-vae_2022, + title = {beta-{VAE}: {Learning} {Basic} {Visual} {Concepts} with a {Constrained} {Variational} {Framework}}, + shorttitle = {beta-{VAE}}, + url = {https://openreview.net/forum?id=Sy2fzU9gl}, + abstract = {Learning an interpretable factorised representation of the independent data generative factors of the world without supervision is an important precursor for the development of artificial intelligence that is able to learn and reason in the same way that humans do. We introduce beta-VAE, a new state-of-the-art framework for automated discovery of interpretable factorised latent representations from raw image data in a completely unsupervised manner. Our approach is a modification of the variational autoencoder (VAE) framework. We introduce an adjustable hyperparameter beta that balances latent channel capacity and independence constraints with reconstruction accuracy. We demonstrate that beta-VAE with appropriately tuned beta {\textgreater} 1 qualitatively outperforms VAE (beta = 1), as well as state of the art unsupervised (InfoGAN) and semi-supervised (DC-IGN) approaches to disentangled factor learning on a variety of datasets (celebA, faces and chairs). Furthermore, we devise a protocol to quantitatively compare the degree of disentanglement learnt by different models, and show that our approach also significantly outperforms all baselines quantitatively. 
Unlike InfoGAN, beta-VAE is stable to train, makes few assumptions about the data and relies on tuning a single hyperparameter, which can be directly optimised through a hyper parameter search using weakly labelled data or through heuristic visual inspection for purely unsupervised data.}, + language = {en}, + urldate = {2023-01-29}, + author = {Higgins, Irina and Matthey, Loic and Pal, Arka and Burgess, Christopher and Glorot, Xavier and Botvinick, Matthew and Mohamed, Shakir and Lerchner, Alexander}, + month = jul, + year = {2022}, + file = {Full Text PDF:/home/laurent/Zotero/storage/FD5Q6H4B/Higgins et al. - 2022 - beta-VAE Learning Basic Visual Concepts with a Co.pdf:application/pdf}, +} + +@misc{kingma_auto-encoding_2022, + title = {Auto-{Encoding} {Variational} {Bayes}}, + url = {http://arxiv.org/abs/1312.6114}, + doi = {10.48550/arXiv.1312.6114}, + abstract = {How can we perform efficient inference and learning in directed probabilistic models, in the presence of continuous latent variables with intractable posterior distributions, and large datasets? We introduce a stochastic variational inference and learning algorithm that scales to large datasets and, under some mild differentiability conditions, even works in the intractable case. Our contributions are two-fold. First, we show that a reparameterization of the variational lower bound yields a lower bound estimator that can be straightforwardly optimized using standard stochastic gradient methods. Second, we show that for i.i.d. datasets with continuous latent variables per datapoint, posterior inference can be made especially efficient by fitting an approximate inference model (also called a recognition model) to the intractable posterior using the proposed lower bound estimator. Theoretical advantages are reflected in experimental results.}, + urldate = {2023-01-29}, + publisher = {arXiv}, + author = {Kingma, Diederik P. 
and Welling, Max}, + month = dec, + year = {2022}, + note = {arXiv:1312.6114 [cs, stat]}, + keywords = {Computer Science - Machine Learning, Statistics - Machine Learning}, + annote = {Comment: Fixes a typo in the abstract, no other changes}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/8MXMAC2E/Kingma and Welling - 2022 - Auto-Encoding Variational Bayes.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/TDNMVVSS/1312.html:text/html}, +} + +@misc{zeng_lion_2022, + title = {{LION}: {Latent} {Point} {Diffusion} {Models} for {3D} {Shape} {Generation}}, + shorttitle = {{LION}}, + url = {http://arxiv.org/abs/2210.06978}, + doi = {10.48550/arXiv.2210.06978}, + abstract = {Denoising diffusion models (DDMs) have shown promising results in 3D point cloud synthesis. To advance 3D DDMs and make them useful for digital artists, we require (i) high generation quality, (ii) flexibility for manipulation and applications such as conditional synthesis and shape interpolation, and (iii) the ability to output smooth surfaces or meshes. To this end, we introduce the hierarchical Latent Point Diffusion Model (LION) for 3D shape generation. LION is set up as a variational autoencoder (VAE) with a hierarchical latent space that combines a global shape latent representation with a point-structured latent space. For generation, we train two hierarchical DDMs in these latent spaces. The hierarchical VAE approach boosts performance compared to DDMs that operate on point clouds directly, while the point-structured latents are still ideally suited for DDM-based modeling. Experimentally, LION achieves state-of-the-art generation performance on multiple ShapeNet benchmarks. Furthermore, our VAE framework allows us to easily use LION for different relevant tasks: LION excels at multimodal shape denoising and voxel-conditioned synthesis, and it can be adapted for text- and image-driven 3D generation. 
We also demonstrate shape autoencoding and latent shape interpolation, and we augment LION with modern surface reconstruction techniques to generate smooth 3D meshes. We hope that LION provides a powerful tool for artists working with 3D shapes due to its high-quality generation, flexibility, and surface reconstruction. Project page and code: https://nv-tlabs.github.io/LION.}, + urldate = {2023-01-29}, + publisher = {arXiv}, + author = {Zeng, Xiaohui and Vahdat, Arash and Williams, Francis and Gojcic, Zan and Litany, Or and Fidler, Sanja and Kreis, Karsten}, + month = oct, + year = {2022}, + note = {arXiv:2210.06978 [cs, stat]}, + keywords = {Computer Science - Computer Vision and Pattern Recognition, Computer Science - Machine Learning, Statistics - Machine Learning}, + annote = {Comment: NeurIPS 2022}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/FACF8TI9/Zeng et al. - 2022 - LION Latent Point Diffusion Models for 3D Shape G.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/X57XTJQR/2210.html:text/html}, +} + +@misc{nichol_point-e_2022, + title = {Point-{E}: {A} {System} for {Generating} {3D} {Point} {Clouds} from {Complex} {Prompts}}, + shorttitle = {Point-{E}}, + url = {http://arxiv.org/abs/2212.08751}, + doi = {10.48550/arXiv.2212.08751}, + abstract = {While recent work on text-conditional 3D object generation has shown promising results, the state-of-the-art methods typically require multiple GPU-hours to produce a single sample. This is in stark contrast to state-of-the-art generative image models, which produce samples in a number of seconds or minutes. In this paper, we explore an alternative method for 3D object generation which produces 3D models in only 1-2 minutes on a single GPU. Our method first generates a single synthetic view using a text-to-image diffusion model, and then produces a 3D point cloud using a second diffusion model which conditions on the generated image. 
While our method still falls short of the state-of-the-art in terms of sample quality, it is one to two orders of magnitude faster to sample from, offering a practical trade-off for some use cases. We release our pre-trained point cloud diffusion models, as well as evaluation code and models, at https://github.com/openai/point-e.}, + urldate = {2023-01-29}, + publisher = {arXiv}, + author = {Nichol, Alex and Jun, Heewoo and Dhariwal, Prafulla and Mishkin, Pamela and Chen, Mark}, + month = dec, + year = {2022}, + note = {arXiv:2212.08751 [cs]}, + keywords = {Computer Science - Computer Vision and Pattern Recognition, Computer Science - Machine Learning}, + annote = {Comment: 8 pages, 11 figures}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/8IW28GBH/Nichol et al. - 2022 - Point-E A System for Generating 3D Point Clouds f.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/LMQF9Q55/2212.html:text/html}, +} + +@misc{kim_setvae_2021, + title = {{SetVAE}: {Learning} {Hierarchical} {Composition} for {Generative} {Modeling} of {Set}-{Structured} {Data}}, + shorttitle = {{SetVAE}}, + url = {http://arxiv.org/abs/2103.15619}, + doi = {10.48550/arXiv.2103.15619}, + abstract = {Generative modeling of set-structured data, such as point clouds, requires reasoning over local and global structures at various scales. However, adopting multi-scale frameworks for ordinary sequential data to a set-structured data is nontrivial as it should be invariant to the permutation of its elements. In this paper, we propose SetVAE, a hierarchical variational autoencoder for sets. Motivated by recent progress in set encoding, we build SetVAE upon attentive modules that first partition the set and project the partition back to the original cardinality. Exploiting this module, our hierarchical VAE learns latent variables at multiple scales, capturing coarse-to-fine dependency of the set elements while achieving permutation invariance. 
We evaluate our model on point cloud generation task and achieve competitive performance to the prior arts with substantially smaller model capacity. We qualitatively demonstrate that our model generalizes to unseen set sizes and learns interesting subset relations without supervision. Our implementation is available at https://github.com/jw9730/setvae.}, + urldate = {2023-03-31}, + publisher = {arXiv}, + author = {Kim, Jinwoo and Yoo, Jaehoon and Lee, Juho and Hong, Seunghoon}, + month = mar, + year = {2021}, + note = {arXiv:2103.15619 [cs]}, + keywords = {Computer Science - Computer Vision and Pattern Recognition, Computer Science - Machine Learning}, + annote = {Comment: 19 pages, 20 figures}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/WUTNGI56/Kim et al. - 2021 - SetVAE Learning Hierarchical Composition for Gene.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/25K7W3C4/2103.html:text/html}, +} + +@misc{takikawa_neural_2021, + title = {Neural {Geometric} {Level} of {Detail}: {Real}-time {Rendering} with {Implicit} {3D} {Shapes}}, + shorttitle = {Neural {Geometric} {Level} of {Detail}}, + url = {http://arxiv.org/abs/2101.10994}, + doi = {10.48550/arXiv.2101.10994}, + abstract = {Neural signed distance functions (SDFs) are emerging as an effective representation for 3D shapes. State-of-the-art methods typically encode the SDF with a large, fixed-size neural network to approximate complex shapes with implicit surfaces. Rendering with these large networks is, however, computationally expensive since it requires many forward passes through the network for every pixel, making these representations impractical for real-time graphics. We introduce an efficient neural representation that, for the first time, enables real-time rendering of high-fidelity neural SDFs, while achieving state-of-the-art geometry reconstruction quality. 
We represent implicit surfaces using an octree-based feature volume which adaptively fits shapes with multiple discrete levels of detail (LODs), and enables continuous LOD with SDF interpolation. We further develop an efficient algorithm to directly render our novel neural SDF representation in real-time by querying only the necessary LODs with sparse octree traversal. We show that our representation is 2-3 orders of magnitude more efficient in terms of rendering speed compared to previous works. Furthermore, it produces state-of-the-art reconstruction quality for complex shapes under both 3D geometric and 2D image-space metrics.}, + urldate = {2023-03-28}, + publisher = {arXiv}, + author = {Takikawa, Towaki and Litalien, Joey and Yin, Kangxue and Kreis, Karsten and Loop, Charles and Nowrouzezahrai, Derek and Jacobson, Alec and McGuire, Morgan and Fidler, Sanja}, + month = jan, + year = {2021}, + note = {arXiv:2101.10994 [cs]}, + keywords = {Computer Science - Computer Vision and Pattern Recognition, Computer Science - Graphics}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/KJMJR4CB/Takikawa et al. - 2021 - Neural Geometric Level of Detail Real-time Render.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/ST444B55/2101.html:text/html}, +} + +@misc{nash_polygen_2020, + title = {{PolyGen}: {An} {Autoregressive} {Generative} {Model} of {3D} {Meshes}}, + shorttitle = {{PolyGen}}, + url = {http://arxiv.org/abs/2002.10880}, + doi = {10.48550/arXiv.2002.10880}, + abstract = {Polygon meshes are an efficient representation of 3D geometry, and are of central importance in computer graphics, robotics and games development. Existing learning-based approaches have avoided the challenges of working with 3D meshes, instead using alternative object representations that are more compatible with neural architectures and training approaches. 
We present an approach which models the mesh directly, predicting mesh vertices and faces sequentially using a Transformer-based architecture. Our model can condition on a range of inputs, including object classes, voxels, and images, and because the model is probabilistic it can produce samples that capture uncertainty in ambiguous scenarios. We show that the model is capable of producing high-quality, usable meshes, and establish log-likelihood benchmarks for the mesh-modelling task. We also evaluate the conditional models on surface reconstruction metrics against alternative methods, and demonstrate competitive performance despite not training directly on this task.}, + urldate = {2023-03-28}, + publisher = {arXiv}, + author = {Nash, Charlie and Ganin, Yaroslav and Eslami, S. M. Ali and Battaglia, Peter W.}, + month = feb, + year = {2020}, + note = {arXiv:2002.10880 [cs, stat]}, + keywords = {Computer Science - Computer Vision and Pattern Recognition, Computer Science - Machine Learning, Statistics - Machine Learning, Computer Science - Graphics}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/JE5MEK9K/Nash et al. - 2020 - PolyGen An Autoregressive Generative Model of 3D .pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/7Y3HEDRQ/2002.html:text/html}, +} + +@misc{zhang_3dshape2vecset_2023, + title = {{3DShape2VecSet}: {A} {3D} {Shape} {Representation} for {Neural} {Fields} and {Generative} {Diffusion} {Models}}, + shorttitle = {{3DShape2VecSet}}, + url = {http://arxiv.org/abs/2301.11445}, + doi = {10.48550/arXiv.2301.11445}, + abstract = {We introduce 3DShape2VecSet, a novel shape representation for neural fields designed for generative diffusion models. Our shape representation can encode 3D shapes given as surface models or point clouds, and represents them as neural fields. 
The concept of neural fields has previously been combined with a global latent vector, a regular grid of latent vectors, or an irregular grid of latent vectors. Our new representation encodes neural fields on top of a set of vectors. We draw from multiple concepts, such as the radial basis function representation and the cross attention and self-attention function, to design a learnable representation that is especially suitable for processing with transformers. Our results show improved performance in 3D shape encoding and 3D shape generative modeling tasks. We demonstrate a wide variety of generative applications: unconditioned generation, category-conditioned generation, text-conditioned generation, point-cloud completion, and image-conditioned generation.}, + urldate = {2023-03-28}, + publisher = {arXiv}, + author = {Zhang, Biao and Tang, Jiapeng and Niessner, Matthias and Wonka, Peter}, + month = feb, + year = {2023}, + note = {arXiv:2301.11445 [cs]}, + keywords = {Computer Science - Computer Vision and Pattern Recognition, Computer Science - Graphics}, + annote = {Comment: Project demo: https://youtu.be/KKQsQccpBFk}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/T8R7H6N4/Zhang et al. - 2023 - 3DShape2VecSet A 3D Shape Representation for Neur.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/6GNICSIQ/2301.html:text/html}, +} + +@misc{yang_pointflow_2019, + title = {{PointFlow}: {3D} {Point} {Cloud} {Generation} with {Continuous} {Normalizing} {Flows}}, + shorttitle = {{PointFlow}}, + url = {http://arxiv.org/abs/1906.12320}, + doi = {10.48550/arXiv.1906.12320}, + abstract = {As 3D point clouds become the representation of choice for multiple vision and graphics applications, the ability to synthesize or reconstruct high-resolution, high-fidelity point clouds becomes crucial. Despite the recent success of deep learning models in discriminative tasks of point clouds, generating point clouds remains challenging. 
This paper proposes a principled probabilistic framework to generate 3D point clouds by modeling them as a distribution of distributions. Specifically, we learn a two-level hierarchy of distributions where the first level is the distribution of shapes and the second level is the distribution of points given a shape. This formulation allows us to both sample shapes and sample an arbitrary number of points from a shape. Our generative model, named PointFlow, learns each level of the distribution with a continuous normalizing flow. The invertibility of normalizing flows enables the computation of the likelihood during training and allows us to train our model in the variational inference framework. Empirically, we demonstrate that PointFlow achieves state-of-the-art performance in point cloud generation. We additionally show that our model can faithfully reconstruct point clouds and learn useful representations in an unsupervised manner. The code will be available at https://github.com/stevenygd/PointFlow.}, + urldate = {2023-03-28}, + publisher = {arXiv}, + author = {Yang, Guandao and Huang, Xun and Hao, Zekun and Liu, Ming-Yu and Belongie, Serge and Hariharan, Bharath}, + month = sep, + year = {2019}, + note = {arXiv:1906.12320 [cs]}, + keywords = {Computer Science - Computer Vision and Pattern Recognition, Computer Science - Machine Learning}, + annote = {Comment: Published in ICCV 2019}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/V87MQMLC/Yang et al. 
- 2019 - PointFlow 3D Point Cloud Generation with Continuo.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/KEHU85VD/1906.html:text/html}, +} + +@misc{fan_generative_2023, + title = {Generative {Diffusion} {Models} on {Graphs}: {Methods} and {Applications}}, + shorttitle = {Generative {Diffusion} {Models} on {Graphs}}, + url = {http://arxiv.org/abs/2302.02591}, + doi = {10.48550/arXiv.2302.02591}, + abstract = {Diffusion models, as a novel generative paradigm, have achieved remarkable success in various image generation tasks such as image inpainting, image-to-text translation, and video generation. Graph generation is a crucial computational task on graphs with numerous real-world applications. It aims to learn the distribution of given graphs and then generate new graphs. Given the great success of diffusion models in image generation, increasing efforts have been made to leverage these techniques to advance graph generation in recent years. In this paper, we first provide a comprehensive overview of generative diffusion models on graphs, In particular, we review representative algorithms for three variants of graph diffusion models, i.e., Score Matching with Langevin Dynamics (SMLD), Denoising Diffusion Probabilistic Model (DDPM), and Score-based Generative Model (SGM). Then, we summarize the major applications of generative diffusion models on graphs with a specific focus on molecule and protein modeling. 
Finally, we discuss promising directions in generative diffusion models on graph-structured data.}, + urldate = {2023-03-27}, + publisher = {arXiv}, + author = {Fan, Wenqi and Liu, Chengyi and Liu, Yunqing and Li, Jiatong and Li, Hang and Liu, Hui and Tang, Jiliang and Li, Qing}, + month = feb, + year = {2023}, + note = {arXiv:2302.02591 [cs]}, + keywords = {Computer Science - Machine Learning, Computer Science - Artificial Intelligence, Computer Science - Social and Information Networks}, + annote = { + +Score Matching with Langevin Dynamics (SMLD) + + +Denoising Diffusion Probabilistic Model (DDPM) + + +Score-based Generative Model (SGM) + + + +}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/3M3G2JY5/Fan et al. - 2023 - Generative Diffusion Models on Graphs Methods and.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/8YV9HJ3W/2302.html:text/html}, +} + +@misc{zhu_survey_2022, + title = {A {Survey} on {Deep} {Graph} {Generation}: {Methods} and {Applications}}, + shorttitle = {A {Survey} on {Deep} {Graph} {Generation}}, + url = {http://arxiv.org/abs/2203.06714}, + doi = {10.48550/arXiv.2203.06714}, + abstract = {Graphs are ubiquitous in encoding relational information of real-world objects in many domains. Graph generation, whose purpose is to generate new graphs from a distribution similar to the observed graphs, has received increasing attention thanks to the recent advances of deep learning models. In this paper, we conduct a comprehensive review on the existing literature of deep graph generation from a variety of emerging methods to its wide application areas. Specifically, we first formulate the problem of deep graph generation and discuss its difference with several related graph learning tasks. Secondly, we divide the state-of-the-art methods into three categories based on model architectures and summarize their generation strategies. Thirdly, we introduce three key application areas of deep graph generation. 
Lastly, we highlight challenges and opportunities in the future study of deep graph generation. We hope that our survey will be useful for researchers and practitioners who are interested in this exciting and rapidly-developing field.}, + urldate = {2023-03-27}, + publisher = {arXiv}, + author = {Zhu, Yanqiao and Du, Yuanqi and Wang, Yinkai and Xu, Yichen and Zhang, Jieyu and Liu, Qiang and Wu, Shu}, + month = dec, + year = {2022}, + note = {arXiv:2203.06714 [cs, q-bio]}, + keywords = {Computer Science - Machine Learning, Computer Science - Social and Information Networks, Quantitative Biology - Molecular Networks}, + annote = {Comment: Accepted to the First Learning on Graphs Conference (LoG 2022)}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/SQWM9VTD/Zhu et al. - 2022 - A Survey on Deep Graph Generation Methods and App.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/IWAETBS6/2203.html:text/html}, +} + +@misc{shah_auto-decoding_2020, + title = {Auto-decoding {Graphs}}, + url = {http://arxiv.org/abs/2006.02879}, + doi = {10.48550/arXiv.2006.02879}, + abstract = {We present an approach to synthesizing new graph structures from empirically specified distributions. The generative model is an auto-decoder that learns to synthesize graphs from latent codes. The graph synthesis model is learned jointly with an empirical distribution over the latent codes. Graphs are synthesized using self-attention modules that are trained to identify likely connectivity patterns. Graph-based normalizing flows are used to sample latent codes from the distribution learned by the auto-decoder. The resulting model combines accuracy and scalability. 
On benchmark datasets of large graphs, the presented model outperforms the state of the art by a factor of 1.5 in mean accuracy and average rank across at least three different graph statistics, with a 2x speedup during inference.}, + urldate = {2023-03-27}, + publisher = {arXiv}, + author = {Shah, Sohil Atul and Koltun, Vladlen}, + month = jun, + year = {2020}, + note = {arXiv:2006.02879 [cs, stat]}, + keywords = {Computer Science - Machine Learning, Statistics - Machine Learning}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/M7JSQ9YK/Shah et Koltun - 2020 - Auto-decoding Graphs.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/WLZYXR33/2006.html:text/html}, +} + +@misc{faez_deep_2020, + title = {Deep {Graph} {Generators}: {A} {Survey}}, + shorttitle = {Deep {Graph} {Generators}}, + url = {http://arxiv.org/abs/2012.15544}, + doi = {10.48550/arXiv.2012.15544}, + abstract = {Deep generative models have achieved great success in areas such as image, speech, and natural language processing in the past few years. Thanks to the advances in graph-based deep learning, and in particular graph representation learning, deep graph generation methods have recently emerged with new applications ranging from discovering novel molecular structures to modeling social networks. This paper conducts a comprehensive survey on deep learning-based graph generation approaches and classifies them into five broad categories, namely, autoregressive, autoencoder-based, RL-based, adversarial, and flow-based graph generators, providing the readers a detailed description of the methods in each class. We also present publicly available source codes, commonly used datasets, and the most widely utilized evaluation metrics. 
Finally, we highlight the existing challenges and discuss future research directions.}, + urldate = {2023-03-27}, + publisher = {arXiv}, + author = {Faez, Faezeh and Ommi, Yassaman and Baghshah, Mahdieh Soleymani and Rabiee, Hamid R.}, + month = dec, + year = {2020}, + note = {arXiv:2012.15544 [cs]}, + keywords = {Computer Science - Machine Learning, Computer Science - Artificial Intelligence, Computer Science - Social and Information Networks}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/G3J3B658/Faez et al. - 2020 - Deep Graph Generators A Survey.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/NSQQDIEH/2012.html:text/html}, +} + +@misc{shayestehfard_aligngraph_2023, + title = {{AlignGraph}: {A} {Group} of {Generative} {Models} for {Graphs}}, + shorttitle = {{AlignGraph}}, + url = {http://arxiv.org/abs/2301.11273}, + doi = {10.48550/arXiv.2301.11273}, + abstract = {It is challenging for generative models to learn a distribution over graphs because of the lack of permutation invariance: nodes may be ordered arbitrarily across graphs, and standard graph alignment is combinatorial and notoriously expensive. We propose AlignGraph, a group of generative models that combine fast and efficient graph alignment methods with a family of deep generative models that are invariant to node permutations. Our experiments demonstrate that our framework successfully learns graph distributions, outperforming competitors by 25\% -560\% in relevant performance scores.}, + urldate = {2023-03-27}, + publisher = {arXiv}, + author = {Shayestehfard, Kimia and Brooks, Dana and Ioannidis, Stratis}, + month = jan, + year = {2023}, + note = {arXiv:2301.11273 [cs]}, + keywords = {Computer Science - Machine Learning, Computer Science - Social and Information Networks}, + annote = {Comment: 12 pages, 2 figures, 4 tables}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/I69NJXUI/Shayestehfard et al. 
- 2023 - AlignGraph A Group of Generative Models for Graph.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/LPF9DVAW/2301.html:text/html}, +} + +@article{kipf_graph_nodate, + title = {Graph {Neural} {Networks} for {Modeling} {Small} {Molecules}}, + language = {en}, + author = {Kipf, Thomas and Veličković, Petar and Li, Yujia}, + file = {Kipf et al. - Graph Neural Networks for Modeling Small Molecules.pdf:/home/laurent/Zotero/storage/6WZAZFX8/Kipf et al. - Graph Neural Networks for Modeling Small Molecules.pdf:application/pdf}, +} + +@misc{simonovsky_graphvae_2018, + title = {{GraphVAE}: {Towards} {Generation} of {Small} {Graphs} {Using} {Variational} {Autoencoders}}, + shorttitle = {{GraphVAE}}, + url = {http://arxiv.org/abs/1802.03480}, + doi = {10.48550/arXiv.1802.03480}, + abstract = {Deep learning on graphs has become a popular research topic with many applications. However, past work has concentrated on learning graph embedding tasks, which is in contrast with advances in generative models for images and text. Is it possible to transfer this progress to the domain of graphs? We propose to sidestep hurdles associated with linearization of such discrete structures by having a decoder output a probabilistic fully-connected graph of a predefined maximum size directly at once. Our method is formulated as a variational autoencoder. 
We evaluate on the challenging task of molecule generation.}, + urldate = {2023-03-27}, + publisher = {arXiv}, + author = {Simonovsky, Martin and Komodakis, Nikos}, + month = feb, + year = {2018}, + note = {arXiv:1802.03480 [cs]}, + keywords = {Computer Science - Computer Vision and Pattern Recognition, Computer Science - Machine Learning, Computer Science - Neural and Evolutionary Computing}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/IWG2JIVU/Simonovsky et Komodakis - 2018 - GraphVAE Towards Generation of Small Graphs Using.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/PW5ZG5WH/1802.html:text/html}, +} + +@misc{liao_efficient_2020, + title = {Efficient {Graph} {Generation} with {Graph} {Recurrent} {Attention} {Networks}}, + url = {http://arxiv.org/abs/1910.00760}, + abstract = {We propose a new family of efficient and expressive deep generative models of graphs, called Graph Recurrent Attention Networks (GRANs). Our model generates graphs one block of nodes and associated edges at a time. The block size and sampling stride allow us to trade off sample quality for efficiency. Compared to previous RNN-based graph generative models, our framework better captures the auto-regressive conditioning between the already-generated and to-be-generated parts of the graph using Graph Neural Networks (GNNs) with attention. This not only reduces the dependency on node ordering but also bypasses the long-term bottleneck caused by the sequential nature of RNNs. Moreover, we parameterize the output distribution per block using a mixture of Bernoulli, which captures the correlations among generated edges within the block. Finally, we propose to handle node orderings in generation by marginalizing over a family of canonical orderings. On standard benchmarks, we achieve state-of-the-art time efficiency and sample quality compared to previous models. 
Additionally, we show our model is capable of generating large graphs of up to 5K nodes with good quality. To the best of our knowledge, GRAN is the first deep graph generative model that can scale to this size. Our code is released at: https://github.com/lrjconan/GRAN.}, + urldate = {2023-03-27}, + publisher = {arXiv}, + author = {Liao, Renjie and Li, Yujia and Song, Yang and Wang, Shenlong and Nash, Charlie and Hamilton, William L. and Duvenaud, David and Urtasun, Raquel and Zemel, Richard S.}, + month = jul, + year = {2020}, + note = {arXiv:1910.00760 [cs, stat]}, + keywords = {Computer Science - Machine Learning, Statistics - Machine Learning}, + annote = {Comment: Neural Information Processing Systems (NeurIPS) 2019}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/YB44QN2I/Liao et al. - 2020 - Efficient Graph Generation with Graph Recurrent At.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/XXCHWITE/1910.html:text/html}, +} + +@misc{guo_systematic_2022, + title = {A {Systematic} {Survey} on {Deep} {Generative} {Models} for {Graph} {Generation}}, + url = {http://arxiv.org/abs/2007.06686}, + doi = {10.48550/arXiv.2007.06686}, + abstract = {Graphs are important data representations for describing objects and their relationships, which appear in a wide diversity of real-world scenarios. As one of a critical problem in this area, graph generation considers learning the distributions of given graphs and generating more novel graphs. Owing to their wide range of applications, generative models for graphs, which have a rich history, however, are traditionally hand-crafted and only capable of modeling a few statistical properties of graphs. Recent advances in deep generative models for graph generation is an important step towards improving the fidelity of generated graphs and paves the way for new kinds of applications. 
This article provides an extensive overview of the literature in the field of deep generative models for graph generation. Firstly, the formal definition of deep generative models for the graph generation and the preliminary knowledge are provided. Secondly, taxonomies of deep generative models for both unconditional and conditional graph generation are proposed respectively; the existing works of each are compared and analyzed. After that, an overview of the evaluation metrics in this specific domain is provided. Finally, the applications that deep graph generation enables are summarized and five promising future research directions are highlighted.}, + urldate = {2023-03-24}, + publisher = {arXiv}, + author = {Guo, Xiaojie and Zhao, Liang}, + month = oct, + year = {2022}, + note = {arXiv:2007.06686 [cs, stat]}, + keywords = {Computer Science - Machine Learning, Statistics - Machine Learning}, + annote = {Comment: Accepted in TPAMI}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/M6I3YJP8/Guo et Zhao - 2022 - A Systematic Survey on Deep Generative Models for .pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/8N8L3XCF/2007.html:text/html}, +} + +@misc{doersch_tutorial_2021, + title = {Tutorial on {Variational} {Autoencoders}}, + url = {http://arxiv.org/abs/1606.05908}, + doi = {10.48550/arXiv.1606.05908}, + abstract = {In just three years, Variational Autoencoders (VAEs) have emerged as one of the most popular approaches to unsupervised learning of complicated distributions. VAEs are appealing because they are built on top of standard function approximators (neural networks), and can be trained with stochastic gradient descent. VAEs have already shown promise in generating many kinds of complicated data, including handwritten digits, faces, house numbers, CIFAR images, physical models of scenes, segmentation, and predicting the future from static images. 
This tutorial introduces the intuitions behind VAEs, explains the mathematics behind them, and describes some empirical behavior. No prior knowledge of variational Bayesian methods is assumed.}, + urldate = {2023-03-24}, + publisher = {arXiv}, + author = {Doersch, Carl}, + month = jan, + year = {2021}, + note = {arXiv:1606.05908 [cs, stat]}, + keywords = {Computer Science - Machine Learning, Statistics - Machine Learning}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/84J4LNV2/Doersch - 2021 - Tutorial on Variational Autoencoders.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/EWJRB7BM/1606.html:text/html}, +} + +@misc{salha-galvan_contributions_2022, + title = {Contributions to {Representation} {Learning} with {Graph} {Autoencoders} and {Applications} to {Music} {Recommendation}}, + url = {http://arxiv.org/abs/2205.14651}, + doi = {10.48550/arXiv.2205.14651}, + abstract = {Graph autoencoders (GAE) and variational graph autoencoders (VGAE) emerged as two powerful groups of unsupervised node embedding methods, with various applications to graph-based machine learning problems such as link prediction and community detection. Nonetheless, at the beginning of this Ph.D. project, GAE and VGAE models were also suffering from key limitations, preventing them from being adopted in the industry. In this thesis, we present several contributions to improve these models, with the general aim of facilitating their use to address industrial-level problems involving graph representations. Firstly, we propose two strategies to overcome the scalability issues of previous GAE and VGAE models, permitting to effectively train these models on large graphs with millions of nodes and edges. These strategies leverage graph degeneracy and stochastic subgraph decoding techniques, respectively. Besides, we introduce Gravity-Inspired GAE and VGAE, providing the first extensions of these models for directed graphs, that are ubiquitous in industrial applications. 
We also consider extensions of GAE and VGAE models for dynamic graphs. Furthermore, we argue that GAE and VGAE models are often unnecessarily complex, and we propose to simplify them by leveraging linear encoders. Lastly, we introduce Modularity-Aware GAE and VGAE to improve community detection on graphs, while jointly preserving good performances on link prediction. In the last part of this thesis, we evaluate our methods on several graphs extracted from the music streaming service Deezer. We put the emphasis on graph-based music recommendation problems. In particular, we show that our methods can improve the detection of communities of similar musical items to recommend to users, that they can effectively rank similar artists in a cold start setting, and that they permit modeling the music genre perception across cultures.}, + urldate = {2023-03-24}, + publisher = {arXiv}, + author = {Salha-Galvan, Guillaume}, + month = may, + year = {2022}, + note = {arXiv:2205.14651 [cs] +version: 1}, + keywords = {Computer Science - Machine Learning, Computer Science - Social and Information Networks, Computer Science - Information Retrieval}, + annote = {Comment: Ph.D. thesis defended at {\'E}cole Polytechnique (IPP) in March 2022. As mentioned in this thesis, several chapters present results also published in scientific articles written with co-authors}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/4R2Z87LG/Salha-Galvan - 2022 - Contributions to Representation Learning with Grap.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/AMRY4RUI/2205.html:text/html}, +} + +@misc{su_f-vaes_2018, + title = {f-{VAEs}: {Improve} {VAEs} with {Conditional} {Flows}}, + shorttitle = {f-{VAEs}}, + url = {http://arxiv.org/abs/1809.05861}, + doi = {10.48550/arXiv.1809.05861}, + abstract = {In this paper, we integrate VAEs and flow-based generative models successfully and get f-VAEs. 
Compared with VAEs, f-VAEs generate more vivid images, solved the blurred-image problem of VAEs. Compared with flow-based models such as Glow, f-VAE is more lightweight and converges faster, achieving the same performance under smaller-size architecture.}, + urldate = {2023-03-24}, + publisher = {arXiv}, + author = {Su, Jianlin and Wu, Guang}, + month = sep, + year = {2018}, + note = {arXiv:1809.05861 [cs, stat]}, + keywords = {Computer Science - Computer Vision and Pattern Recognition, Computer Science - Machine Learning, Statistics - Machine Learning}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/2YPANJ73/Su et Wu - 2018 - f-VAEs Improve VAEs with Conditional Flows.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/9M6GUZEX/1809.html:text/html}, +} + +@misc{burgess_understanding_2018, + title = {Understanding disentangling in $\beta$-{VAE}}, + url = {http://arxiv.org/abs/1804.03599}, + doi = {10.48550/arXiv.1804.03599}, + abstract = {We present new intuitions and theoretical assessments of the emergence of disentangled representation in variational autoencoders. Taking a rate-distortion theory perspective, we show the circumstances under which representations aligned with the underlying generative factors of variation of data emerge when optimising the modified ELBO bound in $\beta$-VAE, as training progresses. From these insights, we propose a modification to the training regime of $\beta$-VAE, that progressively increases the information capacity of the latent code during training. This modification facilitates the robust learning of disentangled representations in $\beta$-VAE, without the previous trade-off in reconstruction accuracy.}, + urldate = {2023-03-23}, + publisher = {arXiv}, + author = {Burgess, Christopher P. 
and Higgins, Irina and Pal, Arka and Matthey, Loic and Watters, Nick and Desjardins, Guillaume and Lerchner, Alexander}, + month = apr, + year = {2018}, + note = {arXiv:1804.03599 [cs, stat]}, + keywords = {Computer Science - Machine Learning, Statistics - Machine Learning, Computer Science - Artificial Intelligence}, + annote = {Comment: Presented at the 2017 NIPS Workshop on Learning Disentangled Representations}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/I7FNXM4I/Burgess et al. - 2018 - Understanding disentangling in \$beta\$-VAE.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/4JPKDD7F/1804.html:text/html}, +} + +@misc{brody_how_2022, + title = {How {Attentive} are {Graph} {Attention} {Networks}?}, + url = {http://arxiv.org/abs/2105.14491}, + doi = {10.48550/arXiv.2105.14491}, + abstract = {Graph Attention Networks (GATs) are one of the most popular GNN architectures and are considered as the state-of-the-art architecture for representation learning with graphs. In GAT, every node attends to its neighbors given its own representation as the query. However, in this paper we show that GAT computes a very limited kind of attention: the ranking of the attention scores is unconditioned on the query node. We formally define this restricted kind of attention as static attention and distinguish it from a strictly more expressive dynamic attention. Because GATs use a static attention mechanism, there are simple graph problems that GAT cannot express: in a controlled problem, we show that static attention hinders GAT from even fitting the training data. To remove this limitation, we introduce a simple fix by modifying the order of operations and propose GATv2: a dynamic graph attention variant that is strictly more expressive than GAT. We perform an extensive evaluation and show that GATv2 outperforms GAT across 11 OGB and other benchmarks while we match their parametric costs. 
Our code is available at https://github.com/tech-srl/how\_attentive\_are\_gats . GATv2 is available as part of the PyTorch Geometric library, the Deep Graph Library, and the TensorFlow GNN library.}, + urldate = {2023-03-22}, + publisher = {arXiv}, + author = {Brody, Shaked and Alon, Uri and Yahav, Eran}, + month = jan, + year = {2022}, + note = {arXiv:2105.14491 [cs]}, + keywords = {Computer Science - Machine Learning}, + annote = {Comment: Published in ICLR 2022}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/C5CY9B82/Brody et al. - 2022 - How Attentive are Graph Attention Networks.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/RWEJ8RAY/2105.html:text/html}, +} + +@misc{velickovic_graph_2018, + title = {Graph {Attention} {Networks}}, + url = {http://arxiv.org/abs/1710.10903}, + doi = {10.48550/arXiv.1710.10903}, + abstract = {We present graph attention networks (GATs), novel neural network architectures that operate on graph-structured data, leveraging masked self-attentional layers to address the shortcomings of prior methods based on graph convolutions or their approximations. By stacking layers in which nodes are able to attend over their neighborhoods' features, we enable (implicitly) specifying different weights to different nodes in a neighborhood, without requiring any kind of costly matrix operation (such as inversion) or depending on knowing the graph structure upfront. In this way, we address several key challenges of spectral-based graph neural networks simultaneously, and make our model readily applicable to inductive as well as transductive problems. 
Our GAT models have achieved or matched state-of-the-art results across four established transductive and inductive graph benchmarks: the Cora, Citeseer and Pubmed citation network datasets, as well as a protein-protein interaction dataset (wherein test graphs remain unseen during training).}, + urldate = {2023-03-22}, + publisher = {arXiv}, + author = {Veličković, Petar and Cucurull, Guillem and Casanova, Arantxa and Romero, Adriana and Liò, Pietro and Bengio, Yoshua}, + month = feb, + year = {2018}, + note = {arXiv:1710.10903 [cs, stat]}, + keywords = {Computer Science - Machine Learning, Statistics - Machine Learning, Computer Science - Artificial Intelligence, Computer Science - Social and Information Networks}, + annote = {Comment: To appear at ICLR 2018. 12 pages, 2 figures}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/3X4HALUD/Veličković et al. - 2018 - Graph Attention Networks.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/JGM27EQ6/1710.html:text/html}, +} + +@misc{kipf_semi-supervised_2017, + title = {Semi-{Supervised} {Classification} with {Graph} {Convolutional} {Networks}}, + url = {http://arxiv.org/abs/1609.02907}, + doi = {10.48550/arXiv.1609.02907}, + abstract = {We present a scalable approach for semi-supervised learning on graph-structured data that is based on an efficient variant of convolutional neural networks which operate directly on graphs. We motivate the choice of our convolutional architecture via a localized first-order approximation of spectral graph convolutions. Our model scales linearly in the number of graph edges and learns hidden layer representations that encode both local graph structure and features of nodes. In a number of experiments on citation networks and on a knowledge graph dataset we demonstrate that our approach outperforms related methods by a significant margin.}, + urldate = {2023-03-22}, + publisher = {arXiv}, + author = {Kipf, Thomas N. 
and Welling, Max}, + month = feb, + year = {2017}, + note = {arXiv:1609.02907 [cs, stat]}, + keywords = {Computer Science - Machine Learning, Statistics - Machine Learning}, + annote = {Comment: Published as a conference paper at ICLR 2017}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/N2GXN6ZZ/Kipf et Welling - 2017 - Semi-Supervised Classification with Graph Convolut.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/WMTNID7V/1609.html:text/html}, +} + +@misc{gao_graph_2019, + title = {Graph {U}-{Nets}}, + url = {http://arxiv.org/abs/1905.05178}, + doi = {10.48550/arXiv.1905.05178}, + abstract = {We consider the problem of representation learning for graph data. Convolutional neural networks can naturally operate on images, but have significant challenges in dealing with graph data. Given images are special cases of graphs with nodes lie on 2D lattices, graph embedding tasks have a natural correspondence with image pixel-wise prediction tasks such as segmentation. While encoder-decoder architectures like U-Nets have been successfully applied on many image pixel-wise prediction tasks, similar methods are lacking for graph data. This is due to the fact that pooling and up-sampling operations are not natural on graph data. To address these challenges, we propose novel graph pooling (gPool) and unpooling (gUnpool) operations in this work. The gPool layer adaptively selects some nodes to form a smaller graph based on their scalar projection values on a trainable projection vector. We further propose the gUnpool layer as the inverse operation of the gPool layer. The gUnpool layer restores the graph into its original structure using the position information of nodes selected in the corresponding gPool layer. Based on our proposed gPool and gUnpool layers, we develop an encoder-decoder model on graph, known as the graph U-Nets. 
Our experimental results on node classification and graph classification tasks demonstrate that our methods achieve consistently better performance than previous models.}, + urldate = {2023-03-21}, + publisher = {arXiv}, + author = {Gao, Hongyang and Ji, Shuiwang}, + month = may, + year = {2019}, + note = {arXiv:1905.05178 [cs, stat]}, + keywords = {Computer Science - Machine Learning, Statistics - Machine Learning}, + annote = {Comment: 10 pages, ICML19}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/QIVY2Z39/Gao et Ji - 2019 - Graph U-Nets.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/YHWGK3H7/1905.html:text/html}, +} + +@misc{kipf_variational_2016, + title = {Variational {Graph} {Auto}-{Encoders}}, + url = {http://arxiv.org/abs/1611.07308}, + doi = {10.48550/arXiv.1611.07308}, + abstract = {We introduce the variational graph auto-encoder (VGAE), a framework for unsupervised learning on graph-structured data based on the variational auto-encoder (VAE). This model makes use of latent variables and is capable of learning interpretable latent representations for undirected graphs. We demonstrate this model using a graph convolutional network (GCN) encoder and a simple inner product decoder. Our model achieves competitive results on a link prediction task in citation networks. In contrast to most existing models for unsupervised learning on graph-structured data and link prediction, our model can naturally incorporate node features, which significantly improves predictive performance on a number of benchmark datasets.}, + urldate = {2023-03-21}, + publisher = {arXiv}, + author = {Kipf, Thomas N. 
and Welling, Max}, + month = nov, + year = {2016}, + note = {arXiv:1611.07308 [cs, stat]}, + keywords = {Computer Science - Machine Learning, Statistics - Machine Learning}, + annote = {Comment: Bayesian Deep Learning Workshop (NIPS 2016)}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/8LYSMTVS/Kipf et Welling - 2016 - Variational Graph Auto-Encoders.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/KCLQX6TX/1611.html:text/html}, +} + +@misc{alemi_deep_2019, + title = {Deep {Variational} {Information} {Bottleneck}}, + url = {http://arxiv.org/abs/1612.00410}, + doi = {10.48550/arXiv.1612.00410}, + abstract = {We present a variational approximation to the information bottleneck of Tishby et al. (1999). This variational approach allows us to parameterize the information bottleneck model using a neural network and leverage the reparameterization trick for efficient training. We call this method "Deep Variational Information Bottleneck", or Deep VIB. We show that models trained with the VIB objective outperform those that are trained with other forms of regularization, in terms of generalization performance and robustness to adversarial attack.}, + urldate = {2023-03-21}, + publisher = {arXiv}, + author = {Alemi, Alexander A. and Fischer, Ian and Dillon, Joshua V. and Murphy, Kevin}, + month = oct, + year = {2019}, + note = {arXiv:1612.00410 [cs, math]}, + keywords = {Computer Science - Machine Learning, Computer Science - Information Theory}, + annote = {Comment: 19 pages, 8 figures, Accepted to ICLR17}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/LMPVVWG5/Alemi et al. 
- 2019 - Deep Variational Information Bottleneck.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/VBXN4EUZ/1612.html:text/html}, +} + +@misc{thomas_kpconv_2019, + title = {{KPConv}: {Flexible} and {Deformable} {Convolution} for {Point} {Clouds}}, + shorttitle = {{KPConv}}, + url = {http://arxiv.org/abs/1904.08889}, + abstract = {We present Kernel Point Convolution (KPConv), a new design of point convolution, i.e. that operates on point clouds without any intermediate representation. The convolution weights of KPConv are located in Euclidean space by kernel points, and applied to the input points close to them. Its capacity to use any number of kernel points gives KPConv more flexibility than fixed grid convolutions. Furthermore, these locations are continuous in space and can be learned by the network. Therefore, KPConv can be extended to deformable convolutions that learn to adapt kernel points to local geometry. Thanks to a regular subsampling strategy, KPConv is also efficient and robust to varying densities. Whether they use deformable KPConv for complex tasks, or rigid KPconv for simpler tasks, our networks outperform state-of-the-art classification and segmentation approaches on several datasets. We also offer ablation studies and visualizations to provide understanding of what has been learned by KPConv and to validate the descriptive power of deformable KPConv.}, + urldate = {2023-05-15}, + publisher = {arXiv}, + author = {Thomas, Hugues and Qi, Charles R. 
and Deschaud, Jean-Emmanuel and Marcotegui, Beatriz and Goulette, François and Guibas, Leonidas J.}, + month = aug, + year = {2019}, + note = {arXiv:1904.08889 [cs]}, + keywords = {Computer Science - Computer Vision and Pattern Recognition}, + annote = {Comment: Camera-ready, accepted to ICCV 2019; project website: https://github.com/HuguesTHOMAS/KPConv}, + file = {arXiv.org Snapshot:/home/laurent/Zotero/storage/5CY645DK/1904.html:text/html;Full Text PDF:/home/laurent/Zotero/storage/782FKEML/Thomas et al. - 2019 - KPConv Flexible and Deformable Convolution for Po.pdf:application/pdf}, +} + +@misc{tang_searching_2020, + title = {Searching {Efficient} {3D} {Architectures} with {Sparse} {Point}-{Voxel} {Convolution}}, + url = {http://arxiv.org/abs/2007.16100}, + doi = {10.48550/arXiv.2007.16100}, + abstract = {Self-driving cars need to understand 3D scenes efficiently and accurately in order to drive safely. Given the limited hardware resources, existing 3D perception models are not able to recognize small instances (e.g., pedestrians, cyclists) very well due to the low-resolution voxelization and aggressive downsampling. To this end, we propose Sparse Point-Voxel Convolution (SPVConv), a lightweight 3D module that equips the vanilla Sparse Convolution with the high-resolution point-based branch. With negligible overhead, this point-based branch is able to preserve the fine details even from large outdoor scenes. To explore the spectrum of efficient 3D models, we first define a flexible architecture design space based on SPVConv, and we then present 3D Neural Architecture Search (3D-NAS) to search the optimal network architecture over this diverse design space efficiently and effectively. Experimental results validate that the resulting SPVNAS model is fast and accurate: it outperforms the state-of-the-art MinkowskiNet by 3.3\%, ranking 1st on the competitive SemanticKITTI leaderboard. 
It also achieves 8x computation reduction and 3x measured speedup over MinkowskiNet with higher accuracy. Finally, we transfer our method to 3D object detection, and it achieves consistent improvements over the one-stage detection baseline on KITTI.}, + urldate = {2023-04-26}, + publisher = {arXiv}, + author = {Tang, Haotian and Liu, Zhijian and Zhao, Shengyu and Lin, Yujun and Lin, Ji and Wang, Hanrui and Han, Song}, + month = aug, + year = {2020}, + note = {arXiv:2007.16100 [cs]}, + keywords = {Computer Science - Computer Vision and Pattern Recognition}, + annote = {Comment: ECCV 2020. The first two authors contributed equally to this work. Project page: http://spvnas.mit.edu/}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/A2S9RZVE/Tang et al. - 2020 - Searching Efficient 3D Architectures with Sparse P.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/QK6WTDZH/2007.html:text/html}, +} + +@misc{nguyen_point-set_2021, + title = {Point-set {Distances} for {Learning} {Representations} of {3D} {Point} {Clouds}}, + url = {http://arxiv.org/abs/2102.04014}, + doi = {10.48550/arXiv.2102.04014}, + abstract = {Learning an effective representation of 3D point clouds requires a good metric to measure the discrepancy between two 3D point sets, which is non-trivial due to their irregularity. Most of the previous works resort to using the Chamfer discrepancy or Earth Mover's distance, but those metrics are either ineffective in measuring the differences between point clouds or computationally expensive. In this paper, we conduct a systematic study with extensive experiments on distance metrics for 3D point clouds. From this study, we propose to use sliced Wasserstein distance and its variants for learning representations of 3D point clouds. In addition, we introduce a new algorithm to estimate sliced Wasserstein distance that guarantees that the estimated value is close enough to the true one. 
Experiments show that the sliced Wasserstein distance and its variants allow the neural network to learn a more efficient representation compared to the Chamfer discrepancy. We demonstrate the efficiency of the sliced Wasserstein metric and its variants on several tasks in 3D computer vision including training a point cloud autoencoder, generative modeling, transfer learning, and point cloud registration.}, + urldate = {2023-04-21}, + publisher = {arXiv}, + author = {Nguyen, Trung and Pham, Quang-Hieu and Le, Tam and Pham, Tung and Ho, Nhat and Hua, Binh-Son}, + month = sep, + year = {2021}, + note = {arXiv:2102.04014 [cs]}, + keywords = {Computer Science - Computer Vision and Pattern Recognition}, + annote = {Comment: ICCV 2021 camera-ready paper (8 pages) with supplementary (3.5 pages)}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/ZND8758D/Nguyen et al. - 2021 - Point-set Distances for Learning Representations o.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/IUDCHXC2/2102.html:text/html}, +} + +@misc{peng_shape_2021, + title = {Shape {As} {Points}: {A} {Differentiable} {Poisson} {Solver}}, + shorttitle = {Shape {As} {Points}}, + url = {http://arxiv.org/abs/2106.03452}, + doi = {10.48550/arXiv.2106.03452}, + abstract = {In recent years, neural implicit representations gained popularity in 3D reconstruction due to their expressiveness and flexibility. However, the implicit nature of neural implicit representations results in slow inference time and requires careful initialization. In this paper, we revisit the classic yet ubiquitous point cloud representation and introduce a differentiable point-to-mesh layer using a differentiable formulation of Poisson Surface Reconstruction (PSR) that allows for a GPU-accelerated fast solution of the indicator function given an oriented point cloud. 
The differentiable PSR layer allows us to efficiently and differentiably bridge the explicit 3D point representation with the 3D mesh via the implicit indicator field, enabling end-to-end optimization of surface reconstruction metrics such as Chamfer distance. This duality between points and meshes hence allows us to represent shapes as oriented point clouds, which are explicit, lightweight and expressive. Compared to neural implicit representations, our Shape-As-Points (SAP) model is more interpretable, lightweight, and accelerates inference time by one order of magnitude. Compared to other explicit representations such as points, patches, and meshes, SAP produces topology-agnostic, watertight manifold surfaces. We demonstrate the effectiveness of SAP on the task of surface reconstruction from unoriented point clouds and learning-based reconstruction.}, + urldate = {2023-04-17}, + publisher = {arXiv}, + author = {Peng, Songyou and Jiang, Chiyu "Max" and Liao, Yiyi and Niemeyer, Michael and Pollefeys, Marc and Geiger, Andreas}, + month = nov, + year = {2021}, + note = {arXiv:2106.03452 [cs]}, + keywords = {Computer Science - Computer Vision and Pattern Recognition, Computer Science - Graphics}, + annote = {Comment: NeurIPS 2021 (Oral). Project page: https://pengsongyou.github.io/sap. Code: https://github.com/autonomousvision/shape\_as\_points}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/V5BVL34J/Peng et al. - 2021 - Shape As Points A Differentiable Poisson Solver.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/7J3IDAKQ/2106.html:text/html}, +} + +@misc{sulzer_deep_2022, + title = {Deep {Surface} {Reconstruction} from {Point} {Clouds} with {Visibility} {Information}}, + url = {http://arxiv.org/abs/2202.01810}, + doi = {10.48550/arXiv.2202.01810}, + abstract = {Most current neural networks for reconstructing surfaces from point clouds ignore sensor poses and only operate on raw point locations. 
Sensor visibility, however, holds meaningful information regarding space occupancy and surface orientation. In this paper, we present two simple ways to augment raw point clouds with visibility information, so it can directly be leveraged by surface reconstruction networks with minimal adaptation. Our proposed modifications consistently improve the accuracy of generated surfaces as well as the generalization ability of the networks to unseen shape domains. Our code and data is available at https://github.com/raphaelsulzer/dsrv-data.}, + urldate = {2023-04-17}, + publisher = {arXiv}, + author = {Sulzer, Raphael and Landrieu, Loic and Boulch, Alexandre and Marlet, Renaud and Vallet, Bruno}, + month = feb, + year = {2022}, + note = {arXiv:2202.01810 [cs]}, + keywords = {Computer Science - Computer Vision and Pattern Recognition}, + annote = {Comment: 13 pages}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/ZDTHHW9H/Sulzer et al. - 2022 - Deep Surface Reconstruction from Point Clouds with.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/X84KMGRU/2202.html:text/html}, +} + +@misc{nam_3d-ldm_2022, + title = {{3D}-{LDM}: {Neural} {Implicit} {3D} {Shape} {Generation} with {Latent} {Diffusion} {Models}}, + shorttitle = {{3D}-{LDM}}, + url = {http://arxiv.org/abs/2212.00842}, + doi = {10.48550/arXiv.2212.00842}, + abstract = {Diffusion models have shown great promise for image generation, beating GANs in terms of generation diversity, with comparable image quality. However, their application to 3D shapes has been limited to point or voxel representations that can in practice not accurately represent a 3D surface. We propose a diffusion model for neural implicit representations of 3D shapes that operates in the latent space of an auto-decoder. This allows us to generate diverse and high quality 3D surfaces. 
We additionally show that we can condition our model on images or text to enable image-to-3D generation and text-to-3D generation using CLIP embeddings. Furthermore, adding noise to the latent codes of existing shapes allows us to explore shape variations.}, + urldate = {2023-04-11}, + publisher = {arXiv}, + author = {Nam, Gimin and Khlifi, Mariem and Rodriguez, Andrew and Tono, Alberto and Zhou, Linqi and Guerrero, Paul}, + month = dec, + year = {2022}, + note = {arXiv:2212.00842 [cs]}, + keywords = {Computer Science - Computer Vision and Pattern Recognition}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/8DKDU5WY/Nam et al. - 2022 - 3D-LDM Neural Implicit 3D Shape Generation with L.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/DMM8K287/2212.html:text/html}, +} + +@misc{zhou_3d_2021, + title = {{3D} {Shape} {Generation} and {Completion} through {Point}-{Voxel} {Diffusion}}, + url = {http://arxiv.org/abs/2104.03670}, + doi = {10.48550/arXiv.2104.03670}, + abstract = {We propose a novel approach for probabilistic generative modeling of 3D shapes. Unlike most existing models that learn to deterministically translate a latent vector to a shape, our model, Point-Voxel Diffusion (PVD), is a unified, probabilistic formulation for unconditional shape generation and conditional, multi-modal shape completion. PVD marries denoising diffusion models with the hybrid, point-voxel representation of 3D shapes. It can be viewed as a series of denoising steps, reversing the diffusion process from observed point cloud data to Gaussian noise, and is trained by optimizing a variational lower bound to the (conditional) likelihood function. 
Experiments demonstrate that PVD is capable of synthesizing high-fidelity shapes, completing partial point clouds, and generating multiple completion results from single-view depth scans of real objects.}, + urldate = {2023-04-04}, + publisher = {arXiv}, + author = {Zhou, Linqi and Du, Yilun and Wu, Jiajun}, + month = aug, + year = {2021}, + note = {arXiv:2104.03670 [cs]}, + keywords = {Computer Science - Computer Vision and Pattern Recognition}, + annote = {Comment: Project page: https://alexzhou907.github.io/pvd}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/WGECL3FJ/Zhou et al. - 2021 - 3D Shape Generation and Completion through Point-V.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/C3AEKFNE/2104.html:text/html}, +} + +@misc{liu_point-voxel_2019, + title = {Point-{Voxel} {CNN} for {Efficient} {3D} {Deep} {Learning}}, + url = {http://arxiv.org/abs/1907.03739}, + doi = {10.48550/arXiv.1907.03739}, + abstract = {We present Point-Voxel CNN (PVCNN) for efficient, fast 3D deep learning. Previous work processes 3D data using either voxel-based or point-based NN models. However, both approaches are computationally inefficient. The computation cost and memory footprints of the voxel-based models grow cubically with the input resolution, making it memory-prohibitive to scale up the resolution. As for point-based networks, up to 80\% of the time is wasted on structuring the sparse data which have rather poor memory locality, not on the actual feature extraction. In this paper, we propose PVCNN that represents the 3D input data in points to reduce the memory consumption, while performing the convolutions in voxels to reduce the irregular, sparse data access and improve the locality. Our PVCNN model is both memory and computation efficient. 
Evaluated on semantic and part segmentation datasets, it achieves much higher accuracy than the voxel-based baseline with 10x GPU memory reduction; it also outperforms the state-of-the-art point-based models with 7x measured speedup on average. Remarkably, the narrower version of PVCNN achieves 2x speedup over PointNet (an extremely efficient model) on part and scene segmentation benchmarks with much higher accuracy. We validate the general effectiveness of PVCNN on 3D object detection: by replacing the primitives in Frustrum PointNet with PVConv, it outperforms Frustrum PointNet++ by 2.4\% mAP on average with 1.5x measured speedup and GPU memory reduction.}, + urldate = {2023-04-04}, + publisher = {arXiv}, + author = {Liu, Zhijian and Tang, Haotian and Lin, Yujun and Han, Song}, + month = dec, + year = {2019}, + note = {arXiv:1907.03739 [cs]}, + keywords = {Computer Science - Computer Vision and Pattern Recognition}, + annote = {Comment: NeurIPS 2019. The first two authors contributed equally to this work. Project page: http://pvcnn.mit.edu/}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/A2XJARYA/Liu et al. - 2019 - Point-Voxel CNN for Efficient 3D Deep Learning.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/LF7RPTGF/1907.html:text/html}, +} + +@misc{qi_pointnet_2017, + title = {{PointNet}: {Deep} {Learning} on {Point} {Sets} for {3D} {Classification} and {Segmentation}}, + shorttitle = {{PointNet}}, + url = {http://arxiv.org/abs/1612.00593}, + doi = {10.48550/arXiv.1612.00593}, + abstract = {Point cloud is an important type of geometric data structure. Due to its irregular format, most researchers transform such data to regular 3D voxel grids or collections of images. This, however, renders data unnecessarily voluminous and causes issues. In this paper, we design a novel type of neural network that directly consumes point clouds and well respects the permutation invariance of points in the input. 
Our network, named PointNet, provides a unified architecture for applications ranging from object classification, part segmentation, to scene semantic parsing. Though simple, PointNet is highly efficient and effective. Empirically, it shows strong performance on par or even better than state of the art. Theoretically, we provide analysis towards understanding of what the network has learnt and why the network is robust with respect to input perturbation and corruption.}, + urldate = {2023-04-04}, + publisher = {arXiv}, + author = {Qi, Charles R. and Su, Hao and Mo, Kaichun and Guibas, Leonidas J.}, + month = apr, + year = {2017}, + note = {arXiv:1612.00593 [cs]}, + keywords = {Computer Science - Computer Vision and Pattern Recognition}, + annote = {Comment: CVPR 2017}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/SV6H7XA9/Qi et al. - 2017 - PointNet Deep Learning on Point Sets for 3D Class.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/YF79EZLH/1612.html:text/html}, +} + +@misc{qi_pointnet_2017-1, + title = {{PointNet}++: {Deep} {Hierarchical} {Feature} {Learning} on {Point} {Sets} in a {Metric} {Space}}, + shorttitle = {{PointNet}++}, + url = {http://arxiv.org/abs/1706.02413}, + doi = {10.48550/arXiv.1706.02413}, + abstract = {Few prior works study deep learning on point sets. PointNet by Qi et al. is a pioneer in this direction. However, by design PointNet does not capture local structures induced by the metric space points live in, limiting its ability to recognize fine-grained patterns and generalizability to complex scenes. In this work, we introduce a hierarchical neural network that applies PointNet recursively on a nested partitioning of the input point set. By exploiting metric space distances, our network is able to learn local features with increasing contextual scales. 
With further observation that point sets are usually sampled with varying densities, which results in greatly decreased performance for networks trained on uniform densities, we propose novel set learning layers to adaptively combine features from multiple scales. Experiments show that our network called PointNet++ is able to learn deep point set features efficiently and robustly. In particular, results significantly better than state-of-the-art have been obtained on challenging benchmarks of 3D point clouds.}, + urldate = {2023-04-03}, + publisher = {arXiv}, + author = {Qi, Charles R. and Yi, Li and Su, Hao and Guibas, Leonidas J.}, + month = jun, + year = {2017}, + note = {arXiv:1706.02413 [cs]}, + keywords = {Computer Science - Computer Vision and Pattern Recognition}, + file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/4FPME54R/Qi et al. - 2017 - PointNet++ Deep Hierarchical Feature Learning on .pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/SXSSFMBW/1706.html:text/html}, +}