reorganization of images

Laureηt 2023-01-25 14:43:19 +01:00
parent eb9ec0f0d4
commit f3cda0b3d6
Signed by: Laurent
SSH key fingerprint: SHA256:kZEpW8cMJ54PDeCvOhzreNr4FSh6R13CMGH/POoO8DI
22 changed files with 8890 additions and 15 deletions

assets/DETR.pdf: new binary file (not shown)
assets/Mask2Former.pdf: new binary file (not shown)
assets/MaskRCNN.pdf: new file, 8830 additions (diff suppressed because one or more lines are too long)
[binary image diffs not shown: the remaining changed files are image assets added, deleted, or renamed into the new directory layout]

Changes to the LaTeX report source:

@@ -47,8 +47,8 @@ The field of 3D reconstruction techniques in photography, such as Reflectance Tr
 \begin{figure}[ht]
 \centering
 \begin{tabular}{cc}
-\includegraphics[height=0.3\linewidth]{matte.jpg} &
-\includegraphics[height=0.3\linewidth]{shiny.jpg}
+\includegraphics[height=0.3\linewidth]{previous_work/matte.jpg} &
+\includegraphics[height=0.3\linewidth]{previous_work/shiny.jpg}
 \end{tabular}
 \caption{Left: a scene with matte spheres. Right: a scene with a shiny sphere.}
 \label{fig:intro}
@@ -61,8 +61,8 @@ Previous work by Laurent Fainsin et al. in~\cite{spheredetect} attempted to addr
 \begin{figure}[ht]
 \centering
 \begin{tabular}{cc}
-\includegraphics[height=0.3\linewidth]{matte_inference.png} &
-\includegraphics[height=0.3\linewidth]{shiny_inference.png}
+\includegraphics[height=0.3\linewidth]{previous_work/matte_inference.png} &
+\includegraphics[height=0.3\linewidth]{previous_work/shiny_inference.png}
 \end{tabular}
 \caption{Mask R-CNN~\cite{MaskRCNN} inferences from~\cite{spheredetect} on Figure~\ref{fig:intro}.}
 \label{fig:previouswork}
@@ -77,8 +77,8 @@ In~\cite{spheredetect}, it is explained that obtaining clean photographs with sp
 \begin{figure}[ht]
 \centering
 \begin{tabular}{cc}
-\includegraphics[height=0.3\linewidth]{dataset1.jpg} &
-\includegraphics[height=0.3\linewidth]{dataset2.jpg}
+\includegraphics[height=0.3\linewidth]{previous_work/bear.jpg} &
+\includegraphics[height=0.3\linewidth]{previous_work/plush.jpg}
 \end{tabular}
 \caption{Example of the synthetic dataset used in~\cite{spheredetect}.}
 \label{fig:spheredetect_dataset}
@@ -93,8 +93,8 @@ Antoine Laurent, a PhD candidate at INP of Toulouse, is working on the field of
 \begin{figure}[ht]
 \centering
 \begin{tabular}{cc}
-\includegraphics[height=0.3\linewidth]{antoine_laurent_1.jpg} &
-\includegraphics[height=0.3\linewidth]{antoine_laurent_2.jpg}
+\includegraphics[height=0.3\linewidth]{antoine_laurent/cheveaux.jpg} &
+\includegraphics[height=0.3\linewidth]{antoine_laurent/mammouths.jpg}
 \end{tabular}
 \caption{Example of clean photographs with 3D spherical markers from Antoine Laurent.}
 \label{fig:antoine_laurent_dataset}
@@ -110,7 +110,9 @@ DeepLight~\cite{legendre_deeplight_2019} is a research paper from Google that pr
 \begin{figure}[ht]
 \centering
-\includegraphics[height=0.4\linewidth]{deeplight.png}
+\includegraphics[height=0.4\linewidth]{deeplight/Prober_Crop_small.jpg}
+\includegraphics[height=0.4\linewidth]{deeplight/NAVID_20181022_104053_1393_frame_small.jpg}
+\includegraphics[height=0.4\linewidth]{deeplight/Prober_figure_small.jpg}
 \caption{Dataset acquisition technique from~\cite{legendre_deeplight_2019}.}
 \label{fig:deeplight_dataset}
 \end{figure}
@@ -124,8 +126,8 @@ In the paper "A Dataset of Multi-Illumination Images in the Wild"~\cite{murmann_
 \begin{figure}[ht]
 \centering
 \begin{tabular}{cc}
-\includegraphics[height=0.3\linewidth]{dir_7_mip2.jpg} &
-\includegraphics[height=0.3\linewidth]{materials_mip2.png}
+\includegraphics[height=0.3\linewidth]{mip/dir_7_mip2.jpg} &
+\includegraphics[height=0.3\linewidth]{mip/materials_mip2.png}
 \end{tabular}
 \caption{Example data from~\cite{murmann_dataset_2019}.}
 \label{fig:murmann_dataset}
@@ -145,7 +147,7 @@ In~\cite{spheredetect}, the authors use Mask R-CNN~\cite{MaskRCNN} as a base mod
 \begin{figure}[ht]
 \centering
-\includegraphics[height=0.3\linewidth]{MaskRCNN.png}
+\includegraphics[height=0.3\linewidth]{MaskRCNN.pdf}
 \caption{The Mask-RCNN~\cite{MaskRCNN} architecture.}
 \label{fig:maskrcnn}
 \end{figure}
@@ -186,7 +188,7 @@ GPNs could be an alternative to Ellipse-RCNN for detecting ellipses in images, b
 \subsection{DETR \& DINO}
-DETR (DEtection TRansformer)~\cite{carion_end--end_2020} is a new method that views object detection as a direct set prediction problem. The main goal of DETR is to streamline the detection pipeline by removing the need for many hand-designed components like a non-maximum suppression procedure or anchor generation that explicitly encode prior knowledge about the task.
+DETR (DEtection TRansformer)~\cite{carion_end--end_2020} is a new method proposed by Facebook that views object detection as a direct set prediction problem. The main goal of DETR is to streamline the detection pipeline by removing the need for many hand-designed components like a non-maximum suppression procedure or anchor generation that explicitly encode prior knowledge about the task.
 DETR uses a set-based global loss that forces unique predictions via bipartite matching, and a transformer encoder-decoder architecture, as seen in Figure~\ref{fig:detr}. Given a fixed small set of learned object queries, the model reasons about the relations of the objects and the global image context to directly output the final set of predictions in parallel. This makes the model conceptually simple and does not require a specialized library, unlike many other modern detectors.
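
An aside on the hunk above: DETR's set-based loss first pairs the N query predictions with the M ground-truth boxes one-to-one via the Hungarian algorithm. The sketch below illustrates only that matching step with SciPy; the cost terms, the 5.0 box weight, and the helper name match_predictions are assumptions for illustration, not the code from~\cite{carion_end--end_2020}.

# Minimal sketch of DETR-style bipartite matching (illustrative, not the
# authors' implementation). Each object query emits one prediction; the
# Hungarian solver pairs predictions with ground truths one-to-one, so
# duplicate detections are penalized without non-maximum suppression.
import numpy as np
from scipy.optimize import linear_sum_assignment

def match_predictions(pred_probs, pred_boxes, gt_labels, gt_boxes):
    """Return (pred_idx, gt_idx) minimizing the total matching cost.

    pred_probs: (N, C) class scores, pred_boxes: (N, 4),
    gt_labels: (M,) ints, gt_boxes: (M, 4), boxes normalized to [0, 1].
    """
    cost_class = -pred_probs[:, gt_labels]                            # (N, M)
    cost_bbox = np.abs(pred_boxes[:, None] - gt_boxes[None]).sum(-1)  # (N, M)
    cost = cost_class + 5.0 * cost_bbox   # box weight is an assumption
    return linear_sum_assignment(cost)    # optimal one-to-one pairing

# Toy usage: 4 queries, 2 ground-truth objects, 3 classes.
rng = np.random.default_rng(0)
probs = rng.dirichlet(np.ones(3), size=4)
print(match_predictions(probs, rng.uniform(size=(4, 4)),
                        np.array([0, 1]), rng.uniform(size=(2, 4))))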
@@ -194,7 +196,7 @@ DETR demonstrates accuracy and run-time performance on par with the well-establi
 \begin{figure}[ht]
 \centering
-\includegraphics[height=0.2\linewidth]{DETR.png}
+\includegraphics[height=0.2\linewidth]{DETR.pdf}
 \caption{The DETR~\cite{carion_end--end_2020} architecture.}
 \label{fig:detr}
 \end{figure}
@@ -210,6 +212,13 @@ DINO (DETR with Improved deNoising anchOr boxes)~\cite{zhang_dino_2022} is a sta
 \subsection{Mask2Former}
+\begin{figure}[ht]
+\centering
+\includegraphics[height=0.4\linewidth]{Mask2Former.pdf}
+\caption{The Mask2Former~\cite{cheng_masked-attention_2022} architecture.}
+\label{fig:mask2former}
+\end{figure}
 \section{Training}
 \subsection{Loss functions}

Changes to the BibTeX bibliography:

@@ -166,7 +166,7 @@
 }
 @misc{noauthor_datasets_nodate,
-title = {Datasets},
+title = {Datasets {HuggingFace}},
 url = {https://huggingface.co/docs/datasets/index},
 abstract = {We're on a journey to advance and democratize artificial intelligence through open source and open science.},
 urldate = {2023-01-17},
@@ -379,3 +379,39 @@ Publisher: IEEE},
 keywords = {Computer Science - Computer Vision and Pattern Recognition},
 file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/NFL7ASJI/Zhang et al. - 2022 - DINO DETR with Improved DeNoising Anchor Boxes fo.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/IJEI9W7E/2203.html:text/html},
 }
+@article{legendre_supplemental_nodate,
+title = {Supplemental {Materials} for {DeepLight}: {Learning} {Illumination} for {Unconstrained} {Mobile} {Mixed} {Reality}},
+language = {en},
+author = {LeGendre, Chloe and Ma, Wan-Chun and Fyffe, Graham and Flynn, John and Charbonnel, Laurent and Busch, Jay and Debevec, Paul},
+file = {LeGendre et al. - Supplemental Materials for DeepLight Learning Ill.pdf:/home/laurent/Zotero/storage/BKVSXXYE/LeGendre et al. - Supplemental Materials for DeepLight Learning Ill.pdf:application/pdf},
+}
+@misc{noauthor_multi_nodate,
+title = {Multi {Illumination} {Dataset}},
+url = {https://projects.csail.mit.edu/illumination/databrowser/},
+urldate = {2023-01-24},
+}
+@misc{noauthor_format_nodate,
+title = {Format selector for 2112.01527},
+url = {https://arxiv.org/format/2112.01527},
+urldate = {2023-01-25},
+file = {Format selector for 2112.01527:/home/laurent/Zotero/storage/LUPN2K2W/2112.html:text/html},
+}
+@misc{cheng_masked-attention_2022,
+title = {Masked-attention {Mask} {Transformer} for {Universal} {Image} {Segmentation}},
+url = {http://arxiv.org/abs/2112.01527},
+doi = {10.48550/arXiv.2112.01527},
+abstract = {Image segmentation is about grouping pixels with different semantics, e.g., category or instance membership, where each choice of semantics defines a task. While only the semantics of each task differ, current research focuses on designing specialized architectures for each task. We present Masked-attention Mask Transformer (Mask2Former), a new architecture capable of addressing any image segmentation task (panoptic, instance or semantic). Its key components include masked attention, which extracts localized features by constraining cross-attention within predicted mask regions. In addition to reducing the research effort by at least three times, it outperforms the best specialized architectures by a significant margin on four popular datasets. Most notably, Mask2Former sets a new state-of-the-art for panoptic segmentation (57.8 PQ on COCO), instance segmentation (50.1 AP on COCO) and semantic segmentation (57.7 mIoU on ADE20K).},
+urldate = {2023-01-25},
+publisher = {arXiv},
+author = {Cheng, Bowen and Misra, Ishan and Schwing, Alexander G. and Kirillov, Alexander and Girdhar, Rohit},
+month = jun,
+year = {2022},
+note = {arXiv:2112.01527 [cs]},
+keywords = {Computer Science - Artificial Intelligence, Computer Science - Computer Vision and Pattern Recognition, Computer Science - Machine Learning},
+annote = {Comment: CVPR 2022. Project page/code/models: https://bowenc0221.github.io/mask2former},
+file = {arXiv Fulltext PDF:/home/laurent/Zotero/storage/9XS7V8FP/Cheng et al. - 2022 - Masked-attention Mask Transformer for Universal Im.pdf:application/pdf;arXiv.org Snapshot:/home/laurent/Zotero/storage/LC5ZEEIC/2112.html:text/html},
+}
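
The Mask2Former abstract above hinges on masked attention: cross-attention is constrained to the mask region predicted by the previous decoder layer, so each query attends only within its own mask. A toy NumPy sketch of that idea follows; the function name, shapes, and constants are invented for illustration, not the paper's implementation.

# Assumption-level illustration of the masked attention named in the
# Mask2Former abstract: attention weights are forced to (near) zero
# wherever the predicted mask marks background.
import numpy as np

def masked_attention(queries, keys, values, mask):
    """queries: (Q, d); keys, values: (P, d); mask: (Q, P) bool foreground."""
    logits = queries @ keys.T / np.sqrt(queries.shape[-1])  # scaled dot product
    logits = np.where(mask, logits, -1e9)                   # constrain to mask
    weights = np.exp(logits - logits.max(-1, keepdims=True))
    weights /= weights.sum(-1, keepdims=True)               # softmax per query
    return weights @ values                                 # (Q, d)

# Toy usage: 2 queries over 16 pixel features of dimension 8.
q, k, v = np.ones((2, 8)), np.ones((16, 8)), np.ones((16, 8))
m = np.zeros((2, 16), dtype=bool)
m[:, :4] = True          # each query may attend only to the first 4 pixels
print(masked_attention(q, k, v, m).shape)  # (2, 8)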