From 78e9f7728e37241da3a22bb1f27180d99bd7e245 Mon Sep 17 00:00:00 2001
From: Laurent
Date: Fri, 2 Feb 2024 12:30:41 +0000
Subject: [PATCH] (doc/fluxion/ld) add `StableDiffusion_1` docstrings

---
 .../stable_diffusion_1/__init__.py |  2 +
 .../stable_diffusion_1/model.py    | 85 +++++++++++++++++++
 .../stable_diffusion_xl/model.py   |  4 +-
 3 files changed, 89 insertions(+), 2 deletions(-)

diff --git a/src/refiners/foundationals/latent_diffusion/stable_diffusion_1/__init__.py b/src/refiners/foundationals/latent_diffusion/stable_diffusion_1/__init__.py
index 4064f4b..f8cd4c7 100644
--- a/src/refiners/foundationals/latent_diffusion/stable_diffusion_1/__init__.py
+++ b/src/refiners/foundationals/latent_diffusion/stable_diffusion_1/__init__.py
@@ -1,6 +1,7 @@
 from refiners.foundationals.latent_diffusion.stable_diffusion_1.controlnet import SD1ControlnetAdapter
 from refiners.foundationals.latent_diffusion.stable_diffusion_1.image_prompt import SD1IPAdapter
 from refiners.foundationals.latent_diffusion.stable_diffusion_1.model import (
+    SD1Autoencoder,
     StableDiffusion_1,
     StableDiffusion_1_Inpainting,
 )
@@ -10,6 +11,7 @@ from refiners.foundationals.latent_diffusion.stable_diffusion_1.unet import SD1U
 __all__ = [
     "StableDiffusion_1",
     "StableDiffusion_1_Inpainting",
+    "SD1Autoencoder",
     "SD1UNet",
     "SD1ControlnetAdapter",
     "SD1IPAdapter",
diff --git a/src/refiners/foundationals/latent_diffusion/stable_diffusion_1/model.py b/src/refiners/foundationals/latent_diffusion/stable_diffusion_1/model.py
index a4c312d..d42252e 100644
--- a/src/refiners/foundationals/latent_diffusion/stable_diffusion_1/model.py
+++ b/src/refiners/foundationals/latent_diffusion/stable_diffusion_1/model.py
@@ -13,10 +13,24 @@ from refiners.foundationals.latent_diffusion.stable_diffusion_1.unet import SD1U
 
 
 class SD1Autoencoder(LatentDiffusionAutoencoder):
+    """Stable Diffusion 1.5 autoencoder model.
+
+    Attributes:
+        encoder_scale: The scale factor applied to latents produced by the encoder (0.18215 for SD 1.5).
+    """
+
     encoder_scale: float = 0.18215
 
 
 class StableDiffusion_1(LatentDiffusionModel):
+    """Stable Diffusion 1.5 model.
+
+    Attributes:
+        unet: The U-Net model.
+        clip_text_encoder: The text encoder.
+        lda: The image autoencoder.
+    """
+
     unet: SD1UNet
     clip_text_encoder: CLIPTextEncoderL
     lda: SD1Autoencoder
@@ -30,6 +44,16 @@
         device: Device | str = "cpu",
         dtype: DType = torch.float32,
     ) -> None:
+        """Initialize the model.
+
+        Args:
+            unet: The SD1UNet U-Net model to use.
+            lda: The SD1Autoencoder image autoencoder to use.
+            clip_text_encoder: The CLIPTextEncoderL text encoder to use.
+            solver: The solver to use.
+            device: The PyTorch device to use.
+            dtype: The PyTorch data type to use.
+        """
         unet = unet or SD1UNet(in_channels=4)
         lda = lda or SD1Autoencoder()
         clip_text_encoder = clip_text_encoder or CLIPTextEncoderL()
@@ -45,6 +69,13 @@
     )
 
     def compute_clip_text_embedding(self, text: str, negative_text: str = "") -> Tensor:
+        """Compute the CLIP text embedding associated with the given prompt and negative prompt.
+
+        Args:
+            text: The prompt to compute the CLIP text embedding of.
+            negative_text: The negative prompt to compute the CLIP text embedding of.
+                If not provided, the negative prompt is assumed to be empty (i.e., `""`).
+ """ conditional_embedding = self.clip_text_encoder(text) if text == negative_text: return torch.cat(tensors=(conditional_embedding, conditional_embedding), dim=0) @@ -53,10 +84,22 @@ class StableDiffusion_1(LatentDiffusionModel): return torch.cat(tensors=(negative_embedding, conditional_embedding), dim=0) def set_unet_context(self, *, timestep: Tensor, clip_text_embedding: Tensor, **_: Tensor) -> None: + """Set the various context parameters required by the U-Net model. + + Args: + timestep: The timestep tensor to use. + clip_text_embedding: The CLIP text embedding tensor to use. + """ self.unet.set_timestep(timestep=timestep) self.unet.set_clip_text_embedding(clip_text_embedding=clip_text_embedding) def set_self_attention_guidance(self, enable: bool, scale: float = 1.0) -> None: + """Set whether to enable self-attention guidance. + + Args: + enable: Whether to enable self-attention guidance. + scale: The scale to use. + """ if enable: if sag := self._find_sag_adapter(): sag.scale = scale @@ -67,9 +110,11 @@ class StableDiffusion_1(LatentDiffusionModel): sag.eject() def has_self_attention_guidance(self) -> bool: + """Whether the model has self-attention guidance or not.""" return self._find_sag_adapter() is not None def _find_sag_adapter(self) -> SD1SAGAdapter | None: + """Finds the self-attention guidance adapter.""" for p in self.unet.get_parents(): if isinstance(p, SD1SAGAdapter): return p @@ -78,6 +123,17 @@ class StableDiffusion_1(LatentDiffusionModel): def compute_self_attention_guidance( self, x: Tensor, noise: Tensor, step: int, *, clip_text_embedding: Tensor, **kwargs: Tensor ) -> Tensor: + """Compute the self-attention guidance. + + Args: + x: The input tensor. + noise: The noise tensor. + step: The step to compute the self-attention guidance at. + clip_text_embedding: The CLIP text embedding to compute the self-attention guidance with. + + Returns: + The computed self-attention guidance. + """ sag = self._find_sag_adapter() assert sag is not None @@ -106,6 +162,14 @@ class StableDiffusion_1(LatentDiffusionModel): class StableDiffusion_1_Inpainting(StableDiffusion_1): + """Stable Diffusion 1.5 inpainting model. + + Attributes: + unet: The U-Net model. + clip_text_encoder: The text encoder. + lda: The image autoencoder. + """ + def __init__( self, unet: SD1UNet | None = None, @@ -140,6 +204,16 @@ class StableDiffusion_1_Inpainting(StableDiffusion_1): mask: Image.Image, latents_size: tuple[int, int] = (64, 64), ) -> tuple[Tensor, Tensor]: + """Set the inpainting conditions. + + Args: + target_image: The target image to inpaint. + mask: The mask to use for inpainting. + latents_size: The size of the latents to use. + + Returns: + The mask latents and the target image latents. + """ target_image = target_image.convert(mode="RGB") mask = mask.convert(mode="L") @@ -156,6 +230,17 @@ class StableDiffusion_1_Inpainting(StableDiffusion_1): def compute_self_attention_guidance( self, x: Tensor, noise: Tensor, step: int, *, clip_text_embedding: Tensor, **kwargs: Tensor ) -> Tensor: + """Compute the self-attention guidance. + + Args: + x: The input tensor. + noise: The noise tensor. + step: The step to compute the self-attention guidance at. + clip_text_embedding: The CLIP text embedding to compute the self-attention guidance with. + + Returns: + The computed self-attention guidance. 
+ """ sag = self._find_sag_adapter() assert sag is not None assert self.mask_latents is not None diff --git a/src/refiners/foundationals/latent_diffusion/stable_diffusion_xl/model.py b/src/refiners/foundationals/latent_diffusion/stable_diffusion_xl/model.py index b3891ea..27fe55a 100644 --- a/src/refiners/foundationals/latent_diffusion/stable_diffusion_xl/model.py +++ b/src/refiners/foundationals/latent_diffusion/stable_diffusion_xl/model.py @@ -25,7 +25,7 @@ class StableDiffusion_XL(LatentDiffusionModel): Attributes: unet: The U-Net model. clip_text_encoder: The text encoder. - lda (SDXLAutoencoder): The image autoencoder. + lda: The image autoencoder. """ unet: SDXLUNet @@ -103,7 +103,7 @@ class StableDiffusion_XL(LatentDiffusionModel): time_ids: Tensor, **_: Tensor, ) -> None: - """Sets the various context parameters required by the U-Net model. + """Set the various context parameters required by the U-Net model. Args: timestep: The timestep to set.