(doc/fluxion/ld) add SDXLUNet docstrings

2024-09-19 19:05:28 +00:00 · 2024-02-02 10:09:11 +00:00 · 2024-02-02 10:09:11 +00:00 · f93a145e15
parent f9d4ec18e1
commit f93a145e15
1 changed files with 50 additions and 1 deletions
--- a/src/refiners/foundationals/latent_diffusion/stable_diffusion_xl/unet.py
+++ b/src/refiners/foundationals/latent_diffusion/stable_diffusion_xl/unet.py
@ -239,7 +239,24 @@ class OutputBlock(fl.Chain):
 class SDXLUNet(fl.Chain):
-    def __init__(self, in_channels: int, device: Device | str | None = None, dtype: DType | None = None) -> None:
+    """Stable Diffusion XL U-Net.
    See [[arXiv:2307.01952] SDXL: Improving Latent Diffusion Models for High-Resolution Image Synthesis](https://arxiv.org/abs/2307.01952) for more details.
    """
    def __init__(
        self,
        in_channels: int,
        device: Device | str | None = None,
        dtype: DType | None = None,
    ) -> None:
        """Initialize the U-Net.
        Args:
            in_channels: Number of input channels.
            device: Device to use for computation.
            dtype: Data type to use for computation.
        """
        self.in_channels = in_channels
        super().__init__(
            TimestepEncoder(device=device, dtype=dtype),
@ -273,13 +290,45 @@ class SDXLUNet(fl.Chain):
        }
    def set_clip_text_embedding(self, clip_text_embedding: Tensor) -> None:
        """Set the clip text embedding context.
        Note:
            This context is required by the `SDXLCrossAttention` blocks.
        Args:
            clip_text_embedding: The CLIP text embedding tensor.
        """
        self.set_context(context="cross_attention_block", value={"clip_text_embedding": clip_text_embedding})
    def set_timestep(self, timestep: Tensor) -> None:
        """Set the timestep context.
        Note:
            This is required by `TimestepEncoder`.
        Args:
            timestep: The timestep tensor.
        """
        self.set_context(context="diffusion", value={"timestep": timestep})
    def set_time_ids(self, time_ids: Tensor) -> None:
        """Set the time IDs context.
        Note:
            This is required by `TextTimeEmbedding`.
        Args:
            time_ids: The time IDs tensor.
        """
        self.set_context(context="diffusion", value={"time_ids": time_ids})
    def set_pooled_text_embedding(self, pooled_text_embedding: Tensor) -> None:
        """Set the pooled text embedding context.
        Note:
            This is required by `TextTimeEmbedding`.
        Args:
            pooled_text_embedding: The pooled text embedding tensor.
        """
        self.set_context(context="diffusion", value={"pooled_text_embedding": pooled_text_embedding})