ldm: properly resize non-square init image

2024-11-21 13:48:46 +00:00 · 2023-09-20 10:15:17 +02:00 · 2023-09-20 10:15:17 +02:00 · 2faff9f57a
parent 01aeaf3e36
commit 2faff9f57a
2 changed files with 19 additions and 4 deletions
--- a/src/refiners/foundationals/latent_diffusion/model.py
+++ b/src/refiners/foundationals/latent_diffusion/model.py
@ -49,16 +49,16 @@ class LatentDiffusionModel(fl.Module, ABC):
        first_step: int = 0,
        noise: Tensor | None = None,
    ) -> Tensor:
+        height, width = size
        if noise is None:
-            height, width = size
            noise = torch.randn(1, 4, height // 8, width // 8, device=self.device)
        assert list(noise.shape[2:]) == [
-            size[0] // 8,
-            size[1] // 8,
+            height // 8,
+            width // 8,
        ], f"noise shape is not compatible: {noise.shape}, with size: {size}"
        if init_image is None:
            return noise
-        encoded_image = self.lda.encode_image(image=init_image.resize(size=size))
+        encoded_image = self.lda.encode_image(image=init_image.resize(size=(width, height)))
        return self.scheduler.add_noise(x=encoded_image, noise=noise, step=self.steps[first_step])

    @property
--- a/tests/e2e/test_diffusion.py
+++ b/tests/e2e/test_diffusion.py
@ -492,6 +492,21 @@ def test_diffusion_std_init_image(
    ensure_similar_images(predicted_image, expected_image_std_init_image)


+@torch.no_grad()
+def test_rectangular_init_latents(
+    sd15_std: StableDiffusion_1,
+    cutecat_init: Image.Image,
+):
+    sd15 = sd15_std
+
+    # Only check latents initialization on a non-square image (not the full diffusion); init_latents takes (height, width), image sizes are (width, height)
+    width, height = 512, 504
+    rect_init_image = cutecat_init.crop((0, 0, width, height))
+    x = sd15.init_latents((height, width), rect_init_image)
+
+    assert sd15.lda.decode_latents(x).size == (width, height)
+
+
@torch.no_grad()
 def test_diffusion_inpainting(
    sd15_inpainting: StableDiffusion_1_Inpainting,