refiners/tests/foundationals/latent_diffusion/test_auto_encoder.py

from pathlib import Path
from warnings import warn

import pytest
import torch
from PIL import Image
from tests.utils import ensure_similar_images

from refiners.fluxion.utils import no_grad
from refiners.foundationals.latent_diffusion import LatentDiffusionAutoencoder, SD1Autoencoder, SDXLAutoencoder


@pytest.fixture(scope="module")
def ref_path() -> Path:
    return Path(__file__).parent / "test_auto_encoder_ref"


@pytest.fixture(scope="module", params=["SD1.5", "SDXL"])
def lda(
    request: pytest.FixtureRequest,
    test_weights_path: Path,
    test_dtype_fp32_bf16_fp16: torch.dtype,
    test_device: torch.device,
) -> LatentDiffusionAutoencoder:
    model_version = request.param
    match (model_version, test_dtype_fp32_bf16_fp16):
        case ("SD1.5", _):
            weight_path = test_weights_path / "lda.safetensors"
            if not weight_path.is_file():
                warn(f"could not find weights at {weight_path}, skipping")
                pytest.skip(allow_module_level=True)
            model = SD1Autoencoder().load_from_safetensors(weight_path)
        case ("SDXL", torch.float16):
            weight_path = test_weights_path / "sdxl-lda-fp16-fix.safetensors"
            if not weight_path.is_file():
                warn(f"could not find weights at {weight_path}, skipping")
                pytest.skip(allow_module_level=True)
            model = SDXLAutoencoder().load_from_safetensors(weight_path)
        case ("SDXL", _):
            weight_path = test_weights_path / "sdxl-lda.safetensors"
            if not weight_path.is_file():
                warn(f"could not find weights at {weight_path}, skipping")
                pytest.skip(allow_module_level=True)
            model = SDXLAutoencoder().load_from_safetensors(weight_path)
        case _:
            raise ValueError(f"Unknown model version: {model_version}")
    model = model.to(device=test_device, dtype=test_dtype_fp32_bf16_fp16)
    return model


@pytest.fixture(scope="module")
def sample_image(ref_path: Path) -> Image.Image:
    test_image = ref_path / "macaw.png"
    if not test_image.is_file():
        warn(f"could not reference image at {test_image}, skipping")
        pytest.skip(allow_module_level=True)
    img = Image.open(test_image)  # type: ignore
    assert img.size == (512, 512)
    return img


@no_grad()
def test_encode_decode_image(lda: LatentDiffusionAutoencoder, sample_image: Image.Image):
    encoded = lda.image_to_latents(sample_image)
    decoded = lda.latents_to_image(encoded)

    assert decoded.mode == "RGB"  # type: ignore

    # Ensure no saturation. The green channel (band = 1) must not max out.
    assert max(iter(decoded.getdata(band=1))) < 255  # type: ignore

    ensure_similar_images(sample_image, decoded, min_psnr=20, min_ssim=0.9)


@no_grad()
def test_encode_decode_images(lda: LatentDiffusionAutoencoder, sample_image: Image.Image):
    encoded = lda.images_to_latents([sample_image, sample_image])
    images = lda.latents_to_images(encoded)
    assert isinstance(images, list)
    assert len(images) == 2
    ensure_similar_images(sample_image, images[1], min_psnr=20, min_ssim=0.9)


@no_grad()
def test_tiled_autoencoder(lda: LatentDiffusionAutoencoder, sample_image: Image.Image):
    sample_image = sample_image.resize((2048, 2048))  # type: ignore

    with lda.tiled_inference(sample_image, tile_size=(512, 512)):
        encoded = lda.tiled_image_to_latents(sample_image)
        result = lda.tiled_latents_to_image(encoded)

    ensure_similar_images(sample_image, result, min_psnr=35, min_ssim=0.985)


@no_grad()
def test_tiled_autoencoder_rectangular_tiles(lda: LatentDiffusionAutoencoder, sample_image: Image.Image):
    sample_image = sample_image.resize((2048, 2048))  # type: ignore

    with lda.tiled_inference(sample_image, tile_size=(512, 1024)):
        encoded = lda.tiled_image_to_latents(sample_image)
        result = lda.tiled_latents_to_image(encoded)

    ensure_similar_images(sample_image, result, min_psnr=35, min_ssim=0.985)


@no_grad()
def test_tiled_autoencoder_large_tile(lda: LatentDiffusionAutoencoder, sample_image: Image.Image):
    sample_image = sample_image.resize((1024, 1024))  # type: ignore

    with lda.tiled_inference(sample_image, tile_size=(2048, 2048)):
        encoded = lda.tiled_image_to_latents(sample_image)
        result = lda.tiled_latents_to_image(encoded)

    ensure_similar_images(sample_image, result, min_psnr=34, min_ssim=0.975)


@no_grad()
def test_tiled_autoencoder_rectangular_image(lda: LatentDiffusionAutoencoder, sample_image: Image.Image):
    sample_image = sample_image.crop((0, 0, 300, 500))
    sample_image = sample_image.resize((sample_image.width * 4, sample_image.height * 4))  # type: ignore

    with lda.tiled_inference(sample_image, tile_size=(512, 512)):
        encoded = lda.tiled_image_to_latents(sample_image)
        result = lda.tiled_latents_to_image(encoded)

    ensure_similar_images(sample_image, result, min_psnr=37, min_ssim=0.985)


def test_value_error_tile_encode_no_context(lda: LatentDiffusionAutoencoder, sample_image: Image.Image) -> None:
    with pytest.raises(ValueError):
        lda.tiled_image_to_latents(sample_image)

    with pytest.raises(ValueError):
        lda.tiled_latents_to_image(torch.randn(1, 8, 16, 16, device=lda.device))
run lint rules using latest isort settings 2023-12-11 10:46:38 +00:00			`from pathlib import Path`
initial commit 2023-08-04 13:28:41 +00:00			`from warnings import warn`
run lint rules using latest isort settings 2023-12-11 10:46:38 +00:00
			`import pytest`
			`import torch`
initial commit 2023-08-04 13:28:41 +00:00			`from PIL import Image`
run lint rules using latest isort settings 2023-12-11 10:46:38 +00:00			`from tests.utils import ensure_similar_images`
initial commit 2023-08-04 13:28:41 +00:00
modify some foundational tests to also test in float16 and bfloat16 2024-10-03 08:47:37 +00:00			`from refiners.fluxion.utils import no_grad`
			`from refiners.foundationals.latent_diffusion import LatentDiffusionAutoencoder, SD1Autoencoder, SDXLAutoencoder`
initial commit 2023-08-04 13:28:41 +00:00

			`@pytest.fixture(scope="module")`
			`def ref_path() -> Path:`
			`return Path(__file__).parent / "test_auto_encoder_ref"`


modify some foundational tests to also test in float16 and bfloat16 2024-10-03 08:47:37 +00:00			`@pytest.fixture(scope="module", params=["SD1.5", "SDXL"])`
			`def lda(`
			`request: pytest.FixtureRequest,`
			`test_weights_path: Path,`
			`test_dtype_fp32_bf16_fp16: torch.dtype,`
			`test_device: torch.device,`
			`) -> LatentDiffusionAutoencoder:`
			`model_version = request.param`
			`match (model_version, test_dtype_fp32_bf16_fp16):`
			`case ("SD1.5", _):`
			`weight_path = test_weights_path / "lda.safetensors"`
			`if not weight_path.is_file():`
			`warn(f"could not find weights at {weight_path}, skipping")`
			`pytest.skip(allow_module_level=True)`
			`model = SD1Autoencoder().load_from_safetensors(weight_path)`
			`case ("SDXL", torch.float16):`
			`weight_path = test_weights_path / "sdxl-lda-fp16-fix.safetensors"`
			`if not weight_path.is_file():`
			`warn(f"could not find weights at {weight_path}, skipping")`
			`pytest.skip(allow_module_level=True)`
			`model = SDXLAutoencoder().load_from_safetensors(weight_path)`
			`case ("SDXL", _):`
			`weight_path = test_weights_path / "sdxl-lda.safetensors"`
			`if not weight_path.is_file():`
			`warn(f"could not find weights at {weight_path}, skipping")`
			`pytest.skip(allow_module_level=True)`
			`model = SDXLAutoencoder().load_from_safetensors(weight_path)`
			`case _:`
			`raise ValueError(f"Unknown model version: {model_version}")`
			`model = model.to(device=test_device, dtype=test_dtype_fp32_bf16_fp16)`
			`return model`
initial commit 2023-08-04 13:28:41 +00:00

			`@pytest.fixture(scope="module")`
			`def sample_image(ref_path: Path) -> Image.Image:`
			`test_image = ref_path / "macaw.png"`
			`if not test_image.is_file():`
			`warn(f"could not reference image at {test_image}, skipping")`
			`pytest.skip(allow_module_level=True)`
update pyright and fix Pillow 10.3 typing issues 2024-04-02 15:30:57 +00:00			`img = Image.open(test_image) # type: ignore`
initial commit 2023-08-04 13:28:41 +00:00			`assert img.size == (512, 512)`
			`return img`


upgrade pyright to 1.1.342 ; improve no_grad typing 2023-12-29 09:59:51 +00:00			`@no_grad()`
Implement Tiled Autoencoder inference to save VRAM 2024-06-24 16:35:43 +00:00			`def test_encode_decode_image(lda: LatentDiffusionAutoencoder, sample_image: Image.Image):`
			`encoded = lda.image_to_latents(sample_image)`
			`decoded = lda.latents_to_image(encoded)`
initial commit 2023-08-04 13:28:41 +00:00
update pyright and fix Pillow 10.3 typing issues 2024-04-02 15:30:57 +00:00			`assert decoded.mode == "RGB" # type: ignore`
initial commit 2023-08-04 13:28:41 +00:00
			`# Ensure no saturation. The green channel (band = 1) must not max out.`
			`assert max(iter(decoded.getdata(band=1))) < 255 # type: ignore`

			`ensure_similar_images(sample_image, decoded, min_psnr=20, min_ssim=0.9)`
clip text, lda encode batch inputs * text_encoder([str1, str2]) * lda decode_latents/encode_image image_to_latent/latent_to_image * images_to_tensor, tensor_to_images --------- Co-authored-by: Cédric Deltheil <355031+deltheil@users.noreply.github.com> 2024-02-01 14:05:43 +00:00

			`@no_grad()`
Implement Tiled Autoencoder inference to save VRAM 2024-06-24 16:35:43 +00:00			`def test_encode_decode_images(lda: LatentDiffusionAutoencoder, sample_image: Image.Image):`
			`encoded = lda.images_to_latents([sample_image, sample_image])`
			`images = lda.latents_to_images(encoded)`
clip text, lda encode batch inputs * text_encoder([str1, str2]) * lda decode_latents/encode_image image_to_latent/latent_to_image * images_to_tensor, tensor_to_images --------- Co-authored-by: Cédric Deltheil <355031+deltheil@users.noreply.github.com> 2024-02-01 14:05:43 +00:00			`assert isinstance(images, list)`
			`assert len(images) == 2`
			`ensure_similar_images(sample_image, images[1], min_psnr=20, min_ssim=0.9)`
Implement Tiled Autoencoder inference to save VRAM 2024-06-24 16:35:43 +00:00

			`@no_grad()`
			`def test_tiled_autoencoder(lda: LatentDiffusionAutoencoder, sample_image: Image.Image):`
			`sample_image = sample_image.resize((2048, 2048)) # type: ignore`

			`with lda.tiled_inference(sample_image, tile_size=(512, 512)):`
			`encoded = lda.tiled_image_to_latents(sample_image)`
			`result = lda.tiled_latents_to_image(encoded)`

			`ensure_similar_images(sample_image, result, min_psnr=35, min_ssim=0.985)`


			`@no_grad()`
			`def test_tiled_autoencoder_rectangular_tiles(lda: LatentDiffusionAutoencoder, sample_image: Image.Image):`
			`sample_image = sample_image.resize((2048, 2048)) # type: ignore`

			`with lda.tiled_inference(sample_image, tile_size=(512, 1024)):`
			`encoded = lda.tiled_image_to_latents(sample_image)`
			`result = lda.tiled_latents_to_image(encoded)`

			`ensure_similar_images(sample_image, result, min_psnr=35, min_ssim=0.985)`


			`@no_grad()`
			`def test_tiled_autoencoder_large_tile(lda: LatentDiffusionAutoencoder, sample_image: Image.Image):`
			`sample_image = sample_image.resize((1024, 1024)) # type: ignore`

			`with lda.tiled_inference(sample_image, tile_size=(2048, 2048)):`
			`encoded = lda.tiled_image_to_latents(sample_image)`
			`result = lda.tiled_latents_to_image(encoded)`

			`ensure_similar_images(sample_image, result, min_psnr=34, min_ssim=0.975)`


			`@no_grad()`
			`def test_tiled_autoencoder_rectangular_image(lda: LatentDiffusionAutoencoder, sample_image: Image.Image):`
			`sample_image = sample_image.crop((0, 0, 300, 500))`
			`sample_image = sample_image.resize((sample_image.width * 4, sample_image.height * 4)) # type: ignore`

			`with lda.tiled_inference(sample_image, tile_size=(512, 512)):`
			`encoded = lda.tiled_image_to_latents(sample_image)`
			`result = lda.tiled_latents_to_image(encoded)`

			`ensure_similar_images(sample_image, result, min_psnr=37, min_ssim=0.985)`


			`def test_value_error_tile_encode_no_context(lda: LatentDiffusionAutoencoder, sample_image: Image.Image) -> None:`
			`with pytest.raises(ValueError):`
			`lda.tiled_image_to_latents(sample_image)`

			`with pytest.raises(ValueError):`
			`lda.tiled_latents_to_image(torch.randn(1, 8, 16, 16, device=lda.device))`