from pathlib import Path

import pytest
import torch
from transformers import CLIPVisionModelWithProjection  # type: ignore

from refiners.fluxion.utils import load_from_safetensors, no_grad
from refiners.foundationals.clip.image_encoder import CLIPImageEncoderH


@pytest.fixture(scope="module")
def our_encoder(
    clip_image_encoder_huge_weights_path: Path,
    test_device: torch.device,
    test_dtype_fp32_bf16_fp16: torch.dtype,
) -> CLIPImageEncoderH:
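    # Build the refiners encoder, then load its weights from the safetensors checkpoint.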
    encoder = CLIPImageEncoderH(device=test_device, dtype=test_dtype_fp32_bf16_fp16)
    tensors = load_from_safetensors(clip_image_encoder_huge_weights_path)
    encoder.load_state_dict(tensors)
    return encoder


@pytest.fixture(scope="module")
def ref_encoder(
    unclip21_transformers_stabilityai_path: str,
    test_device: torch.device,
    test_dtype_fp32_bf16_fp16: torch.dtype,
    use_local_weights: bool,
) -> CLIPVisionModelWithProjection:
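    # Load the reference transformers implementation from the image_encoder subfolder of the unCLIP 2.1 checkpoint.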
    return CLIPVisionModelWithProjection.from_pretrained(  # type: ignore
        unclip21_transformers_stabilityai_path,
        local_files_only=use_local_weights,
        subfolder="image_encoder",
    ).to(device=test_device, dtype=test_dtype_fp32_bf16_fp16)  # type: ignore


@no_grad()
@pytest.mark.flaky(reruns=3)
def test_encoder(
    ref_encoder: CLIPVisionModelWithProjection,
    our_encoder: CLIPImageEncoderH,
):
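    # Both fixtures are built with the same device and dtype; verify that before comparing outputs.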
    assert ref_encoder.dtype == our_encoder.dtype
    assert ref_encoder.device == our_encoder.device
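    # A single random 224x224 RGB input, matching the encoder's expected resolution.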
    x = torch.randn((1, 3, 224, 224), dtype=ref_encoder.dtype, device=ref_encoder.device)

    ref_embeddings = ref_encoder(x).image_embeds
    our_embeddings = our_encoder(x)

    assert ref_embeddings.shape == (1, 1024)
    assert our_embeddings.shape == (1, 1024)
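
    # The refiners implementation should match the reference within a loose absolute tolerance
    # (the fixtures may run in bf16/fp16).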
    assert torch.allclose(our_embeddings, ref_embeddings, atol=0.05)