diff --git a/scripts/prepare_test_weights.py b/scripts/prepare_test_weights.py index 8e70c4a..f30f87a 100644 --- a/scripts/prepare_test_weights.py +++ b/scripts/prepare_test_weights.py @@ -291,6 +291,13 @@ def download_controlnet(): ] download_files(urls, net_folder) + tile_folder = os.path.join(base_folder, "control_v11f1e_sd15_tile") + urls = [ + "https://huggingface.co/lllyasviel/control_v11f1e_sd15_tile/raw/main/config.json", + "https://huggingface.co/lllyasviel/control_v11f1e_sd15_tile/resolve/main/diffusion_pytorch_model.bin", + ] + download_files(urls, tile_folder) + mfidabel_folder = os.path.join(test_weights_dir, "mfidabel", "controlnet-segment-anything") urls = [ "https://huggingface.co/mfidabel/controlnet-segment-anything/raw/main/config.json", @@ -597,6 +604,12 @@ def convert_controlnet(): "tests/weights/controlnet/mfidabel_controlnet-segment-anything.safetensors", expected_hash="d536eebb", ) + run_conversion_script( + "convert_diffusers_controlnet.py", + "tests/weights/lllyasviel/control_v11f1e_sd15_tile", + "tests/weights/controlnet/lllyasviel_control_v11f1e_sd15_tile.safetensors", + expected_hash="42463af8", + ) def convert_unclip(): diff --git a/tests/e2e/test_diffusion.py b/tests/e2e/test_diffusion.py index d97b5a5..80bd583 100644 --- a/tests/e2e/test_diffusion.py +++ b/tests/e2e/test_diffusion.py @@ -194,6 +194,14 @@ def controlnet_data_scale_decay( yield (cn_name, condition_image, expected_image, weights_path) +@pytest.fixture(scope="module") +def controlnet_data_tile(ref_path: Path, test_weights_path: Path) -> tuple[Image.Image, Image.Image, Path]: + condition_image = _img_open(ref_path / "low_res_dog.png").convert("RGB").resize((1024, 1024)) # type: ignore + expected_image = _img_open(ref_path / "expected_controlnet_tile.png").convert("RGB") + weights_path = test_weights_path / "controlnet" / "lllyasviel_control_v11f1e_sd15_tile.safetensors" + return condition_image, expected_image, weights_path + + @pytest.fixture(scope="module") def 
controlnet_data_canny(ref_path: Path, test_weights_path: Path) -> tuple[str, Image.Image, Image.Image, Path]: cn_name = "canny" @@ -1091,6 +1099,43 @@ def test_diffusion_controlnet( ensure_similar_images(predicted_image, expected_image, min_psnr=35, min_ssim=0.98) +@no_grad() +def test_diffusion_controlnet_tile_upscale( + sd15_std: StableDiffusion_1, + controlnet_data_tile: tuple[Image.Image, Image.Image, Path], + test_device: torch.device, +): + sd15 = sd15_std + + condition_image, expected_image, cn_weights_path = controlnet_data_tile + + controlnet: SD1ControlnetAdapter = SD1ControlnetAdapter( + sd15.unet, name="tile", scale=1.0, weights=load_from_safetensors(cn_weights_path) + ).inject() + + cn_condition = image_to_tensor(condition_image, device=test_device) + + prompt = "best quality" + negative_prompt = "blur, lowres, bad anatomy, bad hands, cropped, worst quality" + clip_text_embedding = sd15.compute_clip_text_embedding(text=prompt, negative_text=negative_prompt) + + manual_seed(0) + x = sd15.init_latents((1024, 1024), condition_image).to(test_device) + + for step in sd15.steps: + controlnet.set_controlnet_condition(cn_condition) + x = sd15( + x, + step=step, + clip_text_embedding=clip_text_embedding, + condition_scale=7.5, + ) + predicted_image = sd15.lda.latents_to_image(x) + + # Note: rather large tolerances are used on purpose here (loose comparison with diffusers' output) + ensure_similar_images(predicted_image, expected_image, min_psnr=24, min_ssim=0.75) + + @no_grad() def test_diffusion_controlnet_scale_decay( sd15_std: StableDiffusion_1, diff --git a/tests/e2e/test_diffusion_ref/README.md b/tests/e2e/test_diffusion_ref/README.md index a5325b3..73cea4c 100644 --- a/tests/e2e/test_diffusion_ref/README.md +++ b/tests/e2e/test_diffusion_ref/README.md @@ -57,8 +57,7 @@ Special cases: - `expected_controllora_PyraCanny+CPDS.png` - `expected_controllora_disabled.png` - `expected_style_aligned.png` - - `expected_controlnet_.png` 
(canny|depth|lineart|normals|sam|stack) - - `expected_controlnet__scale_decay.png` (canny) + - `expected_controlnet_canny_scale_decay.png` ## Other images @@ -90,6 +89,9 @@ Special cases: - `cutecat_guide_PyraCanny.png` and `cutecat_guide_CPDS.png` were [generated inside Fooocus](https://github.com/lllyasviel/Fooocus/blob/e8d88d3e250e541c6daf99d6ef734e8dc3cfdc7f/extras/preprocessors.py). +- `low_res_dog.png` and `expected_controlnet_tile.png` are taken from Diffusers [documentation](https://huggingface.co/lllyasviel/control_v11f1e_sd15_tile/tree/main/images), respectively named +`original.png` and `output.png`. + ## VAE without randomness ```diff diff --git a/tests/e2e/test_diffusion_ref/expected_controlnet_tile.png b/tests/e2e/test_diffusion_ref/expected_controlnet_tile.png new file mode 100644 index 0000000..4dc96d7 Binary files /dev/null and b/tests/e2e/test_diffusion_ref/expected_controlnet_tile.png differ diff --git a/tests/e2e/test_diffusion_ref/low_res_dog.png b/tests/e2e/test_diffusion_ref/low_res_dog.png new file mode 100644 index 0000000..4b48b88 Binary files /dev/null and b/tests/e2e/test_diffusion_ref/low_res_dog.png differ