# refiners/scripts/conversion/convert_diffusers_unet.py

import argparse
from pathlib import Path
from typing import Any

import torch
from diffusers import UNet2DConditionModel  # type: ignore
from torch import nn

from refiners.fluxion.model_converter import ModelConverter
from refiners.fluxion.utils import load_from_safetensors, load_tensors
from refiners.foundationals.latent_diffusion import SD1UNet, SDXLUNet
from refiners.foundationals.latent_diffusion.stable_diffusion_xl.lcm import SDXLLcmAdapter


class Args(argparse.Namespace):
    source_path: str
    output_path: str | None
    subfolder: str
    half: bool
    verbose: bool
    skip_init_check: bool


def _load_override_state_dict(path: str) -> dict[str, torch.Tensor]:
    """Load a state dict from a `.pth` or `.safetensors` file.

    Raises:
        ValueError: if `path` ends with neither `.pth` nor `.safetensors`.
    """
    if path.endswith(".pth"):
        return load_tensors(path)
    if path.endswith(".safetensors"):
        return load_from_safetensors(path)
    raise ValueError(f"Unsupported file format: {path}")


def setup_converter(args: Args) -> ModelConverter:
    """Load the source Diffusers UNet, build the matching Refiners UNet and run the conversion.

    The target is an `SDXLUNet` when the source config has
    `addition_embed_type == "text_time"`, otherwise an `SD1UNet`. When the source
    config defines `time_cond_proj_dim`, an `SDXLLcmAdapter` is injected into the target.

    Args:
        args: Parsed command-line arguments.

    Returns:
        The `ModelConverter` after a successful `run`.

    Raises:
        ValueError: if the override weights file has an unsupported extension, or
            if the source is an LCM model but the target is not an `SDXLUNet`.
        RuntimeError: if `ModelConverter.run` reports failure.
    """
    # low_cpu_mem_usage=False stops some annoying console messages asking us to `pip install accelerate`
    source: nn.Module = UNet2DConditionModel.from_pretrained(  # type: ignore
        pretrained_model_name_or_path=args.source_path,
        subfolder=args.subfolder,
        low_cpu_mem_usage=False,
    )
    if args.override_weights is not None:
        # Keep the config loaded above, but replace the weights.
        source.load_state_dict(_load_override_state_dict(args.override_weights))

    source_in_channels: int = source.config.in_channels  # type: ignore
    source_clip_embedding_dim: int = source.config.cross_attention_dim  # type: ignore
    source_has_time_ids: bool = source.config.addition_embed_type == "text_time"  # type: ignore
    source_is_lcm: bool = source.config.time_cond_proj_dim is not None

    if source_has_time_ids:
        target = SDXLUNet(in_channels=source_in_channels)
    else:
        target = SD1UNet(in_channels=source_in_channels)

    if source_is_lcm:
        # Explicit check instead of `assert`, so it still runs under `python -O`.
        if not isinstance(target, SDXLUNet):
            raise ValueError("LCM conversion is only supported for SDXL UNets")
        SDXLLcmAdapter(target=target).inject()

    # Random dummy inputs, fed to both models by the converter.
    x = torch.randn(1, source_in_channels, 32, 32)
    timestep = torch.tensor(data=[0])
    clip_text_embeddings = torch.randn(1, 77, source_clip_embedding_dim)

    # The target receives its conditioning through setters; only `x` is passed as an argument.
    target.set_timestep(timestep=timestep)
    target.set_clip_text_embedding(clip_text_embedding=clip_text_embeddings)
    added_cond_kwargs = {}
    if isinstance(target, SDXLUNet):
        added_cond_kwargs = {"text_embeds": torch.randn(1, 1280), "time_ids": torch.randn(1, 6)}
        target.set_time_ids(time_ids=added_cond_kwargs["time_ids"])
        target.set_pooled_text_embedding(pooled_text_embedding=added_cond_kwargs["text_embeds"])

    target_args = (x,)

    # The source receives the same conditioning as call arguments.
    source_kwargs: dict[str, Any] = {}
    if source_has_time_ids:
        source_kwargs["added_cond_kwargs"] = added_cond_kwargs
    if source_is_lcm:
        source_kwargs["timestep_cond"] = torch.randn(1, source.config.time_cond_proj_dim)

    source_args = {
        "positional": (x, timestep, clip_text_embeddings),
        "keyword": source_kwargs,
    }

    converter = ModelConverter(
        source_model=source,
        target_model=target,
        skip_init_check=args.skip_init_check,
        skip_output_check=True,
        verbose=args.verbose,
    )
    if not converter.run(
        source_args=source_args,
        target_args=target_args,
    ):
        raise RuntimeError("Model conversion failed")
    return converter


def main() -> None:
    """Parse the command-line arguments, run the conversion and save the result.

    When `--to` is not given, the output file is named after the stem of the
    source path with a `-unet.safetensors` suffix.
    """
    parser = argparse.ArgumentParser(
        description="Converts a Diffusion UNet model to a Refiners SD1UNet or SDXLUNet model"
    )
    parser.add_argument(
        "--from",
        type=str,
        dest="source_path",
        default="runwayml/stable-diffusion-v1-5",
        help=(
            "Can be a path to a .bin file, a .safetensors file or a model name from the HuggingFace Hub. Default:"
            " runwayml/stable-diffusion-v1-5"
        ),
    )
    parser.add_argument(
        "--override-weights",
        type=str,
        default=None,
        help=(
            "Path to a weights file to override the source model (keeping its config). "
            "This is useful for models distributed as .pth files."
        ),
    )
    parser.add_argument(
        "--to",
        type=str,
        dest="output_path",
        default=None,
        help=(
            "Output path (.safetensors) for converted model. If not provided, the output path will be"
            " `<stem of the source path>-unet.safetensors`."
        ),
    )
    parser.add_argument("--subfolder", type=str, default="unet", help="Subfolder. Default: unet.")
    parser.add_argument(
        "--skip-init-check",
        action="store_true",
        help="Skip check that source and target have the same layers count.",
    )
    parser.add_argument("--half", action="store_true", help="Convert to half precision.")
    parser.add_argument(
        "--verbose",
        action="store_true",
        default=False,
        help="Prints additional information during conversion. Default: False",
    )
    args = parser.parse_args(namespace=Args())
    if args.output_path is None:
        # Derive the default file name from the last component of the source path.
        args.output_path = f"{Path(args.source_path).stem}-unet.safetensors"
    converter = setup_converter(args=args)
    converter.save_to_safetensors(path=args.output_path, half=args.half)


# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()
implement the ConvertModule class and refactor conversion scripts 2023-08-24 00:26:37 +00:00			`import argparse`
			`from pathlib import Path`
add LcmAdapter This adds support for the condition scale embedding. Also updates the UNet converter to support LCM. 2024-02-15 17:58:23 +00:00			`from typing import Any`
run lint rules using latest isort settings 2023-12-11 10:46:38 +00:00
implement the ConvertModule class and refactor conversion scripts 2023-08-24 00:26:37 +00:00			`import torch`
run lint rules using latest isort settings 2023-12-11 10:46:38 +00:00			`from diffusers import UNet2DConditionModel # type: ignore`
implement the ConvertModule class and refactor conversion scripts 2023-08-24 00:26:37 +00:00			`from torch import nn`
run lint rules using latest isort settings 2023-12-11 10:46:38 +00:00
implement the ConvertModule class and refactor conversion scripts 2023-08-24 00:26:37 +00:00			`from refiners.fluxion.model_converter import ModelConverter`
add SDXL-Lightning weights to conversion script + support safetensors 2024-02-23 16:34:50 +00:00			`from refiners.fluxion.utils import load_from_safetensors, load_tensors`
implement the ConvertModule class and refactor conversion scripts 2023-08-24 00:26:37 +00:00			`from refiners.foundationals.latent_diffusion import SD1UNet, SDXLUNet`
add docstrings for LCM / LCM-LoRA 2024-02-20 15:22:08 +00:00			`from refiners.foundationals.latent_diffusion.stable_diffusion_xl.lcm import SDXLLcmAdapter`
implement the ConvertModule class and refactor conversion scripts 2023-08-24 00:26:37 +00:00

			`class Args(argparse.Namespace):`
			`source_path: str`
			`output_path: str \| None`
add a way to specify the subfolder of the unet (no subfolder -> pass an empty string) 2024-01-16 14:16:33 +00:00			`subfolder: str`
implement the ConvertModule class and refactor conversion scripts 2023-08-24 00:26:37 +00:00			`half: bool`
			`verbose: bool`
unet conversion: add option to skip init check 2024-01-16 17:40:43 +00:00			`skip_init_check: bool`
implement the ConvertModule class and refactor conversion scripts 2023-08-24 00:26:37 +00:00

			`def setup_converter(args: Args) -> ModelConverter:`
refactor: convert bash script to python Ran successfully to completion. But on a repeat run `convert_unclip` didn't pass the hash check for some reason. - fix inpainting model download urls - shows a progress bar for downloads - skips downloading existing files - uses a temporary file to prevent partial downloads - can do a dry run to check if url is valid `DRY_RUN=1 python scripts/prepare_test_weights.py` - displays the downloaded file hash 2023-12-14 03:53:06 +00:00			# low_cpu_mem_usage=False stops some annoying console messages us to `pip install accelerate`
implement the ConvertModule class and refactor conversion scripts 2023-08-24 00:26:37 +00:00			`source: nn.Module = UNet2DConditionModel.from_pretrained( # type: ignore`
refactor: convert bash script to python Ran successfully to completion. But on a repeat run `convert_unclip` didn't pass the hash check for some reason. - fix inpainting model download urls - shows a progress bar for downloads - skips downloading existing files - uses a temporary file to prevent partial downloads - can do a dry run to check if url is valid `DRY_RUN=1 python scripts/prepare_test_weights.py` - displays the downloaded file hash 2023-12-14 03:53:06 +00:00			`pretrained_model_name_or_path=args.source_path,`
add a way to specify the subfolder of the unet (no subfolder -> pass an empty string) 2024-01-16 14:16:33 +00:00			`subfolder=args.subfolder,`
refactor: convert bash script to python Ran successfully to completion. But on a repeat run `convert_unclip` didn't pass the hash check for some reason. - fix inpainting model download urls - shows a progress bar for downloads - skips downloading existing files - uses a temporary file to prevent partial downloads - can do a dry run to check if url is valid `DRY_RUN=1 python scripts/prepare_test_weights.py` - displays the downloaded file hash 2023-12-14 03:53:06 +00:00			`low_cpu_mem_usage=False,`
implement the ConvertModule class and refactor conversion scripts 2023-08-24 00:26:37 +00:00			`)`
add option to override unet weights for conversion 2024-02-21 17:05:12 +00:00			`if args.override_weights is not None:`
add SDXL-Lightning weights to conversion script + support safetensors 2024-02-23 16:34:50 +00:00			`if args.override_weights.endswith(".pth"):`
			`sd = load_tensors(args.override_weights)`
			`elif args.override_weights.endswith(".safetensors"):`
			`sd = load_from_safetensors(args.override_weights)`
			`else:`
			`raise ValueError(f"Unsupported file format: {args.override_weights}")`
add option to override unet weights for conversion 2024-02-21 17:05:12 +00:00			`source.load_state_dict(sd)`
implement the ConvertModule class and refactor conversion scripts 2023-08-24 00:26:37 +00:00			`source_in_channels: int = source.config.in_channels # type: ignore`
			`source_clip_embedding_dim: int = source.config.cross_attention_dim # type: ignore`
			`source_has_time_ids: bool = source.config.addition_embed_type == "text_time" # type: ignore`
add LcmAdapter This adds support for the condition scale embedding. Also updates the UNet converter to support LCM. 2024-02-15 17:58:23 +00:00			`source_is_lcm: bool = source.config.time_cond_proj_dim is not None`

			`if source_has_time_ids:`
			`target = SDXLUNet(in_channels=source_in_channels)`
			`else:`
			`target = SD1UNet(in_channels=source_in_channels)`

			`if source_is_lcm:`
			`assert isinstance(target, SDXLUNet)`
add docstrings for LCM / LCM-LoRA 2024-02-20 15:22:08 +00:00			`SDXLLcmAdapter(target=target).inject()`
implement the ConvertModule class and refactor conversion scripts 2023-08-24 00:26:37 +00:00
			`x = torch.randn(1, source_in_channels, 32, 32)`
			`timestep = torch.tensor(data=[0])`
			`clip_text_embeddings = torch.randn(1, 77, source_clip_embedding_dim)`

			`target.set_timestep(timestep=timestep)`
			`target.set_clip_text_embedding(clip_text_embedding=clip_text_embeddings)`
			`added_cond_kwargs = {}`
make `__getattr__` on Module return object, not Any PyTorch chose to make it Any because they expect its users' code to be "highly dynamic": https://github.com/pytorch/pytorch/pull/104321 It is not the case for us, in Refiners having untyped code goes contrary to one of our core principles. Note that there is currently an open PR in PyTorch to return `Module \| Tensor`, but in practice this is not always correct either: https://github.com/pytorch/pytorch/pull/115074 I also moved Residuals-related code from SD1 to latent_diffusion because SDXL should not depend on SD1. 2024-02-05 16:10:05 +00:00			`if isinstance(target, SDXLUNet):`
implement the ConvertModule class and refactor conversion scripts 2023-08-24 00:26:37 +00:00			`added_cond_kwargs = {"text_embeds": torch.randn(1, 1280), "time_ids": torch.randn(1, 6)}`
			`target.set_time_ids(time_ids=added_cond_kwargs["time_ids"])`
			`target.set_pooled_text_embedding(pooled_text_embedding=added_cond_kwargs["text_embeds"])`

			`target_args = (x,)`
add LcmAdapter This adds support for the condition scale embedding. Also updates the UNet converter to support LCM. 2024-02-15 17:58:23 +00:00
			`source_kwargs: dict[str, Any] = {}`
			`if source_has_time_ids:`
			`source_kwargs["added_cond_kwargs"] = added_cond_kwargs`
			`if source_is_lcm:`
			`source_kwargs["timestep_cond"] = torch.randn(1, source.config.time_cond_proj_dim)`

implement the ConvertModule class and refactor conversion scripts 2023-08-24 00:26:37 +00:00			`source_args = {`
			`"positional": (x, timestep, clip_text_embeddings),`
add LcmAdapter This adds support for the condition scale embedding. Also updates the UNet converter to support LCM. 2024-02-15 17:58:23 +00:00			`"keyword": source_kwargs,`
implement the ConvertModule class and refactor conversion scripts 2023-08-24 00:26:37 +00:00			`}`

unet conversion: add option to skip init check 2024-01-16 17:40:43 +00:00			`converter = ModelConverter(`
			`source_model=source,`
			`target_model=target,`
			`skip_init_check=args.skip_init_check,`
			`skip_output_check=True,`
			`verbose=args.verbose,`
			`)`
implement the ConvertModule class and refactor conversion scripts 2023-08-24 00:26:37 +00:00			`if not converter.run(`
			`source_args=source_args,`
			`target_args=target_args,`
			`):`
			`raise RuntimeError("Model conversion failed")`
			`return converter`


			`def main() -> None:`
			`parser = argparse.ArgumentParser(`
			`description="Converts a Diffusion UNet model to a Refiners SD1UNet or SDXLUNet model"`
			`)`
			`parser.add_argument(`
			`"--from",`
			`type=str,`
			`dest="source_path",`
			`default="runwayml/stable-diffusion-v1-5",`
			`help=(`
			`"Can be a path to a .bin file, a .safetensors file or a model name from the HuggingFace Hub. Default:"`
			`" runwayml/stable-diffusion-v1-5"`
			`),`
			`)`
add option to override unet weights for conversion 2024-02-21 17:05:12 +00:00			`parser.add_argument(`
			`"--override-weights",`
			`type=str,`
			`default=None,`
			`help=(`
			`"Path to a weights file to override the source model (keeping its config). "`
			`"This is useful for models distributed as .pth files."`
			`),`
			`)`
implement the ConvertModule class and refactor conversion scripts 2023-08-24 00:26:37 +00:00			`parser.add_argument(`
			`"--to",`
			`type=str,`
			`dest="output_path",`
			`default=None,`
			`help=(`
			`"Output path (.safetensors) for converted model. If not provided, the output path will be the same as the"`
			`" source path."`
			`),`
			`)`
add a way to specify the subfolder of the unet (no subfolder -> pass an empty string) 2024-01-16 14:16:33 +00:00			`parser.add_argument("--subfolder", type=str, default="unet", help="Subfolder. Default: unet.")`
unet conversion: add option to skip init check 2024-01-16 17:40:43 +00:00			`parser.add_argument(`
			`"--skip-init-check",`
			`action="store_true",`
			`help="Skip check that source and target have the same layers count.",`
			`)`
fix invalid default value for --half in help 2024-01-16 14:15:52 +00:00			`parser.add_argument("--half", action="store_true", help="Convert to half precision.")`
implement the ConvertModule class and refactor conversion scripts 2023-08-24 00:26:37 +00:00			`parser.add_argument(`
			`"--verbose",`
			`action="store_true",`
			`default=False,`
			`help="Prints additional information during conversion. Default: False",`
			`)`
			`args = parser.parse_args(namespace=Args())`
			`if args.output_path is None:`
			`args.output_path = f"{Path(args.source_path).stem}-unet.safetensors"`
			`converter = setup_converter(args=args)`
			`converter.save_to_safetensors(path=args.output_path, half=args.half)`


			`if __name__ == "__main__":`
			`main()`