add new weight conversion logic

This commit is contained in:
Laurent 2024-10-09 09:26:35 +00:00 committed by Laureηt
parent 4045904b26
commit d322e9c5ed
26 changed files with 7652 additions and 3 deletions

View file

@ -56,6 +56,9 @@ Documentation = "https://refine.rs/"
Repository = "https://github.com/finegrain-ai/refiners" Repository = "https://github.com/finegrain-ai/refiners"
Issues = "https://github.com/finegrain-ai/refiners/issues" Issues = "https://github.com/finegrain-ai/refiners/issues"
[project.scripts]
get_weights = "refiners.conversion.cli:main"
[project.optional-dependencies] [project.optional-dependencies]
training = [ training = [
"bitsandbytes>=0.41.2.post2", "bitsandbytes>=0.41.2.post2",
@ -84,6 +87,7 @@ test = [
"sentencepiece>=0.2.0", "sentencepiece>=0.2.0",
] ]
conversion = [ conversion = [
"huggingface-hub>=0.25.1",
"diffusers>=0.26.1", "diffusers>=0.26.1",
"transformers>=4.35.2", "transformers>=4.35.2",
"segment-anything-py>=1.0", "segment-anything-py>=1.0",
@ -181,7 +185,7 @@ exclude_also = [
] ]
[tool.typos.default] [tool.typos.default]
extend-ignore-identifiers-re = ["NDArray*", "interm", "af000ded"] extend-ignore-identifiers-re = ["NDArray*", "interm", "af000ded", "nin"]
[tool.typos.default.extend-words] [tool.typos.default.extend-words]
adaptee = "adaptee" # Common name for an adapter's target adaptee = "adaptee" # Common name for an adapter's target

View file

@ -0,0 +1,45 @@
from .models import (
autoencoder_sd15,
autoencoder_sdxl,
clip_image_sd21,
clip_text_sd15,
clip_text_sdxl,
controllora_sdxl,
controlnet_sd15,
dinov2,
ella,
hq_sam,
ipadapter_sd15,
ipadapter_sdxl,
loras,
mvanet,
preprocessors,
sam,
t2iadapter_sd15,
t2iadapter_sdxl,
unet_sd15,
unet_sdxl,
)
# Public API of this package: the conversion submodules imported from `.models` above.
__all__ = [
"autoencoder_sd15",
"autoencoder_sdxl",
"clip_image_sd21",
"clip_text_sd15",
"clip_text_sdxl",
"controllora_sdxl",
"controlnet_sd15",
"dinov2",
"ella",
"hq_sam",
"ipadapter_sd15",
"ipadapter_sdxl",
"loras",
"mvanet",
"preprocessors",
"sam",
"t2iadapter_sd15",
"t2iadapter_sdxl",
"unet_sd15",
"unet_sdxl",
]

View file

@ -0,0 +1,145 @@
import logging
from refiners.conversion import (
autoencoder_sd15,
autoencoder_sdxl,
clip_image_sd21,
clip_text_sd15,
clip_text_sdxl,
controllora_sdxl,
controlnet_sd15,
dinov2,
ella,
hq_sam,
ipadapter_sd15,
ipadapter_sdxl,
loras,
mvanet,
preprocessors,
sam,
t2iadapter_sd15,
t2iadapter_sdxl,
unet_sd15,
unet_sdxl,
)
def main() -> None:
    """Entry point of the weights CLI.

    Configures logging, then walks through every known weight recipe, one
    model family after another, calling `convert()` on it (or `download()`
    for the recipes that are fetched without conversion). The calls run
    sequentially, in the order written below.
    """
    # urllib3 is capped at WARNING; everything else is logged from DEBUG upwards.
    logging.getLogger("urllib3").setLevel(logging.WARNING)
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s - %(levelname)s - %(message)s",
    )

    # --- MVANet ---
    mvanet.mvanet.convert()
    mvanet.finegrain_v01.download()

    # --- LoRAs (downloaded only, no conversion) ---
    loras.sd15_pokemon.download()
    loras.sdxl_dpo.download()
    loras.sdxl_scifi.download()
    loras.sdxl_pixelart.download()
    loras.sdxl_age_slider.download()
    loras.sdxl_cartoon_slider.download()
    loras.sdxl_eyesize_slider.download()

    # --- Preprocessors ---
    preprocessors.informative_drawings.convert()

    # --- Autoencoders: SD1.5 ---
    autoencoder_sd15.runwayml.convert()
    autoencoder_sd15.stability_mse.convert()
    autoencoder_sd15.juggernaut_reborn.convert()
    autoencoder_sd15.juggernaut_aftermath.convert()
    autoencoder_sd15.realistic_stock_photo_v3.convert()
    autoencoder_sd15.realistic_vision_v5.convert()

    # --- Autoencoders: SDXL ---
    autoencoder_sdxl.stability.convert()
    autoencoder_sdxl.madebyollin_fp16fix.convert()
    autoencoder_sdxl.juggernautXL_v10.convert()

    # --- Text encoders: SD1.5 ---
    clip_text_sd15.runwayml.convert()
    clip_text_sd15.juggernaut_reborn.convert()
    clip_text_sd15.juggernaut_aftermath.convert()
    clip_text_sd15.realistic_stock_photo_v3.convert()
    clip_text_sd15.realistic_vision_v5.convert()

    # --- Image encoders: SD2.1 ---
    clip_image_sd21.unclip_21.convert()

    # --- Text encoders: SDXL ---
    clip_text_sdxl.stability.convert()
    clip_text_sdxl.juggernautXL_v10.convert()

    # --- UNets: SD1.5 ---
    unet_sd15.runwayml.convert()
    unet_sd15.runwayml_inpainting.convert()
    unet_sd15.juggernaut_reborn.convert()
    unet_sd15.juggernaut_aftermath.convert()
    unet_sd15.realistic_stock_photo_v3.convert()
    unet_sd15.realistic_vision_v5.convert()

    # --- UNets: SD1.5 IC-Light ---
    unet_sd15.ic_light_fc.convert()
    unet_sd15.ic_light_fcon.convert()
    unet_sd15.ic_light_fbc.convert()

    # --- UNets: SDXL ---
    unet_sdxl.stability.convert()
    unet_sdxl.juggernautXL_v10.convert()

    # --- UNets: SDXL LCM ---
    unet_sdxl.lcm.convert()

    # --- UNets: SDXL Lightning ---
    unet_sdxl.lightning_4step.convert()
    unet_sdxl.lightning_1step.convert()

    # --- ControlNets: SD1.5 ---
    controlnet_sd15.tile.convert()
    controlnet_sd15.canny.convert()
    controlnet_sd15.depth.convert()
    controlnet_sd15.normalbae.convert()
    controlnet_sd15.lineart.convert()
    controlnet_sd15.sam.convert()

    # --- Control LoRAs: SDXL ---
    controllora_sdxl.canny.convert()
    controllora_sdxl.cpds.convert()

    # --- IP-Adapters: SD1.5 ---
    ipadapter_sd15.base.convert()
    ipadapter_sd15.plus.convert()

    # --- IP-Adapters: SDXL ---
    ipadapter_sdxl.base.convert()
    ipadapter_sdxl.plus.convert()

    # --- T2I-Adapters: SD1.5 ---
    t2iadapter_sd15.depth.convert()

    # --- T2I-Adapters: SDXL ---
    t2iadapter_sdxl.canny.convert()

    # --- ELLA adapters ---
    ella.sd15_t5xl.convert()

    # --- DINOv2 ---
    dinov2.small.convert()
    dinov2.small_reg.convert()
    dinov2.base.convert()
    dinov2.base_reg.convert()
    dinov2.large.convert()
    dinov2.large_reg.convert()
    dinov2.giant.convert()
    dinov2.giant_reg.convert()

    # --- SAM ---
    sam.vit_h.convert()

    # --- SAM-HQ ---
    hq_sam.vit_h.convert()


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,654 @@
from collections import defaultdict
from enum import Enum, auto
from pathlib import Path
from typing import Any, DefaultDict, TypedDict
import torch
from torch import Tensor, nn
from torch.utils.hooks import RemovableHandle
from refiners.fluxion.utils import no_grad, norm, save_to_safetensors
# Torch layer types that the converter treats as "basic" layers: modules that are
# (sub)instances of one of these get counted, traced and matched between models.
TORCH_BASIC_LAYERS: list[type[nn.Module]] = [
nn.Conv1d,
nn.Conv2d,
nn.Conv3d,
nn.ConvTranspose1d,
nn.ConvTranspose2d,
nn.ConvTranspose3d,
nn.Linear,
nn.BatchNorm1d,
nn.BatchNorm2d,
nn.BatchNorm3d,
nn.LayerNorm,
nn.GroupNorm,
nn.Embedding,
nn.MaxPool2d,
nn.AvgPool2d,
nn.AdaptiveAvgPool2d,
]
# Signature of a layer: its (inferred) layer type paired with the shapes of its parameters.
ModelTypeShape = tuple[type[nn.Module], tuple[torch.Size, ...]]
class ModuleArgsDict(TypedDict):
    """Explicit bundle of the arguments used to call a module.

    Keys:
        positional: The positional arguments, as a tuple.
        keyword: The keyword arguments, as a name -> value dictionary.
    """

    positional: tuple[Any, ...]
    keyword: dict[str, Any]
class ConversionStage(Enum):
    """Progress marker of a model conversion.

    Members are declared in the order in which the conversion goes through them;
    their values are assigned by `auto()` and therefore start at 1.

    Attributes:
        INIT: Nothing has been checked yet.
        BASIC_LAYERS_MATCH: Source and target models have the same number of basic layers.
        SHAPE_AND_LAYERS_MATCH: The shapes of both models agree.
        MODELS_OUTPUT_AGREE: The outputs of the source and target models agree.
    """

    INIT = auto()
    BASIC_LAYERS_MATCH = auto()
    SHAPE_AND_LAYERS_MATCH = auto()
    MODELS_OUTPUT_AGREE = auto()
class ModelConverter:
"""Converts a model's state_dict to match another model's state_dict.
Note: The conversion process consists of three stages
1. Verify that the source and target models have the same number of basic layers.
2. Find matching shapes and layers between the source and target models.
3. Convert the source model's state_dict to match the target model's state_dict.
4. Compare the outputs of the source and target models.
The conversion process can be run multiple times, and will resume from the last stage.
Example:
```py
source = ...
target = ...
converter = ModelConverter(
source_model=source,
target_model=target,
threshold=0.1,
verbose=False
)
is_converted = converter(args)
if is_converted:
converter.save_to_safetensors(path="converted_model.pt")
```
"""
ModuleArgs = tuple[Any, ...] | dict[str, Any] | ModuleArgsDict
stage: ConversionStage = ConversionStage.INIT
_stored_mapping: dict[str, str] | None = None
def __init__(
self,
source_model: nn.Module,
target_model: nn.Module,
source_keys_to_skip: list[str] | None = None,
target_keys_to_skip: list[str] | None = None,
custom_layer_mapping: dict[type[nn.Module], type[nn.Module]] | None = None,
threshold: float = 1e-5,
skip_output_check: bool = False,
skip_init_check: bool = False,
verbose: bool = True,
) -> None:
"""Initializes the ModelConverter.
Args:
source_model: The model to convert from.
target_model: The model to convert to.
source_keys_to_skip: A list of keys to skip when tracing the source model.
target_keys_to_skip: A list of keys to skip when tracing the target model.
custom_layer_mapping: A dictionary mapping custom layer types between the source and target models.
threshold: The threshold for comparing outputs between the source and target models.
skip_output_check: Whether to skip comparing the outputs of the source and target models.
skip_init_check: Whether to skip checking that the source and target models have the same number of basic
layers.
verbose: Whether to print messages during the conversion process.
"""
self.source_model = source_model
self.target_model = target_model
self.source_keys_to_skip = source_keys_to_skip or []
self.target_keys_to_skip = target_keys_to_skip or []
self.custom_layer_mapping = custom_layer_mapping or {}
self.threshold = threshold
self.skip_output_check = skip_output_check
self.skip_init_check = skip_init_check
self.verbose = verbose
def __repr__(self) -> str:
return (
f"ModelConverter(source_model={self.source_model.__class__.__name__},"
f" target_model={self.target_model.__class__.__name__}, stage={self.stage})"
)
def __bool__(self) -> bool:
return self.stage.value >= 2 if self.skip_output_check else self.stage.value >= 3
def run(self, source_args: ModuleArgs, target_args: ModuleArgs | None = None) -> bool:
"""Run the conversion process.
Args:
source_args: The arguments to pass to the source model it can be either a tuple of positional arguments,
a dictionary of keyword arguments, or a dictionary with `positional` and `keyword` keys. If `target_args`
is not provided, these arguments will also be passed to the target model.
target_args: The arguments to pass to the target model it can be either a tuple of positional arguments,
a dictionary of keyword arguments, or a dictionary with `positional` and `keyword` keys.
Returns:
True if the conversion process is done and the models agree.
"""
if target_args is None:
target_args = source_args
match self.stage:
case ConversionStage.MODELS_OUTPUT_AGREE:
self._increment_stage()
return True
case ConversionStage.SHAPE_AND_LAYERS_MATCH if self._run_shape_and_layers_match_stage(
source_args=source_args, target_args=target_args
):
self._increment_stage()
return True
case ConversionStage.BASIC_LAYERS_MATCH if self._run_basic_layers_match_stage(
source_args=source_args, target_args=target_args
):
self._increment_stage()
return self.run(source_args=source_args, target_args=target_args)
case ConversionStage.INIT if self._run_init_stage():
self._increment_stage()
return self.run(source_args=source_args, target_args=target_args)
case _:
self._log(message=f"Conversion failed at stage {self.stage.value}")
return False
def _increment_stage(self) -> None:
"""Increment the stage of the conversion process."""
match self.stage:
case ConversionStage.INIT:
self.stage = ConversionStage.BASIC_LAYERS_MATCH
self._log(
message=(
"Stage 0 -> 1 - Models have the same number of basic layers. Finding matching shapes and"
" layers..."
)
)
case ConversionStage.BASIC_LAYERS_MATCH:
self.stage = ConversionStage.SHAPE_AND_LAYERS_MATCH
self._log(
message=(
"Stage 1 -> 2 - Shape of both models agree. Applying state_dict to target model. Comparing"
" models..."
)
)
case ConversionStage.SHAPE_AND_LAYERS_MATCH:
if self.skip_output_check:
self._log(
message=(
"Stage 2 - Nothing to do. Skipping output check. If you want to compare the outputs, set"
" `skip_output_check` to `False`"
)
)
else:
self.stage = ConversionStage.MODELS_OUTPUT_AGREE
self._log(
message=(
"Stage 2 -> 3 - Conversion is done and source and target models agree: you can export the"
" converted model using `save_to_safetensors`"
)
)
case ConversionStage.MODELS_OUTPUT_AGREE:
self._log(
message=(
"Stage 3 - Nothing to do. Conversion is done and source and target models agree: you can export"
" the converted model using `save_to_safetensors`"
)
)
def get_state_dict(self) -> dict[str, Tensor]:
"""Get the converted state_dict."""
if not self:
raise ValueError("The conversion process is not done yet. Run `converter(args)` first.")
return self.target_model.state_dict()
def get_mapping(self) -> dict[str, str]:
"""Get the mapping between the source and target models' state_dicts."""
if not self:
raise ValueError("The conversion process is not done yet. Run `converter(args)` first.")
assert self._stored_mapping is not None, "Mapping is not stored"
return self._stored_mapping
def save_to_safetensors(self, path: Path | str, metadata: dict[str, str] | None = None, half: bool = False) -> None:
"""Save the converted model to a SafeTensors file.
Warning:
This method can only be called after the conversion process is done.
Args:
path: The path to save the converted model to.
metadata: Metadata to save with the converted model.
half: Whether to save the converted model as half precision.
Raises:
ValueError: If the conversion process is not done yet. Run `converter` first.
"""
if not self:
raise ValueError("The conversion process is not done yet. Run `converter(args)` first.")
state_dict = self.get_state_dict()
if half:
state_dict = {key: value.half() for key, value in state_dict.items()}
save_to_safetensors(path=path, tensors=state_dict, metadata=metadata)
def map_state_dicts(
self,
source_args: ModuleArgs,
target_args: ModuleArgs | None = None,
) -> dict[str, str] | None:
"""Find a mapping between the source and target models' state_dicts.
Args:
source_args: The arguments to pass to the source model it can be either a tuple of positional arguments,
a dictionary of keyword arguments, or a dictionary with `positional` and `keyword` keys. If `target_args`
is not provided, these arguments will also be passed to the target model.
target_args: The arguments to pass to the target model it can be either a tuple of positional arguments,
a dictionary of keyword arguments, or a dictionary with `positional` and `keyword` keys.
Returns:
A dictionary mapping keys in the target model's state_dict to keys in the source model's state_dict.
"""
if target_args is None:
target_args = source_args
source_order = self._trace_module_execution_order(
module=self.source_model, args=source_args, keys_to_skip=self.source_keys_to_skip
)
target_order = self._trace_module_execution_order(
module=self.target_model, args=target_args, keys_to_skip=self.target_keys_to_skip
)
if not self._assert_shapes_aligned(source_order=source_order, target_order=target_order):
return None
mapping: dict[str, str] = {}
for source_type_shape in source_order:
source_keys = source_order[source_type_shape]
target_type_shape = source_type_shape
if not self._is_torch_basic_layer(module_type=source_type_shape[0]):
for source_custom_type, target_custom_type in self.custom_layer_mapping.items():
if source_custom_type == source_type_shape[0]:
target_type_shape = (target_custom_type, source_type_shape[1])
break
target_keys = target_order[target_type_shape]
mapping.update(zip(target_keys, source_keys))
return mapping
def compare_models(
self,
source_args: ModuleArgs,
target_args: ModuleArgs | None = None,
threshold: float = 1e-5,
) -> bool:
"""Compare the outputs of the source and target models.
Args:
source_args: The arguments to pass to the source model it can be either a tuple of positional arguments,
a dictionary of keyword arguments, or a dictionary with `positional` and `keyword` keys. If `target_args`
is not provided, these arguments will also be passed to the target model.
target_args: The arguments to pass to the target model it can be either a tuple of positional arguments,
a dictionary of keyword arguments, or a dictionary with `positional` and `keyword` keys.
threshold: The threshold for comparing outputs between the source and target models.
Returns:
True if the outputs of the source and target models agree.
"""
if target_args is None:
target_args = source_args
source_outputs = self._collect_layers_outputs(
module=self.source_model, args=source_args, keys_to_skip=self.source_keys_to_skip
)
target_outputs = self._collect_layers_outputs(
module=self.target_model, args=target_args, keys_to_skip=self.target_keys_to_skip
)
diff, prev_source_key, prev_target_key = None, None, None
for (source_key, source_output), (target_key, target_output) in zip(source_outputs, target_outputs):
diff = norm(source_output - target_output.reshape(shape=source_output.shape)).item()
if diff > threshold:
self._log(
f"Models diverged between {prev_source_key} and {source_key}, and between {prev_target_key} and"
f" {target_key}, difference in norm: {diff}"
)
return False
prev_source_key, prev_target_key = source_key, target_key
self._log(message=f"Models agree. Difference in norm: {diff}")
return True
def _run_init_stage(self) -> bool:
"""Run the init stage of the conversion process."""
if self.skip_init_check:
self._log(
message=(
"Skipping init check. If you want to check the number of basic layers, set `skip_init_check` to"
" `False`"
)
)
return True
is_count_correct = self._verify_basic_layers_count()
is_not_missing_layers = self._verify_missing_basic_layers()
return is_count_correct and is_not_missing_layers
def _run_basic_layers_match_stage(self, source_args: ModuleArgs, target_args: ModuleArgs | None) -> bool:
"""Run the basic layers match stage of the conversion process."""
mapping = self.map_state_dicts(source_args=source_args, target_args=target_args)
self._stored_mapping = mapping
if mapping is None:
self._log(message="Models do not have matching shapes.")
return False
source_state_dict = self.source_model.state_dict()
target_state_dict = self.target_model.state_dict()
converted_state_dict = self._convert_state_dict(
source_state_dict=source_state_dict, target_state_dict=target_state_dict, state_dict_mapping=mapping
)
self.target_model.load_state_dict(state_dict=converted_state_dict)
return True
def _run_shape_and_layers_match_stage(self, source_args: ModuleArgs, target_args: ModuleArgs | None) -> bool:
"""Run the shape and layers match stage of the conversion process."""
if self.skip_output_check:
self._log(
message="Skipping output check. If you want to compare the outputs, set `skip_output_check` to `False`"
)
return True
try:
if self.compare_models(source_args=source_args, target_args=target_args, threshold=self.threshold):
self._log(message="Models agree. You can export the converted model using `save_to_safetensors`")
return True
else:
self._log(message="Models do not agree. Try to increase the threshold or modify the models.")
return False
except Exception as e:
self._log(message=f"An error occurred while comparing the models: {e}")
return False
def _log(self, message: str) -> None:
"""Print a message if `verbose` is `True`."""
if self.verbose:
print(message)
def _debug_print_shapes(
self,
shape: ModelTypeShape,
source_keys: list[str],
target_keys: list[str],
) -> None:
"""Print the shapes of the sub-modules in `source_keys` and `target_keys`."""
self._log(message=f"{shape}")
max_len = max(len(source_keys), len(target_keys))
for i in range(max_len):
source_key = source_keys[i] if i < len(source_keys) else "---"
target_key = target_keys[i] if i < len(target_keys) else "---"
self._log(f"\t{source_key}\t{target_key}")
@staticmethod
def _unpack_module_args(module_args: ModuleArgs) -> tuple[tuple[Any, ...], dict[str, Any]]:
"""Unpack the positional and keyword arguments passed to a module."""
match module_args:
case tuple(positional_args):
keyword_args: dict[str, Any] = {}
case {"positional": positional_args, "keyword": keyword_args}:
pass
case _:
positional_args = ()
keyword_args = dict(**module_args)
return positional_args, keyword_args
def _is_torch_basic_layer(self, module_type: type[nn.Module]) -> bool:
"""Check if a module type is a subclass of a torch basic layer."""
return any(issubclass(module_type, torch_basic_layer) for torch_basic_layer in TORCH_BASIC_LAYERS)
def _infer_basic_layer_type(self, module: nn.Module) -> type[nn.Module] | None:
"""Infer the type of a basic layer."""
layer_types = (
set(self.custom_layer_mapping.keys()) | set(self.custom_layer_mapping.values()) | set(TORCH_BASIC_LAYERS)
)
for layer_type in layer_types:
if isinstance(module, layer_type):
return layer_type
return None
def get_module_signature(self, module: nn.Module) -> ModelTypeShape:
"""Get the signature of a module."""
layer_type = self._infer_basic_layer_type(module=module)
assert layer_type is not None, f"Module {module} is not a basic layer"
param_shapes = [p.shape for p in module.parameters()]
return (layer_type, tuple(param_shapes))
def _count_basic_layers(self, module: nn.Module) -> dict[type[nn.Module], int]:
"""Count the number of basic layers in a module."""
count: DefaultDict[type[nn.Module], int] = defaultdict(int)
for submodule in module.modules():
layer_type = self._infer_basic_layer_type(module=submodule)
if layer_type is not None:
count[layer_type] += 1
return count
def _verify_basic_layers_count(self) -> bool:
"""Verify that the source and target models have the same number of basic layers."""
source_layers = self._count_basic_layers(module=self.source_model)
target_layers = self._count_basic_layers(module=self.target_model)
reverse_mapping = {v: k for k, v in self.custom_layer_mapping.items()}
diff: dict[type[nn.Module], tuple[int, int]] = {}
for layer_type, source_count in source_layers.items():
target_type = self.custom_layer_mapping.get(layer_type, layer_type)
target_count = target_layers.get(target_type, 0)
if source_count != target_count:
diff[layer_type] = (source_count, target_count)
for layer_type, target_count in target_layers.items():
source_type = reverse_mapping.get(layer_type, layer_type)
source_count = source_layers.get(source_type, 0)
if source_count != target_count:
diff[layer_type] = (source_count, target_count)
if diff:
message = "Models do not have the same number of basic layers:\n"
for layer_type, counts in diff.items():
message += f" {layer_type}: Source {counts[0]} - Target {counts[1]}\n"
self._log(message=message.strip())
return False
return True
def _is_weighted_leaf_module(self, module: nn.Module) -> bool:
"""Check if a module is a leaf module with weights."""
return next(module.parameters(), None) is not None and next(module.children(), None) is None
def _check_for_missing_basic_layers(self, module: nn.Module) -> list[type[nn.Module]]:
"""Check if a module has weighted leaf modules that are not basic layers."""
return [
type(submodule)
for submodule in module.modules()
if self._is_weighted_leaf_module(module=submodule) and not self._infer_basic_layer_type(module=submodule)
]
def _verify_missing_basic_layers(self) -> bool:
"""Verify that the source and target models do not have missing basic layers."""
missing_source_layers = self._check_for_missing_basic_layers(module=self.source_model)
missing_target_layers = self._check_for_missing_basic_layers(module=self.target_model)
if missing_source_layers or missing_target_layers:
self._log(
message=(
"Models might have missing basic layers. If you want to skip this check, set"
f" `skip_init_check` to `True`: {missing_source_layers}, {missing_target_layers}"
)
)
return False
return True
@no_grad()
def _trace_module_execution_order(
self,
module: nn.Module,
args: ModuleArgs,
keys_to_skip: list[str],
) -> dict[ModelTypeShape, list[str]]:
"""Execute a forward pass and store the order of execution of specific sub-modules.
Args:
module: The module to trace.
args: The arguments to pass to the module it can be either a tuple of positional arguments,
a dictionary of keyword arguments, or a dictionary with `positional` and `keyword` keys.
keys_to_skip: A list of keys to skip when tracing the module.
Returns:
A dictionary mapping the signature of each sub-module to a list of keys in the module's `named_modules`
"""
submodule_to_key: dict[nn.Module, str] = {}
execution_order: defaultdict[ModelTypeShape, list[str]] = defaultdict(list)
def collect_execution_order_hook(layer: nn.Module, *_: Any) -> None:
layer_signature = self.get_module_signature(module=layer)
execution_order[layer_signature].append(submodule_to_key[layer])
hooks: list[RemovableHandle] = []
named_modules: list[tuple[str, nn.Module]] = module.named_modules() # type: ignore
for name, submodule in named_modules:
if (self._infer_basic_layer_type(module=submodule) is not None) and name not in keys_to_skip:
submodule_to_key[submodule] = name # type: ignore
hook = submodule.register_forward_hook(hook=collect_execution_order_hook)
hooks.append(hook)
positional_args, keyword_args = self._unpack_module_args(module_args=args)
module(*positional_args, **keyword_args)
for hook in hooks:
hook.remove()
return dict(execution_order)
def _assert_shapes_aligned(
self, source_order: dict[ModelTypeShape, list[str]], target_order: dict[ModelTypeShape, list[str]]
) -> bool:
"""Assert that the shapes of the sub-modules in `source_order` and `target_order` are aligned."""
model_type_shapes = set(source_order.keys()) | set(target_order.keys())
default_type_shapes = [
type_shape for type_shape in model_type_shapes if self._is_torch_basic_layer(module_type=type_shape[0])
]
shape_mismatched = False
for model_type_shape in default_type_shapes:
source_keys = source_order.get(model_type_shape, [])
target_keys = target_order.get(model_type_shape, [])
if len(source_keys) != len(target_keys):
shape_mismatched = True
self._debug_print_shapes(shape=model_type_shape, source_keys=source_keys, target_keys=target_keys)
for source_custom_type in self.custom_layer_mapping.keys():
# iterate over all type_shapes that have the same type as source_custom_type
for source_type_shape in [
type_shape for type_shape in model_type_shapes if type_shape[0] == source_custom_type
]:
source_keys = source_order.get(source_type_shape, [])
target_custom_type = self.custom_layer_mapping[source_custom_type]
target_type_shape = (target_custom_type, source_type_shape[1])
target_keys = target_order.get(target_type_shape, [])
if len(source_keys) != len(target_keys):
shape_mismatched = True
self._debug_print_shapes(shape=source_type_shape, source_keys=source_keys, target_keys=target_keys)
return not shape_mismatched
@staticmethod
def _convert_state_dict(
source_state_dict: dict[str, Tensor], target_state_dict: dict[str, Tensor], state_dict_mapping: dict[str, str]
) -> dict[str, Tensor]:
"""Convert the source model's state_dict to match the target model's state_dict."""
converted_state_dict: dict[str, Tensor] = {}
for target_key in target_state_dict:
target_prefix, suffix = target_key.rsplit(sep=".", maxsplit=1)
source_prefix = state_dict_mapping[target_prefix]
source_key = ".".join([source_prefix, suffix])
converted_state_dict[target_key] = source_state_dict[source_key]
return converted_state_dict
@no_grad()
def _collect_layers_outputs(
self, module: nn.Module, args: ModuleArgs, keys_to_skip: list[str]
) -> list[tuple[str, Tensor]]:
"""Execute a forward pass and store the output of specific sub-modules.
Args:
module: The module to trace.
args: The arguments to pass to the module it can be either a tuple of positional arguments,
a dictionary of keyword arguments, or a dictionary with `positional` and `keyword` keys.
keys_to_skip: A list of keys to skip when tracing the module.
Returns:
A list of tuples containing the key of each sub-module and its output.
Note:
The output of each sub-module is cloned to avoid memory leaks.
"""
submodule_to_key: dict[nn.Module, str] = {}
execution_order: list[tuple[str, Tensor]] = []
def collect_execution_order_hook(layer: nn.Module, _: Any, output: Tensor) -> None:
execution_order.append((submodule_to_key[layer], output.clone()))
hooks: list[RemovableHandle] = []
named_modules: list[tuple[str, nn.Module]] = module.named_modules() # type: ignore
for name, submodule in named_modules:
if (self._infer_basic_layer_type(module=submodule) is not None) and name not in keys_to_skip:
submodule_to_key[submodule] = name # type: ignore
hook = submodule.register_forward_hook(hook=collect_execution_order_hook)
hooks.append(hook)
positional_args, keyword_args = self._unpack_module_args(module_args=args)
module(*positional_args, **keyword_args)
for hook in hooks:
hook.remove()
return execution_order

View file

@ -0,0 +1,375 @@
import torch
from refiners.conversion.utils import Conversion, Hub, WeightRecipe
# Weight recipe for autoencoder checkpoints whose layers use the
# "encoder.down_blocks.N.resnets.M..." / "decoder.up_blocks.N..." naming.
# NOTE(review): how WeightRecipe consumes `key_map` and `key_aliases` is defined in
# refiners.conversion.utils, not here; the descriptions below are assumptions to confirm there.
diffusers_recipe = WeightRecipe(
# key_map: presumably "layer path in the original checkpoint" -> "layer path in the Refiners model".
# Neither side carries a parameter suffix such as ".weight" or ".bias".
key_map={
"encoder.conv_in": "Encoder.Conv2d",
"encoder.down_blocks.0.resnets.0.norm1": "Encoder.Chain_1.Chain_1.Resnet_1.Chain.GroupNorm_1",
"encoder.down_blocks.0.resnets.0.norm2": "Encoder.Chain_1.Chain_1.Resnet_1.Chain.GroupNorm_2",
"encoder.down_blocks.0.resnets.1.norm1": "Encoder.Chain_1.Chain_1.Resnet_2.Chain.GroupNorm_1",
"encoder.down_blocks.0.resnets.1.norm2": "Encoder.Chain_1.Chain_1.Resnet_2.Chain.GroupNorm_2",
"encoder.down_blocks.1.resnets.0.norm1": "Encoder.Chain_1.Chain_2.Resnet_1.Chain.GroupNorm_1",
"decoder.up_blocks.3.resnets.0.norm2": "Decoder.Chain_1.Chain_5.Resnet_1.Chain.GroupNorm_2",
"decoder.up_blocks.3.resnets.1.norm1": "Decoder.Chain_1.Chain_5.Resnet_2.Chain.GroupNorm_1",
"decoder.up_blocks.3.resnets.1.norm2": "Decoder.Chain_1.Chain_5.Resnet_2.Chain.GroupNorm_2",
"decoder.up_blocks.3.resnets.2.norm1": "Decoder.Chain_1.Chain_5.Resnet_3.Chain.GroupNorm_1",
"decoder.up_blocks.3.resnets.2.norm2": "Decoder.Chain_1.Chain_5.Resnet_3.Chain.GroupNorm_2",
"decoder.conv_norm_out": "Decoder.Chain_2.GroupNorm",
"encoder.down_blocks.0.resnets.0.conv1": "Encoder.Chain_1.Chain_1.Resnet_1.Chain.Conv2d_1",
"encoder.down_blocks.0.resnets.0.conv2": "Encoder.Chain_1.Chain_1.Resnet_1.Chain.Conv2d_2",
"encoder.down_blocks.0.resnets.1.conv1": "Encoder.Chain_1.Chain_1.Resnet_2.Chain.Conv2d_1",
"encoder.down_blocks.0.resnets.1.conv2": "Encoder.Chain_1.Chain_1.Resnet_2.Chain.Conv2d_2",
"encoder.down_blocks.0.downsamplers.0.conv": "Encoder.Chain_1.Chain_1.Downsample.Conv2d",
"decoder.up_blocks.3.resnets.0.conv2": "Decoder.Chain_1.Chain_5.Resnet_1.Chain.Conv2d_2",
"decoder.up_blocks.3.resnets.1.conv1": "Decoder.Chain_1.Chain_5.Resnet_2.Chain.Conv2d_1",
"decoder.up_blocks.3.resnets.1.conv2": "Decoder.Chain_1.Chain_5.Resnet_2.Chain.Conv2d_2",
"decoder.up_blocks.3.resnets.2.conv1": "Decoder.Chain_1.Chain_5.Resnet_3.Chain.Conv2d_1",
"decoder.up_blocks.3.resnets.2.conv2": "Decoder.Chain_1.Chain_5.Resnet_3.Chain.Conv2d_2",
"encoder.down_blocks.1.resnets.0.conv1": "Encoder.Chain_1.Chain_2.Resnet_1.Chain.Conv2d_1",
"encoder.down_blocks.1.resnets.0.norm2": "Encoder.Chain_1.Chain_2.Resnet_1.Chain.GroupNorm_2",
"encoder.down_blocks.1.resnets.1.norm1": "Encoder.Chain_1.Chain_2.Resnet_2.Chain.GroupNorm_1",
"encoder.down_blocks.1.resnets.1.norm2": "Encoder.Chain_1.Chain_2.Resnet_2.Chain.GroupNorm_2",
"encoder.down_blocks.2.resnets.0.norm1": "Encoder.Chain_1.Chain_3.Resnet_1.Chain.GroupNorm_1",
"decoder.up_blocks.2.resnets.0.norm2": "Decoder.Chain_1.Chain_4.Resnet_1.Chain.GroupNorm_2",
"decoder.up_blocks.2.resnets.1.norm1": "Decoder.Chain_1.Chain_4.Resnet_2.Chain.GroupNorm_1",
"decoder.up_blocks.2.resnets.1.norm2": "Decoder.Chain_1.Chain_4.Resnet_2.Chain.GroupNorm_2",
"decoder.up_blocks.2.resnets.2.norm1": "Decoder.Chain_1.Chain_4.Resnet_3.Chain.GroupNorm_1",
"decoder.up_blocks.2.resnets.2.norm2": "Decoder.Chain_1.Chain_4.Resnet_3.Chain.GroupNorm_2",
"decoder.up_blocks.3.resnets.0.norm1": "Decoder.Chain_1.Chain_5.Resnet_1.Chain.GroupNorm_1",
"encoder.down_blocks.1.resnets.0.conv2": "Encoder.Chain_1.Chain_2.Resnet_1.Chain.Conv2d_2",
"encoder.down_blocks.1.resnets.1.conv1": "Encoder.Chain_1.Chain_2.Resnet_2.Chain.Conv2d_1",
"encoder.down_blocks.1.resnets.1.conv2": "Encoder.Chain_1.Chain_2.Resnet_2.Chain.Conv2d_2",
"encoder.down_blocks.1.downsamplers.0.conv": "Encoder.Chain_1.Chain_2.Downsample.Conv2d",
"decoder.up_blocks.2.resnets.0.conv2": "Decoder.Chain_1.Chain_4.Resnet_1.Chain.Conv2d_2",
"decoder.up_blocks.2.resnets.1.conv1": "Decoder.Chain_1.Chain_4.Resnet_2.Chain.Conv2d_1",
"decoder.up_blocks.2.resnets.1.conv2": "Decoder.Chain_1.Chain_4.Resnet_2.Chain.Conv2d_2",
"decoder.up_blocks.2.resnets.2.conv1": "Decoder.Chain_1.Chain_4.Resnet_3.Chain.Conv2d_1",
"decoder.up_blocks.2.resnets.2.conv2": "Decoder.Chain_1.Chain_4.Resnet_3.Chain.Conv2d_2",
"decoder.up_blocks.2.upsamplers.0.conv": "Decoder.Chain_1.Chain_4.Upsample.Conv2d",
"encoder.down_blocks.1.resnets.0.conv_shortcut": "Encoder.Chain_1.Chain_2.Resnet_1.Conv2d",
"encoder.down_blocks.2.resnets.0.conv1": "Encoder.Chain_1.Chain_3.Resnet_1.Chain.Conv2d_1",
"encoder.down_blocks.2.resnets.0.norm2": "Encoder.Chain_1.Chain_3.Resnet_1.Chain.GroupNorm_2",
"encoder.down_blocks.2.resnets.1.norm1": "Encoder.Chain_1.Chain_3.Resnet_2.Chain.GroupNorm_1",
"encoder.down_blocks.2.resnets.1.norm2": "Encoder.Chain_1.Chain_3.Resnet_2.Chain.GroupNorm_2",
"encoder.down_blocks.3.resnets.0.norm1": "Encoder.Chain_1.Chain_4.Resnet_1.Chain.GroupNorm_1",
"encoder.down_blocks.3.resnets.0.norm2": "Encoder.Chain_1.Chain_4.Resnet_1.Chain.GroupNorm_2",
"encoder.down_blocks.3.resnets.1.norm1": "Encoder.Chain_1.Chain_4.Resnet_2.Chain.GroupNorm_1",
"encoder.down_blocks.3.resnets.1.norm2": "Encoder.Chain_1.Chain_4.Resnet_2.Chain.GroupNorm_2",
"encoder.mid_block.resnets.0.norm1": "Encoder.Chain_1.Chain_5.Resnet_1.Chain.GroupNorm_1",
"encoder.mid_block.resnets.0.norm2": "Encoder.Chain_1.Chain_5.Resnet_1.Chain.GroupNorm_2",
"encoder.mid_block.attentions.0.group_norm": "Encoder.Chain_1.Chain_5.Residual.GroupNorm",
"encoder.mid_block.resnets.1.norm1": "Encoder.Chain_1.Chain_5.Resnet_2.Chain.GroupNorm_1",
"encoder.mid_block.resnets.1.norm2": "Encoder.Chain_1.Chain_5.Resnet_2.Chain.GroupNorm_2",
"encoder.conv_norm_out": "Encoder.Chain_2.GroupNorm",
"decoder.mid_block.resnets.0.norm1": "Decoder.Chain_1.Chain_1.Resnet_1.Chain.GroupNorm_1",
"decoder.mid_block.resnets.0.norm2": "Decoder.Chain_1.Chain_1.Resnet_1.Chain.GroupNorm_2",
"decoder.mid_block.attentions.0.group_norm": "Decoder.Chain_1.Chain_1.Residual.GroupNorm",
"decoder.mid_block.resnets.1.norm1": "Decoder.Chain_1.Chain_1.Resnet_2.Chain.GroupNorm_1",
"decoder.mid_block.resnets.1.norm2": "Decoder.Chain_1.Chain_1.Resnet_2.Chain.GroupNorm_2",
"decoder.up_blocks.0.resnets.0.norm1": "Decoder.Chain_1.Chain_2.Resnet_1.Chain.GroupNorm_1",
"decoder.up_blocks.0.resnets.0.norm2": "Decoder.Chain_1.Chain_2.Resnet_1.Chain.GroupNorm_2",
"decoder.up_blocks.0.resnets.1.norm1": "Decoder.Chain_1.Chain_2.Resnet_2.Chain.GroupNorm_1",
"decoder.up_blocks.0.resnets.1.norm2": "Decoder.Chain_1.Chain_2.Resnet_2.Chain.GroupNorm_2",
"decoder.up_blocks.0.resnets.2.norm1": "Decoder.Chain_1.Chain_2.Resnet_3.Chain.GroupNorm_1",
"decoder.up_blocks.0.resnets.2.norm2": "Decoder.Chain_1.Chain_2.Resnet_3.Chain.GroupNorm_2",
"decoder.up_blocks.1.resnets.0.norm1": "Decoder.Chain_1.Chain_3.Resnet_1.Chain.GroupNorm_1",
"decoder.up_blocks.1.resnets.0.norm2": "Decoder.Chain_1.Chain_3.Resnet_1.Chain.GroupNorm_2",
"decoder.up_blocks.1.resnets.1.norm1": "Decoder.Chain_1.Chain_3.Resnet_2.Chain.GroupNorm_1",
"decoder.up_blocks.1.resnets.1.norm2": "Decoder.Chain_1.Chain_3.Resnet_2.Chain.GroupNorm_2",
"decoder.up_blocks.1.resnets.2.norm1": "Decoder.Chain_1.Chain_3.Resnet_3.Chain.GroupNorm_1",
"decoder.up_blocks.1.resnets.2.norm2": "Decoder.Chain_1.Chain_3.Resnet_3.Chain.GroupNorm_2",
"decoder.up_blocks.2.resnets.0.norm1": "Decoder.Chain_1.Chain_4.Resnet_1.Chain.GroupNorm_1",
"encoder.down_blocks.2.resnets.0.conv2": "Encoder.Chain_1.Chain_3.Resnet_1.Chain.Conv2d_2",
"encoder.down_blocks.2.resnets.1.conv1": "Encoder.Chain_1.Chain_3.Resnet_2.Chain.Conv2d_1",
"encoder.down_blocks.2.resnets.1.conv2": "Encoder.Chain_1.Chain_3.Resnet_2.Chain.Conv2d_2",
"encoder.down_blocks.2.downsamplers.0.conv": "Encoder.Chain_1.Chain_3.Downsample.Conv2d",
"encoder.down_blocks.3.resnets.0.conv1": "Encoder.Chain_1.Chain_4.Resnet_1.Chain.Conv2d_1",
"encoder.down_blocks.3.resnets.0.conv2": "Encoder.Chain_1.Chain_4.Resnet_1.Chain.Conv2d_2",
"encoder.down_blocks.3.resnets.1.conv1": "Encoder.Chain_1.Chain_4.Resnet_2.Chain.Conv2d_1",
"encoder.down_blocks.3.resnets.1.conv2": "Encoder.Chain_1.Chain_4.Resnet_2.Chain.Conv2d_2",
"encoder.mid_block.resnets.0.conv1": "Encoder.Chain_1.Chain_5.Resnet_1.Chain.Conv2d_1",
"encoder.mid_block.resnets.0.conv2": "Encoder.Chain_1.Chain_5.Resnet_1.Chain.Conv2d_2",
"encoder.mid_block.resnets.1.conv1": "Encoder.Chain_1.Chain_5.Resnet_2.Chain.Conv2d_1",
"encoder.mid_block.resnets.1.conv2": "Encoder.Chain_1.Chain_5.Resnet_2.Chain.Conv2d_2",
"decoder.mid_block.resnets.0.conv1": "Decoder.Chain_1.Chain_1.Resnet_1.Chain.Conv2d_1",
"decoder.mid_block.resnets.0.conv2": "Decoder.Chain_1.Chain_1.Resnet_1.Chain.Conv2d_2",
"decoder.mid_block.resnets.1.conv1": "Decoder.Chain_1.Chain_1.Resnet_2.Chain.Conv2d_1",
"decoder.mid_block.resnets.1.conv2": "Decoder.Chain_1.Chain_1.Resnet_2.Chain.Conv2d_2",
"decoder.up_blocks.0.resnets.0.conv1": "Decoder.Chain_1.Chain_2.Resnet_1.Chain.Conv2d_1",
"decoder.up_blocks.0.resnets.0.conv2": "Decoder.Chain_1.Chain_2.Resnet_1.Chain.Conv2d_2",
"decoder.up_blocks.0.resnets.1.conv1": "Decoder.Chain_1.Chain_2.Resnet_2.Chain.Conv2d_1",
"decoder.up_blocks.0.resnets.1.conv2": "Decoder.Chain_1.Chain_2.Resnet_2.Chain.Conv2d_2",
"decoder.up_blocks.0.resnets.2.conv1": "Decoder.Chain_1.Chain_2.Resnet_3.Chain.Conv2d_1",
"decoder.up_blocks.0.resnets.2.conv2": "Decoder.Chain_1.Chain_2.Resnet_3.Chain.Conv2d_2",
"decoder.up_blocks.0.upsamplers.0.conv": "Decoder.Chain_1.Chain_2.Upsample.Conv2d",
"decoder.up_blocks.1.resnets.0.conv1": "Decoder.Chain_1.Chain_3.Resnet_1.Chain.Conv2d_1",
"decoder.up_blocks.1.resnets.0.conv2": "Decoder.Chain_1.Chain_3.Resnet_1.Chain.Conv2d_2",
"decoder.up_blocks.1.resnets.1.conv1": "Decoder.Chain_1.Chain_3.Resnet_2.Chain.Conv2d_1",
"decoder.up_blocks.1.resnets.1.conv2": "Decoder.Chain_1.Chain_3.Resnet_2.Chain.Conv2d_2",
"decoder.up_blocks.1.resnets.2.conv1": "Decoder.Chain_1.Chain_3.Resnet_3.Chain.Conv2d_1",
"decoder.up_blocks.1.resnets.2.conv2": "Decoder.Chain_1.Chain_3.Resnet_3.Chain.Conv2d_2",
"decoder.up_blocks.1.upsamplers.0.conv": "Decoder.Chain_1.Chain_3.Upsample.Conv2d",
"encoder.down_blocks.2.resnets.0.conv_shortcut": "Encoder.Chain_1.Chain_3.Resnet_1.Conv2d",
"encoder.mid_block.attentions.0.to_q": "Encoder.Chain_1.Chain_5.Residual.SelfAttention2d.Distribute.Linear_1",
"encoder.mid_block.attentions.0.to_k": "Encoder.Chain_1.Chain_5.Residual.SelfAttention2d.Distribute.Linear_2",
"encoder.mid_block.attentions.0.to_v": "Encoder.Chain_1.Chain_5.Residual.SelfAttention2d.Distribute.Linear_3",
"encoder.mid_block.attentions.0.to_out.0": "Encoder.Chain_1.Chain_5.Residual.SelfAttention2d.Linear",
"decoder.mid_block.attentions.0.to_q": "Decoder.Chain_1.Chain_1.Residual.SelfAttention2d.Distribute.Linear_1",
"decoder.mid_block.attentions.0.to_k": "Decoder.Chain_1.Chain_1.Residual.SelfAttention2d.Distribute.Linear_2",
"decoder.mid_block.attentions.0.to_v": "Decoder.Chain_1.Chain_1.Residual.SelfAttention2d.Distribute.Linear_3",
"decoder.mid_block.attentions.0.to_out.0": "Decoder.Chain_1.Chain_1.Residual.SelfAttention2d.Linear",
"encoder.conv_out": "Encoder.Chain_2.Conv2d",
"quant_conv": "Encoder.Chain_3.Conv2d",
"post_quant_conv": "Decoder.Conv2d_1",
"decoder.conv_in": "Decoder.Conv2d_2",
"decoder.up_blocks.2.resnets.0.conv1": "Decoder.Chain_1.Chain_4.Resnet_1.Chain.Conv2d_1",
"decoder.up_blocks.2.resnets.0.conv_shortcut": "Decoder.Chain_1.Chain_4.Resnet_1.Conv2d",
"decoder.up_blocks.3.resnets.0.conv1": "Decoder.Chain_1.Chain_5.Resnet_1.Chain.Conv2d_1",
"decoder.up_blocks.3.resnets.0.conv_shortcut": "Decoder.Chain_1.Chain_5.Resnet_1.Conv2d",
"decoder.conv_out": "Decoder.Chain_2.Conv2d",
},
# key_aliases: alternative mid-block attention names (query / key / value / proj_attn) mapped to
# the to_q / to_k / to_v / to_out.0 names used as keys in `key_map` above.
# Presumably lets checkpoints using either naming go through the same key_map — TODO confirm.
key_aliases={
"encoder.mid_block.attentions.0.value": "encoder.mid_block.attentions.0.to_v",
"decoder.mid_block.attentions.0.value": "decoder.mid_block.attentions.0.to_v",
"decoder.mid_block.attentions.0.proj_attn": "decoder.mid_block.attentions.0.to_out.0",
"encoder.mid_block.attentions.0.proj_attn": "encoder.mid_block.attentions.0.to_out.0",
"encoder.mid_block.attentions.0.key": "encoder.mid_block.attentions.0.to_k",
"decoder.mid_block.attentions.0.key": "decoder.mid_block.attentions.0.to_k",
"decoder.mid_block.attentions.0.query": "decoder.mid_block.attentions.0.to_q",
"encoder.mid_block.attentions.0.query": "encoder.mid_block.attentions.0.to_q",
},
)
# Weight recipe for autoencoder weights that use the "encoder.down.N.block.M..." /
# "decoder.up.N.block.M..." naming; it is the recipe used by the civitai-sourced
# conversions in this module.
# NOTE(review): how WeightRecipe consumes these fields is defined in refiners.conversion.utils,
# not here; the descriptions below are assumptions to confirm there.
civitai_recipe = WeightRecipe(
# Presumably the prefix under which the autoencoder keys are stored in the original checkpoint.
key_prefix="first_stage_model.",
# key_map: presumably "layer path in the original checkpoint" -> "layer path in the Refiners model".
key_map={
"encoder.conv_in": "Encoder.Conv2d",
"encoder.down.0.block.0.norm1": "Encoder.Chain_1.Chain_1.Resnet_1.Chain.GroupNorm_1",
"encoder.down.0.block.0.conv1": "Encoder.Chain_1.Chain_1.Resnet_1.Chain.Conv2d_1",
"encoder.down.0.block.0.norm2": "Encoder.Chain_1.Chain_1.Resnet_1.Chain.GroupNorm_2",
"encoder.down.0.block.0.conv2": "Encoder.Chain_1.Chain_1.Resnet_1.Chain.Conv2d_2",
"encoder.down.0.block.1.norm1": "Encoder.Chain_1.Chain_1.Resnet_2.Chain.GroupNorm_1",
"encoder.down.0.block.1.conv1": "Encoder.Chain_1.Chain_1.Resnet_2.Chain.Conv2d_1",
"encoder.down.0.block.1.norm2": "Encoder.Chain_1.Chain_1.Resnet_2.Chain.GroupNorm_2",
"encoder.down.0.block.1.conv2": "Encoder.Chain_1.Chain_1.Resnet_2.Chain.Conv2d_2",
"encoder.down.0.downsample.conv": "Encoder.Chain_1.Chain_1.Downsample.Conv2d",
"encoder.down.1.block.0.norm1": "Encoder.Chain_1.Chain_2.Resnet_1.Chain.GroupNorm_1",
"encoder.down.1.block.0.conv1": "Encoder.Chain_1.Chain_2.Resnet_1.Chain.Conv2d_1",
"encoder.down.1.block.0.norm2": "Encoder.Chain_1.Chain_2.Resnet_1.Chain.GroupNorm_2",
"encoder.down.1.block.0.conv2": "Encoder.Chain_1.Chain_2.Resnet_1.Chain.Conv2d_2",
"encoder.down.1.block.0.nin_shortcut": "Encoder.Chain_1.Chain_2.Resnet_1.Conv2d",
"encoder.down.1.block.1.norm1": "Encoder.Chain_1.Chain_2.Resnet_2.Chain.GroupNorm_1",
"encoder.down.1.block.1.conv1": "Encoder.Chain_1.Chain_2.Resnet_2.Chain.Conv2d_1",
"encoder.down.1.block.1.norm2": "Encoder.Chain_1.Chain_2.Resnet_2.Chain.GroupNorm_2",
"encoder.down.1.block.1.conv2": "Encoder.Chain_1.Chain_2.Resnet_2.Chain.Conv2d_2",
"encoder.down.1.downsample.conv": "Encoder.Chain_1.Chain_2.Downsample.Conv2d",
"encoder.down.2.block.0.norm1": "Encoder.Chain_1.Chain_3.Resnet_1.Chain.GroupNorm_1",
"encoder.down.2.block.0.conv1": "Encoder.Chain_1.Chain_3.Resnet_1.Chain.Conv2d_1",
"encoder.down.2.block.0.norm2": "Encoder.Chain_1.Chain_3.Resnet_1.Chain.GroupNorm_2",
"encoder.down.2.block.0.conv2": "Encoder.Chain_1.Chain_3.Resnet_1.Chain.Conv2d_2",
"encoder.down.2.block.0.nin_shortcut": "Encoder.Chain_1.Chain_3.Resnet_1.Conv2d",
"encoder.down.2.block.1.norm1": "Encoder.Chain_1.Chain_3.Resnet_2.Chain.GroupNorm_1",
"encoder.down.2.block.1.conv1": "Encoder.Chain_1.Chain_3.Resnet_2.Chain.Conv2d_1",
"encoder.down.2.block.1.norm2": "Encoder.Chain_1.Chain_3.Resnet_2.Chain.GroupNorm_2",
"encoder.down.2.block.1.conv2": "Encoder.Chain_1.Chain_3.Resnet_2.Chain.Conv2d_2",
"encoder.down.2.downsample.conv": "Encoder.Chain_1.Chain_3.Downsample.Conv2d",
"encoder.down.3.block.0.norm1": "Encoder.Chain_1.Chain_4.Resnet_1.Chain.GroupNorm_1",
"encoder.down.3.block.0.conv1": "Encoder.Chain_1.Chain_4.Resnet_1.Chain.Conv2d_1",
"encoder.down.3.block.0.norm2": "Encoder.Chain_1.Chain_4.Resnet_1.Chain.GroupNorm_2",
"encoder.down.3.block.0.conv2": "Encoder.Chain_1.Chain_4.Resnet_1.Chain.Conv2d_2",
"encoder.down.3.block.1.norm1": "Encoder.Chain_1.Chain_4.Resnet_2.Chain.GroupNorm_1",
"encoder.down.3.block.1.conv1": "Encoder.Chain_1.Chain_4.Resnet_2.Chain.Conv2d_1",
"encoder.down.3.block.1.norm2": "Encoder.Chain_1.Chain_4.Resnet_2.Chain.GroupNorm_2",
"encoder.down.3.block.1.conv2": "Encoder.Chain_1.Chain_4.Resnet_2.Chain.Conv2d_2",
"encoder.mid.block_1.norm1": "Encoder.Chain_1.Chain_5.Resnet_1.Chain.GroupNorm_1",
"encoder.mid.block_1.conv1": "Encoder.Chain_1.Chain_5.Resnet_1.Chain.Conv2d_1",
"encoder.mid.block_1.norm2": "Encoder.Chain_1.Chain_5.Resnet_1.Chain.GroupNorm_2",
"encoder.mid.block_1.conv2": "Encoder.Chain_1.Chain_5.Resnet_1.Chain.Conv2d_2",
"encoder.mid.attn_1.norm": "Encoder.Chain_1.Chain_5.Residual.GroupNorm",
"encoder.mid.attn_1.q": "Encoder.Chain_1.Chain_5.Residual.SelfAttention2d.Distribute.Linear_1",
"encoder.mid.attn_1.k": "Encoder.Chain_1.Chain_5.Residual.SelfAttention2d.Distribute.Linear_2",
"encoder.mid.attn_1.v": "Encoder.Chain_1.Chain_5.Residual.SelfAttention2d.Distribute.Linear_3",
"encoder.mid.attn_1.proj_out": "Encoder.Chain_1.Chain_5.Residual.SelfAttention2d.Linear",
"encoder.mid.block_2.norm1": "Encoder.Chain_1.Chain_5.Resnet_2.Chain.GroupNorm_1",
"encoder.mid.block_2.conv1": "Encoder.Chain_1.Chain_5.Resnet_2.Chain.Conv2d_1",
"encoder.mid.block_2.norm2": "Encoder.Chain_1.Chain_5.Resnet_2.Chain.GroupNorm_2",
"encoder.mid.block_2.conv2": "Encoder.Chain_1.Chain_5.Resnet_2.Chain.Conv2d_2",
"encoder.norm_out": "Encoder.Chain_2.GroupNorm",
"encoder.conv_out": "Encoder.Chain_2.Conv2d",
"quant_conv": "Encoder.Chain_3.Conv2d",
"post_quant_conv": "Decoder.Conv2d_1",
"decoder.conv_in": "Decoder.Conv2d_2",
"decoder.mid.block_1.norm1": "Decoder.Chain_1.Chain_1.Resnet_1.Chain.GroupNorm_1",
"decoder.mid.block_1.conv1": "Decoder.Chain_1.Chain_1.Resnet_1.Chain.Conv2d_1",
"decoder.mid.block_1.norm2": "Decoder.Chain_1.Chain_1.Resnet_1.Chain.GroupNorm_2",
"decoder.mid.block_1.conv2": "Decoder.Chain_1.Chain_1.Resnet_1.Chain.Conv2d_2",
"decoder.mid.attn_1.norm": "Decoder.Chain_1.Chain_1.Residual.GroupNorm",
"decoder.mid.attn_1.q": "Decoder.Chain_1.Chain_1.Residual.SelfAttention2d.Distribute.Linear_1",
"decoder.mid.attn_1.k": "Decoder.Chain_1.Chain_1.Residual.SelfAttention2d.Distribute.Linear_2",
"decoder.mid.attn_1.v": "Decoder.Chain_1.Chain_1.Residual.SelfAttention2d.Distribute.Linear_3",
"decoder.mid.attn_1.proj_out": "Decoder.Chain_1.Chain_1.Residual.SelfAttention2d.Linear",
"decoder.mid.block_2.norm1": "Decoder.Chain_1.Chain_1.Resnet_2.Chain.GroupNorm_1",
"decoder.mid.block_2.conv1": "Decoder.Chain_1.Chain_1.Resnet_2.Chain.Conv2d_1",
"decoder.mid.block_2.norm2": "Decoder.Chain_1.Chain_1.Resnet_2.Chain.GroupNorm_2",
"decoder.mid.block_2.conv2": "Decoder.Chain_1.Chain_1.Resnet_2.Chain.Conv2d_2",
# The decoder `up` indices run opposite to the target chain numbering:
# up.3 maps to Chain_2, up.2 to Chain_3, up.1 to Chain_4 and up.0 to Chain_5.
"decoder.up.3.block.0.norm1": "Decoder.Chain_1.Chain_2.Resnet_1.Chain.GroupNorm_1",
"decoder.up.3.block.0.conv1": "Decoder.Chain_1.Chain_2.Resnet_1.Chain.Conv2d_1",
"decoder.up.3.block.0.norm2": "Decoder.Chain_1.Chain_2.Resnet_1.Chain.GroupNorm_2",
"decoder.up.3.block.0.conv2": "Decoder.Chain_1.Chain_2.Resnet_1.Chain.Conv2d_2",
"decoder.up.3.block.1.norm1": "Decoder.Chain_1.Chain_2.Resnet_2.Chain.GroupNorm_1",
"decoder.up.3.block.1.conv1": "Decoder.Chain_1.Chain_2.Resnet_2.Chain.Conv2d_1",
"decoder.up.3.block.1.norm2": "Decoder.Chain_1.Chain_2.Resnet_2.Chain.GroupNorm_2",
"decoder.up.3.block.1.conv2": "Decoder.Chain_1.Chain_2.Resnet_2.Chain.Conv2d_2",
"decoder.up.3.block.2.norm1": "Decoder.Chain_1.Chain_2.Resnet_3.Chain.GroupNorm_1",
"decoder.up.3.block.2.conv1": "Decoder.Chain_1.Chain_2.Resnet_3.Chain.Conv2d_1",
"decoder.up.3.block.2.norm2": "Decoder.Chain_1.Chain_2.Resnet_3.Chain.GroupNorm_2",
"decoder.up.3.block.2.conv2": "Decoder.Chain_1.Chain_2.Resnet_3.Chain.Conv2d_2",
"decoder.up.3.upsample.conv": "Decoder.Chain_1.Chain_2.Upsample.Conv2d",
"decoder.up.2.block.0.norm1": "Decoder.Chain_1.Chain_3.Resnet_1.Chain.GroupNorm_1",
"decoder.up.2.block.0.conv1": "Decoder.Chain_1.Chain_3.Resnet_1.Chain.Conv2d_1",
"decoder.up.2.block.0.norm2": "Decoder.Chain_1.Chain_3.Resnet_1.Chain.GroupNorm_2",
"decoder.up.2.block.0.conv2": "Decoder.Chain_1.Chain_3.Resnet_1.Chain.Conv2d_2",
"decoder.up.2.block.1.norm1": "Decoder.Chain_1.Chain_3.Resnet_2.Chain.GroupNorm_1",
"decoder.up.2.block.1.conv1": "Decoder.Chain_1.Chain_3.Resnet_2.Chain.Conv2d_1",
"decoder.up.2.block.1.norm2": "Decoder.Chain_1.Chain_3.Resnet_2.Chain.GroupNorm_2",
"decoder.up.2.block.1.conv2": "Decoder.Chain_1.Chain_3.Resnet_2.Chain.Conv2d_2",
"decoder.up.2.block.2.norm1": "Decoder.Chain_1.Chain_3.Resnet_3.Chain.GroupNorm_1",
"decoder.up.2.block.2.conv1": "Decoder.Chain_1.Chain_3.Resnet_3.Chain.Conv2d_1",
"decoder.up.2.block.2.norm2": "Decoder.Chain_1.Chain_3.Resnet_3.Chain.GroupNorm_2",
"decoder.up.2.block.2.conv2": "Decoder.Chain_1.Chain_3.Resnet_3.Chain.Conv2d_2",
"decoder.up.2.upsample.conv": "Decoder.Chain_1.Chain_3.Upsample.Conv2d",
"decoder.up.1.block.0.norm1": "Decoder.Chain_1.Chain_4.Resnet_1.Chain.GroupNorm_1",
"decoder.up.1.block.0.conv1": "Decoder.Chain_1.Chain_4.Resnet_1.Chain.Conv2d_1",
"decoder.up.1.block.0.norm2": "Decoder.Chain_1.Chain_4.Resnet_1.Chain.GroupNorm_2",
"decoder.up.1.block.0.conv2": "Decoder.Chain_1.Chain_4.Resnet_1.Chain.Conv2d_2",
"decoder.up.1.block.0.nin_shortcut": "Decoder.Chain_1.Chain_4.Resnet_1.Conv2d",
"decoder.up.1.block.1.norm1": "Decoder.Chain_1.Chain_4.Resnet_2.Chain.GroupNorm_1",
"decoder.up.1.block.1.conv1": "Decoder.Chain_1.Chain_4.Resnet_2.Chain.Conv2d_1",
"decoder.up.1.block.1.norm2": "Decoder.Chain_1.Chain_4.Resnet_2.Chain.GroupNorm_2",
"decoder.up.1.block.1.conv2": "Decoder.Chain_1.Chain_4.Resnet_2.Chain.Conv2d_2",
"decoder.up.1.block.2.norm1": "Decoder.Chain_1.Chain_4.Resnet_3.Chain.GroupNorm_1",
"decoder.up.1.block.2.conv1": "Decoder.Chain_1.Chain_4.Resnet_3.Chain.Conv2d_1",
"decoder.up.1.block.2.norm2": "Decoder.Chain_1.Chain_4.Resnet_3.Chain.GroupNorm_2",
"decoder.up.1.block.2.conv2": "Decoder.Chain_1.Chain_4.Resnet_3.Chain.Conv2d_2",
"decoder.up.1.upsample.conv": "Decoder.Chain_1.Chain_4.Upsample.Conv2d",
"decoder.up.0.block.0.norm1": "Decoder.Chain_1.Chain_5.Resnet_1.Chain.GroupNorm_1",
"decoder.up.0.block.0.conv1": "Decoder.Chain_1.Chain_5.Resnet_1.Chain.Conv2d_1",
"decoder.up.0.block.0.norm2": "Decoder.Chain_1.Chain_5.Resnet_1.Chain.GroupNorm_2",
"decoder.up.0.block.0.conv2": "Decoder.Chain_1.Chain_5.Resnet_1.Chain.Conv2d_2",
"decoder.up.0.block.0.nin_shortcut": "Decoder.Chain_1.Chain_5.Resnet_1.Conv2d",
"decoder.up.0.block.1.norm1": "Decoder.Chain_1.Chain_5.Resnet_2.Chain.GroupNorm_1",
"decoder.up.0.block.1.conv1": "Decoder.Chain_1.Chain_5.Resnet_2.Chain.Conv2d_1",
"decoder.up.0.block.1.norm2": "Decoder.Chain_1.Chain_5.Resnet_2.Chain.GroupNorm_2",
"decoder.up.0.block.1.conv2": "Decoder.Chain_1.Chain_5.Resnet_2.Chain.Conv2d_2",
"decoder.up.0.block.2.norm1": "Decoder.Chain_1.Chain_5.Resnet_3.Chain.GroupNorm_1",
"decoder.up.0.block.2.conv1": "Decoder.Chain_1.Chain_5.Resnet_3.Chain.Conv2d_1",
"decoder.up.0.block.2.norm2": "Decoder.Chain_1.Chain_5.Resnet_3.Chain.GroupNorm_2",
"decoder.up.0.block.2.conv2": "Decoder.Chain_1.Chain_5.Resnet_3.Chain.Conv2d_2",
"decoder.norm_out": "Decoder.Chain_2.GroupNorm",
"decoder.conv_out": "Decoder.Chain_2.Conv2d",
},
# tensor_reshapes: a (512, 512) shape for the weight of every mid-block self-attention Linear
# (q, k, v and output projection, in both encoder and decoder), keyed by target-side name.
# Presumably the original tensors are stored with a different shape and are reshaped to this
# one during conversion — TODO confirm against WeightRecipe.
tensor_reshapes={
"Encoder.Chain_1.Chain_5.Residual.SelfAttention2d.Distribute.Linear_1.weight": (512, 512),
"Encoder.Chain_1.Chain_5.Residual.SelfAttention2d.Distribute.Linear_2.weight": (512, 512),
"Encoder.Chain_1.Chain_5.Residual.SelfAttention2d.Distribute.Linear_3.weight": (512, 512),
"Encoder.Chain_1.Chain_5.Residual.SelfAttention2d.Linear.weight": (512, 512),
"Decoder.Chain_1.Chain_1.Residual.SelfAttention2d.Distribute.Linear_1.weight": (512, 512),
"Decoder.Chain_1.Chain_1.Residual.SelfAttention2d.Distribute.Linear_2.weight": (512, 512),
"Decoder.Chain_1.Chain_1.Residual.SelfAttention2d.Distribute.Linear_3.weight": (512, 512),
"Decoder.Chain_1.Chain_1.Residual.SelfAttention2d.Linear.weight": (512, 512),
},
)
# Conversion entry for the Stable Diffusion 1.5 autoencoder.
# `original` points at the `vae/` weights of the stable-diffusion-v1-5 Hub repo, pinned to a
# revision and a SHA-256; `converted` points at the refiners/sd15.autoencoder repo.
runwayml = Conversion(
original=Hub(
repo_id="stable-diffusion-v1-5/stable-diffusion-v1-5",
filename="vae/diffusion_pytorch_model.safetensors",
revision="f03de327dd89b501a01da37fc5240cf4fdba85a1",
expected_sha256="a2b5134f4dbc140d9c11f11cba3233099e00af40f262f136c691fb7d38d2194c",
),
converted=Hub(
repo_id="refiners/sd15.autoencoder",
filename="model.safetensors",
expected_sha256="3f499ef1c668a8dfc72762d885f53cf0c3d3e98a393211906a8de5ae04e72058",
),
recipe=diffusers_recipe,
# presumably the dtype the converted weights are stored in — confirm against Conversion
dtype=torch.float32,
)
# Conversion entry for the stabilityai/sd-vae-ft-mse autoencoder.
# `original` is pinned to a revision and a SHA-256; `converted` points at the
# refiners/sd15.autoencoder_mse repo.
stability_mse = Conversion(
original=Hub(
repo_id="stabilityai/sd-vae-ft-mse",
filename="diffusion_pytorch_model.safetensors",
revision="31f26fdeee1355a5c34592e401dd41e45d25a493",
expected_sha256="a1d993488569e928462932c8c38a0760b874d166399b14414135bd9c42df5815",
),
converted=Hub(
repo_id="refiners/sd15.autoencoder_mse",
filename="model.safetensors",
expected_sha256="4b484e70be3b898e2647985f066495672162a4e9ea1d1ee8bf1f7a7895180fce",
),
recipe=diffusers_recipe,
# presumably the dtype the converted weights are stored in — confirm against Conversion
dtype=torch.float16,
)
# Conversion entry for the autoencoder of the Juggernaut "reborn" checkpoint.
# `original` has no Hub revision; it carries a civitai `download_url` (pruned, fp16) and a
# SHA-256. It uses `civitai_recipe` rather than `diffusers_recipe`.
juggernaut_reborn = Conversion(
original=Hub(
repo_id="civitai/KandooAi/juggernaut",
filename="reborn/onefile_fp16.safetensors",
download_url="https://civitai.com/api/download/models/274039?type=Model&format=SafeTensor&size=pruned&fp=fp16",
expected_sha256="338b85bc4f7628bc42cce336242e79154a57c2a4360531436e97f7793568f18c",
),
converted=Hub(
repo_id="refiners/sd15.juggernaut.reborn.autoencoder",
filename="model.safetensors",
expected_sha256="6f1ed875201344031f2a9ddee3ff40f455eb2a5ee4833070061a5d163cb23595",
),
recipe=civitai_recipe,
# presumably the dtype the converted weights are stored in — confirm against Conversion
dtype=torch.float16,
)
# Conversion entry for the autoencoder of the Juggernaut "aftermath" checkpoint.
# `original` carries a civitai `download_url` (full size, fp32) and a SHA-256; the entry is
# declared with torch.float32, matching the fp32 source file.
juggernaut_aftermath = Conversion(
original=Hub(
repo_id="civitai/KandooAi/juggernaut",
filename="aftermath/onefile_fp32.safetensors",
download_url="https://civitai.com/api/download/models/127207?type=Model&format=SafeTensor&size=full&fp=fp32",
expected_sha256="7ed60a2f5842de8e6755965ffbf2a0ee273429939f307233c22f7dffd8d7ae3d",
),
converted=Hub(
repo_id="refiners/sd15.juggernaut.aftermath.autoencoder",
filename="model.safetensors",
expected_sha256="fa7ef415e1854907aa46d81ed403bd1f6b0fd2f06c885545ab689f9f78fa7989",
),
recipe=civitai_recipe,
dtype=torch.float32,
)
# Autoencoder-wise, juggernaut_aftermath is identical to juggernaut_aftermath_inpainting.
# Conversion entry for the autoencoder of the Realistic Stock Photo v3 checkpoint.
# `original` points at the Yntec/realisticStockPhoto3 repo (no revision, no download_url).
# The converted expected_sha256 is the same value as the one declared for `stability_mse`.
realistic_stock_photo_v3 = Conversion(
original=Hub(
repo_id="Yntec/realisticStockPhoto3",
filename="realisticStockPhoto_v30SD15.safetensors",
expected_sha256="f85affae9aae16276eaf670f810946e2d03c4d300791a0380f07653cb78ba31b",
# Alternative civitai source, currently disabled:
# download_url="https://civitai.com/api/download/models/524032?type=Model&format=SafeTensor&size=full&fp=fp16",
),
converted=Hub(
repo_id="refiners/sd15.realistic_stock_photo.v3_0.autoencoder",
filename="model.safetensors",
expected_sha256="4b484e70be3b898e2647985f066495672162a4e9ea1d1ee8bf1f7a7895180fce",
),
recipe=civitai_recipe,
# presumably the dtype the converted weights are stored in — confirm against Conversion
dtype=torch.float16,
)
# Conversion entry for the autoencoder of the Realistic Vision v5 checkpoint.
# The converted expected_sha256 is the same value as the one declared for `stability_mse`.
# NOTE(review): the filename says "fp32" while the download_url requests `fp=fp16` and the
# entry is declared with torch.float16; the converted repo is also named "v5_1" while this
# variable and the filename say "v5". Confirm which of these is intended.
realistic_vision_v5 = Conversion(
original=Hub(
repo_id="civitai/SG_161222/realistic_vision",
filename="v5/onefile_fp32.safetensors",
download_url="https://civitai.com/api/download/models/130072?type=Model&format=SafeTensor&size=full&fp=fp16",
expected_sha256="ef76aa2332635f4352463343beec9c5aae3bd107a73c0fd7221abbbcf8bd5470",
),
converted=Hub(
repo_id="refiners/sd15.realistic_vision.v5_1.autoencoder",
filename="model.safetensors",
expected_sha256="4b484e70be3b898e2647985f066495672162a4e9ea1d1ee8bf1f7a7895180fce",
),
recipe=civitai_recipe,
dtype=torch.float16,
)

View file

@ -0,0 +1,50 @@
import torch
from refiners.conversion.models.autoencoder_sd15 import civitai_recipe, diffusers_recipe
from refiners.conversion.utils import Conversion, Hub
# Conversion entry for the Stable Diffusion XL base 1.0 autoencoder.
# `original` points at the `vae/` weights of the stabilityai/stable-diffusion-xl-base-1.0 repo,
# pinned to a revision and a SHA-256. It reuses the `diffusers_recipe` imported from the SD 1.5
# autoencoder module.
stability = Conversion(
original=Hub(
repo_id="stabilityai/stable-diffusion-xl-base-1.0",
filename="vae/diffusion_pytorch_model.safetensors",
revision="91704abbae38a0e1f60d433fb08d7f7d99081d21",
expected_sha256="1598f3d24932bcfe6634e8b618ea1e30ab1d57f5aad13a6d2de446d2199f2341",
),
converted=Hub(
repo_id="refiners/sdxl.autoencoder",
filename="model.safetensors",
expected_sha256="6534be9990496fcb4086e5cf71e0ceb208b9f5c728823247c6a51e13564c38af",
),
recipe=diffusers_recipe,
# presumably the dtype the converted weights are stored in — confirm against Conversion
dtype=torch.float16,
)
# Conversion entry for the madebyollin/sdxl-vae-fp16-fix autoencoder.
# `original` is pinned to a revision and a SHA-256; `converted` points at the
# refiners/sdxl.autoencoder_fp16fix repo.
madebyollin_fp16fix = Conversion(
original=Hub(
repo_id="madebyollin/sdxl-vae-fp16-fix",
filename="diffusion_pytorch_model.safetensors",
revision="6d1073461cd0b5a6ea4fda10b812e3d9d58a8330",
expected_sha256="1b909373b28f2137098b0fd9dbc6f97f8410854f31f84ddc9fa04b077b0ace2c",
),
converted=Hub(
repo_id="refiners/sdxl.autoencoder_fp16fix",
filename="model.safetensors",
expected_sha256="ede1e84626900ebeb0e7911814b1ac98e8916327340f411cce2b77e056e84dd3",
),
recipe=diffusers_recipe,
# presumably the dtype the converted weights are stored in — confirm against Conversion
dtype=torch.float16,
)
# Conversion entry for the autoencoder of the Juggernaut XL v10 checkpoint.
# `original` carries a civitai `download_url` (full size, fp16) and a SHA-256; it uses the
# `civitai_recipe` imported from the SD 1.5 autoencoder module.
# The converted expected_sha256 is the same value as the one declared for `madebyollin_fp16fix`.
juggernautXL_v10 = Conversion(
original=Hub(
repo_id="civitai/KandooAi/juggernautXL",
filename="v10/onefile_fp16.safetensors",
expected_sha256="d91d35736d8f2be038f760a9b0009a771ecf0a417e9b38c244a84ea4cb9c0c45",
download_url="https://civitai.com/api/download/models/456194?type=Model&format=SafeTensor&size=full&fp=fp16",
),
converted=Hub(
repo_id="refiners/sdxl.juggernaut.v10.autoencoder",
filename="model.safetensors",
expected_sha256="ede1e84626900ebeb0e7911814b1ac98e8916327340f411cce2b77e056e84dd3",
),
recipe=civitai_recipe,
# presumably the dtype the converted weights are stored in — confirm against Conversion
dtype=torch.float16,
)

View file

@ -0,0 +1,290 @@
import torch
from refiners.conversion.utils import Conversion, Hub, WeightRecipe
# Weight recipe for the CLIP image encoder: maps source key prefixes
# (`vision_model.*`, `visual_projection`) to target module paths.
#
# The encoder has 32 transformer layers (source indices 0..31, target
# `TransformerLayer_1`..`TransformerLayer_32`). Their entries follow a strictly
# regular pattern, so they are generated instead of being listed one by one.
# Insertion order is kept: embeddings, pre-norm, per-layer layer norms,
# post-norm, per-layer attention projections, all `fc1`, all `fc2`, projection.
diffusers_recipe = WeightRecipe(
    key_map={
        "vision_model.embeddings.patch_embedding": "ViTEmbeddings.Concatenate.Chain.PatchEncoder.Conv2d",
        "vision_model.embeddings.position_embedding": "ViTEmbeddings.Residual.PositionalEncoder.Embedding",
        "vision_model.embeddings.class_embedding": "ViTEmbeddings.Concatenate.ClassToken.Parameter.weight",
        # "pre_layrnorm" (sic): the source key is spelled this way; keep as-is.
        "vision_model.pre_layrnorm": "LayerNorm_1",
        # layer_norm1 / layer_norm2 -> LayerNorm of Residual_1 / Residual_2.
        **{
            f"vision_model.encoder.layers.{layer}.layer_norm{norm}": (
                f"Chain.TransformerLayer_{layer + 1}.Residual_{norm}.LayerNorm"
            )
            for layer in range(32)
            for norm in (1, 2)
        },
        "vision_model.post_layernorm": "LayerNorm_2",
        # q/k/v projections live under Distribute; out_proj is the trailing Linear.
        **{
            f"vision_model.encoder.layers.{layer}.self_attn.{source_name}": (
                f"Chain.TransformerLayer_{layer + 1}.Residual_1.SelfAttention.{target_name}"
            )
            for layer in range(32)
            for source_name, target_name in (
                ("q_proj", "Distribute.Linear_1"),
                ("k_proj", "Distribute.Linear_2"),
                ("v_proj", "Distribute.Linear_3"),
                ("out_proj", "Linear"),
            )
        },
        # Feed-forward: every fc1 entry first, then every fc2 entry.
        **{
            f"vision_model.encoder.layers.{layer}.mlp.fc{fc}": (
                f"Chain.TransformerLayer_{layer + 1}.Residual_2.FeedForward.Linear_{fc}"
            )
            for fc in (1, 2)
            for layer in range(32)
        },
        "visual_projection": "Linear",
    },
    # The class-token parameter is reshaped to (1, 1280) on the target side.
    tensor_reshapes={
        "ViTEmbeddings.Concatenate.ClassToken.Parameter.weight": (1, 1280),
    },
)
# Conversion entry for the image encoder of Stable Diffusion 2.1 unCLIP.
# - original: the `image_encoder/` safetensors file of the
#   `stabilityai/stable-diffusion-2-1-unclip` repo, pinned to a fixed revision.
# - converted: the file hosted under `refiners/sd21.unclip.image_encoder`.
# Keys are remapped with the `diffusers_recipe` defined in this module.
unclip_21 = Conversion(
    original=Hub(
        repo_id="stabilityai/stable-diffusion-2-1-unclip",
        filename="image_encoder/model.safetensors",
        revision="e99f66a92bdcd1b0fb0d4b6a9b81b3b37d8bea44",
        expected_sha256="9d277aeaed13ebc0ef33e56027b826a74433d45d755b3e0b3829440c1ea7b72e",
    ),
    converted=Hub(
        repo_id="refiners/sd21.unclip.image_encoder",
        filename="model.safetensors",
        expected_sha256="c9f43e359e06f1a237324c4c11734d6acd7fbddbfd3b1ed4f1b525267bedb812",
    ),
    recipe=diffusers_recipe,
    dtype=torch.float16,
)

View file

@ -0,0 +1,291 @@
import torch
from refiners.conversion.utils import Conversion, Hub, WeightRecipe
# Key mapping from the `transformers` CLIP text model layout to the refiners layout.
# The four non-layer entries come first, followed by eight entries for each of the
# twelve transformer layers. Layers are visited in lexicographic order of their index
# (0, 1, 10, 11, 2, ..., 9), and source layer `i` maps to `TransformerLayer_{i + 1}`.
diffusers_recipe = WeightRecipe(
    key_map={
        "text_model.embeddings.token_embedding": "Sum.TokenEncoder",
        "text_model.embeddings.position_embedding": "Sum.PositionalEncoder.Embedding",
        "text_model.final_layer_norm": "LayerNorm",
        "text_projection": "Linear",
        **{
            f"text_model.encoder.layers.{layer}.{source}": f"TransformerLayer_{layer + 1}.{target}"
            for layer in sorted(range(12), key=str)
            for source, target in (
                ("layer_norm1", "Residual_1.LayerNorm"),
                ("layer_norm2", "Residual_2.LayerNorm"),
                ("mlp.fc1", "Residual_2.FeedForward.Linear_1"),
                ("mlp.fc2", "Residual_2.FeedForward.Linear_2"),
                ("self_attn.k_proj", "Residual_1.SelfAttention.Distribute.Linear_2"),
                ("self_attn.out_proj", "Residual_1.SelfAttention.Linear"),
                ("self_attn.q_proj", "Residual_1.SelfAttention.Distribute.Linear_1"),
                ("self_attn.v_proj", "Residual_1.SelfAttention.Distribute.Linear_3"),
            )
        },
    },
)
# Key mapping for single-file checkpoints whose text encoder keys live under
# `cond_stage_model.transformer.text_model.`. The two embedding entries come first,
# then eight entries for each of the twelve transformer layers in numeric order
# (source layer `i` maps to `TransformerLayer_{i + 1}`), and the final layer norm last.
civitai_recipe = WeightRecipe(
    key_prefix="cond_stage_model.transformer.text_model.",
    key_map={
        "embeddings.token_embedding": "Sum.TokenEncoder",
        "embeddings.position_embedding": "Sum.PositionalEncoder.Embedding",
        **{
            f"encoder.layers.{layer}.{source}": f"TransformerLayer_{layer + 1}.{target}"
            for layer in range(12)
            for source, target in (
                ("layer_norm1", "Residual_1.LayerNorm"),
                ("self_attn.q_proj", "Residual_1.SelfAttention.Distribute.Linear_1"),
                ("self_attn.k_proj", "Residual_1.SelfAttention.Distribute.Linear_2"),
                ("self_attn.v_proj", "Residual_1.SelfAttention.Distribute.Linear_3"),
                ("self_attn.out_proj", "Residual_1.SelfAttention.Linear"),
                ("layer_norm2", "Residual_2.LayerNorm"),
                ("mlp.fc1", "Residual_2.FeedForward.Linear_1"),
                ("mlp.fc2", "Residual_2.FeedForward.Linear_2"),
            )
        },
        "final_layer_norm": "LayerNorm",
    },
)
# Conversion entry for the Stable Diffusion 1.5 text encoder.
# Source: `text_encoder/model.safetensors` from `stable-diffusion-v1-5/stable-diffusion-v1-5`,
# pinned to a fixed revision. Target: `refiners/sd15.text_encoder`, stored as float16.
# Keys are renamed with `diffusers_recipe`; the sha256 values are the expected file hashes.
runwayml = Conversion(
    original=Hub(
        repo_id="stable-diffusion-v1-5/stable-diffusion-v1-5",
        filename="text_encoder/model.safetensors",
        revision="f03de327dd89b501a01da37fc5240cf4fdba85a1",
        expected_sha256="d008943c017f0092921106440254dbbe00b6a285f7883ec8ba160c3faad88334",
    ),
    converted=Hub(
        repo_id="refiners/sd15.text_encoder",
        filename="model.safetensors",
        expected_sha256="88a171b02f5bad8e61723f9c065ddb00351970a6e3f7f5a2a46970700e90f69d",
    ),
    recipe=diffusers_recipe,
    dtype=torch.float16,
)
# Conversion entry for the text encoder of the Juggernaut "reborn" checkpoint.
# The original is described by an explicit civitai download URL rather than a revision,
# and its keys are renamed with `civitai_recipe`.
# Target: `refiners/sd15.juggernaut.reborn.text_encoder`, stored as float16.
juggernaut_reborn = Conversion(
    original=Hub(
        repo_id="civitai/KandooAi/juggernaut",
        filename="reborn/onefile_fp16.safetensors",
        download_url="https://civitai.com/api/download/models/274039?type=Model&format=SafeTensor&size=pruned&fp=fp16",
        expected_sha256="338b85bc4f7628bc42cce336242e79154a57c2a4360531436e97f7793568f18c",
    ),
    converted=Hub(
        repo_id="refiners/sd15.juggernaut.reborn.text_encoder",
        filename="model.safetensors",
        expected_sha256="c649e079cbef5ccd79ef643acac29363d153d5a5f719e9c2e1893c96ec8b2357",
    ),
    recipe=civitai_recipe,
    dtype=torch.float16,
)
juggernaut_aftermath = Conversion(
original=Hub(
repo_id="civitai/KandooAi/juggernaut",
filename="aftermath/onefile_fp32.safetensors",
download_url="https://civitai.com/api/download/models/127207?type=Model&format=SafeTensor&size=full&fp=fp32",
expected_sha256="7ed60a2f5842de8e6755965ffbf2a0ee273429939f307233c22f7dffd8d7ae3d",
),
converted=Hub(
repo_id="refiners/sd15.juggernaut.aftermath.text_encoder",
filename="model.safetensors",
expected_sha256="2448c3148ae1c9658c18b136e5d6eebcc3c512dd3e1df71d3cb9bb1c83c19db1",
),
recipe=civitai_recipe,
dtype=torch.float32,
)
# Conversion entry for the text encoder of the Realistic Stock Photo v3.0 checkpoint.
# Source: `realisticStockPhoto_v30SD15.safetensors` from `Yntec/realisticStockPhoto3`;
# no revision or download URL is passed. Keys are renamed with `civitai_recipe`.
# Target: `refiners/sd15.realistic_stock_photo.v3_0.text_encoder`, stored as float16.
realistic_stock_photo_v3 = Conversion(
    original=Hub(
        repo_id="Yntec/realisticStockPhoto3",
        filename="realisticStockPhoto_v30SD15.safetensors",
        expected_sha256="f85affae9aae16276eaf670f810946e2d03c4d300791a0380f07653cb78ba31b",
        # Disabled civitai URL, kept for reference only (presumably the same checkpoint - TODO confirm):
        # download_url="https://civitai.com/api/download/models/524032?type=Model&format=SafeTensor&size=full&fp=fp16",
    ),
    converted=Hub(
        repo_id="refiners/sd15.realistic_stock_photo.v3_0.text_encoder",
        filename="model.safetensors",
        expected_sha256="6d37d5b8ea7f7628cdaada6ce61bbba3914143d8f88d5b722c120ffdcb408512",
    ),
    recipe=civitai_recipe,
    dtype=torch.float16,
)
# Conversion entry for the text encoder of the Realistic Vision v5 checkpoint.
# The original is described by an explicit civitai download URL, and its keys are
# renamed with `civitai_recipe`.
# Target: `refiners/sd15.realistic_vision.v5_1.text_encoder`, stored as float16.
# NOTE(review): the local filename says "fp32" while the download URL requests `fp=fp16`,
# and the source path says "v5" while the converted repo says "v5_1" - confirm which is intended.
realistic_vision_v5 = Conversion(
    original=Hub(
        repo_id="civitai/SG_161222/realistic_vision",
        filename="v5/onefile_fp32.safetensors",
        download_url="https://civitai.com/api/download/models/130072?type=Model&format=SafeTensor&size=full&fp=fp16",
        expected_sha256="ef76aa2332635f4352463343beec9c5aae3bd107a73c0fd7221abbbcf8bd5470",
    ),
    converted=Hub(
        repo_id="refiners/sd15.realistic_vision.v5_1.text_encoder",
        filename="model.safetensors",
        expected_sha256="b34349a39f1ad882885cc5da917aeaa92935c1b80eefbce03a6c46959ed97b10",
    ),
    recipe=civitai_recipe,
    dtype=torch.float16,
)

View file

@ -0,0 +1,157 @@
import logging
from typing import NamedTuple, cast
import requests
import torch
from torch import nn
from transformers import CLIPTextModel, CLIPTextModelWithProjection # pyright: ignore[reportMissingTypeStubs]
import refiners.fluxion.layers as fl
from refiners.conversion.model_converter import ModelConverter
from refiners.conversion.utils import Conversion, Hub
from refiners.fluxion.utils import save_to_safetensors
from refiners.foundationals.clip.text_encoder import CLIPTextEncoder, CLIPTextEncoderG, CLIPTextEncoderL
from refiners.foundationals.clip.tokenizer import CLIPTokenizer
from refiners.foundationals.latent_diffusion.stable_diffusion_xl.text_encoder import DoubleTextEncoder
class CLIPTextEncoderConfig(NamedTuple):
    """Typed view of the config object of a `transformers` CLIP text model.

    Used as a `cast` target in `ModelConverterHubDuo.setup_converter`, so that the
    config attributes read there are typed. `setup_converter` reads `hidden_size`,
    `intermediate_size`, `num_hidden_layers`, `num_attention_heads`, `hidden_act`
    and `projection_dim`.
    """

    architectures: list[str]
    vocab_size: int
    hidden_size: int  # passed as `embedding_dim` (and as the projection's `in_features`)
    intermediate_size: int  # passed as `feedforward_dim`
    num_hidden_layers: int  # passed as `num_layers`
    num_attention_heads: int
    hidden_act: str  # compared against "quick_gelu" to set `use_quick_gelu`
    layer_norm_eps: float
    projection_dim: int  # `out_features` of the projection, only read when `with_projection` is set
class ModelConverterHubDuo(Conversion):
    """Conversion of a pair of CLIP text encoders into a single `DoubleTextEncoder` state dict.

    The two source models are loaded from the `text_encoder` and `text_encoder_2`
    subfolders of the original repo, and the key mappings are found at conversion
    time by `ModelConverter` instead of being declared in a recipe.
    """

    def __init__(
        self,
        original_repo_id: str,
        converted: Hub,
        dtype: torch.dtype,
    ) -> None:
        """Store the source repo, the destination and the dtype of the converted weights.

        Args:
            original_repo_id: The repo id the two text encoders are loaded from.
            converted: The destination of the converted weights, with their expected hash.
            dtype: The dtype the converted state dict is cast to before being saved.
        """
        # NOTE: the parent initializer is not called; only these three attributes are set.
        # The filename and hash are left empty: `convert` only reads `self.original.repo_id`.
        self.original = Hub(repo_id=original_repo_id, filename="", expected_sha256="")
        self.converted = converted
        self.dtype = dtype

    @staticmethod
    def setup_converter(source_path: str, subfolder: str, with_projection: bool) -> ModelConverter:
        """Build and run a `ModelConverter` between a transformers CLIP text model and its refiners equivalent.

        Args:
            source_path: Passed to `from_pretrained` as `pretrained_model_name_or_path`.
            subfolder: The subfolder of the source holding the text encoder.
            with_projection: Whether to load `CLIPTextModelWithProjection` and append
                a bias-free `fl.Linear` projection to the refiners model.

        Returns:
            The converter, after a successful run.

        Raises:
            RuntimeError: If `converter.run` reports a failure.
        """
        # instantiate the transformers clip model
        cls = CLIPTextModelWithProjection if with_projection else CLIPTextModel
        source: nn.Module = cls.from_pretrained(  # pyright: ignore[reportUnknownMemberType]
            pretrained_model_name_or_path=source_path,
            subfolder=subfolder,
            low_cpu_mem_usage=False,
        )
        assert isinstance(source, nn.Module), "Source model is not a nn.Module"
        # get the model config from the transformers clip model
        # (the cast only provides typing, the object is still the transformers config)
        config = cast(CLIPTextEncoderConfig, source.config)  # pyright: ignore[reportArgumentType, reportUnknownMemberType]
        # instantiate the refiners clip model
        target = CLIPTextEncoder(
            embedding_dim=config.hidden_size,
            num_layers=config.num_hidden_layers,
            num_attention_heads=config.num_attention_heads,
            feedforward_dim=config.intermediate_size,
            use_quick_gelu=config.hidden_act == "quick_gelu",
        )
        if with_projection:
            # mirror the source's projection with a trailing bias-free Linear
            target.append(
                module=fl.Linear(
                    in_features=config.hidden_size,
                    out_features=config.projection_dim,
                    bias=False,
                )
            )
        # initialize the inputs
        # (the source model is fed the tokens, the target model is fed the raw text)
        text = "What a nice cat you have there!"
        tokenizer = target.ensure_find(CLIPTokenizer)
        tokens = tokenizer(text)
        # run the converter
        converter = ModelConverter(
            source_model=source,
            target_model=target,
            skip_output_check=True,
            verbose=False,
        )
        if not converter.run(source_args=(tokens,), target_args=(text,)):
            raise RuntimeError("Model conversion failed")
        return converter

    def convert(self) -> None:
        """Convert both text encoders, merge them into a `DoubleTextEncoder` and save its state dict.

        The state dict is cast to `self.dtype`, written to `self.converted.local_path`,
        then checked against the expected local and remote hashes.
        """
        logging.info(f"Converting {self.original.repo_id} to {self.converted.repo_id}")
        # initialize the model converters, find the mappings
        converter1 = self.setup_converter(
            source_path=self.original.repo_id,
            subfolder="text_encoder",
            with_projection=False,
        )
        converter2 = self.setup_converter(
            source_path=self.original.repo_id,
            subfolder="text_encoder_2",
            with_projection=True,
        )
        # load the CLIPTextEncoderL model
        text_encoder_l = CLIPTextEncoderL()
        text_encoder_l.load_state_dict(state_dict=converter1.get_state_dict())
        # load the CLIPTextEncoderG (with projection) model
        # the projection is the last module of the second converter's target model
        projection = cast(CLIPTextEncoder, converter2.target_model)[-1]
        assert isinstance(projection, fl.Linear)
        text_encoder_g_with_projection = CLIPTextEncoderG()
        # appended so that the model structure matches the keys of the converted state dict
        text_encoder_g_with_projection.append(module=projection)
        text_encoder_g_with_projection.load_state_dict(state_dict=converter2.get_state_dict())
        # build DoubleTextEncoder from previous two models
        # the projection is popped back out, since DoubleTextEncoder takes it as a separate argument
        projection = text_encoder_g_with_projection.pop(index=-1)
        assert isinstance(projection, fl.Linear)
        double_text_encoder = DoubleTextEncoder(
            text_encoder_l=text_encoder_l,
            text_encoder_g=text_encoder_g_with_projection,
            projection=projection,
        )
        # extract the state_dict from the DoubleTextEncoder model
        state_dict = double_text_encoder.state_dict()
        state_dict = self.change_dtype(state_dict, self.dtype)
        # save the converted state_dict
        self.converted.local_path.parent.mkdir(parents=True, exist_ok=True)
        save_to_safetensors(self.converted.local_path, state_dict)
        # check the converted state_dict
        # NOTE(review): these checks are `assert` statements, so they are skipped under `python -O`
        assert self.converted.check_local_hash()
        try:
            assert self.converted.check_remote_hash()
        except requests.exceptions.HTTPError:
            # an HTTP error during the remote check is only logged, it does not fail the conversion
            logging.warning(f"{self.converted.local_path} couldn't verify remote hash")
# Conversion entry for the two text encoders of `stabilityai/stable-diffusion-xl-base-1.0`.
# Target: `refiners/sdxl.text_encoder`, stored as float16.
stability = ModelConverterHubDuo(
    original_repo_id="stabilityai/stable-diffusion-xl-base-1.0",
    converted=Hub(
        repo_id="refiners/sdxl.text_encoder",
        filename="model.safetensors",
        expected_sha256="238685accd000683e937085fb3a9c147675f5a1d7775a6810696131e93ddb147",
    ),
    dtype=torch.float16,
)
# Conversion entry for the two text encoders of `RunDiffusion/Juggernaut-X-v10`.
# Target: `refiners/sdxl.juggernaut.v10.text_encoder`, stored as float16.
juggernautXL_v10 = ModelConverterHubDuo(
    original_repo_id="RunDiffusion/Juggernaut-X-v10",  # TODO(laurent): use file from civitai instead
    converted=Hub(
        repo_id="refiners/sdxl.juggernaut.v10.text_encoder",
        filename="model.safetensors",
        expected_sha256="50dde9c171e31d1c9dcd0539ba052e4fe69d90f126c812b0145da40a0a2c4361",
    ),
    dtype=torch.float16,
)

View file

@ -0,0 +1,345 @@
import logging
import requests
import torch
from huggingface_hub import hf_hub_download # type: ignore
from torch import Tensor
from torch.nn import Parameter as TorchParameter
from refiners.conversion.utils import Conversion, Hub
from refiners.fluxion.adapters.lora import Lora, LoraAdapter, auto_attach_loras
from refiners.fluxion.layers import Conv2d
from refiners.fluxion.layers.linear import Linear
from refiners.fluxion.utils import load_from_safetensors, save_to_safetensors
from refiners.foundationals.latent_diffusion.lora import SDLoraManager
from refiners.foundationals.latent_diffusion.stable_diffusion_xl.control_lora import (
ConditionEncoder,
ControlLora,
ControlLoraAdapter,
ZeroConvolution,
)
from refiners.foundationals.latent_diffusion.stable_diffusion_xl.model import StableDiffusion_XL
def sort_keys(key: str, /) -> tuple[str, int]:
    """Return the sort key of a LoRA key, for use as the `key` argument of [`sorted`][sorted].

    Three kinds of keys are special-cased; every other key is delegated to
    `SDLoraManager.sort_keys`.

    Args:
        key: The key to compute the sort key of.

    Returns:
        A tuple made of the padded suffix of the key, and the score of that suffix.
    """
    # HACK: an empty suffix with a negative score puts the "time_embed" layers at the
    # very start of the list, with the "label_emb" layers right after them
    for marker, score in (("time_embed", -2), ("label_emb", -1)):
        if marker in key:
            return ("", score)

    if "proj_out" not in key:
        return SDLoraManager.sort_keys(key)

    # HACK: will place the "proj_out" layers at the end of each "transformer_blocks"
    padded_suffix = f"{key.removesuffix('proj_out')}transformer_blocks.99.ff.net.2"
    return (padded_suffix, 10)
def load_lora_layers(
    name: str,
    state_dict: dict[str, Tensor],
    control_lora: ControlLora,
) -> dict[str, Lora[Linear | Conv2d]]:
    """Build the LoRA layers of a ControlLora from a state_dict.

    The layers are attached to the ControlLora to resolve their targets, then
    ejected again before being returned.

    Args:
        name: The name of the LoRA.
        state_dict: The state_dict of the LoRA.
        control_lora: The ControlLora to load the LoRA layers into.

    Returns:
        The LoRA layers, ordered according to `sort_keys`.
    """
    # keep only the up/down tensors, suffix their keys with ".weight",
    # and move them to the device and dtype of the ControlLora
    lora_weights = {
        f"{key}.weight": tensor.to(dtype=control_lora.dtype, device=control_lora.device)
        for key, tensor in state_dict.items()
        if ".up" in key or ".down" in key
    }

    # instantiate the LoRA layers, then reorder them with `sort_keys`
    unordered_layers = Lora.from_dict(name, state_dict=lora_weights)
    lora_layers = {key: unordered_layers[key] for key in sorted(unordered_layers, key=sort_keys)}

    # auto-attach the LoRA layers to the U-Net
    auto_attach_loras(lora_layers, control_lora, exclude=["ZeroConvolution", "ConditionEncoder"])

    # eject every adapter again: each target path is needed
    # as if the adapter had never been injected
    for layer in lora_layers.values():
        adapter = layer.parent
        assert isinstance(adapter, LoraAdapter)
        adapter.eject()

    return lora_layers
def load_condition_encoder(
    state_dict: dict[str, Tensor],
    control_lora: ControlLora,
) -> None:
    """Copy the "input_hint_block" tensors of a state_dict into the ConditionEncoder's Conv2d layers.

    Args:
        state_dict: The state_dict of the ConditionEncoder.
        control_lora: The control_lora to load the ConditionEncoder's Conv2d layers into.
    """
    # keep only the ConditionEncoder tensors, moved to the device and dtype of the ControlLora
    hint_tensors = {
        key: tensor.to(dtype=control_lora.dtype, device=control_lora.device)
        for key, tensor in state_dict.items()
        if "input_hint_block" in key
    }

    # collect the Conv2d layers of the ConditionEncoder
    encoder = control_lora.ensure_find(ConditionEncoder)
    conv2ds = list(encoder.layers(Conv2d))

    # the i-th Conv2d reads the tensors stored at index 2 * i of "input_hint_block"
    for position, conv2d in enumerate(conv2ds):
        hint_index = position * 2
        conv2d.weight = TorchParameter(hint_tensors[f"input_hint_block.{hint_index}.weight"])
        conv2d.bias = TorchParameter(hint_tensors[f"input_hint_block.{hint_index}.bias"])
def load_zero_convolutions(
    state_dict: dict[str, Tensor],
    control_lora: ControlLora,
) -> None:
    """Copy the "zero_convs" and "middle_block_out" tensors of a state_dict into the ZeroConvolution layers.

    Args:
        state_dict: The state_dict of the ZeroConvolution.
        control_lora: The ControlLora to load the ZeroConvolution's Conv2d layers into.
    """
    # keep only the ZeroConvolution tensors
    tensors = {key: tensor for key, tensor in state_dict.items() if "zero_convs" in key}

    # register the "middle_block_out" tensors as one more zero convolution,
    # indexed right after the existing (weight, bias) pairs
    middle_index = len(tensors) // 2
    for suffix in ("weight", "bias"):
        tensors[f"zero_convs.{middle_index}.0.{suffix}"] = state_dict[f"middle_block_out.0.{suffix}"]

    # move the tensors to the device and dtype of the ControlLora
    tensors = {
        key: tensor.to(dtype=control_lora.dtype, device=control_lora.device) for key, tensor in tensors.items()
    }

    # collect the Conv2d of every ZeroConvolution before assigning anything
    conv2ds = [zero_convolution.ensure_find(Conv2d) for zero_convolution in control_lora.layers(ZeroConvolution)]

    # the i-th ZeroConvolution reads the tensors stored under "zero_convs.{i}.0"
    for position, conv2d in enumerate(conv2ds):
        conv2d.weight = TorchParameter(tensors[f"zero_convs.{position}.0.weight"])
        conv2d.bias = TorchParameter(tensors[f"zero_convs.{position}.0.bias"])
def simplify_key(key: str, prefix: str, index: int | None = None) -> str:
"""Simplify a key by stripping everything to the left of the prefix.
Also optionally add a zero-padded index to the prefix.
Example:
>>> simplify_key("foo.bar.ControlLora.something", "ControlLora", 1)
"ControlLora_01.something"
>>> simplify_key("foo.bar.ControlLora.DownBlocks.something", "ControlLora")
"ControlLora.DownBlocks.something"
Args:
key: The key to simplify.
prefix: The prefix to remove.
index: The index to add.
"""
_, right = key.split(prefix, maxsplit=1)
if index:
return f"{prefix}_{index:02d}{right}"
else:
return f"{prefix}{right}"
def convert_lora_layers(
    lora_layers: dict[str, Lora[Linear | Conv2d]],
    control_lora: ControlLora,
    refiners_state_dict: dict[str, Tensor],
) -> None:
    """Add the down/up weights of every LoRA layer to the refiners state dict.

    Each weight is stored under the path of the layer adapted by the LoRA, suffixed
    with ".down" or ".up" and with everything before "ControlLora." stripped.

    Args:
        lora_layers: The LoRA layers to convert.
        control_lora: The ControlLora the LoRA layers belong to.
        refiners_state_dict: The refiners state dict, updated in place with the converted LoRA weights.
    """
    for lora in lora_layers.values():
        # the LoRA layer must live inside an adapter, which knows the adapted layer
        adapter = lora.parent
        assert isinstance(adapter, LoraAdapter)
        adapted = adapter.target

        # path of the adapted layer, resolved relative to its parent in the ControlLora
        path = adapted.get_path(parent=control_lora.ensure_find_parent(adapted))

        named_weights = (
            (f"{path}.down", lora.down.weight),
            (f"{path}.up", lora.up.weight),
        )
        refiners_state_dict.update(
            {simplify_key(name, "ControlLora."): weight for name, weight in named_weights},
        )
def convert_zero_convolutions(
    control_lora: ControlLora,
    refiners_state_dict: dict[str, Tensor],
) -> None:
    """Add the parameters of every ZeroConvolution layer to the refiners state dict.

    Keys are the layer's path joined with the parameter name, cut down to start at
    "ZeroConvolution" and numbered from 01 in iteration order.

    Args:
        control_lora: The ControlLora to read the ZeroConvolution layers from.
        refiners_state_dict: The refiners state dict, updated in place with the converted ZeroConvolution layers.
    """
    for position, zero_convolution in enumerate(control_lora.layers(ZeroConvolution), start=1):
        parameters = zero_convolution.state_dict()
        path = zero_convolution.get_path()

        renamed: dict[str, Tensor] = {}
        for key, param in parameters.items():
            renamed[simplify_key(f"{path}.{key}", "ZeroConvolution", position)] = param

        refiners_state_dict.update(renamed)
def convert_condition_encoder(
    control_lora: ControlLora,
    refiners_state_dict: dict[str, Tensor],
) -> None:
    """Add the parameters of the ConditionEncoder to the refiners state dict.

    Keys are the encoder's path joined with the parameter name, cut down to start at
    "ConditionEncoder".

    Args:
        control_lora: The ControlLora to read the ConditionEncoder from.
        refiners_state_dict: The refiners state dict, updated in place with the converted ConditionEncoder.
    """
    encoder = control_lora.ensure_find(ConditionEncoder)
    path = encoder.get_path()

    renamed: dict[str, Tensor] = {}
    for key, param in encoder.state_dict().items():
        renamed[simplify_key(f"{path}.{key}", "ConditionEncoder")] = param

    refiners_state_dict.update(renamed)
class ControlLoraConversion(Conversion):
    """Conversion of a Control LoRA checkpoint into a refiners state dict.

    The original file is downloaded and loaded into a ControlLora injected into an SDXL
    UNet; the LoRA, ZeroConvolution and ConditionEncoder weights of that ControlLora are
    then written to the converted file.
    """

    def __init__(
        self,
        original: Hub,
        converted: Hub,
        dtype: torch.dtype = torch.float32,
    ) -> None:
        """Initialize the weight structure.

        Args:
            original: A Hub object representing the original weight.
            converted: A Hub object representing the converted weight.
            dtype: Stored on the instance; `convert` in this class does not read it.
        """
        # NOTE(review): the parent initializer is not called here — confirm this is intended
        self.original = original
        self.converted = converted
        self.dtype = dtype

    def convert(self) -> None:
        """Convert the weights from the original to the converted weights.

        Returns early when the converted file already exists locally and passes its
        local hash check. An HTTP error during a remote hash check is only logged.

        Raises:
            AssertionError: If a hash check of the converted file returns a falsy value.
        """
        logging.info(
            f"Converting {self.original.repo_id}/{self.original.filename} to {self.converted.repo_id}/{self.converted.filename}"
        )

        # an existing converted file with a valid local hash makes the conversion unnecessary
        already_present = self.converted.local_path.is_file()
        if already_present:
            logging.warning(f"{self.converted.local_path} already exists")
        if already_present and self.converted.check_local_hash():
            try:
                assert self.converted.check_remote_hash()
            except requests.exceptions.HTTPError:
                logging.error(f"{self.converted.local_path} couldn't verify remote hash")
            return

        # fetch the original checkpoint and read its tensors
        self.original.download()
        original_state_dict = load_from_safetensors(self.original.local_path)

        # inject a ControlLora (named after the original file) into an SDXL UNet
        sdxl = StableDiffusion_XL()
        name = self.original.local_path.stem
        control_lora = ControlLoraAdapter(target=sdxl.unet, name=name).inject().control_lora

        # fill the ControlLora with the original tensors
        lora_layers = load_lora_layers(name, original_state_dict, control_lora)
        load_zero_convolutions(original_state_dict, control_lora)
        load_condition_encoder(original_state_dict, control_lora)

        # read the weights back under their refiners names
        converted_state_dict: dict[str, Tensor] = {}
        convert_lora_layers(lora_layers, control_lora, converted_state_dict)
        convert_zero_convolutions(control_lora, converted_state_dict)
        convert_condition_encoder(control_lora, converted_state_dict)

        # write the converted file, creating its folder when needed
        self.converted.local_path.parent.mkdir(parents=True, exist_ok=True)
        save_to_safetensors(self.converted.local_path, converted_state_dict)

        # verify what was just written
        assert self.converted.check_local_hash()
        try:
            assert self.converted.check_remote_hash()
        except requests.exceptions.HTTPError:
            logging.warning(f"{self.converted.local_path} couldn't verify remote hash")
# "canny" Control LoRA conversion: lllyasviel/misc (control-lora-canny-rank128.safetensors,
# pinned revision) -> refiners/sdxl.controllora.canny (model.safetensors).
# Each Hub carries the SHA-256 expected for its file; dtype is left at the class default.
canny = ControlLoraConversion(
    original=Hub(
        repo_id="lllyasviel/misc",
        filename="control-lora-canny-rank128.safetensors",
        revision="71f7a66a7affe631c64af469fe647217d422cac0",
        expected_sha256="56389dbb245ca44de91d662529bd4298abc55ce2318f60bc19454fb72ff68247",
    ),
    converted=Hub(
        repo_id="refiners/sdxl.controllora.canny",
        filename="model.safetensors",
        expected_sha256="6edfa742e2b5191ce357fb559e236652b004feea490c4f1277b30abc9804321f",
    ),
)
# "cpds" Control LoRA conversion: lllyasviel/misc (fooocus_xl_cpds_128.safetensors,
# pinned revision) -> refiners/sdxl.controllora.cpds (model.safetensors).
# Each Hub carries the SHA-256 expected for its file; dtype is left at the class default.
cpds = ControlLoraConversion(
    original=Hub(
        repo_id="lllyasviel/misc",
        filename="fooocus_xl_cpds_128.safetensors",
        revision="71f7a66a7affe631c64af469fe647217d422cac0",
        expected_sha256="eec3fd8209a65b41341ea9f415de66909c97b30fb4d20965b3304e8e5251c2f1",
    ),
    converted=Hub(
        repo_id="refiners/sdxl.controllora.cpds",
        filename="model.safetensors",
        expected_sha256="9a3b2a86f32e4747e98531b0af8b59a804391b538949a0dd85263722b6e64db0",
    ),
)

View file

@ -0,0 +1,290 @@
import torch
from refiners.conversion.utils import Conversion, Hub, WeightRecipe
diffusers_recipe = WeightRecipe(
key_map={
"time_embedding.linear_1": "TimestepEncoder.RangeEncoder.Linear_1",
"time_embedding.linear_2": "TimestepEncoder.RangeEncoder.Linear_2",
"down_blocks.2.resnets.0.time_emb_proj": "DownBlocks.Chain_8.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"down_blocks.2.resnets.1.time_emb_proj": "DownBlocks.Chain_9.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"down_blocks.3.resnets.0.time_emb_proj": "DownBlocks.Chain_11.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"down_blocks.3.resnets.1.time_emb_proj": "DownBlocks.Chain_12.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"mid_block.resnets.0.time_emb_proj": "MiddleBlock.ResidualBlock_1.Chain.RangeAdapter2d.Chain.Linear",
"mid_block.attentions.0.transformer_blocks.0.attn1.to_out.0": "MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"mid_block.attentions.0.transformer_blocks.0.attn2.to_out.0": "MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"mid_block.resnets.1.time_emb_proj": "MiddleBlock.ResidualBlock_2.Chain.RangeAdapter2d.Chain.Linear",
"conv_in": "DownBlocks.Chain_1.Conv2d",
"controlnet_cond_embedding.conv_in": "DownBlocks.Chain_1.Residual.ConditionEncoder.Chain_1.Conv2d",
"controlnet_cond_embedding.blocks.0": "DownBlocks.Chain_1.Residual.ConditionEncoder.Chain_2.Conv2d_1",
"controlnet_cond_embedding.blocks.1": "DownBlocks.Chain_1.Residual.ConditionEncoder.Chain_2.Conv2d_2",
"controlnet_cond_embedding.blocks.2": "DownBlocks.Chain_1.Residual.ConditionEncoder.Chain_3.Conv2d_1",
"controlnet_cond_embedding.blocks.3": "DownBlocks.Chain_1.Residual.ConditionEncoder.Chain_3.Conv2d_2",
"controlnet_cond_embedding.blocks.4": "DownBlocks.Chain_1.Residual.ConditionEncoder.Chain_4.Conv2d_1",
"controlnet_cond_embedding.blocks.5": "DownBlocks.Chain_1.Residual.ConditionEncoder.Chain_4.Conv2d_2",
"controlnet_cond_embedding.conv_out": "DownBlocks.Chain_1.Residual.ConditionEncoder.Conv2d",
"down_blocks.0.resnets.0.norm1": "DownBlocks.Chain_2.ResidualBlock.Chain.GroupNorm_1",
"down_blocks.0.resnets.0.norm2": "DownBlocks.Chain_2.ResidualBlock.Chain.GroupNorm_2",
"down_blocks.0.attentions.0.norm": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_1.GroupNorm",
"down_blocks.0.resnets.1.norm1": "DownBlocks.Chain_3.ResidualBlock.Chain.GroupNorm_1",
"down_blocks.0.resnets.1.norm2": "DownBlocks.Chain_3.ResidualBlock.Chain.GroupNorm_2",
"down_blocks.0.attentions.1.norm": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_1.GroupNorm",
"down_blocks.1.resnets.0.norm1": "DownBlocks.Chain_5.ResidualBlock.Chain.GroupNorm_1",
"down_blocks.0.resnets.0.conv1": "DownBlocks.Chain_2.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"down_blocks.0.resnets.0.conv2": "DownBlocks.Chain_2.ResidualBlock.Chain.Conv2d",
"down_blocks.0.resnets.1.conv1": "DownBlocks.Chain_3.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"down_blocks.0.resnets.1.conv2": "DownBlocks.Chain_3.ResidualBlock.Chain.Conv2d",
"down_blocks.0.downsamplers.0.conv": "DownBlocks.Chain_4.Downsample.Conv2d",
"down_blocks.0.resnets.0.time_emb_proj": "DownBlocks.Chain_2.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"down_blocks.0.attentions.0.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"down_blocks.0.resnets.1.time_emb_proj": "DownBlocks.Chain_3.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"down_blocks.0.attentions.1.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"controlnet_down_blocks.0": "DownBlocks.Chain_1.Passthrough.Conv2d",
"down_blocks.0.attentions.0.proj_in": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_1.Conv2d",
"down_blocks.0.attentions.0.proj_out": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_3.Conv2d",
"controlnet_down_blocks.1": "DownBlocks.Chain_2.Passthrough.Conv2d",
"down_blocks.0.attentions.1.proj_in": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_1.Conv2d",
"down_blocks.0.attentions.1.proj_out": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_3.Conv2d",
"controlnet_down_blocks.2": "DownBlocks.Chain_3.Passthrough.Conv2d",
"controlnet_down_blocks.3": "DownBlocks.Chain_4.Passthrough.Conv2d",
"down_blocks.0.attentions.0.transformer_blocks.0.norm1": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"down_blocks.0.attentions.0.transformer_blocks.0.norm2": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"down_blocks.0.attentions.0.transformer_blocks.0.norm3": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"down_blocks.0.attentions.1.transformer_blocks.0.norm1": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"down_blocks.0.attentions.1.transformer_blocks.0.norm2": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"down_blocks.0.attentions.1.transformer_blocks.0.norm3": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"down_blocks.0.attentions.0.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"down_blocks.0.attentions.1.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"down_blocks.1.resnets.0.conv1": "DownBlocks.Chain_5.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"down_blocks.1.resnets.0.time_emb_proj": "DownBlocks.Chain_5.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"down_blocks.1.resnets.1.time_emb_proj": "DownBlocks.Chain_6.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"down_blocks.1.resnets.0.norm2": "DownBlocks.Chain_5.ResidualBlock.Chain.GroupNorm_2",
"down_blocks.1.attentions.0.norm": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_1.GroupNorm",
"down_blocks.1.resnets.1.norm1": "DownBlocks.Chain_6.ResidualBlock.Chain.GroupNorm_1",
"down_blocks.1.resnets.1.norm2": "DownBlocks.Chain_6.ResidualBlock.Chain.GroupNorm_2",
"down_blocks.1.attentions.1.norm": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_1.GroupNorm",
"down_blocks.2.resnets.0.norm1": "DownBlocks.Chain_8.ResidualBlock.Chain.GroupNorm_1",
"down_blocks.1.resnets.0.conv2": "DownBlocks.Chain_5.ResidualBlock.Chain.Conv2d",
"down_blocks.1.resnets.1.conv1": "DownBlocks.Chain_6.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"down_blocks.1.resnets.1.conv2": "DownBlocks.Chain_6.ResidualBlock.Chain.Conv2d",
"down_blocks.1.downsamplers.0.conv": "DownBlocks.Chain_7.Downsample.Conv2d",
"down_blocks.1.resnets.0.conv_shortcut": "DownBlocks.Chain_5.ResidualBlock.Conv2d",
"down_blocks.1.attentions.0.proj_in": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_1.Conv2d",
"down_blocks.1.attentions.0.proj_out": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_3.Conv2d",
"controlnet_down_blocks.4": "DownBlocks.Chain_5.Passthrough.Conv2d",
"down_blocks.1.attentions.1.proj_in": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_1.Conv2d",
"down_blocks.1.attentions.1.proj_out": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_3.Conv2d",
"controlnet_down_blocks.5": "DownBlocks.Chain_6.Passthrough.Conv2d",
"controlnet_down_blocks.6": "DownBlocks.Chain_7.Passthrough.Conv2d",
"down_blocks.1.attentions.0.transformer_blocks.0.norm1": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"down_blocks.1.attentions.0.transformer_blocks.0.norm2": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"down_blocks.1.attentions.0.transformer_blocks.0.norm3": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"down_blocks.1.attentions.1.transformer_blocks.0.norm1": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"down_blocks.1.attentions.1.transformer_blocks.0.norm2": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"down_blocks.1.attentions.1.transformer_blocks.0.norm3": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"down_blocks.1.attentions.0.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"down_blocks.2.resnets.0.conv1": "DownBlocks.Chain_8.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"down_blocks.2.resnets.0.norm2": "DownBlocks.Chain_8.ResidualBlock.Chain.GroupNorm_2",
"down_blocks.2.attentions.0.norm": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_1.GroupNorm",
"down_blocks.2.resnets.1.norm1": "DownBlocks.Chain_9.ResidualBlock.Chain.GroupNorm_1",
"down_blocks.2.resnets.1.norm2": "DownBlocks.Chain_9.ResidualBlock.Chain.GroupNorm_2",
"down_blocks.2.attentions.1.norm": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_1.GroupNorm",
"down_blocks.3.resnets.0.norm1": "DownBlocks.Chain_11.ResidualBlock.Chain.GroupNorm_1",
"down_blocks.3.resnets.0.norm2": "DownBlocks.Chain_11.ResidualBlock.Chain.GroupNorm_2",
"down_blocks.3.resnets.1.norm1": "DownBlocks.Chain_12.ResidualBlock.Chain.GroupNorm_1",
"down_blocks.3.resnets.1.norm2": "DownBlocks.Chain_12.ResidualBlock.Chain.GroupNorm_2",
"mid_block.resnets.0.norm1": "MiddleBlock.ResidualBlock_1.Chain.GroupNorm_1",
"mid_block.resnets.0.norm2": "MiddleBlock.ResidualBlock_1.Chain.GroupNorm_2",
"mid_block.attentions.0.norm": "MiddleBlock.CLIPLCrossAttention.Chain_1.GroupNorm",
"mid_block.resnets.1.norm1": "MiddleBlock.ResidualBlock_2.Chain.GroupNorm_1",
"mid_block.resnets.1.norm2": "MiddleBlock.ResidualBlock_2.Chain.GroupNorm_2",
"down_blocks.2.resnets.0.conv2": "DownBlocks.Chain_8.ResidualBlock.Chain.Conv2d",
"down_blocks.2.resnets.1.conv1": "DownBlocks.Chain_9.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"down_blocks.2.resnets.1.conv2": "DownBlocks.Chain_9.ResidualBlock.Chain.Conv2d",
"down_blocks.2.downsamplers.0.conv": "DownBlocks.Chain_10.Downsample.Conv2d",
"down_blocks.3.resnets.0.conv1": "DownBlocks.Chain_11.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"down_blocks.3.resnets.0.conv2": "DownBlocks.Chain_11.ResidualBlock.Chain.Conv2d",
"down_blocks.3.resnets.1.conv1": "DownBlocks.Chain_12.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"down_blocks.3.resnets.1.conv2": "DownBlocks.Chain_12.ResidualBlock.Chain.Conv2d",
"mid_block.resnets.0.conv1": "MiddleBlock.ResidualBlock_1.Chain.RangeAdapter2d.Conv2d",
"mid_block.resnets.0.conv2": "MiddleBlock.ResidualBlock_1.Chain.Conv2d",
"mid_block.resnets.1.conv1": "MiddleBlock.ResidualBlock_2.Chain.RangeAdapter2d.Conv2d",
"mid_block.resnets.1.conv2": "MiddleBlock.ResidualBlock_2.Chain.Conv2d",
"down_blocks.2.resnets.0.conv_shortcut": "DownBlocks.Chain_8.ResidualBlock.Conv2d",
"down_blocks.2.attentions.0.proj_in": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_1.Conv2d",
"down_blocks.2.attentions.0.proj_out": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_3.Conv2d",
"controlnet_down_blocks.7": "DownBlocks.Chain_8.Passthrough.Conv2d",
"down_blocks.2.attentions.1.proj_in": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_1.Conv2d",
"down_blocks.2.attentions.1.proj_out": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_3.Conv2d",
"controlnet_down_blocks.8": "DownBlocks.Chain_9.Passthrough.Conv2d",
"controlnet_down_blocks.9": "DownBlocks.Chain_10.Passthrough.Conv2d",
"controlnet_down_blocks.10": "DownBlocks.Chain_11.Passthrough.Conv2d",
"controlnet_down_blocks.11": "DownBlocks.Chain_12.Passthrough.Conv2d",
"mid_block.attentions.0.proj_in": "MiddleBlock.CLIPLCrossAttention.Chain_1.Conv2d",
"mid_block.attentions.0.proj_out": "MiddleBlock.CLIPLCrossAttention.Chain_3.Conv2d",
"controlnet_mid_block": "MiddleBlock.Passthrough.Conv2d",
"down_blocks.2.attentions.0.transformer_blocks.0.norm1": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"down_blocks.2.attentions.0.transformer_blocks.0.norm2": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"down_blocks.2.attentions.0.transformer_blocks.0.norm3": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"down_blocks.2.attentions.1.transformer_blocks.0.norm1": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"down_blocks.2.attentions.1.transformer_blocks.0.norm2": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"down_blocks.2.attentions.1.transformer_blocks.0.norm3": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"mid_block.attentions.0.transformer_blocks.0.norm1": "MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"mid_block.attentions.0.transformer_blocks.0.norm2": "MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"mid_block.attentions.0.transformer_blocks.0.norm3": "MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"mid_block.attentions.0.transformer_blocks.0.attn1.to_q": "MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"mid_block.attentions.0.transformer_blocks.0.attn1.to_k": "MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"mid_block.attentions.0.transformer_blocks.0.attn1.to_v": "MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"mid_block.attentions.0.transformer_blocks.0.attn2.to_q": "MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"mid_block.attentions.0.transformer_blocks.0.attn2.to_k": "MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"mid_block.attentions.0.transformer_blocks.0.attn2.to_v": "MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"mid_block.attentions.0.transformer_blocks.0.ff.net.0.proj": "MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"down_blocks.2.attentions.0.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"down_blocks.2.attentions.1.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"mid_block.attentions.0.transformer_blocks.0.ff.net.2": "MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
},
)
# "tile" ControlNet conversion: lllyasviel/control_v11f1e_sd15_tile (a .bin file, pinned
# revision) -> refiners/sd15.controlnet.tile, using the diffusers_recipe key map.
# NOTE(review): dtype=torch.float16 is presumably the dtype of the converted weights —
# confirm in Conversion.
tile = Conversion(
    original=Hub(
        repo_id="lllyasviel/control_v11f1e_sd15_tile",
        filename="diffusion_pytorch_model.bin",
        revision="3f877705c37010b7221c3d10743307d6b5b6efac",
        expected_sha256="eb05b4c3665bd76dad70a90652014a9b3aab391abd8a5bb484e860330f9492fb",
    ),
    converted=Hub(
        repo_id="refiners/sd15.controlnet.tile",
        filename="model.safetensors",
        expected_sha256="3002029df75364fcbdbf6024dc4c414c929cb8623a2fe7d406a4dbfcee1ffa5a",
    ),
    recipe=diffusers_recipe,
    dtype=torch.float16,
)
# "canny" ControlNet conversion: lllyasviel/control_v11p_sd15_canny (pinned revision)
# -> refiners/sd15.controlnet.canny, using the diffusers_recipe key map.
# NOTE(review): dtype=torch.float16 is presumably the dtype of the converted weights —
# confirm in Conversion.
canny = Conversion(
    original=Hub(
        repo_id="lllyasviel/control_v11p_sd15_canny",
        filename="diffusion_pytorch_model.safetensors",
        revision="115a470d547982438f70198e353a921996e2e819",
        expected_sha256="be713fb941fc7c625f0c7d816b6a19115783a665f3049a8974f127e0c075d9a9",
    ),
    converted=Hub(
        repo_id="refiners/sd15.controlnet.canny",
        filename="model.safetensors",
        expected_sha256="0d6d0ba036dc26f4842e89f3c5f2f37feca904863e83b578db099a6fdafc4f51",
    ),
    recipe=diffusers_recipe,
    dtype=torch.float16,
)
# "depth" ControlNet conversion: lllyasviel/control_v11f1p_sd15_depth (pinned revision)
# -> refiners/sd15.controlnet.depth, using the diffusers_recipe key map.
# NOTE(review): dtype=torch.float16 is presumably the dtype of the converted weights —
# confirm in Conversion.
depth = Conversion(
    original=Hub(
        repo_id="lllyasviel/control_v11f1p_sd15_depth",
        filename="diffusion_pytorch_model.safetensors",
        revision="539f99181d33db39cf1af2e517cd8056785f0a87",
        expected_sha256="999aca923ca5e19e70e6afc8d11073cc3c03553ca935b636bd5925df4a1c77d1",
    ),
    converted=Hub(
        repo_id="refiners/sd15.controlnet.depth",
        filename="model.safetensors",
        expected_sha256="d131fb2e73e89e56ee1e73b6af17373bba886da6ae24dd94300a93e993292133",
    ),
    recipe=diffusers_recipe,
    dtype=torch.float16,
)
# "normalbae" ControlNet conversion: lllyasviel/control_v11p_sd15_normalbae (pinned
# revision) -> refiners/sd15.controlnet.normalbae, using the diffusers_recipe key map.
# NOTE(review): dtype=torch.float16 is presumably the dtype of the converted weights —
# confirm in Conversion.
normalbae = Conversion(
    original=Hub(
        repo_id="lllyasviel/control_v11p_sd15_normalbae",
        filename="diffusion_pytorch_model.safetensors",
        revision="cb7296e6587a219068e9d65864e38729cd862aa8",
        expected_sha256="e6c3772b35e5cb1869beca97a6ade6e8e5283310462297b10d129e25351983d7",
    ),
    converted=Hub(
        repo_id="refiners/sd15.controlnet.normalbae",
        filename="model.safetensors",
        expected_sha256="89ac08579cd244cf64bda2045d302d5455b33b951fada4abd089b5defbade364",
    ),
    recipe=diffusers_recipe,
    dtype=torch.float16,
)
# "lineart" ControlNet conversion: lllyasviel/control_v11p_sd15_lineart (pinned revision)
# -> refiners/sd15.controlnet.lineart, using the diffusers_recipe key map.
# NOTE(review): dtype=torch.float16 is presumably the dtype of the converted weights —
# confirm in Conversion.
lineart = Conversion(
    original=Hub(
        repo_id="lllyasviel/control_v11p_sd15_lineart",
        filename="diffusion_pytorch_model.safetensors",
        revision="8a158f547e031c5b8fbca19ead09a74767ff4db0",
        expected_sha256="d9d6d0e5526dd21dfc503f9e42a93ff1f977aa52df3c14e8ac11085b518cb114",
    ),
    converted=Hub(
        repo_id="refiners/sd15.controlnet.lineart",
        filename="model.safetensors",
        expected_sha256="5fb9ce17b92032c1b7d54aa7fb3c00c067f08e9c024a43cde06999a5a18391cb",
    ),
    recipe=diffusers_recipe,
    dtype=torch.float16,
)
# "sam" ControlNet conversion: mfidabel/controlnet-segment-anything (a .bin file, pinned
# revision) -> refiners/sd15.controlnet.sam, using the diffusers_recipe key map.
# NOTE(review): dtype=torch.float16 is presumably the dtype of the converted weights —
# confirm in Conversion.
sam = Conversion(
    original=Hub(
        repo_id="mfidabel/controlnet-segment-anything",
        filename="diffusion_pytorch_model.bin",
        revision="22bf4d81a4c7557287815a53e1f55279836e2bfa",
        expected_sha256="9d4f35bb941e35ceeb54e4d6d35c9239949b193e5c7389426b95a97e43de884d",
    ),
    converted=Hub(
        repo_id="refiners/sd15.controlnet.sam",
        filename="model.safetensors",
        expected_sha256="b8e0e1f2b1e542e5a21e21cdadf135981937b9ca491312b75aa85bff63b35589",
    ),
    recipe=diffusers_recipe,
    dtype=torch.float16,
)

View file

@ -0,0 +1,298 @@
import logging
import requests
import torch
from refiners.conversion.utils import Conversion, Hub
from refiners.fluxion.utils import load_tensors, save_to_safetensors
def convert_dinov2_facebook(weights: dict[str, torch.Tensor]) -> None:
    """Convert DINOv2 weights from the facebook key layout to the refiners one, in place.

    The dict is mutated: facebook keys are removed and the matching refiners keys
    are inserted. Nothing is returned.

    Args:
        weights: State dict using the facebook DINOv2 key names.

    Raises:
        ValueError: If `weights` holds no "blocks.<index>." key, so the depth
            cannot be inferred.
        KeyError: If a tensor expected by the conversion is missing.
    """
    # get depth from "blocks" keys
    block_indices = [int(key.split(".")[1]) for key in weights if key.startswith("blocks.")]
    if not block_indices:
        raise ValueError("cannot infer DINOv2 depth: no 'blocks.<index>.' key found in weights")
    depth = max(block_indices) + 1

    # only needed when pre-training; a checkpoint that already lacks it is fine
    weights.pop("mask_token", None)

    # squeeze cls_token and position_embeddings
    weights["cls_token"] = weights["cls_token"].squeeze(0)
    weights["pos_embed"] = weights["pos_embed"].squeeze(0)

    # rename "w12" to "fc1" and "w3" to "fc2", only for giant model
    for key in list(weights):
        if "w3" in key:
            weights[key.replace("w3", "fc2")] = weights.pop(key)
        elif "w12" in key:
            # we swap w1 and w2 because of the difference between our GLU implementation and theirs
            # see https://github.com/facebookresearch/dinov2/blob/e1277af2ba9496fbadf7aec6eba56e8d882d1e35/dinov2/layers/swiglu_ffn.py#L31-L34
            # and https://github.com/finegrain-ai/refiners/blob/a2ee70578361e4d84a65a8708564480a9b0ec67e/src/refiners/fluxion/layers/activations.py#L158-L160
            w1, w2 = weights.pop(key).chunk(2, dim=0)
            weights[key.replace("w12", "fc1")] = torch.cat([w2, w1], dim=0)

    rename_keys: list[tuple[str, str]] = [
        ("cls_token", "Concatenate.ClassToken.Parameter.weight"),
        ("pos_embed", "PositionalEncoder.PositionalEmbedding.Parameter.weight"),
        ("patch_embed.proj.weight", "Concatenate.PatchEncoder.Conv2d.weight"),
        ("patch_embed.proj.bias", "Concatenate.PatchEncoder.Conv2d.bias"),
        ("norm.weight", "LayerNorm.weight"),
        ("norm.bias", "LayerNorm.bias"),
    ]

    # (facebook suffix, refiners suffix) for the tensors of one transformer block;
    # the fused qkv projection is handled separately below
    block_renames: tuple[tuple[str, str], ...] = (
        ("norm1.weight", "Residual_1.LayerNorm.weight"),
        ("norm1.bias", "Residual_1.LayerNorm.bias"),
        ("attn.proj.weight", "Residual_1.SelfAttention.Linear.weight"),
        ("attn.proj.bias", "Residual_1.SelfAttention.Linear.bias"),
        ("ls1.gamma", "Residual_1.LayerScale.weight"),
        ("norm2.weight", "Residual_2.LayerNorm.weight"),
        ("norm2.bias", "Residual_2.LayerNorm.bias"),
        ("mlp.fc1.weight", "Residual_2.FeedForward.Linear_1.weight"),
        ("mlp.fc1.bias", "Residual_2.FeedForward.Linear_1.bias"),
        ("mlp.fc2.weight", "Residual_2.FeedForward.Linear_2.weight"),
        ("mlp.fc2.bias", "Residual_2.FeedForward.Linear_2.bias"),
        ("ls2.gamma", "Residual_2.LayerScale.weight"),
    )
    for i in range(depth):
        # facebook blocks are 0-indexed, refiners layers are 1-indexed
        rename_keys.extend(
            (f"blocks.{i}.{old_suffix}", f"Transformer.TransformerLayer_{i + 1}.{new_suffix}")
            for old_suffix, new_suffix in block_renames
        )

    if "register_tokens" in weights:
        weights["register_tokens"] = weights["register_tokens"].squeeze(0)
        rename_keys.append(("register_tokens", "Registers.Parameter.weight"))

    # rename keys
    for old_key, new_key in rename_keys:
        weights[new_key] = weights.pop(old_key)

    # split the qkv weights and biases
    for i in range(depth):
        distribute = f"Transformer.TransformerLayer_{i + 1}.Residual_1.SelfAttention.Distribute"
        for kind in ("weight", "bias"):
            fused = weights.pop(f"blocks.{i}.attn.qkv.{kind}")
            # chunks come out in q, k, v order and map to Linear_1, Linear_2, Linear_3
            for position, part in enumerate(fused.chunk(3, dim=0), start=1):
                weights[f"{distribute}.Linear_{position}.{kind}"] = part
class DinoV2Conversion(Conversion):
    """Conversion entry for DINOv2 checkpoints, converted with `convert_dinov2_facebook`."""

    def __init__(
        self,
        original: Hub,
        converted: Hub,
        dtype: torch.dtype = torch.float32,
    ) -> None:
        """Initialize the weight structure.

        Args:
            original: Hub entry describing the original weights.
            converted: Hub entry describing the converted weights.
            dtype: Dtype handed to `change_dtype` before the converted weights are saved.
        """
        # NOTE(review): the parent initializer is not called; only these three
        # attributes are set — confirm nothing else from `Conversion` is required.
        self.original = original
        self.converted = converted
        self.dtype = dtype

    def convert(self) -> None:  # type: ignore
        """Convert the weights from the original to the converted weights.

        Returns early when the converted file already exists locally and its
        local hash check passes. Otherwise the original is downloaded, loaded,
        converted, cast with `change_dtype` and saved as safetensors, then the
        local and remote hash checks are asserted.

        Raises:
            AssertionError: If a hash check that could be performed returns a falsy value.
        """
        logging.info(f"Converting {self.original.repo_id}/{self.original.filename} to {self.converted.repo_id}")

        # check if the converted file already exists
        if self.converted.local_path.is_file():
            logging.warning(f"{self.converted.local_path} already exists")
            if self.converted.check_local_hash():
                try:
                    assert self.converted.check_remote_hash()
                except requests.exceptions.HTTPError:
                    # an HTTP failure while checking the remote hash is only logged
                    logging.error(f"{self.converted.local_path} couldn't verify remote hash")
                # the local file passed its hash check: nothing to convert
                return

        # get the original state_dict
        self.original.download()

        # load the original state_dict
        original_weights = load_tensors(self.original.local_path)

        # convert the state_dict
        convert_dinov2_facebook(original_weights)  # FIXME: this is inplace
        original_weights = self.change_dtype(original_weights, self.dtype)

        # save the converted state_dict
        self.converted.local_path.parent.mkdir(parents=True, exist_ok=True)
        save_to_safetensors(self.converted.local_path, original_weights)

        # check the converted state_dict
        assert self.converted.check_local_hash()
        try:
            assert self.converted.check_remote_hash()
        except requests.exceptions.HTTPError:
            # same policy as above: an HTTP failure is logged, not raised
            logging.warning(f"{self.converted.local_path} couldn't verify remote hash")
# DINOv2 "vits14" entry. The original carries a direct `download_url` and no
# pinned revision; `dtype` is not passed, so the class default (float32) applies.
small = DinoV2Conversion(
    original=Hub(
        repo_id="facebook/github_dinov2",
        filename="vits14.pth",
        expected_sha256="b938bf1bc15cd2ec0feacfe3a1bb553fe8ea9ca46a7e1d8d00217f29aef60cd9",
        download_url="https://dl.fbaipublicfiles.com/dinov2/dinov2_vits14/dinov2_vits14_pretrain.pth",
    ),
    converted=Hub(
        repo_id="refiners/dinov2.small.patch_14",
        filename="model.safetensors",
        expected_sha256="56a4b77856e20bbb5c4f0ce135089d4cd72da344dcdb278ba0c1376c8545e543",
    ),
)
# DINOv2 "vits14_reg4" entry (the `reg4` checkpoint of the small model);
# `dtype` is not passed, so the class default (float32) applies.
small_reg = DinoV2Conversion(
    original=Hub(
        repo_id="facebook/github_dinov2",
        filename="vits14_reg4.pth",
        expected_sha256="f433177089a681826f849f194ece3bb48f4d63fb38d32fc837e3dc7a4e5641fb",
        download_url="https://dl.fbaipublicfiles.com/dinov2/dinov2_vits14/dinov2_vits14_reg4_pretrain.pth",
    ),
    converted=Hub(
        repo_id="refiners/dinov2.small.patch_14.reg_4",
        filename="model.safetensors",
        expected_sha256="beee454507762018616635099c0ac30c7a6e4e08fbd9363c5e5d2a8f1935c3f2",
    ),
)
# DINOv2 "vitb14" entry; `dtype` is not passed, so the class default (float32) applies.
base = DinoV2Conversion(
    original=Hub(
        repo_id="facebook/github_dinov2",
        filename="vitb14.pth",
        expected_sha256="0b8b82f85de91b424aded121c7e1dcc2b7bc6d0adeea651bf73a13307fad8c73",
        download_url="https://dl.fbaipublicfiles.com/dinov2/dinov2_vitb14/dinov2_vitb14_pretrain.pth",
    ),
    converted=Hub(
        repo_id="refiners/dinov2.base.patch_14",
        filename="model.safetensors",
        expected_sha256="59b778ed980bc02843456d3fbe1893943922ac7759a9a706ca286dd45d10db1f",
    ),
)
# DINOv2 "vitb14_reg4" entry (the `reg4` checkpoint of the base model);
# `dtype` is not passed, so the class default (float32) applies.
base_reg = DinoV2Conversion(
    original=Hub(
        repo_id="facebook/github_dinov2",
        filename="vitb14_reg4.pth",
        expected_sha256="73182a088cf94833c94b1666d1c99e02fe87e2007bff57b564fb6206e25dba71",
        download_url="https://dl.fbaipublicfiles.com/dinov2/dinov2_vitb14/dinov2_vitb14_reg4_pretrain.pth",
    ),
    converted=Hub(
        repo_id="refiners/dinov2.base.patch_14.reg_4",
        filename="model.safetensors",
        expected_sha256="7f91aa7cd5aa51d665949ba328a938967164b363ebaacb8cae914143a7e004e7",
    ),
)
# DINOv2 "vitl14" entry; `dtype` is not passed, so the class default (float32) applies.
large = DinoV2Conversion(
    original=Hub(
        repo_id="facebook/github_dinov2",
        filename="vitl14.pth",
        expected_sha256="d5383ea8f4877b2472eb973e0fd72d557c7da5d3611bd527ceeb1d7162cbf428",
        download_url="https://dl.fbaipublicfiles.com/dinov2/dinov2_vitl14/dinov2_vitl14_pretrain.pth",
    ),
    converted=Hub(
        repo_id="refiners/dinov2.large.patch_14",
        filename="model.safetensors",
        expected_sha256="2ba79218d37482455db0d9967dfad024c3ad525499f8de0e3db5ff83faf80414",
    ),
)
# DINOv2 "vitl14_reg4" entry (the `reg4` checkpoint of the large model);
# `dtype` is not passed, so the class default (float32) applies.
large_reg = DinoV2Conversion(
    original=Hub(
        repo_id="facebook/github_dinov2",
        filename="vitl14_reg4.pth",
        expected_sha256="36e4deffbaef061a2576705b0c36f93621e2ae20bf6274694821b0b492551b51",
        download_url="https://dl.fbaipublicfiles.com/dinov2/dinov2_vitl14/dinov2_vitl14_reg4_pretrain.pth",
    ),
    converted=Hub(
        repo_id="refiners/dinov2.large.patch_14.reg_4",
        filename="model.safetensors",
        expected_sha256="e1d5a183a0ec15c5ac0a9e388038a07f8e90dd19e001b7bd4f7ffe3c5761667c",
    ),
)
# DINOv2 "vitg14" entry; `dtype` is not passed, so the class default (float32) applies.
# The "w12"/"w3" renaming in `convert_dinov2_facebook` is commented there as
# applying only to the giant model.
giant = DinoV2Conversion(
    original=Hub(
        repo_id="facebook/github_dinov2",
        filename="vitg14.pth",
        expected_sha256="baf8467e50af277596bbbafa06887c177ee899ab46033649c383577d7e9309d3",
        download_url="https://dl.fbaipublicfiles.com/dinov2/dinov2_vitg14/dinov2_vitg14_pretrain.pth",
    ),
    converted=Hub(
        repo_id="refiners/dinov2.giant.patch_14",
        filename="model.safetensors",
        expected_sha256="5a2d6088f4fd4aa1bf527ce0edf2ae3e76eee70c900b90716c18ad7daa4a1f2f",
    ),
)
# DINOv2 "vitg14_reg4" entry (the `reg4` checkpoint of the giant model);
# `dtype` is not passed, so the class default (float32) applies.
giant_reg = DinoV2Conversion(
    original=Hub(
        repo_id="facebook/github_dinov2",
        filename="vitg14_reg4.pth",
        expected_sha256="746ecb8c6301c645c5c855be91687d274587d6e48fdaec4a729753160b34a283",
        download_url="https://dl.fbaipublicfiles.com/dinov2/dinov2_vitg14/dinov2_vitg14_reg4_pretrain.pth",
    ),
    converted=Hub(
        repo_id="refiners/dinov2.giant.patch_14.reg_4",
        filename="model.safetensors",
        expected_sha256="d5f7f0917926d4fe72cd33408f79562c5d524c3e8aee999830129eecabda56a2",
    ),
)

View file

@ -0,0 +1,122 @@
import logging
import requests
import torch
from refiners.conversion.utils import Conversion, Hub, TensorDict
from refiners.fluxion.utils import load_from_safetensors, save_to_safetensors
def convert_state_dict(state_dict: dict[str, torch.Tensor]) -> TensorDict:
    """Convert an ELLA state dict to the refiners key layout.

    Every recognised entry is moved (popped) out of `state_dict` into the
    returned dict, so the input is mutated. Entries matching no rule stay in
    `state_dict`, are absent from the result, and are reported with a warning
    so that an unexpected checkpoint layout does not go unnoticed.

    Args:
        state_dict: Original ELLA tensors keyed by their original names.

    Returns:
        A new dict holding the converted tensors under refiners key names.
    """
    converted: TensorDict = {}
    ignored: list[str] = []

    for key in list(state_dict):
        parts = key.split(".")
        suffix = parts[-1]

        # the first four rules are global tensors; the remaining ones belong to a
        # transformer layer whose 0-based index is the third dotted component
        if "latents" in key:
            converted["PerceiverResampler.Latents.ParameterInitialized.weight"] = state_dict.pop(key)
        elif "time_embedding" in key:
            new_key = key.replace("time_embedding", "TimestepEncoder.RangeEncoder").replace("linear", "Linear")
            converted[new_key] = state_dict.pop(key)
        elif "proj_in" in key:
            converted[f"PerceiverResampler.Linear.{suffix}"] = state_dict.pop(key)
        elif "time_aware" in key:
            converted[f"PerceiverResampler.Residual.Linear.{suffix}"] = state_dict.pop(key)
        elif "attn.in_proj" in key:
            layer = f"PerceiverResampler.Transformer.TransformerLayer_{int(parts[2]) + 1}"
            # the fused projection is split into three equal chunks: query, key, value
            chunks = state_dict.pop(key).chunk(3, dim=0)
            param_type = "weight" if "weight" in key else "bias"
            for position, param in enumerate(chunks, start=1):
                converted[
                    f"{layer}.Residual_1.PerceiverAttention.Attention.Distribute.Linear_{position}.{param_type}"
                ] = param
        elif "attn.out_proj" in key:
            layer = f"PerceiverResampler.Transformer.TransformerLayer_{int(parts[2]) + 1}"
            converted[f"{layer}.Residual_1.PerceiverAttention.Attention.Linear.{suffix}"] = state_dict.pop(key)
        elif "ln_ff" in key:
            layer = f"PerceiverResampler.Transformer.TransformerLayer_{int(parts[2]) + 1}"
            converted[f"{layer}.Residual_2.AdaLayerNorm.Parallel.Chain.Linear.{suffix}"] = state_dict.pop(key)
        elif "ln_1" in key or "ln_2" in key:
            layer = f"PerceiverResampler.Transformer.TransformerLayer_{int(parts[2]) + 1}"
            # the numbering is swapped: ln_2 maps to AdaLayerNorm_1 and ln_1 to AdaLayerNorm_2
            n = 1 if int(parts[3].split("_")[-1]) == 2 else 2
            converted[
                f"{layer}.Residual_1.PerceiverAttention.Distribute.AdaLayerNorm_{n}.Parallel.Chain.Linear.{suffix}"
            ] = state_dict.pop(key)
        elif "mlp" in key:
            layer = f"PerceiverResampler.Transformer.TransformerLayer_{int(parts[2]) + 1}"
            n = 1 if "c_fc" in key else 2
            converted[f"{layer}.Residual_2.FeedForward.Linear_{n}.{suffix}"] = state_dict.pop(key)
        else:
            ignored.append(key)

    if ignored:
        logging.warning(f"{len(ignored)} key(s) matched no conversion rule and were left out: {ignored}")

    return converted
class ELLAConversion(Conversion):
    """Conversion entry for ELLA checkpoints, converted with `convert_state_dict`."""

    def __init__(
        self,
        original: Hub,
        converted: Hub,
        dtype: torch.dtype = torch.float32,
    ) -> None:
        """Initialize the weight structure.

        Args:
            original: Hub entry describing the original weights.
            converted: Hub entry describing the converted weights.
            dtype: Dtype handed to `change_dtype` before the converted weights are saved.
        """
        # NOTE(review): the parent initializer is not called; only these three
        # attributes are set — confirm nothing else from `Conversion` is required.
        self.original = original
        self.converted = converted
        self.dtype = dtype

    # TODO: use WeightRecipe instead
    def convert(self) -> None:  # type: ignore
        """Convert the weights from the original to the converted weights.

        Returns early when the converted file already exists locally and its
        local hash check passes. Otherwise the original is downloaded, loaded
        from safetensors, converted, cast with `change_dtype` and saved, then
        the local and remote hash checks are asserted.

        Raises:
            AssertionError: If a hash check that could be performed returns a falsy value.
        """
        logging.info(f"Converting {self.original.repo_id}/{self.original.filename} to {self.converted.repo_id}")

        # check if the converted file already exists
        if self.converted.local_path.is_file():
            logging.warning(f"{self.converted.local_path} already exists")
            if self.converted.check_local_hash():
                try:
                    assert self.converted.check_remote_hash()
                except requests.exceptions.HTTPError:
                    # an HTTP failure while checking the remote hash is only logged
                    logging.error(f"{self.converted.local_path} couldn't verify remote hash")
                # the local file passed its hash check: nothing to convert
                return

        # get the original state_dict
        self.original.download()

        # load the original state_dict
        original_weights = load_from_safetensors(self.original.local_path)

        # convert the state_dict
        converted_state_dict = convert_state_dict(original_weights)
        # despite its name, `original_weights` holds the converted tensors from here on
        original_weights = self.change_dtype(converted_state_dict, self.dtype)

        # save the converted state_dict
        self.converted.local_path.parent.mkdir(parents=True, exist_ok=True)
        save_to_safetensors(self.converted.local_path, original_weights)

        # check the converted state_dict
        assert self.converted.check_local_hash()
        try:
            assert self.converted.check_remote_hash()
        except requests.exceptions.HTTPError:
            # same policy as above: an HTTP failure is logged, not raised
            logging.warning(f"{self.converted.local_path} couldn't verify remote hash")
# ELLA entry: original `ella-sd1.5-tsc-t5xl.safetensors` from the
# `QQGYLab/ELLA` repo at a pinned revision, converted file declared under
# `refiners/sd15.ella.tsc_t5xl`; float16 is requested explicitly.
sd15_t5xl = ELLAConversion(
    original=Hub(
        repo_id="QQGYLab/ELLA",
        filename="ella-sd1.5-tsc-t5xl.safetensors",
        revision="c07675dea7873abe24a4152e1140cf0131c217d2",
        expected_sha256="ca2018e325170d622389b531c0a061eea9d856b80e58e359ed54ade881517417",
    ),
    converted=Hub(
        repo_id="refiners/sd15.ella.tsc_t5xl",
        filename="model.safetensors",
        expected_sha256="ffc368afb97b93792f581d4a75275f4195cf76c225961cce61c3e1ef687df7da",
    ),
    dtype=torch.float16,
)

View file

@ -0,0 +1,36 @@
import torch
from refiners.conversion.utils import Conversion, Hub, WeightRecipe
# Key mapping for the HQ-SAM specific tensors: every original key lives under
# "mask_decoder."; all targets but the HQ token live under "Chain.HQSAMMaskPrediction.".
recipe = WeightRecipe(
    key_map={
        # dense embedding upscaling: "<module>.<index>" -> layer inside DenseEmbeddingUpscalingHQ
        **{
            f"mask_decoder.{source}": f"Chain.HQSAMMaskPrediction.Chain.DenseEmbeddingUpscalingHQ.{target}"
            for source, target in (
                ("compress_vit_feat.0", "HQFeatures.CompressViTFeat.ConvTranspose2d_1"),
                ("embedding_encoder.0", "HQFeatures.EmbeddingEncoder.ConvTranspose2d_1"),
                ("embedding_maskfeature.0", "EmbeddingMaskfeature.Conv2d_1"),
                ("compress_vit_feat.1", "HQFeatures.CompressViTFeat.LayerNorm2d"),
                ("embedding_encoder.1", "HQFeatures.EmbeddingEncoder.LayerNorm2d"),
                ("embedding_maskfeature.1", "EmbeddingMaskfeature.LayerNorm2d"),
                ("compress_vit_feat.3", "HQFeatures.CompressViTFeat.ConvTranspose2d_2"),
                ("embedding_encoder.3", "HQFeatures.EmbeddingEncoder.ConvTranspose2d_2"),
                ("embedding_maskfeature.3", "EmbeddingMaskfeature.Conv2d_2"),
            )
        },
        # HQ token MLP: 0-based source layers become 1-based Linear_<n>
        **{
            f"mask_decoder.hf_mlp.layers.{index}": f"Chain.HQSAMMaskPrediction.HQTokenMLP.MultiLinear.Linear_{index + 1}"
            for index in range(3)
        },
        "mask_decoder.hf_token": "MaskDecoderTokensExtender.hq_token",
    },
)
# HQ-SAM ViT-H entry: original `sam_hq_vit_h.pth` from the `lkeab/hq-sam` repo
# (no revision is pinned here), converted file declared under
# `refiners/sam.hq.vit_h`; float32 is requested.
vit_h = Conversion(
    original=Hub(
        repo_id="lkeab/hq-sam",
        filename="sam_hq_vit_h.pth",
        expected_sha256="a7ac14a085326d9fa6199c8c698c4f0e7280afdbb974d2c4660ec60877b45e35",
    ),
    converted=Hub(
        repo_id="refiners/sam.hq.vit_h",
        filename="model.safetensors",
        expected_sha256="017630c780ff67673d71e91beaec8804f8b5ae3a9ea607456b4504562f96cc2f",
    ),
    recipe=recipe,
    dtype=torch.float32,
)

View file

@ -0,0 +1,147 @@
import torch
from refiners.conversion.utils import Conversion, Hub, WeightRecipe
# Key mapping for the base IP-Adapter checkpoint: two image projection modules,
# then sixteen `ip_adapter` entries, each with a `to_k_ip` and a `to_v_ip` key.
diffusers_recipe = WeightRecipe(
    key_map={
        "image_proj.proj": "image_proj.Linear",
        "image_proj.norm": "image_proj.LayerNorm",
        # Source indices are odd numbers renumbered to zero-padded slots 000..015
        # in the order listed below; note that 31 sits between 11 and 13.
        **{
            f"ip_adapter.{source}.to_{kind}_ip": f"ip_adapter.{slot:03d}.to_{kind}_ip"
            for slot, source in enumerate([*range(1, 12, 2), 31, *range(13, 30, 2)])
            for kind in ("k", "v")
        },
    },
)
# Key mapping for the "plus" IP-Adapter checkpoint: four top-level image
# projection tensors/modules, four identical resampler layers, then the sixteen
# `ip_adapter` entries. The latents tensor is additionally reshaped.
diffusers_plus_recipe = WeightRecipe(
    key_map={
        "image_proj.latents": "image_proj.LatentsToken.Parameter.weight",
        "image_proj.proj_in": "image_proj.Linear_1",
        "image_proj.proj_out": "image_proj.Linear_2",
        "image_proj.norm_out": "image_proj.LayerNorm",
        # 0-based source layers become 1-based TransformerLayer_<n>; sub-module
        # "0.*" maps into Residual_1 (attention), "1.*" into Residual_2 (feed-forward)
        **{
            f"image_proj.layers.{layer}.{source}": f"image_proj.Transformer.TransformerLayer_{layer + 1}.{target}"
            for layer in range(4)
            for source, target in (
                ("0.norm1", "Residual_1.PerceiverAttention.Distribute.LayerNorm_1"),
                ("0.norm2", "Residual_1.PerceiverAttention.Distribute.LayerNorm_2"),
                ("0.to_q", "Residual_1.PerceiverAttention.Parallel.Chain_2.Linear"),
                ("0.to_kv", "Residual_1.PerceiverAttention.Parallel.Chain_1.Linear"),
                ("0.to_out", "Residual_1.PerceiverAttention.Linear"),
                ("1.0", "Residual_2.LayerNorm"),
                ("1.1", "Residual_2.FeedForward.Linear_1"),
                ("1.3", "Residual_2.FeedForward.Linear_2"),
            )
        },
        # Source indices are odd numbers renumbered to zero-padded slots 000..015
        # in the order listed below; note that 31 sits between 11 and 13.
        **{
            f"ip_adapter.{source}.to_{kind}_ip": f"ip_adapter.{slot:03d}.to_{kind}_ip"
            for slot, source in enumerate([*range(1, 12, 2), 31, *range(13, 30, 2)])
            for kind in ("k", "v")
        },
    },
    tensor_reshapes={"image_proj.LatentsToken.Parameter.weight": (16, 768)},
)
# Base IP-Adapter entry: original `models/ip-adapter_sd15.safetensors` from the
# `h94/IP-Adapter` repo at a pinned revision, converted file declared under
# `refiners/sd15.ip_adapter`; uses `diffusers_recipe` and requests float16.
base = Conversion(
    original=Hub(
        repo_id="h94/IP-Adapter",
        filename="models/ip-adapter_sd15.safetensors",
        revision="018e402774aeeddd60609b4ecdb7e298259dc729",
        expected_sha256="289b45f16d043d0bf542e45831f971dcdaabe18b656f11e86d9dfba7e9ee3369",
    ),
    converted=Hub(
        repo_id="refiners/sd15.ip_adapter",
        filename="model.safetensors",
        expected_sha256="ebabe531bac205e2fac942b353c22066abfb115b02f7fb72cd0e3361ee838ef3",
    ),
    recipe=diffusers_recipe,
    dtype=torch.float16,
)
# "Plus" IP-Adapter entry: original `models/ip-adapter-plus_sd15.safetensors`
# from the same `h94/IP-Adapter` revision as `base`, converted file declared under
# `refiners/sd15.ip_adapter.plus`; uses `diffusers_plus_recipe` and requests float16.
plus = Conversion(
    original=Hub(
        repo_id="h94/IP-Adapter",
        filename="models/ip-adapter-plus_sd15.safetensors",
        revision="018e402774aeeddd60609b4ecdb7e298259dc729",
        expected_sha256="a1c250be40455cc61a43da1201ec3f1edaea71214865fb47f57927e06cbe4996",
    ),
    converted=Hub(
        repo_id="refiners/sd15.ip_adapter.plus",
        filename="model.safetensors",
        expected_sha256="6eae5d2098fa83e3b8bf416fb46dd77a921ad044650f13890e8d41d7c84a71d2",
    ),
    recipe=diffusers_plus_recipe,
    dtype=torch.float16,
)

View file

@ -0,0 +1,363 @@
import torch
from refiners.conversion.utils import Conversion, Hub, WeightRecipe
# Key mapping for the base IP-Adapter checkpoint of this model family: two image
# projection modules, then seventy `ip_adapter` entries, each with a `to_k_ip`
# and a `to_v_ip` key.
diffusers_recipe = WeightRecipe(
    key_map={
        "image_proj.proj": "image_proj.Linear",
        "image_proj.norm": "image_proj.LayerNorm",
        # Source indices are odd numbers renumbered to zero-padded slots 000..069.
        # They are taken in three runs: 1..47 (slots 000-023), then 121..139
        # (slots 024-033), then 49..119 (slots 034-069).
        **{
            f"ip_adapter.{source}.to_{kind}_ip": f"ip_adapter.{slot:03d}.to_{kind}_ip"
            for slot, source in enumerate([*range(1, 48, 2), *range(121, 140, 2), *range(49, 120, 2)])
            for kind in ("k", "v")
        },
    },
)
# Key mapping used by the `plus` conversion declared further down in this module.
# The mapping is generated instead of enumerated, but yields exactly the same
# keys, values and insertion order as the explicit listing it replaces.
plus_diffusers_recipe = WeightRecipe(
    key_map={
        # Image projection: latent tokens, input/output projections and output norm.
        "image_proj.latents": "image_proj.LatentsToken.Parameter.weight",
        "image_proj.proj_in": "image_proj.Linear_1",
        "image_proj.proj_out": "image_proj.Linear_2",
        "image_proj.norm_out": "image_proj.LayerNorm",
        # Four identically structured layers: source layer `i` maps to `TransformerLayer_{i + 1}`.
        **{
            f"image_proj.layers.{layer}.{source}": f"image_proj.Transformer.TransformerLayer_{layer + 1}.{target}"
            for layer in range(4)
            for source, target in (
                ("0.norm1", "Residual_1.PerceiverAttention.Distribute.LayerNorm_1"),
                ("0.norm2", "Residual_1.PerceiverAttention.Distribute.LayerNorm_2"),
                ("0.to_q", "Residual_1.PerceiverAttention.Parallel.Chain_2.Linear"),
                ("0.to_kv", "Residual_1.PerceiverAttention.Parallel.Chain_1.Linear"),
                ("0.to_out", "Residual_1.PerceiverAttention.Linear"),
                ("1.0", "Residual_2.LayerNorm"),
                ("1.1", "Residual_2.FeedForward.Linear_1"),
                ("1.3", "Residual_2.FeedForward.Linear_2"),
            )
        },
        # Source indices are the odd numbers 1..47, then 121..139, then 49..119;
        # they map, in that order, onto the zero-padded targets 000..069.
        **{
            f"ip_adapter.{source}.{projection}": f"ip_adapter.{target:03d}.{projection}"
            for target, source in enumerate((*range(1, 48, 2), *range(121, 140, 2), *range(49, 120, 2)))
            for projection in ("to_k_ip", "to_v_ip")
        },
    },
    tensor_reshapes={"image_proj.LatentsToken.Parameter.weight": (16, 1280)},
)
# Conversion entry for the base SDXL IP-Adapter weights (`ip-adapter_sdxl_vit-h`),
# using `diffusers_recipe` and a float16 target dtype.
base = Conversion(
    # Source checkpoint, pinned to a specific revision.
    original=Hub(
        repo_id="h94/IP-Adapter",
        filename="sdxl_models/ip-adapter_sdxl_vit-h.safetensors",
        revision="018e402774aeeddd60609b4ecdb7e298259dc729",
        expected_sha256="ebf05d918348aec7abb02a5e9ecef77e0aaea6914a5c4ea13f50d45eb1681831",
    ),
    # Destination of the converted weights.
    converted=Hub(
        repo_id="refiners/sdxl.ip_adapter",
        filename="model.safetensors",
        expected_sha256="91fc7f3c9571ed26a93372e7251596c7269f37e134fae3a6a5f4f7247d998ab8",
    ),
    recipe=diffusers_recipe,
    dtype=torch.float16,
)
# Conversion entry for the "plus" SDXL IP-Adapter weights (`ip-adapter-plus_sdxl_vit-h`),
# using `plus_diffusers_recipe` and a float16 target dtype.
plus = Conversion(
    # Source checkpoint, pinned to a specific revision.
    original=Hub(
        repo_id="h94/IP-Adapter",
        filename="sdxl_models/ip-adapter-plus_sdxl_vit-h.safetensors",
        revision="018e402774aeeddd60609b4ecdb7e298259dc729",
        expected_sha256="3f5062b8400c94b7159665b21ba5c62acdcd7682262743d7f2aefedef00e6581",
    ),
    # Destination of the converted weights.
    converted=Hub(
        repo_id="refiners/sdxl.ip_adapter.plus",
        filename="model.safetensors",
        expected_sha256="9fdbcb4c6e3a643b6e8c002945685cc9d12ddd9787ce8b3e53fdeb814002ca22",
    ),
    recipe=plus_diffusers_recipe,
    dtype=torch.float16,
)

View file

@ -0,0 +1,56 @@
from refiners.conversion.utils import Hub
# LoRA weight locations. Each entry records where a file lives and the SHA-256 it is
# expected to have. Entries either pin a repository `revision` or carry an explicit
# `download_url`; how `Hub` resolves each form is defined in refiners.conversion.utils.

# Stable Diffusion 1.5 "pokemon" LoRA, pinned by revision.
sd15_pokemon = Hub(
    repo_id="pcuenq/pokemon-lora",
    filename="pytorch_lora_weights.bin",
    revision="31ae8fe6f588a78c02828e9b8d352dccd90f1a24",
    expected_sha256="f712fcfb6618da14d25a4f3e0c9460a878fc2417e2df95cdd683a73f71b50384",
)
# SDXL DPO LoRA, pinned by revision.
sdxl_dpo = Hub(
    repo_id="radames/sdxl-DPO-LoRA",
    filename="pytorch_lora_weights.safetensors",
    revision="319a544fff501b3ed907df67e1db356bee364c9f",
    expected_sha256="aeb5ec4a7db6679ea8085f794db1abca92cfd8e4c667a1b301b2b8ecd5599a5d",
)
# SDXL sci-fi environments LoRA, fetched through an explicit download URL.
sdxl_scifi = Hub(
    repo_id="civitai/Ciro_Negrogni",
    filename="Sci-fi_Environments_sdxl.safetensors",
    expected_sha256="5a3f738c9f79c65c1fac1418b1fe593967b0c1bd24fdb27f120ef1685e815c8e",
    download_url="https://civitai.com/api/download/models/140624?type=Model&format=SafeTensor",
)
# SDXL pixel-art LoRA, fetched through an explicit download URL.
sdxl_pixelart = Hub(
    repo_id="civitai/NeriJS",
    filename="pixel-art-xl-v1.1.safetensors",
    expected_sha256="bbf3d8defbfb3fb71331545225c0cf50c74a748d2525f7c19ebb8f74445de274",
    download_url="https://civitai.com/api/download/models/135931?type=Model&format=SafeTensor",
)
# SDXL "sliders" weights (age, cartoon style, eye size), each with an explicit download URL.
sdxl_age_slider = Hub(
    repo_id="baulab/sliders",
    filename="age.pt",
    expected_sha256="8c1c096f7cc1109b4072cbc604c811a5f0ff034fc0f6dc7cf66a558550aa4890",
    download_url="https://sliders.baulab.info/weights/xl_sliders/age.pt",
)
sdxl_cartoon_slider = Hub(
    repo_id="baulab/sliders",
    filename="cartoon_style.pt",
    expected_sha256="e07c30e4f82f709a474ae11dc5108ac48f81b6996b937757c8dd198920ea9b4d",
    download_url="https://sliders.baulab.info/weights/xl_sliders/cartoon_style.pt",
)
sdxl_eyesize_slider = Hub(
    repo_id="baulab/sliders",
    filename="eyesize.pt",
    expected_sha256="8fdffa3e7788f4bd6be9a2fe3b91957b4f35999fc9fa19eabfb49f92fbf6650b",
    download_url="https://sliders.baulab.info/weights/xl_sliders/eyesize.pt",
)
# SDXL LCM LoRA, pinned by revision.
sdxl_lcm = Hub(
    repo_id="latent-consistency/lcm-lora-sdxl",
    filename="pytorch_lora_weights.safetensors",
    revision="a18548dd4956b174ec5b0d78d340c8dae0a129cd",
    expected_sha256="a764e6859b6e04047cd761c08ff0cee96413a8e004c9f07707530cd776b19141",
)
# SDXL Lightning 4-step LoRA, pinned by revision.
sdxl_lightning_4steps = Hub(
    repo_id="ByteDance/SDXL-Lightning",
    filename="sdxl_lightning_4step_lora.safetensors",
    revision="c9a24f48e1c025556787b0c58dd67a091ece2e44",
    expected_sha256="bf56cf2657efb15e465d81402ed481d1e11c4677e4bcce1bc11fe71ad8506b79",
)

View file

@ -1,9 +1,14 @@
import logging
import re import re
from torch import Tensor import requests
import torch
from refiners.conversion.utils import Conversion, Hub, TensorDict
from refiners.fluxion.utils import save_to_safetensors
def convert_weights(official_state_dict: dict[str, Tensor]) -> dict[str, Tensor]: def convert_weights(official_state_dict: TensorDict) -> TensorDict:
rm_list = [ rm_list = [
# Official weights contains useless keys # Official weights contains useless keys
# See https://github.com/qianyu-dlut/MVANet/issues/3#issuecomment-2105650425 # See https://github.com/qianyu-dlut/MVANet/issues/3#issuecomment-2105650425
@ -136,3 +141,72 @@ def convert_weights(official_state_dict: dict[str, Tensor]) -> dict[str, Tensor]
state_dict.pop(key) state_dict.pop(key)
return state_dict return state_dict
class MVANetConversion(Conversion):
    """Conversion of the original MVANet checkpoint into refiners weights.

    Keys are remapped by the module-level `convert_weights` function rather than
    by a recipe, which is why `convert` is overridden here.
    """

    def __init__(
        self,
        original: Hub,
        converted: Hub,
        dtype: torch.dtype = torch.float16,
    ) -> None:
        """Store the source hub, the destination hub and the target dtype.

        Args:
            original: Location of the original checkpoint.
            converted: Location of the converted weights.
            dtype: Dtype the converted tensors are cast to.
        """
        # The parent initializer is not invoked; attributes are set directly.
        self.original = original
        self.converted = converted
        self.dtype = dtype

    def convert(self) -> None:  # type: ignore
        """Convert the weights from the original to the converted weights.

        Raises:
            AssertionError: If a local or remote hash check reports a mismatch.
                The checks are explicit `raise` statements (not `assert`) so they
                still run when Python is started with `-O`.
        """
        logging.info(f"Converting {self.original.repo_id}/{self.original.filename} to {self.converted.repo_id}")

        # check if the converted file already exists
        if self.converted.local_path.is_file():
            logging.warning(f"{self.converted.local_path} already exists")
            if self.converted.check_local_hash():
                try:
                    remote_matches = self.converted.check_remote_hash()
                except requests.exceptions.HTTPError:
                    # The remote hash could not be checked; keep the local file anyway.
                    logging.error(f"{self.converted.local_path} couldn't verify remote hash")
                else:
                    if not remote_matches:
                        raise AssertionError(f"{self.converted.local_path} failed the remote hash check")
                # Local file is already valid: nothing to convert.
                return

        # get the original state_dict
        self.original.download()

        # load the original state_dict
        original_weights = self.load_state_dict(self.original.local_path)

        # convert the state_dict
        converted_weights = convert_weights(original_weights)
        converted_weights = self.change_dtype(converted_weights, self.dtype)

        # save the converted state_dict
        self.converted.local_path.parent.mkdir(parents=True, exist_ok=True)
        save_to_safetensors(self.converted.local_path, converted_weights)

        # check the converted state_dict
        if not self.converted.check_local_hash():
            raise AssertionError(f"{self.converted.local_path} failed the local hash check")
        try:
            remote_matches = self.converted.check_remote_hash()
        except requests.exceptions.HTTPError:
            logging.warning(f"{self.converted.local_path} couldn't verify remote hash")
        else:
            if not remote_matches:
                raise AssertionError(f"{self.converted.local_path} failed the remote hash check")
# Conversion entry for the MVANet checkpoint (`Model_80.pth`), with a float16 target dtype.
mvanet = MVANetConversion(
    # Source checkpoint, pinned to a specific revision.
    original=Hub(
        repo_id="creative-graphic-design/MVANet-checkpoints",
        filename="Model_80.pth",
        revision="62d38c42a28b999067e2f755e32b27249bcc66c6",
        expected_sha256="ffec20a382b0a1832786438475e8b912a03be727a0e3197e7ab039153fb3bc46",
    ),
    # Destination of the converted weights.
    converted=Hub(
        repo_id="refiners/mvanet",
        filename="model.safetensors",
        expected_sha256="cca9a6e05e977ee9ac98b3f9a248430d7fe8385f7d249eaddece318e777788e5",
    ),
    dtype=torch.float16,
)

# Location of the Finegrain box segmenter weights, pinned to the `v0.1` revision.
# Declared as a plain `Hub`, i.e. without an associated conversion.
finegrain_v01 = Hub(
    repo_id="finegrain/finegrain-box-segmenter",
    filename="model.safetensors",
    revision="v0.1",
    expected_sha256="fd5f13919dfc0dda102df1af648c3773c61221aa65fe58d6af978637baded1ae",
)

View file

@ -0,0 +1,36 @@
import torch
from refiners.conversion.utils import Conversion, Hub, WeightRecipe
# Key mapping from the original informative-drawings checkpoint names (left)
# to the refiners layer names (right).
informative_drawings_recipe = WeightRecipe(
    key_map={
        "model0.1": "Chain_1.Conv2d",
        "model1.0": "Chain_2.Conv2d",
        "model1.3": "Chain_3.Conv2d",
        "model2.0.conv_block.1": "Residual_1.Conv2d_1",
        "model2.0.conv_block.5": "Residual_1.Conv2d_2",
        "model2.1.conv_block.1": "Residual_2.Conv2d_1",
        "model2.1.conv_block.5": "Residual_2.Conv2d_2",
        "model2.2.conv_block.1": "Residual_3.Conv2d_1",
        "model2.2.conv_block.5": "Residual_3.Conv2d_2",
        "model3.0": "Chain_4.ConvTranspose2d",
        "model3.3": "Chain_5.ConvTranspose2d",
        "model4.1": "Chain_6.Conv2d",
    },
)

# Conversion entry for the informative-drawings preprocessor, with a float32 target dtype.
informative_drawings = Conversion(
    # NOTE(review): `filename` is "model.pth" while `download_url` points at "model2.pth";
    # presumably the file is stored locally under a different name — confirm this is intended.
    original=Hub(
        repo_id="carolineec/informativedrawings",
        filename="model.pth",
        expected_sha256="30a534781061f34e83bb9406b4335da4ff2616c95d22a585c1245aa8363e74e0",
        download_url="https://huggingface.co/spaces/carolineec/informativedrawings/resolve/main/model2.pth",
    ),
    # Destination of the converted weights.
    converted=Hub(
        repo_id="refiners/preprocessor.informativedrawings",
        filename="model.safetensors",
        expected_sha256="0f9a34bfcd95d89aedcc213b8d279ba1bab1279b73d8d009d1632d6276e6fcf3",
    ),
    recipe=informative_drawings_recipe,
    dtype=torch.float32,
)

View file

@ -0,0 +1,293 @@
import logging
import types
from typing import Any, Callable, cast
import requests
import torch
import torch.nn as nn
from segment_anything import build_sam_vit_h # type: ignore
from segment_anything.modeling.common import LayerNorm2d # type: ignore
from torch import Tensor, nn
import refiners.fluxion.layers as fl
from refiners.conversion.model_converter import ModelConverter
from refiners.conversion.utils import Conversion, Hub
from refiners.fluxion.utils import load_tensors, manual_seed, save_to_safetensors
from refiners.foundationals.segment_anything.image_encoder import PositionalEncoder, SAMViTH
from refiners.foundationals.segment_anything.mask_decoder import MaskDecoder
from refiners.foundationals.segment_anything.prompt_encoder import MaskEncoder, PointEncoder
class FacebookSAM(nn.Module):
    """Typing stub describing the sub-modules of the model returned by `build_sam_vit_h`.

    It only declares attribute annotations; it defines no layers or behavior of its own.
    """

    # Converted by `convert_vit`.
    image_encoder: nn.Module
    # Converted by `convert_point_encoder` and `convert_mask_encoder`.
    prompt_encoder: nn.Module
    # Converted by `convert_mask_decoder`.
    mask_decoder: nn.Module
# Re-annotate the (untyped) factory so that its result is seen as a `FacebookSAM`.
# `cast` has no runtime effect.
build_sam_vit_h = cast(Callable[[], FacebookSAM], build_sam_vit_h)

# Sanity check on the imported class; presumably also helps the type checker
# treat `LayerNorm2d` as an `nn.Module` subclass — confirm.
assert issubclass(LayerNorm2d, nn.Module)
# Source-layer -> refiners-layer mapping passed to `ModelConverter` as `custom_layer_mapping`.
custom_layers = {LayerNorm2d: fl.LayerNorm2d}
def convert_mask_encoder(prompt_encoder: nn.Module) -> dict[str, Tensor]:
    """Build the refiners `MaskEncoder` state dict from the original prompt encoder.

    The `mask_downscaling` sub-module is mapped with `ModelConverter`, while the
    `no_mask_embed` weight is copied by hand into the `no_mask_embedding` entry.

    Args:
        prompt_encoder: Original prompt encoder; it must expose `mask_downscaling`
            and `no_mask_embed`.

    Returns:
        The state dict that was successfully loaded into a fresh `MaskEncoder`.
    """
    manual_seed(seed=0)
    refiners_mask_encoder = MaskEncoder()
    mask_downscaling = prompt_encoder.mask_downscaling

    converter = ModelConverter(
        source_model=mask_downscaling,
        target_model=refiners_mask_encoder,
        custom_layer_mapping=custom_layers,  # type: ignore
        verbose=False,
    )

    # Trace both models on a random input to pair up their layers.
    probe = torch.randn(1, 256, 256)
    mapping = converter.map_state_dicts(source_args=(probe,))
    assert mapping

    # Mapping handled manually (see below) because nn.Parameter is a special case
    target_state_dict = refiners_mask_encoder.state_dict()
    target_state_dict.pop("no_mask_embedding")

    converted_source = converter._convert_state_dict(  # pyright: ignore[reportPrivateUsage]
        source_state_dict=mask_downscaling.state_dict(),
        target_state_dict=target_state_dict,
        state_dict_mapping=mapping,
    )

    state_dict: dict[str, Tensor] = {
        "no_mask_embedding": nn.Parameter(data=prompt_encoder.no_mask_embed.weight.clone()),  # type: ignore
        **converted_source,
    }

    # Loading validates that every expected key is present.
    refiners_mask_encoder.load_state_dict(state_dict=state_dict)
    return state_dict
def convert_point_encoder(prompt_encoder: nn.Module) -> dict[str, Tensor]:
    """Build the refiners `PointEncoder` state dict from the original prompt encoder.

    Args:
        prompt_encoder: Original prompt encoder; it must expose `point_embeddings`,
            `not_a_point_embed` and `pe_layer.positional_encoding_gaussian_matrix`.

    Returns:
        The two-entry state dict that was successfully loaded into a fresh `PointEncoder`.
    """
    manual_seed(seed=0)

    # Per-point-type embeddings, followed by the "not a point" embedding.
    embedding_weights: list[Tensor] = [
        *(embedding.weight for embedding in prompt_encoder.point_embeddings),  # type: ignore
        prompt_encoder.not_a_point_embed.weight,  # type: ignore
    ]

    gaussian_matrix = prompt_encoder.pe_layer.positional_encoding_gaussian_matrix  # type: ignore
    assert isinstance(gaussian_matrix, Tensor)

    point_type_embedding = nn.Parameter(data=torch.cat(tensors=embedding_weights, dim=0))
    # The matrix is stored transposed in the Linear weight.
    coordinate_weight = nn.Parameter(data=gaussian_matrix.T.contiguous())
    state_dict: dict[str, Tensor] = {
        "Residual.PointTypeEmbedding.weight": point_type_embedding,
        "CoordinateEncoder.Linear.weight": coordinate_weight,
    }

    # Loading validates that the keys match the target model.
    PointEncoder().load_state_dict(state_dict=state_dict)
    return state_dict
def convert_vit(vit: nn.Module) -> dict[str, Tensor]:
    """Build the refiners `SAMViTH` state dict from the original image encoder.

    Regular layers are mapped with `ModelConverter`. The positional embedding and
    the 32 pairs of relative-position embeddings are moved by hand.

    Args:
        vit: Original image encoder.

    Returns:
        The state dict that was loaded into a fresh `SAMViTH`, after checking that
        both models agree on a random input (threshold 0.5).
    """
    manual_seed(seed=0)
    refiners_sam_vit_h = SAMViTH()

    converter = ModelConverter(
        source_model=vit,
        target_model=refiners_sam_vit_h,
        custom_layer_mapping=custom_layers,  # type: ignore
        verbose=False,
    )
    converter.skip_init_check = True

    x = torch.randn(1, 3, 1024, 1024)
    mapping = converter.map_state_dicts(source_args=(x,))
    assert mapping

    positional_key = "PositionalEncoder.Parameter.weight"
    mapping[positional_key] = "pos_embed"

    # The positional embedding is removed from both sides and re-inserted manually below.
    target_state_dict = refiners_sam_vit_h.state_dict()
    del target_state_dict[positional_key]
    source_state_dict = vit.state_dict()
    pos_embed = source_state_dict.pop("pos_embed")

    # Relative-position embeddings: source block `i` maps to `TransformerLayer_{i + 1}`,
    # `rel_pos_w` to the horizontal embedding and `rel_pos_h` to the vertical one.
    rel_items: dict[str, Tensor] = {}
    for block in range(32):
        attention = (
            f"Transformer.TransformerLayer_{block + 1}.Residual_1.FusedSelfAttention.RelativePositionAttention"
        )
        target_key_w = f"{attention}.horizontal_embedding"
        target_key_h = f"{attention}.vertical_embedding"
        rel_items[target_key_w] = source_state_dict[f"blocks.{block}.attn.rel_pos_w"]
        rel_items[target_key_h] = source_state_dict[f"blocks.{block}.attn.rel_pos_h"]
        del source_state_dict[f"blocks.{block}.attn.rel_pos_w"]
        del source_state_dict[f"blocks.{block}.attn.rel_pos_h"]
        del target_state_dict[target_key_w]
        del target_state_dict[target_key_h]

    converted_source = converter._convert_state_dict(  # pyright: ignore[reportPrivateUsage]
        source_state_dict=source_state_dict,
        target_state_dict=target_state_dict,
        state_dict_mapping=mapping,
    )

    # Reshape the original positional embedding to the shape of the target parameter.
    positional_encoder = refiners_sam_vit_h.layer("PositionalEncoder", PositionalEncoder)
    target_parameter = positional_encoder.layer("Parameter", fl.Parameter).weight
    converted_source[positional_key] = pos_embed.reshape_as(target_parameter)  # type: ignore
    converted_source.update(rel_items)

    refiners_sam_vit_h.load_state_dict(state_dict=converted_source)
    assert converter.compare_models((x,), threshold=0.5)
    return converted_source
def convert_mask_decoder(mask_decoder: nn.Module) -> dict[str, Tensor]:
    """Build the refiners `MaskDecoder` state dict from the original mask decoder.

    Regular layers are mapped with `ModelConverter`. The token parameter is built
    by hand by concatenating the original `iou_token` and `mask_tokens` weights.

    Args:
        mask_decoder: Original mask decoder; it must expose `iou_token` and `mask_tokens`.

    Returns:
        The state dict that was loaded into a fresh `MaskDecoder`, after checking that
        both models agree on random inputs (threshold 1e-3).
    """
    manual_seed(seed=0)

    refiners_mask_decoder = MaskDecoder()

    image_embedding = torch.randn(1, 256, 64, 64)
    dense_positional_embedding = torch.randn(1, 256, 64, 64)
    point_embedding = torch.randn(1, 3, 256)
    mask_embedding = torch.randn(1, 256, 64, 64)

    def set_embeddings() -> None:
        """Feed the random embeddings to the refiners decoder (it takes no call arguments here)."""
        refiners_mask_decoder.set_image_embedding(image_embedding)
        refiners_mask_decoder.set_point_embedding(point_embedding)
        refiners_mask_decoder.set_mask_embedding(mask_embedding)
        refiners_mask_decoder.set_dense_positional_embedding(dense_positional_embedding)

    # Uses the module-level `custom_layers` mapping; the function-local re-imports and
    # re-definition that used to shadow it were identical and have been removed.
    converter = ModelConverter(
        source_model=mask_decoder,
        target_model=refiners_mask_decoder,
        custom_layer_mapping=custom_layers,  # type: ignore
        verbose=False,
    )

    inputs = {
        "image_embeddings": image_embedding,
        "image_pe": dense_positional_embedding,
        "sparse_prompt_embeddings": point_embedding,
        "dense_prompt_embeddings": mask_embedding,
        "multimask_output": True,
    }

    set_embeddings()

    mapping = converter.map_state_dicts(source_args=inputs, target_args={})
    assert mapping is not None
    mapping["MaskDecoderTokens.Parameter"] = "iou_token"

    state_dict = converter._convert_state_dict(  # type: ignore
        source_state_dict=mask_decoder.state_dict(),
        target_state_dict=refiners_mask_decoder.state_dict(),
        state_dict_mapping=mapping,
    )
    # The refiners token parameter holds the IoU token followed by the mask tokens.
    state_dict["MaskDecoderTokens.Parameter.weight"] = torch.cat(
        tensors=[mask_decoder.iou_token.weight, mask_decoder.mask_tokens.weight], dim=0
    )  # type: ignore
    refiners_mask_decoder.load_state_dict(state_dict=state_dict)

    # Set the embeddings again before the comparison run, as the original code did.
    set_embeddings()

    # Perform (1) upscaling then (2) mask prediction in this order (= like in the official implementation) to make
    # `compare_models` happy (MaskPrediction's Matmul runs those in the reverse order by default)
    matmul = refiners_mask_decoder.ensure_find(fl.Matmul)

    def forward_swapped_order(self: Any, *args: Any) -> Any:
        y = self[1](*args)  # (1)
        x = self[0](*args)  # (2)
        return torch.matmul(input=x, other=y)

    matmul.forward = types.MethodType(forward_swapped_order, matmul)

    assert converter.compare_models(source_args=inputs, target_args={}, threshold=1e-3)

    return state_dict
# TODO(laurent): convert this to a simple mapping
class ModelConverterHubDuo(Conversion):
    """Conversion of the original SAM ViT-H checkpoint into refiners weights.

    Each part of the model (image encoder, mask decoder, point encoder, mask
    encoder) is converted by its own module-level function, then the results are
    merged under the `SAMViTH.`, `MaskDecoder.`, `PointEncoder.` and
    `MaskEncoder.` prefixes.
    """

    def __init__(
        self,
        original: Hub,
        converted: Hub,
        dtype: torch.dtype,
    ) -> None:
        """Store the source hub, the destination hub and the target dtype.

        Args:
            original: Location of the original checkpoint.
            converted: Location of the converted weights.
            dtype: Dtype the converted tensors are cast to.
        """
        # The parent initializer is not invoked; attributes are set directly.
        self.original = original
        self.converted = converted
        self.dtype = dtype

    def convert(self) -> None:
        """Convert the original checkpoint and save it as safetensors.

        Raises:
            AssertionError: If a local or remote hash check reports a mismatch.
                The checks are explicit `raise` statements (not `assert`) so they
                still run when Python is started with `-O`.
        """
        logging.info(f"Converting {self.original.repo_id} to {self.converted.repo_id}")

        # check if the converted file already exists
        if self.converted.local_path.is_file():
            logging.warning(f"{self.converted.local_path} already exists")
            if self.converted.check_local_hash():
                try:
                    remote_matches = self.converted.check_remote_hash()
                except requests.exceptions.HTTPError:
                    # The remote hash could not be checked; keep the local file anyway.
                    logging.error(f"{self.converted.local_path} couldn't verify remote hash")
                else:
                    if not remote_matches:
                        raise AssertionError(f"{self.converted.local_path} failed the remote hash check")
                # Local file is already valid: nothing to convert.
                return

        # get the original state_dict
        self.original.download()

        # load the original model
        sam_h = build_sam_vit_h()  # type: ignore
        sam_h.load_state_dict(state_dict=load_tensors(self.original.local_path))

        # convert each part of the model
        vit_state_dict = convert_vit(vit=sam_h.image_encoder)
        mask_decoder_state_dict = convert_mask_decoder(mask_decoder=sam_h.mask_decoder)
        point_encoder_state_dict = convert_point_encoder(prompt_encoder=sam_h.prompt_encoder)
        mask_encoder_state_dict = convert_mask_encoder(prompt_encoder=sam_h.prompt_encoder)

        # build the entire state_dict
        output_state_dict = {
            **{f"SAMViTH.{key}": value for key, value in vit_state_dict.items()},
            **{f"MaskDecoder.{key}": value for key, value in mask_decoder_state_dict.items()},
            **{f"PointEncoder.{key}": value for key, value in point_encoder_state_dict.items()},
            **{f"MaskEncoder.{key}": value for key, value in mask_encoder_state_dict.items()},
        }

        # cast the merged state_dict to the requested dtype
        state_dict = self.change_dtype(output_state_dict, self.dtype)

        # save the converted state_dict
        self.converted.local_path.parent.mkdir(parents=True, exist_ok=True)
        save_to_safetensors(self.converted.local_path, state_dict)

        # check the converted state_dict
        if not self.converted.check_local_hash():
            raise AssertionError(f"{self.converted.local_path} failed the local hash check")
        try:
            remote_matches = self.converted.check_remote_hash()
        except requests.exceptions.HTTPError:
            logging.warning(f"{self.converted.local_path} couldn't verify remote hash")
        else:
            if not remote_matches:
                raise AssertionError(f"{self.converted.local_path} failed the remote hash check")
# Conversion entry for the SAM ViT-H checkpoint, with a float32 target dtype.
vit_h = ModelConverterHubDuo(
    # Source checkpoint, fetched through an explicit download URL (no pinned revision).
    original=Hub(
        repo_id="facebook/github_segment_anything",
        filename="sam_vit_h.pth",
        expected_sha256="a7bf3b02f3ebf1267aba913ff637d9a2d5c33d3173bb679e46d9f338c26f262e",
        download_url="https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth",
    ),
    # Destination of the converted weights.
    converted=Hub(
        repo_id="refiners/sam.vit_h",
        filename="model.safetensors",
        expected_sha256="acc3034e9253b8e91d3e56b12e4c846c5bd44b640fd2e08bf328229f4714e8cf",
    ),
    dtype=torch.float32,
)

View file

@ -0,0 +1,46 @@
import torch
from refiners.conversion.utils import Conversion, Hub, WeightRecipe
# Key mapping from the original T2I-Adapter checkpoint names (left) to the
# refiners layer names (right). Source body `N` maps to `StatefulResidualBlocks_{N + 1}`.
diffusers_recipe = WeightRecipe(
    key_map={
        "adapter.conv_in": "Conv2d",
        "adapter.body.0.resnets.0.block1": "StatefulResidualBlocks_1.ResidualBlocks.Chain.ResidualBlock_1.Conv2d_1",
        "adapter.body.0.resnets.1.block1": "StatefulResidualBlocks_1.ResidualBlocks.Chain.ResidualBlock_2.Conv2d_1",
        "adapter.body.0.resnets.0.block2": "StatefulResidualBlocks_1.ResidualBlocks.Chain.ResidualBlock_1.Conv2d_2",
        "adapter.body.0.resnets.1.block2": "StatefulResidualBlocks_1.ResidualBlocks.Chain.ResidualBlock_2.Conv2d_2",
        # Bodies 1 to 3 each have a downsample entry.
        "adapter.body.1.downsample": "StatefulResidualBlocks_2.ResidualBlocks.Downsample2d",
        "adapter.body.2.downsample": "StatefulResidualBlocks_3.ResidualBlocks.Downsample2d",
        "adapter.body.3.downsample": "StatefulResidualBlocks_4.ResidualBlocks.Downsample2d",
        "adapter.body.1.in_conv": "StatefulResidualBlocks_2.ResidualBlocks.Conv2d",
        "adapter.body.1.resnets.0.block1": "StatefulResidualBlocks_2.ResidualBlocks.Chain.ResidualBlock_1.Conv2d_1",
        "adapter.body.1.resnets.1.block1": "StatefulResidualBlocks_2.ResidualBlocks.Chain.ResidualBlock_2.Conv2d_1",
        "adapter.body.1.resnets.0.block2": "StatefulResidualBlocks_2.ResidualBlocks.Chain.ResidualBlock_1.Conv2d_2",
        "adapter.body.1.resnets.1.block2": "StatefulResidualBlocks_2.ResidualBlocks.Chain.ResidualBlock_2.Conv2d_2",
        # Only bodies 1 and 2 have an `in_conv` entry; body 3 has none in this mapping.
        "adapter.body.2.in_conv": "StatefulResidualBlocks_3.ResidualBlocks.Conv2d",
        "adapter.body.2.resnets.0.block1": "StatefulResidualBlocks_3.ResidualBlocks.Chain.ResidualBlock_1.Conv2d_1",
        "adapter.body.2.resnets.1.block1": "StatefulResidualBlocks_3.ResidualBlocks.Chain.ResidualBlock_2.Conv2d_1",
        "adapter.body.3.resnets.0.block1": "StatefulResidualBlocks_4.ResidualBlocks.Chain.ResidualBlock_1.Conv2d_1",
        "adapter.body.3.resnets.1.block1": "StatefulResidualBlocks_4.ResidualBlocks.Chain.ResidualBlock_2.Conv2d_1",
        "adapter.body.2.resnets.0.block2": "StatefulResidualBlocks_3.ResidualBlocks.Chain.ResidualBlock_1.Conv2d_2",
        "adapter.body.2.resnets.1.block2": "StatefulResidualBlocks_3.ResidualBlocks.Chain.ResidualBlock_2.Conv2d_2",
        "adapter.body.3.resnets.0.block2": "StatefulResidualBlocks_4.ResidualBlocks.Chain.ResidualBlock_1.Conv2d_2",
        "adapter.body.3.resnets.1.block2": "StatefulResidualBlocks_4.ResidualBlocks.Chain.ResidualBlock_2.Conv2d_2",
    },
)
# Conversion entry for the SD1.5 depth T2I-Adapter, using `diffusers_recipe`
# and a float16 target dtype.
depth = Conversion(
    # Source checkpoint, pinned to a specific revision.
    original=Hub(
        repo_id="TencentARC/t2iadapter_depth_sd15v2",
        filename="diffusion_pytorch_model.bin",
        revision="9f96518933daa6c9386692914f72af81a0f6978f",
        expected_sha256="68aaebf5e7d5eeb62eaea9476c68d279ba98d0876b385cc925e12c43cee19edd",
    ),
    # Destination of the converted weights.
    converted=Hub(
        repo_id="refiners/sd15.t2i_adapter.depth",
        filename="model.safetensors",
        expected_sha256="0178baeb59713ef4ae4dcbca0a2d3447fdd42bbeeaed019d3dc01f0f1913f74f",
    ),
    recipe=diffusers_recipe,
    dtype=torch.float16,
)

View file

@ -0,0 +1,44 @@
import torch
from refiners.conversion.utils import Conversion, Hub, WeightRecipe
# Key map from source parameter prefixes ("adapter.*", presumably the diffusers
# T2I-Adapter layout, going by the variable name) to target module paths
# ("StatefulResidualBlocks_N...."). Source `adapter.body.<i>` maps to target
# `StatefulResidualBlocks_<i+1>`, and `resnets.<j>.block<k>` maps to
# `ResidualBlock_<j+1>.Conv2d_<k>`.
# NOTE(review): entries are not in sorted order; it is not visible from here
# whether WeightRecipe depends on insertion order, so keep the order as-is.
diffusers_recipe = WeightRecipe(
key_map={
"adapter.conv_in": "Conv2d",
# body.0: residual blocks only (no in_conv / downsample entry).
"adapter.body.0.resnets.0.block1": "StatefulResidualBlocks_1.ResidualBlocks.Chain.ResidualBlock_1.Conv2d_1",
"adapter.body.0.resnets.1.block1": "StatefulResidualBlocks_1.ResidualBlocks.Chain.ResidualBlock_2.Conv2d_1",
"adapter.body.0.resnets.0.block2": "StatefulResidualBlocks_1.ResidualBlocks.Chain.ResidualBlock_1.Conv2d_2",
"adapter.body.0.resnets.1.block2": "StatefulResidualBlocks_1.ResidualBlocks.Chain.ResidualBlock_2.Conv2d_2",
# body.1: has an in_conv entry, no downsample entry.
"adapter.body.1.in_conv": "StatefulResidualBlocks_2.ResidualBlocks.Conv2d",
"adapter.body.1.resnets.0.block1": "StatefulResidualBlocks_2.ResidualBlocks.Chain.ResidualBlock_1.Conv2d_1",
"adapter.body.1.resnets.1.block1": "StatefulResidualBlocks_2.ResidualBlocks.Chain.ResidualBlock_2.Conv2d_1",
"adapter.body.1.resnets.0.block2": "StatefulResidualBlocks_2.ResidualBlocks.Chain.ResidualBlock_1.Conv2d_2",
"adapter.body.1.resnets.1.block2": "StatefulResidualBlocks_2.ResidualBlocks.Chain.ResidualBlock_2.Conv2d_2",
# body.2: the only body with both a downsample and an in_conv entry.
"adapter.body.2.downsample": "StatefulResidualBlocks_3.ResidualBlocks.Downsample2d",
"adapter.body.2.in_conv": "StatefulResidualBlocks_3.ResidualBlocks.Conv2d",
# From here on, body.2 and body.3 entries are interleaved (block1 entries
# for both bodies first, then block2 entries for both).
"adapter.body.2.resnets.0.block1": "StatefulResidualBlocks_3.ResidualBlocks.Chain.ResidualBlock_1.Conv2d_1",
"adapter.body.2.resnets.1.block1": "StatefulResidualBlocks_3.ResidualBlocks.Chain.ResidualBlock_2.Conv2d_1",
"adapter.body.3.resnets.0.block1": "StatefulResidualBlocks_4.ResidualBlocks.Chain.ResidualBlock_1.Conv2d_1",
"adapter.body.3.resnets.1.block1": "StatefulResidualBlocks_4.ResidualBlocks.Chain.ResidualBlock_2.Conv2d_1",
"adapter.body.2.resnets.0.block2": "StatefulResidualBlocks_3.ResidualBlocks.Chain.ResidualBlock_1.Conv2d_2",
"adapter.body.2.resnets.1.block2": "StatefulResidualBlocks_3.ResidualBlocks.Chain.ResidualBlock_2.Conv2d_2",
"adapter.body.3.resnets.0.block2": "StatefulResidualBlocks_4.ResidualBlocks.Chain.ResidualBlock_1.Conv2d_2",
"adapter.body.3.resnets.1.block2": "StatefulResidualBlocks_4.ResidualBlocks.Chain.ResidualBlock_2.Conv2d_2",
},
)
# Conversion entry for the canny T2I-Adapter checkpoint: ties the upstream
# safetensors file to its converted counterpart, using the `diffusers_recipe`
# key map defined above.
canny = Conversion(
# Upstream weights, pinned to an exact repository revision.
original=Hub(
repo_id="TencentARC/t2i-adapter-canny-sdxl-1.0",
filename="diffusion_pytorch_model.safetensors",
revision="2d7244ba45ded9129cfbf8e96a4befb7f6094210",
expected_sha256="b601b28b7df0c0dcbbaf704ab8ba6fd22bcf35c9a875fa0c9bc933d47cc27438",
),
# Converted weights. No `revision` is given here, so only `expected_sha256`
# identifies the exact file (presumably verified by Hub — TODO confirm).
converted=Hub(
repo_id="refiners/sdxl.t2i_adapter.canny",
filename="model.safetensors",
expected_sha256="3aabc9b964b220b0ff80ad383eebf1885f6298f74425c1dbee659c86127d4b60",
),
recipe=diffusers_recipe,
# NOTE(review): presumably the dtype the converted weights are stored in — confirm.
dtype=torch.float16,
)

View file

@ -0,0 +1,949 @@
import torch
from refiners.conversion.utils import Conversion, Hub, WeightRecipe
diffusers_recipe = WeightRecipe(
key_map={
"time_embedding.linear_1": "TimestepEncoder.RangeEncoder.Linear_1",
"time_embedding.linear_2": "TimestepEncoder.RangeEncoder.Linear_2",
"down_blocks.2.resnets.0.time_emb_proj": "DownBlocks.Chain_8.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"down_blocks.2.resnets.1.time_emb_proj": "DownBlocks.Chain_9.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"down_blocks.3.resnets.0.time_emb_proj": "DownBlocks.Chain_11.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"down_blocks.3.resnets.1.time_emb_proj": "DownBlocks.Chain_12.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"mid_block.resnets.0.time_emb_proj": "Sum.MiddleBlock.ResidualBlock_1.Chain.RangeAdapter2d.Chain.Linear",
"mid_block.attentions.0.transformer_blocks.0.attn1.to_out.0": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"mid_block.attentions.0.transformer_blocks.0.attn2.to_out.0": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"mid_block.resnets.1.time_emb_proj": "Sum.MiddleBlock.ResidualBlock_2.Chain.RangeAdapter2d.Chain.Linear",
"up_blocks.0.resnets.0.time_emb_proj": "UpBlocks.Chain_1.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"up_blocks.0.resnets.1.time_emb_proj": "UpBlocks.Chain_2.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"up_blocks.0.resnets.2.time_emb_proj": "UpBlocks.Chain_3.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"up_blocks.1.resnets.0.time_emb_proj": "UpBlocks.Chain_4.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"up_blocks.1.resnets.1.time_emb_proj": "UpBlocks.Chain_5.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"up_blocks.1.resnets.2.time_emb_proj": "UpBlocks.Chain_6.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"conv_in": "DownBlocks.Chain_1.Conv2d",
"down_blocks.0.resnets.0.norm1": "DownBlocks.Chain_2.ResidualBlock.Chain.GroupNorm_1",
"down_blocks.0.resnets.0.norm2": "DownBlocks.Chain_2.ResidualBlock.Chain.GroupNorm_2",
"down_blocks.0.attentions.0.norm": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_1.GroupNorm",
"down_blocks.0.resnets.1.norm1": "DownBlocks.Chain_3.ResidualBlock.Chain.GroupNorm_1",
"down_blocks.0.resnets.1.norm2": "DownBlocks.Chain_3.ResidualBlock.Chain.GroupNorm_2",
"down_blocks.0.attentions.1.norm": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_1.GroupNorm",
"down_blocks.1.resnets.0.norm1": "DownBlocks.Chain_5.ResidualBlock.Chain.GroupNorm_1",
"up_blocks.3.resnets.0.norm2": "UpBlocks.Chain_10.ResidualBlock.Chain.GroupNorm_2",
"up_blocks.3.attentions.0.norm": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_1.GroupNorm",
"up_blocks.3.resnets.1.norm2": "UpBlocks.Chain_11.ResidualBlock.Chain.GroupNorm_2",
"up_blocks.3.attentions.1.norm": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_1.GroupNorm",
"up_blocks.3.resnets.2.norm2": "UpBlocks.Chain_12.ResidualBlock.Chain.GroupNorm_2",
"up_blocks.3.attentions.2.norm": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_1.GroupNorm",
"conv_norm_out": "Chain.GroupNorm",
"down_blocks.0.resnets.0.conv1": "DownBlocks.Chain_2.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"down_blocks.0.resnets.0.conv2": "DownBlocks.Chain_2.ResidualBlock.Chain.Conv2d",
"down_blocks.0.resnets.1.conv1": "DownBlocks.Chain_3.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"down_blocks.0.resnets.1.conv2": "DownBlocks.Chain_3.ResidualBlock.Chain.Conv2d",
"down_blocks.0.downsamplers.0.conv": "DownBlocks.Chain_4.Downsample.Conv2d",
"up_blocks.3.resnets.0.conv2": "UpBlocks.Chain_10.ResidualBlock.Chain.Conv2d",
"up_blocks.3.resnets.1.conv2": "UpBlocks.Chain_11.ResidualBlock.Chain.Conv2d",
"up_blocks.3.resnets.2.conv2": "UpBlocks.Chain_12.ResidualBlock.Chain.Conv2d",
"down_blocks.0.resnets.0.time_emb_proj": "DownBlocks.Chain_2.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"down_blocks.0.attentions.0.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"down_blocks.0.resnets.1.time_emb_proj": "DownBlocks.Chain_3.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"down_blocks.0.attentions.1.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"up_blocks.3.resnets.0.time_emb_proj": "UpBlocks.Chain_10.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"up_blocks.3.attentions.0.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"up_blocks.3.resnets.1.time_emb_proj": "UpBlocks.Chain_11.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"up_blocks.3.attentions.1.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"up_blocks.3.resnets.2.time_emb_proj": "UpBlocks.Chain_12.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"up_blocks.3.attentions.2.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"down_blocks.0.attentions.0.proj_in": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_1.Conv2d",
"down_blocks.0.attentions.0.proj_out": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_3.Conv2d",
"down_blocks.0.attentions.1.proj_in": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_1.Conv2d",
"down_blocks.0.attentions.1.proj_out": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_3.Conv2d",
"up_blocks.3.attentions.0.proj_in": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_1.Conv2d",
"up_blocks.3.attentions.0.proj_out": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_3.Conv2d",
"up_blocks.3.attentions.1.proj_in": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_1.Conv2d",
"up_blocks.3.attentions.1.proj_out": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_3.Conv2d",
"up_blocks.3.attentions.2.proj_in": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_1.Conv2d",
"up_blocks.3.attentions.2.proj_out": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_3.Conv2d",
"down_blocks.0.attentions.0.transformer_blocks.0.norm1": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"down_blocks.0.attentions.0.transformer_blocks.0.norm2": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"down_blocks.0.attentions.0.transformer_blocks.0.norm3": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"down_blocks.0.attentions.1.transformer_blocks.0.norm1": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"down_blocks.0.attentions.1.transformer_blocks.0.norm2": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"down_blocks.0.attentions.1.transformer_blocks.0.norm3": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"up_blocks.3.attentions.0.transformer_blocks.0.norm1": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"up_blocks.3.attentions.0.transformer_blocks.0.norm2": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"up_blocks.3.attentions.0.transformer_blocks.0.norm3": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"up_blocks.3.attentions.1.transformer_blocks.0.norm1": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"up_blocks.3.attentions.1.transformer_blocks.0.norm2": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"up_blocks.3.attentions.1.transformer_blocks.0.norm3": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"up_blocks.3.attentions.2.transformer_blocks.0.norm1": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"up_blocks.3.attentions.2.transformer_blocks.0.norm2": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"up_blocks.3.attentions.2.transformer_blocks.0.norm3": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"down_blocks.0.attentions.0.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"down_blocks.0.attentions.1.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"up_blocks.3.attentions.0.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"up_blocks.3.attentions.1.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"up_blocks.3.attentions.2.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"down_blocks.1.resnets.0.conv1": "DownBlocks.Chain_5.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"down_blocks.1.resnets.0.time_emb_proj": "DownBlocks.Chain_5.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"down_blocks.1.resnets.1.time_emb_proj": "DownBlocks.Chain_6.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"up_blocks.2.resnets.0.time_emb_proj": "UpBlocks.Chain_7.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"up_blocks.2.resnets.1.time_emb_proj": "UpBlocks.Chain_8.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"up_blocks.2.resnets.2.time_emb_proj": "UpBlocks.Chain_9.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"down_blocks.1.resnets.0.norm2": "DownBlocks.Chain_5.ResidualBlock.Chain.GroupNorm_2",
"down_blocks.1.attentions.0.norm": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_1.GroupNorm",
"down_blocks.1.resnets.1.norm1": "DownBlocks.Chain_6.ResidualBlock.Chain.GroupNorm_1",
"down_blocks.1.resnets.1.norm2": "DownBlocks.Chain_6.ResidualBlock.Chain.GroupNorm_2",
"down_blocks.1.attentions.1.norm": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_1.GroupNorm",
"down_blocks.2.resnets.0.norm1": "DownBlocks.Chain_8.ResidualBlock.Chain.GroupNorm_1",
"up_blocks.2.resnets.0.norm2": "UpBlocks.Chain_7.ResidualBlock.Chain.GroupNorm_2",
"up_blocks.2.attentions.0.norm": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_1.GroupNorm",
"up_blocks.2.resnets.1.norm2": "UpBlocks.Chain_8.ResidualBlock.Chain.GroupNorm_2",
"up_blocks.2.attentions.1.norm": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_1.GroupNorm",
"up_blocks.2.resnets.2.norm2": "UpBlocks.Chain_9.ResidualBlock.Chain.GroupNorm_2",
"up_blocks.2.attentions.2.norm": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_1.GroupNorm",
"up_blocks.3.resnets.1.norm1": "UpBlocks.Chain_11.ResidualBlock.Chain.GroupNorm_1",
"up_blocks.3.resnets.2.norm1": "UpBlocks.Chain_12.ResidualBlock.Chain.GroupNorm_1",
"down_blocks.1.resnets.0.conv2": "DownBlocks.Chain_5.ResidualBlock.Chain.Conv2d",
"down_blocks.1.resnets.1.conv1": "DownBlocks.Chain_6.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"down_blocks.1.resnets.1.conv2": "DownBlocks.Chain_6.ResidualBlock.Chain.Conv2d",
"down_blocks.1.downsamplers.0.conv": "DownBlocks.Chain_7.Downsample.Conv2d",
"up_blocks.2.resnets.0.conv2": "UpBlocks.Chain_7.ResidualBlock.Chain.Conv2d",
"up_blocks.2.resnets.1.conv2": "UpBlocks.Chain_8.ResidualBlock.Chain.Conv2d",
"up_blocks.2.resnets.2.conv2": "UpBlocks.Chain_9.ResidualBlock.Chain.Conv2d",
"up_blocks.2.upsamplers.0.conv": "UpBlocks.Chain_9.Upsample.Conv2d",
"down_blocks.1.resnets.0.conv_shortcut": "DownBlocks.Chain_5.ResidualBlock.Conv2d",
"down_blocks.1.attentions.0.proj_in": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_1.Conv2d",
"down_blocks.1.attentions.0.proj_out": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_3.Conv2d",
"down_blocks.1.attentions.1.proj_in": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_1.Conv2d",
"down_blocks.1.attentions.1.proj_out": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_3.Conv2d",
"up_blocks.2.attentions.0.proj_in": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_1.Conv2d",
"up_blocks.2.attentions.0.proj_out": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_3.Conv2d",
"up_blocks.2.attentions.1.proj_in": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_1.Conv2d",
"up_blocks.2.attentions.1.proj_out": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_3.Conv2d",
"up_blocks.2.attentions.2.proj_in": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_1.Conv2d",
"up_blocks.2.attentions.2.proj_out": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_3.Conv2d",
"down_blocks.1.attentions.0.transformer_blocks.0.norm1": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"down_blocks.1.attentions.0.transformer_blocks.0.norm2": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"down_blocks.1.attentions.0.transformer_blocks.0.norm3": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"down_blocks.1.attentions.1.transformer_blocks.0.norm1": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"down_blocks.1.attentions.1.transformer_blocks.0.norm2": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"down_blocks.1.attentions.1.transformer_blocks.0.norm3": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"up_blocks.2.attentions.0.transformer_blocks.0.norm1": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"up_blocks.2.attentions.0.transformer_blocks.0.norm2": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"up_blocks.2.attentions.0.transformer_blocks.0.norm3": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"up_blocks.2.attentions.1.transformer_blocks.0.norm1": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"up_blocks.2.attentions.1.transformer_blocks.0.norm2": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"up_blocks.2.attentions.1.transformer_blocks.0.norm3": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"up_blocks.2.attentions.2.transformer_blocks.0.norm1": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"up_blocks.2.attentions.2.transformer_blocks.0.norm2": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"up_blocks.2.attentions.2.transformer_blocks.0.norm3": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"up_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"up_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"up_blocks.2.attentions.2.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"down_blocks.1.attentions.0.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"up_blocks.2.attentions.0.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"up_blocks.2.attentions.1.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"up_blocks.2.attentions.2.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"down_blocks.2.resnets.0.conv1": "DownBlocks.Chain_8.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"down_blocks.2.resnets.0.norm2": "DownBlocks.Chain_8.ResidualBlock.Chain.GroupNorm_2",
"down_blocks.2.attentions.0.norm": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_1.GroupNorm",
"down_blocks.2.resnets.1.norm1": "DownBlocks.Chain_9.ResidualBlock.Chain.GroupNorm_1",
"down_blocks.2.resnets.1.norm2": "DownBlocks.Chain_9.ResidualBlock.Chain.GroupNorm_2",
"down_blocks.2.attentions.1.norm": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_1.GroupNorm",
"down_blocks.3.resnets.0.norm1": "DownBlocks.Chain_11.ResidualBlock.Chain.GroupNorm_1",
"down_blocks.3.resnets.0.norm2": "DownBlocks.Chain_11.ResidualBlock.Chain.GroupNorm_2",
"down_blocks.3.resnets.1.norm1": "DownBlocks.Chain_12.ResidualBlock.Chain.GroupNorm_1",
"down_blocks.3.resnets.1.norm2": "DownBlocks.Chain_12.ResidualBlock.Chain.GroupNorm_2",
"mid_block.resnets.0.norm1": "Sum.MiddleBlock.ResidualBlock_1.Chain.GroupNorm_1",
"mid_block.resnets.0.norm2": "Sum.MiddleBlock.ResidualBlock_1.Chain.GroupNorm_2",
"mid_block.attentions.0.norm": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_1.GroupNorm",
"mid_block.resnets.1.norm1": "Sum.MiddleBlock.ResidualBlock_2.Chain.GroupNorm_1",
"mid_block.resnets.1.norm2": "Sum.MiddleBlock.ResidualBlock_2.Chain.GroupNorm_2",
"up_blocks.0.resnets.0.norm2": "UpBlocks.Chain_1.ResidualBlock.Chain.GroupNorm_2",
"up_blocks.0.resnets.1.norm2": "UpBlocks.Chain_2.ResidualBlock.Chain.GroupNorm_2",
"up_blocks.0.resnets.2.norm2": "UpBlocks.Chain_3.ResidualBlock.Chain.GroupNorm_2",
"up_blocks.1.resnets.0.norm2": "UpBlocks.Chain_4.ResidualBlock.Chain.GroupNorm_2",
"up_blocks.1.attentions.0.norm": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_1.GroupNorm",
"up_blocks.1.resnets.1.norm2": "UpBlocks.Chain_5.ResidualBlock.Chain.GroupNorm_2",
"up_blocks.1.attentions.1.norm": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_1.GroupNorm",
"up_blocks.1.resnets.2.norm2": "UpBlocks.Chain_6.ResidualBlock.Chain.GroupNorm_2",
"up_blocks.1.attentions.2.norm": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_1.GroupNorm",
"up_blocks.2.resnets.1.norm1": "UpBlocks.Chain_8.ResidualBlock.Chain.GroupNorm_1",
"down_blocks.2.resnets.0.conv2": "DownBlocks.Chain_8.ResidualBlock.Chain.Conv2d",
"down_blocks.2.resnets.1.conv1": "DownBlocks.Chain_9.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"down_blocks.2.resnets.1.conv2": "DownBlocks.Chain_9.ResidualBlock.Chain.Conv2d",
"down_blocks.2.downsamplers.0.conv": "DownBlocks.Chain_10.Downsample.Conv2d",
"down_blocks.3.resnets.0.conv1": "DownBlocks.Chain_11.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"down_blocks.3.resnets.0.conv2": "DownBlocks.Chain_11.ResidualBlock.Chain.Conv2d",
"down_blocks.3.resnets.1.conv1": "DownBlocks.Chain_12.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"down_blocks.3.resnets.1.conv2": "DownBlocks.Chain_12.ResidualBlock.Chain.Conv2d",
"mid_block.resnets.0.conv1": "Sum.MiddleBlock.ResidualBlock_1.Chain.RangeAdapter2d.Conv2d",
"mid_block.resnets.0.conv2": "Sum.MiddleBlock.ResidualBlock_1.Chain.Conv2d",
"mid_block.resnets.1.conv1": "Sum.MiddleBlock.ResidualBlock_2.Chain.RangeAdapter2d.Conv2d",
"mid_block.resnets.1.conv2": "Sum.MiddleBlock.ResidualBlock_2.Chain.Conv2d",
"up_blocks.0.resnets.0.conv2": "UpBlocks.Chain_1.ResidualBlock.Chain.Conv2d",
"up_blocks.0.resnets.1.conv2": "UpBlocks.Chain_2.ResidualBlock.Chain.Conv2d",
"up_blocks.0.resnets.2.conv2": "UpBlocks.Chain_3.ResidualBlock.Chain.Conv2d",
"up_blocks.0.upsamplers.0.conv": "UpBlocks.Chain_3.Upsample.Conv2d",
"up_blocks.1.resnets.0.conv2": "UpBlocks.Chain_4.ResidualBlock.Chain.Conv2d",
"up_blocks.1.resnets.1.conv2": "UpBlocks.Chain_5.ResidualBlock.Chain.Conv2d",
"up_blocks.1.resnets.2.conv2": "UpBlocks.Chain_6.ResidualBlock.Chain.Conv2d",
"up_blocks.1.upsamplers.0.conv": "UpBlocks.Chain_6.Upsample.Conv2d",
"down_blocks.2.resnets.0.conv_shortcut": "DownBlocks.Chain_8.ResidualBlock.Conv2d",
"down_blocks.2.attentions.0.proj_in": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_1.Conv2d",
"down_blocks.2.attentions.0.proj_out": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_3.Conv2d",
"down_blocks.2.attentions.1.proj_in": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_1.Conv2d",
"down_blocks.2.attentions.1.proj_out": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_3.Conv2d",
"mid_block.attentions.0.proj_in": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_1.Conv2d",
"mid_block.attentions.0.proj_out": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_3.Conv2d",
"up_blocks.1.attentions.0.proj_in": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_1.Conv2d",
"up_blocks.1.attentions.0.proj_out": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_3.Conv2d",
"up_blocks.1.attentions.1.proj_in": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_1.Conv2d",
"up_blocks.1.attentions.1.proj_out": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_3.Conv2d",
"up_blocks.1.attentions.2.proj_in": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_1.Conv2d",
"up_blocks.1.attentions.2.proj_out": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_3.Conv2d",
"down_blocks.2.attentions.0.transformer_blocks.0.norm1": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"down_blocks.2.attentions.0.transformer_blocks.0.norm2": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"down_blocks.2.attentions.0.transformer_blocks.0.norm3": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"down_blocks.2.attentions.1.transformer_blocks.0.norm1": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"down_blocks.2.attentions.1.transformer_blocks.0.norm2": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"down_blocks.2.attentions.1.transformer_blocks.0.norm3": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"mid_block.attentions.0.transformer_blocks.0.norm1": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"mid_block.attentions.0.transformer_blocks.0.norm2": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"mid_block.attentions.0.transformer_blocks.0.norm3": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"up_blocks.1.attentions.0.transformer_blocks.0.norm1": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"up_blocks.1.attentions.0.transformer_blocks.0.norm2": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"up_blocks.1.attentions.0.transformer_blocks.0.norm3": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"up_blocks.1.attentions.1.transformer_blocks.0.norm1": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"up_blocks.1.attentions.1.transformer_blocks.0.norm2": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"up_blocks.1.attentions.1.transformer_blocks.0.norm3": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"up_blocks.1.attentions.2.transformer_blocks.0.norm1": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"up_blocks.1.attentions.2.transformer_blocks.0.norm2": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"up_blocks.1.attentions.2.transformer_blocks.0.norm3": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"mid_block.attentions.0.transformer_blocks.0.attn1.to_q": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"mid_block.attentions.0.transformer_blocks.0.attn1.to_k": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"mid_block.attentions.0.transformer_blocks.0.attn1.to_v": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"mid_block.attentions.0.transformer_blocks.0.attn2.to_q": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"mid_block.attentions.0.transformer_blocks.0.attn2.to_k": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"mid_block.attentions.0.transformer_blocks.0.attn2.to_v": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"mid_block.attentions.0.transformer_blocks.0.ff.net.0.proj": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"down_blocks.2.attentions.0.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"down_blocks.2.attentions.1.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"mid_block.attentions.0.transformer_blocks.0.ff.net.2": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"up_blocks.1.attentions.0.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"up_blocks.1.attentions.1.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"up_blocks.1.attentions.2.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"up_blocks.0.resnets.0.norm1": "UpBlocks.Chain_1.ResidualBlock.Chain.GroupNorm_1",
"up_blocks.0.resnets.1.norm1": "UpBlocks.Chain_2.ResidualBlock.Chain.GroupNorm_1",
"up_blocks.0.resnets.2.norm1": "UpBlocks.Chain_3.ResidualBlock.Chain.GroupNorm_1",
"up_blocks.1.resnets.0.norm1": "UpBlocks.Chain_4.ResidualBlock.Chain.GroupNorm_1",
"up_blocks.1.resnets.1.norm1": "UpBlocks.Chain_5.ResidualBlock.Chain.GroupNorm_1",
"up_blocks.0.resnets.0.conv1": "UpBlocks.Chain_1.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"up_blocks.0.resnets.1.conv1": "UpBlocks.Chain_2.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"up_blocks.0.resnets.2.conv1": "UpBlocks.Chain_3.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"up_blocks.1.resnets.0.conv1": "UpBlocks.Chain_4.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"up_blocks.1.resnets.1.conv1": "UpBlocks.Chain_5.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"up_blocks.0.resnets.0.conv_shortcut": "UpBlocks.Chain_1.ResidualBlock.Conv2d",
"up_blocks.0.resnets.1.conv_shortcut": "UpBlocks.Chain_2.ResidualBlock.Conv2d",
"up_blocks.0.resnets.2.conv_shortcut": "UpBlocks.Chain_3.ResidualBlock.Conv2d",
"up_blocks.1.resnets.0.conv_shortcut": "UpBlocks.Chain_4.ResidualBlock.Conv2d",
"up_blocks.1.resnets.1.conv_shortcut": "UpBlocks.Chain_5.ResidualBlock.Conv2d",
"up_blocks.1.resnets.2.norm1": "UpBlocks.Chain_6.ResidualBlock.Chain.GroupNorm_1",
"up_blocks.2.resnets.0.norm1": "UpBlocks.Chain_7.ResidualBlock.Chain.GroupNorm_1",
"up_blocks.1.resnets.2.conv1": "UpBlocks.Chain_6.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"up_blocks.1.resnets.2.conv_shortcut": "UpBlocks.Chain_6.ResidualBlock.Conv2d",
"up_blocks.2.resnets.0.conv1": "UpBlocks.Chain_7.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"up_blocks.2.resnets.0.conv_shortcut": "UpBlocks.Chain_7.ResidualBlock.Conv2d",
"up_blocks.2.resnets.1.conv1": "UpBlocks.Chain_8.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"up_blocks.2.resnets.1.conv_shortcut": "UpBlocks.Chain_8.ResidualBlock.Conv2d",
"up_blocks.2.resnets.2.norm1": "UpBlocks.Chain_9.ResidualBlock.Chain.GroupNorm_1",
"up_blocks.3.resnets.0.norm1": "UpBlocks.Chain_10.ResidualBlock.Chain.GroupNorm_1",
"up_blocks.2.resnets.2.conv1": "UpBlocks.Chain_9.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"up_blocks.2.resnets.2.conv_shortcut": "UpBlocks.Chain_9.ResidualBlock.Conv2d",
"up_blocks.3.resnets.0.conv1": "UpBlocks.Chain_10.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"up_blocks.3.resnets.0.conv_shortcut": "UpBlocks.Chain_10.ResidualBlock.Conv2d",
"up_blocks.3.resnets.1.conv1": "UpBlocks.Chain_11.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"up_blocks.3.resnets.2.conv1": "UpBlocks.Chain_12.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"up_blocks.3.resnets.1.conv_shortcut": "UpBlocks.Chain_11.ResidualBlock.Conv2d",
"up_blocks.3.resnets.2.conv_shortcut": "UpBlocks.Chain_12.ResidualBlock.Conv2d",
"conv_out": "Chain.Conv2d",
},
)
civitai_recipe = WeightRecipe(
key_prefix="model.diffusion_model.",
key_map={
"time_embed.0": "TimestepEncoder.RangeEncoder.Linear_1",
"time_embed.2": "TimestepEncoder.RangeEncoder.Linear_2",
"input_blocks.0.0": "DownBlocks.Chain_1.Conv2d",
"input_blocks.1.0.in_layers.0": "DownBlocks.Chain_2.ResidualBlock.Chain.GroupNorm_1",
"input_blocks.1.0.in_layers.2": "DownBlocks.Chain_2.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"input_blocks.1.0.emb_layers.1": "DownBlocks.Chain_2.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"input_blocks.1.0.out_layers.0": "DownBlocks.Chain_2.ResidualBlock.Chain.GroupNorm_2",
"input_blocks.1.0.out_layers.3": "DownBlocks.Chain_2.ResidualBlock.Chain.Conv2d",
"input_blocks.1.1.norm": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_1.GroupNorm",
"input_blocks.1.1.proj_in": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_1.Conv2d",
"input_blocks.1.1.transformer_blocks.0.norm1": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"input_blocks.1.1.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"input_blocks.1.1.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"input_blocks.1.1.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"input_blocks.1.1.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"input_blocks.1.1.transformer_blocks.0.norm2": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"input_blocks.1.1.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"input_blocks.1.1.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"input_blocks.1.1.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"input_blocks.1.1.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"input_blocks.1.1.transformer_blocks.0.norm3": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"input_blocks.1.1.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"input_blocks.1.1.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"input_blocks.1.1.proj_out": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_3.Conv2d",
"input_blocks.2.0.in_layers.0": "DownBlocks.Chain_3.ResidualBlock.Chain.GroupNorm_1",
"input_blocks.2.0.in_layers.2": "DownBlocks.Chain_3.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"input_blocks.2.0.emb_layers.1": "DownBlocks.Chain_3.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"input_blocks.2.0.out_layers.0": "DownBlocks.Chain_3.ResidualBlock.Chain.GroupNorm_2",
"input_blocks.2.0.out_layers.3": "DownBlocks.Chain_3.ResidualBlock.Chain.Conv2d",
"input_blocks.2.1.norm": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_1.GroupNorm",
"input_blocks.2.1.proj_in": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_1.Conv2d",
"input_blocks.2.1.transformer_blocks.0.norm1": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"input_blocks.2.1.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"input_blocks.2.1.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"input_blocks.2.1.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"input_blocks.2.1.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"input_blocks.2.1.transformer_blocks.0.norm2": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"input_blocks.2.1.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"input_blocks.2.1.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"input_blocks.2.1.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"input_blocks.2.1.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"input_blocks.2.1.transformer_blocks.0.norm3": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"input_blocks.2.1.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"input_blocks.2.1.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"input_blocks.2.1.proj_out": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_3.Conv2d",
"input_blocks.3.0.op": "DownBlocks.Chain_4.Downsample.Conv2d",
"input_blocks.4.0.in_layers.0": "DownBlocks.Chain_5.ResidualBlock.Chain.GroupNorm_1",
"input_blocks.4.0.in_layers.2": "DownBlocks.Chain_5.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"input_blocks.4.0.emb_layers.1": "DownBlocks.Chain_5.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"input_blocks.4.0.out_layers.0": "DownBlocks.Chain_5.ResidualBlock.Chain.GroupNorm_2",
"input_blocks.4.0.out_layers.3": "DownBlocks.Chain_5.ResidualBlock.Chain.Conv2d",
"input_blocks.4.0.skip_connection": "DownBlocks.Chain_5.ResidualBlock.Conv2d",
"input_blocks.4.1.norm": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_1.GroupNorm",
"input_blocks.4.1.proj_in": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_1.Conv2d",
"input_blocks.4.1.transformer_blocks.0.norm1": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"input_blocks.4.1.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"input_blocks.4.1.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"input_blocks.4.1.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"input_blocks.4.1.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"input_blocks.4.1.transformer_blocks.0.norm2": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"input_blocks.4.1.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"input_blocks.4.1.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"input_blocks.4.1.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"input_blocks.4.1.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"input_blocks.4.1.transformer_blocks.0.norm3": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"input_blocks.4.1.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"input_blocks.4.1.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"input_blocks.4.1.proj_out": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_3.Conv2d",
"input_blocks.5.0.in_layers.0": "DownBlocks.Chain_6.ResidualBlock.Chain.GroupNorm_1",
"input_blocks.5.0.in_layers.2": "DownBlocks.Chain_6.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"input_blocks.5.0.emb_layers.1": "DownBlocks.Chain_6.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"input_blocks.5.0.out_layers.0": "DownBlocks.Chain_6.ResidualBlock.Chain.GroupNorm_2",
"input_blocks.5.0.out_layers.3": "DownBlocks.Chain_6.ResidualBlock.Chain.Conv2d",
"input_blocks.5.1.norm": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_1.GroupNorm",
"input_blocks.5.1.proj_in": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_1.Conv2d",
"input_blocks.5.1.transformer_blocks.0.norm1": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"input_blocks.5.1.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"input_blocks.5.1.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"input_blocks.5.1.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"input_blocks.5.1.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"input_blocks.5.1.transformer_blocks.0.norm2": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"input_blocks.5.1.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"input_blocks.5.1.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"input_blocks.5.1.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"input_blocks.5.1.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"input_blocks.5.1.transformer_blocks.0.norm3": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"input_blocks.5.1.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"input_blocks.5.1.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"input_blocks.5.1.proj_out": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_3.Conv2d",
"input_blocks.6.0.op": "DownBlocks.Chain_7.Downsample.Conv2d",
"input_blocks.7.0.in_layers.0": "DownBlocks.Chain_8.ResidualBlock.Chain.GroupNorm_1",
"input_blocks.7.0.in_layers.2": "DownBlocks.Chain_8.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"input_blocks.7.0.emb_layers.1": "DownBlocks.Chain_8.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"input_blocks.7.0.out_layers.0": "DownBlocks.Chain_8.ResidualBlock.Chain.GroupNorm_2",
"input_blocks.7.0.out_layers.3": "DownBlocks.Chain_8.ResidualBlock.Chain.Conv2d",
"input_blocks.7.0.skip_connection": "DownBlocks.Chain_8.ResidualBlock.Conv2d",
"input_blocks.7.1.norm": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_1.GroupNorm",
"input_blocks.7.1.proj_in": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_1.Conv2d",
"input_blocks.7.1.transformer_blocks.0.norm1": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"input_blocks.7.1.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"input_blocks.7.1.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"input_blocks.7.1.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"input_blocks.7.1.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"input_blocks.7.1.transformer_blocks.0.norm2": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"input_blocks.7.1.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"input_blocks.7.1.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"input_blocks.7.1.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"input_blocks.7.1.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"input_blocks.7.1.transformer_blocks.0.norm3": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"input_blocks.7.1.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"input_blocks.7.1.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"input_blocks.7.1.proj_out": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_3.Conv2d",
"input_blocks.8.0.in_layers.0": "DownBlocks.Chain_9.ResidualBlock.Chain.GroupNorm_1",
"input_blocks.8.0.in_layers.2": "DownBlocks.Chain_9.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"input_blocks.8.0.emb_layers.1": "DownBlocks.Chain_9.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"input_blocks.8.0.out_layers.0": "DownBlocks.Chain_9.ResidualBlock.Chain.GroupNorm_2",
"input_blocks.8.0.out_layers.3": "DownBlocks.Chain_9.ResidualBlock.Chain.Conv2d",
"input_blocks.8.1.norm": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_1.GroupNorm",
"input_blocks.8.1.proj_in": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_1.Conv2d",
"input_blocks.8.1.transformer_blocks.0.norm1": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"input_blocks.8.1.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"input_blocks.8.1.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"input_blocks.8.1.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"input_blocks.8.1.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"input_blocks.8.1.transformer_blocks.0.norm2": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"input_blocks.8.1.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"input_blocks.8.1.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"input_blocks.8.1.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"input_blocks.8.1.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"input_blocks.8.1.transformer_blocks.0.norm3": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"input_blocks.8.1.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"input_blocks.8.1.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"input_blocks.8.1.proj_out": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_3.Conv2d",
"input_blocks.9.0.op": "DownBlocks.Chain_10.Downsample.Conv2d",
"input_blocks.10.0.in_layers.0": "DownBlocks.Chain_11.ResidualBlock.Chain.GroupNorm_1",
"input_blocks.10.0.in_layers.2": "DownBlocks.Chain_11.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"input_blocks.10.0.emb_layers.1": "DownBlocks.Chain_11.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"input_blocks.10.0.out_layers.0": "DownBlocks.Chain_11.ResidualBlock.Chain.GroupNorm_2",
"input_blocks.10.0.out_layers.3": "DownBlocks.Chain_11.ResidualBlock.Chain.Conv2d",
"input_blocks.11.0.in_layers.0": "DownBlocks.Chain_12.ResidualBlock.Chain.GroupNorm_1",
"input_blocks.11.0.in_layers.2": "DownBlocks.Chain_12.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"input_blocks.11.0.emb_layers.1": "DownBlocks.Chain_12.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"input_blocks.11.0.out_layers.0": "DownBlocks.Chain_12.ResidualBlock.Chain.GroupNorm_2",
"input_blocks.11.0.out_layers.3": "DownBlocks.Chain_12.ResidualBlock.Chain.Conv2d",
"middle_block.0.in_layers.0": "Sum.MiddleBlock.ResidualBlock_1.Chain.GroupNorm_1",
"middle_block.0.in_layers.2": "Sum.MiddleBlock.ResidualBlock_1.Chain.RangeAdapter2d.Conv2d",
"middle_block.0.emb_layers.1": "Sum.MiddleBlock.ResidualBlock_1.Chain.RangeAdapter2d.Chain.Linear",
"middle_block.0.out_layers.0": "Sum.MiddleBlock.ResidualBlock_1.Chain.GroupNorm_2",
"middle_block.0.out_layers.3": "Sum.MiddleBlock.ResidualBlock_1.Chain.Conv2d",
"middle_block.1.norm": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_1.GroupNorm",
"middle_block.1.proj_in": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_1.Conv2d",
"middle_block.1.transformer_blocks.0.norm1": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"middle_block.1.transformer_blocks.0.attn1.to_q": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"middle_block.1.transformer_blocks.0.attn1.to_k": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"middle_block.1.transformer_blocks.0.attn1.to_v": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"middle_block.1.transformer_blocks.0.attn1.to_out.0": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"middle_block.1.transformer_blocks.0.norm2": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"middle_block.1.transformer_blocks.0.attn2.to_q": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"middle_block.1.transformer_blocks.0.attn2.to_k": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"middle_block.1.transformer_blocks.0.attn2.to_v": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"middle_block.1.transformer_blocks.0.attn2.to_out.0": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"middle_block.1.transformer_blocks.0.norm3": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"middle_block.1.transformer_blocks.0.ff.net.0.proj": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"middle_block.1.transformer_blocks.0.ff.net.2": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"middle_block.1.proj_out": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_3.Conv2d",
"middle_block.2.in_layers.0": "Sum.MiddleBlock.ResidualBlock_2.Chain.GroupNorm_1",
"middle_block.2.in_layers.2": "Sum.MiddleBlock.ResidualBlock_2.Chain.RangeAdapter2d.Conv2d",
"middle_block.2.emb_layers.1": "Sum.MiddleBlock.ResidualBlock_2.Chain.RangeAdapter2d.Chain.Linear",
"middle_block.2.out_layers.0": "Sum.MiddleBlock.ResidualBlock_2.Chain.GroupNorm_2",
"middle_block.2.out_layers.3": "Sum.MiddleBlock.ResidualBlock_2.Chain.Conv2d",
"output_blocks.0.0.in_layers.0": "UpBlocks.Chain_1.ResidualBlock.Chain.GroupNorm_1",
"output_blocks.0.0.in_layers.2": "UpBlocks.Chain_1.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"output_blocks.0.0.emb_layers.1": "UpBlocks.Chain_1.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"output_blocks.0.0.out_layers.0": "UpBlocks.Chain_1.ResidualBlock.Chain.GroupNorm_2",
"output_blocks.0.0.out_layers.3": "UpBlocks.Chain_1.ResidualBlock.Chain.Conv2d",
"output_blocks.0.0.skip_connection": "UpBlocks.Chain_1.ResidualBlock.Conv2d",
"output_blocks.1.0.in_layers.0": "UpBlocks.Chain_2.ResidualBlock.Chain.GroupNorm_1",
"output_blocks.1.0.in_layers.2": "UpBlocks.Chain_2.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"output_blocks.1.0.emb_layers.1": "UpBlocks.Chain_2.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"output_blocks.1.0.out_layers.0": "UpBlocks.Chain_2.ResidualBlock.Chain.GroupNorm_2",
"output_blocks.1.0.out_layers.3": "UpBlocks.Chain_2.ResidualBlock.Chain.Conv2d",
"output_blocks.1.0.skip_connection": "UpBlocks.Chain_2.ResidualBlock.Conv2d",
"output_blocks.2.0.in_layers.0": "UpBlocks.Chain_3.ResidualBlock.Chain.GroupNorm_1",
"output_blocks.2.0.in_layers.2": "UpBlocks.Chain_3.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"output_blocks.2.0.emb_layers.1": "UpBlocks.Chain_3.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"output_blocks.2.0.out_layers.0": "UpBlocks.Chain_3.ResidualBlock.Chain.GroupNorm_2",
"output_blocks.2.0.out_layers.3": "UpBlocks.Chain_3.ResidualBlock.Chain.Conv2d",
"output_blocks.2.0.skip_connection": "UpBlocks.Chain_3.ResidualBlock.Conv2d",
"output_blocks.2.1.conv": "UpBlocks.Chain_3.Upsample.Conv2d",
"output_blocks.3.0.in_layers.0": "UpBlocks.Chain_4.ResidualBlock.Chain.GroupNorm_1",
"output_blocks.3.0.in_layers.2": "UpBlocks.Chain_4.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"output_blocks.3.0.emb_layers.1": "UpBlocks.Chain_4.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"output_blocks.3.0.out_layers.0": "UpBlocks.Chain_4.ResidualBlock.Chain.GroupNorm_2",
"output_blocks.3.0.out_layers.3": "UpBlocks.Chain_4.ResidualBlock.Chain.Conv2d",
"output_blocks.3.0.skip_connection": "UpBlocks.Chain_4.ResidualBlock.Conv2d",
"output_blocks.3.1.norm": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_1.GroupNorm",
"output_blocks.3.1.proj_in": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_1.Conv2d",
"output_blocks.3.1.transformer_blocks.0.norm1": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"output_blocks.3.1.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"output_blocks.3.1.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"output_blocks.3.1.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"output_blocks.3.1.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"output_blocks.3.1.transformer_blocks.0.norm2": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"output_blocks.3.1.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"output_blocks.3.1.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"output_blocks.3.1.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"output_blocks.3.1.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"output_blocks.3.1.transformer_blocks.0.norm3": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"output_blocks.3.1.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"output_blocks.3.1.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"output_blocks.3.1.proj_out": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_3.Conv2d",
"output_blocks.4.0.in_layers.0": "UpBlocks.Chain_5.ResidualBlock.Chain.GroupNorm_1",
"output_blocks.4.0.in_layers.2": "UpBlocks.Chain_5.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"output_blocks.4.0.emb_layers.1": "UpBlocks.Chain_5.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"output_blocks.4.0.out_layers.0": "UpBlocks.Chain_5.ResidualBlock.Chain.GroupNorm_2",
"output_blocks.4.0.out_layers.3": "UpBlocks.Chain_5.ResidualBlock.Chain.Conv2d",
"output_blocks.4.0.skip_connection": "UpBlocks.Chain_5.ResidualBlock.Conv2d",
"output_blocks.4.1.norm": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_1.GroupNorm",
"output_blocks.4.1.proj_in": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_1.Conv2d",
"output_blocks.4.1.transformer_blocks.0.norm1": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"output_blocks.4.1.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"output_blocks.4.1.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"output_blocks.4.1.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"output_blocks.4.1.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"output_blocks.4.1.transformer_blocks.0.norm2": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"output_blocks.4.1.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"output_blocks.4.1.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"output_blocks.4.1.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"output_blocks.4.1.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"output_blocks.4.1.transformer_blocks.0.norm3": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"output_blocks.4.1.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"output_blocks.4.1.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"output_blocks.4.1.proj_out": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_3.Conv2d",
"output_blocks.5.0.in_layers.0": "UpBlocks.Chain_6.ResidualBlock.Chain.GroupNorm_1",
"output_blocks.5.0.in_layers.2": "UpBlocks.Chain_6.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"output_blocks.5.0.emb_layers.1": "UpBlocks.Chain_6.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"output_blocks.5.0.out_layers.0": "UpBlocks.Chain_6.ResidualBlock.Chain.GroupNorm_2",
"output_blocks.5.0.out_layers.3": "UpBlocks.Chain_6.ResidualBlock.Chain.Conv2d",
"output_blocks.5.0.skip_connection": "UpBlocks.Chain_6.ResidualBlock.Conv2d",
"output_blocks.5.1.norm": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_1.GroupNorm",
"output_blocks.5.1.proj_in": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_1.Conv2d",
"output_blocks.5.1.transformer_blocks.0.norm1": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"output_blocks.5.1.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"output_blocks.5.1.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"output_blocks.5.1.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"output_blocks.5.1.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"output_blocks.5.1.transformer_blocks.0.norm2": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"output_blocks.5.1.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"output_blocks.5.1.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"output_blocks.5.1.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"output_blocks.5.1.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"output_blocks.5.1.transformer_blocks.0.norm3": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"output_blocks.5.1.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"output_blocks.5.1.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"output_blocks.5.1.proj_out": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_3.Conv2d",
"output_blocks.5.2.conv": "UpBlocks.Chain_6.Upsample.Conv2d",
"output_blocks.6.0.in_layers.0": "UpBlocks.Chain_7.ResidualBlock.Chain.GroupNorm_1",
"output_blocks.6.0.in_layers.2": "UpBlocks.Chain_7.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"output_blocks.6.0.emb_layers.1": "UpBlocks.Chain_7.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"output_blocks.6.0.out_layers.0": "UpBlocks.Chain_7.ResidualBlock.Chain.GroupNorm_2",
"output_blocks.6.0.out_layers.3": "UpBlocks.Chain_7.ResidualBlock.Chain.Conv2d",
"output_blocks.6.0.skip_connection": "UpBlocks.Chain_7.ResidualBlock.Conv2d",
"output_blocks.6.1.norm": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_1.GroupNorm",
"output_blocks.6.1.proj_in": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_1.Conv2d",
"output_blocks.6.1.transformer_blocks.0.norm1": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"output_blocks.6.1.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"output_blocks.6.1.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"output_blocks.6.1.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"output_blocks.6.1.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"output_blocks.6.1.transformer_blocks.0.norm2": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"output_blocks.6.1.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"output_blocks.6.1.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"output_blocks.6.1.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"output_blocks.6.1.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"output_blocks.6.1.transformer_blocks.0.norm3": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"output_blocks.6.1.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"output_blocks.6.1.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"output_blocks.6.1.proj_out": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_3.Conv2d",
"output_blocks.7.0.in_layers.0": "UpBlocks.Chain_8.ResidualBlock.Chain.GroupNorm_1",
"output_blocks.7.0.in_layers.2": "UpBlocks.Chain_8.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"output_blocks.7.0.emb_layers.1": "UpBlocks.Chain_8.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"output_blocks.7.0.out_layers.0": "UpBlocks.Chain_8.ResidualBlock.Chain.GroupNorm_2",
"output_blocks.7.0.out_layers.3": "UpBlocks.Chain_8.ResidualBlock.Chain.Conv2d",
"output_blocks.7.0.skip_connection": "UpBlocks.Chain_8.ResidualBlock.Conv2d",
"output_blocks.7.1.norm": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_1.GroupNorm",
"output_blocks.7.1.proj_in": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_1.Conv2d",
"output_blocks.7.1.transformer_blocks.0.norm1": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"output_blocks.7.1.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"output_blocks.7.1.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"output_blocks.7.1.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"output_blocks.7.1.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"output_blocks.7.1.transformer_blocks.0.norm2": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"output_blocks.7.1.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"output_blocks.7.1.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"output_blocks.7.1.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"output_blocks.7.1.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"output_blocks.7.1.transformer_blocks.0.norm3": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"output_blocks.7.1.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"output_blocks.7.1.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"output_blocks.7.1.proj_out": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_3.Conv2d",
"output_blocks.8.0.in_layers.0": "UpBlocks.Chain_9.ResidualBlock.Chain.GroupNorm_1",
"output_blocks.8.0.in_layers.2": "UpBlocks.Chain_9.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"output_blocks.8.0.emb_layers.1": "UpBlocks.Chain_9.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"output_blocks.8.0.out_layers.0": "UpBlocks.Chain_9.ResidualBlock.Chain.GroupNorm_2",
"output_blocks.8.0.out_layers.3": "UpBlocks.Chain_9.ResidualBlock.Chain.Conv2d",
"output_blocks.8.0.skip_connection": "UpBlocks.Chain_9.ResidualBlock.Conv2d",
"output_blocks.8.1.norm": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_1.GroupNorm",
"output_blocks.8.1.proj_in": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_1.Conv2d",
"output_blocks.8.1.transformer_blocks.0.norm1": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"output_blocks.8.1.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"output_blocks.8.1.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"output_blocks.8.1.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"output_blocks.8.1.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"output_blocks.8.1.transformer_blocks.0.norm2": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"output_blocks.8.1.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"output_blocks.8.1.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"output_blocks.8.1.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"output_blocks.8.1.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"output_blocks.8.1.transformer_blocks.0.norm3": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"output_blocks.8.1.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"output_blocks.8.1.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"output_blocks.8.1.proj_out": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_3.Conv2d",
"output_blocks.8.2.conv": "UpBlocks.Chain_9.Upsample.Conv2d",
"output_blocks.9.0.in_layers.0": "UpBlocks.Chain_10.ResidualBlock.Chain.GroupNorm_1",
"output_blocks.9.0.in_layers.2": "UpBlocks.Chain_10.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"output_blocks.9.0.emb_layers.1": "UpBlocks.Chain_10.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"output_blocks.9.0.out_layers.0": "UpBlocks.Chain_10.ResidualBlock.Chain.GroupNorm_2",
"output_blocks.9.0.out_layers.3": "UpBlocks.Chain_10.ResidualBlock.Chain.Conv2d",
"output_blocks.9.0.skip_connection": "UpBlocks.Chain_10.ResidualBlock.Conv2d",
"output_blocks.9.1.norm": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_1.GroupNorm",
"output_blocks.9.1.proj_in": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_1.Conv2d",
"output_blocks.9.1.transformer_blocks.0.norm1": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"output_blocks.9.1.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"output_blocks.9.1.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"output_blocks.9.1.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"output_blocks.9.1.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"output_blocks.9.1.transformer_blocks.0.norm2": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"output_blocks.9.1.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"output_blocks.9.1.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"output_blocks.9.1.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"output_blocks.9.1.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"output_blocks.9.1.transformer_blocks.0.norm3": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"output_blocks.9.1.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"output_blocks.9.1.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"output_blocks.9.1.proj_out": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_3.Conv2d",
"output_blocks.10.0.in_layers.0": "UpBlocks.Chain_11.ResidualBlock.Chain.GroupNorm_1",
"output_blocks.10.0.in_layers.2": "UpBlocks.Chain_11.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"output_blocks.10.0.emb_layers.1": "UpBlocks.Chain_11.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"output_blocks.10.0.out_layers.0": "UpBlocks.Chain_11.ResidualBlock.Chain.GroupNorm_2",
"output_blocks.10.0.out_layers.3": "UpBlocks.Chain_11.ResidualBlock.Chain.Conv2d",
"output_blocks.10.0.skip_connection": "UpBlocks.Chain_11.ResidualBlock.Conv2d",
"output_blocks.10.1.norm": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_1.GroupNorm",
"output_blocks.10.1.proj_in": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_1.Conv2d",
"output_blocks.10.1.transformer_blocks.0.norm1": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"output_blocks.10.1.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"output_blocks.10.1.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"output_blocks.10.1.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"output_blocks.10.1.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"output_blocks.10.1.transformer_blocks.0.norm2": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"output_blocks.10.1.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"output_blocks.10.1.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"output_blocks.10.1.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"output_blocks.10.1.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"output_blocks.10.1.transformer_blocks.0.norm3": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"output_blocks.10.1.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"output_blocks.10.1.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"output_blocks.10.1.proj_out": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_3.Conv2d",
"output_blocks.11.0.in_layers.0": "UpBlocks.Chain_12.ResidualBlock.Chain.GroupNorm_1",
"output_blocks.11.0.in_layers.2": "UpBlocks.Chain_12.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
"output_blocks.11.0.emb_layers.1": "UpBlocks.Chain_12.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
"output_blocks.11.0.out_layers.0": "UpBlocks.Chain_12.ResidualBlock.Chain.GroupNorm_2",
"output_blocks.11.0.out_layers.3": "UpBlocks.Chain_12.ResidualBlock.Chain.Conv2d",
"output_blocks.11.0.skip_connection": "UpBlocks.Chain_12.ResidualBlock.Conv2d",
"output_blocks.11.1.norm": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_1.GroupNorm",
"output_blocks.11.1.proj_in": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_1.Conv2d",
"output_blocks.11.1.transformer_blocks.0.norm1": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
"output_blocks.11.1.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
"output_blocks.11.1.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
"output_blocks.11.1.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
"output_blocks.11.1.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
"output_blocks.11.1.transformer_blocks.0.norm2": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
"output_blocks.11.1.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
"output_blocks.11.1.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
"output_blocks.11.1.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
"output_blocks.11.1.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
"output_blocks.11.1.transformer_blocks.0.norm3": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
"output_blocks.11.1.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
"output_blocks.11.1.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
"output_blocks.11.1.proj_out": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_3.Conv2d",
"out.0": "Chain.GroupNorm",
"out.2": "Chain.Conv2d",
},
)
runwayml = Conversion(
original=Hub(
repo_id="stable-diffusion-v1-5/stable-diffusion-v1-5",
filename="unet/diffusion_pytorch_model.safetensors",
revision="f03de327dd89b501a01da37fc5240cf4fdba85a1",
expected_sha256="19da7aaa4b880e59d56843f1fcb4dd9b599c28a1d9d9af7c1143057c8ffae9f1",
),
converted=Hub(
repo_id="refiners/sd15.unet",
filename="model.safetensors",
expected_sha256="6c0488a590f151128565bac105dbc3ce6563643f270c5c32ea756fa317a1c256",
),
recipe=diffusers_recipe,
dtype=torch.float16,
)
runwayml_inpainting = Conversion(
original=Hub(
repo_id="stable-diffusion-v1-5/stable-diffusion-inpainting",
filename="unet/diffusion_pytorch_model.fp16.safetensors",
revision="8a4288a76071f7280aedbdb3253bdb9e9d5d84bb",
expected_sha256="24b788b4a777748377cc20364eea4ae113c8c42f4468c16bc8c02fdae5492af9",
),
converted=Hub(
repo_id="refiners/sd15.unet_inpainting",
filename="model.safetensors",
expected_sha256="331283db7631bfe0027d5f7107ec00ac64679e25fe6e195b79a7b040ad3666ae",
),
recipe=diffusers_recipe,
dtype=torch.float16,
)
juggernaut_reborn = Conversion(
original=Hub(
repo_id="civitai/KandooAi/juggernaut",
filename="reborn/onefile_fp16.safetensors",
download_url="https://civitai.com/api/download/models/274039?type=Model&format=SafeTensor&size=pruned&fp=fp16",
expected_sha256="338b85bc4f7628bc42cce336242e79154a57c2a4360531436e97f7793568f18c",
),
converted=Hub(
repo_id="refiners/sd15.juggernaut.reborn.unet",
filename="model.safetensors",
expected_sha256="64a75d16fdb11faeedbef8270fcdfe3051284f743cdf46d8bb89c09499a22591",
),
recipe=civitai_recipe,
dtype=torch.float16,
)
juggernaut_aftermath = Conversion(
original=Hub(
repo_id="civitai/KandooAi/juggernaut",
filename="aftermath/onefile_fp32.safetensors",
download_url="https://civitai.com/api/download/models/127207?type=Model&format=SafeTensor&size=full&fp=fp32",
expected_sha256="7ed60a2f5842de8e6755965ffbf2a0ee273429939f307233c22f7dffd8d7ae3d",
),
converted=Hub(
repo_id="refiners/sd15.juggernaut.aftermath.unet",
filename="model.safetensors",
expected_sha256="5882c517eac0670df60755cb9eb762081a1d6a37431d3f7f9a3a6d8dfe764d86",
),
recipe=civitai_recipe,
dtype=torch.float32,
)
juggernaut_aftermath_inpainting = Conversion(
original=Hub(
repo_id="civitai/KandooAi/juggernaut",
filename="aftermath-inpainting/onefile_fp32.safetensors",
download_url="https://civitai.com/api/download/models/129549?type=Model&format=SafeTensor&size=full&fp=fp32",
expected_sha256="b370189733ef44a3661a96139c02fde22d36df5ad12d1112b0b56fc3d6bfbdba",
),
converted=Hub(
repo_id="refiners/sd15.juggernaut.aftermath.unet_inpainting",
filename="model.safetensors",
expected_sha256="c00a85060de351d617cc3a3be6865cf3493a0557f1f32303cb8a385c6368b9a8",
),
recipe=civitai_recipe,
dtype=torch.float32,
)
realistic_stock_photo_v3 = Conversion(
original=Hub(
repo_id="Yntec/realisticStockPhoto3",
filename="realisticStockPhoto_v30SD15.safetensors",
expected_sha256="f85affae9aae16276eaf670f810946e2d03c4d300791a0380f07653cb78ba31b",
# download_url="https://civitai.com/api/download/models/524032?type=Model&format=SafeTensor&size=full&fp=fp16",
# civitai model is gated by auth, using a mirror on hf hub instead
),
converted=Hub(
repo_id="refiners/sd15.realistic_stock_photo.v3_0.unet",
filename="model.safetensors",
expected_sha256="18a8616375a8738cc051c3d5a63979e7d40dec4d720f88247424db2ebd663131",
),
recipe=civitai_recipe,
dtype=torch.float16,
)
realistic_vision_v5 = Conversion(
original=Hub(
repo_id="civitai/SG_161222/realistic_vision",
filename="v5/onefile_fp32.safetensors",
download_url="https://civitai.com/api/download/models/130072?type=Model&format=SafeTensor&size=full&fp=fp16",
expected_sha256="ef76aa2332635f4352463343beec9c5aae3bd107a73c0fd7221abbbcf8bd5470",
),
converted=Hub(
repo_id="refiners/sd15.realistic_vision.v5_1.unet",
filename="model.safetensors",
expected_sha256="3d6fc9cb9eabb1487e0337ed17ab29bbfabfc5e5faf47c3635a4ee73b5bb1164",
),
recipe=civitai_recipe,
dtype=torch.float16,
)
ic_light_fc = Conversion(
original=Hub(
repo_id="lllyasviel/ic-light",
filename="iclight_sd15_fc.safetensors",
revision="f5950d474dc0cd5bb5a0c66189534cd13f28eb70",
expected_sha256="a033fbaaa2f3f7859fa6a4477ee63ebbf9c116bf3569d5811856d2807f3468cd",
),
converted=Hub(
repo_id="refiners/sd15.ic_light.fc",
filename="model.safetensors",
expected_sha256="2d4e8ff0ac65274ec9655eeac459226e4790de1326e4338e34c4348bdf763350",
),
recipe=diffusers_recipe,
dtype=torch.float16,
)
ic_light_fcon = Conversion(
original=Hub(
repo_id="lllyasviel/ic-light",
filename="iclight_sd15_fcon.safetensors",
revision="f5950d474dc0cd5bb5a0c66189534cd13f28eb70",
expected_sha256="37652ef27028c8fdb9882830b1621e4e648d26e19cb2035a6af8d52f3a6d8d87",
),
converted=Hub(
repo_id="refiners/sd15.ic_light.fcon",
filename="model.safetensors",
expected_sha256="856cf6a6cb6b57335073c3140bb38c3cc35bcffa69cd8a57c166fac37b3594d4",
),
recipe=diffusers_recipe,
dtype=torch.float16,
)
ic_light_fbc = Conversion(
original=Hub(
repo_id="lllyasviel/ic-light",
filename="iclight_sd15_fbc.safetensors",
revision="f5950d474dc0cd5bb5a0c66189534cd13f28eb70",
expected_sha256="bb8ccedaa4944b16cfa8356afcbc2c2174cc4c4af57de19124ae0cddd0d96947",
),
converted=Hub(
repo_id="refiners/sd15.ic_light.fbc",
filename="model.safetensors",
expected_sha256="c79f275b94566da66801cf8fe9c1872202ff5c9b634d321a7e6ecaface0e456f",
),
recipe=diffusers_recipe,
dtype=torch.float16,
)

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,347 @@
import logging
import os
from hashlib import sha256
from pathlib import Path
from typing import Any, cast
from warnings import warn
import requests
import torch
from huggingface_hub import ( # pyright: ignore[reportMissingTypeStubs]
HfFileMetadata,
get_hf_file_metadata, # pyright: ignore[reportUnknownVariableType]
hf_hub_download, # pyright: ignore[reportUnknownVariableType]
hf_hub_url,
)
from tqdm import tqdm
from refiners.fluxion.utils import load_from_safetensors, load_tensors, save_to_safetensors
AnyDict = dict[str, Any]
TensorDict = dict[str, torch.Tensor]
def download_file_url(url: str, destination: Path) -> None:
    """Download a file from a url to a destination.

    A single streamed request provides both the size of the file (for the progress bar) and
    its content. The content is written to a temporary `<name>.part` sibling file which is
    renamed to `destination` only once the transfer is complete, so an interrupted download
    never leaves a truncated file at `destination`.

    Args:
        url: The url to download the file from.
        destination: The path where the downloaded file is written.

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error status.
    """
    logging.debug(f"Downloading {url} to {destination}")

    partial = destination.with_name(f"{destination.name}.part")

    # the context manager releases the connection, even when the transfer fails
    with requests.get(url, stream=True) as response:
        response.raise_for_status()

        # get the size of the file (0 when the server does not announce it)
        total = int(response.headers.get("content-length", 0))

        destination.parent.mkdir(parents=True, exist_ok=True)
        try:
            # download the file, the progress bar is closed on exit of the `with` block
            with partial.open("wb") as f, tqdm(
                desc=destination.name,
                total=total or None,  # unknown size: tqdm then shows a plain counter
                unit="iB",
                unit_scale=True,
                unit_divisor=1024,
                leave=False,
            ) as bar:
                for chunk in response.iter_content(chunk_size=1024 * 1000):
                    bar.update(f.write(chunk))

            # the transfer is complete, publish the file under its final name
            partial.replace(destination)
        finally:
            # no-op after a successful rename, removes the truncated file otherwise
            partial.unlink(missing_ok=True)
class Hub:
"""A class representing a weight on the Hub.
Note:
The Hub denotes a directory on the local machine where the weights are stored.
The Hub may also correspond to a remote repository on the Hugging Face Hub.
"""
def __init__(
self,
repo_id: str,
filename: str,
expected_sha256: str,
revision: str = "main",
download_url: str | None = None,
) -> None:
"""Initialize the HubPath.
Args:
repo_id: The repository identifier on the hub.
filename: The filename of the file in the repository.
revision: The revision of the file on the hf hub.
expected_sha256: The sha256 hash of the file.
download_url: The url to download the file from, if not from the huggingface hub.
"""
self.repo_id = repo_id
self.filename = filename
self.revision = revision
self.expected_sha256 = expected_sha256.lower()
self.override_download_url = download_url
@staticmethod
def hub_location():
"""Return the path to the local hub root directory."""
return Path(os.getenv("REFINERS_HUB_PATH", "tests/weights"))
@property
def hf_url(self) -> str:
"""Return the url to the file on the hf hub."""
assert self.override_download_url is None, f"{self.repo_id}/{self.filename} is not available on the hub"
return hf_hub_url(
repo_id=self.repo_id,
filename=self.filename,
revision=self.revision,
)
@property
def hf_cache_path(self) -> Path:
"""Download the file from the hf hub and return its path in the local hf cache."""
return Path(
hf_hub_download(
repo_id=self.repo_id,
filename=self.filename,
revision=self.revision,
),
)
@property
def hf_metadata(self) -> HfFileMetadata:
"""Return the metadata of the file on the hf hub."""
return get_hf_file_metadata(self.hf_url)
@property
def hf_sha256_hash(self) -> str:
"""Return the sha256 hash of the file on the hf hub."""
remote_hash = self.hf_metadata.etag
assert remote_hash is not None
assert len(remote_hash) == 64
return remote_hash.lower()
@property
def local_path(self) -> Path:
"""Return the path to the file in the local hub."""
return self.hub_location() / self.repo_id / self.filename
@property
def local_hash(self) -> str:
"""Return the sha256 hash of the file in the local hub."""
assert self.local_path.is_file(), f"{self.local_path} does not exist"
# TODO: use https://docs.python.org/3/library/hashlib.html#hashlib.file_digest when support python >= 3.11
return sha256(self.local_path.read_bytes()).hexdigest().lower()
def check_local_hash(self) -> bool:
"""Check if the sha256 hash of the file in the local hub is correct."""
if self.expected_sha256 != self.local_hash:
logging.warning(f"{self.local_path} local sha256 mismatch, {self.local_hash} != {self.expected_sha256}")
return False
else:
logging.debug(f"{self.local_path} local sha256 is correct ({self.local_hash})")
return True
def check_remote_hash(self) -> bool:
"""Check if the sha256 hash of the file on the hf hub is correct."""
if self.expected_sha256 != self.hf_sha256_hash:
logging.warning(
f"{self.local_path} remote sha256 mismatch, {self.hf_sha256_hash} != {self.expected_sha256}"
)
return False
else:
logging.debug(f"{self.local_path} remote sha256 is correct ({self.hf_sha256_hash})")
return True
def download(self) -> None:
"""Download the file from the hf hub or from the override download url."""
self.local_path.parent.mkdir(parents=True, exist_ok=True)
if self.local_path.is_file():
logging.warning(f"{self.local_path} already exists")
elif self.override_download_url is not None:
download_file_url(url=self.override_download_url, destination=self.local_path)
else:
# TODO: pas assez de message de log quand local_path existe pas et que ça vient du hf cache
self.local_path.symlink_to(self.hf_cache_path)
assert self.check_local_hash()
class WeightRecipe:
"""A class representing a recipe to convert weights from one format to another."""
def __init__(
self,
key_map: dict[str, str],
key_prefix: str = "",
key_aliases: dict[str, str] = {},
tensor_reshapes: dict[str, tuple[int, ...]] = {},
):
"""Initialize the weight recipe.
Args:
key_map: A dictionary mapping the keys of the original state dict to the converted state dict.
key_prefix: A prefix to remove from the keys of the original state dict.
key_aliases: A dictionary mapping the keys of the original state dict to their aliases.
tensor_reshapes: A dictionary mapping the keys of the original state dict to their new shapes.
"""
self.key_prefix = key_prefix
self.key_map = key_map
self.key_aliases = key_aliases
self.tensor_reshapes = tensor_reshapes
@staticmethod
def flatten_state_dict(state_dict: AnyDict, sep: str = ".") -> AnyDict:
"""Flattens a nested dictionary into a dictionary with dot-separated keys.
Args:
state_dict: A nested dictionary.
sep: The separator to use between keys when flattening.
"""
def _flatten(current_dict: AnyDict, parent_key: str = "") -> AnyDict:
items: AnyDict = {}
for k, v in current_dict.items():
new_key = f"{parent_key}{sep}{k}" if parent_key else k
if isinstance(v, dict):
items.update(_flatten(cast(AnyDict, v), new_key))
else:
items[new_key] = v
return items
return _flatten(state_dict)
def name_map_keys(self, state_dict: TensorDict) -> TensorDict:
"""Map the keys of the state dict according to the name map."""
new_state_dict: TensorDict = {}
for key, value in state_dict.items():
# check for .weight or .bias suffixes
suffix = next(s for s in (".weight", ".bias", "") if key.endswith(s))
key = key.removesuffix(suffix)
# remove key_prefix
key = key.removeprefix(self.key_prefix)
# check for key aliases
source_key = self.key_aliases.get(key, key)
# get target_key from key_map
target_key = self.key_map.get(source_key)
if target_key is None:
continue # ignore key if it doesn't exist in the key_map
# add value to new_state_dict with the mapped key
new_state_dict[target_key + suffix] = value
return new_state_dict
def reshape_tensors(self, state_dict: TensorDict) -> TensorDict:
"""Reshape tensors in the state dict according to tensor_reshapes."""
new_state_dict = state_dict.copy()
for key, value in state_dict.items():
if key in self.tensor_reshapes:
new_shape = self.tensor_reshapes[key]
new_state_dict[key] = value.reshape(new_shape)
return new_state_dict
def translate_keys(self, state_dict: AnyDict, flatten_state_dict: bool = True) -> TensorDict:
"""Translate the keys of a state dict."""
if flatten_state_dict:
state_dict = self.flatten_state_dict(state_dict)
state_dict = self.name_map_keys(state_dict)
state_dict = self.reshape_tensors(state_dict)
return state_dict
class Conversion:
    """Tie an original weight to its converted counterpart on the Hub, with the recipe between them."""

    def __init__(
        self,
        original: Hub,
        converted: Hub,
        recipe: WeightRecipe,
        dtype: torch.dtype,
    ) -> None:
        """Store the two ends of the conversion and how to go from one to the other.

        Args:
            original: The Hub entry of the weight to convert.
            converted: The Hub entry of the weight produced by the conversion.
            recipe: The WeightRecipe translating the original state dict into the converted one.
            dtype: The dtype the converted tensors are cast to.
        """
        self.original, self.converted = original, converted
        self.recipe, self.dtype = recipe, dtype

    @staticmethod
    def load_state_dict(path: Path) -> AnyDict:
        """Read the state dict stored at `path`, picking the loader from the file extension."""
        if path.suffix in (".safetensors", ".sft"):
            return load_from_safetensors(path)
        return load_tensors(path)

    @staticmethod
    def filter_tensors_state_dict(state_dict: AnyDict) -> TensorDict:
        """Keep only the NaN-free tensors of a state dict, warning about every dropped entry."""
        kept: TensorDict = {}
        for name, candidate in state_dict.items():
            if not isinstance(candidate, torch.Tensor):
                warn(f"Value for key {name} is not a tensor, filtering")
            elif bool(torch.isnan(candidate).any()):
                warn(f"Found NaNs in {name}, filtering")
            else:
                kept[name] = candidate
        return kept

    @staticmethod
    def change_dtype(state_dict: TensorDict, dtype: torch.dtype) -> TensorDict:
        """Return a new state dict whose tensors are all cast to `dtype`."""
        casted: TensorDict = {}
        for name, tensor in state_dict.items():
            casted[name] = tensor.to(dtype=dtype)
        return casted

    def _assert_remote_hash(self, report: Any) -> None:
        """Assert that the converted weight matches the hf hub.

        When the check fails with an HTTPError, the failure is passed to `report`
        (a logging function) instead of being raised.
        """
        try:
            assert self.converted.check_remote_hash()
        except requests.exceptions.HTTPError:
            report(f"{self.converted.local_path} couldn't verify remote hash")

    def convert(self) -> None:
        """Produce the converted weight from the original one.

        Nothing is converted when a converted file with the expected hash is already present
        in the local hub. Otherwise the original weight is downloaded (if needed), filtered,
        translated with the recipe, cast to the target dtype and saved as safetensors.
        """
        source, target = self.original, self.converted
        logging.info(f"Converting {source.repo_id}/{source.filename} to {target.repo_id}/{target.filename}")

        # reuse an existing converted file when its hash is the expected one
        target_path = target.local_path
        if target_path.is_file():
            logging.warning(f"{target_path} already exists")
            if target.check_local_hash():
                self._assert_remote_hash(logging.error)
                return

        # fetch and load the original tensors
        source.download()
        tensors = self.filter_tensors_state_dict(self.load_state_dict(source.local_path))

        # translate the keys, then cast to the target dtype
        tensors = self.change_dtype(self.recipe.translate_keys(tensors), self.dtype)

        # write the result to the local hub
        target_path.parent.mkdir(parents=True, exist_ok=True)
        save_to_safetensors(target_path, tensors)

        # make sure the result is the expected one, locally and on the hf hub
        assert target.check_local_hash()
        self._assert_remote_hash(logging.warning)