mirror of
https://github.com/finegrain-ai/refiners.git
synced 2024-11-23 06:38:45 +00:00
add new weight conversion logic
This commit is contained in:
parent
4045904b26
commit
d322e9c5ed
|
@ -56,6 +56,9 @@ Documentation = "https://refine.rs/"
|
||||||
Repository = "https://github.com/finegrain-ai/refiners"
|
Repository = "https://github.com/finegrain-ai/refiners"
|
||||||
Issues = "https://github.com/finegrain-ai/refiners/issues"
|
Issues = "https://github.com/finegrain-ai/refiners/issues"
|
||||||
|
|
||||||
|
[project.scripts]
|
||||||
|
get_weights = "refiners.conversion.cli:main"
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
training = [
|
training = [
|
||||||
"bitsandbytes>=0.41.2.post2",
|
"bitsandbytes>=0.41.2.post2",
|
||||||
|
@ -84,6 +87,7 @@ test = [
|
||||||
"sentencepiece>=0.2.0",
|
"sentencepiece>=0.2.0",
|
||||||
]
|
]
|
||||||
conversion = [
|
conversion = [
|
||||||
|
"huggingface-hub>=0.25.1",
|
||||||
"diffusers>=0.26.1",
|
"diffusers>=0.26.1",
|
||||||
"transformers>=4.35.2",
|
"transformers>=4.35.2",
|
||||||
"segment-anything-py>=1.0",
|
"segment-anything-py>=1.0",
|
||||||
|
@ -181,7 +185,7 @@ exclude_also = [
|
||||||
]
|
]
|
||||||
|
|
||||||
[tool.typos.default]
|
[tool.typos.default]
|
||||||
extend-ignore-identifiers-re = ["NDArray*", "interm", "af000ded"]
|
extend-ignore-identifiers-re = ["NDArray*", "interm", "af000ded", "nin"]
|
||||||
|
|
||||||
[tool.typos.default.extend-words]
|
[tool.typos.default.extend-words]
|
||||||
adaptee = "adaptee" # Common name for an adapter's target
|
adaptee = "adaptee" # Common name for an adapter's target
|
||||||
|
|
45
src/refiners/conversion/__init__.py
Normal file
45
src/refiners/conversion/__init__.py
Normal file
|
@ -0,0 +1,45 @@
|
||||||
|
from .models import (
|
||||||
|
autoencoder_sd15,
|
||||||
|
autoencoder_sdxl,
|
||||||
|
clip_image_sd21,
|
||||||
|
clip_text_sd15,
|
||||||
|
clip_text_sdxl,
|
||||||
|
controllora_sdxl,
|
||||||
|
controlnet_sd15,
|
||||||
|
dinov2,
|
||||||
|
ella,
|
||||||
|
hq_sam,
|
||||||
|
ipadapter_sd15,
|
||||||
|
ipadapter_sdxl,
|
||||||
|
loras,
|
||||||
|
mvanet,
|
||||||
|
preprocessors,
|
||||||
|
sam,
|
||||||
|
t2iadapter_sd15,
|
||||||
|
t2iadapter_sdxl,
|
||||||
|
unet_sd15,
|
||||||
|
unet_sdxl,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Public API of this package: one entry per conversion submodule imported
# from `.models` above (same names, same order).
__all__ = [
    "autoencoder_sd15",
    "autoencoder_sdxl",
    "clip_image_sd21",
    "clip_text_sd15",
    "clip_text_sdxl",
    "controllora_sdxl",
    "controlnet_sd15",
    "dinov2",
    "ella",
    "hq_sam",
    "ipadapter_sd15",
    "ipadapter_sdxl",
    "loras",
    "mvanet",
    "preprocessors",
    "sam",
    "t2iadapter_sd15",
    "t2iadapter_sdxl",
    "unet_sd15",
    "unet_sdxl",
]
|
145
src/refiners/conversion/cli.py
Normal file
145
src/refiners/conversion/cli.py
Normal file
|
@ -0,0 +1,145 @@
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from refiners.conversion import (
|
||||||
|
autoencoder_sd15,
|
||||||
|
autoencoder_sdxl,
|
||||||
|
clip_image_sd21,
|
||||||
|
clip_text_sd15,
|
||||||
|
clip_text_sdxl,
|
||||||
|
controllora_sdxl,
|
||||||
|
controlnet_sd15,
|
||||||
|
dinov2,
|
||||||
|
ella,
|
||||||
|
hq_sam,
|
||||||
|
ipadapter_sd15,
|
||||||
|
ipadapter_sdxl,
|
||||||
|
loras,
|
||||||
|
mvanet,
|
||||||
|
preprocessors,
|
||||||
|
sam,
|
||||||
|
t2iadapter_sd15,
|
||||||
|
t2iadapter_sdxl,
|
||||||
|
unet_sd15,
|
||||||
|
unet_sdxl,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Run every registered weight download/conversion, one after the other.

    Logging is configured first: the `urllib3` logger is raised to WARNING and
    the root logger is configured at DEBUG level with a timestamped format.
    Each entry is then processed sequentially by calling its `convert()` or
    `download()` method. Nothing here catches exceptions, so the first entry
    that raises aborts the remaining ones.
    """
    logging.getLogger("urllib3").setLevel(logging.WARNING)
    logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s")

    # MVANet
    mvanet.mvanet.convert()
    mvanet.finegrain_v01.download()

    # loras (no conversion)
    loras.sd15_pokemon.download()
    loras.sdxl_dpo.download()
    loras.sdxl_scifi.download()
    loras.sdxl_pixelart.download()
    loras.sdxl_age_slider.download()
    loras.sdxl_cartoon_slider.download()
    loras.sdxl_eyesize_slider.download()

    # preprocessors
    preprocessors.informative_drawings.convert()

    # SD1.5 autoencoders
    autoencoder_sd15.runwayml.convert()
    autoencoder_sd15.stability_mse.convert()
    autoencoder_sd15.juggernaut_reborn.convert()
    autoencoder_sd15.juggernaut_aftermath.convert()
    autoencoder_sd15.realistic_stock_photo_v3.convert()
    autoencoder_sd15.realistic_vision_v5.convert()

    # SDXL autoencoders
    autoencoder_sdxl.stability.convert()
    autoencoder_sdxl.madebyollin_fp16fix.convert()
    autoencoder_sdxl.juggernautXL_v10.convert()

    # SD1.5 text encoders
    clip_text_sd15.runwayml.convert()
    clip_text_sd15.juggernaut_reborn.convert()
    clip_text_sd15.juggernaut_aftermath.convert()
    clip_text_sd15.realistic_stock_photo_v3.convert()
    clip_text_sd15.realistic_vision_v5.convert()

    # SD2.1 image encoders
    clip_image_sd21.unclip_21.convert()

    # SDXL text encoders
    clip_text_sdxl.stability.convert()
    clip_text_sdxl.juggernautXL_v10.convert()

    # SD1.5 unets
    unet_sd15.runwayml.convert()
    unet_sd15.runwayml_inpainting.convert()
    unet_sd15.juggernaut_reborn.convert()
    unet_sd15.juggernaut_aftermath.convert()
    unet_sd15.realistic_stock_photo_v3.convert()
    unet_sd15.realistic_vision_v5.convert()

    # SD1.5 IC-Light
    unet_sd15.ic_light_fc.convert()
    unet_sd15.ic_light_fcon.convert()
    unet_sd15.ic_light_fbc.convert()

    # SDXL unets
    unet_sdxl.stability.convert()
    unet_sdxl.juggernautXL_v10.convert()

    # SDXL LCM unet
    unet_sdxl.lcm.convert()

    # SDXL Lightning unet
    unet_sdxl.lightning_4step.convert()
    unet_sdxl.lightning_1step.convert()

    # SD1.5 controlnets
    controlnet_sd15.tile.convert()
    controlnet_sd15.canny.convert()
    controlnet_sd15.depth.convert()
    controlnet_sd15.normalbae.convert()
    controlnet_sd15.lineart.convert()
    controlnet_sd15.sam.convert()

    # SDXL Control LoRAs
    controllora_sdxl.canny.convert()
    controllora_sdxl.cpds.convert()

    # SD1.5 IP-Adapters
    ipadapter_sd15.base.convert()
    ipadapter_sd15.plus.convert()

    # SDXL IP-Adapters
    ipadapter_sdxl.base.convert()
    ipadapter_sdxl.plus.convert()

    # SD1.5 T2I-Adapters
    t2iadapter_sd15.depth.convert()

    # SDXL T2I-Adapters
    t2iadapter_sdxl.canny.convert()

    # ELLA adapters
    ella.sd15_t5xl.convert()

    # DINOv2
    dinov2.small.convert()
    dinov2.small_reg.convert()
    dinov2.base.convert()
    dinov2.base_reg.convert()
    dinov2.large.convert()
    dinov2.large_reg.convert()
    dinov2.giant.convert()
    dinov2.giant_reg.convert()

    # SAM
    sam.vit_h.convert()

    # SAM-HQ
    hq_sam.vit_h.convert()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
654
src/refiners/conversion/model_converter.py
Normal file
654
src/refiners/conversion/model_converter.py
Normal file
|
@ -0,0 +1,654 @@
|
||||||
|
from collections import defaultdict
|
||||||
|
from enum import Enum, auto
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, DefaultDict, TypedDict
|
||||||
|
|
||||||
|
import torch
|
||||||
|
from torch import Tensor, nn
|
||||||
|
from torch.utils.hooks import RemovableHandle
|
||||||
|
|
||||||
|
from refiners.fluxion.utils import no_grad, norm, save_to_safetensors
|
||||||
|
|
||||||
|
# torch layer classes the converter treats as "basic" layers. The list is
# consulted by `ModelConverter._is_torch_basic_layer` (via `issubclass`) and is
# part of the candidate set scanned by `ModelConverter._infer_basic_layer_type`
# (via `isinstance`), so subclasses of these types also count.
TORCH_BASIC_LAYERS: list[type[nn.Module]] = [
    nn.Conv1d,
    nn.Conv2d,
    nn.Conv3d,
    nn.ConvTranspose1d,
    nn.ConvTranspose2d,
    nn.ConvTranspose3d,
    nn.Linear,
    nn.BatchNorm1d,
    nn.BatchNorm2d,
    nn.BatchNorm3d,
    nn.LayerNorm,
    nn.GroupNorm,
    nn.Embedding,
    nn.MaxPool2d,
    nn.AvgPool2d,
    nn.AdaptiveAvgPool2d,
]


# Signature of a layer as produced by `ModelConverter.get_module_signature`:
# (inferred layer type, shapes of the layer's parameters in `parameters()` order).
ModelTypeShape = tuple[type[nn.Module], tuple[torch.Size, ...]]
|
||||||
|
|
||||||
|
|
||||||
|
class ModuleArgsDict(TypedDict):
    """Explicit bundle of the arguments used to call a module.

    Keys:
        positional: Arguments passed by position, as a tuple.
        keyword: Arguments passed by name, as a dictionary.
    """

    positional: tuple[Any, ...]
    keyword: dict[str, Any]
|
||||||
|
|
||||||
|
|
||||||
|
class ConversionStage(Enum):
    """Progress marker of a `ModelConverter` run.

    Members are declared in the order the conversion reaches them and are
    numbered by `auto()`, i.e. their `.value` goes from 1 to 4.

    Attributes:
        INIT: Nothing has been checked yet.
        BASIC_LAYERS_MATCH: Both models contain the same number of basic layers.
        SHAPE_AND_LAYERS_MATCH: Layer shapes line up between both models.
        MODELS_OUTPUT_AGREE: Source and target models produce matching outputs.
    """

    INIT = auto()
    BASIC_LAYERS_MATCH = auto()
    SHAPE_AND_LAYERS_MATCH = auto()
    MODELS_OUTPUT_AGREE = auto()
|
||||||
|
|
||||||
|
|
||||||
|
class ModelConverter:
|
||||||
|
"""Converts a model's state_dict to match another model's state_dict.
|
||||||
|
|
||||||
|
Note: The conversion process consists of four stages
|
||||||
|
1. Verify that the source and target models have the same number of basic layers.
|
||||||
|
2. Find matching shapes and layers between the source and target models.
|
||||||
|
3. Convert the source model's state_dict to match the target model's state_dict.
|
||||||
|
4. Compare the outputs of the source and target models.
|
||||||
|
|
||||||
|
The conversion process can be run multiple times, and will resume from the last stage.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
```py
|
||||||
|
source = ...
|
||||||
|
target = ...
|
||||||
|
|
||||||
|
converter = ModelConverter(
|
||||||
|
source_model=source,
|
||||||
|
target_model=target,
|
||||||
|
threshold=0.1,
|
||||||
|
verbose=False
|
||||||
|
)
|
||||||
|
|
||||||
|
is_converted = converter(args)
|
||||||
|
if is_converted:
|
||||||
|
converter.save_to_safetensors(path="converted_model.pt")
|
||||||
|
```
|
||||||
|
"""
|
||||||
|
|
||||||
|
ModuleArgs = tuple[Any, ...] | dict[str, Any] | ModuleArgsDict
|
||||||
|
stage: ConversionStage = ConversionStage.INIT
|
||||||
|
_stored_mapping: dict[str, str] | None = None
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
source_model: nn.Module,
|
||||||
|
target_model: nn.Module,
|
||||||
|
source_keys_to_skip: list[str] | None = None,
|
||||||
|
target_keys_to_skip: list[str] | None = None,
|
||||||
|
custom_layer_mapping: dict[type[nn.Module], type[nn.Module]] | None = None,
|
||||||
|
threshold: float = 1e-5,
|
||||||
|
skip_output_check: bool = False,
|
||||||
|
skip_init_check: bool = False,
|
||||||
|
verbose: bool = True,
|
||||||
|
) -> None:
|
||||||
|
"""Initializes the ModelConverter.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
source_model: The model to convert from.
|
||||||
|
target_model: The model to convert to.
|
||||||
|
source_keys_to_skip: A list of keys to skip when tracing the source model.
|
||||||
|
target_keys_to_skip: A list of keys to skip when tracing the target model.
|
||||||
|
custom_layer_mapping: A dictionary mapping custom layer types between the source and target models.
|
||||||
|
threshold: The threshold for comparing outputs between the source and target models.
|
||||||
|
skip_output_check: Whether to skip comparing the outputs of the source and target models.
|
||||||
|
skip_init_check: Whether to skip checking that the source and target models have the same number of basic
|
||||||
|
layers.
|
||||||
|
verbose: Whether to print messages during the conversion process.
|
||||||
|
|
||||||
|
"""
|
||||||
|
self.source_model = source_model
|
||||||
|
self.target_model = target_model
|
||||||
|
self.source_keys_to_skip = source_keys_to_skip or []
|
||||||
|
self.target_keys_to_skip = target_keys_to_skip or []
|
||||||
|
self.custom_layer_mapping = custom_layer_mapping or {}
|
||||||
|
self.threshold = threshold
|
||||||
|
self.skip_output_check = skip_output_check
|
||||||
|
self.skip_init_check = skip_init_check
|
||||||
|
self.verbose = verbose
|
||||||
|
|
||||||
|
def __repr__(self) -> str:
|
||||||
|
return (
|
||||||
|
f"ModelConverter(source_model={self.source_model.__class__.__name__},"
|
||||||
|
f" target_model={self.target_model.__class__.__name__}, stage={self.stage})"
|
||||||
|
)
|
||||||
|
|
||||||
|
def __bool__(self) -> bool:
|
||||||
|
return self.stage.value >= 2 if self.skip_output_check else self.stage.value >= 3
|
||||||
|
|
||||||
|
def run(self, source_args: ModuleArgs, target_args: ModuleArgs | None = None) -> bool:
|
||||||
|
"""Run the conversion process.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
source_args: The arguments to pass to the source model it can be either a tuple of positional arguments,
|
||||||
|
a dictionary of keyword arguments, or a dictionary with `positional` and `keyword` keys. If `target_args`
|
||||||
|
is not provided, these arguments will also be passed to the target model.
|
||||||
|
target_args: The arguments to pass to the target model it can be either a tuple of positional arguments,
|
||||||
|
a dictionary of keyword arguments, or a dictionary with `positional` and `keyword` keys.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if the conversion process is done and the models agree.
|
||||||
|
"""
|
||||||
|
if target_args is None:
|
||||||
|
target_args = source_args
|
||||||
|
|
||||||
|
match self.stage:
|
||||||
|
case ConversionStage.MODELS_OUTPUT_AGREE:
|
||||||
|
self._increment_stage()
|
||||||
|
return True
|
||||||
|
|
||||||
|
case ConversionStage.SHAPE_AND_LAYERS_MATCH if self._run_shape_and_layers_match_stage(
|
||||||
|
source_args=source_args, target_args=target_args
|
||||||
|
):
|
||||||
|
self._increment_stage()
|
||||||
|
return True
|
||||||
|
|
||||||
|
case ConversionStage.BASIC_LAYERS_MATCH if self._run_basic_layers_match_stage(
|
||||||
|
source_args=source_args, target_args=target_args
|
||||||
|
):
|
||||||
|
self._increment_stage()
|
||||||
|
return self.run(source_args=source_args, target_args=target_args)
|
||||||
|
|
||||||
|
case ConversionStage.INIT if self._run_init_stage():
|
||||||
|
self._increment_stage()
|
||||||
|
return self.run(source_args=source_args, target_args=target_args)
|
||||||
|
|
||||||
|
case _:
|
||||||
|
self._log(message=f"Conversion failed at stage {self.stage.value}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
def _increment_stage(self) -> None:
|
||||||
|
"""Increment the stage of the conversion process."""
|
||||||
|
match self.stage:
|
||||||
|
case ConversionStage.INIT:
|
||||||
|
self.stage = ConversionStage.BASIC_LAYERS_MATCH
|
||||||
|
self._log(
|
||||||
|
message=(
|
||||||
|
"Stage 0 -> 1 - Models have the same number of basic layers. Finding matching shapes and"
|
||||||
|
" layers..."
|
||||||
|
)
|
||||||
|
)
|
||||||
|
case ConversionStage.BASIC_LAYERS_MATCH:
|
||||||
|
self.stage = ConversionStage.SHAPE_AND_LAYERS_MATCH
|
||||||
|
self._log(
|
||||||
|
message=(
|
||||||
|
"Stage 1 -> 2 - Shape of both models agree. Applying state_dict to target model. Comparing"
|
||||||
|
" models..."
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
case ConversionStage.SHAPE_AND_LAYERS_MATCH:
|
||||||
|
if self.skip_output_check:
|
||||||
|
self._log(
|
||||||
|
message=(
|
||||||
|
"Stage 2 - Nothing to do. Skipping output check. If you want to compare the outputs, set"
|
||||||
|
" `skip_output_check` to `False`"
|
||||||
|
)
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
self.stage = ConversionStage.MODELS_OUTPUT_AGREE
|
||||||
|
self._log(
|
||||||
|
message=(
|
||||||
|
"Stage 2 -> 3 - Conversion is done and source and target models agree: you can export the"
|
||||||
|
" converted model using `save_to_safetensors`"
|
||||||
|
)
|
||||||
|
)
|
||||||
|
case ConversionStage.MODELS_OUTPUT_AGREE:
|
||||||
|
self._log(
|
||||||
|
message=(
|
||||||
|
"Stage 3 - Nothing to do. Conversion is done and source and target models agree: you can export"
|
||||||
|
" the converted model using `save_to_safetensors`"
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
def get_state_dict(self) -> dict[str, Tensor]:
|
||||||
|
"""Get the converted state_dict."""
|
||||||
|
if not self:
|
||||||
|
raise ValueError("The conversion process is not done yet. Run `converter(args)` first.")
|
||||||
|
return self.target_model.state_dict()
|
||||||
|
|
||||||
|
def get_mapping(self) -> dict[str, str]:
|
||||||
|
"""Get the mapping between the source and target models' state_dicts."""
|
||||||
|
if not self:
|
||||||
|
raise ValueError("The conversion process is not done yet. Run `converter(args)` first.")
|
||||||
|
assert self._stored_mapping is not None, "Mapping is not stored"
|
||||||
|
return self._stored_mapping
|
||||||
|
|
||||||
|
def save_to_safetensors(self, path: Path | str, metadata: dict[str, str] | None = None, half: bool = False) -> None:
|
||||||
|
"""Save the converted model to a SafeTensors file.
|
||||||
|
|
||||||
|
Warning:
|
||||||
|
This method can only be called after the conversion process is done.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
path: The path to save the converted model to.
|
||||||
|
metadata: Metadata to save with the converted model.
|
||||||
|
half: Whether to save the converted model as half precision.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: If the conversion process is not done yet. Run `converter` first.
|
||||||
|
"""
|
||||||
|
if not self:
|
||||||
|
raise ValueError("The conversion process is not done yet. Run `converter(args)` first.")
|
||||||
|
state_dict = self.get_state_dict()
|
||||||
|
if half:
|
||||||
|
state_dict = {key: value.half() for key, value in state_dict.items()}
|
||||||
|
save_to_safetensors(path=path, tensors=state_dict, metadata=metadata)
|
||||||
|
|
||||||
|
def map_state_dicts(
    self,
    source_args: ModuleArgs,
    target_args: ModuleArgs | None = None,
) -> dict[str, str] | None:
    """Build the correspondence between target and source sub-module keys.

    Both models are traced with a forward pass; layers sharing the same signature are then
    paired in the order they were executed.

    Args:
        source_args: Arguments for the source model: a tuple of positional arguments, a
            dictionary of keyword arguments, or a dictionary with `positional` and `keyword`
            keys. Also used for the target model when `target_args` is not given.
        target_args: Arguments for the target model, in the same formats.

    Returns:
        A dictionary mapping target keys to source keys, or `None` when the traced shapes
        are not aligned.
    """
    effective_target_args = source_args if target_args is None else target_args

    source_order = self._trace_module_execution_order(
        module=self.source_model, args=source_args, keys_to_skip=self.source_keys_to_skip
    )
    target_order = self._trace_module_execution_order(
        module=self.target_model, args=effective_target_args, keys_to_skip=self.target_keys_to_skip
    )

    aligned = self._assert_shapes_aligned(source_order=source_order, target_order=target_order)
    if not aligned:
        return None

    mapping: dict[str, str] = {}
    for source_signature, source_keys in source_order.items():
        layer_type, param_shapes = source_signature
        target_signature = source_signature

        # Custom (non torch basic) layers are looked up under their mapped target type.
        if not self._is_torch_basic_layer(module_type=layer_type):
            for custom_source, custom_target in self.custom_layer_mapping.items():
                if custom_source == layer_type:
                    target_signature = (custom_target, param_shapes)
                    break

        mapping.update(zip(target_order[target_signature], source_keys))

    return mapping
|
||||||
|
|
||||||
|
def compare_models(
    self,
    source_args: ModuleArgs,
    target_args: ModuleArgs | None = None,
    threshold: float = 1e-5,
) -> bool:
    """Check layer by layer that both models produce matching outputs.

    Args:
        source_args: Arguments for the source model: a tuple of positional arguments, a
            dictionary of keyword arguments, or a dictionary with `positional` and `keyword`
            keys. Also used for the target model when `target_args` is not given.
        target_args: Arguments for the target model, in the same formats.
        threshold: Largest accepted norm of the difference between two paired outputs.

    Returns:
        `False` as soon as one pair of outputs differs by more than `threshold`,
        `True` otherwise.
    """
    effective_target_args = source_args if target_args is None else target_args

    source_outputs = self._collect_layers_outputs(
        module=self.source_model, args=source_args, keys_to_skip=self.source_keys_to_skip
    )
    target_outputs = self._collect_layers_outputs(
        module=self.target_model, args=effective_target_args, keys_to_skip=self.target_keys_to_skip
    )

    diff = None
    last_source_key = None
    last_target_key = None
    for source_item, target_item in zip(source_outputs, target_outputs):
        source_key, source_output = source_item
        target_key, target_output = target_item

        # The target output is reshaped to the source's shape before subtracting.
        reshaped_target = target_output.reshape(shape=source_output.shape)
        diff = norm(source_output - reshaped_target).item()
        if diff > threshold:
            self._log(
                message=(
                    f"Models diverged between {last_source_key} and {source_key}, and between {last_target_key} and"
                    f" {target_key}, difference in norm: {diff}"
                )
            )
            return False

        last_source_key, last_target_key = source_key, target_key

    self._log(message=f"Models agree. Difference in norm: {diff}")
    return True
|
||||||
|
|
||||||
|
def _run_init_stage(self) -> bool:
|
||||||
|
"""Run the init stage of the conversion process."""
|
||||||
|
if self.skip_init_check:
|
||||||
|
self._log(
|
||||||
|
message=(
|
||||||
|
"Skipping init check. If you want to check the number of basic layers, set `skip_init_check` to"
|
||||||
|
" `False`"
|
||||||
|
)
|
||||||
|
)
|
||||||
|
return True
|
||||||
|
|
||||||
|
is_count_correct = self._verify_basic_layers_count()
|
||||||
|
is_not_missing_layers = self._verify_missing_basic_layers()
|
||||||
|
|
||||||
|
return is_count_correct and is_not_missing_layers
|
||||||
|
|
||||||
|
def _run_basic_layers_match_stage(self, source_args: ModuleArgs, target_args: ModuleArgs | None) -> bool:
    """Run the step attached to the `BASIC_LAYERS_MATCH` stage.

    The key mapping is computed and stored, then the converted state_dict is loaded into
    the target model.

    Returns:
        `False` when no mapping could be found, `True` otherwise.
    """
    mapping = self.map_state_dicts(source_args=source_args, target_args=target_args)
    # The result is stored even when it is `None`.
    self._stored_mapping = mapping

    if mapping is None:
        self._log(message="Models do not have matching shapes.")
        return False

    converted = self._convert_state_dict(
        source_state_dict=self.source_model.state_dict(),
        target_state_dict=self.target_model.state_dict(),
        state_dict_mapping=mapping,
    )
    self.target_model.load_state_dict(state_dict=converted)
    return True
|
||||||
|
|
||||||
|
def _run_shape_and_layers_match_stage(self, source_args: ModuleArgs, target_args: ModuleArgs | None) -> bool:
|
||||||
|
"""Run the shape and layers match stage of the conversion process."""
|
||||||
|
if self.skip_output_check:
|
||||||
|
self._log(
|
||||||
|
message="Skipping output check. If you want to compare the outputs, set `skip_output_check` to `False`"
|
||||||
|
)
|
||||||
|
return True
|
||||||
|
|
||||||
|
try:
|
||||||
|
if self.compare_models(source_args=source_args, target_args=target_args, threshold=self.threshold):
|
||||||
|
self._log(message="Models agree. You can export the converted model using `save_to_safetensors`")
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
self._log(message="Models do not agree. Try to increase the threshold or modify the models.")
|
||||||
|
return False
|
||||||
|
except Exception as e:
|
||||||
|
self._log(message=f"An error occurred while comparing the models: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
def _log(self, message: str) -> None:
|
||||||
|
"""Print a message if `verbose` is `True`."""
|
||||||
|
if self.verbose:
|
||||||
|
print(message)
|
||||||
|
|
||||||
|
def _debug_print_shapes(
|
||||||
|
self,
|
||||||
|
shape: ModelTypeShape,
|
||||||
|
source_keys: list[str],
|
||||||
|
target_keys: list[str],
|
||||||
|
) -> None:
|
||||||
|
"""Print the shapes of the sub-modules in `source_keys` and `target_keys`."""
|
||||||
|
self._log(message=f"{shape}")
|
||||||
|
max_len = max(len(source_keys), len(target_keys))
|
||||||
|
for i in range(max_len):
|
||||||
|
source_key = source_keys[i] if i < len(source_keys) else "---"
|
||||||
|
target_key = target_keys[i] if i < len(target_keys) else "---"
|
||||||
|
self._log(f"\t{source_key}\t{target_key}")
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _unpack_module_args(module_args: ModuleArgs) -> tuple[tuple[Any, ...], dict[str, Any]]:
|
||||||
|
"""Unpack the positional and keyword arguments passed to a module."""
|
||||||
|
match module_args:
|
||||||
|
case tuple(positional_args):
|
||||||
|
keyword_args: dict[str, Any] = {}
|
||||||
|
case {"positional": positional_args, "keyword": keyword_args}:
|
||||||
|
pass
|
||||||
|
case _:
|
||||||
|
positional_args = ()
|
||||||
|
keyword_args = dict(**module_args)
|
||||||
|
|
||||||
|
return positional_args, keyword_args
|
||||||
|
|
||||||
|
def _is_torch_basic_layer(self, module_type: type[nn.Module]) -> bool:
    """Tell whether `module_type` is one of `TORCH_BASIC_LAYERS` or a subclass of one."""
    basic_layer_types = tuple(TORCH_BASIC_LAYERS)
    return issubclass(module_type, basic_layer_types)
|
||||||
|
|
||||||
|
def _infer_basic_layer_type(self, module: nn.Module) -> type[nn.Module] | None:
    """Find which known layer type `module` is an instance of.

    The candidates are the keys and values of `custom_layer_mapping` plus
    `TORCH_BASIC_LAYERS`.

    Args:
        module: The module to classify.

    Returns:
        The first candidate type `module` is an instance of, or `None` when it
        matches none of them.
    """
    layer_types = (
        set(self.custom_layer_mapping.keys()) | set(self.custom_layer_mapping.values()) | set(TORCH_BASIC_LAYERS)
    )
    # NOTE(review): candidates are scanned in set iteration order. If `module` is an
    # instance of several candidates (e.g. a custom layer subclassing a torch basic
    # layer), which one is returned depends on that order - confirm this is intended.
    for layer_type in layer_types:
        if isinstance(module, layer_type):
            return layer_type

    return None
|
||||||
|
|
||||||
|
def get_module_signature(self, module: nn.Module) -> ModelTypeShape:
    """Return `(inferred layer type, shapes of the module's parameters)`.

    The module must be recognised by `_infer_basic_layer_type`; this is enforced with an
    assertion.
    """
    inferred_type = self._infer_basic_layer_type(module=module)
    assert inferred_type is not None, f"Module {module} is not a basic layer"
    return (inferred_type, tuple(parameter.shape for parameter in module.parameters()))
|
||||||
|
|
||||||
|
def _count_basic_layers(self, module: nn.Module) -> dict[type[nn.Module], int]:
    """Tally, per inferred layer type, the sub-modules of `module` that are basic layers.

    Sub-modules for which no layer type can be inferred are ignored.
    """
    tally: DefaultDict[type[nn.Module], int] = defaultdict(int)
    inferred_types = (self._infer_basic_layer_type(module=child) for child in module.modules())
    for inferred_type in inferred_types:
        if inferred_type is None:
            continue
        tally[inferred_type] += 1

    return tally
|
||||||
|
|
||||||
|
def _verify_basic_layers_count(self) -> bool:
    """Check that both models hold the same number of layers of each basic type.

    Types are translated through `custom_layer_mapping` (source -> target) and its inverse
    (target -> source) before counts are compared. Every mismatch is logged.

    Returns:
        `True` when all counts match, `False` otherwise.
    """
    source_counts = self._count_basic_layers(module=self.source_model)
    target_counts = self._count_basic_layers(module=self.target_model)
    target_to_source = {target: source for source, target in self.custom_layer_mapping.items()}

    mismatches: dict[type[nn.Module], tuple[int, int]] = {}

    # Source side: compare each source type with its target counterpart.
    for source_type, in_source in source_counts.items():
        counterpart = self.custom_layer_mapping.get(source_type, source_type)
        in_target = target_counts.get(counterpart, 0)
        if in_source != in_target:
            mismatches[source_type] = (in_source, in_target)

    # Target side: also catches types that only exist in the target model.
    for target_type, in_target in target_counts.items():
        counterpart = target_to_source.get(target_type, target_type)
        in_source = source_counts.get(counterpart, 0)
        if in_source != in_target:
            mismatches[target_type] = (in_source, in_target)

    if not mismatches:
        return True

    report = "Models do not have the same number of basic layers:\n" + "".join(
        f"  {layer_type}: Source {in_source} - Target {in_target}\n"
        for layer_type, (in_source, in_target) in mismatches.items()
    )
    self._log(message=report.strip())
    return False
|
||||||
|
|
||||||
|
def _is_weighted_leaf_module(self, module: nn.Module) -> bool:
|
||||||
|
"""Check if a module is a leaf module with weights."""
|
||||||
|
return next(module.parameters(), None) is not None and next(module.children(), None) is None
|
||||||
|
|
||||||
|
def _check_for_missing_basic_layers(self, module: nn.Module) -> list[type[nn.Module]]:
    """List the types of weighted leaf sub-modules that no known layer type matches."""
    unrecognised: list[type[nn.Module]] = []
    for child in module.modules():
        if not self._is_weighted_leaf_module(module=child):
            continue
        if not self._infer_basic_layer_type(module=child):
            unrecognised.append(type(child))
    return unrecognised
|
||||||
|
|
||||||
|
def _verify_missing_basic_layers(self) -> bool:
    """Check that neither model holds a weighted leaf module of an unrecognised type.

    Returns:
        `True` when nothing is missing; otherwise the offending types are logged and
        `False` is returned.
    """
    missing_in_source = self._check_for_missing_basic_layers(module=self.source_model)
    missing_in_target = self._check_for_missing_basic_layers(module=self.target_model)

    if not (missing_in_source or missing_in_target):
        return True

    self._log(
        message=(
            "Models might have missing basic layers. If you want to skip this check, set"
            f" `skip_init_check` to `True`: {missing_in_source}, {missing_in_target}"
        )
    )
    return False
|
||||||
|
|
||||||
|
@no_grad()
def _trace_module_execution_order(
    self,
    module: nn.Module,
    args: ModuleArgs,
    keys_to_skip: list[str],
) -> dict[ModelTypeShape, list[str]]:
    """Execute a forward pass and record the order in which basic layers are executed.

    A forward hook is registered on every sub-module recognised by
    `_infer_basic_layer_type` whose name is not in `keys_to_skip`. The hooks are always
    removed before returning, including when the forward pass raises, so that a failed
    trace does not leave hooks attached to the model.

    Args:
        module: The module to trace.
        args: The arguments to pass to the module: a tuple of positional arguments, a
            dictionary of keyword arguments, or a dictionary with `positional` and `keyword`
            keys.
        keys_to_skip: Names (as given by `named_modules`) of sub-modules to ignore.

    Returns:
        A dictionary mapping each layer signature (see `get_module_signature`) to the names
        of the sub-modules having that signature, in execution order.
    """
    submodule_to_key: dict[nn.Module, str] = {}
    execution_order: defaultdict[ModelTypeShape, list[str]] = defaultdict(list)

    def collect_execution_order_hook(layer: nn.Module, *_: Any) -> None:
        # Called after each forward of a hooked layer: file its name under its signature.
        layer_signature = self.get_module_signature(module=layer)
        execution_order[layer_signature].append(submodule_to_key[layer])

    # Membership is tested once per sub-module: use a set instead of scanning the list.
    skipped_keys = set(keys_to_skip)

    hooks: list[RemovableHandle] = []
    try:
        named_modules: list[tuple[str, nn.Module]] = module.named_modules()  # type: ignore
        for name, submodule in named_modules:
            if (self._infer_basic_layer_type(module=submodule) is not None) and name not in skipped_keys:
                submodule_to_key[submodule] = name  # type: ignore
                hook = submodule.register_forward_hook(hook=collect_execution_order_hook)
                hooks.append(hook)

        positional_args, keyword_args = self._unpack_module_args(module_args=args)
        module(*positional_args, **keyword_args)
    finally:
        # Detach the hooks even if registration or the forward pass failed.
        for hook in hooks:
            hook.remove()

    return dict(execution_order)
|
||||||
|
|
||||||
|
def _assert_shapes_aligned(
|
||||||
|
self, source_order: dict[ModelTypeShape, list[str]], target_order: dict[ModelTypeShape, list[str]]
|
||||||
|
) -> bool:
|
||||||
|
"""Assert that the shapes of the sub-modules in `source_order` and `target_order` are aligned."""
|
||||||
|
model_type_shapes = set(source_order.keys()) | set(target_order.keys())
|
||||||
|
|
||||||
|
default_type_shapes = [
|
||||||
|
type_shape for type_shape in model_type_shapes if self._is_torch_basic_layer(module_type=type_shape[0])
|
||||||
|
]
|
||||||
|
|
||||||
|
shape_mismatched = False
|
||||||
|
|
||||||
|
for model_type_shape in default_type_shapes:
|
||||||
|
source_keys = source_order.get(model_type_shape, [])
|
||||||
|
target_keys = target_order.get(model_type_shape, [])
|
||||||
|
|
||||||
|
if len(source_keys) != len(target_keys):
|
||||||
|
shape_mismatched = True
|
||||||
|
self._debug_print_shapes(shape=model_type_shape, source_keys=source_keys, target_keys=target_keys)
|
||||||
|
|
||||||
|
for source_custom_type in self.custom_layer_mapping.keys():
|
||||||
|
# iterate over all type_shapes that have the same type as source_custom_type
|
||||||
|
for source_type_shape in [
|
||||||
|
type_shape for type_shape in model_type_shapes if type_shape[0] == source_custom_type
|
||||||
|
]:
|
||||||
|
source_keys = source_order.get(source_type_shape, [])
|
||||||
|
target_custom_type = self.custom_layer_mapping[source_custom_type]
|
||||||
|
target_type_shape = (target_custom_type, source_type_shape[1])
|
||||||
|
target_keys = target_order.get(target_type_shape, [])
|
||||||
|
|
||||||
|
if len(source_keys) != len(target_keys):
|
||||||
|
shape_mismatched = True
|
||||||
|
self._debug_print_shapes(shape=source_type_shape, source_keys=source_keys, target_keys=target_keys)
|
||||||
|
|
||||||
|
return not shape_mismatched
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _convert_state_dict(
|
||||||
|
source_state_dict: dict[str, Tensor], target_state_dict: dict[str, Tensor], state_dict_mapping: dict[str, str]
|
||||||
|
) -> dict[str, Tensor]:
|
||||||
|
"""Convert the source model's state_dict to match the target model's state_dict."""
|
||||||
|
converted_state_dict: dict[str, Tensor] = {}
|
||||||
|
for target_key in target_state_dict:
|
||||||
|
target_prefix, suffix = target_key.rsplit(sep=".", maxsplit=1)
|
||||||
|
source_prefix = state_dict_mapping[target_prefix]
|
||||||
|
source_key = ".".join([source_prefix, suffix])
|
||||||
|
converted_state_dict[target_key] = source_state_dict[source_key]
|
||||||
|
|
||||||
|
return converted_state_dict
|
||||||
|
|
||||||
|
@no_grad()
|
||||||
|
def _collect_layers_outputs(
|
||||||
|
self, module: nn.Module, args: ModuleArgs, keys_to_skip: list[str]
|
||||||
|
) -> list[tuple[str, Tensor]]:
|
||||||
|
"""Execute a forward pass and store the output of specific sub-modules.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
module: The module to trace.
|
||||||
|
args: The arguments to pass to the module it can be either a tuple of positional arguments,
|
||||||
|
a dictionary of keyword arguments, or a dictionary with `positional` and `keyword` keys.
|
||||||
|
keys_to_skip: A list of keys to skip when tracing the module.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A list of tuples containing the key of each sub-module and its output.
|
||||||
|
|
||||||
|
Note:
|
||||||
|
The output of each sub-module is cloned to avoid memory leaks.
|
||||||
|
"""
|
||||||
|
submodule_to_key: dict[nn.Module, str] = {}
|
||||||
|
execution_order: list[tuple[str, Tensor]] = []
|
||||||
|
|
||||||
|
def collect_execution_order_hook(layer: nn.Module, _: Any, output: Tensor) -> None:
|
||||||
|
execution_order.append((submodule_to_key[layer], output.clone()))
|
||||||
|
|
||||||
|
hooks: list[RemovableHandle] = []
|
||||||
|
named_modules: list[tuple[str, nn.Module]] = module.named_modules() # type: ignore
|
||||||
|
for name, submodule in named_modules:
|
||||||
|
if (self._infer_basic_layer_type(module=submodule) is not None) and name not in keys_to_skip:
|
||||||
|
submodule_to_key[submodule] = name # type: ignore
|
||||||
|
hook = submodule.register_forward_hook(hook=collect_execution_order_hook)
|
||||||
|
hooks.append(hook)
|
||||||
|
|
||||||
|
positional_args, keyword_args = self._unpack_module_args(module_args=args)
|
||||||
|
module(*positional_args, **keyword_args)
|
||||||
|
|
||||||
|
for hook in hooks:
|
||||||
|
hook.remove()
|
||||||
|
|
||||||
|
return execution_order
|
0
src/refiners/conversion/models/__init__.py
Normal file
0
src/refiners/conversion/models/__init__.py
Normal file
375
src/refiners/conversion/models/autoencoder_sd15.py
Normal file
375
src/refiners/conversion/models/autoencoder_sd15.py
Normal file
|
@ -0,0 +1,375 @@
|
||||||
|
import torch
|
||||||
|
|
||||||
|
from refiners.conversion.utils import Conversion, Hub, WeightRecipe
|
||||||
|
|
||||||
|
# Recipe for autoencoder checkpoints whose keys follow the
# `encoder.down_blocks.* / decoder.up_blocks.* / *.mid_block.*` naming scheme.
#
# - `key_map`: left-hand side is a key prefix in the original checkpoint, right-hand
#   side is the matching `Encoder.* / Decoder.*` key prefix of the converted weights
#   (NOTE(review): direction inferred from the names; confirm against `WeightRecipe`).
#   The entries are not sorted by module; the order below is kept exactly as authored.
# - `key_aliases`: alternative attention key names (`query`/`key`/`value`/`proj_attn`)
#   mapped onto the `to_q`/`to_k`/`to_v`/`to_out.0` names used in `key_map`
#   (presumably for checkpoints saved with an older naming; verify).
diffusers_recipe = WeightRecipe(
    key_map={
        "encoder.conv_in": "Encoder.Conv2d",
        "encoder.down_blocks.0.resnets.0.norm1": "Encoder.Chain_1.Chain_1.Resnet_1.Chain.GroupNorm_1",
        "encoder.down_blocks.0.resnets.0.norm2": "Encoder.Chain_1.Chain_1.Resnet_1.Chain.GroupNorm_2",
        "encoder.down_blocks.0.resnets.1.norm1": "Encoder.Chain_1.Chain_1.Resnet_2.Chain.GroupNorm_1",
        "encoder.down_blocks.0.resnets.1.norm2": "Encoder.Chain_1.Chain_1.Resnet_2.Chain.GroupNorm_2",
        "encoder.down_blocks.1.resnets.0.norm1": "Encoder.Chain_1.Chain_2.Resnet_1.Chain.GroupNorm_1",
        "decoder.up_blocks.3.resnets.0.norm2": "Decoder.Chain_1.Chain_5.Resnet_1.Chain.GroupNorm_2",
        "decoder.up_blocks.3.resnets.1.norm1": "Decoder.Chain_1.Chain_5.Resnet_2.Chain.GroupNorm_1",
        "decoder.up_blocks.3.resnets.1.norm2": "Decoder.Chain_1.Chain_5.Resnet_2.Chain.GroupNorm_2",
        "decoder.up_blocks.3.resnets.2.norm1": "Decoder.Chain_1.Chain_5.Resnet_3.Chain.GroupNorm_1",
        "decoder.up_blocks.3.resnets.2.norm2": "Decoder.Chain_1.Chain_5.Resnet_3.Chain.GroupNorm_2",
        "decoder.conv_norm_out": "Decoder.Chain_2.GroupNorm",
        "encoder.down_blocks.0.resnets.0.conv1": "Encoder.Chain_1.Chain_1.Resnet_1.Chain.Conv2d_1",
        "encoder.down_blocks.0.resnets.0.conv2": "Encoder.Chain_1.Chain_1.Resnet_1.Chain.Conv2d_2",
        "encoder.down_blocks.0.resnets.1.conv1": "Encoder.Chain_1.Chain_1.Resnet_2.Chain.Conv2d_1",
        "encoder.down_blocks.0.resnets.1.conv2": "Encoder.Chain_1.Chain_1.Resnet_2.Chain.Conv2d_2",
        "encoder.down_blocks.0.downsamplers.0.conv": "Encoder.Chain_1.Chain_1.Downsample.Conv2d",
        "decoder.up_blocks.3.resnets.0.conv2": "Decoder.Chain_1.Chain_5.Resnet_1.Chain.Conv2d_2",
        "decoder.up_blocks.3.resnets.1.conv1": "Decoder.Chain_1.Chain_5.Resnet_2.Chain.Conv2d_1",
        "decoder.up_blocks.3.resnets.1.conv2": "Decoder.Chain_1.Chain_5.Resnet_2.Chain.Conv2d_2",
        "decoder.up_blocks.3.resnets.2.conv1": "Decoder.Chain_1.Chain_5.Resnet_3.Chain.Conv2d_1",
        "decoder.up_blocks.3.resnets.2.conv2": "Decoder.Chain_1.Chain_5.Resnet_3.Chain.Conv2d_2",
        "encoder.down_blocks.1.resnets.0.conv1": "Encoder.Chain_1.Chain_2.Resnet_1.Chain.Conv2d_1",
        "encoder.down_blocks.1.resnets.0.norm2": "Encoder.Chain_1.Chain_2.Resnet_1.Chain.GroupNorm_2",
        "encoder.down_blocks.1.resnets.1.norm1": "Encoder.Chain_1.Chain_2.Resnet_2.Chain.GroupNorm_1",
        "encoder.down_blocks.1.resnets.1.norm2": "Encoder.Chain_1.Chain_2.Resnet_2.Chain.GroupNorm_2",
        "encoder.down_blocks.2.resnets.0.norm1": "Encoder.Chain_1.Chain_3.Resnet_1.Chain.GroupNorm_1",
        "decoder.up_blocks.2.resnets.0.norm2": "Decoder.Chain_1.Chain_4.Resnet_1.Chain.GroupNorm_2",
        "decoder.up_blocks.2.resnets.1.norm1": "Decoder.Chain_1.Chain_4.Resnet_2.Chain.GroupNorm_1",
        "decoder.up_blocks.2.resnets.1.norm2": "Decoder.Chain_1.Chain_4.Resnet_2.Chain.GroupNorm_2",
        "decoder.up_blocks.2.resnets.2.norm1": "Decoder.Chain_1.Chain_4.Resnet_3.Chain.GroupNorm_1",
        "decoder.up_blocks.2.resnets.2.norm2": "Decoder.Chain_1.Chain_4.Resnet_3.Chain.GroupNorm_2",
        "decoder.up_blocks.3.resnets.0.norm1": "Decoder.Chain_1.Chain_5.Resnet_1.Chain.GroupNorm_1",
        "encoder.down_blocks.1.resnets.0.conv2": "Encoder.Chain_1.Chain_2.Resnet_1.Chain.Conv2d_2",
        "encoder.down_blocks.1.resnets.1.conv1": "Encoder.Chain_1.Chain_2.Resnet_2.Chain.Conv2d_1",
        "encoder.down_blocks.1.resnets.1.conv2": "Encoder.Chain_1.Chain_2.Resnet_2.Chain.Conv2d_2",
        "encoder.down_blocks.1.downsamplers.0.conv": "Encoder.Chain_1.Chain_2.Downsample.Conv2d",
        "decoder.up_blocks.2.resnets.0.conv2": "Decoder.Chain_1.Chain_4.Resnet_1.Chain.Conv2d_2",
        "decoder.up_blocks.2.resnets.1.conv1": "Decoder.Chain_1.Chain_4.Resnet_2.Chain.Conv2d_1",
        "decoder.up_blocks.2.resnets.1.conv2": "Decoder.Chain_1.Chain_4.Resnet_2.Chain.Conv2d_2",
        "decoder.up_blocks.2.resnets.2.conv1": "Decoder.Chain_1.Chain_4.Resnet_3.Chain.Conv2d_1",
        "decoder.up_blocks.2.resnets.2.conv2": "Decoder.Chain_1.Chain_4.Resnet_3.Chain.Conv2d_2",
        "decoder.up_blocks.2.upsamplers.0.conv": "Decoder.Chain_1.Chain_4.Upsample.Conv2d",
        "encoder.down_blocks.1.resnets.0.conv_shortcut": "Encoder.Chain_1.Chain_2.Resnet_1.Conv2d",
        "encoder.down_blocks.2.resnets.0.conv1": "Encoder.Chain_1.Chain_3.Resnet_1.Chain.Conv2d_1",
        "encoder.down_blocks.2.resnets.0.norm2": "Encoder.Chain_1.Chain_3.Resnet_1.Chain.GroupNorm_2",
        "encoder.down_blocks.2.resnets.1.norm1": "Encoder.Chain_1.Chain_3.Resnet_2.Chain.GroupNorm_1",
        "encoder.down_blocks.2.resnets.1.norm2": "Encoder.Chain_1.Chain_3.Resnet_2.Chain.GroupNorm_2",
        "encoder.down_blocks.3.resnets.0.norm1": "Encoder.Chain_1.Chain_4.Resnet_1.Chain.GroupNorm_1",
        "encoder.down_blocks.3.resnets.0.norm2": "Encoder.Chain_1.Chain_4.Resnet_1.Chain.GroupNorm_2",
        "encoder.down_blocks.3.resnets.1.norm1": "Encoder.Chain_1.Chain_4.Resnet_2.Chain.GroupNorm_1",
        "encoder.down_blocks.3.resnets.1.norm2": "Encoder.Chain_1.Chain_4.Resnet_2.Chain.GroupNorm_2",
        "encoder.mid_block.resnets.0.norm1": "Encoder.Chain_1.Chain_5.Resnet_1.Chain.GroupNorm_1",
        "encoder.mid_block.resnets.0.norm2": "Encoder.Chain_1.Chain_5.Resnet_1.Chain.GroupNorm_2",
        "encoder.mid_block.attentions.0.group_norm": "Encoder.Chain_1.Chain_5.Residual.GroupNorm",
        "encoder.mid_block.resnets.1.norm1": "Encoder.Chain_1.Chain_5.Resnet_2.Chain.GroupNorm_1",
        "encoder.mid_block.resnets.1.norm2": "Encoder.Chain_1.Chain_5.Resnet_2.Chain.GroupNorm_2",
        "encoder.conv_norm_out": "Encoder.Chain_2.GroupNorm",
        "decoder.mid_block.resnets.0.norm1": "Decoder.Chain_1.Chain_1.Resnet_1.Chain.GroupNorm_1",
        "decoder.mid_block.resnets.0.norm2": "Decoder.Chain_1.Chain_1.Resnet_1.Chain.GroupNorm_2",
        "decoder.mid_block.attentions.0.group_norm": "Decoder.Chain_1.Chain_1.Residual.GroupNorm",
        "decoder.mid_block.resnets.1.norm1": "Decoder.Chain_1.Chain_1.Resnet_2.Chain.GroupNorm_1",
        "decoder.mid_block.resnets.1.norm2": "Decoder.Chain_1.Chain_1.Resnet_2.Chain.GroupNorm_2",
        "decoder.up_blocks.0.resnets.0.norm1": "Decoder.Chain_1.Chain_2.Resnet_1.Chain.GroupNorm_1",
        "decoder.up_blocks.0.resnets.0.norm2": "Decoder.Chain_1.Chain_2.Resnet_1.Chain.GroupNorm_2",
        "decoder.up_blocks.0.resnets.1.norm1": "Decoder.Chain_1.Chain_2.Resnet_2.Chain.GroupNorm_1",
        "decoder.up_blocks.0.resnets.1.norm2": "Decoder.Chain_1.Chain_2.Resnet_2.Chain.GroupNorm_2",
        "decoder.up_blocks.0.resnets.2.norm1": "Decoder.Chain_1.Chain_2.Resnet_3.Chain.GroupNorm_1",
        "decoder.up_blocks.0.resnets.2.norm2": "Decoder.Chain_1.Chain_2.Resnet_3.Chain.GroupNorm_2",
        "decoder.up_blocks.1.resnets.0.norm1": "Decoder.Chain_1.Chain_3.Resnet_1.Chain.GroupNorm_1",
        "decoder.up_blocks.1.resnets.0.norm2": "Decoder.Chain_1.Chain_3.Resnet_1.Chain.GroupNorm_2",
        "decoder.up_blocks.1.resnets.1.norm1": "Decoder.Chain_1.Chain_3.Resnet_2.Chain.GroupNorm_1",
        "decoder.up_blocks.1.resnets.1.norm2": "Decoder.Chain_1.Chain_3.Resnet_2.Chain.GroupNorm_2",
        "decoder.up_blocks.1.resnets.2.norm1": "Decoder.Chain_1.Chain_3.Resnet_3.Chain.GroupNorm_1",
        "decoder.up_blocks.1.resnets.2.norm2": "Decoder.Chain_1.Chain_3.Resnet_3.Chain.GroupNorm_2",
        "decoder.up_blocks.2.resnets.0.norm1": "Decoder.Chain_1.Chain_4.Resnet_1.Chain.GroupNorm_1",
        "encoder.down_blocks.2.resnets.0.conv2": "Encoder.Chain_1.Chain_3.Resnet_1.Chain.Conv2d_2",
        "encoder.down_blocks.2.resnets.1.conv1": "Encoder.Chain_1.Chain_3.Resnet_2.Chain.Conv2d_1",
        "encoder.down_blocks.2.resnets.1.conv2": "Encoder.Chain_1.Chain_3.Resnet_2.Chain.Conv2d_2",
        "encoder.down_blocks.2.downsamplers.0.conv": "Encoder.Chain_1.Chain_3.Downsample.Conv2d",
        "encoder.down_blocks.3.resnets.0.conv1": "Encoder.Chain_1.Chain_4.Resnet_1.Chain.Conv2d_1",
        "encoder.down_blocks.3.resnets.0.conv2": "Encoder.Chain_1.Chain_4.Resnet_1.Chain.Conv2d_2",
        "encoder.down_blocks.3.resnets.1.conv1": "Encoder.Chain_1.Chain_4.Resnet_2.Chain.Conv2d_1",
        "encoder.down_blocks.3.resnets.1.conv2": "Encoder.Chain_1.Chain_4.Resnet_2.Chain.Conv2d_2",
        "encoder.mid_block.resnets.0.conv1": "Encoder.Chain_1.Chain_5.Resnet_1.Chain.Conv2d_1",
        "encoder.mid_block.resnets.0.conv2": "Encoder.Chain_1.Chain_5.Resnet_1.Chain.Conv2d_2",
        "encoder.mid_block.resnets.1.conv1": "Encoder.Chain_1.Chain_5.Resnet_2.Chain.Conv2d_1",
        "encoder.mid_block.resnets.1.conv2": "Encoder.Chain_1.Chain_5.Resnet_2.Chain.Conv2d_2",
        "decoder.mid_block.resnets.0.conv1": "Decoder.Chain_1.Chain_1.Resnet_1.Chain.Conv2d_1",
        "decoder.mid_block.resnets.0.conv2": "Decoder.Chain_1.Chain_1.Resnet_1.Chain.Conv2d_2",
        "decoder.mid_block.resnets.1.conv1": "Decoder.Chain_1.Chain_1.Resnet_2.Chain.Conv2d_1",
        "decoder.mid_block.resnets.1.conv2": "Decoder.Chain_1.Chain_1.Resnet_2.Chain.Conv2d_2",
        "decoder.up_blocks.0.resnets.0.conv1": "Decoder.Chain_1.Chain_2.Resnet_1.Chain.Conv2d_1",
        "decoder.up_blocks.0.resnets.0.conv2": "Decoder.Chain_1.Chain_2.Resnet_1.Chain.Conv2d_2",
        "decoder.up_blocks.0.resnets.1.conv1": "Decoder.Chain_1.Chain_2.Resnet_2.Chain.Conv2d_1",
        "decoder.up_blocks.0.resnets.1.conv2": "Decoder.Chain_1.Chain_2.Resnet_2.Chain.Conv2d_2",
        "decoder.up_blocks.0.resnets.2.conv1": "Decoder.Chain_1.Chain_2.Resnet_3.Chain.Conv2d_1",
        "decoder.up_blocks.0.resnets.2.conv2": "Decoder.Chain_1.Chain_2.Resnet_3.Chain.Conv2d_2",
        "decoder.up_blocks.0.upsamplers.0.conv": "Decoder.Chain_1.Chain_2.Upsample.Conv2d",
        "decoder.up_blocks.1.resnets.0.conv1": "Decoder.Chain_1.Chain_3.Resnet_1.Chain.Conv2d_1",
        "decoder.up_blocks.1.resnets.0.conv2": "Decoder.Chain_1.Chain_3.Resnet_1.Chain.Conv2d_2",
        "decoder.up_blocks.1.resnets.1.conv1": "Decoder.Chain_1.Chain_3.Resnet_2.Chain.Conv2d_1",
        "decoder.up_blocks.1.resnets.1.conv2": "Decoder.Chain_1.Chain_3.Resnet_2.Chain.Conv2d_2",
        "decoder.up_blocks.1.resnets.2.conv1": "Decoder.Chain_1.Chain_3.Resnet_3.Chain.Conv2d_1",
        "decoder.up_blocks.1.resnets.2.conv2": "Decoder.Chain_1.Chain_3.Resnet_3.Chain.Conv2d_2",
        "decoder.up_blocks.1.upsamplers.0.conv": "Decoder.Chain_1.Chain_3.Upsample.Conv2d",
        "encoder.down_blocks.2.resnets.0.conv_shortcut": "Encoder.Chain_1.Chain_3.Resnet_1.Conv2d",
        "encoder.mid_block.attentions.0.to_q": "Encoder.Chain_1.Chain_5.Residual.SelfAttention2d.Distribute.Linear_1",
        "encoder.mid_block.attentions.0.to_k": "Encoder.Chain_1.Chain_5.Residual.SelfAttention2d.Distribute.Linear_2",
        "encoder.mid_block.attentions.0.to_v": "Encoder.Chain_1.Chain_5.Residual.SelfAttention2d.Distribute.Linear_3",
        "encoder.mid_block.attentions.0.to_out.0": "Encoder.Chain_1.Chain_5.Residual.SelfAttention2d.Linear",
        "decoder.mid_block.attentions.0.to_q": "Decoder.Chain_1.Chain_1.Residual.SelfAttention2d.Distribute.Linear_1",
        "decoder.mid_block.attentions.0.to_k": "Decoder.Chain_1.Chain_1.Residual.SelfAttention2d.Distribute.Linear_2",
        "decoder.mid_block.attentions.0.to_v": "Decoder.Chain_1.Chain_1.Residual.SelfAttention2d.Distribute.Linear_3",
        "decoder.mid_block.attentions.0.to_out.0": "Decoder.Chain_1.Chain_1.Residual.SelfAttention2d.Linear",
        "encoder.conv_out": "Encoder.Chain_2.Conv2d",
        "quant_conv": "Encoder.Chain_3.Conv2d",
        "post_quant_conv": "Decoder.Conv2d_1",
        "decoder.conv_in": "Decoder.Conv2d_2",
        "decoder.up_blocks.2.resnets.0.conv1": "Decoder.Chain_1.Chain_4.Resnet_1.Chain.Conv2d_1",
        "decoder.up_blocks.2.resnets.0.conv_shortcut": "Decoder.Chain_1.Chain_4.Resnet_1.Conv2d",
        "decoder.up_blocks.3.resnets.0.conv1": "Decoder.Chain_1.Chain_5.Resnet_1.Chain.Conv2d_1",
        "decoder.up_blocks.3.resnets.0.conv_shortcut": "Decoder.Chain_1.Chain_5.Resnet_1.Conv2d",
        "decoder.conv_out": "Decoder.Chain_2.Conv2d",
    },
    key_aliases={
        "encoder.mid_block.attentions.0.value": "encoder.mid_block.attentions.0.to_v",
        "decoder.mid_block.attentions.0.value": "decoder.mid_block.attentions.0.to_v",
        "decoder.mid_block.attentions.0.proj_attn": "decoder.mid_block.attentions.0.to_out.0",
        "encoder.mid_block.attentions.0.proj_attn": "encoder.mid_block.attentions.0.to_out.0",
        "encoder.mid_block.attentions.0.key": "encoder.mid_block.attentions.0.to_k",
        "decoder.mid_block.attentions.0.key": "decoder.mid_block.attentions.0.to_k",
        "decoder.mid_block.attentions.0.query": "decoder.mid_block.attentions.0.to_q",
        "encoder.mid_block.attentions.0.query": "encoder.mid_block.attentions.0.to_q",
    },
)
|
||||||
|
|
||||||
|
# Recipe for single-file checkpoints in which the autoencoder weights live under the
# `first_stage_model.` prefix and use the `encoder.down.N.block.M` /
# `decoder.up.N.block.M` / `*.mid.*` naming scheme.
#
# - `key_prefix`: prefix in front of every original key listed in `key_map`
#   (NOTE(review): assumed to be stripped/prepended by `WeightRecipe`; confirm).
# - `key_map`: original key prefix -> converted `Encoder.* / Decoder.*` key prefix.
#   Note that `decoder.up.3` maps to `Decoder.Chain_1.Chain_2` and `decoder.up.0`
#   to `Decoder.Chain_1.Chain_5`: the `up` indices run opposite to the chain indices.
# - `tensor_reshapes`: converted attention weights that are reshaped to `(512, 512)`
#   (presumably because the original stores them with extra singleton dims; verify).
civitai_recipe = WeightRecipe(
    key_prefix="first_stage_model.",
    key_map={
        # Encoder: input convolution and down blocks.
        "encoder.conv_in": "Encoder.Conv2d",
        "encoder.down.0.block.0.norm1": "Encoder.Chain_1.Chain_1.Resnet_1.Chain.GroupNorm_1",
        "encoder.down.0.block.0.conv1": "Encoder.Chain_1.Chain_1.Resnet_1.Chain.Conv2d_1",
        "encoder.down.0.block.0.norm2": "Encoder.Chain_1.Chain_1.Resnet_1.Chain.GroupNorm_2",
        "encoder.down.0.block.0.conv2": "Encoder.Chain_1.Chain_1.Resnet_1.Chain.Conv2d_2",
        "encoder.down.0.block.1.norm1": "Encoder.Chain_1.Chain_1.Resnet_2.Chain.GroupNorm_1",
        "encoder.down.0.block.1.conv1": "Encoder.Chain_1.Chain_1.Resnet_2.Chain.Conv2d_1",
        "encoder.down.0.block.1.norm2": "Encoder.Chain_1.Chain_1.Resnet_2.Chain.GroupNorm_2",
        "encoder.down.0.block.1.conv2": "Encoder.Chain_1.Chain_1.Resnet_2.Chain.Conv2d_2",
        "encoder.down.0.downsample.conv": "Encoder.Chain_1.Chain_1.Downsample.Conv2d",
        "encoder.down.1.block.0.norm1": "Encoder.Chain_1.Chain_2.Resnet_1.Chain.GroupNorm_1",
        "encoder.down.1.block.0.conv1": "Encoder.Chain_1.Chain_2.Resnet_1.Chain.Conv2d_1",
        "encoder.down.1.block.0.norm2": "Encoder.Chain_1.Chain_2.Resnet_1.Chain.GroupNorm_2",
        "encoder.down.1.block.0.conv2": "Encoder.Chain_1.Chain_2.Resnet_1.Chain.Conv2d_2",
        "encoder.down.1.block.0.nin_shortcut": "Encoder.Chain_1.Chain_2.Resnet_1.Conv2d",
        "encoder.down.1.block.1.norm1": "Encoder.Chain_1.Chain_2.Resnet_2.Chain.GroupNorm_1",
        "encoder.down.1.block.1.conv1": "Encoder.Chain_1.Chain_2.Resnet_2.Chain.Conv2d_1",
        "encoder.down.1.block.1.norm2": "Encoder.Chain_1.Chain_2.Resnet_2.Chain.GroupNorm_2",
        "encoder.down.1.block.1.conv2": "Encoder.Chain_1.Chain_2.Resnet_2.Chain.Conv2d_2",
        "encoder.down.1.downsample.conv": "Encoder.Chain_1.Chain_2.Downsample.Conv2d",
        "encoder.down.2.block.0.norm1": "Encoder.Chain_1.Chain_3.Resnet_1.Chain.GroupNorm_1",
        "encoder.down.2.block.0.conv1": "Encoder.Chain_1.Chain_3.Resnet_1.Chain.Conv2d_1",
        "encoder.down.2.block.0.norm2": "Encoder.Chain_1.Chain_3.Resnet_1.Chain.GroupNorm_2",
        "encoder.down.2.block.0.conv2": "Encoder.Chain_1.Chain_3.Resnet_1.Chain.Conv2d_2",
        "encoder.down.2.block.0.nin_shortcut": "Encoder.Chain_1.Chain_3.Resnet_1.Conv2d",
        "encoder.down.2.block.1.norm1": "Encoder.Chain_1.Chain_3.Resnet_2.Chain.GroupNorm_1",
        "encoder.down.2.block.1.conv1": "Encoder.Chain_1.Chain_3.Resnet_2.Chain.Conv2d_1",
        "encoder.down.2.block.1.norm2": "Encoder.Chain_1.Chain_3.Resnet_2.Chain.GroupNorm_2",
        "encoder.down.2.block.1.conv2": "Encoder.Chain_1.Chain_3.Resnet_2.Chain.Conv2d_2",
        "encoder.down.2.downsample.conv": "Encoder.Chain_1.Chain_3.Downsample.Conv2d",
        "encoder.down.3.block.0.norm1": "Encoder.Chain_1.Chain_4.Resnet_1.Chain.GroupNorm_1",
        "encoder.down.3.block.0.conv1": "Encoder.Chain_1.Chain_4.Resnet_1.Chain.Conv2d_1",
        "encoder.down.3.block.0.norm2": "Encoder.Chain_1.Chain_4.Resnet_1.Chain.GroupNorm_2",
        "encoder.down.3.block.0.conv2": "Encoder.Chain_1.Chain_4.Resnet_1.Chain.Conv2d_2",
        "encoder.down.3.block.1.norm1": "Encoder.Chain_1.Chain_4.Resnet_2.Chain.GroupNorm_1",
        "encoder.down.3.block.1.conv1": "Encoder.Chain_1.Chain_4.Resnet_2.Chain.Conv2d_1",
        "encoder.down.3.block.1.norm2": "Encoder.Chain_1.Chain_4.Resnet_2.Chain.GroupNorm_2",
        "encoder.down.3.block.1.conv2": "Encoder.Chain_1.Chain_4.Resnet_2.Chain.Conv2d_2",
        # Encoder: middle block (resnet, self-attention, resnet).
        "encoder.mid.block_1.norm1": "Encoder.Chain_1.Chain_5.Resnet_1.Chain.GroupNorm_1",
        "encoder.mid.block_1.conv1": "Encoder.Chain_1.Chain_5.Resnet_1.Chain.Conv2d_1",
        "encoder.mid.block_1.norm2": "Encoder.Chain_1.Chain_5.Resnet_1.Chain.GroupNorm_2",
        "encoder.mid.block_1.conv2": "Encoder.Chain_1.Chain_5.Resnet_1.Chain.Conv2d_2",
        "encoder.mid.attn_1.norm": "Encoder.Chain_1.Chain_5.Residual.GroupNorm",
        "encoder.mid.attn_1.q": "Encoder.Chain_1.Chain_5.Residual.SelfAttention2d.Distribute.Linear_1",
        "encoder.mid.attn_1.k": "Encoder.Chain_1.Chain_5.Residual.SelfAttention2d.Distribute.Linear_2",
        "encoder.mid.attn_1.v": "Encoder.Chain_1.Chain_5.Residual.SelfAttention2d.Distribute.Linear_3",
        "encoder.mid.attn_1.proj_out": "Encoder.Chain_1.Chain_5.Residual.SelfAttention2d.Linear",
        "encoder.mid.block_2.norm1": "Encoder.Chain_1.Chain_5.Resnet_2.Chain.GroupNorm_1",
        "encoder.mid.block_2.conv1": "Encoder.Chain_1.Chain_5.Resnet_2.Chain.Conv2d_1",
        "encoder.mid.block_2.norm2": "Encoder.Chain_1.Chain_5.Resnet_2.Chain.GroupNorm_2",
        "encoder.mid.block_2.conv2": "Encoder.Chain_1.Chain_5.Resnet_2.Chain.Conv2d_2",
        # Encoder output, quantization convolutions and decoder input.
        "encoder.norm_out": "Encoder.Chain_2.GroupNorm",
        "encoder.conv_out": "Encoder.Chain_2.Conv2d",
        "quant_conv": "Encoder.Chain_3.Conv2d",
        "post_quant_conv": "Decoder.Conv2d_1",
        "decoder.conv_in": "Decoder.Conv2d_2",
        # Decoder: middle block (resnet, self-attention, resnet).
        "decoder.mid.block_1.norm1": "Decoder.Chain_1.Chain_1.Resnet_1.Chain.GroupNorm_1",
        "decoder.mid.block_1.conv1": "Decoder.Chain_1.Chain_1.Resnet_1.Chain.Conv2d_1",
        "decoder.mid.block_1.norm2": "Decoder.Chain_1.Chain_1.Resnet_1.Chain.GroupNorm_2",
        "decoder.mid.block_1.conv2": "Decoder.Chain_1.Chain_1.Resnet_1.Chain.Conv2d_2",
        "decoder.mid.attn_1.norm": "Decoder.Chain_1.Chain_1.Residual.GroupNorm",
        "decoder.mid.attn_1.q": "Decoder.Chain_1.Chain_1.Residual.SelfAttention2d.Distribute.Linear_1",
        "decoder.mid.attn_1.k": "Decoder.Chain_1.Chain_1.Residual.SelfAttention2d.Distribute.Linear_2",
        "decoder.mid.attn_1.v": "Decoder.Chain_1.Chain_1.Residual.SelfAttention2d.Distribute.Linear_3",
        "decoder.mid.attn_1.proj_out": "Decoder.Chain_1.Chain_1.Residual.SelfAttention2d.Linear",
        "decoder.mid.block_2.norm1": "Decoder.Chain_1.Chain_1.Resnet_2.Chain.GroupNorm_1",
        "decoder.mid.block_2.conv1": "Decoder.Chain_1.Chain_1.Resnet_2.Chain.Conv2d_1",
        "decoder.mid.block_2.norm2": "Decoder.Chain_1.Chain_1.Resnet_2.Chain.GroupNorm_2",
        "decoder.mid.block_2.conv2": "Decoder.Chain_1.Chain_1.Resnet_2.Chain.Conv2d_2",
        # Decoder: up blocks, listed from `up.3` down to `up.0`.
        "decoder.up.3.block.0.norm1": "Decoder.Chain_1.Chain_2.Resnet_1.Chain.GroupNorm_1",
        "decoder.up.3.block.0.conv1": "Decoder.Chain_1.Chain_2.Resnet_1.Chain.Conv2d_1",
        "decoder.up.3.block.0.norm2": "Decoder.Chain_1.Chain_2.Resnet_1.Chain.GroupNorm_2",
        "decoder.up.3.block.0.conv2": "Decoder.Chain_1.Chain_2.Resnet_1.Chain.Conv2d_2",
        "decoder.up.3.block.1.norm1": "Decoder.Chain_1.Chain_2.Resnet_2.Chain.GroupNorm_1",
        "decoder.up.3.block.1.conv1": "Decoder.Chain_1.Chain_2.Resnet_2.Chain.Conv2d_1",
        "decoder.up.3.block.1.norm2": "Decoder.Chain_1.Chain_2.Resnet_2.Chain.GroupNorm_2",
        "decoder.up.3.block.1.conv2": "Decoder.Chain_1.Chain_2.Resnet_2.Chain.Conv2d_2",
        "decoder.up.3.block.2.norm1": "Decoder.Chain_1.Chain_2.Resnet_3.Chain.GroupNorm_1",
        "decoder.up.3.block.2.conv1": "Decoder.Chain_1.Chain_2.Resnet_3.Chain.Conv2d_1",
        "decoder.up.3.block.2.norm2": "Decoder.Chain_1.Chain_2.Resnet_3.Chain.GroupNorm_2",
        "decoder.up.3.block.2.conv2": "Decoder.Chain_1.Chain_2.Resnet_3.Chain.Conv2d_2",
        "decoder.up.3.upsample.conv": "Decoder.Chain_1.Chain_2.Upsample.Conv2d",
        "decoder.up.2.block.0.norm1": "Decoder.Chain_1.Chain_3.Resnet_1.Chain.GroupNorm_1",
        "decoder.up.2.block.0.conv1": "Decoder.Chain_1.Chain_3.Resnet_1.Chain.Conv2d_1",
        "decoder.up.2.block.0.norm2": "Decoder.Chain_1.Chain_3.Resnet_1.Chain.GroupNorm_2",
        "decoder.up.2.block.0.conv2": "Decoder.Chain_1.Chain_3.Resnet_1.Chain.Conv2d_2",
        "decoder.up.2.block.1.norm1": "Decoder.Chain_1.Chain_3.Resnet_2.Chain.GroupNorm_1",
        "decoder.up.2.block.1.conv1": "Decoder.Chain_1.Chain_3.Resnet_2.Chain.Conv2d_1",
        "decoder.up.2.block.1.norm2": "Decoder.Chain_1.Chain_3.Resnet_2.Chain.GroupNorm_2",
        "decoder.up.2.block.1.conv2": "Decoder.Chain_1.Chain_3.Resnet_2.Chain.Conv2d_2",
        "decoder.up.2.block.2.norm1": "Decoder.Chain_1.Chain_3.Resnet_3.Chain.GroupNorm_1",
        "decoder.up.2.block.2.conv1": "Decoder.Chain_1.Chain_3.Resnet_3.Chain.Conv2d_1",
        "decoder.up.2.block.2.norm2": "Decoder.Chain_1.Chain_3.Resnet_3.Chain.GroupNorm_2",
        "decoder.up.2.block.2.conv2": "Decoder.Chain_1.Chain_3.Resnet_3.Chain.Conv2d_2",
        "decoder.up.2.upsample.conv": "Decoder.Chain_1.Chain_3.Upsample.Conv2d",
        "decoder.up.1.block.0.norm1": "Decoder.Chain_1.Chain_4.Resnet_1.Chain.GroupNorm_1",
        "decoder.up.1.block.0.conv1": "Decoder.Chain_1.Chain_4.Resnet_1.Chain.Conv2d_1",
        "decoder.up.1.block.0.norm2": "Decoder.Chain_1.Chain_4.Resnet_1.Chain.GroupNorm_2",
        "decoder.up.1.block.0.conv2": "Decoder.Chain_1.Chain_4.Resnet_1.Chain.Conv2d_2",
        "decoder.up.1.block.0.nin_shortcut": "Decoder.Chain_1.Chain_4.Resnet_1.Conv2d",
        "decoder.up.1.block.1.norm1": "Decoder.Chain_1.Chain_4.Resnet_2.Chain.GroupNorm_1",
        "decoder.up.1.block.1.conv1": "Decoder.Chain_1.Chain_4.Resnet_2.Chain.Conv2d_1",
        "decoder.up.1.block.1.norm2": "Decoder.Chain_1.Chain_4.Resnet_2.Chain.GroupNorm_2",
        "decoder.up.1.block.1.conv2": "Decoder.Chain_1.Chain_4.Resnet_2.Chain.Conv2d_2",
        "decoder.up.1.block.2.norm1": "Decoder.Chain_1.Chain_4.Resnet_3.Chain.GroupNorm_1",
        "decoder.up.1.block.2.conv1": "Decoder.Chain_1.Chain_4.Resnet_3.Chain.Conv2d_1",
        "decoder.up.1.block.2.norm2": "Decoder.Chain_1.Chain_4.Resnet_3.Chain.GroupNorm_2",
        "decoder.up.1.block.2.conv2": "Decoder.Chain_1.Chain_4.Resnet_3.Chain.Conv2d_2",
        "decoder.up.1.upsample.conv": "Decoder.Chain_1.Chain_4.Upsample.Conv2d",
        "decoder.up.0.block.0.norm1": "Decoder.Chain_1.Chain_5.Resnet_1.Chain.GroupNorm_1",
        "decoder.up.0.block.0.conv1": "Decoder.Chain_1.Chain_5.Resnet_1.Chain.Conv2d_1",
        "decoder.up.0.block.0.norm2": "Decoder.Chain_1.Chain_5.Resnet_1.Chain.GroupNorm_2",
        "decoder.up.0.block.0.conv2": "Decoder.Chain_1.Chain_5.Resnet_1.Chain.Conv2d_2",
        "decoder.up.0.block.0.nin_shortcut": "Decoder.Chain_1.Chain_5.Resnet_1.Conv2d",
        "decoder.up.0.block.1.norm1": "Decoder.Chain_1.Chain_5.Resnet_2.Chain.GroupNorm_1",
        "decoder.up.0.block.1.conv1": "Decoder.Chain_1.Chain_5.Resnet_2.Chain.Conv2d_1",
        "decoder.up.0.block.1.norm2": "Decoder.Chain_1.Chain_5.Resnet_2.Chain.GroupNorm_2",
        "decoder.up.0.block.1.conv2": "Decoder.Chain_1.Chain_5.Resnet_2.Chain.Conv2d_2",
        "decoder.up.0.block.2.norm1": "Decoder.Chain_1.Chain_5.Resnet_3.Chain.GroupNorm_1",
        "decoder.up.0.block.2.conv1": "Decoder.Chain_1.Chain_5.Resnet_3.Chain.Conv2d_1",
        "decoder.up.0.block.2.norm2": "Decoder.Chain_1.Chain_5.Resnet_3.Chain.GroupNorm_2",
        "decoder.up.0.block.2.conv2": "Decoder.Chain_1.Chain_5.Resnet_3.Chain.Conv2d_2",
        # Decoder output.
        "decoder.norm_out": "Decoder.Chain_2.GroupNorm",
        "decoder.conv_out": "Decoder.Chain_2.Conv2d",
    },
    tensor_reshapes={
        "Encoder.Chain_1.Chain_5.Residual.SelfAttention2d.Distribute.Linear_1.weight": (512, 512),
        "Encoder.Chain_1.Chain_5.Residual.SelfAttention2d.Distribute.Linear_2.weight": (512, 512),
        "Encoder.Chain_1.Chain_5.Residual.SelfAttention2d.Distribute.Linear_3.weight": (512, 512),
        "Encoder.Chain_1.Chain_5.Residual.SelfAttention2d.Linear.weight": (512, 512),
        "Decoder.Chain_1.Chain_1.Residual.SelfAttention2d.Distribute.Linear_1.weight": (512, 512),
        "Decoder.Chain_1.Chain_1.Residual.SelfAttention2d.Distribute.Linear_2.weight": (512, 512),
        "Decoder.Chain_1.Chain_1.Residual.SelfAttention2d.Distribute.Linear_3.weight": (512, 512),
        "Decoder.Chain_1.Chain_1.Residual.SelfAttention2d.Linear.weight": (512, 512),
    },
)
|
||||||
|
|
||||||
|
|
||||||
|
# Autoencoder shipped in the `vae/` folder of the Stable Diffusion 1.5 repository.
# The original file is pinned to an exact revision and both files carry an expected
# SHA-256 digest; conversion uses `diffusers_recipe` and float32.
runwayml = Conversion(
    original=Hub(
        repo_id="stable-diffusion-v1-5/stable-diffusion-v1-5",
        filename="vae/diffusion_pytorch_model.safetensors",
        revision="f03de327dd89b501a01da37fc5240cf4fdba85a1",
        expected_sha256="a2b5134f4dbc140d9c11f11cba3233099e00af40f262f136c691fb7d38d2194c",
    ),
    converted=Hub(
        repo_id="refiners/sd15.autoencoder",
        filename="model.safetensors",
        expected_sha256="3f499ef1c668a8dfc72762d885f53cf0c3d3e98a393211906a8de5ae04e72058",
    ),
    recipe=diffusers_recipe,
    dtype=torch.float32,
)
|
||||||
|
# MSE fine-tuned VAE published as `stabilityai/sd-vae-ft-mse`
# (fixed `revision`), mapped with `diffusers_recipe`.
stability_mse = Conversion(
    original=Hub(
        repo_id="stabilityai/sd-vae-ft-mse",
        filename="diffusion_pytorch_model.safetensors",
        revision="31f26fdeee1355a5c34592e401dd41e45d25a493",
        expected_sha256="a1d993488569e928462932c8c38a0760b874d166399b14414135bd9c42df5815",
    ),
    converted=Hub(
        repo_id="refiners/sd15.autoencoder_mse",
        filename="model.safetensors",
        expected_sha256="4b484e70be3b898e2647985f066495672162a4e9ea1d1ee8bf1f7a7895180fce",
    ),
    recipe=diffusers_recipe,
    dtype=torch.float16,
)
|
||||||
|
# Autoencoder of the Juggernaut "reborn" checkpoint. The original is a
# single-file checkpoint fetched through `download_url` and mapped with
# `civitai_recipe`.
juggernaut_reborn = Conversion(
    original=Hub(
        repo_id="civitai/KandooAi/juggernaut",
        filename="reborn/onefile_fp16.safetensors",
        download_url="https://civitai.com/api/download/models/274039?type=Model&format=SafeTensor&size=pruned&fp=fp16",
        expected_sha256="338b85bc4f7628bc42cce336242e79154a57c2a4360531436e97f7793568f18c",
    ),
    converted=Hub(
        repo_id="refiners/sd15.juggernaut.reborn.autoencoder",
        filename="model.safetensors",
        expected_sha256="6f1ed875201344031f2a9ddee3ff40f455eb2a5ee4833070061a5d163cb23595",
    ),
    recipe=civitai_recipe,
    dtype=torch.float16,
)
|
||||||
|
# Autoencoder of the Juggernaut "aftermath" checkpoint. The original is a
# single-file fp32 checkpoint fetched through `download_url` and mapped with
# `civitai_recipe`; dtype is float32 to match.
juggernaut_aftermath = Conversion(
    original=Hub(
        repo_id="civitai/KandooAi/juggernaut",
        filename="aftermath/onefile_fp32.safetensors",
        download_url="https://civitai.com/api/download/models/127207?type=Model&format=SafeTensor&size=full&fp=fp32",
        expected_sha256="7ed60a2f5842de8e6755965ffbf2a0ee273429939f307233c22f7dffd8d7ae3d",
    ),
    converted=Hub(
        repo_id="refiners/sd15.juggernaut.aftermath.autoencoder",
        filename="model.safetensors",
        expected_sha256="fa7ef415e1854907aa46d81ed403bd1f6b0fd2f06c885545ab689f9f78fa7989",
    ),
    recipe=civitai_recipe,
    dtype=torch.float32,
)
|
||||||
|
# Autoencoder-wise, juggernaut_aftermath_inpainting is the same as juggernaut_aftermath.
|
||||||
|
# Autoencoder of the Realistic Stock Photo v3.0 checkpoint, mapped with
# `civitai_recipe`. The converted checksum equals the one declared for
# `stability_mse`, so the converted file is expected to be identical.
realistic_stock_photo_v3 = Conversion(
    original=Hub(
        repo_id="Yntec/realisticStockPhoto3",
        filename="realisticStockPhoto_v30SD15.safetensors",
        expected_sha256="f85affae9aae16276eaf670f810946e2d03c4d300791a0380f07653cb78ba31b",
        # Civitai URL kept for reference only; it is intentionally not passed to Hub.
        # download_url="https://civitai.com/api/download/models/524032?type=Model&format=SafeTensor&size=full&fp=fp16",
    ),
    converted=Hub(
        repo_id="refiners/sd15.realistic_stock_photo.v3_0.autoencoder",
        filename="model.safetensors",
        expected_sha256="4b484e70be3b898e2647985f066495672162a4e9ea1d1ee8bf1f7a7895180fce",
    ),
    recipe=civitai_recipe,
    dtype=torch.float16,
)
|
||||||
|
# Autoencoder of the Realistic Vision v5 checkpoint, fetched through
# `download_url` and mapped with `civitai_recipe`. The converted checksum
# equals the one declared for `stability_mse`.
# NOTE(review): `filename` says fp32 while the URL requests fp=fp16 and dtype
# is float16 - confirm the file name is intentional before renaming anything.
realistic_vision_v5 = Conversion(
    original=Hub(
        repo_id="civitai/SG_161222/realistic_vision",
        filename="v5/onefile_fp32.safetensors",
        download_url="https://civitai.com/api/download/models/130072?type=Model&format=SafeTensor&size=full&fp=fp16",
        expected_sha256="ef76aa2332635f4352463343beec9c5aae3bd107a73c0fd7221abbbcf8bd5470",
    ),
    converted=Hub(
        repo_id="refiners/sd15.realistic_vision.v5_1.autoencoder",
        filename="model.safetensors",
        expected_sha256="4b484e70be3b898e2647985f066495672162a4e9ea1d1ee8bf1f7a7895180fce",
    ),
    recipe=civitai_recipe,
    dtype=torch.float16,
)
|
50
src/refiners/conversion/models/autoencoder_sdxl.py
Normal file
50
src/refiners/conversion/models/autoencoder_sdxl.py
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
import torch
|
||||||
|
|
||||||
|
from refiners.conversion.models.autoencoder_sd15 import civitai_recipe, diffusers_recipe
|
||||||
|
from refiners.conversion.utils import Conversion, Hub
|
||||||
|
|
||||||
|
# SDXL autoencoder: weights come from the `vae/` file of the
# stable-diffusion-xl-base-1.0 repository (fixed `revision`) and are mapped
# with the `diffusers_recipe` imported from the SD 1.5 autoencoder module.
stability = Conversion(
    original=Hub(
        repo_id="stabilityai/stable-diffusion-xl-base-1.0",
        filename="vae/diffusion_pytorch_model.safetensors",
        revision="91704abbae38a0e1f60d433fb08d7f7d99081d21",
        expected_sha256="1598f3d24932bcfe6634e8b618ea1e30ab1d57f5aad13a6d2de446d2199f2341",
    ),
    converted=Hub(
        repo_id="refiners/sdxl.autoencoder",
        filename="model.safetensors",
        expected_sha256="6534be9990496fcb4086e5cf71e0ceb208b9f5c728823247c6a51e13564c38af",
    ),
    recipe=diffusers_recipe,
    dtype=torch.float16,
)
|
||||||
|
# SDXL VAE published as `madebyollin/sdxl-vae-fp16-fix` (fixed `revision`),
# mapped with `diffusers_recipe`.
madebyollin_fp16fix = Conversion(
    original=Hub(
        repo_id="madebyollin/sdxl-vae-fp16-fix",
        filename="diffusion_pytorch_model.safetensors",
        revision="6d1073461cd0b5a6ea4fda10b812e3d9d58a8330",
        expected_sha256="1b909373b28f2137098b0fd9dbc6f97f8410854f31f84ddc9fa04b077b0ace2c",
    ),
    converted=Hub(
        repo_id="refiners/sdxl.autoencoder_fp16fix",
        filename="model.safetensors",
        expected_sha256="ede1e84626900ebeb0e7911814b1ac98e8916327340f411cce2b77e056e84dd3",
    ),
    recipe=diffusers_recipe,
    dtype=torch.float16,
)
|
||||||
|
# Autoencoder of the Juggernaut XL v10 checkpoint, fetched through
# `download_url` and mapped with `civitai_recipe`. The converted checksum
# equals the one declared for `madebyollin_fp16fix`, so the converted file
# is expected to be identical.
juggernautXL_v10 = Conversion(
    original=Hub(
        repo_id="civitai/KandooAi/juggernautXL",
        filename="v10/onefile_fp16.safetensors",
        download_url="https://civitai.com/api/download/models/456194?type=Model&format=SafeTensor&size=full&fp=fp16",
        expected_sha256="d91d35736d8f2be038f760a9b0009a771ecf0a417e9b38c244a84ea4cb9c0c45",
    ),
    converted=Hub(
        repo_id="refiners/sdxl.juggernaut.v10.autoencoder",
        filename="model.safetensors",
        expected_sha256="ede1e84626900ebeb0e7911814b1ac98e8916327340f411cce2b77e056e84dd3",
    ),
    recipe=civitai_recipe,
    dtype=torch.float16,
)
|
290
src/refiners/conversion/models/clip_image_sd21.py
Normal file
290
src/refiners/conversion/models/clip_image_sd21.py
Normal file
|
@ -0,0 +1,290 @@
|
||||||
|
import torch
|
||||||
|
|
||||||
|
from refiners.conversion.utils import Conversion, Hub, WeightRecipe
|
||||||
|
|
||||||
|
diffusers_recipe = WeightRecipe(
|
||||||
|
key_map={
|
||||||
|
"vision_model.embeddings.patch_embedding": "ViTEmbeddings.Concatenate.Chain.PatchEncoder.Conv2d",
|
||||||
|
"vision_model.embeddings.position_embedding": "ViTEmbeddings.Residual.PositionalEncoder.Embedding",
|
||||||
|
"vision_model.embeddings.class_embedding": "ViTEmbeddings.Concatenate.ClassToken.Parameter.weight",
|
||||||
|
"vision_model.pre_layrnorm": "LayerNorm_1",
|
||||||
|
"vision_model.encoder.layers.0.layer_norm1": "Chain.TransformerLayer_1.Residual_1.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.0.layer_norm2": "Chain.TransformerLayer_1.Residual_2.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.1.layer_norm1": "Chain.TransformerLayer_2.Residual_1.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.1.layer_norm2": "Chain.TransformerLayer_2.Residual_2.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.2.layer_norm1": "Chain.TransformerLayer_3.Residual_1.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.2.layer_norm2": "Chain.TransformerLayer_3.Residual_2.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.3.layer_norm1": "Chain.TransformerLayer_4.Residual_1.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.3.layer_norm2": "Chain.TransformerLayer_4.Residual_2.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.4.layer_norm1": "Chain.TransformerLayer_5.Residual_1.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.4.layer_norm2": "Chain.TransformerLayer_5.Residual_2.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.5.layer_norm1": "Chain.TransformerLayer_6.Residual_1.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.5.layer_norm2": "Chain.TransformerLayer_6.Residual_2.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.6.layer_norm1": "Chain.TransformerLayer_7.Residual_1.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.6.layer_norm2": "Chain.TransformerLayer_7.Residual_2.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.7.layer_norm1": "Chain.TransformerLayer_8.Residual_1.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.7.layer_norm2": "Chain.TransformerLayer_8.Residual_2.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.8.layer_norm1": "Chain.TransformerLayer_9.Residual_1.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.8.layer_norm2": "Chain.TransformerLayer_9.Residual_2.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.9.layer_norm1": "Chain.TransformerLayer_10.Residual_1.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.9.layer_norm2": "Chain.TransformerLayer_10.Residual_2.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.10.layer_norm1": "Chain.TransformerLayer_11.Residual_1.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.10.layer_norm2": "Chain.TransformerLayer_11.Residual_2.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.11.layer_norm1": "Chain.TransformerLayer_12.Residual_1.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.11.layer_norm2": "Chain.TransformerLayer_12.Residual_2.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.12.layer_norm1": "Chain.TransformerLayer_13.Residual_1.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.12.layer_norm2": "Chain.TransformerLayer_13.Residual_2.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.13.layer_norm1": "Chain.TransformerLayer_14.Residual_1.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.13.layer_norm2": "Chain.TransformerLayer_14.Residual_2.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.14.layer_norm1": "Chain.TransformerLayer_15.Residual_1.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.14.layer_norm2": "Chain.TransformerLayer_15.Residual_2.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.15.layer_norm1": "Chain.TransformerLayer_16.Residual_1.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.15.layer_norm2": "Chain.TransformerLayer_16.Residual_2.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.16.layer_norm1": "Chain.TransformerLayer_17.Residual_1.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.16.layer_norm2": "Chain.TransformerLayer_17.Residual_2.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.17.layer_norm1": "Chain.TransformerLayer_18.Residual_1.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.17.layer_norm2": "Chain.TransformerLayer_18.Residual_2.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.18.layer_norm1": "Chain.TransformerLayer_19.Residual_1.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.18.layer_norm2": "Chain.TransformerLayer_19.Residual_2.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.19.layer_norm1": "Chain.TransformerLayer_20.Residual_1.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.19.layer_norm2": "Chain.TransformerLayer_20.Residual_2.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.20.layer_norm1": "Chain.TransformerLayer_21.Residual_1.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.20.layer_norm2": "Chain.TransformerLayer_21.Residual_2.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.21.layer_norm1": "Chain.TransformerLayer_22.Residual_1.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.21.layer_norm2": "Chain.TransformerLayer_22.Residual_2.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.22.layer_norm1": "Chain.TransformerLayer_23.Residual_1.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.22.layer_norm2": "Chain.TransformerLayer_23.Residual_2.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.23.layer_norm1": "Chain.TransformerLayer_24.Residual_1.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.23.layer_norm2": "Chain.TransformerLayer_24.Residual_2.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.24.layer_norm1": "Chain.TransformerLayer_25.Residual_1.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.24.layer_norm2": "Chain.TransformerLayer_25.Residual_2.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.25.layer_norm1": "Chain.TransformerLayer_26.Residual_1.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.25.layer_norm2": "Chain.TransformerLayer_26.Residual_2.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.26.layer_norm1": "Chain.TransformerLayer_27.Residual_1.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.26.layer_norm2": "Chain.TransformerLayer_27.Residual_2.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.27.layer_norm1": "Chain.TransformerLayer_28.Residual_1.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.27.layer_norm2": "Chain.TransformerLayer_28.Residual_2.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.28.layer_norm1": "Chain.TransformerLayer_29.Residual_1.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.28.layer_norm2": "Chain.TransformerLayer_29.Residual_2.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.29.layer_norm1": "Chain.TransformerLayer_30.Residual_1.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.29.layer_norm2": "Chain.TransformerLayer_30.Residual_2.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.30.layer_norm1": "Chain.TransformerLayer_31.Residual_1.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.30.layer_norm2": "Chain.TransformerLayer_31.Residual_2.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.31.layer_norm1": "Chain.TransformerLayer_32.Residual_1.LayerNorm",
|
||||||
|
"vision_model.encoder.layers.31.layer_norm2": "Chain.TransformerLayer_32.Residual_2.LayerNorm",
|
||||||
|
"vision_model.post_layernorm": "LayerNorm_2",
|
||||||
|
"vision_model.encoder.layers.0.self_attn.q_proj": "Chain.TransformerLayer_1.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"vision_model.encoder.layers.0.self_attn.k_proj": "Chain.TransformerLayer_1.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"vision_model.encoder.layers.0.self_attn.v_proj": "Chain.TransformerLayer_1.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"vision_model.encoder.layers.0.self_attn.out_proj": "Chain.TransformerLayer_1.Residual_1.SelfAttention.Linear",
|
||||||
|
"vision_model.encoder.layers.1.self_attn.q_proj": "Chain.TransformerLayer_2.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"vision_model.encoder.layers.1.self_attn.k_proj": "Chain.TransformerLayer_2.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"vision_model.encoder.layers.1.self_attn.v_proj": "Chain.TransformerLayer_2.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"vision_model.encoder.layers.1.self_attn.out_proj": "Chain.TransformerLayer_2.Residual_1.SelfAttention.Linear",
|
||||||
|
"vision_model.encoder.layers.2.self_attn.q_proj": "Chain.TransformerLayer_3.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"vision_model.encoder.layers.2.self_attn.k_proj": "Chain.TransformerLayer_3.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"vision_model.encoder.layers.2.self_attn.v_proj": "Chain.TransformerLayer_3.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"vision_model.encoder.layers.2.self_attn.out_proj": "Chain.TransformerLayer_3.Residual_1.SelfAttention.Linear",
|
||||||
|
"vision_model.encoder.layers.3.self_attn.q_proj": "Chain.TransformerLayer_4.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"vision_model.encoder.layers.3.self_attn.k_proj": "Chain.TransformerLayer_4.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"vision_model.encoder.layers.3.self_attn.v_proj": "Chain.TransformerLayer_4.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"vision_model.encoder.layers.3.self_attn.out_proj": "Chain.TransformerLayer_4.Residual_1.SelfAttention.Linear",
|
||||||
|
"vision_model.encoder.layers.4.self_attn.q_proj": "Chain.TransformerLayer_5.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"vision_model.encoder.layers.4.self_attn.k_proj": "Chain.TransformerLayer_5.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"vision_model.encoder.layers.4.self_attn.v_proj": "Chain.TransformerLayer_5.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"vision_model.encoder.layers.4.self_attn.out_proj": "Chain.TransformerLayer_5.Residual_1.SelfAttention.Linear",
|
||||||
|
"vision_model.encoder.layers.5.self_attn.q_proj": "Chain.TransformerLayer_6.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"vision_model.encoder.layers.5.self_attn.k_proj": "Chain.TransformerLayer_6.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"vision_model.encoder.layers.5.self_attn.v_proj": "Chain.TransformerLayer_6.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"vision_model.encoder.layers.5.self_attn.out_proj": "Chain.TransformerLayer_6.Residual_1.SelfAttention.Linear",
|
||||||
|
"vision_model.encoder.layers.6.self_attn.q_proj": "Chain.TransformerLayer_7.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"vision_model.encoder.layers.6.self_attn.k_proj": "Chain.TransformerLayer_7.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"vision_model.encoder.layers.6.self_attn.v_proj": "Chain.TransformerLayer_7.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"vision_model.encoder.layers.6.self_attn.out_proj": "Chain.TransformerLayer_7.Residual_1.SelfAttention.Linear",
|
||||||
|
"vision_model.encoder.layers.7.self_attn.q_proj": "Chain.TransformerLayer_8.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"vision_model.encoder.layers.7.self_attn.k_proj": "Chain.TransformerLayer_8.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"vision_model.encoder.layers.7.self_attn.v_proj": "Chain.TransformerLayer_8.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"vision_model.encoder.layers.7.self_attn.out_proj": "Chain.TransformerLayer_8.Residual_1.SelfAttention.Linear",
|
||||||
|
"vision_model.encoder.layers.8.self_attn.q_proj": "Chain.TransformerLayer_9.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"vision_model.encoder.layers.8.self_attn.k_proj": "Chain.TransformerLayer_9.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"vision_model.encoder.layers.8.self_attn.v_proj": "Chain.TransformerLayer_9.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"vision_model.encoder.layers.8.self_attn.out_proj": "Chain.TransformerLayer_9.Residual_1.SelfAttention.Linear",
|
||||||
|
"vision_model.encoder.layers.9.self_attn.q_proj": "Chain.TransformerLayer_10.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"vision_model.encoder.layers.9.self_attn.k_proj": "Chain.TransformerLayer_10.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"vision_model.encoder.layers.9.self_attn.v_proj": "Chain.TransformerLayer_10.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"vision_model.encoder.layers.9.self_attn.out_proj": "Chain.TransformerLayer_10.Residual_1.SelfAttention.Linear",
|
||||||
|
"vision_model.encoder.layers.10.self_attn.q_proj": "Chain.TransformerLayer_11.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"vision_model.encoder.layers.10.self_attn.k_proj": "Chain.TransformerLayer_11.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"vision_model.encoder.layers.10.self_attn.v_proj": "Chain.TransformerLayer_11.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"vision_model.encoder.layers.10.self_attn.out_proj": "Chain.TransformerLayer_11.Residual_1.SelfAttention.Linear",
|
||||||
|
"vision_model.encoder.layers.11.self_attn.q_proj": "Chain.TransformerLayer_12.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"vision_model.encoder.layers.11.self_attn.k_proj": "Chain.TransformerLayer_12.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"vision_model.encoder.layers.11.self_attn.v_proj": "Chain.TransformerLayer_12.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"vision_model.encoder.layers.11.self_attn.out_proj": "Chain.TransformerLayer_12.Residual_1.SelfAttention.Linear",
|
||||||
|
"vision_model.encoder.layers.12.self_attn.q_proj": "Chain.TransformerLayer_13.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"vision_model.encoder.layers.12.self_attn.k_proj": "Chain.TransformerLayer_13.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"vision_model.encoder.layers.12.self_attn.v_proj": "Chain.TransformerLayer_13.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"vision_model.encoder.layers.12.self_attn.out_proj": "Chain.TransformerLayer_13.Residual_1.SelfAttention.Linear",
|
||||||
|
"vision_model.encoder.layers.13.self_attn.q_proj": "Chain.TransformerLayer_14.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"vision_model.encoder.layers.13.self_attn.k_proj": "Chain.TransformerLayer_14.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"vision_model.encoder.layers.13.self_attn.v_proj": "Chain.TransformerLayer_14.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"vision_model.encoder.layers.13.self_attn.out_proj": "Chain.TransformerLayer_14.Residual_1.SelfAttention.Linear",
|
||||||
|
"vision_model.encoder.layers.14.self_attn.q_proj": "Chain.TransformerLayer_15.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"vision_model.encoder.layers.14.self_attn.k_proj": "Chain.TransformerLayer_15.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"vision_model.encoder.layers.14.self_attn.v_proj": "Chain.TransformerLayer_15.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"vision_model.encoder.layers.14.self_attn.out_proj": "Chain.TransformerLayer_15.Residual_1.SelfAttention.Linear",
|
||||||
|
"vision_model.encoder.layers.15.self_attn.q_proj": "Chain.TransformerLayer_16.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"vision_model.encoder.layers.15.self_attn.k_proj": "Chain.TransformerLayer_16.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"vision_model.encoder.layers.15.self_attn.v_proj": "Chain.TransformerLayer_16.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"vision_model.encoder.layers.15.self_attn.out_proj": "Chain.TransformerLayer_16.Residual_1.SelfAttention.Linear",
|
||||||
|
"vision_model.encoder.layers.16.self_attn.q_proj": "Chain.TransformerLayer_17.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"vision_model.encoder.layers.16.self_attn.k_proj": "Chain.TransformerLayer_17.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"vision_model.encoder.layers.16.self_attn.v_proj": "Chain.TransformerLayer_17.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"vision_model.encoder.layers.16.self_attn.out_proj": "Chain.TransformerLayer_17.Residual_1.SelfAttention.Linear",
|
||||||
|
"vision_model.encoder.layers.17.self_attn.q_proj": "Chain.TransformerLayer_18.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"vision_model.encoder.layers.17.self_attn.k_proj": "Chain.TransformerLayer_18.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"vision_model.encoder.layers.17.self_attn.v_proj": "Chain.TransformerLayer_18.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"vision_model.encoder.layers.17.self_attn.out_proj": "Chain.TransformerLayer_18.Residual_1.SelfAttention.Linear",
|
||||||
|
"vision_model.encoder.layers.18.self_attn.q_proj": "Chain.TransformerLayer_19.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"vision_model.encoder.layers.18.self_attn.k_proj": "Chain.TransformerLayer_19.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"vision_model.encoder.layers.18.self_attn.v_proj": "Chain.TransformerLayer_19.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"vision_model.encoder.layers.18.self_attn.out_proj": "Chain.TransformerLayer_19.Residual_1.SelfAttention.Linear",
|
||||||
|
"vision_model.encoder.layers.19.self_attn.q_proj": "Chain.TransformerLayer_20.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"vision_model.encoder.layers.19.self_attn.k_proj": "Chain.TransformerLayer_20.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"vision_model.encoder.layers.19.self_attn.v_proj": "Chain.TransformerLayer_20.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"vision_model.encoder.layers.19.self_attn.out_proj": "Chain.TransformerLayer_20.Residual_1.SelfAttention.Linear",
|
||||||
|
"vision_model.encoder.layers.20.self_attn.q_proj": "Chain.TransformerLayer_21.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"vision_model.encoder.layers.20.self_attn.k_proj": "Chain.TransformerLayer_21.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"vision_model.encoder.layers.20.self_attn.v_proj": "Chain.TransformerLayer_21.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"vision_model.encoder.layers.20.self_attn.out_proj": "Chain.TransformerLayer_21.Residual_1.SelfAttention.Linear",
|
||||||
|
"vision_model.encoder.layers.21.self_attn.q_proj": "Chain.TransformerLayer_22.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"vision_model.encoder.layers.21.self_attn.k_proj": "Chain.TransformerLayer_22.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"vision_model.encoder.layers.21.self_attn.v_proj": "Chain.TransformerLayer_22.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"vision_model.encoder.layers.21.self_attn.out_proj": "Chain.TransformerLayer_22.Residual_1.SelfAttention.Linear",
|
||||||
|
"vision_model.encoder.layers.22.self_attn.q_proj": "Chain.TransformerLayer_23.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"vision_model.encoder.layers.22.self_attn.k_proj": "Chain.TransformerLayer_23.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"vision_model.encoder.layers.22.self_attn.v_proj": "Chain.TransformerLayer_23.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"vision_model.encoder.layers.22.self_attn.out_proj": "Chain.TransformerLayer_23.Residual_1.SelfAttention.Linear",
|
||||||
|
"vision_model.encoder.layers.23.self_attn.q_proj": "Chain.TransformerLayer_24.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"vision_model.encoder.layers.23.self_attn.k_proj": "Chain.TransformerLayer_24.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"vision_model.encoder.layers.23.self_attn.v_proj": "Chain.TransformerLayer_24.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"vision_model.encoder.layers.23.self_attn.out_proj": "Chain.TransformerLayer_24.Residual_1.SelfAttention.Linear",
|
||||||
|
"vision_model.encoder.layers.24.self_attn.q_proj": "Chain.TransformerLayer_25.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"vision_model.encoder.layers.24.self_attn.k_proj": "Chain.TransformerLayer_25.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"vision_model.encoder.layers.24.self_attn.v_proj": "Chain.TransformerLayer_25.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"vision_model.encoder.layers.24.self_attn.out_proj": "Chain.TransformerLayer_25.Residual_1.SelfAttention.Linear",
|
||||||
|
"vision_model.encoder.layers.25.self_attn.q_proj": "Chain.TransformerLayer_26.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"vision_model.encoder.layers.25.self_attn.k_proj": "Chain.TransformerLayer_26.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"vision_model.encoder.layers.25.self_attn.v_proj": "Chain.TransformerLayer_26.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"vision_model.encoder.layers.25.self_attn.out_proj": "Chain.TransformerLayer_26.Residual_1.SelfAttention.Linear",
|
||||||
|
"vision_model.encoder.layers.26.self_attn.q_proj": "Chain.TransformerLayer_27.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"vision_model.encoder.layers.26.self_attn.k_proj": "Chain.TransformerLayer_27.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"vision_model.encoder.layers.26.self_attn.v_proj": "Chain.TransformerLayer_27.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"vision_model.encoder.layers.26.self_attn.out_proj": "Chain.TransformerLayer_27.Residual_1.SelfAttention.Linear",
|
||||||
|
"vision_model.encoder.layers.27.self_attn.q_proj": "Chain.TransformerLayer_28.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"vision_model.encoder.layers.27.self_attn.k_proj": "Chain.TransformerLayer_28.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"vision_model.encoder.layers.27.self_attn.v_proj": "Chain.TransformerLayer_28.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"vision_model.encoder.layers.27.self_attn.out_proj": "Chain.TransformerLayer_28.Residual_1.SelfAttention.Linear",
|
||||||
|
"vision_model.encoder.layers.28.self_attn.q_proj": "Chain.TransformerLayer_29.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"vision_model.encoder.layers.28.self_attn.k_proj": "Chain.TransformerLayer_29.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"vision_model.encoder.layers.28.self_attn.v_proj": "Chain.TransformerLayer_29.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"vision_model.encoder.layers.28.self_attn.out_proj": "Chain.TransformerLayer_29.Residual_1.SelfAttention.Linear",
|
||||||
|
"vision_model.encoder.layers.29.self_attn.q_proj": "Chain.TransformerLayer_30.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"vision_model.encoder.layers.29.self_attn.k_proj": "Chain.TransformerLayer_30.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"vision_model.encoder.layers.29.self_attn.v_proj": "Chain.TransformerLayer_30.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"vision_model.encoder.layers.29.self_attn.out_proj": "Chain.TransformerLayer_30.Residual_1.SelfAttention.Linear",
|
||||||
|
"vision_model.encoder.layers.30.self_attn.q_proj": "Chain.TransformerLayer_31.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"vision_model.encoder.layers.30.self_attn.k_proj": "Chain.TransformerLayer_31.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"vision_model.encoder.layers.30.self_attn.v_proj": "Chain.TransformerLayer_31.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"vision_model.encoder.layers.30.self_attn.out_proj": "Chain.TransformerLayer_31.Residual_1.SelfAttention.Linear",
|
||||||
|
"vision_model.encoder.layers.31.self_attn.q_proj": "Chain.TransformerLayer_32.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"vision_model.encoder.layers.31.self_attn.k_proj": "Chain.TransformerLayer_32.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"vision_model.encoder.layers.31.self_attn.v_proj": "Chain.TransformerLayer_32.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"vision_model.encoder.layers.31.self_attn.out_proj": "Chain.TransformerLayer_32.Residual_1.SelfAttention.Linear",
|
||||||
|
"vision_model.encoder.layers.0.mlp.fc1": "Chain.TransformerLayer_1.Residual_2.FeedForward.Linear_1",
|
||||||
|
"vision_model.encoder.layers.1.mlp.fc1": "Chain.TransformerLayer_2.Residual_2.FeedForward.Linear_1",
|
||||||
|
"vision_model.encoder.layers.2.mlp.fc1": "Chain.TransformerLayer_3.Residual_2.FeedForward.Linear_1",
|
||||||
|
"vision_model.encoder.layers.3.mlp.fc1": "Chain.TransformerLayer_4.Residual_2.FeedForward.Linear_1",
|
||||||
|
"vision_model.encoder.layers.4.mlp.fc1": "Chain.TransformerLayer_5.Residual_2.FeedForward.Linear_1",
|
||||||
|
"vision_model.encoder.layers.5.mlp.fc1": "Chain.TransformerLayer_6.Residual_2.FeedForward.Linear_1",
|
||||||
|
"vision_model.encoder.layers.6.mlp.fc1": "Chain.TransformerLayer_7.Residual_2.FeedForward.Linear_1",
|
||||||
|
"vision_model.encoder.layers.7.mlp.fc1": "Chain.TransformerLayer_8.Residual_2.FeedForward.Linear_1",
|
||||||
|
"vision_model.encoder.layers.8.mlp.fc1": "Chain.TransformerLayer_9.Residual_2.FeedForward.Linear_1",
|
||||||
|
"vision_model.encoder.layers.9.mlp.fc1": "Chain.TransformerLayer_10.Residual_2.FeedForward.Linear_1",
|
||||||
|
"vision_model.encoder.layers.10.mlp.fc1": "Chain.TransformerLayer_11.Residual_2.FeedForward.Linear_1",
|
||||||
|
"vision_model.encoder.layers.11.mlp.fc1": "Chain.TransformerLayer_12.Residual_2.FeedForward.Linear_1",
|
||||||
|
"vision_model.encoder.layers.12.mlp.fc1": "Chain.TransformerLayer_13.Residual_2.FeedForward.Linear_1",
|
||||||
|
"vision_model.encoder.layers.13.mlp.fc1": "Chain.TransformerLayer_14.Residual_2.FeedForward.Linear_1",
|
||||||
|
"vision_model.encoder.layers.14.mlp.fc1": "Chain.TransformerLayer_15.Residual_2.FeedForward.Linear_1",
|
||||||
|
"vision_model.encoder.layers.15.mlp.fc1": "Chain.TransformerLayer_16.Residual_2.FeedForward.Linear_1",
|
||||||
|
"vision_model.encoder.layers.16.mlp.fc1": "Chain.TransformerLayer_17.Residual_2.FeedForward.Linear_1",
|
||||||
|
"vision_model.encoder.layers.17.mlp.fc1": "Chain.TransformerLayer_18.Residual_2.FeedForward.Linear_1",
|
||||||
|
"vision_model.encoder.layers.18.mlp.fc1": "Chain.TransformerLayer_19.Residual_2.FeedForward.Linear_1",
|
||||||
|
"vision_model.encoder.layers.19.mlp.fc1": "Chain.TransformerLayer_20.Residual_2.FeedForward.Linear_1",
|
||||||
|
"vision_model.encoder.layers.20.mlp.fc1": "Chain.TransformerLayer_21.Residual_2.FeedForward.Linear_1",
|
||||||
|
"vision_model.encoder.layers.21.mlp.fc1": "Chain.TransformerLayer_22.Residual_2.FeedForward.Linear_1",
|
||||||
|
"vision_model.encoder.layers.22.mlp.fc1": "Chain.TransformerLayer_23.Residual_2.FeedForward.Linear_1",
|
||||||
|
"vision_model.encoder.layers.23.mlp.fc1": "Chain.TransformerLayer_24.Residual_2.FeedForward.Linear_1",
|
||||||
|
"vision_model.encoder.layers.24.mlp.fc1": "Chain.TransformerLayer_25.Residual_2.FeedForward.Linear_1",
|
||||||
|
"vision_model.encoder.layers.25.mlp.fc1": "Chain.TransformerLayer_26.Residual_2.FeedForward.Linear_1",
|
||||||
|
"vision_model.encoder.layers.26.mlp.fc1": "Chain.TransformerLayer_27.Residual_2.FeedForward.Linear_1",
|
||||||
|
"vision_model.encoder.layers.27.mlp.fc1": "Chain.TransformerLayer_28.Residual_2.FeedForward.Linear_1",
|
||||||
|
"vision_model.encoder.layers.28.mlp.fc1": "Chain.TransformerLayer_29.Residual_2.FeedForward.Linear_1",
|
||||||
|
"vision_model.encoder.layers.29.mlp.fc1": "Chain.TransformerLayer_30.Residual_2.FeedForward.Linear_1",
|
||||||
|
"vision_model.encoder.layers.30.mlp.fc1": "Chain.TransformerLayer_31.Residual_2.FeedForward.Linear_1",
|
||||||
|
"vision_model.encoder.layers.31.mlp.fc1": "Chain.TransformerLayer_32.Residual_2.FeedForward.Linear_1",
|
||||||
|
"vision_model.encoder.layers.0.mlp.fc2": "Chain.TransformerLayer_1.Residual_2.FeedForward.Linear_2",
|
||||||
|
"vision_model.encoder.layers.1.mlp.fc2": "Chain.TransformerLayer_2.Residual_2.FeedForward.Linear_2",
|
||||||
|
"vision_model.encoder.layers.2.mlp.fc2": "Chain.TransformerLayer_3.Residual_2.FeedForward.Linear_2",
|
||||||
|
"vision_model.encoder.layers.3.mlp.fc2": "Chain.TransformerLayer_4.Residual_2.FeedForward.Linear_2",
|
||||||
|
"vision_model.encoder.layers.4.mlp.fc2": "Chain.TransformerLayer_5.Residual_2.FeedForward.Linear_2",
|
||||||
|
"vision_model.encoder.layers.5.mlp.fc2": "Chain.TransformerLayer_6.Residual_2.FeedForward.Linear_2",
|
||||||
|
"vision_model.encoder.layers.6.mlp.fc2": "Chain.TransformerLayer_7.Residual_2.FeedForward.Linear_2",
|
||||||
|
"vision_model.encoder.layers.7.mlp.fc2": "Chain.TransformerLayer_8.Residual_2.FeedForward.Linear_2",
|
||||||
|
"vision_model.encoder.layers.8.mlp.fc2": "Chain.TransformerLayer_9.Residual_2.FeedForward.Linear_2",
|
||||||
|
"vision_model.encoder.layers.9.mlp.fc2": "Chain.TransformerLayer_10.Residual_2.FeedForward.Linear_2",
|
||||||
|
"vision_model.encoder.layers.10.mlp.fc2": "Chain.TransformerLayer_11.Residual_2.FeedForward.Linear_2",
|
||||||
|
"vision_model.encoder.layers.11.mlp.fc2": "Chain.TransformerLayer_12.Residual_2.FeedForward.Linear_2",
|
||||||
|
"vision_model.encoder.layers.12.mlp.fc2": "Chain.TransformerLayer_13.Residual_2.FeedForward.Linear_2",
|
||||||
|
"vision_model.encoder.layers.13.mlp.fc2": "Chain.TransformerLayer_14.Residual_2.FeedForward.Linear_2",
|
||||||
|
"vision_model.encoder.layers.14.mlp.fc2": "Chain.TransformerLayer_15.Residual_2.FeedForward.Linear_2",
|
||||||
|
"vision_model.encoder.layers.15.mlp.fc2": "Chain.TransformerLayer_16.Residual_2.FeedForward.Linear_2",
|
||||||
|
"vision_model.encoder.layers.16.mlp.fc2": "Chain.TransformerLayer_17.Residual_2.FeedForward.Linear_2",
|
||||||
|
"vision_model.encoder.layers.17.mlp.fc2": "Chain.TransformerLayer_18.Residual_2.FeedForward.Linear_2",
|
||||||
|
"vision_model.encoder.layers.18.mlp.fc2": "Chain.TransformerLayer_19.Residual_2.FeedForward.Linear_2",
|
||||||
|
"vision_model.encoder.layers.19.mlp.fc2": "Chain.TransformerLayer_20.Residual_2.FeedForward.Linear_2",
|
||||||
|
"vision_model.encoder.layers.20.mlp.fc2": "Chain.TransformerLayer_21.Residual_2.FeedForward.Linear_2",
|
||||||
|
"vision_model.encoder.layers.21.mlp.fc2": "Chain.TransformerLayer_22.Residual_2.FeedForward.Linear_2",
|
||||||
|
"vision_model.encoder.layers.22.mlp.fc2": "Chain.TransformerLayer_23.Residual_2.FeedForward.Linear_2",
|
||||||
|
"vision_model.encoder.layers.23.mlp.fc2": "Chain.TransformerLayer_24.Residual_2.FeedForward.Linear_2",
|
||||||
|
"vision_model.encoder.layers.24.mlp.fc2": "Chain.TransformerLayer_25.Residual_2.FeedForward.Linear_2",
|
||||||
|
"vision_model.encoder.layers.25.mlp.fc2": "Chain.TransformerLayer_26.Residual_2.FeedForward.Linear_2",
|
||||||
|
"vision_model.encoder.layers.26.mlp.fc2": "Chain.TransformerLayer_27.Residual_2.FeedForward.Linear_2",
|
||||||
|
"vision_model.encoder.layers.27.mlp.fc2": "Chain.TransformerLayer_28.Residual_2.FeedForward.Linear_2",
|
||||||
|
"vision_model.encoder.layers.28.mlp.fc2": "Chain.TransformerLayer_29.Residual_2.FeedForward.Linear_2",
|
||||||
|
"vision_model.encoder.layers.29.mlp.fc2": "Chain.TransformerLayer_30.Residual_2.FeedForward.Linear_2",
|
||||||
|
"vision_model.encoder.layers.30.mlp.fc2": "Chain.TransformerLayer_31.Residual_2.FeedForward.Linear_2",
|
||||||
|
"vision_model.encoder.layers.31.mlp.fc2": "Chain.TransformerLayer_32.Residual_2.FeedForward.Linear_2",
|
||||||
|
"visual_projection": "Linear",
|
||||||
|
},
|
||||||
|
tensor_reshapes={
|
||||||
|
"ViTEmbeddings.Concatenate.ClassToken.Parameter.weight": (1, 1280),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Conversion entry for the image encoder of Stable Diffusion 2.1 unCLIP.
unclip_21 = Conversion(
    # Upstream weights: a single safetensors file, pinned to a fixed revision and
    # paired with the SHA-256 digest the file is expected to have.
    original=Hub(
        repo_id="stabilityai/stable-diffusion-2-1-unclip",
        filename="image_encoder/model.safetensors",
        revision="e99f66a92bdcd1b0fb0d4b6a9b81b3b37d8bea44",
        expected_sha256="9d277aeaed13ebc0ef33e56027b826a74433d45d755b3e0b3829440c1ea7b72e",
    ),
    # Refiners-format weights: repository, file name and expected digest.
    converted=Hub(
        repo_id="refiners/sd21.unclip.image_encoder",
        filename="model.safetensors",
        expected_sha256="c9f43e359e06f1a237324c4c11734d6acd7fbddbfd3b1ed4f1b525267bedb812",
    ),
    recipe=diffusers_recipe,
    dtype=torch.float16,
)
|
291
src/refiners/conversion/models/clip_text_sd15.py
Normal file
291
src/refiners/conversion/models/clip_text_sd15.py
Normal file
|
@ -0,0 +1,291 @@
|
||||||
|
import torch
|
||||||
|
|
||||||
|
from refiners.conversion.utils import Conversion, Hub, WeightRecipe
|
||||||
|
|
||||||
|
# Key mapping from the diffusers-style CLIP text encoder layout ("text_model.*")
# to refiners layer paths, covering 12 transformer layers.
diffusers_recipe = WeightRecipe(
    key_map={
        "text_model.embeddings.token_embedding": "Sum.TokenEncoder",
        "text_model.embeddings.position_embedding": "Sum.PositionalEncoder.Embedding",
        "text_model.final_layer_norm": "LayerNorm",
        "text_projection": "Linear",
        # Per-layer entries: source layers are 0-based, TransformerLayer_N is 1-based.
        # Layers are visited in string order (0, 1, 10, 11, 2, ..., 9) so that the
        # resulting dict has its keys inserted in lexicographic order.
        **{
            f"text_model.encoder.layers.{layer}.{source}": f"TransformerLayer_{layer + 1}.{target}"
            for layer in sorted(range(12), key=str)
            for source, target in (
                ("layer_norm1", "Residual_1.LayerNorm"),
                ("layer_norm2", "Residual_2.LayerNorm"),
                ("mlp.fc1", "Residual_2.FeedForward.Linear_1"),
                ("mlp.fc2", "Residual_2.FeedForward.Linear_2"),
                ("self_attn.k_proj", "Residual_1.SelfAttention.Distribute.Linear_2"),
                ("self_attn.out_proj", "Residual_1.SelfAttention.Linear"),
                ("self_attn.q_proj", "Residual_1.SelfAttention.Distribute.Linear_1"),
                ("self_attn.v_proj", "Residual_1.SelfAttention.Distribute.Linear_3"),
            )
        },
    },
)
|
||||||
|
|
||||||
|
|
||||||
|
# Key mapping for single-file checkpoints whose text encoder lives under the
# "cond_stage_model.transformer.text_model." prefix; source keys below are written
# relative to that prefix (how the prefix is applied is up to WeightRecipe).
civitai_recipe = WeightRecipe(
    key_prefix="cond_stage_model.transformer.text_model.",
    key_map={
        "embeddings.token_embedding": "Sum.TokenEncoder",
        "embeddings.position_embedding": "Sum.PositionalEncoder.Embedding",
        # Per-layer entries: source layers are 0-based, TransformerLayer_N is 1-based.
        **{
            f"encoder.layers.{layer}.{source}": f"TransformerLayer_{layer + 1}.{target}"
            for layer in range(12)
            for source, target in (
                ("layer_norm1", "Residual_1.LayerNorm"),
                ("self_attn.q_proj", "Residual_1.SelfAttention.Distribute.Linear_1"),
                ("self_attn.k_proj", "Residual_1.SelfAttention.Distribute.Linear_2"),
                ("self_attn.v_proj", "Residual_1.SelfAttention.Distribute.Linear_3"),
                ("self_attn.out_proj", "Residual_1.SelfAttention.Linear"),
                ("layer_norm2", "Residual_2.LayerNorm"),
                ("mlp.fc1", "Residual_2.FeedForward.Linear_1"),
                ("mlp.fc2", "Residual_2.FeedForward.Linear_2"),
            )
        },
        "final_layer_norm": "LayerNorm",
    },
)
|
||||||
|
|
||||||
|
|
||||||
|
# Conversion entry for the Stable Diffusion 1.5 text encoder (diffusers layout).
runwayml = Conversion(
    # Upstream weights, pinned to a fixed revision with the expected SHA-256 digest.
    original=Hub(
        repo_id="stable-diffusion-v1-5/stable-diffusion-v1-5",
        filename="text_encoder/model.safetensors",
        revision="f03de327dd89b501a01da37fc5240cf4fdba85a1",
        expected_sha256="d008943c017f0092921106440254dbbe00b6a285f7883ec8ba160c3faad88334",
    ),
    # Refiners-format weights: repository, file name and expected digest.
    converted=Hub(
        repo_id="refiners/sd15.text_encoder",
        filename="model.safetensors",
        expected_sha256="88a171b02f5bad8e61723f9c065ddb00351970a6e3f7f5a2a46970700e90f69d",
    ),
    recipe=diffusers_recipe,
    dtype=torch.float16,
)
|
||||||
|
# Conversion entry for the text encoder of the Juggernaut "reborn" single-file checkpoint.
juggernaut_reborn = Conversion(
    # Source checkpoint: identified by a "civitai/..." repo id and fetched through an
    # explicit download_url (pruned fp16 variant), with the expected SHA-256 digest.
    original=Hub(
        repo_id="civitai/KandooAi/juggernaut",
        filename="reborn/onefile_fp16.safetensors",
        download_url="https://civitai.com/api/download/models/274039?type=Model&format=SafeTensor&size=pruned&fp=fp16",
        expected_sha256="338b85bc4f7628bc42cce336242e79154a57c2a4360531436e97f7793568f18c",
    ),
    # Refiners-format weights: repository, file name and expected digest.
    converted=Hub(
        repo_id="refiners/sd15.juggernaut.reborn.text_encoder",
        filename="model.safetensors",
        expected_sha256="c649e079cbef5ccd79ef643acac29363d153d5a5f719e9c2e1893c96ec8b2357",
    ),
    recipe=civitai_recipe,
    dtype=torch.float16,
)
|
||||||
|
# Conversion entry for the text encoder of the Juggernaut "aftermath" single-file checkpoint.
juggernaut_aftermath = Conversion(
    # Source checkpoint: full fp32 variant fetched through an explicit download_url,
    # with the expected SHA-256 digest.
    original=Hub(
        repo_id="civitai/KandooAi/juggernaut",
        filename="aftermath/onefile_fp32.safetensors",
        download_url="https://civitai.com/api/download/models/127207?type=Model&format=SafeTensor&size=full&fp=fp32",
        expected_sha256="7ed60a2f5842de8e6755965ffbf2a0ee273429939f307233c22f7dffd8d7ae3d",
    ),
    # Refiners-format weights: repository, file name and expected digest.
    converted=Hub(
        repo_id="refiners/sd15.juggernaut.aftermath.text_encoder",
        filename="model.safetensors",
        expected_sha256="2448c3148ae1c9658c18b136e5d6eebcc3c512dd3e1df71d3cb9bb1c83c19db1",
    ),
    recipe=civitai_recipe,
    # Unlike the other SD 1.5 entries in this file, this one is kept in float32,
    # matching the fp32 source file.
    dtype=torch.float32,
)
|
||||||
|
# Conversion entry for the text encoder of the Realistic Stock Photo v3.0 single-file checkpoint.
realistic_stock_photo_v3 = Conversion(
    # Source checkpoint: taken from the "Yntec/realisticStockPhoto3" repo, with the
    # expected SHA-256 digest. No revision is pinned here.
    original=Hub(
        repo_id="Yntec/realisticStockPhoto3",
        filename="realisticStockPhoto_v30SD15.safetensors",
        expected_sha256="f85affae9aae16276eaf670f810946e2d03c4d300791a0380f07653cb78ba31b",
        # Civitai download URL, intentionally left disabled: the repo above is used instead.
        # NOTE(review): reason for disabling is not recorded here — confirm before re-enabling.
        # download_url="https://civitai.com/api/download/models/524032?type=Model&format=SafeTensor&size=full&fp=fp16",
    ),
    # Refiners-format weights: repository, file name and expected digest.
    converted=Hub(
        repo_id="refiners/sd15.realistic_stock_photo.v3_0.text_encoder",
        filename="model.safetensors",
        expected_sha256="6d37d5b8ea7f7628cdaada6ce61bbba3914143d8f88d5b722c120ffdcb408512",
    ),
    recipe=civitai_recipe,
    dtype=torch.float16,
)
|
||||||
|
# Conversion entry for the text encoder of the Realistic Vision v5 single-file checkpoint.
realistic_vision_v5 = Conversion(
    # Source checkpoint fetched through an explicit download_url, with the expected
    # SHA-256 digest.
    # NOTE(review): the local filename says "fp32" while the download URL requests
    # "fp=fp16" and dtype below is float16 — confirm which precision the pinned file
    # really has. Renaming would change the cache path, so this is only flagged here.
    original=Hub(
        repo_id="civitai/SG_161222/realistic_vision",
        filename="v5/onefile_fp32.safetensors",
        download_url="https://civitai.com/api/download/models/130072?type=Model&format=SafeTensor&size=full&fp=fp16",
        expected_sha256="ef76aa2332635f4352463343beec9c5aae3bd107a73c0fd7221abbbcf8bd5470",
    ),
    # Refiners-format weights: repository, file name and expected digest.
    converted=Hub(
        repo_id="refiners/sd15.realistic_vision.v5_1.text_encoder",
        filename="model.safetensors",
        expected_sha256="b34349a39f1ad882885cc5da917aeaa92935c1b80eefbce03a6c46959ed97b10",
    ),
    recipe=civitai_recipe,
    dtype=torch.float16,
)
|
157
src/refiners/conversion/models/clip_text_sdxl.py
Normal file
157
src/refiners/conversion/models/clip_text_sdxl.py
Normal file
|
@ -0,0 +1,157 @@
|
||||||
|
import logging
|
||||||
|
from typing import NamedTuple, cast
|
||||||
|
|
||||||
|
import requests
|
||||||
|
import torch
|
||||||
|
from torch import nn
|
||||||
|
from transformers import CLIPTextModel, CLIPTextModelWithProjection # pyright: ignore[reportMissingTypeStubs]
|
||||||
|
|
||||||
|
import refiners.fluxion.layers as fl
|
||||||
|
from refiners.conversion.model_converter import ModelConverter
|
||||||
|
from refiners.conversion.utils import Conversion, Hub
|
||||||
|
from refiners.fluxion.utils import save_to_safetensors
|
||||||
|
from refiners.foundationals.clip.text_encoder import CLIPTextEncoder, CLIPTextEncoderG, CLIPTextEncoderL
|
||||||
|
from refiners.foundationals.clip.tokenizer import CLIPTokenizer
|
||||||
|
from refiners.foundationals.latent_diffusion.stable_diffusion_xl.text_encoder import DoubleTextEncoder
|
||||||
|
|
||||||
|
|
||||||
|
class CLIPTextEncoderConfig(NamedTuple):
    """Typed view of the `config` attribute of a transformers CLIP text model.

    It is only used as a `cast` target so that the configuration fields read
    during conversion are statically typed; it is never instantiated here.
    """

    architectures: list[str]
    vocab_size: int
    hidden_size: int  # used as the refiners `embedding_dim`
    intermediate_size: int  # used as the refiners `feedforward_dim`
    num_hidden_layers: int  # used as the refiners `num_layers`
    num_attention_heads: int
    hidden_act: str  # compared against "quick_gelu" to select the activation
    layer_norm_eps: float
    projection_dim: int  # output size of the optional projection layer
|
||||||
|
|
||||||
|
|
||||||
|
class ModelConverterHubDuo(Conversion):
    """Conversion producing the weights of a `DoubleTextEncoder` from a multi-folder repository.

    The two text encoders are loaded from the `text_encoder` and
    `text_encoder_2` subfolders of the original repository, mapped onto
    refiners modules with `ModelConverter`, then merged into one state dict.
    """

    def __init__(
        self,
        original_repo_id: str,
        converted: Hub,
        dtype: torch.dtype,
    ) -> None:
        """Initialize the conversion.

        Args:
            original_repo_id: Identifier of the repository holding the original text encoders.
            converted: Location (and expected hash) of the converted weights.
            dtype: Data type the converted state dict is cast to before saving.
        """
        # only the repository id is meaningful: the models are loaded with `from_pretrained`,
        # so the filename and hash of the original are left empty
        self.original = Hub(repo_id=original_repo_id, filename="", expected_sha256="")
        self.converted = converted
        self.dtype = dtype

    @staticmethod
    def setup_converter(source_path: str, subfolder: str, with_projection: bool) -> ModelConverter:
        """Build and run a `ModelConverter` between a transformers CLIP model and its refiners counterpart.

        Args:
            source_path: Value forwarded to `from_pretrained` as `pretrained_model_name_or_path`.
            subfolder: Subfolder of the source holding the text encoder to load.
            with_projection: Whether to load the variant with a projection layer
                (and append a matching bias-free `Linear` to the refiners model).

        Returns:
            The converter, after a successful run.

        Raises:
            RuntimeError: If the converter run reports a failure.
        """
        # load the transformers model, with or without its projection head
        if with_projection:
            source_cls = CLIPTextModelWithProjection
        else:
            source_cls = CLIPTextModel
        source: nn.Module = source_cls.from_pretrained(  # pyright: ignore[reportUnknownMemberType]
            pretrained_model_name_or_path=source_path,
            subfolder=subfolder,
            low_cpu_mem_usage=False,
        )
        assert isinstance(source, nn.Module), "Source model is not a nn.Module"

        # read the architecture hyper-parameters from the transformers config
        config = cast(CLIPTextEncoderConfig, source.config)  # pyright: ignore[reportArgumentType, reportUnknownMemberType]

        # build the equivalent refiners model
        target = CLIPTextEncoder(
            embedding_dim=config.hidden_size,
            num_layers=config.num_hidden_layers,
            num_attention_heads=config.num_attention_heads,
            feedforward_dim=config.intermediate_size,
            use_quick_gelu=config.hidden_act == "quick_gelu",
        )
        if with_projection:
            projection_layer = fl.Linear(
                in_features=config.hidden_size,
                out_features=config.projection_dim,
                bias=False,
            )
            target.append(module=projection_layer)

        # the source model is fed tokens, the target model is fed the raw text
        text = "What a nice cat you have there!"
        tokens = target.ensure_find(CLIPTokenizer)(text)

        # run the converter to find the mapping between both models
        converter = ModelConverter(
            source_model=source,
            target_model=target,
            skip_output_check=True,
            verbose=False,
        )
        succeeded = converter.run(source_args=(tokens,), target_args=(text,))
        if not succeeded:
            raise RuntimeError("Model conversion failed")

        return converter

    def convert(self) -> None:
        """Convert both text encoders, save the merged state dict and check its hash."""
        logging.info(f"Converting {self.original.repo_id} to {self.converted.repo_id}")

        # one converter per text encoder; only the second one carries a projection
        converter_l = self.setup_converter(
            source_path=self.original.repo_id,
            subfolder="text_encoder",
            with_projection=False,
        )
        converter_g = self.setup_converter(
            source_path=self.original.repo_id,
            subfolder="text_encoder_2",
            with_projection=True,
        )

        # first text encoder: CLIPTextEncoderL
        text_encoder_l = CLIPTextEncoderL()
        text_encoder_l.load_state_dict(state_dict=converter_l.get_state_dict())

        # second text encoder: CLIPTextEncoderG, temporarily extended with the projection
        # so that the converted state dict (which includes the projection) can be loaded
        converted_projection = cast(CLIPTextEncoder, converter_g.target_model)[-1]
        assert isinstance(converted_projection, fl.Linear)
        text_encoder_g = CLIPTextEncoderG()
        text_encoder_g.append(module=converted_projection)
        text_encoder_g.load_state_dict(state_dict=converter_g.get_state_dict())

        # detach the projection again: DoubleTextEncoder takes it as a separate argument
        projection = text_encoder_g.pop(index=-1)
        assert isinstance(projection, fl.Linear)
        double_text_encoder = DoubleTextEncoder(
            text_encoder_l=text_encoder_l,
            text_encoder_g=text_encoder_g,
            projection=projection,
        )

        # extract the merged state dict and cast it to the requested dtype
        state_dict = self.change_dtype(double_text_encoder.state_dict(), self.dtype)

        # write the converted weights
        self.converted.local_path.parent.mkdir(parents=True, exist_ok=True)
        save_to_safetensors(self.converted.local_path, state_dict)

        # verify the written file; an HTTP failure on the remote check is only logged
        assert self.converted.check_local_hash()
        try:
            assert self.converted.check_remote_hash()
        except requests.exceptions.HTTPError:
            logging.warning(f"{self.converted.local_path} couldn't verify remote hash")
|
||||||
|
|
||||||
|
|
||||||
|
# Text encoders of the SDXL base 1.0 repository, saved in half precision.
stability = ModelConverterHubDuo(
    original_repo_id="stabilityai/stable-diffusion-xl-base-1.0",
    converted=Hub(
        repo_id="refiners/sdxl.text_encoder",
        filename="model.safetensors",
        expected_sha256="238685accd000683e937085fb3a9c147675f5a1d7775a6810696131e93ddb147",
    ),
    dtype=torch.float16,
)
|
||||||
|
# Text encoders of the Juggernaut X v10 repository, saved in half precision.
juggernautXL_v10 = ModelConverterHubDuo(
    original_repo_id="RunDiffusion/Juggernaut-X-v10",  # TODO(laurent): use file from civitai instead
    converted=Hub(
        repo_id="refiners/sdxl.juggernaut.v10.text_encoder",
        filename="model.safetensors",
        expected_sha256="50dde9c171e31d1c9dcd0539ba052e4fe69d90f126c812b0145da40a0a2c4361",
    ),
    dtype=torch.float16,
)
|
345
src/refiners/conversion/models/controllora_sdxl.py
Normal file
345
src/refiners/conversion/models/controllora_sdxl.py
Normal file
|
@ -0,0 +1,345 @@
|
||||||
|
import logging
|
||||||
|
|
||||||
|
import requests
|
||||||
|
import torch
|
||||||
|
from huggingface_hub import hf_hub_download # type: ignore
|
||||||
|
from torch import Tensor
|
||||||
|
from torch.nn import Parameter as TorchParameter
|
||||||
|
|
||||||
|
from refiners.conversion.utils import Conversion, Hub
|
||||||
|
from refiners.fluxion.adapters.lora import Lora, LoraAdapter, auto_attach_loras
|
||||||
|
from refiners.fluxion.layers import Conv2d
|
||||||
|
from refiners.fluxion.layers.linear import Linear
|
||||||
|
from refiners.fluxion.utils import load_from_safetensors, save_to_safetensors
|
||||||
|
from refiners.foundationals.latent_diffusion.lora import SDLoraManager
|
||||||
|
from refiners.foundationals.latent_diffusion.stable_diffusion_xl.control_lora import (
|
||||||
|
ConditionEncoder,
|
||||||
|
ControlLora,
|
||||||
|
ControlLoraAdapter,
|
||||||
|
ZeroConvolution,
|
||||||
|
)
|
||||||
|
from refiners.foundationals.latent_diffusion.stable_diffusion_xl.model import StableDiffusion_XL
|
||||||
|
|
||||||
|
|
||||||
|
def sort_keys(key: str, /) -> tuple[str, int]:
    """Compute the sorting score of a Control-LoRA key.

    Meant to be passed as the `key` argument of [`sorted`][sorted]. A few
    layers are handled specially, every other key is delegated to
    `SDLoraManager.sort_keys`.

    Args:
        key: The key to sort.

    Returns:
        The padded suffix of the key.
        The score of the key's suffix.
    """
    # HACK: "time_embed" layers go at the very start of the list,
    # "label_emb" layers right after them
    for marker, score in (("time_embed", -2), ("label_emb", -1)):
        if marker in key:
            return ("", score)

    # HACK: "proj_out" layers go at the end of each "transformer_blocks"
    if "proj_out" in key:
        return (key.removesuffix("proj_out") + "transformer_blocks.99.ff.net.2", 10)

    return SDLoraManager.sort_keys(key)
|
||||||
|
|
||||||
|
|
||||||
|
def load_lora_layers(
    name: str,
    state_dict: dict[str, Tensor],
    control_lora: ControlLora,
) -> dict[str, Lora[Linear | Conv2d]]:
    """Build the LoRA layers found in the state_dict and match them to the ControlLora.

    Args:
        name: The name of the LoRA.
        state_dict: The state_dict of the LoRA.
        control_lora: The ControlLora to load the LoRA layers into.

    Returns:
        The LoRA layers, ordered with `sort_keys`. Their adapters have been
        ejected from the ControlLora before returning.
    """
    # keep only the up/down tensors, rename them with a ".weight" suffix
    # and move them to the device and dtype of the ControlLora
    lora_weights = {
        f"{key}.weight": tensor.to(
            dtype=control_lora.dtype,
            device=control_lora.device,
        )
        for key, tensor in state_dict.items()
        if ".up" in key or ".down" in key
    }

    # instantiate every LoRA layer, then reorder them with `sort_keys`
    unsorted_layers = Lora.from_dict(name, state_dict=lora_weights)
    lora_layers = {key: unsorted_layers[key] for key in sorted(unsorted_layers, key=sort_keys)}

    # let the LoRA layers find their targets in the ControlLora
    auto_attach_loras(lora_layers, control_lora, exclude=["ZeroConvolution", "ConditionEncoder"])

    # eject every adapter again: the target paths are needed
    # as if the adapters had never been injected
    for lora_layer in lora_layers.values():
        adapter = lora_layer.parent
        assert isinstance(adapter, LoraAdapter)
        adapter.eject()

    return lora_layers
|
||||||
|
|
||||||
|
|
||||||
|
def load_condition_encoder(
    state_dict: dict[str, Tensor],
    control_lora: ControlLora,
) -> None:
    """Copy the "input_hint_block" tensors of the state_dict into the ConditionEncoder's Conv2d layers.

    Args:
        state_dict: The state_dict of the ConditionEncoder.
        control_lora: The control_lora to load the ConditionEncoder's Conv2d layers into.
    """
    # select the ConditionEncoder tensors and move them
    # to the device and dtype of the ControlLora
    condition_encoder_tensors = {
        key: tensor.to(
            dtype=control_lora.dtype,
            device=control_lora.device,
        )
        for key, tensor in state_dict.items()
        if "input_hint_block" in key
    }

    # collect the Conv2d layers of the ConditionEncoder
    encoder = control_lora.ensure_find(ConditionEncoder)
    conv2ds = list(encoder.layers(Conv2d))

    # the i-th Conv2d takes the tensors stored at index 2*i of "input_hint_block"
    for i, conv2d in enumerate(conv2ds):
        conv2d.weight = TorchParameter(condition_encoder_tensors[f"input_hint_block.{i*2}.weight"])
        conv2d.bias = TorchParameter(condition_encoder_tensors[f"input_hint_block.{i*2}.bias"])
|
||||||
|
|
||||||
|
|
||||||
|
def load_zero_convolutions(
    state_dict: dict[str, Tensor],
    control_lora: ControlLora,
) -> None:
    """Copy the "zero_convs" and "middle_block_out" tensors into the ZeroConvolution layers.

    Args:
        state_dict: The state_dict of the ZeroConvolution.
        control_lora: The ControlLora to load the ZeroConvolution's Conv2d layers into.
    """
    # select the tensors of the ZeroConvolution layers
    zero_convolution_tensors = {key: tensor for key, tensor in state_dict.items() if "zero_convs" in key}

    # the "middle_block_out" tensors are stored as one extra entry, at the index
    # following the existing ones (two tensors, weight and bias, per entry)
    n = len(zero_convolution_tensors) // 2
    zero_convolution_tensors.update(
        {
            f"zero_convs.{n}.0.weight": state_dict["middle_block_out.0.weight"],
            f"zero_convs.{n}.0.bias": state_dict["middle_block_out.0.bias"],
        }
    )

    # move the tensors to the device and dtype of the ControlLora
    zero_convolution_tensors = {
        key: tensor.to(
            dtype=control_lora.dtype,
            device=control_lora.device,
        )
        for key, tensor in zero_convolution_tensors.items()
    }

    # one Conv2d per ZeroConvolution layer
    conv2ds = [zero_convolution.ensure_find(Conv2d) for zero_convolution in control_lora.layers(ZeroConvolution)]

    # replace the weights and biases with the ones from the state_dict
    for i, conv2d in enumerate(conv2ds):
        conv2d.weight = TorchParameter(zero_convolution_tensors[f"zero_convs.{i}.0.weight"])
        conv2d.bias = TorchParameter(zero_convolution_tensors[f"zero_convs.{i}.0.bias"])
|
||||||
|
|
||||||
|
|
||||||
|
def simplify_key(key: str, prefix: str, index: int | None = None) -> str:
|
||||||
|
"""Simplify a key by stripping everything to the left of the prefix.
|
||||||
|
|
||||||
|
Also optionally add a zero-padded index to the prefix.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
>>> simplify_key("foo.bar.ControlLora.something", "ControlLora", 1)
|
||||||
|
"ControlLora_01.something"
|
||||||
|
|
||||||
|
>>> simplify_key("foo.bar.ControlLora.DownBlocks.something", "ControlLora")
|
||||||
|
"ControlLora.DownBlocks.something"
|
||||||
|
|
||||||
|
Args:
|
||||||
|
key: The key to simplify.
|
||||||
|
prefix: The prefix to remove.
|
||||||
|
index: The index to add.
|
||||||
|
"""
|
||||||
|
_, right = key.split(prefix, maxsplit=1)
|
||||||
|
if index:
|
||||||
|
return f"{prefix}_{index:02d}{right}"
|
||||||
|
else:
|
||||||
|
return f"{prefix}{right}"
|
||||||
|
|
||||||
|
|
||||||
|
def convert_lora_layers(
    lora_layers: dict[str, Lora[Linear | Conv2d]],
    control_lora: ControlLora,
    refiners_state_dict: dict[str, Tensor],
) -> None:
    """Store the down/up weights of each LoRA layer under refiners-style keys.

    Args:
        lora_layers: The LoRA layers to convert.
        control_lora: The ControlLora to convert the LoRA layers from.
        refiners_state_dict: The refiners state dict to update with the converted LoRA layers.
    """
    for lora_layer in lora_layers.values():
        # the parent of a LoRA layer is expected to be its adapter
        adapter = lora_layer.parent
        assert isinstance(adapter, LoraAdapter)

        # path of the adapter's target, computed from the target's parent in the ControlLora
        target = adapter.target
        path = target.get_path(parent=control_lora.ensure_find_parent(target))

        # keys are stripped of everything that precedes "ControlLora."
        down_key = simplify_key(f"{path}.down", "ControlLora.")
        up_key = simplify_key(f"{path}.up", "ControlLora.")
        refiners_state_dict[down_key] = lora_layer.down.weight
        refiners_state_dict[up_key] = lora_layer.up.weight
|
||||||
|
|
||||||
|
|
||||||
|
def convert_zero_convolutions(
    control_lora: ControlLora,
    refiners_state_dict: dict[str, Tensor],
) -> None:
    """Store the parameters of each ZeroConvolution layer under refiners-style keys.

    Args:
        control_lora: The ControlLora to convert the ZeroConvolution layers from.
        refiners_state_dict: The refiners state dict to update with the converted ZeroConvolution layers.
    """
    # layers are numbered from 1 in the converted keys
    for position, zero_convolution in enumerate(control_lora.layers(ZeroConvolution), start=1):
        parameters = zero_convolution.state_dict()
        path = zero_convolution.get_path()
        refiners_state_dict.update(
            {
                simplify_key(f"{path}.{key}", "ZeroConvolution", position): param
                for key, param in parameters.items()
            }
        )
|
||||||
|
|
||||||
|
|
||||||
|
def convert_condition_encoder(
    control_lora: ControlLora,
    refiners_state_dict: dict[str, Tensor],
) -> None:
    """Store the parameters of the ConditionEncoder under refiners-style keys.

    Args:
        control_lora: The ControlLora to convert the ConditionEncoder from.
        refiners_state_dict: The refiners state dict to update with the converted ConditionEncoder.
    """
    encoder = control_lora.ensure_find(ConditionEncoder)
    path = encoder.get_path()

    # prefix each parameter with the encoder's path, then strip
    # everything that precedes "ConditionEncoder"
    refiners_state_dict.update(
        {
            simplify_key(f"{path}.{key}", "ConditionEncoder"): param
            for key, param in encoder.state_dict().items()
        }
    )
|
||||||
|
|
||||||
|
|
||||||
|
class ControlLoraConversion(Conversion):
    """Conversion of a Control-LoRA checkpoint to the refiners format."""

    def __init__(
        self,
        original: Hub,
        converted: Hub,
        dtype: torch.dtype = torch.float32,
    ) -> None:
        """Initialize the weight structure.

        Args:
            original: A Hub object representing the original weight.
            converted: A Hub object representing the converted weight.
            dtype: Stored on the instance; `convert` in this class does not read it.
        """
        self.original = original
        self.converted = converted
        self.dtype = dtype

    def convert(self) -> None:
        """Convert the weights from the original to the converted weights."""
        logging.info(
            f"Converting {self.original.repo_id}/{self.original.filename} to {self.converted.repo_id}/{self.converted.filename}"
        )

        # nothing to do when a converted file with the expected hash is already there
        if self.converted.local_path.is_file():
            logging.warning(f"{self.converted.local_path} already exists")
            if self.converted.check_local_hash():
                try:
                    assert self.converted.check_remote_hash()
                except requests.exceptions.HTTPError:
                    logging.error(f"{self.converted.local_path} couldn't verify remote hash")
                return

        # fetch and load the original state_dict
        self.original.download()
        original_state_dict = load_from_safetensors(self.original.local_path)

        # inject a ControlLora into an SDXL U-Net and fill it with the original weights
        name = self.original.local_path.stem
        unet = StableDiffusion_XL().unet
        adapter = ControlLoraAdapter(target=unet, name=name).inject()
        control_lora = adapter.control_lora
        lora_layers = load_lora_layers(name, original_state_dict, control_lora)
        load_zero_convolutions(original_state_dict, control_lora)
        load_condition_encoder(original_state_dict, control_lora)

        # gather every converted tensor under refiners-style keys
        converted_state_dict: dict[str, Tensor] = {}
        convert_lora_layers(lora_layers, control_lora, converted_state_dict)
        convert_zero_convolutions(control_lora, converted_state_dict)
        convert_condition_encoder(control_lora, converted_state_dict)

        # write the converted weights
        self.converted.local_path.parent.mkdir(parents=True, exist_ok=True)
        save_to_safetensors(self.converted.local_path, converted_state_dict)

        # verify the written file; an HTTP failure on the remote check is only logged
        assert self.converted.check_local_hash()
        try:
            assert self.converted.check_remote_hash()
        except requests.exceptions.HTTPError:
            logging.warning(f"{self.converted.local_path} couldn't verify remote hash")
|
||||||
|
|
||||||
|
|
||||||
|
# Canny Control-LoRA for SDXL; the original file is pinned to a fixed revision.
canny = ControlLoraConversion(
    original=Hub(
        repo_id="lllyasviel/misc",
        filename="control-lora-canny-rank128.safetensors",
        revision="71f7a66a7affe631c64af469fe647217d422cac0",
        expected_sha256="56389dbb245ca44de91d662529bd4298abc55ce2318f60bc19454fb72ff68247",
    ),
    converted=Hub(
        repo_id="refiners/sdxl.controllora.canny",
        filename="model.safetensors",
        expected_sha256="6edfa742e2b5191ce357fb559e236652b004feea490c4f1277b30abc9804321f",
    ),
)
|
||||||
|
|
||||||
|
# CPDS Control-LoRA for SDXL; the original file is pinned to a fixed revision.
cpds = ControlLoraConversion(
    original=Hub(
        repo_id="lllyasviel/misc",
        filename="fooocus_xl_cpds_128.safetensors",
        revision="71f7a66a7affe631c64af469fe647217d422cac0",
        expected_sha256="eec3fd8209a65b41341ea9f415de66909c97b30fb4d20965b3304e8e5251c2f1",
    ),
    converted=Hub(
        repo_id="refiners/sdxl.controllora.cpds",
        filename="model.safetensors",
        expected_sha256="9a3b2a86f32e4747e98531b0af8b59a804391b538949a0dd85263722b6e64db0",
    ),
)
|
290
src/refiners/conversion/models/controlnet_sd15.py
Normal file
290
src/refiners/conversion/models/controlnet_sd15.py
Normal file
|
@ -0,0 +1,290 @@
|
||||||
|
import torch
|
||||||
|
|
||||||
|
from refiners.conversion.utils import Conversion, Hub, WeightRecipe
|
||||||
|
|
||||||
|
# Recipe shared by every SD 1.5 ControlNet conversion declared in this module.
# `key_map` goes from a module prefix in the original (diffusers-style)
# checkpoint to the corresponding refiners module path. Entries carry no
# parameter suffix such as `.weight` / `.bias`.
# NOTE(review): presumably WeightRecipe handles those suffixes itself — confirm.
diffusers_recipe = WeightRecipe(
    key_map={
        "time_embedding.linear_1": "TimestepEncoder.RangeEncoder.Linear_1",
        "time_embedding.linear_2": "TimestepEncoder.RangeEncoder.Linear_2",
        "down_blocks.2.resnets.0.time_emb_proj": "DownBlocks.Chain_8.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
        "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
        "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
        "down_blocks.2.resnets.1.time_emb_proj": "DownBlocks.Chain_9.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
        "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
        "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
        "down_blocks.3.resnets.0.time_emb_proj": "DownBlocks.Chain_11.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
        "down_blocks.3.resnets.1.time_emb_proj": "DownBlocks.Chain_12.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
        "mid_block.resnets.0.time_emb_proj": "MiddleBlock.ResidualBlock_1.Chain.RangeAdapter2d.Chain.Linear",
        "mid_block.attentions.0.transformer_blocks.0.attn1.to_out.0": "MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
        "mid_block.attentions.0.transformer_blocks.0.attn2.to_out.0": "MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
        "mid_block.resnets.1.time_emb_proj": "MiddleBlock.ResidualBlock_2.Chain.RangeAdapter2d.Chain.Linear",
        "conv_in": "DownBlocks.Chain_1.Conv2d",
        "controlnet_cond_embedding.conv_in": "DownBlocks.Chain_1.Residual.ConditionEncoder.Chain_1.Conv2d",
        "controlnet_cond_embedding.blocks.0": "DownBlocks.Chain_1.Residual.ConditionEncoder.Chain_2.Conv2d_1",
        "controlnet_cond_embedding.blocks.1": "DownBlocks.Chain_1.Residual.ConditionEncoder.Chain_2.Conv2d_2",
        "controlnet_cond_embedding.blocks.2": "DownBlocks.Chain_1.Residual.ConditionEncoder.Chain_3.Conv2d_1",
        "controlnet_cond_embedding.blocks.3": "DownBlocks.Chain_1.Residual.ConditionEncoder.Chain_3.Conv2d_2",
        "controlnet_cond_embedding.blocks.4": "DownBlocks.Chain_1.Residual.ConditionEncoder.Chain_4.Conv2d_1",
        "controlnet_cond_embedding.blocks.5": "DownBlocks.Chain_1.Residual.ConditionEncoder.Chain_4.Conv2d_2",
        "controlnet_cond_embedding.conv_out": "DownBlocks.Chain_1.Residual.ConditionEncoder.Conv2d",
        "down_blocks.0.resnets.0.norm1": "DownBlocks.Chain_2.ResidualBlock.Chain.GroupNorm_1",
        "down_blocks.0.resnets.0.norm2": "DownBlocks.Chain_2.ResidualBlock.Chain.GroupNorm_2",
        "down_blocks.0.attentions.0.norm": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_1.GroupNorm",
        "down_blocks.0.resnets.1.norm1": "DownBlocks.Chain_3.ResidualBlock.Chain.GroupNorm_1",
        "down_blocks.0.resnets.1.norm2": "DownBlocks.Chain_3.ResidualBlock.Chain.GroupNorm_2",
        "down_blocks.0.attentions.1.norm": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_1.GroupNorm",
        "down_blocks.1.resnets.0.norm1": "DownBlocks.Chain_5.ResidualBlock.Chain.GroupNorm_1",
        "down_blocks.0.resnets.0.conv1": "DownBlocks.Chain_2.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
        "down_blocks.0.resnets.0.conv2": "DownBlocks.Chain_2.ResidualBlock.Chain.Conv2d",
        "down_blocks.0.resnets.1.conv1": "DownBlocks.Chain_3.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
        "down_blocks.0.resnets.1.conv2": "DownBlocks.Chain_3.ResidualBlock.Chain.Conv2d",
        "down_blocks.0.downsamplers.0.conv": "DownBlocks.Chain_4.Downsample.Conv2d",
        "down_blocks.0.resnets.0.time_emb_proj": "DownBlocks.Chain_2.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
        "down_blocks.0.attentions.0.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
        "down_blocks.0.resnets.1.time_emb_proj": "DownBlocks.Chain_3.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
        "down_blocks.0.attentions.1.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
        "controlnet_down_blocks.0": "DownBlocks.Chain_1.Passthrough.Conv2d",
        "down_blocks.0.attentions.0.proj_in": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_1.Conv2d",
        "down_blocks.0.attentions.0.proj_out": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_3.Conv2d",
        "controlnet_down_blocks.1": "DownBlocks.Chain_2.Passthrough.Conv2d",
        "down_blocks.0.attentions.1.proj_in": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_1.Conv2d",
        "down_blocks.0.attentions.1.proj_out": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_3.Conv2d",
        "controlnet_down_blocks.2": "DownBlocks.Chain_3.Passthrough.Conv2d",
        "controlnet_down_blocks.3": "DownBlocks.Chain_4.Passthrough.Conv2d",
        "down_blocks.0.attentions.0.transformer_blocks.0.norm1": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
        "down_blocks.0.attentions.0.transformer_blocks.0.norm2": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
        "down_blocks.0.attentions.0.transformer_blocks.0.norm3": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
        "down_blocks.0.attentions.1.transformer_blocks.0.norm1": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
        "down_blocks.0.attentions.1.transformer_blocks.0.norm2": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
        "down_blocks.0.attentions.1.transformer_blocks.0.norm3": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
        "down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
        "down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
        "down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
        "down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
        "down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
        "down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
        "down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
        "down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
        "down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
        "down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
        "down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
        "down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
        "down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
        "down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
        "down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
        "down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
        "down_blocks.0.attentions.0.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
        "down_blocks.0.attentions.1.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
        "down_blocks.1.resnets.0.conv1": "DownBlocks.Chain_5.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
        "down_blocks.1.resnets.0.time_emb_proj": "DownBlocks.Chain_5.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
        "down_blocks.1.resnets.1.time_emb_proj": "DownBlocks.Chain_6.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
        "down_blocks.1.resnets.0.norm2": "DownBlocks.Chain_5.ResidualBlock.Chain.GroupNorm_2",
        "down_blocks.1.attentions.0.norm": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_1.GroupNorm",
        "down_blocks.1.resnets.1.norm1": "DownBlocks.Chain_6.ResidualBlock.Chain.GroupNorm_1",
        "down_blocks.1.resnets.1.norm2": "DownBlocks.Chain_6.ResidualBlock.Chain.GroupNorm_2",
        "down_blocks.1.attentions.1.norm": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_1.GroupNorm",
        "down_blocks.2.resnets.0.norm1": "DownBlocks.Chain_8.ResidualBlock.Chain.GroupNorm_1",
        "down_blocks.1.resnets.0.conv2": "DownBlocks.Chain_5.ResidualBlock.Chain.Conv2d",
        "down_blocks.1.resnets.1.conv1": "DownBlocks.Chain_6.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
        "down_blocks.1.resnets.1.conv2": "DownBlocks.Chain_6.ResidualBlock.Chain.Conv2d",
        "down_blocks.1.downsamplers.0.conv": "DownBlocks.Chain_7.Downsample.Conv2d",
        "down_blocks.1.resnets.0.conv_shortcut": "DownBlocks.Chain_5.ResidualBlock.Conv2d",
        "down_blocks.1.attentions.0.proj_in": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_1.Conv2d",
        "down_blocks.1.attentions.0.proj_out": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_3.Conv2d",
        "controlnet_down_blocks.4": "DownBlocks.Chain_5.Passthrough.Conv2d",
        "down_blocks.1.attentions.1.proj_in": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_1.Conv2d",
        "down_blocks.1.attentions.1.proj_out": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_3.Conv2d",
        "controlnet_down_blocks.5": "DownBlocks.Chain_6.Passthrough.Conv2d",
        "controlnet_down_blocks.6": "DownBlocks.Chain_7.Passthrough.Conv2d",
        "down_blocks.1.attentions.0.transformer_blocks.0.norm1": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
        "down_blocks.1.attentions.0.transformer_blocks.0.norm2": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
        "down_blocks.1.attentions.0.transformer_blocks.0.norm3": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
        "down_blocks.1.attentions.1.transformer_blocks.0.norm1": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
        "down_blocks.1.attentions.1.transformer_blocks.0.norm2": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
        "down_blocks.1.attentions.1.transformer_blocks.0.norm3": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
        "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
        "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
        "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
        "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
        "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
        "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
        "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
        "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
        "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
        "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
        "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
        "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
        "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
        "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
        "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
        "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
        "down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
        "down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
        "down_blocks.1.attentions.0.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
        "down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
        "down_blocks.2.resnets.0.conv1": "DownBlocks.Chain_8.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
        "down_blocks.2.resnets.0.norm2": "DownBlocks.Chain_8.ResidualBlock.Chain.GroupNorm_2",
        "down_blocks.2.attentions.0.norm": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_1.GroupNorm",
        "down_blocks.2.resnets.1.norm1": "DownBlocks.Chain_9.ResidualBlock.Chain.GroupNorm_1",
        "down_blocks.2.resnets.1.norm2": "DownBlocks.Chain_9.ResidualBlock.Chain.GroupNorm_2",
        "down_blocks.2.attentions.1.norm": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_1.GroupNorm",
        "down_blocks.3.resnets.0.norm1": "DownBlocks.Chain_11.ResidualBlock.Chain.GroupNorm_1",
        "down_blocks.3.resnets.0.norm2": "DownBlocks.Chain_11.ResidualBlock.Chain.GroupNorm_2",
        "down_blocks.3.resnets.1.norm1": "DownBlocks.Chain_12.ResidualBlock.Chain.GroupNorm_1",
        "down_blocks.3.resnets.1.norm2": "DownBlocks.Chain_12.ResidualBlock.Chain.GroupNorm_2",
        "mid_block.resnets.0.norm1": "MiddleBlock.ResidualBlock_1.Chain.GroupNorm_1",
        "mid_block.resnets.0.norm2": "MiddleBlock.ResidualBlock_1.Chain.GroupNorm_2",
        "mid_block.attentions.0.norm": "MiddleBlock.CLIPLCrossAttention.Chain_1.GroupNorm",
        "mid_block.resnets.1.norm1": "MiddleBlock.ResidualBlock_2.Chain.GroupNorm_1",
        "mid_block.resnets.1.norm2": "MiddleBlock.ResidualBlock_2.Chain.GroupNorm_2",
        "down_blocks.2.resnets.0.conv2": "DownBlocks.Chain_8.ResidualBlock.Chain.Conv2d",
        "down_blocks.2.resnets.1.conv1": "DownBlocks.Chain_9.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
        "down_blocks.2.resnets.1.conv2": "DownBlocks.Chain_9.ResidualBlock.Chain.Conv2d",
        "down_blocks.2.downsamplers.0.conv": "DownBlocks.Chain_10.Downsample.Conv2d",
        "down_blocks.3.resnets.0.conv1": "DownBlocks.Chain_11.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
        "down_blocks.3.resnets.0.conv2": "DownBlocks.Chain_11.ResidualBlock.Chain.Conv2d",
        "down_blocks.3.resnets.1.conv1": "DownBlocks.Chain_12.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
        "down_blocks.3.resnets.1.conv2": "DownBlocks.Chain_12.ResidualBlock.Chain.Conv2d",
        "mid_block.resnets.0.conv1": "MiddleBlock.ResidualBlock_1.Chain.RangeAdapter2d.Conv2d",
        "mid_block.resnets.0.conv2": "MiddleBlock.ResidualBlock_1.Chain.Conv2d",
        "mid_block.resnets.1.conv1": "MiddleBlock.ResidualBlock_2.Chain.RangeAdapter2d.Conv2d",
        "mid_block.resnets.1.conv2": "MiddleBlock.ResidualBlock_2.Chain.Conv2d",
        "down_blocks.2.resnets.0.conv_shortcut": "DownBlocks.Chain_8.ResidualBlock.Conv2d",
        "down_blocks.2.attentions.0.proj_in": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_1.Conv2d",
        "down_blocks.2.attentions.0.proj_out": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_3.Conv2d",
        "controlnet_down_blocks.7": "DownBlocks.Chain_8.Passthrough.Conv2d",
        "down_blocks.2.attentions.1.proj_in": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_1.Conv2d",
        "down_blocks.2.attentions.1.proj_out": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_3.Conv2d",
        "controlnet_down_blocks.8": "DownBlocks.Chain_9.Passthrough.Conv2d",
        "controlnet_down_blocks.9": "DownBlocks.Chain_10.Passthrough.Conv2d",
        "controlnet_down_blocks.10": "DownBlocks.Chain_11.Passthrough.Conv2d",
        "controlnet_down_blocks.11": "DownBlocks.Chain_12.Passthrough.Conv2d",
        "mid_block.attentions.0.proj_in": "MiddleBlock.CLIPLCrossAttention.Chain_1.Conv2d",
        "mid_block.attentions.0.proj_out": "MiddleBlock.CLIPLCrossAttention.Chain_3.Conv2d",
        "controlnet_mid_block": "MiddleBlock.Passthrough.Conv2d",
        "down_blocks.2.attentions.0.transformer_blocks.0.norm1": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
        "down_blocks.2.attentions.0.transformer_blocks.0.norm2": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
        "down_blocks.2.attentions.0.transformer_blocks.0.norm3": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
        "down_blocks.2.attentions.1.transformer_blocks.0.norm1": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
        "down_blocks.2.attentions.1.transformer_blocks.0.norm2": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
        "down_blocks.2.attentions.1.transformer_blocks.0.norm3": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
        "mid_block.attentions.0.transformer_blocks.0.norm1": "MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
        "mid_block.attentions.0.transformer_blocks.0.norm2": "MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
        "mid_block.attentions.0.transformer_blocks.0.norm3": "MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
        "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
        "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
        "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
        "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
        "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
        "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
        "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
        "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
        "mid_block.attentions.0.transformer_blocks.0.attn1.to_q": "MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
        "mid_block.attentions.0.transformer_blocks.0.attn1.to_k": "MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
        "mid_block.attentions.0.transformer_blocks.0.attn1.to_v": "MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
        "mid_block.attentions.0.transformer_blocks.0.attn2.to_q": "MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
        "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
        "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
        "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
        "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
        "mid_block.attentions.0.transformer_blocks.0.attn2.to_k": "MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
        "mid_block.attentions.0.transformer_blocks.0.attn2.to_v": "MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
        "down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
        "down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
        "mid_block.attentions.0.transformer_blocks.0.ff.net.0.proj": "MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
        "down_blocks.2.attentions.0.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
        "down_blocks.2.attentions.1.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
        "mid_block.attentions.0.transformer_blocks.0.ff.net.2": "MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
    },
)
|
||||||
|
|
||||||
|
# ControlNet "tile" conversion entry, using the shared `diffusers_recipe`.
# The upstream file here is a `.bin` checkpoint rather than safetensors.
# NOTE(review): `dtype` is presumably the precision of the converted weights —
# confirm against Conversion.
tile = Conversion(
    original=Hub(
        repo_id="lllyasviel/control_v11f1e_sd15_tile",
        filename="diffusion_pytorch_model.bin",
        revision="3f877705c37010b7221c3d10743307d6b5b6efac",
        expected_sha256="eb05b4c3665bd76dad70a90652014a9b3aab391abd8a5bb484e860330f9492fb",
    ),
    converted=Hub(
        repo_id="refiners/sd15.controlnet.tile",
        filename="model.safetensors",
        expected_sha256="3002029df75364fcbdbf6024dc4c414c929cb8623a2fe7d406a4dbfcee1ffa5a",
    ),
    recipe=diffusers_recipe,
    dtype=torch.float16,
)
|
||||||
|
# ControlNet "canny" conversion entry, using the shared `diffusers_recipe`.
# The upstream safetensors file is pinned by revision and SHA-256; the
# converted file is declared with its own expected hash.
canny = Conversion(
    original=Hub(
        repo_id="lllyasviel/control_v11p_sd15_canny",
        filename="diffusion_pytorch_model.safetensors",
        revision="115a470d547982438f70198e353a921996e2e819",
        expected_sha256="be713fb941fc7c625f0c7d816b6a19115783a665f3049a8974f127e0c075d9a9",
    ),
    converted=Hub(
        repo_id="refiners/sd15.controlnet.canny",
        filename="model.safetensors",
        expected_sha256="0d6d0ba036dc26f4842e89f3c5f2f37feca904863e83b578db099a6fdafc4f51",
    ),
    recipe=diffusers_recipe,
    dtype=torch.float16,
)
|
||||||
|
# ControlNet "depth" conversion entry, using the shared `diffusers_recipe`.
# The upstream safetensors file is pinned by revision and SHA-256; the
# converted file is declared with its own expected hash.
depth = Conversion(
    original=Hub(
        repo_id="lllyasviel/control_v11f1p_sd15_depth",
        filename="diffusion_pytorch_model.safetensors",
        revision="539f99181d33db39cf1af2e517cd8056785f0a87",
        expected_sha256="999aca923ca5e19e70e6afc8d11073cc3c03553ca935b636bd5925df4a1c77d1",
    ),
    converted=Hub(
        repo_id="refiners/sd15.controlnet.depth",
        filename="model.safetensors",
        expected_sha256="d131fb2e73e89e56ee1e73b6af17373bba886da6ae24dd94300a93e993292133",
    ),
    recipe=diffusers_recipe,
    dtype=torch.float16,
)
|
||||||
|
# ControlNet "normalbae" conversion entry, using the shared `diffusers_recipe`.
# The upstream safetensors file is pinned by revision and SHA-256; the
# converted file is declared with its own expected hash.
normalbae = Conversion(
    original=Hub(
        repo_id="lllyasviel/control_v11p_sd15_normalbae",
        filename="diffusion_pytorch_model.safetensors",
        revision="cb7296e6587a219068e9d65864e38729cd862aa8",
        expected_sha256="e6c3772b35e5cb1869beca97a6ade6e8e5283310462297b10d129e25351983d7",
    ),
    converted=Hub(
        repo_id="refiners/sd15.controlnet.normalbae",
        filename="model.safetensors",
        expected_sha256="89ac08579cd244cf64bda2045d302d5455b33b951fada4abd089b5defbade364",
    ),
    recipe=diffusers_recipe,
    dtype=torch.float16,
)
|
||||||
|
# ControlNet "lineart" conversion entry, using the shared `diffusers_recipe`.
# The upstream safetensors file is pinned by revision and SHA-256; the
# converted file is declared with its own expected hash.
lineart = Conversion(
    original=Hub(
        repo_id="lllyasviel/control_v11p_sd15_lineart",
        filename="diffusion_pytorch_model.safetensors",
        revision="8a158f547e031c5b8fbca19ead09a74767ff4db0",
        expected_sha256="d9d6d0e5526dd21dfc503f9e42a93ff1f977aa52df3c14e8ac11085b518cb114",
    ),
    converted=Hub(
        repo_id="refiners/sd15.controlnet.lineart",
        filename="model.safetensors",
        expected_sha256="5fb9ce17b92032c1b7d54aa7fb3c00c067f08e9c024a43cde06999a5a18391cb",
    ),
    recipe=diffusers_recipe,
    dtype=torch.float16,
)
|
||||||
|
# ControlNet "sam" conversion entry, using the shared `diffusers_recipe`.
# Unlike the lllyasviel entries, the upstream repository is
# mfidabel/controlnet-segment-anything and the file is a `.bin` checkpoint.
sam = Conversion(
    original=Hub(
        repo_id="mfidabel/controlnet-segment-anything",
        filename="diffusion_pytorch_model.bin",
        revision="22bf4d81a4c7557287815a53e1f55279836e2bfa",
        expected_sha256="9d4f35bb941e35ceeb54e4d6d35c9239949b193e5c7389426b95a97e43de884d",
    ),
    converted=Hub(
        repo_id="refiners/sd15.controlnet.sam",
        filename="model.safetensors",
        expected_sha256="b8e0e1f2b1e542e5a21e21cdadf135981937b9ca491312b75aa85bff63b35589",
    ),
    recipe=diffusers_recipe,
    dtype=torch.float16,
)
|
298
src/refiners/conversion/models/dinov2.py
Normal file
298
src/refiners/conversion/models/dinov2.py
Normal file
|
@ -0,0 +1,298 @@
|
||||||
|
import logging
|
||||||
|
|
||||||
|
import requests
|
||||||
|
import torch
|
||||||
|
|
||||||
|
from refiners.conversion.utils import Conversion, Hub
|
||||||
|
from refiners.fluxion.utils import load_tensors, save_to_safetensors
|
||||||
|
|
||||||
|
|
||||||
|
def convert_dinov2_facebook(weights: dict[str, torch.Tensor]) -> None:
    """Convert DINOv2 weights from the Facebook layout to the refiners layout, in place.

    The dictionary is mutated: every recognised key is removed and re-inserted
    under its refiners name, and nothing is returned.

    Args:
        weights: State dict using the Facebook DINOv2 key names (`cls_token`,
            `pos_embed`, `patch_embed.proj.*`, `norm.*`, `blocks.<i>.*`, and
            optionally `mask_token` and `register_tokens`).

    Raises:
        ValueError: If no `blocks.*` key is present, so the depth cannot be inferred.
        KeyError: If one of the expected top-level or per-block keys is missing.
    """
    # get depth from "blocks" keys (highest block index + 1)
    depth = max(int(key.split(".")[1]) for key in weights if key.startswith("blocks.")) + 1

    # only needed when pre-training; tolerate checkpoints that do not ship it
    weights.pop("mask_token", None)

    # squeeze cls_token and position_embeddings (drop the leading singleton dimension)
    weights["cls_token"] = weights["cls_token"].squeeze(0)
    weights["pos_embed"] = weights["pos_embed"].squeeze(0)

    # rename "w12" to "fc1" and "w3" to "fc2", only for giant model
    for key in list(weights.keys()):
        if "w3" in key:
            weights[key.replace("w3", "fc2")] = weights.pop(key)
        elif "w12" in key:
            # we swap w1 and w2 because of the difference between our GLU implementation and theirs
            # see https://github.com/facebookresearch/dinov2/blob/e1277af2ba9496fbadf7aec6eba56e8d882d1e35/dinov2/layers/swiglu_ffn.py#L31-L34
            # and https://github.com/finegrain-ai/refiners/blob/a2ee70578361e4d84a65a8708564480a9b0ec67e/src/refiners/fluxion/layers/activations.py#L158-L160
            w1, w2 = weights.pop(key).chunk(2, dim=0)
            weights[key.replace("w12", "fc1")] = torch.cat([w2, w1], dim=0)

    rename_keys: list[tuple[str, str]] = [
        ("cls_token", "Concatenate.ClassToken.Parameter.weight"),
        ("pos_embed", "PositionalEncoder.PositionalEmbedding.Parameter.weight"),
        ("patch_embed.proj.weight", "Concatenate.PatchEncoder.Conv2d.weight"),
        ("patch_embed.proj.bias", "Concatenate.PatchEncoder.Conv2d.bias"),
        ("norm.weight", "LayerNorm.weight"),
        ("norm.bias", "LayerNorm.bias"),
    ]

    # per-block renames, relative to "blocks.<i>." and "Transformer.TransformerLayer_<i+1>."
    layer_renames: tuple[tuple[str, str], ...] = (
        ("norm1.weight", "Residual_1.LayerNorm.weight"),
        ("norm1.bias", "Residual_1.LayerNorm.bias"),
        ("attn.proj.weight", "Residual_1.SelfAttention.Linear.weight"),
        ("attn.proj.bias", "Residual_1.SelfAttention.Linear.bias"),
        ("ls1.gamma", "Residual_1.LayerScale.weight"),
        ("norm2.weight", "Residual_2.LayerNorm.weight"),
        ("norm2.bias", "Residual_2.LayerNorm.bias"),
        ("mlp.fc1.weight", "Residual_2.FeedForward.Linear_1.weight"),
        ("mlp.fc1.bias", "Residual_2.FeedForward.Linear_1.bias"),
        ("mlp.fc2.weight", "Residual_2.FeedForward.Linear_2.weight"),
        ("mlp.fc2.bias", "Residual_2.FeedForward.Linear_2.bias"),
        ("ls2.gamma", "Residual_2.LayerScale.weight"),
    )
    for i in range(depth):
        layer = f"Transformer.TransformerLayer_{i + 1}"
        rename_keys.extend((f"blocks.{i}.{old}", f"{layer}.{new}") for old, new in layer_renames)

    if "register_tokens" in weights:
        weights["register_tokens"] = weights["register_tokens"].squeeze(0)
        rename_keys.append(("register_tokens", "Registers.Parameter.weight"))

    # rename keys
    for old_key, new_key in rename_keys:
        weights[new_key] = weights.pop(old_key)

    # split the fused qkv weights and biases into three equal chunks (q, k, v)
    for i in range(depth):
        distribute = f"Transformer.TransformerLayer_{i + 1}.Residual_1.SelfAttention.Distribute"
        for kind in ("weight", "bias"):
            q, k, v = weights.pop(f"blocks.{i}.attn.qkv.{kind}").chunk(3, dim=0)
            for n, part in enumerate((q, k, v), start=1):
                weights[f"{distribute}.Linear_{n}.{kind}"] = part
|
||||||
|
|
||||||
|
|
||||||
|
class DinoV2Conversion(Conversion):
    """Conversion of a Facebook DINOv2 checkpoint into refiners weights.

    The key/tensor rewriting is done by `convert_dinov2_facebook` rather than
    by a weight recipe.
    """

    def __init__(
        self,
        original: Hub,
        converted: Hub,
        dtype: torch.dtype = torch.float32,
    ) -> None:
        """Initialize the weight structure.

        Args:
            original: A Hub object representing the original weight.
            converted: A Hub object representing the converted weight.
            dtype: The dtype passed to `change_dtype` before the converted weights are saved.
        """
        # NOTE(review): the parent initializer is not called here, presumably
        # because it expects a recipe; confirm against `Conversion.__init__`.
        self.original = original
        self.converted = converted
        self.dtype = dtype

    def convert(self) -> None:  # type: ignore
        """Convert the weights from the original to the converted weights.

        If the converted file already exists and passes the local hash check,
        nothing is converted. Otherwise the original is downloaded, converted,
        cast to `self.dtype`, saved as safetensors and verified.

        Raises:
            AssertionError: If a hash check reports a mismatch.
        """
        logging.info(f"Converting {self.original.repo_id}/{self.original.filename} to {self.converted.repo_id}")

        # check if the converted file already exists
        if self.converted.local_path.is_file():
            logging.warning(f"{self.converted.local_path} already exists")
            if self.converted.check_local_hash():
                try:
                    # explicit raise instead of `assert`, so the check survives `python -O`
                    if not self.converted.check_remote_hash():
                        raise AssertionError(f"{self.converted.local_path} failed the remote hash check")
                except requests.exceptions.HTTPError:
                    logging.error(f"{self.converted.local_path} couldn't verify remote hash")
                return

        # get the original state_dict
        self.original.download()

        # load the original state_dict
        weights = load_tensors(self.original.local_path)

        # convert the state_dict
        convert_dinov2_facebook(weights)  # FIXME: this is inplace
        weights = self.change_dtype(weights, self.dtype)

        # save the converted state_dict
        self.converted.local_path.parent.mkdir(parents=True, exist_ok=True)
        save_to_safetensors(self.converted.local_path, weights)

        # check the converted state_dict
        if not self.converted.check_local_hash():
            raise AssertionError(f"{self.converted.local_path} failed the local hash check")
        try:
            if not self.converted.check_remote_hash():
                raise AssertionError(f"{self.converted.local_path} failed the remote hash check")
        except requests.exceptions.HTTPError:
            # the remote could not be reached: best effort, the local check already passed
            logging.warning(f"{self.converted.local_path} couldn't verify remote hash")
|
||||||
|
|
||||||
|
|
||||||
|
# DINOv2 small, patch 14 (source file vits14.pth). No dtype is passed, so the
# DinoV2Conversion default (torch.float32) applies.
# NOTE(review): `download_url` presumably tells Hub where to fetch the file
# from instead of the repo_id — confirm in `Hub`.
small = DinoV2Conversion(
    original=Hub(
        repo_id="facebook/github_dinov2",
        filename="vits14.pth",
        expected_sha256="b938bf1bc15cd2ec0feacfe3a1bb553fe8ea9ca46a7e1d8d00217f29aef60cd9",
        download_url="https://dl.fbaipublicfiles.com/dinov2/dinov2_vits14/dinov2_vits14_pretrain.pth",
    ),
    converted=Hub(
        repo_id="refiners/dinov2.small.patch_14",
        filename="model.safetensors",
        expected_sha256="56a4b77856e20bbb5c4f0ce135089d4cd72da344dcdb278ba0c1376c8545e543",
    ),
)
|
||||||
|
# DINOv2 small, patch 14, "reg4" variant (source file vits14_reg4.pth);
# converted with the default dtype (torch.float32).
small_reg = DinoV2Conversion(
    original=Hub(
        repo_id="facebook/github_dinov2",
        filename="vits14_reg4.pth",
        expected_sha256="f433177089a681826f849f194ece3bb48f4d63fb38d32fc837e3dc7a4e5641fb",
        download_url="https://dl.fbaipublicfiles.com/dinov2/dinov2_vits14/dinov2_vits14_reg4_pretrain.pth",
    ),
    converted=Hub(
        repo_id="refiners/dinov2.small.patch_14.reg_4",
        filename="model.safetensors",
        expected_sha256="beee454507762018616635099c0ac30c7a6e4e08fbd9363c5e5d2a8f1935c3f2",
    ),
)
|
||||||
|
# DINOv2 base, patch 14 (source file vitb14.pth); converted with the default
# dtype (torch.float32).
base = DinoV2Conversion(
    original=Hub(
        repo_id="facebook/github_dinov2",
        filename="vitb14.pth",
        expected_sha256="0b8b82f85de91b424aded121c7e1dcc2b7bc6d0adeea651bf73a13307fad8c73",
        download_url="https://dl.fbaipublicfiles.com/dinov2/dinov2_vitb14/dinov2_vitb14_pretrain.pth",
    ),
    converted=Hub(
        repo_id="refiners/dinov2.base.patch_14",
        filename="model.safetensors",
        expected_sha256="59b778ed980bc02843456d3fbe1893943922ac7759a9a706ca286dd45d10db1f",
    ),
)
|
||||||
|
# DINOv2 base, patch 14, "reg4" variant (source file vitb14_reg4.pth);
# converted with the default dtype (torch.float32).
base_reg = DinoV2Conversion(
    original=Hub(
        repo_id="facebook/github_dinov2",
        filename="vitb14_reg4.pth",
        expected_sha256="73182a088cf94833c94b1666d1c99e02fe87e2007bff57b564fb6206e25dba71",
        download_url="https://dl.fbaipublicfiles.com/dinov2/dinov2_vitb14/dinov2_vitb14_reg4_pretrain.pth",
    ),
    converted=Hub(
        repo_id="refiners/dinov2.base.patch_14.reg_4",
        filename="model.safetensors",
        expected_sha256="7f91aa7cd5aa51d665949ba328a938967164b363ebaacb8cae914143a7e004e7",
    ),
)
|
||||||
|
# DINOv2 large, patch 14 (source file vitl14.pth); converted with the default
# dtype (torch.float32).
large = DinoV2Conversion(
    original=Hub(
        repo_id="facebook/github_dinov2",
        filename="vitl14.pth",
        expected_sha256="d5383ea8f4877b2472eb973e0fd72d557c7da5d3611bd527ceeb1d7162cbf428",
        download_url="https://dl.fbaipublicfiles.com/dinov2/dinov2_vitl14/dinov2_vitl14_pretrain.pth",
    ),
    converted=Hub(
        repo_id="refiners/dinov2.large.patch_14",
        filename="model.safetensors",
        expected_sha256="2ba79218d37482455db0d9967dfad024c3ad525499f8de0e3db5ff83faf80414",
    ),
)
|
||||||
|
# DINOv2 large, patch 14, "reg4" variant (source file vitl14_reg4.pth);
# converted with the default dtype (torch.float32).
large_reg = DinoV2Conversion(
    original=Hub(
        repo_id="facebook/github_dinov2",
        filename="vitl14_reg4.pth",
        expected_sha256="36e4deffbaef061a2576705b0c36f93621e2ae20bf6274694821b0b492551b51",
        download_url="https://dl.fbaipublicfiles.com/dinov2/dinov2_vitl14/dinov2_vitl14_reg4_pretrain.pth",
    ),
    converted=Hub(
        repo_id="refiners/dinov2.large.patch_14.reg_4",
        filename="model.safetensors",
        expected_sha256="e1d5a183a0ec15c5ac0a9e388038a07f8e90dd19e001b7bd4f7ffe3c5761667c",
    ),
)
|
||||||
|
# DINOv2 giant, patch 14 (source file vitg14.pth); converted with the default
# dtype (torch.float32). Per the comment in `convert_dinov2_facebook`, the
# "w12"/"w3" key handling there only applies to the giant model.
giant = DinoV2Conversion(
    original=Hub(
        repo_id="facebook/github_dinov2",
        filename="vitg14.pth",
        expected_sha256="baf8467e50af277596bbbafa06887c177ee899ab46033649c383577d7e9309d3",
        download_url="https://dl.fbaipublicfiles.com/dinov2/dinov2_vitg14/dinov2_vitg14_pretrain.pth",
    ),
    converted=Hub(
        repo_id="refiners/dinov2.giant.patch_14",
        filename="model.safetensors",
        expected_sha256="5a2d6088f4fd4aa1bf527ce0edf2ae3e76eee70c900b90716c18ad7daa4a1f2f",
    ),
)
|
||||||
|
# DINOv2 giant, patch 14, "reg4" variant (source file vitg14_reg4.pth);
# converted with the default dtype (torch.float32).
giant_reg = DinoV2Conversion(
    original=Hub(
        repo_id="facebook/github_dinov2",
        filename="vitg14_reg4.pth",
        expected_sha256="746ecb8c6301c645c5c855be91687d274587d6e48fdaec4a729753160b34a283",
        download_url="https://dl.fbaipublicfiles.com/dinov2/dinov2_vitg14/dinov2_vitg14_reg4_pretrain.pth",
    ),
    converted=Hub(
        repo_id="refiners/dinov2.giant.patch_14.reg_4",
        filename="model.safetensors",
        expected_sha256="d5f7f0917926d4fe72cd33408f79562c5d524c3e8aee999830129eecabda56a2",
    ),
)
|
122
src/refiners/conversion/models/ella.py
Normal file
122
src/refiners/conversion/models/ella.py
Normal file
|
@ -0,0 +1,122 @@
|
||||||
|
import logging
|
||||||
|
|
||||||
|
import requests
|
||||||
|
import torch
|
||||||
|
|
||||||
|
from refiners.conversion.utils import Conversion, Hub, TensorDict
|
||||||
|
from refiners.fluxion.utils import load_from_safetensors, save_to_safetensors
|
||||||
|
|
||||||
|
|
||||||
|
def convert_state_dict(state_dict: dict[str, torch.Tensor]) -> TensorDict:
    """Translate an ELLA state dict into refiners key names.

    Recognised entries are popped from `state_dict` (which is therefore
    mutated) and stored under their new name in a fresh dictionary. Entries
    matching none of the patterns are left untouched in `state_dict` and are
    absent from the result. Patterns are tried in a fixed order and the first
    match wins.

    Args:
        state_dict: The original tensors, keyed by their original names.

    Returns:
        A new dictionary holding the renamed tensors.
    """
    converted: TensorDict = {}

    def layer_root(name: str) -> str:
        # the third dot-separated field is the 0-based block index; target layers are 1-based
        block_index = int(name.split(".")[2])
        return f"PerceiverResampler.Transformer.TransformerLayer_{block_index + 1}"

    for name in list(state_dict):
        leaf = name.split(".")[-1]

        if "latents" in name:
            converted["PerceiverResampler.Latents.ParameterInitialized.weight"] = state_dict.pop(name)
            continue

        if "time_embedding" in name:
            renamed = name.replace("time_embedding", "TimestepEncoder.RangeEncoder").replace("linear", "Linear")
            converted[renamed] = state_dict.pop(name)
            continue

        if "proj_in" in name:
            converted[f"PerceiverResampler.Linear.{leaf}"] = state_dict.pop(name)
            continue

        if "time_aware" in name:
            converted[f"PerceiverResampler.Residual.Linear.{leaf}"] = state_dict.pop(name)
            continue

        if "attn.in_proj" in name:
            # fused projection: three equal chunks along dim 0 become Linear_1..Linear_3
            root = layer_root(name)
            first, second, third = state_dict.pop(name).chunk(3, dim=0)
            kind = "weight" if "weight" in name else "bias"
            for position, tensor in enumerate((first, second, third), start=1):
                converted[
                    f"{root}.Residual_1.PerceiverAttention.Attention.Distribute.Linear_{position}.{kind}"
                ] = tensor
            continue

        if "attn.out_proj" in name:
            root = layer_root(name)
            converted[f"{root}.Residual_1.PerceiverAttention.Attention.Linear.{leaf}"] = state_dict.pop(name)
            continue

        if "ln_ff" in name:
            root = layer_root(name)
            converted[f"{root}.Residual_2.AdaLayerNorm.Parallel.Chain.Linear.{leaf}"] = state_dict.pop(name)
            continue

        if "ln_1" in name or "ln_2" in name:
            root = layer_root(name)
            # the numbering is swapped: ln_2 lands in AdaLayerNorm_1, anything else in AdaLayerNorm_2
            norm_number = int(name.split(".")[3].split("_")[-1])
            slot = 2 if norm_number != 2 else 1
            converted[
                f"{root}.Residual_1.PerceiverAttention.Distribute.AdaLayerNorm_{slot}.Parallel.Chain.Linear.{leaf}"
            ] = state_dict.pop(name)
            continue

        if "mlp" in name:
            root = layer_root(name)
            slot = 2 if "c_fc" not in name else 1
            converted[f"{root}.Residual_2.FeedForward.Linear_{slot}.{leaf}"] = state_dict.pop(name)

    return converted
|
||||||
|
|
||||||
|
|
||||||
|
class ELLAConversion(Conversion):
    """Conversion of an ELLA safetensors checkpoint into refiners weights.

    The key/tensor rewriting is done by `convert_state_dict` rather than by a
    weight recipe.
    """

    def __init__(
        self,
        original: Hub,
        converted: Hub,
        dtype: torch.dtype = torch.float32,
    ) -> None:
        """Initialize the weight structure.

        Args:
            original: A Hub object representing the original weight.
            converted: A Hub object representing the converted weight.
            dtype: The dtype passed to `change_dtype` before the converted weights are saved.
        """
        # NOTE(review): the parent initializer is not called here, presumably
        # because it expects a recipe; confirm against `Conversion.__init__`.
        self.original = original
        self.converted = converted
        self.dtype = dtype

    # TODO: use WeightRecipe instead
    def convert(self) -> None:  # type: ignore
        """Convert the weights from the original to the converted weights.

        If the converted file already exists and passes the local hash check,
        nothing is converted. Otherwise the original is downloaded, converted,
        cast to `self.dtype`, saved as safetensors and verified.

        Raises:
            AssertionError: If a hash check reports a mismatch.
        """
        logging.info(f"Converting {self.original.repo_id}/{self.original.filename} to {self.converted.repo_id}")

        # check if the converted file already exists
        if self.converted.local_path.is_file():
            logging.warning(f"{self.converted.local_path} already exists")
            if self.converted.check_local_hash():
                try:
                    # explicit raise instead of `assert`, so the check survives `python -O`
                    if not self.converted.check_remote_hash():
                        raise AssertionError(f"{self.converted.local_path} failed the remote hash check")
                except requests.exceptions.HTTPError:
                    logging.error(f"{self.converted.local_path} couldn't verify remote hash")
                return

        # get the original state_dict
        self.original.download()

        # load the original state_dict
        original_weights = load_from_safetensors(self.original.local_path)

        # convert the state_dict
        converted_weights = convert_state_dict(original_weights)
        converted_weights = self.change_dtype(converted_weights, self.dtype)

        # save the converted state_dict
        self.converted.local_path.parent.mkdir(parents=True, exist_ok=True)
        save_to_safetensors(self.converted.local_path, converted_weights)

        # check the converted state_dict
        if not self.converted.check_local_hash():
            raise AssertionError(f"{self.converted.local_path} failed the local hash check")
        try:
            if not self.converted.check_remote_hash():
                raise AssertionError(f"{self.converted.local_path} failed the remote hash check")
        except requests.exceptions.HTTPError:
            # the remote could not be reached: best effort, the local check already passed
            logging.warning(f"{self.converted.local_path} couldn't verify remote hash")
|
||||||
|
|
||||||
|
|
||||||
|
# ELLA for SD 1.5 (source file ella-sd1.5-tsc-t5xl.safetensors), pinned to a
# Hub revision; dtype is overridden to float16 instead of the class default.
sd15_t5xl = ELLAConversion(
    original=Hub(
        repo_id="QQGYLab/ELLA",
        filename="ella-sd1.5-tsc-t5xl.safetensors",
        revision="c07675dea7873abe24a4152e1140cf0131c217d2",
        expected_sha256="ca2018e325170d622389b531c0a061eea9d856b80e58e359ed54ade881517417",
    ),
    converted=Hub(
        repo_id="refiners/sd15.ella.tsc_t5xl",
        filename="model.safetensors",
        expected_sha256="ffc368afb97b93792f581d4a75275f4195cf76c225961cce61c3e1ef687df7da",
    ),
    dtype=torch.float16,
)
|
36
src/refiners/conversion/models/hq_sam.py
Normal file
36
src/refiners/conversion/models/hq_sam.py
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
import torch
|
||||||
|
|
||||||
|
from refiners.conversion.utils import Conversion, Hub, WeightRecipe
|
||||||
|
|
||||||
|
# Key map for the HQ-SAM additions: every source key starts with
# "mask_decoder." and is mapped to its refiners counterpart.
# NOTE(review): entries look like key prefixes (no ".weight"/".bias" suffix) —
# confirm how WeightRecipe matches them.
recipe = WeightRecipe(
    key_map={
        "mask_decoder.compress_vit_feat.0": "Chain.HQSAMMaskPrediction.Chain.DenseEmbeddingUpscalingHQ.HQFeatures.CompressViTFeat.ConvTranspose2d_1",
        "mask_decoder.embedding_encoder.0": "Chain.HQSAMMaskPrediction.Chain.DenseEmbeddingUpscalingHQ.HQFeatures.EmbeddingEncoder.ConvTranspose2d_1",
        "mask_decoder.embedding_maskfeature.0": "Chain.HQSAMMaskPrediction.Chain.DenseEmbeddingUpscalingHQ.EmbeddingMaskfeature.Conv2d_1",
        "mask_decoder.compress_vit_feat.1": "Chain.HQSAMMaskPrediction.Chain.DenseEmbeddingUpscalingHQ.HQFeatures.CompressViTFeat.LayerNorm2d",
        "mask_decoder.embedding_encoder.1": "Chain.HQSAMMaskPrediction.Chain.DenseEmbeddingUpscalingHQ.HQFeatures.EmbeddingEncoder.LayerNorm2d",
        "mask_decoder.embedding_maskfeature.1": "Chain.HQSAMMaskPrediction.Chain.DenseEmbeddingUpscalingHQ.EmbeddingMaskfeature.LayerNorm2d",
        "mask_decoder.compress_vit_feat.3": "Chain.HQSAMMaskPrediction.Chain.DenseEmbeddingUpscalingHQ.HQFeatures.CompressViTFeat.ConvTranspose2d_2",
        "mask_decoder.embedding_encoder.3": "Chain.HQSAMMaskPrediction.Chain.DenseEmbeddingUpscalingHQ.HQFeatures.EmbeddingEncoder.ConvTranspose2d_2",
        "mask_decoder.embedding_maskfeature.3": "Chain.HQSAMMaskPrediction.Chain.DenseEmbeddingUpscalingHQ.EmbeddingMaskfeature.Conv2d_2",
        "mask_decoder.hf_mlp.layers.0": "Chain.HQSAMMaskPrediction.HQTokenMLP.MultiLinear.Linear_1",
        "mask_decoder.hf_mlp.layers.1": "Chain.HQSAMMaskPrediction.HQTokenMLP.MultiLinear.Linear_2",
        "mask_decoder.hf_mlp.layers.2": "Chain.HQSAMMaskPrediction.HQTokenMLP.MultiLinear.Linear_3",
        "mask_decoder.hf_token": "MaskDecoderTokensExtender.hq_token",
    },
)
|
||||||
|
|
||||||
|
# HQ-SAM ViT-H (source file sam_hq_vit_h.pth), converted with the module's
# `recipe` and kept in float32. No revision is pinned for the original; only
# the expected SHA-256 is given.
vit_h = Conversion(
    original=Hub(
        repo_id="lkeab/hq-sam",
        filename="sam_hq_vit_h.pth",
        expected_sha256="a7ac14a085326d9fa6199c8c698c4f0e7280afdbb974d2c4660ec60877b45e35",
    ),
    converted=Hub(
        repo_id="refiners/sam.hq.vit_h",
        filename="model.safetensors",
        expected_sha256="017630c780ff67673d71e91beaec8804f8b5ae3a9ea607456b4504562f96cc2f",
    ),
    recipe=recipe,
    dtype=torch.float32,
)
|
147
src/refiners/conversion/models/ipadapter_sd15.py
Normal file
147
src/refiners/conversion/models/ipadapter_sd15.py
Normal file
|
@ -0,0 +1,147 @@
|
||||||
|
import torch
|
||||||
|
|
||||||
|
from refiners.conversion.utils import Conversion, Hub, WeightRecipe
|
||||||
|
|
||||||
|
# Key map for the base SD 1.5 IP-Adapter: the two image projection entries,
# then sixteen `ip_adapter` slots, each with a `to_k_ip` and a `to_v_ip` entry.
diffusers_recipe = WeightRecipe(
    key_map={
        "image_proj.proj": "image_proj.Linear",
        "image_proj.norm": "image_proj.LayerNorm",
        # odd source indices are renumbered to zero-padded consecutive slots
        "ip_adapter.1.to_k_ip": "ip_adapter.000.to_k_ip",
        "ip_adapter.1.to_v_ip": "ip_adapter.000.to_v_ip",
        "ip_adapter.3.to_k_ip": "ip_adapter.001.to_k_ip",
        "ip_adapter.3.to_v_ip": "ip_adapter.001.to_v_ip",
        "ip_adapter.5.to_k_ip": "ip_adapter.002.to_k_ip",
        "ip_adapter.5.to_v_ip": "ip_adapter.002.to_v_ip",
        "ip_adapter.7.to_k_ip": "ip_adapter.003.to_k_ip",
        "ip_adapter.7.to_v_ip": "ip_adapter.003.to_v_ip",
        "ip_adapter.9.to_k_ip": "ip_adapter.004.to_k_ip",
        "ip_adapter.9.to_v_ip": "ip_adapter.004.to_v_ip",
        "ip_adapter.11.to_k_ip": "ip_adapter.005.to_k_ip",
        "ip_adapter.11.to_v_ip": "ip_adapter.005.to_v_ip",
        # source index 31 is deliberately out of sequence: it fills slot 006, between 11 and 13
        "ip_adapter.31.to_k_ip": "ip_adapter.006.to_k_ip",
        "ip_adapter.31.to_v_ip": "ip_adapter.006.to_v_ip",
        "ip_adapter.13.to_k_ip": "ip_adapter.007.to_k_ip",
        "ip_adapter.13.to_v_ip": "ip_adapter.007.to_v_ip",
        "ip_adapter.15.to_k_ip": "ip_adapter.008.to_k_ip",
        "ip_adapter.15.to_v_ip": "ip_adapter.008.to_v_ip",
        "ip_adapter.17.to_k_ip": "ip_adapter.009.to_k_ip",
        "ip_adapter.17.to_v_ip": "ip_adapter.009.to_v_ip",
        "ip_adapter.19.to_k_ip": "ip_adapter.010.to_k_ip",
        "ip_adapter.19.to_v_ip": "ip_adapter.010.to_v_ip",
        "ip_adapter.21.to_k_ip": "ip_adapter.011.to_k_ip",
        "ip_adapter.21.to_v_ip": "ip_adapter.011.to_v_ip",
        "ip_adapter.23.to_k_ip": "ip_adapter.012.to_k_ip",
        "ip_adapter.23.to_v_ip": "ip_adapter.012.to_v_ip",
        "ip_adapter.25.to_k_ip": "ip_adapter.013.to_k_ip",
        "ip_adapter.25.to_v_ip": "ip_adapter.013.to_v_ip",
        "ip_adapter.27.to_k_ip": "ip_adapter.014.to_k_ip",
        "ip_adapter.27.to_v_ip": "ip_adapter.014.to_v_ip",
        "ip_adapter.29.to_k_ip": "ip_adapter.015.to_k_ip",
        "ip_adapter.29.to_v_ip": "ip_adapter.015.to_v_ip",
    },
)
|
||||||
|
|
||||||
|
# Key map for the "plus" SD 1.5 IP-Adapter: image projection entries (latents,
# in/out projections, output norm and four transformer layers), followed by
# the same sixteen `ip_adapter` slots as the base recipe.
diffusers_plus_recipe = WeightRecipe(
    key_map={
        "image_proj.latents": "image_proj.LatentsToken.Parameter.weight",
        "image_proj.proj_in": "image_proj.Linear_1",
        "image_proj.proj_out": "image_proj.Linear_2",
        "image_proj.norm_out": "image_proj.LayerNorm",
        # source layer <i> maps to TransformerLayer_<i+1>; note that `to_q` goes
        # to Chain_2 and `to_kv` to Chain_1
        "image_proj.layers.0.0.norm1": "image_proj.Transformer.TransformerLayer_1.Residual_1.PerceiverAttention.Distribute.LayerNorm_1",
        "image_proj.layers.0.0.norm2": "image_proj.Transformer.TransformerLayer_1.Residual_1.PerceiverAttention.Distribute.LayerNorm_2",
        "image_proj.layers.0.0.to_q": "image_proj.Transformer.TransformerLayer_1.Residual_1.PerceiverAttention.Parallel.Chain_2.Linear",
        "image_proj.layers.0.0.to_kv": "image_proj.Transformer.TransformerLayer_1.Residual_1.PerceiverAttention.Parallel.Chain_1.Linear",
        "image_proj.layers.0.0.to_out": "image_proj.Transformer.TransformerLayer_1.Residual_1.PerceiverAttention.Linear",
        "image_proj.layers.0.1.0": "image_proj.Transformer.TransformerLayer_1.Residual_2.LayerNorm",
        "image_proj.layers.0.1.1": "image_proj.Transformer.TransformerLayer_1.Residual_2.FeedForward.Linear_1",
        "image_proj.layers.0.1.3": "image_proj.Transformer.TransformerLayer_1.Residual_2.FeedForward.Linear_2",
        "image_proj.layers.1.0.norm1": "image_proj.Transformer.TransformerLayer_2.Residual_1.PerceiverAttention.Distribute.LayerNorm_1",
        "image_proj.layers.1.0.norm2": "image_proj.Transformer.TransformerLayer_2.Residual_1.PerceiverAttention.Distribute.LayerNorm_2",
        "image_proj.layers.1.0.to_q": "image_proj.Transformer.TransformerLayer_2.Residual_1.PerceiverAttention.Parallel.Chain_2.Linear",
        "image_proj.layers.1.0.to_kv": "image_proj.Transformer.TransformerLayer_2.Residual_1.PerceiverAttention.Parallel.Chain_1.Linear",
        "image_proj.layers.1.0.to_out": "image_proj.Transformer.TransformerLayer_2.Residual_1.PerceiverAttention.Linear",
        "image_proj.layers.1.1.0": "image_proj.Transformer.TransformerLayer_2.Residual_2.LayerNorm",
        "image_proj.layers.1.1.1": "image_proj.Transformer.TransformerLayer_2.Residual_2.FeedForward.Linear_1",
        "image_proj.layers.1.1.3": "image_proj.Transformer.TransformerLayer_2.Residual_2.FeedForward.Linear_2",
        "image_proj.layers.2.0.norm1": "image_proj.Transformer.TransformerLayer_3.Residual_1.PerceiverAttention.Distribute.LayerNorm_1",
        "image_proj.layers.2.0.norm2": "image_proj.Transformer.TransformerLayer_3.Residual_1.PerceiverAttention.Distribute.LayerNorm_2",
        "image_proj.layers.2.0.to_q": "image_proj.Transformer.TransformerLayer_3.Residual_1.PerceiverAttention.Parallel.Chain_2.Linear",
        "image_proj.layers.2.0.to_kv": "image_proj.Transformer.TransformerLayer_3.Residual_1.PerceiverAttention.Parallel.Chain_1.Linear",
        "image_proj.layers.2.0.to_out": "image_proj.Transformer.TransformerLayer_3.Residual_1.PerceiverAttention.Linear",
        "image_proj.layers.2.1.0": "image_proj.Transformer.TransformerLayer_3.Residual_2.LayerNorm",
        "image_proj.layers.2.1.1": "image_proj.Transformer.TransformerLayer_3.Residual_2.FeedForward.Linear_1",
        "image_proj.layers.2.1.3": "image_proj.Transformer.TransformerLayer_3.Residual_2.FeedForward.Linear_2",
        "image_proj.layers.3.0.norm1": "image_proj.Transformer.TransformerLayer_4.Residual_1.PerceiverAttention.Distribute.LayerNorm_1",
        "image_proj.layers.3.0.norm2": "image_proj.Transformer.TransformerLayer_4.Residual_1.PerceiverAttention.Distribute.LayerNorm_2",
        "image_proj.layers.3.0.to_q": "image_proj.Transformer.TransformerLayer_4.Residual_1.PerceiverAttention.Parallel.Chain_2.Linear",
        "image_proj.layers.3.0.to_kv": "image_proj.Transformer.TransformerLayer_4.Residual_1.PerceiverAttention.Parallel.Chain_1.Linear",
        "image_proj.layers.3.0.to_out": "image_proj.Transformer.TransformerLayer_4.Residual_1.PerceiverAttention.Linear",
        "image_proj.layers.3.1.0": "image_proj.Transformer.TransformerLayer_4.Residual_2.LayerNorm",
        "image_proj.layers.3.1.1": "image_proj.Transformer.TransformerLayer_4.Residual_2.FeedForward.Linear_1",
        "image_proj.layers.3.1.3": "image_proj.Transformer.TransformerLayer_4.Residual_2.FeedForward.Linear_2",
        # odd source indices are renumbered to zero-padded consecutive slots
        "ip_adapter.1.to_k_ip": "ip_adapter.000.to_k_ip",
        "ip_adapter.1.to_v_ip": "ip_adapter.000.to_v_ip",
        "ip_adapter.3.to_k_ip": "ip_adapter.001.to_k_ip",
        "ip_adapter.3.to_v_ip": "ip_adapter.001.to_v_ip",
        "ip_adapter.5.to_k_ip": "ip_adapter.002.to_k_ip",
        "ip_adapter.5.to_v_ip": "ip_adapter.002.to_v_ip",
        "ip_adapter.7.to_k_ip": "ip_adapter.003.to_k_ip",
        "ip_adapter.7.to_v_ip": "ip_adapter.003.to_v_ip",
        "ip_adapter.9.to_k_ip": "ip_adapter.004.to_k_ip",
        "ip_adapter.9.to_v_ip": "ip_adapter.004.to_v_ip",
        "ip_adapter.11.to_k_ip": "ip_adapter.005.to_k_ip",
        "ip_adapter.11.to_v_ip": "ip_adapter.005.to_v_ip",
        # source index 31 is deliberately out of sequence: it fills slot 006, between 11 and 13
        "ip_adapter.31.to_k_ip": "ip_adapter.006.to_k_ip",
        "ip_adapter.31.to_v_ip": "ip_adapter.006.to_v_ip",
        "ip_adapter.13.to_k_ip": "ip_adapter.007.to_k_ip",
        "ip_adapter.13.to_v_ip": "ip_adapter.007.to_v_ip",
        "ip_adapter.15.to_k_ip": "ip_adapter.008.to_k_ip",
        "ip_adapter.15.to_v_ip": "ip_adapter.008.to_v_ip",
        "ip_adapter.17.to_k_ip": "ip_adapter.009.to_k_ip",
        "ip_adapter.17.to_v_ip": "ip_adapter.009.to_v_ip",
        "ip_adapter.19.to_k_ip": "ip_adapter.010.to_k_ip",
        "ip_adapter.19.to_v_ip": "ip_adapter.010.to_v_ip",
        "ip_adapter.21.to_k_ip": "ip_adapter.011.to_k_ip",
        "ip_adapter.21.to_v_ip": "ip_adapter.011.to_v_ip",
        "ip_adapter.23.to_k_ip": "ip_adapter.012.to_k_ip",
        "ip_adapter.23.to_v_ip": "ip_adapter.012.to_v_ip",
        "ip_adapter.25.to_k_ip": "ip_adapter.013.to_k_ip",
        "ip_adapter.25.to_v_ip": "ip_adapter.013.to_v_ip",
        "ip_adapter.27.to_k_ip": "ip_adapter.014.to_k_ip",
        "ip_adapter.27.to_v_ip": "ip_adapter.014.to_v_ip",
        "ip_adapter.29.to_k_ip": "ip_adapter.015.to_k_ip",
        "ip_adapter.29.to_v_ip": "ip_adapter.015.to_v_ip",
    },
    # keyed by the converted name of `image_proj.latents`
    # NOTE(review): presumably the target shape of that tensor — confirm in WeightRecipe
    tensor_reshapes={"image_proj.LatentsToken.Parameter.weight": (16, 768)},
)
|
||||||
|
|
||||||
|
# Base SD 1.5 IP-Adapter (source file models/ip-adapter_sd15.safetensors),
# pinned to a Hub revision and converted with `diffusers_recipe` in float16.
base = Conversion(
    original=Hub(
        repo_id="h94/IP-Adapter",
        filename="models/ip-adapter_sd15.safetensors",
        revision="018e402774aeeddd60609b4ecdb7e298259dc729",
        expected_sha256="289b45f16d043d0bf542e45831f971dcdaabe18b656f11e86d9dfba7e9ee3369",
    ),
    converted=Hub(
        repo_id="refiners/sd15.ip_adapter",
        filename="model.safetensors",
        expected_sha256="ebabe531bac205e2fac942b353c22066abfb115b02f7fb72cd0e3361ee838ef3",
    ),
    recipe=diffusers_recipe,
    dtype=torch.float16,
)
|
||||||
|
# "Plus" SD 1.5 IP-Adapter (source file models/ip-adapter-plus_sd15.safetensors),
# same repo and revision as `base` but converted with `diffusers_plus_recipe`.
plus = Conversion(
    original=Hub(
        repo_id="h94/IP-Adapter",
        filename="models/ip-adapter-plus_sd15.safetensors",
        revision="018e402774aeeddd60609b4ecdb7e298259dc729",
        expected_sha256="a1c250be40455cc61a43da1201ec3f1edaea71214865fb47f57927e06cbe4996",
    ),
    converted=Hub(
        repo_id="refiners/sd15.ip_adapter.plus",
        filename="model.safetensors",
        expected_sha256="6eae5d2098fa83e3b8bf416fb46dd77a921ad044650f13890e8d41d7c84a71d2",
    ),
    recipe=diffusers_plus_recipe,
    dtype=torch.float16,
)
|
363
src/refiners/conversion/models/ipadapter_sdxl.py
Normal file
363
src/refiners/conversion/models/ipadapter_sdxl.py
Normal file
|
@ -0,0 +1,363 @@
|
||||||
|
import torch
|
||||||
|
|
||||||
|
from refiners.conversion.utils import Conversion, Hub, WeightRecipe
|
||||||
|
|
||||||
|
# Key mapping for the base SDXL IP-Adapter checkpoint.
diffusers_recipe = WeightRecipe(
    key_map={
        "image_proj.proj": "image_proj.Linear",
        "image_proj.norm": "image_proj.LayerNorm",
        # Source keys use odd indices, visited in the order 1..47, 121..139, 49..119;
        # they are renumbered consecutively as 000..069 (to_k_ip first, then to_v_ip).
        **{
            f"ip_adapter.{source_index}.{projection}": f"ip_adapter.{target_index:03d}.{projection}"
            for target_index, source_index in enumerate(
                (*range(1, 48, 2), *range(121, 140, 2), *range(49, 120, 2))
            )
            for projection in ("to_k_ip", "to_v_ip")
        },
    },
)
|
||||||
|
|
||||||
|
# Key mapping for the "plus" SDXL IP-Adapter checkpoint.
plus_diffusers_recipe = WeightRecipe(
    key_map={
        "image_proj.latents": "image_proj.LatentsToken.Parameter.weight",
        "image_proj.proj_in": "image_proj.Linear_1",
        "image_proj.proj_out": "image_proj.Linear_2",
        "image_proj.norm_out": "image_proj.LayerNorm",
        # image_proj.layers.0..3 map to TransformerLayer_1..4 with the same sub-key layout.
        **{
            f"image_proj.layers.{layer}.{source_suffix}": (
                f"image_proj.Transformer.TransformerLayer_{layer + 1}.{target_suffix}"
            )
            for layer in range(4)
            for source_suffix, target_suffix in (
                ("0.norm1", "Residual_1.PerceiverAttention.Distribute.LayerNorm_1"),
                ("0.norm2", "Residual_1.PerceiverAttention.Distribute.LayerNorm_2"),
                ("0.to_q", "Residual_1.PerceiverAttention.Parallel.Chain_2.Linear"),
                ("0.to_kv", "Residual_1.PerceiverAttention.Parallel.Chain_1.Linear"),
                ("0.to_out", "Residual_1.PerceiverAttention.Linear"),
                ("1.0", "Residual_2.LayerNorm"),
                ("1.1", "Residual_2.FeedForward.Linear_1"),
                ("1.3", "Residual_2.FeedForward.Linear_2"),
            )
        },
        # Source keys use odd indices, visited in the order 1..47, 121..139, 49..119;
        # they are renumbered consecutively as 000..069 (to_k_ip first, then to_v_ip).
        **{
            f"ip_adapter.{source_index}.{projection}": f"ip_adapter.{target_index:03d}.{projection}"
            for target_index, source_index in enumerate(
                (*range(1, 48, 2), *range(121, 140, 2), *range(49, 120, 2))
            )
            for projection in ("to_k_ip", "to_v_ip")
        },
    },
    tensor_reshapes={"image_proj.LatentsToken.Parameter.weight": (16, 1280)},
)
|
||||||
|
|
||||||
|
# Conversion entry for the base SDXL IP-Adapter weights.
base = Conversion(
    # source checkpoint, identified by repo, file, revision and expected checksum
    original=Hub(
        repo_id="h94/IP-Adapter",
        filename="sdxl_models/ip-adapter_sdxl_vit-h.safetensors",
        revision="018e402774aeeddd60609b4ecdb7e298259dc729",
        expected_sha256="ebf05d918348aec7abb02a5e9ecef77e0aaea6914a5c4ea13f50d45eb1681831",
    ),
    # converted checkpoint and the checksum it is expected to have
    converted=Hub(
        repo_id="refiners/sdxl.ip_adapter",
        filename="model.safetensors",
        expected_sha256="91fc7f3c9571ed26a93372e7251596c7269f37e134fae3a6a5f4f7247d998ab8",
    ),
    recipe=diffusers_recipe,
    dtype=torch.float16,
)
|
||||||
|
# Conversion entry for the "plus" SDXL IP-Adapter weights.
plus = Conversion(
    # source checkpoint, identified by repo, file, revision and expected checksum
    original=Hub(
        repo_id="h94/IP-Adapter",
        filename="sdxl_models/ip-adapter-plus_sdxl_vit-h.safetensors",
        revision="018e402774aeeddd60609b4ecdb7e298259dc729",
        expected_sha256="3f5062b8400c94b7159665b21ba5c62acdcd7682262743d7f2aefedef00e6581",
    ),
    # converted checkpoint and the checksum it is expected to have
    converted=Hub(
        repo_id="refiners/sdxl.ip_adapter.plus",
        filename="model.safetensors",
        expected_sha256="9fdbcb4c6e3a643b6e8c002945685cc9d12ddd9787ce8b3e53fdeb814002ca22",
    ),
    recipe=plus_diffusers_recipe,
    dtype=torch.float16,
)
|
56
src/refiners/conversion/models/loras.py
Normal file
56
src/refiners/conversion/models/loras.py
Normal file
|
@ -0,0 +1,56 @@
|
||||||
|
from refiners.conversion.utils import Hub
|
||||||
|
|
||||||
|
# LoRA weight files, each described by a Hub entry with its expected SHA-256.
# Entries either carry a `revision` or an explicit `download_url`.

sd15_pokemon = Hub(
    repo_id="pcuenq/pokemon-lora",
    filename="pytorch_lora_weights.bin",
    revision="31ae8fe6f588a78c02828e9b8d352dccd90f1a24",
    expected_sha256="f712fcfb6618da14d25a4f3e0c9460a878fc2417e2df95cdd683a73f71b50384",
)

sdxl_dpo = Hub(
    repo_id="radames/sdxl-DPO-LoRA",
    filename="pytorch_lora_weights.safetensors",
    revision="319a544fff501b3ed907df67e1db356bee364c9f",
    expected_sha256="aeb5ec4a7db6679ea8085f794db1abca92cfd8e4c667a1b301b2b8ecd5599a5d",
)

# --- entries with an explicit download_url (civitai) ---

sdxl_scifi = Hub(
    repo_id="civitai/Ciro_Negrogni",
    filename="Sci-fi_Environments_sdxl.safetensors",
    expected_sha256="5a3f738c9f79c65c1fac1418b1fe593967b0c1bd24fdb27f120ef1685e815c8e",
    download_url="https://civitai.com/api/download/models/140624?type=Model&format=SafeTensor",
)

sdxl_pixelart = Hub(
    repo_id="civitai/NeriJS",
    filename="pixel-art-xl-v1.1.safetensors",
    expected_sha256="bbf3d8defbfb3fb71331545225c0cf50c74a748d2525f7c19ebb8f74445de274",
    download_url="https://civitai.com/api/download/models/135931?type=Model&format=SafeTensor",
)

# --- entries with an explicit download_url (sliders.baulab.info) ---

sdxl_age_slider = Hub(
    repo_id="baulab/sliders",
    filename="age.pt",
    expected_sha256="8c1c096f7cc1109b4072cbc604c811a5f0ff034fc0f6dc7cf66a558550aa4890",
    download_url="https://sliders.baulab.info/weights/xl_sliders/age.pt",
)

sdxl_cartoon_slider = Hub(
    repo_id="baulab/sliders",
    filename="cartoon_style.pt",
    expected_sha256="e07c30e4f82f709a474ae11dc5108ac48f81b6996b937757c8dd198920ea9b4d",
    download_url="https://sliders.baulab.info/weights/xl_sliders/cartoon_style.pt",
)

sdxl_eyesize_slider = Hub(
    repo_id="baulab/sliders",
    filename="eyesize.pt",
    expected_sha256="8fdffa3e7788f4bd6be9a2fe3b91957b4f35999fc9fa19eabfb49f92fbf6650b",
    download_url="https://sliders.baulab.info/weights/xl_sliders/eyesize.pt",
)

# --- entries with a revision ---

sdxl_lcm = Hub(
    repo_id="latent-consistency/lcm-lora-sdxl",
    filename="pytorch_lora_weights.safetensors",
    revision="a18548dd4956b174ec5b0d78d340c8dae0a129cd",
    expected_sha256="a764e6859b6e04047cd761c08ff0cee96413a8e004c9f07707530cd776b19141",
)

sdxl_lightning_4steps = Hub(
    repo_id="ByteDance/SDXL-Lightning",
    filename="sdxl_lightning_4step_lora.safetensors",
    revision="c9a24f48e1c025556787b0c58dd67a091ece2e44",
    expected_sha256="bf56cf2657efb15e465d81402ed481d1e11c4677e4bcce1bc11fe71ad8506b79",
)
|
|
@ -1,9 +1,14 @@
|
||||||
|
import logging
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from torch import Tensor
|
import requests
|
||||||
|
import torch
|
||||||
|
|
||||||
|
from refiners.conversion.utils import Conversion, Hub, TensorDict
|
||||||
|
from refiners.fluxion.utils import save_to_safetensors
|
||||||
|
|
||||||
|
|
||||||
def convert_weights(official_state_dict: dict[str, Tensor]) -> dict[str, Tensor]:
|
def convert_weights(official_state_dict: TensorDict) -> TensorDict:
|
||||||
rm_list = [
|
rm_list = [
|
||||||
# Official weights contains useless keys
|
# Official weights contains useless keys
|
||||||
# See https://github.com/qianyu-dlut/MVANet/issues/3#issuecomment-2105650425
|
# See https://github.com/qianyu-dlut/MVANet/issues/3#issuecomment-2105650425
|
||||||
|
@ -136,3 +141,72 @@ def convert_weights(official_state_dict: dict[str, Tensor]) -> dict[str, Tensor]
|
||||||
state_dict.pop(key)
|
state_dict.pop(key)
|
||||||
|
|
||||||
return state_dict
|
return state_dict
|
||||||
|
|
||||||
|
|
||||||
|
class MVANetConversion(Conversion):
    """Conversion entry for MVANet that converts with `convert_weights` instead of a recipe.

    `__init__` only stores its arguments; it does not call `Conversion.__init__`.
    """

    def __init__(
        self,
        original: Hub,
        converted: Hub,
        dtype: torch.dtype = torch.float16,
    ) -> None:
        """Store the source file, the target file and the dtype of the converted weights.

        Args:
            original: Hub entry of the weights to convert.
            converted: Hub entry of the converted weights.
            dtype: dtype passed to `change_dtype` before saving.
        """
        self.original = original
        self.converted = converted
        self.dtype = dtype

    def convert(self) -> None:  # type: ignore
        """Convert the weights from the original to the converted weights.

        Nothing is converted when the converted file already exists locally and
        passes the local hash check. Otherwise the original file is downloaded,
        converted, cast to `self.dtype`, saved as safetensors and verified.

        Raises:
            AssertionError: if a local or remote hash check reports a mismatch.
                Raised explicitly (not through `assert`) so that the checks are
                still performed when Python runs with `-O`.
        """
        logging.info(f"Converting {self.original.repo_id}/{self.original.filename} to {self.converted.repo_id}")

        # check if the converted file already exists
        if self.converted.local_path.is_file():
            logging.warning(f"{self.converted.local_path} already exists")
            if self.converted.check_local_hash():
                try:
                    if not self.converted.check_remote_hash():
                        raise AssertionError(f"{self.converted.local_path} does not match the remote hash")
                except requests.exceptions.HTTPError:
                    # an unreachable remote is logged, it does not invalidate the local file
                    logging.error(f"{self.converted.local_path} couldn't verify remote hash")
                return

        # get the original state_dict
        self.original.download()

        # load the original state_dict
        original_weights = self.load_state_dict(self.original.local_path)

        # convert the state_dict
        converted_weights = convert_weights(original_weights)
        converted_weights = self.change_dtype(converted_weights, self.dtype)

        # save the converted state_dict
        self.converted.local_path.parent.mkdir(parents=True, exist_ok=True)
        save_to_safetensors(self.converted.local_path, converted_weights)

        # check the converted state_dict
        if not self.converted.check_local_hash():
            raise AssertionError(f"{self.converted.local_path} does not match the expected local hash")
        try:
            if not self.converted.check_remote_hash():
                raise AssertionError(f"{self.converted.local_path} does not match the remote hash")
        except requests.exceptions.HTTPError:
            logging.warning(f"{self.converted.local_path} couldn't verify remote hash")
|
||||||
|
|
||||||
|
|
||||||
|
# Conversion entry for the MVANet checkpoint (handled by MVANetConversion, no recipe).
mvanet = MVANetConversion(
    # source checkpoint, identified by repo, file, revision and expected checksum
    original=Hub(
        repo_id="creative-graphic-design/MVANet-checkpoints",
        filename="Model_80.pth",
        revision="62d38c42a28b999067e2f755e32b27249bcc66c6",
        expected_sha256="ffec20a382b0a1832786438475e8b912a03be727a0e3197e7ab039153fb3bc46",
    ),
    # converted checkpoint and the checksum it is expected to have
    converted=Hub(
        repo_id="refiners/mvanet",
        filename="model.safetensors",
        expected_sha256="cca9a6e05e977ee9ac98b3f9a248430d7fe8385f7d249eaddece318e777788e5",
    ),
    dtype=torch.float16,
)
|
||||||
|
# Hub entry for the finegrain box segmenter weights at revision "v0.1".
finegrain_v01 = Hub(
    repo_id="finegrain/finegrain-box-segmenter",
    filename="model.safetensors",
    revision="v0.1",
    expected_sha256="fd5f13919dfc0dda102df1af648c3773c61221aa65fe58d6af978637baded1ae",
)
|
36
src/refiners/conversion/models/preprocessors.py
Normal file
36
src/refiners/conversion/models/preprocessors.py
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
import torch
|
||||||
|
|
||||||
|
from refiners.conversion.utils import Conversion, Hub, WeightRecipe
|
||||||
|
|
||||||
|
# Key mapping for the informative drawings preprocessor checkpoint.
informative_drawings_recipe = WeightRecipe(
    key_map={
        "model0.1": "Chain_1.Conv2d",
        "model1.0": "Chain_2.Conv2d",
        "model1.3": "Chain_3.Conv2d",
        # model2.0..2 map to Residual_1..3; conv_block.1 / conv_block.5 map to Conv2d_1 / Conv2d_2
        **{
            f"model2.{block}.conv_block.{source_index}": f"Residual_{block + 1}.Conv2d_{target_index}"
            for block in range(3)
            for source_index, target_index in ((1, 1), (5, 2))
        },
        "model3.0": "Chain_4.ConvTranspose2d",
        "model3.3": "Chain_5.ConvTranspose2d",
        "model4.1": "Chain_6.Conv2d",
    },
)
|
||||||
|
|
||||||
|
# Conversion entry for the informative drawings preprocessor weights.
informative_drawings = Conversion(
    # source checkpoint, fetched through an explicit download_url
    # NOTE(review): the URL ends in "model2.pth" while filename is "model.pth" - confirm this is intended
    original=Hub(
        repo_id="carolineec/informativedrawings",
        filename="model.pth",
        expected_sha256="30a534781061f34e83bb9406b4335da4ff2616c95d22a585c1245aa8363e74e0",
        download_url="https://huggingface.co/spaces/carolineec/informativedrawings/resolve/main/model2.pth",
    ),
    # converted checkpoint and the checksum it is expected to have
    converted=Hub(
        repo_id="refiners/preprocessor.informativedrawings",
        filename="model.safetensors",
        expected_sha256="0f9a34bfcd95d89aedcc213b8d279ba1bab1279b73d8d009d1632d6276e6fcf3",
    ),
    recipe=informative_drawings_recipe,
    dtype=torch.float32,
)
|
293
src/refiners/conversion/models/sam.py
Normal file
293
src/refiners/conversion/models/sam.py
Normal file
|
@ -0,0 +1,293 @@
|
||||||
|
import logging
|
||||||
|
import types
|
||||||
|
from typing import Any, Callable, cast
|
||||||
|
|
||||||
|
import requests
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
from segment_anything import build_sam_vit_h # type: ignore
|
||||||
|
from segment_anything.modeling.common import LayerNorm2d # type: ignore
|
||||||
|
from torch import Tensor, nn
|
||||||
|
|
||||||
|
import refiners.fluxion.layers as fl
|
||||||
|
from refiners.conversion.model_converter import ModelConverter
|
||||||
|
from refiners.conversion.utils import Conversion, Hub
|
||||||
|
from refiners.fluxion.utils import load_tensors, manual_seed, save_to_safetensors
|
||||||
|
from refiners.foundationals.segment_anything.image_encoder import PositionalEncoder, SAMViTH
|
||||||
|
from refiners.foundationals.segment_anything.mask_decoder import MaskDecoder
|
||||||
|
from refiners.foundationals.segment_anything.prompt_encoder import MaskEncoder, PointEncoder
|
||||||
|
|
||||||
|
|
||||||
|
class FacebookSAM(nn.Module):
    """Typing stub describing the sub-modules read from the original SAM model.

    It only declares attributes; it adds no behavior on top of `nn.Module`.
    """

    # Converted by `convert_vit`.
    image_encoder: nn.Module
    # Converted by `convert_point_encoder` and `convert_mask_encoder`.
    prompt_encoder: nn.Module
    # Converted by `convert_mask_decoder`.
    mask_decoder: nn.Module
|
||||||
|
|
||||||
|
|
||||||
|
# `build_sam_vit_h` is imported with `# type: ignore`; re-declare it as a
# zero-argument callable returning the `FacebookSAM` stub so attribute access
# on the built model is typed.
build_sam_vit_h = cast(Callable[[], FacebookSAM], build_sam_vit_h)

# Layer mapping handed to `ModelConverter` as `custom_layer_mapping`: the original
# `LayerNorm2d` module is paired with the refiners `fl.LayerNorm2d`.
assert issubclass(LayerNorm2d, nn.Module)
custom_layers = {LayerNorm2d: fl.LayerNorm2d}
|
||||||
|
|
||||||
|
|
||||||
|
def convert_mask_encoder(prompt_encoder: nn.Module) -> dict[str, Tensor]:
    """Convert the mask-related weights of the original prompt encoder.

    The `mask_downscaling` sub-module is mapped onto a refiners `MaskEncoder`
    through `ModelConverter`; the `no_mask_embed` weight is copied by hand.

    Args:
        prompt_encoder: Original prompt encoder. It must expose the
            `mask_downscaling` and `no_mask_embed` attributes.

    Returns:
        The state dict that was loaded into the refiners `MaskEncoder`.

    Raises:
        AssertionError: If `map_state_dicts` returns a falsy mapping.
    """
    manual_seed(seed=0)
    target_encoder = MaskEncoder()

    converter = ModelConverter(
        source_model=prompt_encoder.mask_downscaling,
        target_model=target_encoder,
        custom_layer_mapping=custom_layers,  # type: ignore
        verbose=False,
    )

    # Random input used by the converter to match source and target layers.
    dummy_mask = torch.randn(1, 256, 256)
    key_mapping = converter.map_state_dicts(source_args=(dummy_mask,))
    assert key_mapping

    source_weights = prompt_encoder.mask_downscaling.state_dict()
    target_weights = target_encoder.state_dict()

    # Mapping handled manually (see below) because nn.Parameter is a special case
    target_weights.pop("no_mask_embedding")

    converted_weights = converter._convert_state_dict(  # pyright: ignore[reportPrivateUsage]
        source_state_dict=source_weights,
        target_state_dict=target_weights,
        state_dict_mapping=key_mapping,
    )

    state_dict: dict[str, Tensor] = {
        "no_mask_embedding": nn.Parameter(data=prompt_encoder.no_mask_embed.weight.clone()),  # type: ignore
        **converted_weights,
    }

    # Loading doubles as a check that keys and shapes fit the refiners model.
    target_encoder.load_state_dict(state_dict=state_dict)

    return state_dict
|
||||||
|
|
||||||
|
|
||||||
|
def convert_point_encoder(prompt_encoder: nn.Module) -> dict[str, Tensor]:
    """Build the refiners `PointEncoder` state dict from the original prompt encoder.

    No automatic mapping is involved: the point embeddings (followed by the
    "not a point" embedding) are concatenated along dim 0, and the gaussian
    positional-encoding matrix is transposed.

    Args:
        prompt_encoder: Original prompt encoder. It must expose `point_embeddings`,
            `not_a_point_embed` and `pe_layer.positional_encoding_gaussian_matrix`.

    Returns:
        The state dict that was loaded into the refiners `PointEncoder`.

    Raises:
        AssertionError: If the gaussian matrix is not a `Tensor`.
    """
    manual_seed(seed=0)

    embedding_weights: list[Tensor] = [
        *(embedding.weight for embedding in prompt_encoder.point_embeddings),  # type: ignore
        prompt_encoder.not_a_point_embed.weight,  # type: ignore
    ]
    gaussian_matrix = prompt_encoder.pe_layer.positional_encoding_gaussian_matrix  # type: ignore
    assert isinstance(gaussian_matrix, Tensor)

    state_dict: dict[str, Tensor] = {
        "Residual.PointTypeEmbedding.weight": nn.Parameter(data=torch.cat(tensors=embedding_weights, dim=0)),
        "CoordinateEncoder.Linear.weight": nn.Parameter(data=gaussian_matrix.T.contiguous()),
    }

    # Loading doubles as a check that keys and shapes fit the refiners model.
    target_encoder = PointEncoder()
    target_encoder.load_state_dict(state_dict=state_dict)

    return state_dict
|
||||||
|
|
||||||
|
|
||||||
|
def convert_vit(vit: nn.Module) -> dict[str, Tensor]:
    """Convert the original SAM image encoder weights for the refiners `SAMViTH`.

    Most weights are mapped by `ModelConverter`. The positional embedding and
    the relative position embeddings of the 32 attention blocks are handled by
    hand: they are removed from both state dicts before the automatic
    conversion and inserted under their refiners keys afterwards.

    Args:
        vit: Original image encoder. Its state dict must contain `pos_embed` and,
            for each block index 0..31, `blocks.{i}.attn.rel_pos_w` / `rel_pos_h`.

    Returns:
        The state dict that was loaded into the refiners `SAMViTH`.

    Raises:
        AssertionError: If `map_state_dicts` returns a falsy mapping or
            `compare_models` returns a falsy value.
    """
    manual_seed(seed=0)
    target_vit = SAMViTH()

    converter = ModelConverter(
        source_model=vit,
        target_model=target_vit,
        custom_layer_mapping=custom_layers,  # type: ignore
        verbose=False,
    )
    converter.skip_init_check = True

    # Random input used both to build the mapping and to compare the models.
    dummy_image = torch.randn(1, 3, 1024, 1024)
    key_mapping = converter.map_state_dicts(source_args=(dummy_image,))
    assert key_mapping

    key_mapping["PositionalEncoder.Parameter.weight"] = "pos_embed"

    target_weights = target_vit.state_dict()
    del target_weights["PositionalEncoder.Parameter.weight"]

    source_weights = vit.state_dict()
    pos_embed = source_weights.pop("pos_embed")

    # Source blocks are numbered from 0, refiners transformer layers from 1.
    # `rel_pos_w` becomes the horizontal embedding, `rel_pos_h` the vertical one.
    relative_embeddings: dict[str, Tensor] = {}
    for block_index, layer_index in enumerate(range(1, 33)):
        source_key_w = f"blocks.{block_index}.attn.rel_pos_w"
        source_key_h = f"blocks.{block_index}.attn.rel_pos_h"
        target_key_w = f"Transformer.TransformerLayer_{layer_index}.Residual_1.FusedSelfAttention.RelativePositionAttention.horizontal_embedding"
        target_key_h = f"Transformer.TransformerLayer_{layer_index}.Residual_1.FusedSelfAttention.RelativePositionAttention.vertical_embedding"

        relative_embeddings[target_key_w] = source_weights.pop(source_key_w)
        relative_embeddings[target_key_h] = source_weights.pop(source_key_h)
        del target_weights[target_key_w]
        del target_weights[target_key_h]

    converted_source = converter._convert_state_dict(  # pyright: ignore[reportPrivateUsage]
        source_state_dict=source_weights,
        target_state_dict=target_weights,
        state_dict_mapping=key_mapping,
    )

    # The original positional embedding is reshaped to the shape of the refiners parameter.
    positional_encoder = target_vit.layer("PositionalEncoder", PositionalEncoder)
    embed = pos_embed.reshape_as(positional_encoder.layer("Parameter", fl.Parameter).weight)
    converted_source["PositionalEncoder.Parameter.weight"] = embed  # type: ignore
    converted_source.update(relative_embeddings)

    target_vit.load_state_dict(state_dict=converted_source)
    assert converter.compare_models((dummy_image,), threshold=0.5)

    return converted_source
|
||||||
|
|
||||||
|
|
||||||
|
def convert_mask_decoder(mask_decoder: nn.Module) -> dict[str, Tensor]:
    """Convert the original SAM mask decoder weights for the refiners `MaskDecoder`.

    Weights are mapped by `ModelConverter`, except for the decoder tokens: the
    refiners `MaskDecoderTokens.Parameter.weight` is the concatenation (dim 0)
    of the original `iou_token` and `mask_tokens` weights.

    Args:
        mask_decoder: Original mask decoder. It must expose `iou_token` and
            `mask_tokens`.

    Returns:
        The state dict that was loaded into the refiners `MaskDecoder`.

    Raises:
        AssertionError: If `map_state_dicts` returns `None` or `compare_models`
            returns a falsy value.
    """
    manual_seed(seed=0)

    refiners_mask_decoder = MaskDecoder()

    image_embedding = torch.randn(1, 256, 64, 64)
    dense_positional_embedding = torch.randn(1, 256, 64, 64)
    point_embedding = torch.randn(1, 3, 256)
    mask_embedding = torch.randn(1, 256, 64, 64)

    def set_embeddings() -> None:
        # The refiners decoder takes its inputs through setters, not call arguments.
        refiners_mask_decoder.set_image_embedding(image_embedding)
        refiners_mask_decoder.set_point_embedding(point_embedding)
        refiners_mask_decoder.set_mask_embedding(mask_embedding)
        refiners_mask_decoder.set_dense_positional_embedding(dense_positional_embedding)

    # Uses the module-level `custom_layers`, like the other converters in this file.
    converter = ModelConverter(
        source_model=mask_decoder,
        target_model=refiners_mask_decoder,
        custom_layer_mapping=custom_layers,  # type: ignore
        verbose=False,
    )

    inputs = {
        "image_embeddings": image_embedding,
        "image_pe": dense_positional_embedding,
        "sparse_prompt_embeddings": point_embedding,
        "dense_prompt_embeddings": mask_embedding,
        "multimask_output": True,
    }

    set_embeddings()

    mapping = converter.map_state_dicts(source_args=inputs, target_args={})
    assert mapping is not None
    mapping["MaskDecoderTokens.Parameter"] = "iou_token"

    state_dict = converter._convert_state_dict(  # type: ignore
        source_state_dict=mask_decoder.state_dict(),
        target_state_dict=refiners_mask_decoder.state_dict(),
        state_dict_mapping=mapping,
    )
    state_dict["MaskDecoderTokens.Parameter.weight"] = torch.cat(
        tensors=[mask_decoder.iou_token.weight, mask_decoder.mask_tokens.weight], dim=0
    )  # type: ignore
    refiners_mask_decoder.load_state_dict(state_dict=state_dict)

    # The embeddings are set again before the comparison, as in the original code.
    # NOTE(review): presumably they do not survive the mapping pass - confirm.
    set_embeddings()

    # Perform (1) upscaling then (2) mask prediction in this order (= like in the official implementation) to make
    # `compare_models` happy (MaskPrediction's Matmul runs those in the reverse order by default)
    matmul = refiners_mask_decoder.ensure_find(fl.Matmul)

    def forward_swapped_order(self: Any, *args: Any) -> Any:
        y = self[1](*args)  # (1)
        x = self[0](*args)  # (2)
        return torch.matmul(input=x, other=y)

    matmul.forward = types.MethodType(forward_swapped_order, matmul)

    assert converter.compare_models(source_args=inputs, target_args={}, threshold=1e-3)

    return state_dict
|
||||||
|
|
||||||
|
|
||||||
|
# TODO(laurent): convert this to a simple mapping
|
||||||
|
class ModelConverterHubDuo(Conversion):
    """Conversion of the original SAM ViT-H checkpoint into refiners weights.

    Instead of a key-map recipe, `convert` runs the `convert_*` helpers of this
    module on each part of the original model.
    """

    def __init__(self, original: Hub, converted: Hub, dtype: torch.dtype) -> None:
        """Store the source hub entry, the destination hub entry and the output dtype."""
        self.original = original
        self.converted = converted
        self.dtype = dtype

    def convert(self) -> None:
        """Download the original checkpoint, convert it and save the result.

        Returns early when the converted file already exists locally and its
        hash check passes. A remote hash check that fails with an HTTP error is
        only logged.

        Raises:
            AssertionError: If a hash check returns a falsy value.
        """
        logging.info(f"Converting {self.original.repo_id} to {self.converted.repo_id}")

        # Skip the conversion when a valid converted file is already on disk.
        if self.converted.local_path.is_file():
            logging.warning(f"{self.converted.local_path} already exists")
            if self.converted.check_local_hash():
                try:
                    assert self.converted.check_remote_hash()
                except requests.exceptions.HTTPError:
                    logging.error(f"{self.converted.local_path} couldn't verify remote hash")
                return

        # Fetch the original checkpoint and load it into the original architecture.
        self.original.download()
        sam_h = build_sam_vit_h()  # type: ignore
        sam_h.load_state_dict(state_dict=load_tensors(self.original.local_path))

        # Convert each part of the model.
        vit_state_dict = convert_vit(vit=sam_h.image_encoder)
        mask_decoder_state_dict = convert_mask_decoder(mask_decoder=sam_h.mask_decoder)
        point_encoder_state_dict = convert_point_encoder(prompt_encoder=sam_h.prompt_encoder)
        mask_encoder_state_dict = convert_mask_encoder(prompt_encoder=sam_h.prompt_encoder)

        # Merge the parts into one state dict, prefixing keys with the part name.
        output_state_dict: dict[str, Tensor] = {}
        output_state_dict.update((f"SAMViTH.{key}", value) for key, value in vit_state_dict.items())
        output_state_dict.update((f"MaskDecoder.{key}", value) for key, value in mask_decoder_state_dict.items())
        output_state_dict.update((f"PointEncoder.{key}", value) for key, value in point_encoder_state_dict.items())
        output_state_dict.update((f"MaskEncoder.{key}", value) for key, value in mask_encoder_state_dict.items())

        # Apply the configured dtype to the merged state dict.
        state_dict = self.change_dtype(output_state_dict, self.dtype)

        # Save the converted state dict, creating the parent directory if needed.
        self.converted.local_path.parent.mkdir(parents=True, exist_ok=True)
        save_to_safetensors(self.converted.local_path, state_dict)

        # Check the file that was just written.
        assert self.converted.check_local_hash()
        try:
            assert self.converted.check_remote_hash()
        except requests.exceptions.HTTPError:
            logging.warning(f"{self.converted.local_path} couldn't verify remote hash")
|
||||||
|
|
||||||
|
|
||||||
|
# Conversion entry for SAM ViT-H: the original checkpoint is fetched from an
# explicit download URL and the converted weights are kept in float32.
vit_h = ModelConverterHubDuo(
    original=Hub(
        repo_id="facebook/github_segment_anything",
        filename="sam_vit_h.pth",
        expected_sha256="a7bf3b02f3ebf1267aba913ff637d9a2d5c33d3173bb679e46d9f338c26f262e",
        download_url="https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth",
    ),
    converted=Hub(
        repo_id="refiners/sam.vit_h",
        filename="model.safetensors",
        expected_sha256="acc3034e9253b8e91d3e56b12e4c846c5bd44b640fd2e08bf328229f4714e8cf",
    ),
    dtype=torch.float32,
)
|
46
src/refiners/conversion/models/t2iadapter_sd15.py
Normal file
46
src/refiners/conversion/models/t2iadapter_sd15.py
Normal file
|
@ -0,0 +1,46 @@
|
||||||
|
import torch
|
||||||
|
|
||||||
|
from refiners.conversion.utils import Conversion, Hub, WeightRecipe
|
||||||
|
|
||||||
|
# Key map used to convert the SD1.5 T2I-Adapter checkpoint.
# NOTE(review): presumably each key is a layer prefix in the diffusers state dict
# and each value the matching refiners layer path - confirm against WeightRecipe.
diffusers_recipe = WeightRecipe(
    key_map={
        "adapter.conv_in": "Conv2d",
        "adapter.body.0.resnets.0.block1": "StatefulResidualBlocks_1.ResidualBlocks.Chain.ResidualBlock_1.Conv2d_1",
        "adapter.body.0.resnets.1.block1": "StatefulResidualBlocks_1.ResidualBlocks.Chain.ResidualBlock_2.Conv2d_1",
        "adapter.body.0.resnets.0.block2": "StatefulResidualBlocks_1.ResidualBlocks.Chain.ResidualBlock_1.Conv2d_2",
        "adapter.body.0.resnets.1.block2": "StatefulResidualBlocks_1.ResidualBlocks.Chain.ResidualBlock_2.Conv2d_2",
        "adapter.body.1.downsample": "StatefulResidualBlocks_2.ResidualBlocks.Downsample2d",
        "adapter.body.2.downsample": "StatefulResidualBlocks_3.ResidualBlocks.Downsample2d",
        "adapter.body.3.downsample": "StatefulResidualBlocks_4.ResidualBlocks.Downsample2d",
        "adapter.body.1.in_conv": "StatefulResidualBlocks_2.ResidualBlocks.Conv2d",
        "adapter.body.1.resnets.0.block1": "StatefulResidualBlocks_2.ResidualBlocks.Chain.ResidualBlock_1.Conv2d_1",
        "adapter.body.1.resnets.1.block1": "StatefulResidualBlocks_2.ResidualBlocks.Chain.ResidualBlock_2.Conv2d_1",
        "adapter.body.1.resnets.0.block2": "StatefulResidualBlocks_2.ResidualBlocks.Chain.ResidualBlock_1.Conv2d_2",
        "adapter.body.1.resnets.1.block2": "StatefulResidualBlocks_2.ResidualBlocks.Chain.ResidualBlock_2.Conv2d_2",
        "adapter.body.2.in_conv": "StatefulResidualBlocks_3.ResidualBlocks.Conv2d",
        "adapter.body.2.resnets.0.block1": "StatefulResidualBlocks_3.ResidualBlocks.Chain.ResidualBlock_1.Conv2d_1",
        "adapter.body.2.resnets.1.block1": "StatefulResidualBlocks_3.ResidualBlocks.Chain.ResidualBlock_2.Conv2d_1",
        "adapter.body.3.resnets.0.block1": "StatefulResidualBlocks_4.ResidualBlocks.Chain.ResidualBlock_1.Conv2d_1",
        "adapter.body.3.resnets.1.block1": "StatefulResidualBlocks_4.ResidualBlocks.Chain.ResidualBlock_2.Conv2d_1",
        "adapter.body.2.resnets.0.block2": "StatefulResidualBlocks_3.ResidualBlocks.Chain.ResidualBlock_1.Conv2d_2",
        "adapter.body.2.resnets.1.block2": "StatefulResidualBlocks_3.ResidualBlocks.Chain.ResidualBlock_2.Conv2d_2",
        "adapter.body.3.resnets.0.block2": "StatefulResidualBlocks_4.ResidualBlocks.Chain.ResidualBlock_1.Conv2d_2",
        "adapter.body.3.resnets.1.block2": "StatefulResidualBlocks_4.ResidualBlocks.Chain.ResidualBlock_2.Conv2d_2",
    },
)
|
||||||
|
|
||||||
|
# Conversion entry for the SD1.5 depth T2I-Adapter: the original file is pinned
# to a fixed revision and the converted weights are stored in float16.
depth = Conversion(
    original=Hub(
        repo_id="TencentARC/t2iadapter_depth_sd15v2",
        filename="diffusion_pytorch_model.bin",
        revision="9f96518933daa6c9386692914f72af81a0f6978f",
        expected_sha256="68aaebf5e7d5eeb62eaea9476c68d279ba98d0876b385cc925e12c43cee19edd",
    ),
    converted=Hub(
        repo_id="refiners/sd15.t2i_adapter.depth",
        filename="model.safetensors",
        expected_sha256="0178baeb59713ef4ae4dcbca0a2d3447fdd42bbeeaed019d3dc01f0f1913f74f",
    ),
    recipe=diffusers_recipe,
    dtype=torch.float16,
)
|
44
src/refiners/conversion/models/t2iadapter_sdxl.py
Normal file
44
src/refiners/conversion/models/t2iadapter_sdxl.py
Normal file
|
@ -0,0 +1,44 @@
|
||||||
|
import torch
|
||||||
|
|
||||||
|
from refiners.conversion.utils import Conversion, Hub, WeightRecipe
|
||||||
|
|
||||||
|
# Key map used to convert the SDXL T2I-Adapter checkpoint.
# NOTE(review): presumably each key is a layer prefix in the diffusers state dict
# and each value the matching refiners layer path - confirm against WeightRecipe.
diffusers_recipe = WeightRecipe(
    key_map={
        "adapter.conv_in": "Conv2d",
        "adapter.body.0.resnets.0.block1": "StatefulResidualBlocks_1.ResidualBlocks.Chain.ResidualBlock_1.Conv2d_1",
        "adapter.body.0.resnets.1.block1": "StatefulResidualBlocks_1.ResidualBlocks.Chain.ResidualBlock_2.Conv2d_1",
        "adapter.body.0.resnets.0.block2": "StatefulResidualBlocks_1.ResidualBlocks.Chain.ResidualBlock_1.Conv2d_2",
        "adapter.body.0.resnets.1.block2": "StatefulResidualBlocks_1.ResidualBlocks.Chain.ResidualBlock_2.Conv2d_2",
        "adapter.body.1.in_conv": "StatefulResidualBlocks_2.ResidualBlocks.Conv2d",
        "adapter.body.1.resnets.0.block1": "StatefulResidualBlocks_2.ResidualBlocks.Chain.ResidualBlock_1.Conv2d_1",
        "adapter.body.1.resnets.1.block1": "StatefulResidualBlocks_2.ResidualBlocks.Chain.ResidualBlock_2.Conv2d_1",
        "adapter.body.1.resnets.0.block2": "StatefulResidualBlocks_2.ResidualBlocks.Chain.ResidualBlock_1.Conv2d_2",
        "adapter.body.1.resnets.1.block2": "StatefulResidualBlocks_2.ResidualBlocks.Chain.ResidualBlock_2.Conv2d_2",
        "adapter.body.2.downsample": "StatefulResidualBlocks_3.ResidualBlocks.Downsample2d",
        "adapter.body.2.in_conv": "StatefulResidualBlocks_3.ResidualBlocks.Conv2d",
        "adapter.body.2.resnets.0.block1": "StatefulResidualBlocks_3.ResidualBlocks.Chain.ResidualBlock_1.Conv2d_1",
        "adapter.body.2.resnets.1.block1": "StatefulResidualBlocks_3.ResidualBlocks.Chain.ResidualBlock_2.Conv2d_1",
        "adapter.body.3.resnets.0.block1": "StatefulResidualBlocks_4.ResidualBlocks.Chain.ResidualBlock_1.Conv2d_1",
        "adapter.body.3.resnets.1.block1": "StatefulResidualBlocks_4.ResidualBlocks.Chain.ResidualBlock_2.Conv2d_1",
        "adapter.body.2.resnets.0.block2": "StatefulResidualBlocks_3.ResidualBlocks.Chain.ResidualBlock_1.Conv2d_2",
        "adapter.body.2.resnets.1.block2": "StatefulResidualBlocks_3.ResidualBlocks.Chain.ResidualBlock_2.Conv2d_2",
        "adapter.body.3.resnets.0.block2": "StatefulResidualBlocks_4.ResidualBlocks.Chain.ResidualBlock_1.Conv2d_2",
        "adapter.body.3.resnets.1.block2": "StatefulResidualBlocks_4.ResidualBlocks.Chain.ResidualBlock_2.Conv2d_2",
    },
)
|
||||||
|
|
||||||
|
# Conversion entry for the SDXL canny T2I-Adapter: the original file is pinned
# to a fixed revision and the converted weights are stored in float16.
canny = Conversion(
    original=Hub(
        repo_id="TencentARC/t2i-adapter-canny-sdxl-1.0",
        filename="diffusion_pytorch_model.safetensors",
        revision="2d7244ba45ded9129cfbf8e96a4befb7f6094210",
        expected_sha256="b601b28b7df0c0dcbbaf704ab8ba6fd22bcf35c9a875fa0c9bc933d47cc27438",
    ),
    converted=Hub(
        repo_id="refiners/sdxl.t2i_adapter.canny",
        filename="model.safetensors",
        expected_sha256="3aabc9b964b220b0ff80ad383eebf1885f6298f74425c1dbee659c86127d4b60",
    ),
    recipe=diffusers_recipe,
    dtype=torch.float16,
)
|
949
src/refiners/conversion/models/unet_sd15.py
Normal file
949
src/refiners/conversion/models/unet_sd15.py
Normal file
|
@ -0,0 +1,949 @@
|
||||||
|
import torch
|
||||||
|
|
||||||
|
from refiners.conversion.utils import Conversion, Hub, WeightRecipe
|
||||||
|
|
||||||
|
diffusers_recipe = WeightRecipe(
|
||||||
|
key_map={
|
||||||
|
"time_embedding.linear_1": "TimestepEncoder.RangeEncoder.Linear_1",
|
||||||
|
"time_embedding.linear_2": "TimestepEncoder.RangeEncoder.Linear_2",
|
||||||
|
"down_blocks.2.resnets.0.time_emb_proj": "DownBlocks.Chain_8.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
|
||||||
|
"down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
|
||||||
|
"down_blocks.2.resnets.1.time_emb_proj": "DownBlocks.Chain_9.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
|
||||||
|
"down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
|
||||||
|
"down_blocks.3.resnets.0.time_emb_proj": "DownBlocks.Chain_11.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"down_blocks.3.resnets.1.time_emb_proj": "DownBlocks.Chain_12.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"mid_block.resnets.0.time_emb_proj": "Sum.MiddleBlock.ResidualBlock_1.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"mid_block.attentions.0.transformer_blocks.0.attn1.to_out.0": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
|
||||||
|
"mid_block.attentions.0.transformer_blocks.0.attn2.to_out.0": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
|
||||||
|
"mid_block.resnets.1.time_emb_proj": "Sum.MiddleBlock.ResidualBlock_2.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"up_blocks.0.resnets.0.time_emb_proj": "UpBlocks.Chain_1.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"up_blocks.0.resnets.1.time_emb_proj": "UpBlocks.Chain_2.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"up_blocks.0.resnets.2.time_emb_proj": "UpBlocks.Chain_3.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"up_blocks.1.resnets.0.time_emb_proj": "UpBlocks.Chain_4.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
|
||||||
|
"up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
|
||||||
|
"up_blocks.1.resnets.1.time_emb_proj": "UpBlocks.Chain_5.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
|
||||||
|
"up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
|
||||||
|
"up_blocks.1.resnets.2.time_emb_proj": "UpBlocks.Chain_6.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
|
||||||
|
"up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
|
||||||
|
"conv_in": "DownBlocks.Chain_1.Conv2d",
|
||||||
|
"down_blocks.0.resnets.0.norm1": "DownBlocks.Chain_2.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"down_blocks.0.resnets.0.norm2": "DownBlocks.Chain_2.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"down_blocks.0.attentions.0.norm": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_1.GroupNorm",
|
||||||
|
"down_blocks.0.resnets.1.norm1": "DownBlocks.Chain_3.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"down_blocks.0.resnets.1.norm2": "DownBlocks.Chain_3.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"down_blocks.0.attentions.1.norm": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_1.GroupNorm",
|
||||||
|
"down_blocks.1.resnets.0.norm1": "DownBlocks.Chain_5.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"up_blocks.3.resnets.0.norm2": "UpBlocks.Chain_10.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"up_blocks.3.attentions.0.norm": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_1.GroupNorm",
|
||||||
|
"up_blocks.3.resnets.1.norm2": "UpBlocks.Chain_11.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"up_blocks.3.attentions.1.norm": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_1.GroupNorm",
|
||||||
|
"up_blocks.3.resnets.2.norm2": "UpBlocks.Chain_12.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"up_blocks.3.attentions.2.norm": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_1.GroupNorm",
|
||||||
|
"conv_norm_out": "Chain.GroupNorm",
|
||||||
|
"down_blocks.0.resnets.0.conv1": "DownBlocks.Chain_2.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"down_blocks.0.resnets.0.conv2": "DownBlocks.Chain_2.ResidualBlock.Chain.Conv2d",
|
||||||
|
"down_blocks.0.resnets.1.conv1": "DownBlocks.Chain_3.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"down_blocks.0.resnets.1.conv2": "DownBlocks.Chain_3.ResidualBlock.Chain.Conv2d",
|
||||||
|
"down_blocks.0.downsamplers.0.conv": "DownBlocks.Chain_4.Downsample.Conv2d",
|
||||||
|
"up_blocks.3.resnets.0.conv2": "UpBlocks.Chain_10.ResidualBlock.Chain.Conv2d",
|
||||||
|
"up_blocks.3.resnets.1.conv2": "UpBlocks.Chain_11.ResidualBlock.Chain.Conv2d",
|
||||||
|
"up_blocks.3.resnets.2.conv2": "UpBlocks.Chain_12.ResidualBlock.Chain.Conv2d",
|
||||||
|
"down_blocks.0.resnets.0.time_emb_proj": "DownBlocks.Chain_2.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"down_blocks.0.attentions.0.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
|
||||||
|
"down_blocks.0.resnets.1.time_emb_proj": "DownBlocks.Chain_3.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"down_blocks.0.attentions.1.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
|
||||||
|
"up_blocks.3.resnets.0.time_emb_proj": "UpBlocks.Chain_10.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"up_blocks.3.attentions.0.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
|
||||||
|
"up_blocks.3.resnets.1.time_emb_proj": "UpBlocks.Chain_11.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"up_blocks.3.attentions.1.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
|
||||||
|
"up_blocks.3.resnets.2.time_emb_proj": "UpBlocks.Chain_12.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"up_blocks.3.attentions.2.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
|
||||||
|
"down_blocks.0.attentions.0.proj_in": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_1.Conv2d",
|
||||||
|
"down_blocks.0.attentions.0.proj_out": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_3.Conv2d",
|
||||||
|
"down_blocks.0.attentions.1.proj_in": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_1.Conv2d",
|
||||||
|
"down_blocks.0.attentions.1.proj_out": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_3.Conv2d",
|
||||||
|
"up_blocks.3.attentions.0.proj_in": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_1.Conv2d",
|
||||||
|
"up_blocks.3.attentions.0.proj_out": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_3.Conv2d",
|
||||||
|
"up_blocks.3.attentions.1.proj_in": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_1.Conv2d",
|
||||||
|
"up_blocks.3.attentions.1.proj_out": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_3.Conv2d",
|
||||||
|
"up_blocks.3.attentions.2.proj_in": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_1.Conv2d",
|
||||||
|
"up_blocks.3.attentions.2.proj_out": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_3.Conv2d",
|
||||||
|
"down_blocks.0.attentions.0.transformer_blocks.0.norm1": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
|
||||||
|
"down_blocks.0.attentions.0.transformer_blocks.0.norm2": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
|
||||||
|
"down_blocks.0.attentions.0.transformer_blocks.0.norm3": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
|
||||||
|
"down_blocks.0.attentions.1.transformer_blocks.0.norm1": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
|
||||||
|
"down_blocks.0.attentions.1.transformer_blocks.0.norm2": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
|
||||||
|
"down_blocks.0.attentions.1.transformer_blocks.0.norm3": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
|
||||||
|
"up_blocks.3.attentions.0.transformer_blocks.0.norm1": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
|
||||||
|
"up_blocks.3.attentions.0.transformer_blocks.0.norm2": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
|
||||||
|
"up_blocks.3.attentions.0.transformer_blocks.0.norm3": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
|
||||||
|
"up_blocks.3.attentions.1.transformer_blocks.0.norm1": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
|
||||||
|
"up_blocks.3.attentions.1.transformer_blocks.0.norm2": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
|
||||||
|
"up_blocks.3.attentions.1.transformer_blocks.0.norm3": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
|
||||||
|
"up_blocks.3.attentions.2.transformer_blocks.0.norm1": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
|
||||||
|
"up_blocks.3.attentions.2.transformer_blocks.0.norm2": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
|
||||||
|
"up_blocks.3.attentions.2.transformer_blocks.0.norm3": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
|
||||||
|
"down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
|
||||||
|
"down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
|
||||||
|
"up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
|
||||||
|
"up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
|
||||||
|
"up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
|
||||||
|
"down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
|
||||||
|
"down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
|
||||||
|
"down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
|
||||||
|
"down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
|
||||||
|
"up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
|
||||||
|
"up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
|
||||||
|
"up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
|
||||||
|
"up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
|
||||||
|
"up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
|
||||||
|
"up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
|
||||||
|
"down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
|
||||||
|
"down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
|
||||||
|
"down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
|
||||||
|
"down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
|
||||||
|
"up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
|
||||||
|
"up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
|
||||||
|
"up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
|
||||||
|
"up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
|
||||||
|
"up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
|
||||||
|
"up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
|
||||||
|
"down_blocks.0.attentions.0.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
|
||||||
|
"down_blocks.0.attentions.1.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
|
||||||
|
"up_blocks.3.attentions.0.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
|
||||||
|
"up_blocks.3.attentions.1.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
|
||||||
|
"up_blocks.3.attentions.2.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
|
||||||
|
"down_blocks.1.resnets.0.conv1": "DownBlocks.Chain_5.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"down_blocks.1.resnets.0.time_emb_proj": "DownBlocks.Chain_5.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"down_blocks.1.resnets.1.time_emb_proj": "DownBlocks.Chain_6.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"up_blocks.2.resnets.0.time_emb_proj": "UpBlocks.Chain_7.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"up_blocks.2.resnets.1.time_emb_proj": "UpBlocks.Chain_8.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"up_blocks.2.resnets.2.time_emb_proj": "UpBlocks.Chain_9.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"down_blocks.1.resnets.0.norm2": "DownBlocks.Chain_5.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"down_blocks.1.attentions.0.norm": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_1.GroupNorm",
|
||||||
|
"down_blocks.1.resnets.1.norm1": "DownBlocks.Chain_6.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"down_blocks.1.resnets.1.norm2": "DownBlocks.Chain_6.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"down_blocks.1.attentions.1.norm": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_1.GroupNorm",
|
||||||
|
"down_blocks.2.resnets.0.norm1": "DownBlocks.Chain_8.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"up_blocks.2.resnets.0.norm2": "UpBlocks.Chain_7.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"up_blocks.2.attentions.0.norm": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_1.GroupNorm",
|
||||||
|
"up_blocks.2.resnets.1.norm2": "UpBlocks.Chain_8.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"up_blocks.2.attentions.1.norm": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_1.GroupNorm",
|
||||||
|
"up_blocks.2.resnets.2.norm2": "UpBlocks.Chain_9.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"up_blocks.2.attentions.2.norm": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_1.GroupNorm",
|
||||||
|
"up_blocks.3.resnets.1.norm1": "UpBlocks.Chain_11.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"up_blocks.3.resnets.2.norm1": "UpBlocks.Chain_12.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"down_blocks.1.resnets.0.conv2": "DownBlocks.Chain_5.ResidualBlock.Chain.Conv2d",
|
||||||
|
"down_blocks.1.resnets.1.conv1": "DownBlocks.Chain_6.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"down_blocks.1.resnets.1.conv2": "DownBlocks.Chain_6.ResidualBlock.Chain.Conv2d",
|
||||||
|
"down_blocks.1.downsamplers.0.conv": "DownBlocks.Chain_7.Downsample.Conv2d",
|
||||||
|
"up_blocks.2.resnets.0.conv2": "UpBlocks.Chain_7.ResidualBlock.Chain.Conv2d",
|
||||||
|
"up_blocks.2.resnets.1.conv2": "UpBlocks.Chain_8.ResidualBlock.Chain.Conv2d",
|
||||||
|
"up_blocks.2.resnets.2.conv2": "UpBlocks.Chain_9.ResidualBlock.Chain.Conv2d",
|
||||||
|
"up_blocks.2.upsamplers.0.conv": "UpBlocks.Chain_9.Upsample.Conv2d",
|
||||||
|
"down_blocks.1.resnets.0.conv_shortcut": "DownBlocks.Chain_5.ResidualBlock.Conv2d",
|
||||||
|
"down_blocks.1.attentions.0.proj_in": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_1.Conv2d",
|
||||||
|
"down_blocks.1.attentions.0.proj_out": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_3.Conv2d",
|
||||||
|
"down_blocks.1.attentions.1.proj_in": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_1.Conv2d",
|
||||||
|
"down_blocks.1.attentions.1.proj_out": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_3.Conv2d",
|
||||||
|
"up_blocks.2.attentions.0.proj_in": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_1.Conv2d",
|
||||||
|
"up_blocks.2.attentions.0.proj_out": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_3.Conv2d",
|
||||||
|
"up_blocks.2.attentions.1.proj_in": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_1.Conv2d",
|
||||||
|
"up_blocks.2.attentions.1.proj_out": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_3.Conv2d",
|
||||||
|
"up_blocks.2.attentions.2.proj_in": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_1.Conv2d",
|
||||||
|
"up_blocks.2.attentions.2.proj_out": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_3.Conv2d",
|
||||||
|
"down_blocks.1.attentions.0.transformer_blocks.0.norm1": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
|
||||||
|
"down_blocks.1.attentions.0.transformer_blocks.0.norm2": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
|
||||||
|
"down_blocks.1.attentions.0.transformer_blocks.0.norm3": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
|
||||||
|
"down_blocks.1.attentions.1.transformer_blocks.0.norm1": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
|
||||||
|
"down_blocks.1.attentions.1.transformer_blocks.0.norm2": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
|
||||||
|
"down_blocks.1.attentions.1.transformer_blocks.0.norm3": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
|
||||||
|
"up_blocks.2.attentions.0.transformer_blocks.0.norm1": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
|
||||||
|
"up_blocks.2.attentions.0.transformer_blocks.0.norm2": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
|
||||||
|
"up_blocks.2.attentions.0.transformer_blocks.0.norm3": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
|
||||||
|
"up_blocks.2.attentions.1.transformer_blocks.0.norm1": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
|
||||||
|
"up_blocks.2.attentions.1.transformer_blocks.0.norm2": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
|
||||||
|
"up_blocks.2.attentions.1.transformer_blocks.0.norm3": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
|
||||||
|
"up_blocks.2.attentions.2.transformer_blocks.0.norm1": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
|
||||||
|
"up_blocks.2.attentions.2.transformer_blocks.0.norm2": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
|
||||||
|
"up_blocks.2.attentions.2.transformer_blocks.0.norm3": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
|
||||||
|
"down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
|
||||||
|
"down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
|
||||||
|
"up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
|
||||||
|
"up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
|
||||||
|
"up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
|
||||||
|
"down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
|
||||||
|
"down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
|
||||||
|
"down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
|
||||||
|
"down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
|
||||||
|
"up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
|
||||||
|
"up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
|
||||||
|
"up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
|
||||||
|
"up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
|
||||||
|
"up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
|
||||||
|
"up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
|
||||||
|
"down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
|
||||||
|
"down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
|
||||||
|
"down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
|
||||||
|
"down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
|
||||||
|
"up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
|
||||||
|
"up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
|
||||||
|
"up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
|
||||||
|
"up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
|
||||||
|
"up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
|
||||||
|
"up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
|
||||||
|
"down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
|
||||||
|
"down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
|
||||||
|
"up_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
|
||||||
|
"up_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
|
||||||
|
"up_blocks.2.attentions.2.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
|
||||||
|
"down_blocks.1.attentions.0.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
|
||||||
|
"down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
|
||||||
|
"up_blocks.2.attentions.0.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
|
||||||
|
"up_blocks.2.attentions.1.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
|
||||||
|
"up_blocks.2.attentions.2.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
|
||||||
|
"down_blocks.2.resnets.0.conv1": "DownBlocks.Chain_8.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"down_blocks.2.resnets.0.norm2": "DownBlocks.Chain_8.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"down_blocks.2.attentions.0.norm": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_1.GroupNorm",
|
||||||
|
"down_blocks.2.resnets.1.norm1": "DownBlocks.Chain_9.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"down_blocks.2.resnets.1.norm2": "DownBlocks.Chain_9.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"down_blocks.2.attentions.1.norm": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_1.GroupNorm",
|
||||||
|
"down_blocks.3.resnets.0.norm1": "DownBlocks.Chain_11.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"down_blocks.3.resnets.0.norm2": "DownBlocks.Chain_11.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"down_blocks.3.resnets.1.norm1": "DownBlocks.Chain_12.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"down_blocks.3.resnets.1.norm2": "DownBlocks.Chain_12.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"mid_block.resnets.0.norm1": "Sum.MiddleBlock.ResidualBlock_1.Chain.GroupNorm_1",
|
||||||
|
"mid_block.resnets.0.norm2": "Sum.MiddleBlock.ResidualBlock_1.Chain.GroupNorm_2",
|
||||||
|
"mid_block.attentions.0.norm": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_1.GroupNorm",
|
||||||
|
"mid_block.resnets.1.norm1": "Sum.MiddleBlock.ResidualBlock_2.Chain.GroupNorm_1",
|
||||||
|
"mid_block.resnets.1.norm2": "Sum.MiddleBlock.ResidualBlock_2.Chain.GroupNorm_2",
|
||||||
|
"up_blocks.0.resnets.0.norm2": "UpBlocks.Chain_1.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"up_blocks.0.resnets.1.norm2": "UpBlocks.Chain_2.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"up_blocks.0.resnets.2.norm2": "UpBlocks.Chain_3.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"up_blocks.1.resnets.0.norm2": "UpBlocks.Chain_4.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"up_blocks.1.attentions.0.norm": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_1.GroupNorm",
|
||||||
|
"up_blocks.1.resnets.1.norm2": "UpBlocks.Chain_5.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"up_blocks.1.attentions.1.norm": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_1.GroupNorm",
|
||||||
|
"up_blocks.1.resnets.2.norm2": "UpBlocks.Chain_6.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"up_blocks.1.attentions.2.norm": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_1.GroupNorm",
|
||||||
|
"up_blocks.2.resnets.1.norm1": "UpBlocks.Chain_8.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"down_blocks.2.resnets.0.conv2": "DownBlocks.Chain_8.ResidualBlock.Chain.Conv2d",
|
||||||
|
"down_blocks.2.resnets.1.conv1": "DownBlocks.Chain_9.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"down_blocks.2.resnets.1.conv2": "DownBlocks.Chain_9.ResidualBlock.Chain.Conv2d",
|
||||||
|
"down_blocks.2.downsamplers.0.conv": "DownBlocks.Chain_10.Downsample.Conv2d",
|
||||||
|
"down_blocks.3.resnets.0.conv1": "DownBlocks.Chain_11.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"down_blocks.3.resnets.0.conv2": "DownBlocks.Chain_11.ResidualBlock.Chain.Conv2d",
|
||||||
|
"down_blocks.3.resnets.1.conv1": "DownBlocks.Chain_12.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"down_blocks.3.resnets.1.conv2": "DownBlocks.Chain_12.ResidualBlock.Chain.Conv2d",
|
||||||
|
"mid_block.resnets.0.conv1": "Sum.MiddleBlock.ResidualBlock_1.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"mid_block.resnets.0.conv2": "Sum.MiddleBlock.ResidualBlock_1.Chain.Conv2d",
|
||||||
|
"mid_block.resnets.1.conv1": "Sum.MiddleBlock.ResidualBlock_2.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"mid_block.resnets.1.conv2": "Sum.MiddleBlock.ResidualBlock_2.Chain.Conv2d",
|
||||||
|
"up_blocks.0.resnets.0.conv2": "UpBlocks.Chain_1.ResidualBlock.Chain.Conv2d",
|
||||||
|
"up_blocks.0.resnets.1.conv2": "UpBlocks.Chain_2.ResidualBlock.Chain.Conv2d",
|
||||||
|
"up_blocks.0.resnets.2.conv2": "UpBlocks.Chain_3.ResidualBlock.Chain.Conv2d",
|
||||||
|
"up_blocks.0.upsamplers.0.conv": "UpBlocks.Chain_3.Upsample.Conv2d",
|
||||||
|
"up_blocks.1.resnets.0.conv2": "UpBlocks.Chain_4.ResidualBlock.Chain.Conv2d",
|
||||||
|
"up_blocks.1.resnets.1.conv2": "UpBlocks.Chain_5.ResidualBlock.Chain.Conv2d",
|
||||||
|
"up_blocks.1.resnets.2.conv2": "UpBlocks.Chain_6.ResidualBlock.Chain.Conv2d",
|
||||||
|
"up_blocks.1.upsamplers.0.conv": "UpBlocks.Chain_6.Upsample.Conv2d",
|
||||||
|
"down_blocks.2.resnets.0.conv_shortcut": "DownBlocks.Chain_8.ResidualBlock.Conv2d",
|
||||||
|
"down_blocks.2.attentions.0.proj_in": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_1.Conv2d",
|
||||||
|
"down_blocks.2.attentions.0.proj_out": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_3.Conv2d",
|
||||||
|
"down_blocks.2.attentions.1.proj_in": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_1.Conv2d",
|
||||||
|
"down_blocks.2.attentions.1.proj_out": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_3.Conv2d",
|
||||||
|
"mid_block.attentions.0.proj_in": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_1.Conv2d",
|
||||||
|
"mid_block.attentions.0.proj_out": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_3.Conv2d",
|
||||||
|
"up_blocks.1.attentions.0.proj_in": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_1.Conv2d",
|
||||||
|
"up_blocks.1.attentions.0.proj_out": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_3.Conv2d",
|
||||||
|
"up_blocks.1.attentions.1.proj_in": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_1.Conv2d",
|
||||||
|
"up_blocks.1.attentions.1.proj_out": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_3.Conv2d",
|
||||||
|
"up_blocks.1.attentions.2.proj_in": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_1.Conv2d",
|
||||||
|
"up_blocks.1.attentions.2.proj_out": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_3.Conv2d",
|
||||||
|
"down_blocks.2.attentions.0.transformer_blocks.0.norm1": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
|
||||||
|
"down_blocks.2.attentions.0.transformer_blocks.0.norm2": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
|
||||||
|
"down_blocks.2.attentions.0.transformer_blocks.0.norm3": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
|
||||||
|
"down_blocks.2.attentions.1.transformer_blocks.0.norm1": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
|
||||||
|
"down_blocks.2.attentions.1.transformer_blocks.0.norm2": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
|
||||||
|
"down_blocks.2.attentions.1.transformer_blocks.0.norm3": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
|
||||||
|
"mid_block.attentions.0.transformer_blocks.0.norm1": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
|
||||||
|
"mid_block.attentions.0.transformer_blocks.0.norm2": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
|
||||||
|
"mid_block.attentions.0.transformer_blocks.0.norm3": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
|
||||||
|
"up_blocks.1.attentions.0.transformer_blocks.0.norm1": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
|
||||||
|
"up_blocks.1.attentions.0.transformer_blocks.0.norm2": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
|
||||||
|
"up_blocks.1.attentions.0.transformer_blocks.0.norm3": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
|
||||||
|
"up_blocks.1.attentions.1.transformer_blocks.0.norm1": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
|
||||||
|
"up_blocks.1.attentions.1.transformer_blocks.0.norm2": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
|
||||||
|
"up_blocks.1.attentions.1.transformer_blocks.0.norm3": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
|
||||||
|
"up_blocks.1.attentions.2.transformer_blocks.0.norm1": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
|
||||||
|
"up_blocks.1.attentions.2.transformer_blocks.0.norm2": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
|
||||||
|
"up_blocks.1.attentions.2.transformer_blocks.0.norm3": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
|
||||||
|
"down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
|
||||||
|
"down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
|
||||||
|
"mid_block.attentions.0.transformer_blocks.0.attn1.to_q": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"mid_block.attentions.0.transformer_blocks.0.attn1.to_k": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"mid_block.attentions.0.transformer_blocks.0.attn1.to_v": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"mid_block.attentions.0.transformer_blocks.0.attn2.to_q": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
|
||||||
|
"up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
|
||||||
|
"up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
|
||||||
|
"up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
|
||||||
|
"down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
|
||||||
|
"down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
|
||||||
|
"down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
|
||||||
|
"down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
|
||||||
|
"mid_block.attentions.0.transformer_blocks.0.attn2.to_k": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
|
||||||
|
"mid_block.attentions.0.transformer_blocks.0.attn2.to_v": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
|
||||||
|
"up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
|
||||||
|
"up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
|
||||||
|
"up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
|
||||||
|
"up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
|
||||||
|
"up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
|
||||||
|
"up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
|
||||||
|
"down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
|
||||||
|
"down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
|
||||||
|
"mid_block.attentions.0.transformer_blocks.0.ff.net.0.proj": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
|
||||||
|
"up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
|
||||||
|
"up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
|
||||||
|
"up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
|
||||||
|
"down_blocks.2.attentions.0.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
|
||||||
|
"down_blocks.2.attentions.1.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
|
||||||
|
"mid_block.attentions.0.transformer_blocks.0.ff.net.2": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
|
||||||
|
"up_blocks.1.attentions.0.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
|
||||||
|
"up_blocks.1.attentions.1.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
|
||||||
|
"up_blocks.1.attentions.2.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
|
||||||
|
"up_blocks.0.resnets.0.norm1": "UpBlocks.Chain_1.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"up_blocks.0.resnets.1.norm1": "UpBlocks.Chain_2.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"up_blocks.0.resnets.2.norm1": "UpBlocks.Chain_3.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"up_blocks.1.resnets.0.norm1": "UpBlocks.Chain_4.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"up_blocks.1.resnets.1.norm1": "UpBlocks.Chain_5.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"up_blocks.0.resnets.0.conv1": "UpBlocks.Chain_1.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"up_blocks.0.resnets.1.conv1": "UpBlocks.Chain_2.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"up_blocks.0.resnets.2.conv1": "UpBlocks.Chain_3.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"up_blocks.1.resnets.0.conv1": "UpBlocks.Chain_4.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"up_blocks.1.resnets.1.conv1": "UpBlocks.Chain_5.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"up_blocks.0.resnets.0.conv_shortcut": "UpBlocks.Chain_1.ResidualBlock.Conv2d",
|
||||||
|
"up_blocks.0.resnets.1.conv_shortcut": "UpBlocks.Chain_2.ResidualBlock.Conv2d",
|
||||||
|
"up_blocks.0.resnets.2.conv_shortcut": "UpBlocks.Chain_3.ResidualBlock.Conv2d",
|
||||||
|
"up_blocks.1.resnets.0.conv_shortcut": "UpBlocks.Chain_4.ResidualBlock.Conv2d",
|
||||||
|
"up_blocks.1.resnets.1.conv_shortcut": "UpBlocks.Chain_5.ResidualBlock.Conv2d",
|
||||||
|
"up_blocks.1.resnets.2.norm1": "UpBlocks.Chain_6.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"up_blocks.2.resnets.0.norm1": "UpBlocks.Chain_7.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"up_blocks.1.resnets.2.conv1": "UpBlocks.Chain_6.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"up_blocks.1.resnets.2.conv_shortcut": "UpBlocks.Chain_6.ResidualBlock.Conv2d",
|
||||||
|
"up_blocks.2.resnets.0.conv1": "UpBlocks.Chain_7.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"up_blocks.2.resnets.0.conv_shortcut": "UpBlocks.Chain_7.ResidualBlock.Conv2d",
|
||||||
|
"up_blocks.2.resnets.1.conv1": "UpBlocks.Chain_8.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"up_blocks.2.resnets.1.conv_shortcut": "UpBlocks.Chain_8.ResidualBlock.Conv2d",
|
||||||
|
"up_blocks.2.resnets.2.norm1": "UpBlocks.Chain_9.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"up_blocks.3.resnets.0.norm1": "UpBlocks.Chain_10.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"up_blocks.2.resnets.2.conv1": "UpBlocks.Chain_9.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"up_blocks.2.resnets.2.conv_shortcut": "UpBlocks.Chain_9.ResidualBlock.Conv2d",
|
||||||
|
"up_blocks.3.resnets.0.conv1": "UpBlocks.Chain_10.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"up_blocks.3.resnets.0.conv_shortcut": "UpBlocks.Chain_10.ResidualBlock.Conv2d",
|
||||||
|
"up_blocks.3.resnets.1.conv1": "UpBlocks.Chain_11.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"up_blocks.3.resnets.2.conv1": "UpBlocks.Chain_12.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"up_blocks.3.resnets.1.conv_shortcut": "UpBlocks.Chain_11.ResidualBlock.Conv2d",
|
||||||
|
"up_blocks.3.resnets.2.conv_shortcut": "UpBlocks.Chain_12.ResidualBlock.Conv2d",
|
||||||
|
"conv_out": "Chain.Conv2d",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
civitai_recipe = WeightRecipe(
|
||||||
|
key_prefix="model.diffusion_model.",
|
||||||
|
key_map={
|
||||||
|
"time_embed.0": "TimestepEncoder.RangeEncoder.Linear_1",
|
||||||
|
"time_embed.2": "TimestepEncoder.RangeEncoder.Linear_2",
|
||||||
|
"input_blocks.0.0": "DownBlocks.Chain_1.Conv2d",
|
||||||
|
"input_blocks.1.0.in_layers.0": "DownBlocks.Chain_2.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"input_blocks.1.0.in_layers.2": "DownBlocks.Chain_2.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"input_blocks.1.0.emb_layers.1": "DownBlocks.Chain_2.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"input_blocks.1.0.out_layers.0": "DownBlocks.Chain_2.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"input_blocks.1.0.out_layers.3": "DownBlocks.Chain_2.ResidualBlock.Chain.Conv2d",
|
||||||
|
"input_blocks.1.1.norm": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_1.GroupNorm",
|
||||||
|
"input_blocks.1.1.proj_in": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_1.Conv2d",
|
||||||
|
"input_blocks.1.1.transformer_blocks.0.norm1": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
|
||||||
|
"input_blocks.1.1.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"input_blocks.1.1.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"input_blocks.1.1.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"input_blocks.1.1.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
|
||||||
|
"input_blocks.1.1.transformer_blocks.0.norm2": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
|
||||||
|
"input_blocks.1.1.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
|
||||||
|
"input_blocks.1.1.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
|
||||||
|
"input_blocks.1.1.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
|
||||||
|
"input_blocks.1.1.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
|
||||||
|
"input_blocks.1.1.transformer_blocks.0.norm3": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
|
||||||
|
"input_blocks.1.1.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
|
||||||
|
"input_blocks.1.1.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
|
||||||
|
"input_blocks.1.1.proj_out": "DownBlocks.Chain_2.CLIPLCrossAttention.Chain_3.Conv2d",
|
||||||
|
"input_blocks.2.0.in_layers.0": "DownBlocks.Chain_3.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"input_blocks.2.0.in_layers.2": "DownBlocks.Chain_3.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"input_blocks.2.0.emb_layers.1": "DownBlocks.Chain_3.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"input_blocks.2.0.out_layers.0": "DownBlocks.Chain_3.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"input_blocks.2.0.out_layers.3": "DownBlocks.Chain_3.ResidualBlock.Chain.Conv2d",
|
||||||
|
"input_blocks.2.1.norm": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_1.GroupNorm",
|
||||||
|
"input_blocks.2.1.proj_in": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_1.Conv2d",
|
||||||
|
"input_blocks.2.1.transformer_blocks.0.norm1": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
|
||||||
|
"input_blocks.2.1.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"input_blocks.2.1.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"input_blocks.2.1.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"input_blocks.2.1.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
|
||||||
|
"input_blocks.2.1.transformer_blocks.0.norm2": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
|
||||||
|
"input_blocks.2.1.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
|
||||||
|
"input_blocks.2.1.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
|
||||||
|
"input_blocks.2.1.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
|
||||||
|
"input_blocks.2.1.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
|
||||||
|
"input_blocks.2.1.transformer_blocks.0.norm3": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
|
||||||
|
"input_blocks.2.1.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
|
||||||
|
"input_blocks.2.1.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
|
||||||
|
"input_blocks.2.1.proj_out": "DownBlocks.Chain_3.CLIPLCrossAttention.Chain_3.Conv2d",
|
||||||
|
"input_blocks.3.0.op": "DownBlocks.Chain_4.Downsample.Conv2d",
|
||||||
|
"input_blocks.4.0.in_layers.0": "DownBlocks.Chain_5.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"input_blocks.4.0.in_layers.2": "DownBlocks.Chain_5.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"input_blocks.4.0.emb_layers.1": "DownBlocks.Chain_5.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"input_blocks.4.0.out_layers.0": "DownBlocks.Chain_5.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"input_blocks.4.0.out_layers.3": "DownBlocks.Chain_5.ResidualBlock.Chain.Conv2d",
|
||||||
|
"input_blocks.4.0.skip_connection": "DownBlocks.Chain_5.ResidualBlock.Conv2d",
|
||||||
|
"input_blocks.4.1.norm": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_1.GroupNorm",
|
||||||
|
"input_blocks.4.1.proj_in": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_1.Conv2d",
|
||||||
|
"input_blocks.4.1.transformer_blocks.0.norm1": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
|
||||||
|
"input_blocks.4.1.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"input_blocks.4.1.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"input_blocks.4.1.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"input_blocks.4.1.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
|
||||||
|
"input_blocks.4.1.transformer_blocks.0.norm2": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
|
||||||
|
"input_blocks.4.1.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
|
||||||
|
"input_blocks.4.1.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
|
||||||
|
"input_blocks.4.1.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
|
||||||
|
"input_blocks.4.1.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
|
||||||
|
"input_blocks.4.1.transformer_blocks.0.norm3": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
|
||||||
|
"input_blocks.4.1.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
|
||||||
|
"input_blocks.4.1.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
|
||||||
|
"input_blocks.4.1.proj_out": "DownBlocks.Chain_5.CLIPLCrossAttention.Chain_3.Conv2d",
|
||||||
|
"input_blocks.5.0.in_layers.0": "DownBlocks.Chain_6.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"input_blocks.5.0.in_layers.2": "DownBlocks.Chain_6.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"input_blocks.5.0.emb_layers.1": "DownBlocks.Chain_6.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"input_blocks.5.0.out_layers.0": "DownBlocks.Chain_6.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"input_blocks.5.0.out_layers.3": "DownBlocks.Chain_6.ResidualBlock.Chain.Conv2d",
|
||||||
|
"input_blocks.5.1.norm": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_1.GroupNorm",
|
||||||
|
"input_blocks.5.1.proj_in": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_1.Conv2d",
|
||||||
|
"input_blocks.5.1.transformer_blocks.0.norm1": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
|
||||||
|
"input_blocks.5.1.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"input_blocks.5.1.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"input_blocks.5.1.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"input_blocks.5.1.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
|
||||||
|
"input_blocks.5.1.transformer_blocks.0.norm2": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
|
||||||
|
"input_blocks.5.1.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
|
||||||
|
"input_blocks.5.1.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
|
||||||
|
"input_blocks.5.1.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
|
||||||
|
"input_blocks.5.1.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
|
||||||
|
"input_blocks.5.1.transformer_blocks.0.norm3": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
|
||||||
|
"input_blocks.5.1.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
|
||||||
|
"input_blocks.5.1.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
|
||||||
|
"input_blocks.5.1.proj_out": "DownBlocks.Chain_6.CLIPLCrossAttention.Chain_3.Conv2d",
|
||||||
|
"input_blocks.6.0.op": "DownBlocks.Chain_7.Downsample.Conv2d",
|
||||||
|
"input_blocks.7.0.in_layers.0": "DownBlocks.Chain_8.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"input_blocks.7.0.in_layers.2": "DownBlocks.Chain_8.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"input_blocks.7.0.emb_layers.1": "DownBlocks.Chain_8.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"input_blocks.7.0.out_layers.0": "DownBlocks.Chain_8.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"input_blocks.7.0.out_layers.3": "DownBlocks.Chain_8.ResidualBlock.Chain.Conv2d",
|
||||||
|
"input_blocks.7.0.skip_connection": "DownBlocks.Chain_8.ResidualBlock.Conv2d",
|
||||||
|
"input_blocks.7.1.norm": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_1.GroupNorm",
|
||||||
|
"input_blocks.7.1.proj_in": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_1.Conv2d",
|
||||||
|
"input_blocks.7.1.transformer_blocks.0.norm1": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
|
||||||
|
"input_blocks.7.1.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"input_blocks.7.1.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"input_blocks.7.1.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"input_blocks.7.1.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
|
||||||
|
"input_blocks.7.1.transformer_blocks.0.norm2": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
|
||||||
|
"input_blocks.7.1.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
|
||||||
|
"input_blocks.7.1.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
|
||||||
|
"input_blocks.7.1.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
|
||||||
|
"input_blocks.7.1.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
|
||||||
|
"input_blocks.7.1.transformer_blocks.0.norm3": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
|
||||||
|
"input_blocks.7.1.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
|
||||||
|
"input_blocks.7.1.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
|
||||||
|
"input_blocks.7.1.proj_out": "DownBlocks.Chain_8.CLIPLCrossAttention.Chain_3.Conv2d",
|
||||||
|
"input_blocks.8.0.in_layers.0": "DownBlocks.Chain_9.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"input_blocks.8.0.in_layers.2": "DownBlocks.Chain_9.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"input_blocks.8.0.emb_layers.1": "DownBlocks.Chain_9.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"input_blocks.8.0.out_layers.0": "DownBlocks.Chain_9.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"input_blocks.8.0.out_layers.3": "DownBlocks.Chain_9.ResidualBlock.Chain.Conv2d",
|
||||||
|
"input_blocks.8.1.norm": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_1.GroupNorm",
|
||||||
|
"input_blocks.8.1.proj_in": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_1.Conv2d",
|
||||||
|
"input_blocks.8.1.transformer_blocks.0.norm1": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
|
||||||
|
"input_blocks.8.1.transformer_blocks.0.attn1.to_q": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"input_blocks.8.1.transformer_blocks.0.attn1.to_k": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"input_blocks.8.1.transformer_blocks.0.attn1.to_v": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"input_blocks.8.1.transformer_blocks.0.attn1.to_out.0": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
|
||||||
|
"input_blocks.8.1.transformer_blocks.0.norm2": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
|
||||||
|
"input_blocks.8.1.transformer_blocks.0.attn2.to_q": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
|
||||||
|
"input_blocks.8.1.transformer_blocks.0.attn2.to_k": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
|
||||||
|
"input_blocks.8.1.transformer_blocks.0.attn2.to_v": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
|
||||||
|
"input_blocks.8.1.transformer_blocks.0.attn2.to_out.0": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
|
||||||
|
"input_blocks.8.1.transformer_blocks.0.norm3": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
|
||||||
|
"input_blocks.8.1.transformer_blocks.0.ff.net.0.proj": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
|
||||||
|
"input_blocks.8.1.transformer_blocks.0.ff.net.2": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
|
||||||
|
"input_blocks.8.1.proj_out": "DownBlocks.Chain_9.CLIPLCrossAttention.Chain_3.Conv2d",
|
||||||
|
"input_blocks.9.0.op": "DownBlocks.Chain_10.Downsample.Conv2d",
|
||||||
|
"input_blocks.10.0.in_layers.0": "DownBlocks.Chain_11.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"input_blocks.10.0.in_layers.2": "DownBlocks.Chain_11.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"input_blocks.10.0.emb_layers.1": "DownBlocks.Chain_11.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"input_blocks.10.0.out_layers.0": "DownBlocks.Chain_11.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"input_blocks.10.0.out_layers.3": "DownBlocks.Chain_11.ResidualBlock.Chain.Conv2d",
|
||||||
|
"input_blocks.11.0.in_layers.0": "DownBlocks.Chain_12.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"input_blocks.11.0.in_layers.2": "DownBlocks.Chain_12.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"input_blocks.11.0.emb_layers.1": "DownBlocks.Chain_12.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"input_blocks.11.0.out_layers.0": "DownBlocks.Chain_12.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"input_blocks.11.0.out_layers.3": "DownBlocks.Chain_12.ResidualBlock.Chain.Conv2d",
|
||||||
|
"middle_block.0.in_layers.0": "Sum.MiddleBlock.ResidualBlock_1.Chain.GroupNorm_1",
|
||||||
|
"middle_block.0.in_layers.2": "Sum.MiddleBlock.ResidualBlock_1.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"middle_block.0.emb_layers.1": "Sum.MiddleBlock.ResidualBlock_1.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"middle_block.0.out_layers.0": "Sum.MiddleBlock.ResidualBlock_1.Chain.GroupNorm_2",
|
||||||
|
"middle_block.0.out_layers.3": "Sum.MiddleBlock.ResidualBlock_1.Chain.Conv2d",
|
||||||
|
"middle_block.1.norm": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_1.GroupNorm",
|
||||||
|
"middle_block.1.proj_in": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_1.Conv2d",
|
||||||
|
"middle_block.1.transformer_blocks.0.norm1": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
|
||||||
|
"middle_block.1.transformer_blocks.0.attn1.to_q": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"middle_block.1.transformer_blocks.0.attn1.to_k": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"middle_block.1.transformer_blocks.0.attn1.to_v": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"middle_block.1.transformer_blocks.0.attn1.to_out.0": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
|
||||||
|
"middle_block.1.transformer_blocks.0.norm2": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
|
||||||
|
"middle_block.1.transformer_blocks.0.attn2.to_q": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
|
||||||
|
"middle_block.1.transformer_blocks.0.attn2.to_k": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
|
||||||
|
"middle_block.1.transformer_blocks.0.attn2.to_v": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
|
||||||
|
"middle_block.1.transformer_blocks.0.attn2.to_out.0": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
|
||||||
|
"middle_block.1.transformer_blocks.0.norm3": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
|
||||||
|
"middle_block.1.transformer_blocks.0.ff.net.0.proj": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
|
||||||
|
"middle_block.1.transformer_blocks.0.ff.net.2": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
|
||||||
|
"middle_block.1.proj_out": "Sum.MiddleBlock.CLIPLCrossAttention.Chain_3.Conv2d",
|
||||||
|
"middle_block.2.in_layers.0": "Sum.MiddleBlock.ResidualBlock_2.Chain.GroupNorm_1",
|
||||||
|
"middle_block.2.in_layers.2": "Sum.MiddleBlock.ResidualBlock_2.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"middle_block.2.emb_layers.1": "Sum.MiddleBlock.ResidualBlock_2.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"middle_block.2.out_layers.0": "Sum.MiddleBlock.ResidualBlock_2.Chain.GroupNorm_2",
|
||||||
|
"middle_block.2.out_layers.3": "Sum.MiddleBlock.ResidualBlock_2.Chain.Conv2d",
|
||||||
|
"output_blocks.0.0.in_layers.0": "UpBlocks.Chain_1.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"output_blocks.0.0.in_layers.2": "UpBlocks.Chain_1.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"output_blocks.0.0.emb_layers.1": "UpBlocks.Chain_1.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"output_blocks.0.0.out_layers.0": "UpBlocks.Chain_1.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"output_blocks.0.0.out_layers.3": "UpBlocks.Chain_1.ResidualBlock.Chain.Conv2d",
|
||||||
|
"output_blocks.0.0.skip_connection": "UpBlocks.Chain_1.ResidualBlock.Conv2d",
|
||||||
|
"output_blocks.1.0.in_layers.0": "UpBlocks.Chain_2.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"output_blocks.1.0.in_layers.2": "UpBlocks.Chain_2.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"output_blocks.1.0.emb_layers.1": "UpBlocks.Chain_2.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"output_blocks.1.0.out_layers.0": "UpBlocks.Chain_2.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"output_blocks.1.0.out_layers.3": "UpBlocks.Chain_2.ResidualBlock.Chain.Conv2d",
|
||||||
|
"output_blocks.1.0.skip_connection": "UpBlocks.Chain_2.ResidualBlock.Conv2d",
|
||||||
|
"output_blocks.2.0.in_layers.0": "UpBlocks.Chain_3.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"output_blocks.2.0.in_layers.2": "UpBlocks.Chain_3.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"output_blocks.2.0.emb_layers.1": "UpBlocks.Chain_3.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"output_blocks.2.0.out_layers.0": "UpBlocks.Chain_3.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"output_blocks.2.0.out_layers.3": "UpBlocks.Chain_3.ResidualBlock.Chain.Conv2d",
|
||||||
|
"output_blocks.2.0.skip_connection": "UpBlocks.Chain_3.ResidualBlock.Conv2d",
|
||||||
|
"output_blocks.2.1.conv": "UpBlocks.Chain_3.Upsample.Conv2d",
|
||||||
|
"output_blocks.3.0.in_layers.0": "UpBlocks.Chain_4.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"output_blocks.3.0.in_layers.2": "UpBlocks.Chain_4.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"output_blocks.3.0.emb_layers.1": "UpBlocks.Chain_4.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"output_blocks.3.0.out_layers.0": "UpBlocks.Chain_4.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"output_blocks.3.0.out_layers.3": "UpBlocks.Chain_4.ResidualBlock.Chain.Conv2d",
|
||||||
|
"output_blocks.3.0.skip_connection": "UpBlocks.Chain_4.ResidualBlock.Conv2d",
|
||||||
|
"output_blocks.3.1.norm": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_1.GroupNorm",
|
||||||
|
"output_blocks.3.1.proj_in": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_1.Conv2d",
|
||||||
|
"output_blocks.3.1.transformer_blocks.0.norm1": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
|
||||||
|
"output_blocks.3.1.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"output_blocks.3.1.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"output_blocks.3.1.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"output_blocks.3.1.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
|
||||||
|
"output_blocks.3.1.transformer_blocks.0.norm2": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
|
||||||
|
"output_blocks.3.1.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
|
||||||
|
"output_blocks.3.1.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
|
||||||
|
"output_blocks.3.1.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
|
||||||
|
"output_blocks.3.1.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
|
||||||
|
"output_blocks.3.1.transformer_blocks.0.norm3": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
|
||||||
|
"output_blocks.3.1.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
|
||||||
|
"output_blocks.3.1.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
|
||||||
|
"output_blocks.3.1.proj_out": "UpBlocks.Chain_4.CLIPLCrossAttention.Chain_3.Conv2d",
|
||||||
|
"output_blocks.4.0.in_layers.0": "UpBlocks.Chain_5.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"output_blocks.4.0.in_layers.2": "UpBlocks.Chain_5.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"output_blocks.4.0.emb_layers.1": "UpBlocks.Chain_5.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"output_blocks.4.0.out_layers.0": "UpBlocks.Chain_5.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"output_blocks.4.0.out_layers.3": "UpBlocks.Chain_5.ResidualBlock.Chain.Conv2d",
|
||||||
|
"output_blocks.4.0.skip_connection": "UpBlocks.Chain_5.ResidualBlock.Conv2d",
|
||||||
|
"output_blocks.4.1.norm": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_1.GroupNorm",
|
||||||
|
"output_blocks.4.1.proj_in": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_1.Conv2d",
|
||||||
|
"output_blocks.4.1.transformer_blocks.0.norm1": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
|
||||||
|
"output_blocks.4.1.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"output_blocks.4.1.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"output_blocks.4.1.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"output_blocks.4.1.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
|
||||||
|
"output_blocks.4.1.transformer_blocks.0.norm2": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
|
||||||
|
"output_blocks.4.1.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
|
||||||
|
"output_blocks.4.1.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
|
||||||
|
"output_blocks.4.1.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
|
||||||
|
"output_blocks.4.1.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
|
||||||
|
"output_blocks.4.1.transformer_blocks.0.norm3": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
|
||||||
|
"output_blocks.4.1.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
|
||||||
|
"output_blocks.4.1.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
|
||||||
|
"output_blocks.4.1.proj_out": "UpBlocks.Chain_5.CLIPLCrossAttention.Chain_3.Conv2d",
|
||||||
|
"output_blocks.5.0.in_layers.0": "UpBlocks.Chain_6.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"output_blocks.5.0.in_layers.2": "UpBlocks.Chain_6.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"output_blocks.5.0.emb_layers.1": "UpBlocks.Chain_6.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"output_blocks.5.0.out_layers.0": "UpBlocks.Chain_6.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"output_blocks.5.0.out_layers.3": "UpBlocks.Chain_6.ResidualBlock.Chain.Conv2d",
|
||||||
|
"output_blocks.5.0.skip_connection": "UpBlocks.Chain_6.ResidualBlock.Conv2d",
|
||||||
|
"output_blocks.5.1.norm": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_1.GroupNorm",
|
||||||
|
"output_blocks.5.1.proj_in": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_1.Conv2d",
|
||||||
|
"output_blocks.5.1.transformer_blocks.0.norm1": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
|
||||||
|
"output_blocks.5.1.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"output_blocks.5.1.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"output_blocks.5.1.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"output_blocks.5.1.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
|
||||||
|
"output_blocks.5.1.transformer_blocks.0.norm2": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
|
||||||
|
"output_blocks.5.1.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
|
||||||
|
"output_blocks.5.1.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
|
||||||
|
"output_blocks.5.1.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
|
||||||
|
"output_blocks.5.1.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
|
||||||
|
"output_blocks.5.1.transformer_blocks.0.norm3": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
|
||||||
|
"output_blocks.5.1.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
|
||||||
|
"output_blocks.5.1.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
|
||||||
|
"output_blocks.5.1.proj_out": "UpBlocks.Chain_6.CLIPLCrossAttention.Chain_3.Conv2d",
|
||||||
|
"output_blocks.5.2.conv": "UpBlocks.Chain_6.Upsample.Conv2d",
|
||||||
|
"output_blocks.6.0.in_layers.0": "UpBlocks.Chain_7.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"output_blocks.6.0.in_layers.2": "UpBlocks.Chain_7.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"output_blocks.6.0.emb_layers.1": "UpBlocks.Chain_7.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"output_blocks.6.0.out_layers.0": "UpBlocks.Chain_7.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"output_blocks.6.0.out_layers.3": "UpBlocks.Chain_7.ResidualBlock.Chain.Conv2d",
|
||||||
|
"output_blocks.6.0.skip_connection": "UpBlocks.Chain_7.ResidualBlock.Conv2d",
|
||||||
|
"output_blocks.6.1.norm": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_1.GroupNorm",
|
||||||
|
"output_blocks.6.1.proj_in": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_1.Conv2d",
|
||||||
|
"output_blocks.6.1.transformer_blocks.0.norm1": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
|
||||||
|
"output_blocks.6.1.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"output_blocks.6.1.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"output_blocks.6.1.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"output_blocks.6.1.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
|
||||||
|
"output_blocks.6.1.transformer_blocks.0.norm2": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
|
||||||
|
"output_blocks.6.1.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
|
||||||
|
"output_blocks.6.1.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
|
||||||
|
"output_blocks.6.1.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
|
||||||
|
"output_blocks.6.1.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
|
||||||
|
"output_blocks.6.1.transformer_blocks.0.norm3": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
|
||||||
|
"output_blocks.6.1.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
|
||||||
|
"output_blocks.6.1.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
|
||||||
|
"output_blocks.6.1.proj_out": "UpBlocks.Chain_7.CLIPLCrossAttention.Chain_3.Conv2d",
|
||||||
|
"output_blocks.7.0.in_layers.0": "UpBlocks.Chain_8.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"output_blocks.7.0.in_layers.2": "UpBlocks.Chain_8.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"output_blocks.7.0.emb_layers.1": "UpBlocks.Chain_8.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"output_blocks.7.0.out_layers.0": "UpBlocks.Chain_8.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"output_blocks.7.0.out_layers.3": "UpBlocks.Chain_8.ResidualBlock.Chain.Conv2d",
|
||||||
|
"output_blocks.7.0.skip_connection": "UpBlocks.Chain_8.ResidualBlock.Conv2d",
|
||||||
|
"output_blocks.7.1.norm": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_1.GroupNorm",
|
||||||
|
"output_blocks.7.1.proj_in": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_1.Conv2d",
|
||||||
|
"output_blocks.7.1.transformer_blocks.0.norm1": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
|
||||||
|
"output_blocks.7.1.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"output_blocks.7.1.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"output_blocks.7.1.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"output_blocks.7.1.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
|
||||||
|
"output_blocks.7.1.transformer_blocks.0.norm2": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
|
||||||
|
"output_blocks.7.1.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
|
||||||
|
"output_blocks.7.1.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
|
||||||
|
"output_blocks.7.1.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
|
||||||
|
"output_blocks.7.1.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
|
||||||
|
"output_blocks.7.1.transformer_blocks.0.norm3": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
|
||||||
|
"output_blocks.7.1.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
|
||||||
|
"output_blocks.7.1.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
|
||||||
|
"output_blocks.7.1.proj_out": "UpBlocks.Chain_8.CLIPLCrossAttention.Chain_3.Conv2d",
|
||||||
|
"output_blocks.8.0.in_layers.0": "UpBlocks.Chain_9.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"output_blocks.8.0.in_layers.2": "UpBlocks.Chain_9.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"output_blocks.8.0.emb_layers.1": "UpBlocks.Chain_9.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"output_blocks.8.0.out_layers.0": "UpBlocks.Chain_9.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"output_blocks.8.0.out_layers.3": "UpBlocks.Chain_9.ResidualBlock.Chain.Conv2d",
|
||||||
|
"output_blocks.8.0.skip_connection": "UpBlocks.Chain_9.ResidualBlock.Conv2d",
|
||||||
|
"output_blocks.8.1.norm": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_1.GroupNorm",
|
||||||
|
"output_blocks.8.1.proj_in": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_1.Conv2d",
|
||||||
|
"output_blocks.8.1.transformer_blocks.0.norm1": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
|
||||||
|
"output_blocks.8.1.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"output_blocks.8.1.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"output_blocks.8.1.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"output_blocks.8.1.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
|
||||||
|
"output_blocks.8.1.transformer_blocks.0.norm2": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
|
||||||
|
"output_blocks.8.1.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
|
||||||
|
"output_blocks.8.1.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
|
||||||
|
"output_blocks.8.1.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
|
||||||
|
"output_blocks.8.1.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
|
||||||
|
"output_blocks.8.1.transformer_blocks.0.norm3": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
|
||||||
|
"output_blocks.8.1.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
|
||||||
|
"output_blocks.8.1.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
|
||||||
|
"output_blocks.8.1.proj_out": "UpBlocks.Chain_9.CLIPLCrossAttention.Chain_3.Conv2d",
|
||||||
|
"output_blocks.8.2.conv": "UpBlocks.Chain_9.Upsample.Conv2d",
|
||||||
|
"output_blocks.9.0.in_layers.0": "UpBlocks.Chain_10.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"output_blocks.9.0.in_layers.2": "UpBlocks.Chain_10.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"output_blocks.9.0.emb_layers.1": "UpBlocks.Chain_10.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"output_blocks.9.0.out_layers.0": "UpBlocks.Chain_10.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"output_blocks.9.0.out_layers.3": "UpBlocks.Chain_10.ResidualBlock.Chain.Conv2d",
|
||||||
|
"output_blocks.9.0.skip_connection": "UpBlocks.Chain_10.ResidualBlock.Conv2d",
|
||||||
|
"output_blocks.9.1.norm": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_1.GroupNorm",
|
||||||
|
"output_blocks.9.1.proj_in": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_1.Conv2d",
|
||||||
|
"output_blocks.9.1.transformer_blocks.0.norm1": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
|
||||||
|
"output_blocks.9.1.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"output_blocks.9.1.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"output_blocks.9.1.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"output_blocks.9.1.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
|
||||||
|
"output_blocks.9.1.transformer_blocks.0.norm2": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
|
||||||
|
"output_blocks.9.1.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
|
||||||
|
"output_blocks.9.1.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
|
||||||
|
"output_blocks.9.1.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
|
||||||
|
"output_blocks.9.1.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
|
||||||
|
"output_blocks.9.1.transformer_blocks.0.norm3": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
|
||||||
|
"output_blocks.9.1.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
|
||||||
|
"output_blocks.9.1.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
|
||||||
|
"output_blocks.9.1.proj_out": "UpBlocks.Chain_10.CLIPLCrossAttention.Chain_3.Conv2d",
|
||||||
|
"output_blocks.10.0.in_layers.0": "UpBlocks.Chain_11.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"output_blocks.10.0.in_layers.2": "UpBlocks.Chain_11.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"output_blocks.10.0.emb_layers.1": "UpBlocks.Chain_11.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"output_blocks.10.0.out_layers.0": "UpBlocks.Chain_11.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"output_blocks.10.0.out_layers.3": "UpBlocks.Chain_11.ResidualBlock.Chain.Conv2d",
|
||||||
|
"output_blocks.10.0.skip_connection": "UpBlocks.Chain_11.ResidualBlock.Conv2d",
|
||||||
|
"output_blocks.10.1.norm": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_1.GroupNorm",
|
||||||
|
"output_blocks.10.1.proj_in": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_1.Conv2d",
|
||||||
|
"output_blocks.10.1.transformer_blocks.0.norm1": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
|
||||||
|
"output_blocks.10.1.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"output_blocks.10.1.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"output_blocks.10.1.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"output_blocks.10.1.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
|
||||||
|
"output_blocks.10.1.transformer_blocks.0.norm2": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
|
||||||
|
"output_blocks.10.1.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
|
||||||
|
"output_blocks.10.1.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
|
||||||
|
"output_blocks.10.1.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
|
||||||
|
"output_blocks.10.1.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
|
||||||
|
"output_blocks.10.1.transformer_blocks.0.norm3": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
|
||||||
|
"output_blocks.10.1.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
|
||||||
|
"output_blocks.10.1.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
|
||||||
|
"output_blocks.10.1.proj_out": "UpBlocks.Chain_11.CLIPLCrossAttention.Chain_3.Conv2d",
|
||||||
|
"output_blocks.11.0.in_layers.0": "UpBlocks.Chain_12.ResidualBlock.Chain.GroupNorm_1",
|
||||||
|
"output_blocks.11.0.in_layers.2": "UpBlocks.Chain_12.ResidualBlock.Chain.RangeAdapter2d.Conv2d",
|
||||||
|
"output_blocks.11.0.emb_layers.1": "UpBlocks.Chain_12.ResidualBlock.Chain.RangeAdapter2d.Chain.Linear",
|
||||||
|
"output_blocks.11.0.out_layers.0": "UpBlocks.Chain_12.ResidualBlock.Chain.GroupNorm_2",
|
||||||
|
"output_blocks.11.0.out_layers.3": "UpBlocks.Chain_12.ResidualBlock.Chain.Conv2d",
|
||||||
|
"output_blocks.11.0.skip_connection": "UpBlocks.Chain_12.ResidualBlock.Conv2d",
|
||||||
|
"output_blocks.11.1.norm": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_1.GroupNorm",
|
||||||
|
"output_blocks.11.1.proj_in": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_1.Conv2d",
|
||||||
|
"output_blocks.11.1.transformer_blocks.0.norm1": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.LayerNorm",
|
||||||
|
"output_blocks.11.1.transformer_blocks.0.attn1.to_q": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_1",
|
||||||
|
"output_blocks.11.1.transformer_blocks.0.attn1.to_k": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_2",
|
||||||
|
"output_blocks.11.1.transformer_blocks.0.attn1.to_v": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Distribute.Linear_3",
|
||||||
|
"output_blocks.11.1.transformer_blocks.0.attn1.to_out.0": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_1.SelfAttention.Linear",
|
||||||
|
"output_blocks.11.1.transformer_blocks.0.norm2": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.LayerNorm",
|
||||||
|
"output_blocks.11.1.transformer_blocks.0.attn2.to_q": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_1",
|
||||||
|
"output_blocks.11.1.transformer_blocks.0.attn2.to_k": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_2",
|
||||||
|
"output_blocks.11.1.transformer_blocks.0.attn2.to_v": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Distribute.Linear_3",
|
||||||
|
"output_blocks.11.1.transformer_blocks.0.attn2.to_out.0": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_2.Attention.Linear",
|
||||||
|
"output_blocks.11.1.transformer_blocks.0.norm3": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.LayerNorm",
|
||||||
|
"output_blocks.11.1.transformer_blocks.0.ff.net.0.proj": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_1",
|
||||||
|
"output_blocks.11.1.transformer_blocks.0.ff.net.2": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_2.CrossAttentionBlock.Residual_3.Linear_2",
|
||||||
|
"output_blocks.11.1.proj_out": "UpBlocks.Chain_12.CLIPLCrossAttention.Chain_3.Conv2d",
|
||||||
|
"out.0": "Chain.GroupNorm",
|
||||||
|
"out.2": "Chain.Conv2d",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Conversion entry for the Stable Diffusion 1.5 UNet weights.
runwayml = Conversion(
    # Source checkpoint: the diffusers-layout UNet file, pinned to an exact
    # repository revision and checked against its expected SHA-256 digest.
    original=Hub(
        repo_id="stable-diffusion-v1-5/stable-diffusion-v1-5",
        filename="unet/diffusion_pytorch_model.safetensors",
        revision="f03de327dd89b501a01da37fc5240cf4fdba85a1",
        expected_sha256="19da7aaa4b880e59d56843f1fcb4dd9b599c28a1d9d9af7c1143057c8ffae9f1",
    ),
    # Destination of the converted weights (no revision is pinned here).
    converted=Hub(
        repo_id="refiners/sd15.unet",
        filename="model.safetensors",
        expected_sha256="6c0488a590f151128565bac105dbc3ce6563643f270c5c32ea756fa317a1c256",
    ),
    # The source file uses diffusers key names, hence the diffusers recipe.
    recipe=diffusers_recipe,
    # NOTE(review): presumably the dtype the converted tensors are stored in;
    # confirm against the Conversion class.
    dtype=torch.float16,
)
|
||||||
|
# Conversion entry for the Stable Diffusion 1.5 inpainting UNet weights.
runwayml_inpainting = Conversion(
    # Source checkpoint: the fp16 variant of the diffusers-layout UNet file,
    # pinned to an exact repository revision and SHA-256 digest.
    original=Hub(
        repo_id="stable-diffusion-v1-5/stable-diffusion-inpainting",
        filename="unet/diffusion_pytorch_model.fp16.safetensors",
        revision="8a4288a76071f7280aedbdb3253bdb9e9d5d84bb",
        expected_sha256="24b788b4a777748377cc20364eea4ae113c8c42f4468c16bc8c02fdae5492af9",
    ),
    # Destination of the converted weights (no revision is pinned here).
    converted=Hub(
        repo_id="refiners/sd15.unet_inpainting",
        filename="model.safetensors",
        expected_sha256="331283db7631bfe0027d5f7107ec00ac64679e25fe6e195b79a7b040ad3666ae",
    ),
    # Same recipe as the non-inpainting SD 1.5 UNet entry.
    recipe=diffusers_recipe,
    # Half precision, matching the fp16 source file named above.
    dtype=torch.float16,
)
|
||||||
|
# Conversion entry for the UNet of the Juggernaut "reborn" checkpoint.
juggernaut_reborn = Conversion(
    # Source checkpoint: a single-file fp16 model. Unlike the entries that pin
    # a `revision`, this one carries an explicit `download_url` on civitai.com;
    # the SHA-256 digest identifies the exact file expected.
    original=Hub(
        repo_id="civitai/KandooAi/juggernaut",
        filename="reborn/onefile_fp16.safetensors",
        download_url="https://civitai.com/api/download/models/274039?type=Model&format=SafeTensor&size=pruned&fp=fp16",
        expected_sha256="338b85bc4f7628bc42cce336242e79154a57c2a4360531436e97f7793568f18c",
    ),
    # Destination of the converted UNet weights.
    converted=Hub(
        repo_id="refiners/sd15.juggernaut.reborn.unet",
        filename="model.safetensors",
        expected_sha256="64a75d16fdb11faeedbef8270fcdfe3051284f743cdf46d8bb89c09499a22591",
    ),
    # Single-file checkpoints go through the civitai recipe.
    recipe=civitai_recipe,
    # Half precision, matching the fp16 source file named above.
    dtype=torch.float16,
)
|
||||||
|
# Juggernaut "aftermath" checkpoint, converted to a float32 UNet.
# The original is fetched from `download_url` (Civitai) rather than the hf hub, so `repo_id` and
# `filename` only determine where the file is stored in the local hub directory.
juggernaut_aftermath = Conversion(
    original=Hub(
        repo_id="civitai/KandooAi/juggernaut",
        filename="aftermath/onefile_fp32.safetensors",
        download_url="https://civitai.com/api/download/models/127207?type=Model&format=SafeTensor&size=full&fp=fp32",
        expected_sha256="7ed60a2f5842de8e6755965ffbf2a0ee273429939f307233c22f7dffd8d7ae3d",
    ),
    converted=Hub(
        repo_id="refiners/sd15.juggernaut.aftermath.unet",
        filename="model.safetensors",
        expected_sha256="5882c517eac0670df60755cb9eb762081a1d6a37431d3f7f9a3a6d8dfe764d86",
    ),
    recipe=civitai_recipe,
    dtype=torch.float32,
)
|
||||||
|
# Juggernaut "aftermath" inpainting checkpoint, converted to a float32 inpainting UNet.
# The original is fetched from `download_url` (Civitai) rather than the hf hub, so `repo_id` and
# `filename` only determine where the file is stored in the local hub directory.
juggernaut_aftermath_inpainting = Conversion(
    original=Hub(
        repo_id="civitai/KandooAi/juggernaut",
        filename="aftermath-inpainting/onefile_fp32.safetensors",
        download_url="https://civitai.com/api/download/models/129549?type=Model&format=SafeTensor&size=full&fp=fp32",
        expected_sha256="b370189733ef44a3661a96139c02fde22d36df5ad12d1112b0b56fc3d6bfbdba",
    ),
    converted=Hub(
        repo_id="refiners/sd15.juggernaut.aftermath.unet_inpainting",
        filename="model.safetensors",
        expected_sha256="c00a85060de351d617cc3a3be6865cf3493a0557f1f32303cb8a385c6368b9a8",
    ),
    recipe=civitai_recipe,
    dtype=torch.float32,
)
|
||||||
|
# Realistic Stock Photo v3 checkpoint, converted to a float16 UNet.
# No revision is given for the original, so `Hub` falls back to its default ("main");
# integrity therefore relies on `expected_sha256` alone.
realistic_stock_photo_v3 = Conversion(
    original=Hub(
        repo_id="Yntec/realisticStockPhoto3",
        filename="realisticStockPhoto_v30SD15.safetensors",
        expected_sha256="f85affae9aae16276eaf670f810946e2d03c4d300791a0380f07653cb78ba31b",
        # The civitai download is gated by auth, so a mirror on the hf hub is used instead.
        # Gated url, kept for reference:
        # download_url="https://civitai.com/api/download/models/524032?type=Model&format=SafeTensor&size=full&fp=fp16",
    ),
    converted=Hub(
        repo_id="refiners/sd15.realistic_stock_photo.v3_0.unet",
        filename="model.safetensors",
        expected_sha256="18a8616375a8738cc051c3d5a63979e7d40dec4d720f88247424db2ebd663131",
    ),
    recipe=civitai_recipe,
    dtype=torch.float16,
)
|
||||||
|
# Realistic Vision checkpoint, converted to a float16 UNet.
# The original is fetched from `download_url` (Civitai) rather than the hf hub, so `repo_id` and
# `filename` only determine where the file is stored in the local hub directory.
# NOTE(review): the local filename says "fp32" while the download url requests `fp=fp16`, and the
# converted repo id says "v5_1" while this name and the original path say "v5" — confirm which
# labels are intended before renaming anything (the paths are part of the on-disk layout).
realistic_vision_v5 = Conversion(
    original=Hub(
        repo_id="civitai/SG_161222/realistic_vision",
        filename="v5/onefile_fp32.safetensors",
        download_url="https://civitai.com/api/download/models/130072?type=Model&format=SafeTensor&size=full&fp=fp16",
        expected_sha256="ef76aa2332635f4352463343beec9c5aae3bd107a73c0fd7221abbbcf8bd5470",
    ),
    converted=Hub(
        repo_id="refiners/sd15.realistic_vision.v5_1.unet",
        filename="model.safetensors",
        expected_sha256="3d6fc9cb9eabb1487e0337ed17ab29bbfabfc5e5faf47c3635a4ee73b5bb1164",
    ),
    recipe=civitai_recipe,
    dtype=torch.float16,
)
|
||||||
|
|
||||||
|
# IC-Light "fc" weights for SD 1.5.
# The original is pinned to a fixed revision of the `lllyasviel/ic-light` repo and a sha256,
# translated with `diffusers_recipe`, and saved as float16.
ic_light_fc = Conversion(
    original=Hub(
        repo_id="lllyasviel/ic-light",
        filename="iclight_sd15_fc.safetensors",
        revision="f5950d474dc0cd5bb5a0c66189534cd13f28eb70",
        expected_sha256="a033fbaaa2f3f7859fa6a4477ee63ebbf9c116bf3569d5811856d2807f3468cd",
    ),
    converted=Hub(
        repo_id="refiners/sd15.ic_light.fc",
        filename="model.safetensors",
        expected_sha256="2d4e8ff0ac65274ec9655eeac459226e4790de1326e4338e34c4348bdf763350",
    ),
    recipe=diffusers_recipe,
    dtype=torch.float16,
)
|
||||||
|
# IC-Light "fcon" weights for SD 1.5.
# The original is pinned to a fixed revision of the `lllyasviel/ic-light` repo and a sha256,
# translated with `diffusers_recipe`, and saved as float16.
ic_light_fcon = Conversion(
    original=Hub(
        repo_id="lllyasviel/ic-light",
        filename="iclight_sd15_fcon.safetensors",
        revision="f5950d474dc0cd5bb5a0c66189534cd13f28eb70",
        expected_sha256="37652ef27028c8fdb9882830b1621e4e648d26e19cb2035a6af8d52f3a6d8d87",
    ),
    converted=Hub(
        repo_id="refiners/sd15.ic_light.fcon",
        filename="model.safetensors",
        expected_sha256="856cf6a6cb6b57335073c3140bb38c3cc35bcffa69cd8a57c166fac37b3594d4",
    ),
    recipe=diffusers_recipe,
    dtype=torch.float16,
)
|
||||||
|
# IC-Light "fbc" weights for SD 1.5.
# The original is pinned to a fixed revision of the `lllyasviel/ic-light` repo and a sha256,
# translated with `diffusers_recipe`, and saved as float16.
ic_light_fbc = Conversion(
    original=Hub(
        repo_id="lllyasviel/ic-light",
        filename="iclight_sd15_fbc.safetensors",
        revision="f5950d474dc0cd5bb5a0c66189534cd13f28eb70",
        expected_sha256="bb8ccedaa4944b16cfa8356afcbc2c2174cc4c4af57de19124ae0cddd0d96947",
    ),
    converted=Hub(
        repo_id="refiners/sd15.ic_light.fbc",
        filename="model.safetensors",
        expected_sha256="c79f275b94566da66801cf8fe9c1872202ff5c9b634d321a7e6ecaface0e456f",
    ),
    recipe=diffusers_recipe,
    dtype=torch.float16,
)
|
2192
src/refiners/conversion/models/unet_sdxl.py
Normal file
2192
src/refiners/conversion/models/unet_sdxl.py
Normal file
File diff suppressed because it is too large
Load diff
347
src/refiners/conversion/utils.py
Normal file
347
src/refiners/conversion/utils.py
Normal file
|
@ -0,0 +1,347 @@
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from hashlib import sha256
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, cast
|
||||||
|
from warnings import warn
|
||||||
|
|
||||||
|
import requests
|
||||||
|
import torch
|
||||||
|
from huggingface_hub import ( # pyright: ignore[reportMissingTypeStubs]
|
||||||
|
HfFileMetadata,
|
||||||
|
get_hf_file_metadata, # pyright: ignore[reportUnknownVariableType]
|
||||||
|
hf_hub_download, # pyright: ignore[reportUnknownVariableType]
|
||||||
|
hf_hub_url,
|
||||||
|
)
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
from refiners.fluxion.utils import load_from_safetensors, load_tensors, save_to_safetensors
|
||||||
|
|
||||||
|
# State dict as loaded from disk: may be nested and may hold non-tensor values
# (see `WeightRecipe.flatten_state_dict` and `Conversion.filter_tensors_state_dict`).
AnyDict = dict[str, Any]
# State dict whose values are all tensors.
TensorDict = dict[str, torch.Tensor]
|
||||||
|
|
||||||
|
|
||||||
|
def download_file_url(url: str, destination: Path) -> None:
    """Download a file from a url to a destination.

    Args:
        url: The url to download the file from.
        destination: The path the content is written to; missing parent directories are created.

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error status code.
    """
    logging.debug(f"Downloading {url} to {destination}")

    # A single streamed request provides both the size (from the headers) and the content.
    # The context manager releases the connection even when the download fails midway.
    with requests.get(url, stream=True) as response:
        response.raise_for_status()

        # get the size of the file (0 when the server does not announce it)
        total = int(response.headers.get("content-length", 0))

        # download the file, reporting progress; the bar is closed on success and on error
        destination.parent.mkdir(parents=True, exist_ok=True)
        with destination.open("wb") as f:
            with tqdm(
                desc=destination.name,
                total=total,
                unit="iB",
                unit_scale=True,
                unit_divisor=1024,
                leave=False,
            ) as bar:
                for chunk in response.iter_content(chunk_size=1024 * 1000):
                    size = f.write(chunk)
                    bar.update(size)
|
||||||
|
|
||||||
|
|
||||||
|
class Hub:
|
||||||
|
"""A class representing a weight on the Hub.
|
||||||
|
|
||||||
|
Note:
|
||||||
|
The Hub denotes a directory on the local machine where the weights are stored.
|
||||||
|
The Hub may also correspond to a remote repository on the Hugging Face Hub.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
repo_id: str,
|
||||||
|
filename: str,
|
||||||
|
expected_sha256: str,
|
||||||
|
revision: str = "main",
|
||||||
|
download_url: str | None = None,
|
||||||
|
) -> None:
|
||||||
|
"""Initialize the HubPath.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
repo_id: The repository identifier on the hub.
|
||||||
|
filename: The filename of the file in the repository.
|
||||||
|
revision: The revision of the file on the hf hub.
|
||||||
|
expected_sha256: The sha256 hash of the file.
|
||||||
|
download_url: The url to download the file from, if not from the huggingface hub.
|
||||||
|
"""
|
||||||
|
self.repo_id = repo_id
|
||||||
|
self.filename = filename
|
||||||
|
self.revision = revision
|
||||||
|
self.expected_sha256 = expected_sha256.lower()
|
||||||
|
self.override_download_url = download_url
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def hub_location():
|
||||||
|
"""Return the path to the local hub root directory."""
|
||||||
|
return Path(os.getenv("REFINERS_HUB_PATH", "tests/weights"))
|
||||||
|
|
||||||
|
@property
|
||||||
|
def hf_url(self) -> str:
|
||||||
|
"""Return the url to the file on the hf hub."""
|
||||||
|
assert self.override_download_url is None, f"{self.repo_id}/{self.filename} is not available on the hub"
|
||||||
|
return hf_hub_url(
|
||||||
|
repo_id=self.repo_id,
|
||||||
|
filename=self.filename,
|
||||||
|
revision=self.revision,
|
||||||
|
)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def hf_cache_path(self) -> Path:
|
||||||
|
"""Download the file from the hf hub and return its path in the local hf cache."""
|
||||||
|
return Path(
|
||||||
|
hf_hub_download(
|
||||||
|
repo_id=self.repo_id,
|
||||||
|
filename=self.filename,
|
||||||
|
revision=self.revision,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def hf_metadata(self) -> HfFileMetadata:
|
||||||
|
"""Return the metadata of the file on the hf hub."""
|
||||||
|
return get_hf_file_metadata(self.hf_url)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def hf_sha256_hash(self) -> str:
|
||||||
|
"""Return the sha256 hash of the file on the hf hub."""
|
||||||
|
remote_hash = self.hf_metadata.etag
|
||||||
|
assert remote_hash is not None
|
||||||
|
assert len(remote_hash) == 64
|
||||||
|
return remote_hash.lower()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def local_path(self) -> Path:
|
||||||
|
"""Return the path to the file in the local hub."""
|
||||||
|
return self.hub_location() / self.repo_id / self.filename
|
||||||
|
|
||||||
|
@property
|
||||||
|
def local_hash(self) -> str:
|
||||||
|
"""Return the sha256 hash of the file in the local hub."""
|
||||||
|
assert self.local_path.is_file(), f"{self.local_path} does not exist"
|
||||||
|
# TODO: use https://docs.python.org/3/library/hashlib.html#hashlib.file_digest when support python >= 3.11
|
||||||
|
return sha256(self.local_path.read_bytes()).hexdigest().lower()
|
||||||
|
|
||||||
|
def check_local_hash(self) -> bool:
|
||||||
|
"""Check if the sha256 hash of the file in the local hub is correct."""
|
||||||
|
if self.expected_sha256 != self.local_hash:
|
||||||
|
logging.warning(f"{self.local_path} local sha256 mismatch, {self.local_hash} != {self.expected_sha256}")
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
logging.debug(f"{self.local_path} local sha256 is correct ({self.local_hash})")
|
||||||
|
return True
|
||||||
|
|
||||||
|
def check_remote_hash(self) -> bool:
|
||||||
|
"""Check if the sha256 hash of the file on the hf hub is correct."""
|
||||||
|
if self.expected_sha256 != self.hf_sha256_hash:
|
||||||
|
logging.warning(
|
||||||
|
f"{self.local_path} remote sha256 mismatch, {self.hf_sha256_hash} != {self.expected_sha256}"
|
||||||
|
)
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
logging.debug(f"{self.local_path} remote sha256 is correct ({self.hf_sha256_hash})")
|
||||||
|
return True
|
||||||
|
|
||||||
|
def download(self) -> None:
|
||||||
|
"""Download the file from the hf hub or from the override download url."""
|
||||||
|
self.local_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
if self.local_path.is_file():
|
||||||
|
logging.warning(f"{self.local_path} already exists")
|
||||||
|
elif self.override_download_url is not None:
|
||||||
|
download_file_url(url=self.override_download_url, destination=self.local_path)
|
||||||
|
else:
|
||||||
|
# TODO: pas assez de message de log quand local_path existe pas et que ça vient du hf cache
|
||||||
|
self.local_path.symlink_to(self.hf_cache_path)
|
||||||
|
assert self.check_local_hash()
|
||||||
|
|
||||||
|
|
||||||
|
class WeightRecipe:
    """A class representing a recipe to convert weights from one format to another."""

    def __init__(
        self,
        key_map: dict[str, str],
        key_prefix: str = "",
        key_aliases: dict[str, str] | None = None,
        tensor_reshapes: dict[str, tuple[int, ...]] | None = None,
    ):
        """Initialize the weight recipe.

        Args:
            key_map: A dictionary mapping the keys of the original state dict to the converted state dict.
                Keys are given without their `.weight` / `.bias` suffix and without `key_prefix`.
            key_prefix: A prefix to remove from the keys of the original state dict.
            key_aliases: A dictionary mapping the keys of the original state dict to their aliases,
                i.e. to the key that is actually looked up in `key_map`. Defaults to no aliases.
            tensor_reshapes: A dictionary mapping keys to their new shapes. Reshapes are applied after
                the keys have been mapped, so these are keys of the converted state dict
                (suffix included). Defaults to no reshapes.
        """
        self.key_prefix = key_prefix
        self.key_map = key_map
        # fresh dicts per instance: a `{}` default would be shared by every recipe
        self.key_aliases = {} if key_aliases is None else key_aliases
        self.tensor_reshapes = {} if tensor_reshapes is None else tensor_reshapes

    @staticmethod
    def flatten_state_dict(state_dict: AnyDict, sep: str = ".") -> AnyDict:
        """Flattens a nested dictionary into a dictionary with dot-separated keys.

        Args:
            state_dict: A nested dictionary.
            sep: The separator to use between keys when flattening.

        Returns:
            A single-level dictionary, where each key is the path to a non-dict value of `state_dict`.
        """

        def _flatten(current_dict: AnyDict, parent_key: str = "") -> AnyDict:
            items: AnyDict = {}
            for k, v in current_dict.items():
                new_key = f"{parent_key}{sep}{k}" if parent_key else k
                if isinstance(v, dict):
                    items.update(_flatten(cast(AnyDict, v), new_key))
                else:
                    items[new_key] = v
            return items

        return _flatten(state_dict)

    def name_map_keys(self, state_dict: TensorDict) -> TensorDict:
        """Map the keys of the state dict according to the name map.

        Keys that are not found in `key_map` are silently dropped.
        """
        new_state_dict: TensorDict = {}
        for key, value in state_dict.items():
            # check for .weight or .bias suffixes ("" always matches, so a suffix is always found)
            suffix = next(s for s in (".weight", ".bias", "") if key.endswith(s))
            key = key.removesuffix(suffix)

            # remove key_prefix
            key = key.removeprefix(self.key_prefix)

            # check for key aliases
            source_key = self.key_aliases.get(key, key)

            # get target_key from key_map
            target_key = self.key_map.get(source_key)
            if target_key is None:
                continue  # ignore key if it doesn't exist in the key_map

            # add value to new_state_dict with the mapped key
            new_state_dict[target_key + suffix] = value

        return new_state_dict

    def reshape_tensors(self, state_dict: TensorDict) -> TensorDict:
        """Reshape tensors in the state dict according to tensor_reshapes.

        The input dictionary is left untouched, a shallow copy is returned.
        """
        new_state_dict = state_dict.copy()
        for key, value in state_dict.items():
            if key in self.tensor_reshapes:
                new_shape = self.tensor_reshapes[key]
                new_state_dict[key] = value.reshape(new_shape)
        return new_state_dict

    def translate_keys(self, state_dict: AnyDict, flatten_state_dict: bool = True) -> TensorDict:
        """Translate the keys of a state dict.

        Args:
            state_dict: The original state dict.
            flatten_state_dict: Whether to flatten nested dictionaries before mapping the keys.

        Returns:
            The state dict with mapped keys and reshaped tensors.
        """
        if flatten_state_dict:
            state_dict = self.flatten_state_dict(state_dict)

        state_dict = self.name_map_keys(state_dict)
        state_dict = self.reshape_tensors(state_dict)

        return state_dict
|
||||||
|
|
||||||
|
|
||||||
|
class Conversion:
    """Structure to link original and converted weights on the Hub."""

    def __init__(
        self,
        original: Hub,
        converted: Hub,
        recipe: WeightRecipe,
        dtype: torch.dtype,
    ) -> None:
        """Initialize the weight structure.

        Args:
            original: A Hub object representing the original weight.
            converted: A Hub object representing the converted weight.
            recipe: A WeightRecipe object used to convert from the original to the converted weight.
            dtype: The dtype of the converted weights.
        """
        self.original = original
        self.converted = converted
        self.recipe = recipe
        self.dtype = dtype

    @staticmethod
    def load_state_dict(path: Path) -> AnyDict:
        """Load a state dict from a file.

        Files ending in `.safetensors` or `.sft` are loaded as safetensors,
        anything else goes through `load_tensors`.
        """
        if path.suffix == ".safetensors" or path.suffix == ".sft":
            return load_from_safetensors(path)
        else:
            return load_tensors(path)

    @staticmethod
    def filter_tensors_state_dict(state_dict: AnyDict) -> TensorDict:
        """Filter out non-tensor values and tensors with NaNs from a state dict.

        A warning is emitted for every filtered entry.
        """
        new_state_dict: TensorDict = {}
        for key, value in state_dict.items():
            if not isinstance(value, torch.Tensor):
                warn(f"Value for key {key} is not a tensor, filtering")
                continue
            if torch.isnan(value).any().item():
                warn(f"Found NaNs in {key}, filtering")
                continue
            new_state_dict[key] = value
        return new_state_dict

    @staticmethod
    def change_dtype(state_dict: TensorDict, dtype: torch.dtype) -> TensorDict:
        """Change the dtype of the tensors in a state dict."""
        return {k: v.to(dtype=dtype) for k, v in state_dict.items()}

    def _verify_converted_remote_hash(self, unreachable_level: int) -> None:
        """Check the converted weight against the hash published on the hf hub.

        An HTTP error while querying the hub is only logged (at `unreachable_level`),
        whereas an actual hash mismatch raises an AssertionError.
        """
        try:
            remote_hash_is_correct = self.converted.check_remote_hash()
        except requests.exceptions.HTTPError:
            logging.log(unreachable_level, f"{self.converted.local_path} couldn't verify remote hash")
            return
        # explicit raise rather than `assert`, so the check also runs under `python -O`
        if not remote_hash_is_correct:
            raise AssertionError(
                f"{self.converted.repo_id}/{self.converted.filename} remote sha256 does not match the expected one"
            )

    def convert(self) -> None:
        """Convert the weights from the original to the converted weights.

        Note: The original weights are automatically downloaded if they are not already present.

        Raises:
            AssertionError: If the converted weights do not match their expected sha256 hash,
                either locally or on the hf hub.
        """
        logging.info(
            f"Converting {self.original.repo_id}/{self.original.filename} "
            f"to {self.converted.repo_id}/{self.converted.filename}"
        )

        # check if the converted file already exists
        if self.converted.local_path.is_file():
            logging.warning(f"{self.converted.local_path} already exists")
            if self.converted.check_local_hash():
                self._verify_converted_remote_hash(unreachable_level=logging.ERROR)
                return
            # the existing file has an unexpected hash: convert again and overwrite it

        # get the original state_dict
        self.original.download()

        # load the original state_dict
        original_state_dict = self.load_state_dict(self.original.local_path)
        original_state_dict = self.filter_tensors_state_dict(original_state_dict)

        # convert the state_dict
        converted_state_dict = self.recipe.translate_keys(original_state_dict)
        converted_state_dict = self.change_dtype(converted_state_dict, self.dtype)

        # save the converted state_dict
        self.converted.local_path.parent.mkdir(parents=True, exist_ok=True)
        save_to_safetensors(self.converted.local_path, converted_state_dict)

        # check the converted state_dict
        if not self.converted.check_local_hash():
            raise AssertionError(f"{self.converted.local_path} does not match the expected sha256")
        self._verify_converted_remote_hash(unreachable_level=logging.WARNING)
|
Loading…
Reference in a new issue