mirror of
https://github.com/finegrain-ai/refiners.git
synced 2024-11-13 00:28:14 +00:00
2023 lines
156 KiB
HTML
2023 lines
156 KiB
HTML
|
|
<!doctype html>
|
|
<html lang="en" class="no-js">
|
|
<head>
|
|
|
|
<meta charset="utf-8">
|
|
<meta name="viewport" content="width=device-width,initial-scale=1">
|
|
|
|
<meta name="description" content="A micro framework on top of PyTorch with first class citizen APIs for foundation model adaptation">
|
|
|
|
|
|
|
|
|
|
<link rel="prev" href="../../concepts/adapter/">
|
|
|
|
|
|
<link rel="next" href="../training_101/">
|
|
|
|
|
|
<link rel="icon" href="../../assets/favicon.svg">
|
|
<meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.5.34">
|
|
|
|
|
|
|
|
<title>Adapting SDXL - Refiners</title>
|
|
|
|
|
|
|
|
<link rel="stylesheet" href="../../assets/stylesheets/main.35f28582.min.css">
|
|
|
|
|
|
<link rel="stylesheet" href="../../assets/stylesheets/palette.06af60db.min.css">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
|
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
|
|
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
|
|
|
|
|
|
|
|
<link rel="stylesheet" href="../../assets/_mkdocstrings.css">
|
|
|
|
<link rel="stylesheet" href="../../stylesheets/extra.css">
|
|
|
|
<script>
  /*
   * Bootstrap helpers shared with the page's other scripts (for example the
   * announcement banner script). They are assigned without a declaration on
   * purpose, so that they become globals.
   */

  // Base URL of the site, resolved two levels above the current page.
  __md_scope = new URL("../..", location);

  // Fold the characters of a string into a numeric hash.
  __md_hash = text =>
    [...text].reduce((acc, ch) => (acc << 5) - acc + ch.charCodeAt(0), 0);

  // Read the JSON value stored under "<scope pathname>.<key>".
  __md_get = (key, storage = localStorage, scope = __md_scope) => {
    const raw = storage.getItem(scope.pathname + "." + key);
    return JSON.parse(raw);
  };

  // Store a value as JSON under "<scope pathname>.<key>"; any error raised
  // while writing is deliberately ignored.
  __md_set = (key, value, storage = localStorage, scope = __md_scope) => {
    try {
      storage.setItem(scope.pathname + "." + key, JSON.stringify(value));
    } catch (err) {}
  };
</script>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
</head>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<body dir="ltr" data-md-color-scheme="default" data-md-color-primary="deep-orange" data-md-color-accent="deep-orange">
|
|
|
|
|
|
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
|
|
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
|
|
<label class="md-overlay" for="__drawer"></label>
|
|
<div data-md-component="skip">
|
|
|
|
|
|
<a href="#adapting-stable-diffusion-xl" class="md-skip">
|
|
Skip to content
|
|
</a>
|
|
|
|
</div>
|
|
<div data-md-component="announce">
|
|
|
|
<aside class="md-banner">
|
|
<div class="md-banner__inner md-grid md-typeset">
|
|
|
|
<button class="md-banner__button md-icon" aria-label="Don't show this again">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
|
|
</button>
|
|
|
|
|
|
|
|
Check out our brand new <a href="https://finegrain.ai/bounties">Bounty Program</a> 💰!
|
|
|
|
|
|
</div>
|
|
|
|
<script>
  // Hide the announcement banner when the hash of its current content equals
  // the value stored under the "__announce" key.
  var el = document.querySelector("[data-md-component=announce]");
  if (el) {
    var content = el.querySelector(".md-typeset");
    if (__md_hash(content.innerHTML) === __md_get("__announce")) {
      el.hidden = true;
    }
  }
</script>
|
|
|
|
</aside>
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<header class="md-header md-header--shadow md-header--lifted" data-md-component="header">
|
|
<nav class="md-header__inner md-grid" aria-label="Header">
|
|
<a href="../.." title="Refiners" class="md-header__button md-logo" aria-label="Refiners" data-md-component="logo">
|
|
|
|
<img src="../../assets/favicon.svg" alt="logo">
|
|
|
|
</a>
|
|
<label class="md-header__button md-icon" for="__drawer">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
|
|
</label>
|
|
<div class="md-header__title" data-md-component="header-title">
|
|
<div class="md-header__ellipsis">
|
|
<div class="md-header__topic">
|
|
<span class="md-ellipsis">
|
|
Refiners
|
|
</span>
|
|
</div>
|
|
<div class="md-header__topic" data-md-component="header-topic">
|
|
<span class="md-ellipsis">
|
|
|
|
Adapting SDXL
|
|
|
|
</span>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<label class="md-header__button md-icon" for="__search">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
|
|
</label>
|
|
<div class="md-search" data-md-component="search" role="dialog">
|
|
<label class="md-search__overlay" for="__search"></label>
|
|
<div class="md-search__inner" role="search">
|
|
<form class="md-search__form" name="search">
|
|
<input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
|
|
<label class="md-search__icon md-icon" for="__search">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
|
|
</label>
|
|
<nav class="md-search__options" aria-label="Search">
|
|
|
|
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
|
|
</button>
|
|
</nav>
|
|
|
|
</form>
|
|
<div class="md-search__output">
|
|
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
|
|
<div class="md-search-result" data-md-component="search-result">
|
|
<div class="md-search-result__meta">
|
|
Initializing search
|
|
</div>
|
|
<ol class="md-search-result__list" role="presentation"></ol>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
<div class="md-header__source">
|
|
<a href="https://github.com/finegrain-ai/refiners" title="Go to repository" class="md-source" data-md-component="source">
|
|
<div class="md-source__icon md-icon">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg>
|
|
</div>
|
|
<div class="md-source__repository">
|
|
Refiners
|
|
</div>
|
|
</a>
|
|
</div>
|
|
|
|
</nav>
|
|
|
|
|
|
|
|
<nav class="md-tabs" aria-label="Tabs" data-md-component="tabs">
|
|
<div class="md-grid">
|
|
<ul class="md-tabs__list">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-tabs__item">
|
|
<a href="../.." class="md-tabs__link">
|
|
|
|
|
|
Home
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-tabs__item">
|
|
<a href="../../getting-started/recommended/" class="md-tabs__link">
|
|
|
|
|
|
Getting started
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-tabs__item">
|
|
<a href="../../concepts/chain/" class="md-tabs__link">
|
|
|
|
|
|
Key Concepts
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-tabs__item md-tabs__item--active">
|
|
<a href="./" class="md-tabs__link">
|
|
|
|
|
|
Guides
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-tabs__item">
|
|
<a href="../../reference/fluxion/adapters/" class="md-tabs__link">
|
|
|
|
|
|
API Reference
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</div>
|
|
</nav>
|
|
|
|
|
|
</header>
|
|
|
|
<div class="md-container" data-md-component="container">
|
|
|
|
|
|
|
|
|
|
<main class="md-main" data-md-component="main">
|
|
<div class="md-main__inner md-grid">
|
|
|
|
|
|
|
|
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
|
|
<div class="md-sidebar__scrollwrap">
|
|
<div class="md-sidebar__inner">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0">
|
|
<label class="md-nav__title" for="__drawer">
|
|
<a href="../.." title="Refiners" class="md-nav__button md-logo" aria-label="Refiners" data-md-component="logo">
|
|
|
|
<img src="../../assets/favicon.svg" alt="logo">
|
|
|
|
</a>
|
|
Refiners
|
|
</label>
|
|
|
|
<div class="md-nav__source">
|
|
<a href="https://github.com/finegrain-ai/refiners" title="Go to repository" class="md-source" data-md-component="source">
|
|
<div class="md-source__icon md-icon">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg>
|
|
</div>
|
|
<div class="md-source__repository">
|
|
Refiners
|
|
</div>
|
|
</a>
|
|
</div>
|
|
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--nested">
|
|
|
|
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_1" >
|
|
|
|
|
|
<label class="md-nav__link" for="__nav_1" id="__nav_1_label" tabindex="0">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Home
|
|
</span>
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
|
|
<label class="md-nav__title" for="__nav_1">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
Home
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../.." class="md-nav__link">
|
|
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m12 3.77-.75.84S9.97 6.06 8.68 7.94 6 12.07 6 14.23a6 6 0 0 0 6 6 6 6 0 0 0 6-6c0-2.16-1.39-4.41-2.68-6.29s-2.57-3.33-2.57-3.33zm0 3.13c.44.52.84.95 1.68 2.17 1.21 1.76 2.32 4 2.32 5.16 0 2.22-1.78 4-4 4s-4-1.78-4-4c0-1.16 1.11-3.4 2.32-5.16.84-1.22 1.24-1.65 1.68-2.17"/></svg>
|
|
|
|
<span class="md-ellipsis">
|
|
Welcome
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../home/why/" class="md-nav__link">
|
|
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11h3v2h-3zM1 11h3v2H1zM13 1v3h-2V1zM4.92 3.5l2.13 2.14-1.42 1.41L3.5 4.93zm12.03 2.13 2.12-2.13 1.43 1.43-2.13 2.12zM12 6a6 6 0 0 1 6 6c0 2.22-1.21 4.16-3 5.2V19a1 1 0 0 1-1 1h-4a1 1 0 0 1-1-1v-1.8c-1.79-1.04-3-2.98-3-5.2a6 6 0 0 1 6-6m2 15v1a1 1 0 0 1-1 1h-2a1 1 0 0 1-1-1v-1zm-3-3h2v-2.13c1.73-.44 3-2.01 3-3.87a4 4 0 0 0-4-4 4 4 0 0 0-4 4c0 1.86 1.27 3.43 3 3.87z"/></svg>
|
|
|
|
<span class="md-ellipsis">
|
|
Manifesto
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--nested">
|
|
|
|
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_2" >
|
|
|
|
|
|
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Getting started
|
|
</span>
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
|
|
<label class="md-nav__title" for="__nav_2">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
Getting started
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../getting-started/recommended/" class="md-nav__link">
|
|
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m12 15.39-3.76 2.27.99-4.28-3.32-2.88 4.38-.37L12 6.09l1.71 4.04 4.38.37-3.32 2.88.99 4.28M22 9.24l-7.19-.61L12 2 9.19 8.63 2 9.24l5.45 4.73L5.82 21 12 17.27 18.18 21l-1.64-7.03z"/></svg>
|
|
|
|
<span class="md-ellipsis">
|
|
Recommended usage
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../getting-started/advanced/" class="md-nav__link">
|
|
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9 1.09V6H7V1.09C4.16 1.57 2 4.03 2 7c0 2.22 1.21 4.15 3 5.19V21c0 .55.45 1 1 1h4c.55 0 1-.45 1-1v-8.81c1.79-1.04 3-2.97 3-5.19 0-2.97-2.16-5.43-5-5.91m1 9.37-1 .58V20H7v-8.96l-1-.58C4.77 9.74 4 8.42 4 7c0-1 .37-1.94 1-2.65V8h6V4.35c.63.71 1 1.65 1 2.65 0 1.42-.77 2.74-2 3.46m10.94 7.48a3.3 3.3 0 0 0 0-.89l.97-.73a.22.22 0 0 0 .06-.29l-.92-1.56c-.05-.1-.18-.14-.29-.1l-1.15.45c-.24-.17-.49-.32-.78-.44l-.17-1.19a.235.235 0 0 0-.23-.19h-1.85c-.12 0-.22.08-.24.19l-.17 1.19c-.29.12-.54.27-.78.44l-1.15-.45c-.1-.04-.24 0-.28.1l-.93 1.56c-.06.1-.03.22.06.29l.97.73c-.01.15-.03.3-.03.45s.02.29.03.44l-.97.74a.22.22 0 0 0-.06.29l.93 1.56c.04.1.18.13.28.1l1.15-.46c.24.18.49.33.78.45l.17 1.19c.02.11.12.19.24.19h1.85c.11 0 .21-.08.23-.19l.17-1.19c.29-.12.54-.27.78-.45l1.15.46c.11.03.24 0 .29-.1l.92-1.56a.22.22 0 0 0-.06-.29zM17.5 19c-.83 0-1.5-.67-1.5-1.5s.67-1.5 1.5-1.5 1.5.67 1.5 1.5-.67 1.5-1.5 1.5"/></svg>
|
|
|
|
<span class="md-ellipsis">
|
|
Advanced usage
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--nested">
|
|
|
|
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_3" >
|
|
|
|
|
|
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Key Concepts
|
|
</span>
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
|
|
<label class="md-nav__title" for="__nav_3">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
Key Concepts
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../concepts/chain/" class="md-nav__link">
|
|
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 1a2.5 2.5 0 0 0-2.5 2.5A2.5 2.5 0 0 0 11 5.79V7H7a2 2 0 0 0-2 2v.71A2.5 2.5 0 0 0 3.5 12 2.5 2.5 0 0 0 5 14.29V15H4a2 2 0 0 0-2 2v1.21A2.5 2.5 0 0 0 .5 20.5 2.5 2.5 0 0 0 3 23a2.5 2.5 0 0 0 2.5-2.5A2.5 2.5 0 0 0 4 18.21V17h4v1.21a2.5 2.5 0 0 0-1.5 2.29A2.5 2.5 0 0 0 9 23a2.5 2.5 0 0 0 2.5-2.5 2.5 2.5 0 0 0-1.5-2.29V17a2 2 0 0 0-2-2H7v-.71A2.5 2.5 0 0 0 8.5 12 2.5 2.5 0 0 0 7 9.71V9h10v.71A2.5 2.5 0 0 0 15.5 12a2.5 2.5 0 0 0 1.5 2.29V15h-1a2 2 0 0 0-2 2v1.21a2.5 2.5 0 0 0-1.5 2.29A2.5 2.5 0 0 0 15 23a2.5 2.5 0 0 0 2.5-2.5 2.5 2.5 0 0 0-1.5-2.29V17h4v1.21a2.5 2.5 0 0 0-1.5 2.29A2.5 2.5 0 0 0 21 23a2.5 2.5 0 0 0 2.5-2.5 2.5 2.5 0 0 0-1.5-2.29V17a2 2 0 0 0-2-2h-1v-.71A2.5 2.5 0 0 0 20.5 12 2.5 2.5 0 0 0 19 9.71V9a2 2 0 0 0-2-2h-4V5.79a2.5 2.5 0 0 0 1.5-2.29A2.5 2.5 0 0 0 12 1m0 1.5a1 1 0 0 1 1 1 1 1 0 0 1-1 1 1 1 0 0 1-1-1 1 1 0 0 1 1-1M6 11a1 1 0 0 1 1 1 1 1 0 0 1-1 1 1 1 0 0 1-1-1 1 1 0 0 1 1-1m12 0a1 1 0 0 1 1 1 1 1 0 0 1-1 1 1 1 0 0 1-1-1 1 1 0 0 1 1-1M3 19.5a1 1 0 0 1 1 1 1 1 0 0 1-1 1 1 1 0 0 1-1-1 1 1 0 0 1 1-1m6 0a1 1 0 0 1 1 1 1 1 0 0 1-1 1 1 1 0 0 1-1-1 1 1 0 0 1 1-1m6 0a1 1 0 0 1 1 1 1 1 0 0 1-1 1 1 1 0 0 1-1-1 1 1 0 0 1 1-1m6 0a1 1 0 0 1 1 1 1 1 0 0 1-1 1 1 1 0 0 1-1-1 1 1 0 0 1 1-1"/></svg>
|
|
|
|
<span class="md-ellipsis">
|
|
Chain
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../concepts/context/" class="md-nav__link">
|
|
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9 22a1 1 0 0 1-1-1v-3H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h16a2 2 0 0 1 2 2v12a2 2 0 0 1-2 2h-6.1l-3.7 3.71c-.2.19-.45.29-.7.29zm1-6v3.08L13.08 16H20V4H4v12zm3-6h-2V6h2zm0 4h-2v-2h2z"/></svg>
|
|
|
|
<span class="md-ellipsis">
|
|
Context
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../concepts/adapter/" class="md-nav__link">
|
|
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M2 12h2v5h16v-5h2v5a2 2 0 0 1-2 2H4a2 2 0 0 1-2-2m9-12h2v3h3v2h-3v3h-2v-3H8V8h3Z"/></svg>
|
|
|
|
<span class="md-ellipsis">
|
|
Adapter
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4" checked>
|
|
|
|
|
|
<label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Guides
|
|
</span>
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="true">
|
|
<label class="md-nav__title" for="__nav_4">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
Guides
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--active">
|
|
|
|
<input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
|
|
|
|
|
|
|
|
|
|
|
|
<label class="md-nav__link md-nav__link--active" for="__toc">
|
|
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M2 13h2v2h2v-2h2v2h2v-2h2v2h2v-5l3-3V1h2l4 2-4 2v2l3 3v12H11v-3a2 2 0 0 0-2-2 2 2 0 0 0-2 2v3H2zm16-3c-.55 0-1 .54-1 1.2V13h2v-1.8c0-.66-.45-1.2-1-1.2"/></svg>
|
|
|
|
<span class="md-ellipsis">
|
|
Adapting SDXL
|
|
</span>
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<a href="./" class="md-nav__link md-nav__link--active">
|
|
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M2 13h2v2h2v-2h2v2h2v-2h2v2h2v-5l3-3V1h2l4 2-4 2v2l3 3v12H11v-3a2 2 0 0 0-2-2 2 2 0 0 0-2 2v3H2zm16-3c-.55 0-1 .54-1 1.2V13h2v-1.8c0-.66-.45-1.2-1-1.2"/></svg>
|
|
|
|
<span class="md-ellipsis">
|
|
Adapting SDXL
|
|
</span>
|
|
|
|
|
|
</a>
|
|
|
|
|
|
|
|
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<label class="md-nav__title" for="__toc">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
Table of contents
|
|
</label>
|
|
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#prerequisites" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Prerequisites
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#single-lora" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Single LoRA
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#multiple-loras" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Multiple LoRAs
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#multiple-loras-ip-adapter" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Multiple LoRAs + IP-Adapter
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#everything-else-t2i-adapter" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Everything else + T2I-Adapter
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#wrap-up" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Wrap up
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../training_101/" class="md-nav__link">
|
|
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M18 22a2 2 0 0 0 2-2V4a2 2 0 0 0-2-2h-6v7L9.5 7.5 7 9V2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2z"/></svg>
|
|
|
|
<span class="md-ellipsis">
|
|
Training 101
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--nested">
|
|
|
|
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_5" >
|
|
|
|
|
|
<label class="md-nav__link" for="__nav_5" id="__nav_5_label" tabindex="0">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
API Reference
|
|
</span>
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_5_label" aria-expanded="false">
|
|
<label class="md-nav__title" for="__nav_5">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
API Reference
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--nested">
|
|
|
|
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_5_1" >
|
|
|
|
|
|
<label class="md-nav__link" for="__nav_5_1" id="__nav_5_1_label" tabindex="0">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Refiners
|
|
</span>
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_5_1_label" aria-expanded="false">
|
|
<label class="md-nav__title" for="__nav_5_1">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
Refiners
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--nested">
|
|
|
|
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_5_1_1" >
|
|
|
|
|
|
<label class="md-nav__link" for="__nav_5_1_1" id="__nav_5_1_1_label" tabindex="0">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Fluxion
|
|
</span>
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<nav class="md-nav" data-md-level="3" aria-labelledby="__nav_5_1_1_label" aria-expanded="false">
|
|
<label class="md-nav__title" for="__nav_5_1_1">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Fluxion
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../reference/fluxion/adapters/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Adapters
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../reference/fluxion/context/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Context
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../reference/fluxion/layers/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Layers
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../reference/fluxion/model_converter/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Model Converter
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../reference/fluxion/utils/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Utils
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--nested">
|
|
|
|
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_5_1_2" >
|
|
|
|
|
|
<label class="md-nav__link" for="__nav_5_1_2" id="__nav_5_1_2_label" tabindex="0">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Foundation Models
|
|
</span>
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<nav class="md-nav" data-md-level="3" aria-labelledby="__nav_5_1_2_label" aria-expanded="false">
|
|
<label class="md-nav__title" for="__nav_5_1_2">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Foundation Models
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../reference/foundationals/clip/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> CLIP
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../reference/foundationals/dinov2/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> DINOv2
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../reference/foundationals/latent_diffusion/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Latent Diffusion
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../reference/foundationals/segment_anything/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Segment Anything
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../reference/foundationals/swin/" class="md-nav__link">
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Swin Transformers
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
|
|
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
|
|
<div class="md-sidebar__scrollwrap">
|
|
<div class="md-sidebar__inner">
|
|
|
|
|
|
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<label class="md-nav__title" for="__toc">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
Table of contents
|
|
</label>
|
|
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#prerequisites" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Prerequisites
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#single-lora" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Single LoRA
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#multiple-loras" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Multiple LoRAs
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#multiple-loras-ip-adapter" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Multiple LoRAs + IP-Adapter
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#everything-else-t2i-adapter" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Everything else + T2I-Adapter
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#wrap-up" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Wrap up
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
|
|
</nav>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
|
|
<div class="md-content" data-md-component="content">
|
|
<article class="md-content__inner md-typeset">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<h1 id="adapting-stable-diffusion-xl">Adapting Stable Diffusion XL<a class="headerlink" href="#adapting-stable-diffusion-xl" title="Permanent link">¶</a></h1>
|
|
<p>Stable Diffusion XL (SDXL) is a very popular text-to-image open-source foundation model. This guide will show you how to boost its capabilities with Refiners, using iconic adapters the framework supports out of the box, i.e. without the need for tedious prompt engineering. We'll follow a step-by-step approach, progressively increasing the number of adapters involved to showcase how simple adapter composition is using Refiners. Our use case will be the generation of an image with "a futuristic castle surrounded by a forest, mountains in the background".</p>
|
|
<h2 id="prerequisites">Prerequisites<a class="headerlink" href="#prerequisites" title="Permanent link">¶</a></h2>
|
|
<p>Make sure Refiners is installed in your local environment (see <a href="../../getting-started/recommended/">Getting started</a>) and that you have access to a decent GPU.</p>
|
|
<div class="admonition warning">
|
|
<p class="admonition-title">Warning</p>
|
|
<p>As the examples in this guide's code snippets use CUDA, a minimum of 24 GB of VRAM is needed.</p>
|
|
</div>
|
|
<p>Before diving into the adapters themselves, let's establish a baseline by simply prompting SDXL with Refiners.</p>
|
|
<div class="admonition note">
|
|
<p class="admonition-title">Reminder</p>
|
|
<p>A Stable Diffusion model is composed of three modules:</p>
|
|
<ul>
|
|
<li>An Autoencoder, responsible for embedding images into a latent space;</li>
|
|
<li>A UNet, responsible for the diffusion process;</li>
|
|
<li>A prompt encoder, such as CLIP, responsible for encoding the user prompt which will guide the diffusion process.</li>
|
|
</ul>
|
|
</div>
|
|
<p>As Refiners comes with a new model representation (see <a href="../../concepts/chain/">Chain</a>), you need to download and convert the weights of each module by calling our conversion scripts directly from your terminal (make sure you're in your local <code>refiners</code> directory, with your local environment active):</p>
|
|
<div class="language-bash highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a>python<span class="w"> </span>scripts/conversion/convert_transformers_clip_text_model.py<span class="w"> </span>--from<span class="w"> </span><span class="s2">"stabilityai/stable-diffusion-xl-base-1.0"</span><span class="w"> </span>--subfolder2<span class="w"> </span>text_encoder_2<span class="w"> </span>--to<span class="w"> </span>DoubleCLIPTextEncoder.safetensors<span class="w"> </span>--half
|
|
</span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a>python<span class="w"> </span>scripts/conversion/convert_diffusers_unet.py<span class="w"> </span>--from<span class="w"> </span><span class="s2">"stabilityai/stable-diffusion-xl-base-1.0"</span><span class="w"> </span>--to<span class="w"> </span>sdxl-unet.safetensors<span class="w"> </span>--half
|
|
</span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a>python<span class="w"> </span>scripts/conversion/convert_diffusers_autoencoder_kl.py<span class="w"> </span>--from<span class="w"> </span><span class="s2">"madebyollin/sdxl-vae-fp16-fix"</span><span class="w"> </span>--subfolder<span class="w"> </span><span class="s2">""</span><span class="w"> </span>--to<span class="w"> </span>sdxl-lda.safetensors<span class="w"> </span>--half
|
|
</span></code></pre></div>
|
|
<div class="admonition note">
|
|
<p class="admonition-title">Note</p>
|
|
<p>This will download the original weights from https://huggingface.co/ which takes some time. If you already have this repo cloned locally, use the <code>--from /path/to/stabilityai/stable-diffusion-xl-base-1.0</code> option instead.</p>
|
|
</div>
|
|
<p>Now, we can write the Python script responsible for inference. Just create a simple <code>inference.py</code> file, and open it in your favorite editor.</p>
|
|
<p>Start by instantiating a <a class="autorefs autorefs-internal" href="../../reference/foundationals/latent_diffusion/#refiners.foundationals.latent_diffusion.stable_diffusion_xl.StableDiffusion_XL"><code>StableDiffusion_XL</code></a> model and load it with the converted weights:</p>
|
|
<div class="language-py highlight"><pre><span></span><code><span id="__span-1-1"><a id="__codelineno-1-1" name="__codelineno-1-1" href="#__codelineno-1-1"></a><span class="kn">import</span> <span class="nn">torch</span>
|
|
</span><span id="__span-1-2"><a id="__codelineno-1-2" name="__codelineno-1-2" href="#__codelineno-1-2"></a>
|
|
</span><span id="__span-1-3"><a id="__codelineno-1-3" name="__codelineno-1-3" href="#__codelineno-1-3"></a><span class="kn">from</span> <span class="nn">refiners.fluxion.utils</span> <span class="kn">import</span> <span class="n">manual_seed</span><span class="p">,</span> <span class="n">no_grad</span>
|
|
</span><span id="__span-1-4"><a id="__codelineno-1-4" name="__codelineno-1-4" href="#__codelineno-1-4"></a><span class="kn">from</span> <span class="nn">refiners.foundationals.latent_diffusion.stable_diffusion_xl</span> <span class="kn">import</span> <span class="n">StableDiffusion_XL</span>
|
|
</span><span id="__span-1-5"><a id="__codelineno-1-5" name="__codelineno-1-5" href="#__codelineno-1-5"></a>
|
|
</span><span id="__span-1-6"><a id="__codelineno-1-6" name="__codelineno-1-6" href="#__codelineno-1-6"></a><span class="c1"># Load SDXL</span>
|
|
</span><span id="__span-1-7"><a id="__codelineno-1-7" name="__codelineno-1-7" href="#__codelineno-1-7"></a><span class="n">sdxl</span> <span class="o">=</span> <span class="n">StableDiffusion_XL</span><span class="p">(</span><span class="n">device</span><span class="o">=</span><span class="s2">"cuda"</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">torch</span><span class="o">.</span><span class="n">float16</span><span class="p">)</span> <span class="c1"># Using half-precision for memory efficiency</span>
|
|
</span><span id="__span-1-8"><a id="__codelineno-1-8" name="__codelineno-1-8" href="#__codelineno-1-8"></a><span class="n">sdxl</span><span class="o">.</span><span class="n">clip_text_encoder</span><span class="o">.</span><span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"DoubleCLIPTextEncoder.safetensors"</span><span class="p">)</span>
|
|
</span><span id="__span-1-9"><a id="__codelineno-1-9" name="__codelineno-1-9" href="#__codelineno-1-9"></a><span class="n">sdxl</span><span class="o">.</span><span class="n">unet</span><span class="o">.</span><span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"sdxl-unet.safetensors"</span><span class="p">)</span>
|
|
</span><span id="__span-1-10"><a id="__codelineno-1-10" name="__codelineno-1-10" href="#__codelineno-1-10"></a><span class="n">sdxl</span><span class="o">.</span><span class="n">lda</span><span class="o">.</span><span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"sdxl-lda.safetensors"</span><span class="p">)</span>
|
|
</span></code></pre></div>
|
|
<p>Then, define the inference parameters by setting the appropriate prompt / seed / inference steps:</p>
|
|
<div class="language-py highlight"><pre><span></span><code><span id="__span-2-1"><a id="__codelineno-2-1" name="__codelineno-2-1" href="#__codelineno-2-1"></a><span class="c1"># Hyperparameters</span>
|
|
</span><span id="__span-2-2"><a id="__codelineno-2-2" name="__codelineno-2-2" href="#__codelineno-2-2"></a><span class="n">prompt</span> <span class="o">=</span> <span class="s2">"a futuristic castle surrounded by a forest, mountains in the background"</span>
|
|
</span><span id="__span-2-3"><a id="__codelineno-2-3" name="__codelineno-2-3" href="#__codelineno-2-3"></a><span class="n">seed</span> <span class="o">=</span> <span class="mi">42</span>
|
|
</span><span id="__span-2-4"><a id="__codelineno-2-4" name="__codelineno-2-4" href="#__codelineno-2-4"></a><span class="n">sdxl</span><span class="o">.</span><span class="n">set_inference_steps</span><span class="p">(</span><span class="mi">50</span><span class="p">,</span> <span class="n">first_step</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
|
</span><span id="__span-2-5"><a id="__codelineno-2-5" name="__codelineno-2-5" href="#__codelineno-2-5"></a>
|
|
</span><span id="__span-2-6"><a id="__codelineno-2-6" name="__codelineno-2-6" href="#__codelineno-2-6"></a><span class="c1"># Enable self-attention guidance to enhance the quality of the generated images</span>
|
|
</span><span id="__span-2-7"><a id="__codelineno-2-7" name="__codelineno-2-7" href="#__codelineno-2-7"></a><span class="n">sdxl</span><span class="o">.</span><span class="n">set_self_attention_guidance</span><span class="p">(</span><span class="n">enable</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">scale</span><span class="o">=</span><span class="mf">0.75</span><span class="p">)</span>
|
|
</span><span id="__span-2-8"><a id="__codelineno-2-8" name="__codelineno-2-8" href="#__codelineno-2-8"></a>
|
|
</span><span id="__span-2-9"><a id="__codelineno-2-9" name="__codelineno-2-9" href="#__codelineno-2-9"></a><span class="c1"># ... Inference process</span>
|
|
</span></code></pre></div>
|
|
<p>You can now define and run the proper inference process:</p>
|
|
<div class="language-py highlight"><pre><span></span><code><span id="__span-3-1"><a id="__codelineno-3-1" name="__codelineno-3-1" href="#__codelineno-3-1"></a><span class="k">with</span> <span class="n">no_grad</span><span class="p">():</span> <span class="c1"># Disable gradient calculation for memory-efficient inference</span>
|
|
</span><span id="__span-3-2"><a id="__codelineno-3-2" name="__codelineno-3-2" href="#__codelineno-3-2"></a> <span class="n">clip_text_embedding</span><span class="p">,</span> <span class="n">pooled_text_embedding</span> <span class="o">=</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">compute_clip_text_embedding</span><span class="p">(</span>
|
|
</span><span id="__span-3-3"><a id="__codelineno-3-3" name="__codelineno-3-3" href="#__codelineno-3-3"></a> <span class="n">text</span><span class="o">=</span><span class="n">prompt</span> <span class="o">+</span> <span class="s2">", best quality, high quality"</span><span class="p">,</span>
|
|
</span><span id="__span-3-4"><a id="__codelineno-3-4" name="__codelineno-3-4" href="#__codelineno-3-4"></a> <span class="n">negative_text</span><span class="o">=</span><span class="s2">"monochrome, lowres, bad anatomy, worst quality, low quality"</span><span class="p">,</span>
|
|
</span><span id="__span-3-5"><a id="__codelineno-3-5" name="__codelineno-3-5" href="#__codelineno-3-5"></a> <span class="p">)</span>
|
|
</span><span id="__span-3-6"><a id="__codelineno-3-6" name="__codelineno-3-6" href="#__codelineno-3-6"></a> <span class="n">time_ids</span> <span class="o">=</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">default_time_ids</span>
|
|
</span><span id="__span-3-7"><a id="__codelineno-3-7" name="__codelineno-3-7" href="#__codelineno-3-7"></a>
|
|
</span><span id="__span-3-8"><a id="__codelineno-3-8" name="__codelineno-3-8" href="#__codelineno-3-8"></a> <span class="n">manual_seed</span><span class="p">(</span><span class="n">seed</span><span class="p">)</span>
|
|
</span><span id="__span-3-9"><a id="__codelineno-3-9" name="__codelineno-3-9" href="#__codelineno-3-9"></a>
|
|
</span><span id="__span-3-10"><a id="__codelineno-3-10" name="__codelineno-3-10" href="#__codelineno-3-10"></a> <span class="c1"># SDXL typically generates 1024x1024, here we use a higher resolution.</span>
|
|
</span><span id="__span-3-11"><a id="__codelineno-3-11" name="__codelineno-3-11" href="#__codelineno-3-11"></a> <span class="n">x</span> <span class="o">=</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">init_latents</span><span class="p">((</span><span class="mi">2048</span><span class="p">,</span> <span class="mi">2048</span><span class="p">))</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">sdxl</span><span class="o">.</span><span class="n">device</span><span class="p">,</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span>
|
|
</span><span id="__span-3-12"><a id="__codelineno-3-12" name="__codelineno-3-12" href="#__codelineno-3-12"></a>
|
|
</span><span id="__span-3-13"><a id="__codelineno-3-13" name="__codelineno-3-13" href="#__codelineno-3-13"></a> <span class="c1"># Diffusion process</span>
|
|
</span><span id="__span-3-14"><a id="__codelineno-3-14" name="__codelineno-3-14" href="#__codelineno-3-14"></a> <span class="k">for</span> <span class="n">step</span> <span class="ow">in</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">steps</span><span class="p">:</span>
|
|
</span><span id="__span-3-15"><a id="__codelineno-3-15" name="__codelineno-3-15" href="#__codelineno-3-15"></a> <span class="k">if</span> <span class="n">step</span> <span class="o">%</span> <span class="mi">10</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
|
</span><span id="__span-3-16"><a id="__codelineno-3-16" name="__codelineno-3-16" href="#__codelineno-3-16"></a> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Step </span><span class="si">{</span><span class="n">step</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
|
</span><span id="__span-3-17"><a id="__codelineno-3-17" name="__codelineno-3-17" href="#__codelineno-3-17"></a> <span class="n">x</span> <span class="o">=</span> <span class="n">sdxl</span><span class="p">(</span>
|
|
</span><span id="__span-3-18"><a id="__codelineno-3-18" name="__codelineno-3-18" href="#__codelineno-3-18"></a> <span class="n">x</span><span class="p">,</span>
|
|
</span><span id="__span-3-19"><a id="__codelineno-3-19" name="__codelineno-3-19" href="#__codelineno-3-19"></a> <span class="n">step</span><span class="o">=</span><span class="n">step</span><span class="p">,</span>
|
|
</span><span id="__span-3-20"><a id="__codelineno-3-20" name="__codelineno-3-20" href="#__codelineno-3-20"></a> <span class="n">clip_text_embedding</span><span class="o">=</span><span class="n">clip_text_embedding</span><span class="p">,</span>
|
|
</span><span id="__span-3-21"><a id="__codelineno-3-21" name="__codelineno-3-21" href="#__codelineno-3-21"></a> <span class="n">pooled_text_embedding</span><span class="o">=</span><span class="n">pooled_text_embedding</span><span class="p">,</span>
|
|
</span><span id="__span-3-22"><a id="__codelineno-3-22" name="__codelineno-3-22" href="#__codelineno-3-22"></a> <span class="n">time_ids</span><span class="o">=</span><span class="n">time_ids</span><span class="p">,</span>
|
|
</span><span id="__span-3-23"><a id="__codelineno-3-23" name="__codelineno-3-23" href="#__codelineno-3-23"></a> <span class="p">)</span>
|
|
</span><span id="__span-3-24"><a id="__codelineno-3-24" name="__codelineno-3-24" href="#__codelineno-3-24"></a> <span class="n">predicted_image</span> <span class="o">=</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">lda</span><span class="o">.</span><span class="n">decode_latents</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
|
|
</span><span id="__span-3-25"><a id="__codelineno-3-25" name="__codelineno-3-25" href="#__codelineno-3-25"></a>
|
|
</span><span id="__span-3-26"><a id="__codelineno-3-26" name="__codelineno-3-26" href="#__codelineno-3-26"></a><span class="n">predicted_image</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="s2">"vanilla_sdxl.png"</span><span class="p">)</span>
|
|
</span></code></pre></div>
|
|
<details class="example">
|
|
<summary>Expand to see the entire end-to-end code</summary>
|
|
<div class="language-py highlight"><pre><span></span><code><span id="__span-4-1"><a id="__codelineno-4-1" name="__codelineno-4-1" href="#__codelineno-4-1"></a><span class="kn">import</span> <span class="nn">torch</span>
|
|
</span><span id="__span-4-2"><a id="__codelineno-4-2" name="__codelineno-4-2" href="#__codelineno-4-2"></a>
|
|
</span><span id="__span-4-3"><a id="__codelineno-4-3" name="__codelineno-4-3" href="#__codelineno-4-3"></a><span class="kn">from</span> <span class="nn">refiners.fluxion.utils</span> <span class="kn">import</span> <span class="n">manual_seed</span><span class="p">,</span> <span class="n">no_grad</span>
|
|
</span><span id="__span-4-4"><a id="__codelineno-4-4" name="__codelineno-4-4" href="#__codelineno-4-4"></a><span class="kn">from</span> <span class="nn">refiners.foundationals.latent_diffusion.stable_diffusion_xl</span> <span class="kn">import</span> <span class="n">StableDiffusion_XL</span>
|
|
</span><span id="__span-4-5"><a id="__codelineno-4-5" name="__codelineno-4-5" href="#__codelineno-4-5"></a>
|
|
</span><span id="__span-4-6"><a id="__codelineno-4-6" name="__codelineno-4-6" href="#__codelineno-4-6"></a><span class="c1"># Load SDXL</span>
|
|
</span><span id="__span-4-7"><a id="__codelineno-4-7" name="__codelineno-4-7" href="#__codelineno-4-7"></a><span class="n">sdxl</span> <span class="o">=</span> <span class="n">StableDiffusion_XL</span><span class="p">(</span><span class="n">device</span><span class="o">=</span><span class="s2">"cuda"</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">torch</span><span class="o">.</span><span class="n">float16</span><span class="p">)</span>
|
|
</span><span id="__span-4-8"><a id="__codelineno-4-8" name="__codelineno-4-8" href="#__codelineno-4-8"></a><span class="n">sdxl</span><span class="o">.</span><span class="n">clip_text_encoder</span><span class="o">.</span><span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"DoubleCLIPTextEncoder.safetensors"</span><span class="p">)</span>
|
|
</span><span id="__span-4-9"><a id="__codelineno-4-9" name="__codelineno-4-9" href="#__codelineno-4-9"></a><span class="n">sdxl</span><span class="o">.</span><span class="n">unet</span><span class="o">.</span><span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"sdxl-unet.safetensors"</span><span class="p">)</span>
|
|
</span><span id="__span-4-10"><a id="__codelineno-4-10" name="__codelineno-4-10" href="#__codelineno-4-10"></a><span class="n">sdxl</span><span class="o">.</span><span class="n">lda</span><span class="o">.</span><span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"sdxl-lda.safetensors"</span><span class="p">)</span>
|
|
</span><span id="__span-4-11"><a id="__codelineno-4-11" name="__codelineno-4-11" href="#__codelineno-4-11"></a>
|
|
</span><span id="__span-4-12"><a id="__codelineno-4-12" name="__codelineno-4-12" href="#__codelineno-4-12"></a><span class="c1"># Hyperparameters</span>
|
|
</span><span id="__span-4-13"><a id="__codelineno-4-13" name="__codelineno-4-13" href="#__codelineno-4-13"></a><span class="n">prompt</span> <span class="o">=</span> <span class="s2">"a futuristic castle surrounded by a forest, mountains in the background"</span>
|
|
</span><span id="__span-4-14"><a id="__codelineno-4-14" name="__codelineno-4-14" href="#__codelineno-4-14"></a><span class="n">seed</span> <span class="o">=</span> <span class="mi">42</span>
|
|
</span><span id="__span-4-15"><a id="__codelineno-4-15" name="__codelineno-4-15" href="#__codelineno-4-15"></a><span class="n">sdxl</span><span class="o">.</span><span class="n">set_inference_steps</span><span class="p">(</span><span class="mi">50</span><span class="p">,</span> <span class="n">first_step</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
|
</span><span id="__span-4-16"><a id="__codelineno-4-16" name="__codelineno-4-16" href="#__codelineno-4-16"></a><span class="n">sdxl</span><span class="o">.</span><span class="n">set_self_attention_guidance</span><span class="p">(</span>
|
|
</span><span id="__span-4-17"><a id="__codelineno-4-17" name="__codelineno-4-17" href="#__codelineno-4-17"></a> <span class="n">enable</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">scale</span><span class="o">=</span><span class="mf">0.75</span>
|
|
</span><span id="__span-4-18"><a id="__codelineno-4-18" name="__codelineno-4-18" href="#__codelineno-4-18"></a><span class="p">)</span> <span class="c1"># Enable self-attention guidance to enhance the quality of the generated images</span>
|
|
</span><span id="__span-4-19"><a id="__codelineno-4-19" name="__codelineno-4-19" href="#__codelineno-4-19"></a>
|
|
</span><span id="__span-4-20"><a id="__codelineno-4-20" name="__codelineno-4-20" href="#__codelineno-4-20"></a>
|
|
</span><span id="__span-4-21"><a id="__codelineno-4-21" name="__codelineno-4-21" href="#__codelineno-4-21"></a><span class="k">with</span> <span class="n">no_grad</span><span class="p">():</span> <span class="c1"># Disable gradient calculation for memory-efficient inference</span>
|
|
</span><span id="__span-4-22"><a id="__codelineno-4-22" name="__codelineno-4-22" href="#__codelineno-4-22"></a> <span class="n">clip_text_embedding</span><span class="p">,</span> <span class="n">pooled_text_embedding</span> <span class="o">=</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">compute_clip_text_embedding</span><span class="p">(</span>
|
|
</span><span id="__span-4-23"><a id="__codelineno-4-23" name="__codelineno-4-23" href="#__codelineno-4-23"></a> <span class="n">text</span><span class="o">=</span><span class="n">prompt</span> <span class="o">+</span> <span class="s2">", best quality, high quality"</span><span class="p">,</span>
|
|
</span><span id="__span-4-24"><a id="__codelineno-4-24" name="__codelineno-4-24" href="#__codelineno-4-24"></a> <span class="n">negative_text</span><span class="o">=</span><span class="s2">"monochrome, lowres, bad anatomy, worst quality, low quality"</span><span class="p">,</span>
|
|
</span><span id="__span-4-25"><a id="__codelineno-4-25" name="__codelineno-4-25" href="#__codelineno-4-25"></a> <span class="p">)</span>
|
|
</span><span id="__span-4-26"><a id="__codelineno-4-26" name="__codelineno-4-26" href="#__codelineno-4-26"></a> <span class="n">time_ids</span> <span class="o">=</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">default_time_ids</span>
|
|
</span><span id="__span-4-27"><a id="__codelineno-4-27" name="__codelineno-4-27" href="#__codelineno-4-27"></a>
|
|
</span><span id="__span-4-28"><a id="__codelineno-4-28" name="__codelineno-4-28" href="#__codelineno-4-28"></a> <span class="n">manual_seed</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
|
|
</span><span id="__span-4-29"><a id="__codelineno-4-29" name="__codelineno-4-29" href="#__codelineno-4-29"></a>
|
|
</span><span id="__span-4-30"><a id="__codelineno-4-30" name="__codelineno-4-30" href="#__codelineno-4-30"></a> <span class="c1"># SDXL typically generates 1024x1024, here we use a higher resolution.</span>
|
|
</span><span id="__span-4-31"><a id="__codelineno-4-31" name="__codelineno-4-31" href="#__codelineno-4-31"></a> <span class="n">x</span> <span class="o">=</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">init_latents</span><span class="p">((</span><span class="mi">2048</span><span class="p">,</span> <span class="mi">2048</span><span class="p">))</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">sdxl</span><span class="o">.</span><span class="n">device</span><span class="p">,</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span>
|
|
</span><span id="__span-4-32"><a id="__codelineno-4-32" name="__codelineno-4-32" href="#__codelineno-4-32"></a>
|
|
</span><span id="__span-4-33"><a id="__codelineno-4-33" name="__codelineno-4-33" href="#__codelineno-4-33"></a> <span class="c1"># Diffusion process</span>
|
|
</span><span id="__span-4-34"><a id="__codelineno-4-34" name="__codelineno-4-34" href="#__codelineno-4-34"></a> <span class="k">for</span> <span class="n">step</span> <span class="ow">in</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">steps</span><span class="p">:</span>
|
|
</span><span id="__span-4-35"><a id="__codelineno-4-35" name="__codelineno-4-35" href="#__codelineno-4-35"></a> <span class="k">if</span> <span class="n">step</span> <span class="o">%</span> <span class="mi">10</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
|
</span><span id="__span-4-36"><a id="__codelineno-4-36" name="__codelineno-4-36" href="#__codelineno-4-36"></a> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Step </span><span class="si">{</span><span class="n">step</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
|
</span><span id="__span-4-37"><a id="__codelineno-4-37" name="__codelineno-4-37" href="#__codelineno-4-37"></a> <span class="n">x</span> <span class="o">=</span> <span class="n">sdxl</span><span class="p">(</span>
|
|
</span><span id="__span-4-38"><a id="__codelineno-4-38" name="__codelineno-4-38" href="#__codelineno-4-38"></a> <span class="n">x</span><span class="p">,</span>
|
|
</span><span id="__span-4-39"><a id="__codelineno-4-39" name="__codelineno-4-39" href="#__codelineno-4-39"></a> <span class="n">step</span><span class="o">=</span><span class="n">step</span><span class="p">,</span>
|
|
</span><span id="__span-4-40"><a id="__codelineno-4-40" name="__codelineno-4-40" href="#__codelineno-4-40"></a> <span class="n">clip_text_embedding</span><span class="o">=</span><span class="n">clip_text_embedding</span><span class="p">,</span>
|
|
</span><span id="__span-4-41"><a id="__codelineno-4-41" name="__codelineno-4-41" href="#__codelineno-4-41"></a> <span class="n">pooled_text_embedding</span><span class="o">=</span><span class="n">pooled_text_embedding</span><span class="p">,</span>
|
|
</span><span id="__span-4-42"><a id="__codelineno-4-42" name="__codelineno-4-42" href="#__codelineno-4-42"></a> <span class="n">time_ids</span><span class="o">=</span><span class="n">time_ids</span><span class="p">,</span>
|
|
</span><span id="__span-4-43"><a id="__codelineno-4-43" name="__codelineno-4-43" href="#__codelineno-4-43"></a> <span class="p">)</span>
|
|
</span><span id="__span-4-44"><a id="__codelineno-4-44" name="__codelineno-4-44" href="#__codelineno-4-44"></a> <span class="n">predicted_image</span> <span class="o">=</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">lda</span><span class="o">.</span><span class="n">decode_latents</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
|
|
</span><span id="__span-4-45"><a id="__codelineno-4-45" name="__codelineno-4-45" href="#__codelineno-4-45"></a>
|
|
</span><span id="__span-4-46"><a id="__codelineno-4-46" name="__codelineno-4-46" href="#__codelineno-4-46"></a><span class="n">predicted_image</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="s2">"vanilla_sdxl.png"</span><span class="p">)</span>
|
|
</span></code></pre></div>
|
|
</details>
|
|
<p>It's time to execute your code. The resulting image should look like this:</p>
|
|
<figure>
|
|
<img src="vanilla_sdxl.webp" alt="Generated image of a castle using default SDXL weights" width="400">
|
|
<figcaption>Generated image of a castle using default SDXL weights.</figcaption>
|
|
</figure>
|
|
|
|
<p>It is not really what we prompted the model for, unfortunately. To get a more futuristic-looking castle, you can either go for tedious prompt engineering, or use a pretrained LoRA tailored to our use case, like the <a href="https://civitai.com/models/105945?modelVersionId=140624">Sci-fi Environments</a> LoRA available on Civitai. We'll now show you how the LoRA option works with Refiners.</p>
|
|
<h2 id="single-lora">Single LoRA<a class="headerlink" href="#single-lora" title="Permanent link">¶</a></h2>
|
|
<p>To use the <a href="https://civitai.com/models/105945?modelVersionId=140624">Sci-fi Environments</a> LoRA, all you have to do is download its weights to disk as a <code>.safetensors</code> file, and inject them into SDXL using <a class="autorefs autorefs-internal" href="../../reference/foundationals/latent_diffusion/#refiners.foundationals.latent_diffusion.lora.SDLoraManager"><code>SDLoraManager</code></a> right after instantiating <code>StableDiffusion_XL</code>:</p>
|
|
<div class="language-py highlight"><pre><span></span><code><span id="__span-5-1"><a id="__codelineno-5-1" name="__codelineno-5-1" href="#__codelineno-5-1"></a><span class="kn">from</span> <span class="nn">refiners.fluxion.utils</span> <span class="kn">import</span> <span class="n">load_from_safetensors</span>
|
|
</span><span id="__span-5-2"><a id="__codelineno-5-2" name="__codelineno-5-2" href="#__codelineno-5-2"></a><span class="kn">from</span> <span class="nn">refiners.foundationals.latent_diffusion.lora</span> <span class="kn">import</span> <span class="n">SDLoraManager</span>
|
|
</span><span id="__span-5-3"><a id="__codelineno-5-3" name="__codelineno-5-3" href="#__codelineno-5-3"></a>
|
|
</span><span id="__span-5-4"><a id="__codelineno-5-4" name="__codelineno-5-4" href="#__codelineno-5-4"></a><span class="c1"># Load LoRA weights from disk and inject them into target</span>
|
|
</span><span id="__span-5-5"><a id="__codelineno-5-5" name="__codelineno-5-5" href="#__codelineno-5-5"></a><span class="n">manager</span> <span class="o">=</span> <span class="n">SDLoraManager</span><span class="p">(</span><span class="n">sdxl</span><span class="p">)</span>
|
|
</span><span id="__span-5-6"><a id="__codelineno-5-6" name="__codelineno-5-6" href="#__codelineno-5-6"></a><span class="n">scifi_lora_weights</span> <span class="o">=</span> <span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"Sci-fi_Environments_sdxl.safetensors"</span><span class="p">)</span>
|
|
</span><span id="__span-5-7"><a id="__codelineno-5-7" name="__codelineno-5-7" href="#__codelineno-5-7"></a><span class="n">manager</span><span class="o">.</span><span class="n">add_loras</span><span class="p">(</span><span class="s2">"scifi-lora"</span><span class="p">,</span> <span class="n">tensors</span><span class="o">=</span><span class="n">scifi_lora_weights</span><span class="p">)</span>
|
|
</span></code></pre></div>
|
|
<details class="example">
|
|
<summary>Expand to see the entire end-to-end code</summary>
|
|
<div class="language-py highlight"><pre><span></span><code><span id="__span-6-1"><a id="__codelineno-6-1" name="__codelineno-6-1" href="#__codelineno-6-1"></a><span class="kn">import</span> <span class="nn">torch</span>
|
|
</span><span id="__span-6-2"><a id="__codelineno-6-2" name="__codelineno-6-2" href="#__codelineno-6-2"></a>
|
|
</span><span id="__span-6-3"><a id="__codelineno-6-3" name="__codelineno-6-3" href="#__codelineno-6-3"></a><span class="kn">from</span> <span class="nn">refiners.fluxion.utils</span> <span class="kn">import</span> <span class="n">load_from_safetensors</span><span class="p">,</span> <span class="n">manual_seed</span><span class="p">,</span> <span class="n">no_grad</span>
|
|
</span><span id="__span-6-4"><a id="__codelineno-6-4" name="__codelineno-6-4" href="#__codelineno-6-4"></a><span class="kn">from</span> <span class="nn">refiners.foundationals.latent_diffusion.lora</span> <span class="kn">import</span> <span class="n">SDLoraManager</span>
|
|
</span><span id="__span-6-5"><a id="__codelineno-6-5" name="__codelineno-6-5" href="#__codelineno-6-5"></a><span class="kn">from</span> <span class="nn">refiners.foundationals.latent_diffusion.stable_diffusion_xl</span> <span class="kn">import</span> <span class="n">StableDiffusion_XL</span>
|
|
</span><span id="__span-6-6"><a id="__codelineno-6-6" name="__codelineno-6-6" href="#__codelineno-6-6"></a>
|
|
</span><span id="__span-6-7"><a id="__codelineno-6-7" name="__codelineno-6-7" href="#__codelineno-6-7"></a><span class="c1"># Load SDXL</span>
|
|
</span><span id="__span-6-8"><a id="__codelineno-6-8" name="__codelineno-6-8" href="#__codelineno-6-8"></a><span class="n">sdxl</span> <span class="o">=</span> <span class="n">StableDiffusion_XL</span><span class="p">(</span><span class="n">device</span><span class="o">=</span><span class="s2">"cuda"</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">torch</span><span class="o">.</span><span class="n">float16</span><span class="p">)</span>
|
|
</span><span id="__span-6-9"><a id="__codelineno-6-9" name="__codelineno-6-9" href="#__codelineno-6-9"></a><span class="n">sdxl</span><span class="o">.</span><span class="n">clip_text_encoder</span><span class="o">.</span><span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"DoubleCLIPTextEncoder.safetensors"</span><span class="p">)</span>
|
|
</span><span id="__span-6-10"><a id="__codelineno-6-10" name="__codelineno-6-10" href="#__codelineno-6-10"></a><span class="n">sdxl</span><span class="o">.</span><span class="n">unet</span><span class="o">.</span><span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"sdxl-unet.safetensors"</span><span class="p">)</span>
|
|
</span><span id="__span-6-11"><a id="__codelineno-6-11" name="__codelineno-6-11" href="#__codelineno-6-11"></a><span class="n">sdxl</span><span class="o">.</span><span class="n">lda</span><span class="o">.</span><span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"sdxl-lda.safetensors"</span><span class="p">)</span>
|
|
</span><span id="__span-6-12"><a id="__codelineno-6-12" name="__codelineno-6-12" href="#__codelineno-6-12"></a>
|
|
</span><span id="__span-6-13"><a id="__codelineno-6-13" name="__codelineno-6-13" href="#__codelineno-6-13"></a><span class="c1"># Load LoRA weights from disk and inject them into target</span>
|
|
</span><span id="__span-6-14"><a id="__codelineno-6-14" name="__codelineno-6-14" href="#__codelineno-6-14"></a><span class="n">manager</span> <span class="o">=</span> <span class="n">SDLoraManager</span><span class="p">(</span><span class="n">sdxl</span><span class="p">)</span>
|
|
</span><span id="__span-6-15"><a id="__codelineno-6-15" name="__codelineno-6-15" href="#__codelineno-6-15"></a><span class="n">scifi_lora_weights</span> <span class="o">=</span> <span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"Sci-fi_Environments_sdxl.safetensors"</span><span class="p">)</span>
|
|
</span><span id="__span-6-16"><a id="__codelineno-6-16" name="__codelineno-6-16" href="#__codelineno-6-16"></a><span class="n">manager</span><span class="o">.</span><span class="n">add_loras</span><span class="p">(</span><span class="s2">"scifi-lora"</span><span class="p">,</span> <span class="n">tensors</span><span class="o">=</span><span class="n">scifi_lora_weights</span><span class="p">)</span>
|
|
</span><span id="__span-6-17"><a id="__codelineno-6-17" name="__codelineno-6-17" href="#__codelineno-6-17"></a>
|
|
</span><span id="__span-6-18"><a id="__codelineno-6-18" name="__codelineno-6-18" href="#__codelineno-6-18"></a><span class="c1"># Hyperparameters</span>
|
|
</span><span id="__span-6-19"><a id="__codelineno-6-19" name="__codelineno-6-19" href="#__codelineno-6-19"></a><span class="n">prompt</span> <span class="o">=</span> <span class="s2">"a futuristic castle surrounded by a forest, mountains in the background"</span>
|
|
</span><span id="__span-6-20"><a id="__codelineno-6-20" name="__codelineno-6-20" href="#__codelineno-6-20"></a><span class="n">seed</span> <span class="o">=</span> <span class="mi">42</span>
|
|
</span><span id="__span-6-21"><a id="__codelineno-6-21" name="__codelineno-6-21" href="#__codelineno-6-21"></a><span class="n">sdxl</span><span class="o">.</span><span class="n">set_inference_steps</span><span class="p">(</span><span class="mi">50</span><span class="p">,</span> <span class="n">first_step</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
|
</span><span id="__span-6-22"><a id="__codelineno-6-22" name="__codelineno-6-22" href="#__codelineno-6-22"></a><span class="n">sdxl</span><span class="o">.</span><span class="n">set_self_attention_guidance</span><span class="p">(</span>
|
|
</span><span id="__span-6-23"><a id="__codelineno-6-23" name="__codelineno-6-23" href="#__codelineno-6-23"></a> <span class="n">enable</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">scale</span><span class="o">=</span><span class="mf">0.75</span>
|
|
</span><span id="__span-6-24"><a id="__codelineno-6-24" name="__codelineno-6-24" href="#__codelineno-6-24"></a><span class="p">)</span> <span class="c1"># Enable self-attention guidance to enhance the quality of the generated images</span>
|
|
</span><span id="__span-6-25"><a id="__codelineno-6-25" name="__codelineno-6-25" href="#__codelineno-6-25"></a>
|
|
</span><span id="__span-6-26"><a id="__codelineno-6-26" name="__codelineno-6-26" href="#__codelineno-6-26"></a><span class="k">with</span> <span class="n">no_grad</span><span class="p">():</span>
|
|
</span><span id="__span-6-27"><a id="__codelineno-6-27" name="__codelineno-6-27" href="#__codelineno-6-27"></a> <span class="n">clip_text_embedding</span><span class="p">,</span> <span class="n">pooled_text_embedding</span> <span class="o">=</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">compute_clip_text_embedding</span><span class="p">(</span>
|
|
</span><span id="__span-6-28"><a id="__codelineno-6-28" name="__codelineno-6-28" href="#__codelineno-6-28"></a> <span class="n">text</span><span class="o">=</span><span class="n">prompt</span> <span class="o">+</span> <span class="s2">", best quality, high quality"</span><span class="p">,</span>
|
|
</span><span id="__span-6-29"><a id="__codelineno-6-29" name="__codelineno-6-29" href="#__codelineno-6-29"></a> <span class="n">negative_text</span><span class="o">=</span><span class="s2">"monochrome, lowres, bad anatomy, worst quality, low quality"</span><span class="p">,</span>
|
|
</span><span id="__span-6-30"><a id="__codelineno-6-30" name="__codelineno-6-30" href="#__codelineno-6-30"></a> <span class="p">)</span>
|
|
</span><span id="__span-6-31"><a id="__codelineno-6-31" name="__codelineno-6-31" href="#__codelineno-6-31"></a> <span class="n">time_ids</span> <span class="o">=</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">default_time_ids</span>
|
|
</span><span id="__span-6-32"><a id="__codelineno-6-32" name="__codelineno-6-32" href="#__codelineno-6-32"></a>
|
|
</span><span id="__span-6-33"><a id="__codelineno-6-33" name="__codelineno-6-33" href="#__codelineno-6-33"></a> <span class="n">manual_seed</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
|
|
</span><span id="__span-6-34"><a id="__codelineno-6-34" name="__codelineno-6-34" href="#__codelineno-6-34"></a>
|
|
</span><span id="__span-6-35"><a id="__codelineno-6-35" name="__codelineno-6-35" href="#__codelineno-6-35"></a> <span class="c1"># SDXL typically generates 1024x1024, here we use a higher resolution.</span>
|
|
</span><span id="__span-6-36"><a id="__codelineno-6-36" name="__codelineno-6-36" href="#__codelineno-6-36"></a> <span class="n">x</span> <span class="o">=</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">init_latents</span><span class="p">((</span><span class="mi">2048</span><span class="p">,</span> <span class="mi">2048</span><span class="p">))</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">sdxl</span><span class="o">.</span><span class="n">device</span><span class="p">,</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span>
|
|
</span><span id="__span-6-37"><a id="__codelineno-6-37" name="__codelineno-6-37" href="#__codelineno-6-37"></a>
|
|
</span><span id="__span-6-38"><a id="__codelineno-6-38" name="__codelineno-6-38" href="#__codelineno-6-38"></a> <span class="c1"># Diffusion process</span>
|
|
</span><span id="__span-6-39"><a id="__codelineno-6-39" name="__codelineno-6-39" href="#__codelineno-6-39"></a> <span class="k">for</span> <span class="n">step</span> <span class="ow">in</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">steps</span><span class="p">:</span>
|
|
</span><span id="__span-6-40"><a id="__codelineno-6-40" name="__codelineno-6-40" href="#__codelineno-6-40"></a> <span class="k">if</span> <span class="n">step</span> <span class="o">%</span> <span class="mi">10</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
|
</span><span id="__span-6-41"><a id="__codelineno-6-41" name="__codelineno-6-41" href="#__codelineno-6-41"></a> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Step </span><span class="si">{</span><span class="n">step</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
|
</span><span id="__span-6-42"><a id="__codelineno-6-42" name="__codelineno-6-42" href="#__codelineno-6-42"></a> <span class="n">x</span> <span class="o">=</span> <span class="n">sdxl</span><span class="p">(</span>
|
|
</span><span id="__span-6-43"><a id="__codelineno-6-43" name="__codelineno-6-43" href="#__codelineno-6-43"></a> <span class="n">x</span><span class="p">,</span>
|
|
</span><span id="__span-6-44"><a id="__codelineno-6-44" name="__codelineno-6-44" href="#__codelineno-6-44"></a> <span class="n">step</span><span class="o">=</span><span class="n">step</span><span class="p">,</span>
|
|
</span><span id="__span-6-45"><a id="__codelineno-6-45" name="__codelineno-6-45" href="#__codelineno-6-45"></a> <span class="n">clip_text_embedding</span><span class="o">=</span><span class="n">clip_text_embedding</span><span class="p">,</span>
|
|
</span><span id="__span-6-46"><a id="__codelineno-6-46" name="__codelineno-6-46" href="#__codelineno-6-46"></a> <span class="n">pooled_text_embedding</span><span class="o">=</span><span class="n">pooled_text_embedding</span><span class="p">,</span>
|
|
</span><span id="__span-6-47"><a id="__codelineno-6-47" name="__codelineno-6-47" href="#__codelineno-6-47"></a> <span class="n">time_ids</span><span class="o">=</span><span class="n">time_ids</span><span class="p">,</span>
|
|
</span><span id="__span-6-48"><a id="__codelineno-6-48" name="__codelineno-6-48" href="#__codelineno-6-48"></a> <span class="p">)</span>
|
|
</span><span id="__span-6-49"><a id="__codelineno-6-49" name="__codelineno-6-49" href="#__codelineno-6-49"></a> <span class="n">predicted_image</span> <span class="o">=</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">lda</span><span class="o">.</span><span class="n">decode_latents</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
|
|
</span><span id="__span-6-50"><a id="__codelineno-6-50" name="__codelineno-6-50" href="#__codelineno-6-50"></a>
|
|
</span><span id="__span-6-51"><a id="__codelineno-6-51" name="__codelineno-6-51" href="#__codelineno-6-51"></a><span class="n">predicted_image</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="s2">"scifi_sdxl.png"</span><span class="p">)</span>
|
|
</span></code></pre></div>
|
|
</details>
|
|
<p>You should get something like this - pretty neat, isn't it? </p>
|
|
<figure markdown>
|
|
<img src="scifi_sdxl.webp" alt="Sci-fi castle" width="400">
|
|
<figcaption>Generated image of a castle in sci-fi style.</figcaption>
|
|
</figure>
|
|
|
|
<h2 id="multiple-loras">Multiple LoRAs<a class="headerlink" href="#multiple-loras" title="Permanent link">¶</a></h2>
|
|
<p>Continuing with our futuristic castle example, we might want to turn it, for instance, into pixel art. </p>
|
|
<p>Again, we could either try some tedious prompt engineering,
|
|
or instead use another LoRA found on the web, such as <a href="https://civitai.com/models/120096/pixel-art-xl?modelVersionId=135931">Pixel Art LoRA</a>, found on Civitai.
|
|
This is dead simple as <a class="autorefs autorefs-internal" href="../../reference/foundationals/latent_diffusion/#refiners.foundationals.latent_diffusion.lora.SDLoraManager"><code>SDLoraManager</code></a> allows loading multiple LoRAs:</p>
|
|
<div class="language-py highlight"><pre><span></span><code><span id="__span-7-1"><a id="__codelineno-7-1" name="__codelineno-7-1" href="#__codelineno-7-1"></a><span class="c1"># Load LoRAs weights from disk and inject them into target</span>
|
|
</span><span id="__span-7-2"><a id="__codelineno-7-2" name="__codelineno-7-2" href="#__codelineno-7-2"></a><span class="n">manager</span> <span class="o">=</span> <span class="n">SDLoraManager</span><span class="p">(</span><span class="n">sdxl</span><span class="p">)</span>
|
|
</span><span id="__span-7-3"><a id="__codelineno-7-3" name="__codelineno-7-3" href="#__codelineno-7-3"></a><span class="n">manager</span><span class="o">.</span><span class="n">add_loras</span><span class="p">(</span><span class="s2">"scifi-lora"</span><span class="p">,</span> <span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"Sci-fi_Environments_sdxl.safetensors"</span><span class="p">))</span>
|
|
</span><span id="__span-7-4"><a id="__codelineno-7-4" name="__codelineno-7-4" href="#__codelineno-7-4"></a><span class="n">manager</span><span class="o">.</span><span class="n">add_loras</span><span class="p">(</span><span class="s2">"pixel-art-lora"</span><span class="p">,</span> <span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"pixel-art-xl-v1.1.safetensors"</span><span class="p">))</span>
|
|
</span></code></pre></div>
|
|
<p>Adapters such as LoRAs also have a <a class="autorefs autorefs-internal" href="../../reference/fluxion/adapters/#refiners.fluxion.adapters.Lora.scale">scale</a> (roughly) quantifying the effect of this Adapter.
|
|
Refiners allows setting different scales for each Adapter, allowing the user to balance the effect of each Adapter:</p>
|
|
<div class="language-py highlight"><pre><span></span><code><span id="__span-8-1"><a id="__codelineno-8-1" name="__codelineno-8-1" href="#__codelineno-8-1"></a><span class="c1"># Load LoRAs weights from disk and inject them into target</span>
|
|
</span><span id="__span-8-2"><a id="__codelineno-8-2" name="__codelineno-8-2" href="#__codelineno-8-2"></a><span class="n">manager</span> <span class="o">=</span> <span class="n">SDLoraManager</span><span class="p">(</span><span class="n">sdxl</span><span class="p">)</span>
|
|
</span><span id="__span-8-3"><a id="__codelineno-8-3" name="__codelineno-8-3" href="#__codelineno-8-3"></a><span class="n">manager</span><span class="o">.</span><span class="n">add_loras</span><span class="p">(</span><span class="s2">"scifi-lora"</span><span class="p">,</span> <span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"Sci-fi_Environments_sdxl.safetensors"</span><span class="p">),</span> <span class="n">scale</span><span class="o">=</span><span class="mf">1.0</span><span class="p">)</span>
|
|
</span><span id="__span-8-4"><a id="__codelineno-8-4" name="__codelineno-8-4" href="#__codelineno-8-4"></a><span class="n">manager</span><span class="o">.</span><span class="n">add_loras</span><span class="p">(</span><span class="s2">"pixel-art-lora"</span><span class="p">,</span> <span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"pixel-art-xl-v1.1.safetensors"</span><span class="p">),</span> <span class="n">scale</span><span class="o">=</span><span class="mf">1.4</span><span class="p">)</span>
|
|
</span></code></pre></div>
|
|
<details class="example">
|
|
<summary>Expand to see the entire end-to-end code</summary>
|
|
<div class="language-py highlight"><pre><span></span><code><span id="__span-9-1"><a id="__codelineno-9-1" name="__codelineno-9-1" href="#__codelineno-9-1"></a><span class="kn">import</span> <span class="nn">torch</span>
|
|
</span><span id="__span-9-2"><a id="__codelineno-9-2" name="__codelineno-9-2" href="#__codelineno-9-2"></a>
|
|
</span><span id="__span-9-3"><a id="__codelineno-9-3" name="__codelineno-9-3" href="#__codelineno-9-3"></a><span class="kn">from</span> <span class="nn">refiners.fluxion.utils</span> <span class="kn">import</span> <span class="n">load_from_safetensors</span><span class="p">,</span> <span class="n">manual_seed</span><span class="p">,</span> <span class="n">no_grad</span>
|
|
</span><span id="__span-9-4"><a id="__codelineno-9-4" name="__codelineno-9-4" href="#__codelineno-9-4"></a><span class="kn">from</span> <span class="nn">refiners.foundationals.latent_diffusion.lora</span> <span class="kn">import</span> <span class="n">SDLoraManager</span>
|
|
</span><span id="__span-9-5"><a id="__codelineno-9-5" name="__codelineno-9-5" href="#__codelineno-9-5"></a><span class="kn">from</span> <span class="nn">refiners.foundationals.latent_diffusion.stable_diffusion_xl</span> <span class="kn">import</span> <span class="n">StableDiffusion_XL</span>
|
|
</span><span id="__span-9-6"><a id="__codelineno-9-6" name="__codelineno-9-6" href="#__codelineno-9-6"></a>
|
|
</span><span id="__span-9-7"><a id="__codelineno-9-7" name="__codelineno-9-7" href="#__codelineno-9-7"></a><span class="c1"># Load SDXL</span>
|
|
</span><span id="__span-9-8"><a id="__codelineno-9-8" name="__codelineno-9-8" href="#__codelineno-9-8"></a><span class="n">sdxl</span> <span class="o">=</span> <span class="n">StableDiffusion_XL</span><span class="p">(</span><span class="n">device</span><span class="o">=</span><span class="s2">"cuda"</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">torch</span><span class="o">.</span><span class="n">float16</span><span class="p">)</span>
|
|
</span><span id="__span-9-9"><a id="__codelineno-9-9" name="__codelineno-9-9" href="#__codelineno-9-9"></a><span class="n">sdxl</span><span class="o">.</span><span class="n">clip_text_encoder</span><span class="o">.</span><span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"DoubleCLIPTextEncoder.safetensors"</span><span class="p">)</span>
|
|
</span><span id="__span-9-10"><a id="__codelineno-9-10" name="__codelineno-9-10" href="#__codelineno-9-10"></a><span class="n">sdxl</span><span class="o">.</span><span class="n">unet</span><span class="o">.</span><span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"sdxl-unet.safetensors"</span><span class="p">)</span>
|
|
</span><span id="__span-9-11"><a id="__codelineno-9-11" name="__codelineno-9-11" href="#__codelineno-9-11"></a><span class="n">sdxl</span><span class="o">.</span><span class="n">lda</span><span class="o">.</span><span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"sdxl-lda.safetensors"</span><span class="p">)</span>
|
|
</span><span id="__span-9-12"><a id="__codelineno-9-12" name="__codelineno-9-12" href="#__codelineno-9-12"></a>
|
|
</span><span id="__span-9-13"><a id="__codelineno-9-13" name="__codelineno-9-13" href="#__codelineno-9-13"></a><span class="c1"># Load LoRAs weights from disk and inject them into target</span>
|
|
</span><span id="__span-9-14"><a id="__codelineno-9-14" name="__codelineno-9-14" href="#__codelineno-9-14"></a><span class="n">manager</span> <span class="o">=</span> <span class="n">SDLoraManager</span><span class="p">(</span><span class="n">sdxl</span><span class="p">)</span>
|
|
</span><span id="__span-9-15"><a id="__codelineno-9-15" name="__codelineno-9-15" href="#__codelineno-9-15"></a><span class="n">scifi_lora_weights</span> <span class="o">=</span> <span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"Sci-fi_Environments_sdxl.safetensors"</span><span class="p">)</span>
|
|
</span><span id="__span-9-16"><a id="__codelineno-9-16" name="__codelineno-9-16" href="#__codelineno-9-16"></a><span class="n">pixel_art_lora_weights</span> <span class="o">=</span> <span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"pixel-art-xl-v1.1.safetensors"</span><span class="p">)</span>
|
|
</span><span id="__span-9-17"><a id="__codelineno-9-17" name="__codelineno-9-17" href="#__codelineno-9-17"></a><span class="n">manager</span><span class="o">.</span><span class="n">add_loras</span><span class="p">(</span><span class="s2">"scifi-lora"</span><span class="p">,</span> <span class="n">scifi_lora_weights</span><span class="p">,</span> <span class="n">scale</span><span class="o">=</span><span class="mf">1.0</span><span class="p">)</span>
|
|
</span><span id="__span-9-18"><a id="__codelineno-9-18" name="__codelineno-9-18" href="#__codelineno-9-18"></a><span class="n">manager</span><span class="o">.</span><span class="n">add_loras</span><span class="p">(</span><span class="s2">"pixel-art-lora"</span><span class="p">,</span> <span class="n">pixel_art_lora_weights</span><span class="p">,</span> <span class="n">scale</span><span class="o">=</span><span class="mf">1.4</span><span class="p">)</span>
|
|
</span><span id="__span-9-19"><a id="__codelineno-9-19" name="__codelineno-9-19" href="#__codelineno-9-19"></a>
|
|
</span><span id="__span-9-20"><a id="__codelineno-9-20" name="__codelineno-9-20" href="#__codelineno-9-20"></a><span class="c1"># Hyperparameters</span>
|
|
</span><span id="__span-9-21"><a id="__codelineno-9-21" name="__codelineno-9-21" href="#__codelineno-9-21"></a><span class="n">prompt</span> <span class="o">=</span> <span class="s2">"a futuristic castle surrounded by a forest, mountains in the background"</span>
|
|
</span><span id="__span-9-22"><a id="__codelineno-9-22" name="__codelineno-9-22" href="#__codelineno-9-22"></a><span class="n">seed</span> <span class="o">=</span> <span class="mi">42</span>
|
|
</span><span id="__span-9-23"><a id="__codelineno-9-23" name="__codelineno-9-23" href="#__codelineno-9-23"></a><span class="n">sdxl</span><span class="o">.</span><span class="n">set_inference_steps</span><span class="p">(</span><span class="mi">50</span><span class="p">,</span> <span class="n">first_step</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
|
</span><span id="__span-9-24"><a id="__codelineno-9-24" name="__codelineno-9-24" href="#__codelineno-9-24"></a><span class="n">sdxl</span><span class="o">.</span><span class="n">set_self_attention_guidance</span><span class="p">(</span>
|
|
</span><span id="__span-9-25"><a id="__codelineno-9-25" name="__codelineno-9-25" href="#__codelineno-9-25"></a> <span class="n">enable</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">scale</span><span class="o">=</span><span class="mf">0.75</span>
|
|
</span><span id="__span-9-26"><a id="__codelineno-9-26" name="__codelineno-9-26" href="#__codelineno-9-26"></a><span class="p">)</span> <span class="c1"># Enable self-attention guidance to enhance the quality of the generated images</span>
|
|
</span><span id="__span-9-27"><a id="__codelineno-9-27" name="__codelineno-9-27" href="#__codelineno-9-27"></a>
|
|
</span><span id="__span-9-28"><a id="__codelineno-9-28" name="__codelineno-9-28" href="#__codelineno-9-28"></a><span class="k">with</span> <span class="n">no_grad</span><span class="p">():</span>
|
|
</span><span id="__span-9-29"><a id="__codelineno-9-29" name="__codelineno-9-29" href="#__codelineno-9-29"></a> <span class="n">clip_text_embedding</span><span class="p">,</span> <span class="n">pooled_text_embedding</span> <span class="o">=</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">compute_clip_text_embedding</span><span class="p">(</span>
|
|
</span><span id="__span-9-30"><a id="__codelineno-9-30" name="__codelineno-9-30" href="#__codelineno-9-30"></a> <span class="n">text</span><span class="o">=</span><span class="n">prompt</span> <span class="o">+</span> <span class="s2">", best quality, high quality"</span><span class="p">,</span>
|
|
</span><span id="__span-9-31"><a id="__codelineno-9-31" name="__codelineno-9-31" href="#__codelineno-9-31"></a> <span class="n">negative_text</span><span class="o">=</span><span class="s2">"monochrome, lowres, bad anatomy, worst quality, low quality"</span><span class="p">,</span>
|
|
</span><span id="__span-9-32"><a id="__codelineno-9-32" name="__codelineno-9-32" href="#__codelineno-9-32"></a> <span class="p">)</span>
|
|
</span><span id="__span-9-33"><a id="__codelineno-9-33" name="__codelineno-9-33" href="#__codelineno-9-33"></a> <span class="n">time_ids</span> <span class="o">=</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">default_time_ids</span>
|
|
</span><span id="__span-9-34"><a id="__codelineno-9-34" name="__codelineno-9-34" href="#__codelineno-9-34"></a>
|
|
</span><span id="__span-9-35"><a id="__codelineno-9-35" name="__codelineno-9-35" href="#__codelineno-9-35"></a> <span class="n">manual_seed</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
|
|
</span><span id="__span-9-36"><a id="__codelineno-9-36" name="__codelineno-9-36" href="#__codelineno-9-36"></a>
|
|
</span><span id="__span-9-37"><a id="__codelineno-9-37" name="__codelineno-9-37" href="#__codelineno-9-37"></a> <span class="c1"># SDXL typically generates 1024x1024, here we use a higher resolution.</span>
|
|
</span><span id="__span-9-38"><a id="__codelineno-9-38" name="__codelineno-9-38" href="#__codelineno-9-38"></a> <span class="n">x</span> <span class="o">=</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">init_latents</span><span class="p">((</span><span class="mi">2048</span><span class="p">,</span> <span class="mi">2048</span><span class="p">))</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">sdxl</span><span class="o">.</span><span class="n">device</span><span class="p">,</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span>
|
|
</span><span id="__span-9-39"><a id="__codelineno-9-39" name="__codelineno-9-39" href="#__codelineno-9-39"></a>
|
|
</span><span id="__span-9-40"><a id="__codelineno-9-40" name="__codelineno-9-40" href="#__codelineno-9-40"></a> <span class="c1"># Diffusion process</span>
|
|
</span><span id="__span-9-41"><a id="__codelineno-9-41" name="__codelineno-9-41" href="#__codelineno-9-41"></a> <span class="k">for</span> <span class="n">step</span> <span class="ow">in</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">steps</span><span class="p">:</span>
|
|
</span><span id="__span-9-42"><a id="__codelineno-9-42" name="__codelineno-9-42" href="#__codelineno-9-42"></a> <span class="k">if</span> <span class="n">step</span> <span class="o">%</span> <span class="mi">10</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
|
</span><span id="__span-9-43"><a id="__codelineno-9-43" name="__codelineno-9-43" href="#__codelineno-9-43"></a> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Step </span><span class="si">{</span><span class="n">step</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
|
</span><span id="__span-9-44"><a id="__codelineno-9-44" name="__codelineno-9-44" href="#__codelineno-9-44"></a> <span class="n">x</span> <span class="o">=</span> <span class="n">sdxl</span><span class="p">(</span>
|
|
</span><span id="__span-9-45"><a id="__codelineno-9-45" name="__codelineno-9-45" href="#__codelineno-9-45"></a> <span class="n">x</span><span class="p">,</span>
|
|
</span><span id="__span-9-46"><a id="__codelineno-9-46" name="__codelineno-9-46" href="#__codelineno-9-46"></a> <span class="n">step</span><span class="o">=</span><span class="n">step</span><span class="p">,</span>
|
|
</span><span id="__span-9-47"><a id="__codelineno-9-47" name="__codelineno-9-47" href="#__codelineno-9-47"></a> <span class="n">clip_text_embedding</span><span class="o">=</span><span class="n">clip_text_embedding</span><span class="p">,</span>
|
|
</span><span id="__span-9-48"><a id="__codelineno-9-48" name="__codelineno-9-48" href="#__codelineno-9-48"></a> <span class="n">pooled_text_embedding</span><span class="o">=</span><span class="n">pooled_text_embedding</span><span class="p">,</span>
|
|
</span><span id="__span-9-49"><a id="__codelineno-9-49" name="__codelineno-9-49" href="#__codelineno-9-49"></a> <span class="n">time_ids</span><span class="o">=</span><span class="n">time_ids</span><span class="p">,</span>
|
|
</span><span id="__span-9-50"><a id="__codelineno-9-50" name="__codelineno-9-50" href="#__codelineno-9-50"></a> <span class="p">)</span>
|
|
</span><span id="__span-9-51"><a id="__codelineno-9-51" name="__codelineno-9-51" href="#__codelineno-9-51"></a> <span class="n">predicted_image</span> <span class="o">=</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">lda</span><span class="o">.</span><span class="n">decode_latents</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
|
|
</span><span id="__span-9-52"><a id="__codelineno-9-52" name="__codelineno-9-52" href="#__codelineno-9-52"></a>
|
|
</span><span id="__span-9-53"><a id="__codelineno-9-53" name="__codelineno-9-53" href="#__codelineno-9-53"></a><span class="n">predicted_image</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="s2">"scifi_pixel_sdxl.png"</span><span class="p">)</span>
|
|
</span></code></pre></div>
|
|
</details>
|
|
<p>The results are looking great:</p>
|
|
<figure markdown>
|
|
<img src="scifi_pixel_sdxl.webp" alt="Sci-fi Pixel Art castle" width="400">
|
|
<figcaption>Generated image of a castle in sci-fi, pixel art style.</figcaption>
|
|
</figure>
|
|
|
|
<h2 id="multiple-loras-ip-adapter">Multiple LoRAs + IP-Adapter<a class="headerlink" href="#multiple-loras-ip-adapter" title="Permanent link">¶</a></h2>
|
|
<p>Refiners really shines when it comes to composing different Adapters to fully exploit the possibilities of foundation models.</p>
|
|
<p>For instance, IP-Adapter (covered in <a href="https://blog.finegrain.ai/posts/supercharge-stable-diffusion-ip-adapter/">a previous blog post</a>) is a common choice for practitioners wanting to guide the diffusion process towards a specific prompt image.</p>
|
|
<p>In our example, consider this image of the <a href="https://en.wikipedia.org/wiki/Neuschwanstein_Castle">Neuschwanstein Castle</a>:</p>
|
|
<figure markdown>
|
|
<img src="german-castle.jpg" alt="Castle Image" width="400">
|
|
<figcaption>Credits: Bayerische Schlösserverwaltung, Anton Brandl</figcaption>
|
|
</figure>
|
|
|
|
<p>We would like to guide the diffusion process to align with this image, using IP-Adapter. First, download the image as well as the weights of IP-Adapter by calling the following commands from your terminal (again, make sure you're in your local <code>refiners</code> directory):</p>
|
|
<div class="language-bash highlight"><pre><span></span><code><span id="__span-10-1"><a id="__codelineno-10-1" name="__codelineno-10-1" href="#__codelineno-10-1"></a>curl<span class="w"> </span>-O<span class="w"> </span>https://refine.rs/guides/adapting_sdxl/german-castle.jpg
|
|
</span><span id="__span-10-2"><a id="__codelineno-10-2" name="__codelineno-10-2" href="#__codelineno-10-2"></a>python<span class="w"> </span>scripts/conversion/convert_transformers_clip_image_model.py<span class="w"> </span>--from<span class="w"> </span><span class="s2">"stabilityai/stable-diffusion-2-1-unclip"</span><span class="w"> </span>--to<span class="w"> </span>CLIPImageEncoderH.safetensors<span class="w"> </span>--half
|
|
</span><span id="__span-10-3"><a id="__codelineno-10-3" name="__codelineno-10-3" href="#__codelineno-10-3"></a>curl<span class="w"> </span>-LO<span class="w"> </span>https://huggingface.co/h94/IP-Adapter/resolve/main/sdxl_models/ip-adapter-plus_sdxl_vit-h.bin
|
|
</span><span id="__span-10-4"><a id="__codelineno-10-4" name="__codelineno-10-4" href="#__codelineno-10-4"></a>python<span class="w"> </span>scripts/conversion/convert_diffusers_ip_adapter.py<span class="w"> </span>--from<span class="w"> </span>ip-adapter-plus_sdxl_vit-h.bin<span class="w"> </span>--half
|
|
</span></code></pre></div>
|
|
<p>This will download and convert both IP-Adapter and CLIP Image Encoder pretrained weights.</p>
|
|
<p>Then, in your Python code, simply instantiate an <a class="autorefs autorefs-internal" href="../../reference/foundationals/latent_diffusion/#refiners.foundationals.latent_diffusion.stable_diffusion_xl.SDXLIPAdapter"><code>SDXLIPAdapter</code></a> targeting our <code>sdxl.unet</code>, and inject it using a simple <code>.inject()</code> call:</p>
|
|
<div class="language-py highlight"><pre><span></span><code><span id="__span-11-1"><a id="__codelineno-11-1" name="__codelineno-11-1" href="#__codelineno-11-1"></a><span class="c1"># IP-Adapter</span>
|
|
</span><span id="__span-11-2"><a id="__codelineno-11-2" name="__codelineno-11-2" href="#__codelineno-11-2"></a><span class="n">ip_adapter</span> <span class="o">=</span> <span class="n">SDXLIPAdapter</span><span class="p">(</span>
|
|
</span><span id="__span-11-3"><a id="__codelineno-11-3" name="__codelineno-11-3" href="#__codelineno-11-3"></a> <span class="n">target</span><span class="o">=</span><span class="n">sdxl</span><span class="o">.</span><span class="n">unet</span><span class="p">,</span>
|
|
</span><span id="__span-11-4"><a id="__codelineno-11-4" name="__codelineno-11-4" href="#__codelineno-11-4"></a> <span class="n">weights</span><span class="o">=</span><span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"ip-adapter-plus_sdxl_vit-h.safetensors"</span><span class="p">),</span>
|
|
</span><span id="__span-11-5"><a id="__codelineno-11-5" name="__codelineno-11-5" href="#__codelineno-11-5"></a> <span class="n">scale</span><span class="o">=</span><span class="mf">1.0</span><span class="p">,</span>
|
|
</span><span id="__span-11-6"><a id="__codelineno-11-6" name="__codelineno-11-6" href="#__codelineno-11-6"></a> <span class="n">fine_grained</span><span class="o">=</span><span class="kc">True</span> <span class="c1"># Use fine-grained IP-Adapter (i.e IP-Adapter Plus)</span>
|
|
</span><span id="__span-11-7"><a id="__codelineno-11-7" name="__codelineno-11-7" href="#__codelineno-11-7"></a><span class="p">)</span>
|
|
</span><span id="__span-11-8"><a id="__codelineno-11-8" name="__codelineno-11-8" href="#__codelineno-11-8"></a><span class="n">ip_adapter</span><span class="o">.</span><span class="n">clip_image_encoder</span><span class="o">.</span><span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"CLIPImageEncoderH.safetensors"</span><span class="p">)</span>
|
|
</span><span id="__span-11-9"><a id="__codelineno-11-9" name="__codelineno-11-9" href="#__codelineno-11-9"></a><span class="n">ip_adapter</span><span class="o">.</span><span class="n">inject</span><span class="p">()</span>
|
|
</span></code></pre></div>
|
|
<p>Then, at runtime, we simply compute the embedding of the image prompt through the <code>ip_adapter</code> object, and set it by calling <code>.set_clip_image_embedding()</code>:</p>
|
|
<div class="language-py highlight"><pre><span></span><code><span id="__span-12-1"><a id="__codelineno-12-1" name="__codelineno-12-1" href="#__codelineno-12-1"></a><span class="kn">from</span> <span class="nn">PIL</span> <span class="kn">import</span> <span class="n">Image</span>
|
|
</span><span id="__span-12-2"><a id="__codelineno-12-2" name="__codelineno-12-2" href="#__codelineno-12-2"></a><span class="n">image_prompt</span> <span class="o">=</span> <span class="n">Image</span><span class="o">.</span><span class="n">open</span><span class="p">(</span><span class="s2">"german-castle.jpg"</span><span class="p">)</span>
|
|
</span><span id="__span-12-3"><a id="__codelineno-12-3" name="__codelineno-12-3" href="#__codelineno-12-3"></a>
|
|
</span><span id="__span-12-4"><a id="__codelineno-12-4" name="__codelineno-12-4" href="#__codelineno-12-4"></a><span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">():</span>
|
|
</span><span id="__span-12-5"><a id="__codelineno-12-5" name="__codelineno-12-5" href="#__codelineno-12-5"></a> <span class="n">clip_image_embedding</span> <span class="o">=</span> <span class="n">ip_adapter</span><span class="o">.</span><span class="n">compute_clip_image_embedding</span><span class="p">(</span><span class="n">ip_adapter</span><span class="o">.</span><span class="n">preprocess_image</span><span class="p">(</span><span class="n">image_prompt</span><span class="p">))</span>
|
|
</span><span id="__span-12-6"><a id="__codelineno-12-6" name="__codelineno-12-6" href="#__codelineno-12-6"></a> <span class="n">ip_adapter</span><span class="o">.</span><span class="n">set_clip_image_embedding</span><span class="p">(</span><span class="n">clip_image_embedding</span><span class="p">)</span>
|
|
</span><span id="__span-12-7"><a id="__codelineno-12-7" name="__codelineno-12-7" href="#__codelineno-12-7"></a>
|
|
</span><span id="__span-12-8"><a id="__codelineno-12-8" name="__codelineno-12-8" href="#__codelineno-12-8"></a><span class="c1"># And start the diffusion process</span>
|
|
</span></code></pre></div>
|
|
<div class="admonition note">
|
|
<p class="admonition-title">Note</p>
|
|
<p>Be wary that composing Adapters (especially ones of different natures, such as LoRAs and IP-Adapter) can be tricky, as their respective effects can be adversarial. This is visible in our example below, where we tuned the scales of the two LoRAs to <code>1.5</code> and <code>1.55</code>, respectively. We invite you to try and test different seeds and scales to find the perfect combination!</p>
|
|
</div>
|
|
<details class="example">
|
|
<summary>Expand to see the entire end-to-end code</summary>
|
|
<div class="language-py highlight"><pre><span></span><code><span id="__span-13-1"><a id="__codelineno-13-1" name="__codelineno-13-1" href="#__codelineno-13-1"></a><span class="kn">import</span> <span class="nn">torch</span>
|
|
</span><span id="__span-13-2"><a id="__codelineno-13-2" name="__codelineno-13-2" href="#__codelineno-13-2"></a><span class="kn">from</span> <span class="nn">PIL</span> <span class="kn">import</span> <span class="n">Image</span>
|
|
</span><span id="__span-13-3"><a id="__codelineno-13-3" name="__codelineno-13-3" href="#__codelineno-13-3"></a>
|
|
</span><span id="__span-13-4"><a id="__codelineno-13-4" name="__codelineno-13-4" href="#__codelineno-13-4"></a><span class="kn">from</span> <span class="nn">refiners.fluxion.utils</span> <span class="kn">import</span> <span class="n">load_from_safetensors</span><span class="p">,</span> <span class="n">manual_seed</span><span class="p">,</span> <span class="n">no_grad</span>
|
|
</span><span id="__span-13-5"><a id="__codelineno-13-5" name="__codelineno-13-5" href="#__codelineno-13-5"></a><span class="kn">from</span> <span class="nn">refiners.foundationals.latent_diffusion.lora</span> <span class="kn">import</span> <span class="n">SDLoraManager</span>
|
|
</span><span id="__span-13-6"><a id="__codelineno-13-6" name="__codelineno-13-6" href="#__codelineno-13-6"></a><span class="kn">from</span> <span class="nn">refiners.foundationals.latent_diffusion.stable_diffusion_xl</span> <span class="kn">import</span> <span class="n">StableDiffusion_XL</span>
|
|
</span><span id="__span-13-7"><a id="__codelineno-13-7" name="__codelineno-13-7" href="#__codelineno-13-7"></a><span class="kn">from</span> <span class="nn">refiners.foundationals.latent_diffusion.stable_diffusion_xl.image_prompt</span> <span class="kn">import</span> <span class="n">SDXLIPAdapter</span>
|
|
</span><span id="__span-13-8"><a id="__codelineno-13-8" name="__codelineno-13-8" href="#__codelineno-13-8"></a>
|
|
</span><span id="__span-13-9"><a id="__codelineno-13-9" name="__codelineno-13-9" href="#__codelineno-13-9"></a><span class="c1"># Load SDXL</span>
|
|
</span><span id="__span-13-10"><a id="__codelineno-13-10" name="__codelineno-13-10" href="#__codelineno-13-10"></a><span class="n">sdxl</span> <span class="o">=</span> <span class="n">StableDiffusion_XL</span><span class="p">(</span><span class="n">device</span><span class="o">=</span><span class="s2">"cuda"</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">torch</span><span class="o">.</span><span class="n">float16</span><span class="p">)</span>
|
|
</span><span id="__span-13-11"><a id="__codelineno-13-11" name="__codelineno-13-11" href="#__codelineno-13-11"></a><span class="n">sdxl</span><span class="o">.</span><span class="n">clip_text_encoder</span><span class="o">.</span><span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"DoubleCLIPTextEncoder.safetensors"</span><span class="p">)</span>
|
|
</span><span id="__span-13-12"><a id="__codelineno-13-12" name="__codelineno-13-12" href="#__codelineno-13-12"></a><span class="n">sdxl</span><span class="o">.</span><span class="n">unet</span><span class="o">.</span><span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"sdxl-unet.safetensors"</span><span class="p">)</span>
|
|
</span><span id="__span-13-13"><a id="__codelineno-13-13" name="__codelineno-13-13" href="#__codelineno-13-13"></a><span class="n">sdxl</span><span class="o">.</span><span class="n">lda</span><span class="o">.</span><span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"sdxl-lda.safetensors"</span><span class="p">)</span>
|
|
</span><span id="__span-13-14"><a id="__codelineno-13-14" name="__codelineno-13-14" href="#__codelineno-13-14"></a>
|
|
</span><span id="__span-13-15"><a id="__codelineno-13-15" name="__codelineno-13-15" href="#__codelineno-13-15"></a><span class="c1"># Load LoRAs weights from disk and inject them into target</span>
|
|
</span><span id="__span-13-16"><a id="__codelineno-13-16" name="__codelineno-13-16" href="#__codelineno-13-16"></a><span class="n">manager</span> <span class="o">=</span> <span class="n">SDLoraManager</span><span class="p">(</span><span class="n">sdxl</span><span class="p">)</span>
|
|
</span><span id="__span-13-17"><a id="__codelineno-13-17" name="__codelineno-13-17" href="#__codelineno-13-17"></a><span class="n">scifi_lora_weights</span> <span class="o">=</span> <span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"Sci-fi_Environments_sdxl.safetensors"</span><span class="p">)</span>
|
|
</span><span id="__span-13-18"><a id="__codelineno-13-18" name="__codelineno-13-18" href="#__codelineno-13-18"></a><span class="n">pixel_art_lora_weights</span> <span class="o">=</span> <span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"pixel-art-xl-v1.1.safetensors"</span><span class="p">)</span>
|
|
</span><span id="__span-13-19"><a id="__codelineno-13-19" name="__codelineno-13-19" href="#__codelineno-13-19"></a><span class="n">manager</span><span class="o">.</span><span class="n">add_loras</span><span class="p">(</span><span class="s2">"scifi-lora"</span><span class="p">,</span> <span class="n">scifi_lora_weights</span><span class="p">,</span> <span class="n">scale</span><span class="o">=</span><span class="mf">1.5</span><span class="p">)</span>
|
|
</span><span id="__span-13-20"><a id="__codelineno-13-20" name="__codelineno-13-20" href="#__codelineno-13-20"></a><span class="n">manager</span><span class="o">.</span><span class="n">add_loras</span><span class="p">(</span><span class="s2">"pixel-art-lora"</span><span class="p">,</span> <span class="n">pixel_art_lora_weights</span><span class="p">,</span> <span class="n">scale</span><span class="o">=</span><span class="mf">1.55</span><span class="p">)</span>
|
|
</span><span id="__span-13-21"><a id="__codelineno-13-21" name="__codelineno-13-21" href="#__codelineno-13-21"></a>
|
|
</span><span id="__span-13-22"><a id="__codelineno-13-22" name="__codelineno-13-22" href="#__codelineno-13-22"></a><span class="c1"># Load IP-Adapter</span>
|
|
</span><span id="__span-13-23"><a id="__codelineno-13-23" name="__codelineno-13-23" href="#__codelineno-13-23"></a><span class="n">ip_adapter</span> <span class="o">=</span> <span class="n">SDXLIPAdapter</span><span class="p">(</span>
|
|
</span><span id="__span-13-24"><a id="__codelineno-13-24" name="__codelineno-13-24" href="#__codelineno-13-24"></a> <span class="n">target</span><span class="o">=</span><span class="n">sdxl</span><span class="o">.</span><span class="n">unet</span><span class="p">,</span>
|
|
</span><span id="__span-13-25"><a id="__codelineno-13-25" name="__codelineno-13-25" href="#__codelineno-13-25"></a> <span class="n">weights</span><span class="o">=</span><span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"ip-adapter-plus_sdxl_vit-h.safetensors"</span><span class="p">),</span>
|
|
</span><span id="__span-13-26"><a id="__codelineno-13-26" name="__codelineno-13-26" href="#__codelineno-13-26"></a> <span class="n">scale</span><span class="o">=</span><span class="mf">1.0</span><span class="p">,</span>
|
|
</span><span id="__span-13-27"><a id="__codelineno-13-27" name="__codelineno-13-27" href="#__codelineno-13-27"></a> <span class="n">fine_grained</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="c1"># Use fine-grained IP-Adapter (IP-Adapter Plus)</span>
|
|
</span><span id="__span-13-28"><a id="__codelineno-13-28" name="__codelineno-13-28" href="#__codelineno-13-28"></a><span class="p">)</span>
|
|
</span><span id="__span-13-29"><a id="__codelineno-13-29" name="__codelineno-13-29" href="#__codelineno-13-29"></a><span class="n">ip_adapter</span><span class="o">.</span><span class="n">clip_image_encoder</span><span class="o">.</span><span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"CLIPImageEncoderH.safetensors"</span><span class="p">)</span>
|
|
</span><span id="__span-13-30"><a id="__codelineno-13-30" name="__codelineno-13-30" href="#__codelineno-13-30"></a><span class="n">ip_adapter</span><span class="o">.</span><span class="n">inject</span><span class="p">()</span>
|
|
</span><span id="__span-13-31"><a id="__codelineno-13-31" name="__codelineno-13-31" href="#__codelineno-13-31"></a>
|
|
</span><span id="__span-13-32"><a id="__codelineno-13-32" name="__codelineno-13-32" href="#__codelineno-13-32"></a><span class="c1"># Hyperparameters</span>
|
|
</span><span id="__span-13-33"><a id="__codelineno-13-33" name="__codelineno-13-33" href="#__codelineno-13-33"></a><span class="n">prompt</span> <span class="o">=</span> <span class="s2">"a futuristic castle surrounded by a forest, mountains in the background"</span>
|
|
</span><span id="__span-13-34"><a id="__codelineno-13-34" name="__codelineno-13-34" href="#__codelineno-13-34"></a><span class="n">image_prompt</span> <span class="o">=</span> <span class="n">Image</span><span class="o">.</span><span class="n">open</span><span class="p">(</span><span class="s2">"german-castle.jpg"</span><span class="p">)</span>
|
|
</span><span id="__span-13-35"><a id="__codelineno-13-35" name="__codelineno-13-35" href="#__codelineno-13-35"></a><span class="n">seed</span> <span class="o">=</span> <span class="mi">42</span>
|
|
</span><span id="__span-13-36"><a id="__codelineno-13-36" name="__codelineno-13-36" href="#__codelineno-13-36"></a><span class="n">sdxl</span><span class="o">.</span><span class="n">set_inference_steps</span><span class="p">(</span><span class="mi">50</span><span class="p">,</span> <span class="n">first_step</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
|
</span><span id="__span-13-37"><a id="__codelineno-13-37" name="__codelineno-13-37" href="#__codelineno-13-37"></a><span class="n">sdxl</span><span class="o">.</span><span class="n">set_self_attention_guidance</span><span class="p">(</span>
|
|
</span><span id="__span-13-38"><a id="__codelineno-13-38" name="__codelineno-13-38" href="#__codelineno-13-38"></a> <span class="n">enable</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">scale</span><span class="o">=</span><span class="mf">0.75</span>
|
|
</span><span id="__span-13-39"><a id="__codelineno-13-39" name="__codelineno-13-39" href="#__codelineno-13-39"></a><span class="p">)</span> <span class="c1"># Enable self-attention guidance to enhance the quality of the generated images</span>
|
|
</span><span id="__span-13-40"><a id="__codelineno-13-40" name="__codelineno-13-40" href="#__codelineno-13-40"></a>
|
|
</span><span id="__span-13-41"><a id="__codelineno-13-41" name="__codelineno-13-41" href="#__codelineno-13-41"></a><span class="k">with</span> <span class="n">no_grad</span><span class="p">():</span>
|
|
</span><span id="__span-13-42"><a id="__codelineno-13-42" name="__codelineno-13-42" href="#__codelineno-13-42"></a> <span class="n">clip_text_embedding</span><span class="p">,</span> <span class="n">pooled_text_embedding</span> <span class="o">=</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">compute_clip_text_embedding</span><span class="p">(</span>
|
|
</span><span id="__span-13-43"><a id="__codelineno-13-43" name="__codelineno-13-43" href="#__codelineno-13-43"></a> <span class="n">text</span><span class="o">=</span><span class="n">prompt</span> <span class="o">+</span> <span class="s2">", best quality, high quality"</span><span class="p">,</span>
|
|
</span><span id="__span-13-44"><a id="__codelineno-13-44" name="__codelineno-13-44" href="#__codelineno-13-44"></a> <span class="n">negative_text</span><span class="o">=</span><span class="s2">"monochrome, lowres, bad anatomy, worst quality, low quality"</span><span class="p">,</span>
|
|
</span><span id="__span-13-45"><a id="__codelineno-13-45" name="__codelineno-13-45" href="#__codelineno-13-45"></a> <span class="p">)</span>
|
|
</span><span id="__span-13-46"><a id="__codelineno-13-46" name="__codelineno-13-46" href="#__codelineno-13-46"></a> <span class="n">time_ids</span> <span class="o">=</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">default_time_ids</span>
|
|
</span><span id="__span-13-47"><a id="__codelineno-13-47" name="__codelineno-13-47" href="#__codelineno-13-47"></a>
|
|
</span><span id="__span-13-48"><a id="__codelineno-13-48" name="__codelineno-13-48" href="#__codelineno-13-48"></a> <span class="n">clip_image_embedding</span> <span class="o">=</span> <span class="n">ip_adapter</span><span class="o">.</span><span class="n">compute_clip_image_embedding</span><span class="p">(</span><span class="n">ip_adapter</span><span class="o">.</span><span class="n">preprocess_image</span><span class="p">(</span><span class="n">image_prompt</span><span class="p">))</span>
|
|
</span><span id="__span-13-49"><a id="__codelineno-13-49" name="__codelineno-13-49" href="#__codelineno-13-49"></a> <span class="n">ip_adapter</span><span class="o">.</span><span class="n">set_clip_image_embedding</span><span class="p">(</span><span class="n">clip_image_embedding</span><span class="p">)</span>
|
|
</span><span id="__span-13-50"><a id="__codelineno-13-50" name="__codelineno-13-50" href="#__codelineno-13-50"></a>
|
|
</span><span id="__span-13-51"><a id="__codelineno-13-51" name="__codelineno-13-51" href="#__codelineno-13-51"></a> <span class="n">manual_seed</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
|
|
</span><span id="__span-13-52"><a id="__codelineno-13-52" name="__codelineno-13-52" href="#__codelineno-13-52"></a> <span class="n">x</span> <span class="o">=</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">init_latents</span><span class="p">((</span><span class="mi">1024</span><span class="p">,</span> <span class="mi">1024</span><span class="p">))</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">sdxl</span><span class="o">.</span><span class="n">device</span><span class="p">,</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span>
|
|
</span><span id="__span-13-53"><a id="__codelineno-13-53" name="__codelineno-13-53" href="#__codelineno-13-53"></a>
|
|
</span><span id="__span-13-54"><a id="__codelineno-13-54" name="__codelineno-13-54" href="#__codelineno-13-54"></a> <span class="c1"># Diffusion process</span>
|
|
</span><span id="__span-13-55"><a id="__codelineno-13-55" name="__codelineno-13-55" href="#__codelineno-13-55"></a> <span class="k">for</span> <span class="n">step</span> <span class="ow">in</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">steps</span><span class="p">:</span>
|
|
</span><span id="__span-13-56"><a id="__codelineno-13-56" name="__codelineno-13-56" href="#__codelineno-13-56"></a> <span class="k">if</span> <span class="n">step</span> <span class="o">%</span> <span class="mi">10</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
|
</span><span id="__span-13-57"><a id="__codelineno-13-57" name="__codelineno-13-57" href="#__codelineno-13-57"></a> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Step </span><span class="si">{</span><span class="n">step</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
|
</span><span id="__span-13-58"><a id="__codelineno-13-58" name="__codelineno-13-58" href="#__codelineno-13-58"></a> <span class="n">x</span> <span class="o">=</span> <span class="n">sdxl</span><span class="p">(</span>
|
|
</span><span id="__span-13-59"><a id="__codelineno-13-59" name="__codelineno-13-59" href="#__codelineno-13-59"></a> <span class="n">x</span><span class="p">,</span>
|
|
</span><span id="__span-13-60"><a id="__codelineno-13-60" name="__codelineno-13-60" href="#__codelineno-13-60"></a> <span class="n">step</span><span class="o">=</span><span class="n">step</span><span class="p">,</span>
|
|
</span><span id="__span-13-61"><a id="__codelineno-13-61" name="__codelineno-13-61" href="#__codelineno-13-61"></a> <span class="n">clip_text_embedding</span><span class="o">=</span><span class="n">clip_text_embedding</span><span class="p">,</span>
|
|
</span><span id="__span-13-62"><a id="__codelineno-13-62" name="__codelineno-13-62" href="#__codelineno-13-62"></a> <span class="n">pooled_text_embedding</span><span class="o">=</span><span class="n">pooled_text_embedding</span><span class="p">,</span>
|
|
</span><span id="__span-13-63"><a id="__codelineno-13-63" name="__codelineno-13-63" href="#__codelineno-13-63"></a> <span class="n">time_ids</span><span class="o">=</span><span class="n">time_ids</span><span class="p">,</span>
|
|
</span><span id="__span-13-64"><a id="__codelineno-13-64" name="__codelineno-13-64" href="#__codelineno-13-64"></a> <span class="p">)</span>
|
|
</span><span id="__span-13-65"><a id="__codelineno-13-65" name="__codelineno-13-65" href="#__codelineno-13-65"></a> <span class="n">predicted_image</span> <span class="o">=</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">lda</span><span class="o">.</span><span class="n">decode_latents</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
|
|
</span><span id="__span-13-66"><a id="__codelineno-13-66" name="__codelineno-13-66" href="#__codelineno-13-66"></a>
|
|
</span><span id="__span-13-67"><a id="__codelineno-13-67" name="__codelineno-13-67" href="#__codelineno-13-67"></a><span class="n">predicted_image</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="s2">"scifi_pixel_IP_sdxl.png"</span><span class="p">)</span>
|
|
</span></code></pre></div>
|
|
</details>
|
|
<p>The result looks convincing: we do get a <em>pixel-art, futuristic-looking Neuschwanstein castle</em>!</p>
|
|
<figure>
|
|
<img src="scifi_pixel_IP_sdxl.webp" alt="Generated image in sci-fi, pixel art style, using IP-Adapter." width="400">
|
|
<figcaption>Generated image in sci-fi, pixel art style, using IP-Adapter.</figcaption>
|
|
</figure>
|
|
|
|
<h2 id="everything-else-t2i-adapter">Everything else + T2I-Adapter<a class="headerlink" href="#everything-else-t2i-adapter" title="Permanent link">¶</a></h2>
|
|
<p>T2I-Adapters<sup id="fnref:1"><a class="footnote-ref" href="#fn:1">1</a></sup> are a powerful class of Adapters that aim to control the Text-to-Image (T2I) diffusion process with external control signals, such as Canny edges or pose estimation inputs.
|
|
In this section, we will compose our previous example with the <a href="https://huggingface.co/TencentARC/t2i-adapter-depth-zoe-sdxl-1.0">Depth-Zoe Adapter</a>, providing a depth condition to the diffusion process using the following depth map as input signal:</p>
|
|
<figure>
|
|
<img src="zoe-depth-map-german-castle.png" alt="Input depth map of the initial castle image" width="400">
|
|
<figcaption>Input depth map of the initial castle image.</figcaption>
|
|
</figure>
|
|
|
|
<p>First, download the image, then download and convert the weights of T2I-Depth-Zoe-Adapter, by running the following commands:</p>
|
|
<div class="language-bash highlight"><pre><span></span><code><span id="__span-14-1"><a id="__codelineno-14-1" name="__codelineno-14-1" href="#__codelineno-14-1"></a>curl<span class="w"> </span>-O<span class="w"> </span>https://refine.rs/guides/adapting_sdxl/zoe-depth-map-german-castle.png
|
|
</span><span id="__span-14-2"><a id="__codelineno-14-2" name="__codelineno-14-2" href="#__codelineno-14-2"></a>python<span class="w"> </span>scripts/conversion/convert_diffusers_t2i_adapter.py<span class="w"> </span>--from<span class="w"> </span><span class="s2">"TencentARC/t2i-adapter-depth-zoe-sdxl-1.0"</span><span class="w"> </span>--to<span class="w"> </span>t2i_depth_zoe_xl.safetensors<span class="w"> </span>--half
|
|
</span></code></pre></div>
|
|
<p>Then, just inject it as usual:</p>
|
|
<div class="language-py highlight"><pre><span></span><code><span id="__span-15-1"><a id="__codelineno-15-1" name="__codelineno-15-1" href="#__codelineno-15-1"></a><span class="c1"># Load T2I-Adapter</span>
|
|
</span><span id="__span-15-2"><a id="__codelineno-15-2" name="__codelineno-15-2" href="#__codelineno-15-2"></a><span class="n">t2i_adapter</span> <span class="o">=</span> <span class="n">SDXLT2IAdapter</span><span class="p">(</span>
|
|
</span><span id="__span-15-3"><a id="__codelineno-15-3" name="__codelineno-15-3" href="#__codelineno-15-3"></a> <span class="n">target</span><span class="o">=</span><span class="n">sdxl</span><span class="o">.</span><span class="n">unet</span><span class="p">,</span>
|
|
</span><span id="__span-15-4"><a id="__codelineno-15-4" name="__codelineno-15-4" href="#__codelineno-15-4"></a> <span class="n">name</span><span class="o">=</span><span class="s2">"zoe-depth"</span><span class="p">,</span>
|
|
</span><span id="__span-15-5"><a id="__codelineno-15-5" name="__codelineno-15-5" href="#__codelineno-15-5"></a> <span class="n">weights</span><span class="o">=</span><span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"t2i_depth_zoe_xl.safetensors"</span><span class="p">),</span>
|
|
</span><span id="__span-15-6"><a id="__codelineno-15-6" name="__codelineno-15-6" href="#__codelineno-15-6"></a> <span class="n">scale</span><span class="o">=</span><span class="mf">0.72</span><span class="p">,</span>
|
|
</span><span id="__span-15-7"><a id="__codelineno-15-7" name="__codelineno-15-7" href="#__codelineno-15-7"></a><span class="p">)</span><span class="o">.</span><span class="n">inject</span><span class="p">()</span>
|
|
</span></code></pre></div>
|
|
<p>Finally, at runtime, compute the features of the input condition through the <code>t2i_adapter</code> object, and set them by calling <code>.set_condition_features()</code>:</p>
|
|
<div class="language-py highlight"><pre><span></span><code><span id="__span-16-1"><a id="__codelineno-16-1" name="__codelineno-16-1" href="#__codelineno-16-1"></a><span class="kn">from</span> <span class="nn">refiners.fluxion.utils</span> <span class="kn">import</span> <span class="n">image_to_tensor</span><span class="p">,</span> <span class="n">interpolate</span>
|
|
</span><span id="__span-16-2"><a id="__codelineno-16-2" name="__codelineno-16-2" href="#__codelineno-16-2"></a>
|
|
</span><span id="__span-16-3"><a id="__codelineno-16-3" name="__codelineno-16-3" href="#__codelineno-16-3"></a><span class="n">image_depth_condition</span> <span class="o">=</span> <span class="n">Image</span><span class="o">.</span><span class="n">open</span><span class="p">(</span><span class="s2">"zoe-depth-map-german-castle.png"</span><span class="p">)</span>
|
|
</span><span id="__span-16-4"><a id="__codelineno-16-4" name="__codelineno-16-4" href="#__codelineno-16-4"></a>
|
|
</span><span id="__span-16-5"><a id="__codelineno-16-5" name="__codelineno-16-5" href="#__codelineno-16-5"></a><span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">():</span>
|
|
</span><span id="__span-16-6"><a id="__codelineno-16-6" name="__codelineno-16-6" href="#__codelineno-16-6"></a> <span class="n">condition</span> <span class="o">=</span> <span class="n">image_to_tensor</span><span class="p">(</span><span class="n">image_depth_condition</span><span class="o">.</span><span class="n">convert</span><span class="p">(</span><span class="s2">"RGB"</span><span class="p">),</span> <span class="n">device</span><span class="o">=</span><span class="n">sdxl</span><span class="o">.</span><span class="n">device</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">sdxl</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span>
|
|
</span><span id="__span-16-7"><a id="__codelineno-16-7" name="__codelineno-16-7" href="#__codelineno-16-7"></a> <span class="c1"># Spatial dimensions should be divisible by default downscale factor (=16 for T2IAdapter ConditionEncoder)</span>
|
|
</span><span id="__span-16-8"><a id="__codelineno-16-8" name="__codelineno-16-8" href="#__codelineno-16-8"></a> <span class="n">condition</span> <span class="o">=</span> <span class="n">interpolate</span><span class="p">(</span><span class="n">condition</span><span class="p">,</span> <span class="n">torch</span><span class="o">.</span><span class="n">Size</span><span class="p">((</span><span class="mi">1024</span><span class="p">,</span> <span class="mi">1024</span><span class="p">)))</span>
|
|
</span><span id="__span-16-9"><a id="__codelineno-16-9" name="__codelineno-16-9" href="#__codelineno-16-9"></a> <span class="n">t2i_adapter</span><span class="o">.</span><span class="n">set_condition_features</span><span class="p">(</span><span class="n">features</span><span class="o">=</span><span class="n">t2i_adapter</span><span class="o">.</span><span class="n">compute_condition_features</span><span class="p">(</span><span class="n">condition</span><span class="p">))</span>
|
|
</span></code></pre></div>
|
|
<details class="example">
|
|
<summary>Expand to see the entire end-to-end code</summary>
|
|
<div class="language-py highlight"><pre><span></span><code><span id="__span-17-1"><a id="__codelineno-17-1" name="__codelineno-17-1" href="#__codelineno-17-1"></a><span class="kn">import</span> <span class="nn">torch</span>
|
|
</span><span id="__span-17-2"><a id="__codelineno-17-2" name="__codelineno-17-2" href="#__codelineno-17-2"></a><span class="kn">from</span> <span class="nn">PIL</span> <span class="kn">import</span> <span class="n">Image</span>
|
|
</span><span id="__span-17-3"><a id="__codelineno-17-3" name="__codelineno-17-3" href="#__codelineno-17-3"></a>
|
|
</span><span id="__span-17-4"><a id="__codelineno-17-4" name="__codelineno-17-4" href="#__codelineno-17-4"></a><span class="kn">from</span> <span class="nn">refiners.fluxion.utils</span> <span class="kn">import</span> <span class="n">load_from_safetensors</span><span class="p">,</span> <span class="n">manual_seed</span><span class="p">,</span> <span class="n">no_grad</span><span class="p">,</span> <span class="n">image_to_tensor</span>
|
|
</span><span id="__span-17-5"><a id="__codelineno-17-5" name="__codelineno-17-5" href="#__codelineno-17-5"></a><span class="kn">from</span> <span class="nn">refiners.foundationals.latent_diffusion.lora</span> <span class="kn">import</span> <span class="n">SDLoraManager</span>
|
|
</span><span id="__span-17-6"><a id="__codelineno-17-6" name="__codelineno-17-6" href="#__codelineno-17-6"></a><span class="kn">from</span> <span class="nn">refiners.foundationals.latent_diffusion.stable_diffusion_xl</span> <span class="kn">import</span> <span class="n">StableDiffusion_XL</span><span class="p">,</span> <span class="n">SDXLT2IAdapter</span>
|
|
</span><span id="__span-17-7"><a id="__codelineno-17-7" name="__codelineno-17-7" href="#__codelineno-17-7"></a><span class="kn">from</span> <span class="nn">refiners.foundationals.latent_diffusion.stable_diffusion_xl.image_prompt</span> <span class="kn">import</span> <span class="n">SDXLIPAdapter</span>
|
|
</span><span id="__span-17-8"><a id="__codelineno-17-8" name="__codelineno-17-8" href="#__codelineno-17-8"></a>
|
|
</span><span id="__span-17-9"><a id="__codelineno-17-9" name="__codelineno-17-9" href="#__codelineno-17-9"></a><span class="c1"># Load SDXL</span>
|
|
</span><span id="__span-17-10"><a id="__codelineno-17-10" name="__codelineno-17-10" href="#__codelineno-17-10"></a><span class="n">sdxl</span> <span class="o">=</span> <span class="n">StableDiffusion_XL</span><span class="p">(</span><span class="n">device</span><span class="o">=</span><span class="s2">"cuda"</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">torch</span><span class="o">.</span><span class="n">float16</span><span class="p">)</span>
|
|
</span><span id="__span-17-11"><a id="__codelineno-17-11" name="__codelineno-17-11" href="#__codelineno-17-11"></a><span class="n">sdxl</span><span class="o">.</span><span class="n">clip_text_encoder</span><span class="o">.</span><span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"DoubleCLIPTextEncoder.safetensors"</span><span class="p">)</span>
|
|
</span><span id="__span-17-12"><a id="__codelineno-17-12" name="__codelineno-17-12" href="#__codelineno-17-12"></a><span class="n">sdxl</span><span class="o">.</span><span class="n">unet</span><span class="o">.</span><span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"sdxl-unet.safetensors"</span><span class="p">)</span>
|
|
</span><span id="__span-17-13"><a id="__codelineno-17-13" name="__codelineno-17-13" href="#__codelineno-17-13"></a><span class="n">sdxl</span><span class="o">.</span><span class="n">lda</span><span class="o">.</span><span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"sdxl-lda.safetensors"</span><span class="p">)</span>
|
|
</span><span id="__span-17-14"><a id="__codelineno-17-14" name="__codelineno-17-14" href="#__codelineno-17-14"></a>
|
|
</span><span id="__span-17-15"><a id="__codelineno-17-15" name="__codelineno-17-15" href="#__codelineno-17-15"></a><span class="c1"># Load LoRA weights from disk and inject them into target</span>
|
|
</span><span id="__span-17-16"><a id="__codelineno-17-16" name="__codelineno-17-16" href="#__codelineno-17-16"></a><span class="n">manager</span> <span class="o">=</span> <span class="n">SDLoraManager</span><span class="p">(</span><span class="n">sdxl</span><span class="p">)</span>
|
|
</span><span id="__span-17-17"><a id="__codelineno-17-17" name="__codelineno-17-17" href="#__codelineno-17-17"></a><span class="n">scifi_lora_weights</span> <span class="o">=</span> <span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"Sci-fi_Environments_sdxl.safetensors"</span><span class="p">)</span>
|
|
</span><span id="__span-17-18"><a id="__codelineno-17-18" name="__codelineno-17-18" href="#__codelineno-17-18"></a><span class="n">pixel_art_lora_weights</span> <span class="o">=</span> <span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"pixel-art-xl-v1.1.safetensors"</span><span class="p">)</span>
|
|
</span><span id="__span-17-19"><a id="__codelineno-17-19" name="__codelineno-17-19" href="#__codelineno-17-19"></a><span class="n">manager</span><span class="o">.</span><span class="n">add_loras</span><span class="p">(</span><span class="s2">"scifi-lora"</span><span class="p">,</span> <span class="n">scifi_lora_weights</span><span class="p">,</span> <span class="n">scale</span><span class="o">=</span><span class="mf">1.5</span><span class="p">)</span>
|
|
</span><span id="__span-17-20"><a id="__codelineno-17-20" name="__codelineno-17-20" href="#__codelineno-17-20"></a><span class="n">manager</span><span class="o">.</span><span class="n">add_loras</span><span class="p">(</span><span class="s2">"pixel-art-lora"</span><span class="p">,</span> <span class="n">pixel_art_lora_weights</span><span class="p">,</span> <span class="n">scale</span><span class="o">=</span><span class="mf">1.55</span><span class="p">)</span>
|
|
</span><span id="__span-17-21"><a id="__codelineno-17-21" name="__codelineno-17-21" href="#__codelineno-17-21"></a>
|
|
</span><span id="__span-17-22"><a id="__codelineno-17-22" name="__codelineno-17-22" href="#__codelineno-17-22"></a><span class="c1"># Load IP-Adapter</span>
|
|
</span><span id="__span-17-23"><a id="__codelineno-17-23" name="__codelineno-17-23" href="#__codelineno-17-23"></a><span class="n">ip_adapter</span> <span class="o">=</span> <span class="n">SDXLIPAdapter</span><span class="p">(</span>
|
|
</span><span id="__span-17-24"><a id="__codelineno-17-24" name="__codelineno-17-24" href="#__codelineno-17-24"></a> <span class="n">target</span><span class="o">=</span><span class="n">sdxl</span><span class="o">.</span><span class="n">unet</span><span class="p">,</span>
|
|
</span><span id="__span-17-25"><a id="__codelineno-17-25" name="__codelineno-17-25" href="#__codelineno-17-25"></a> <span class="n">weights</span><span class="o">=</span><span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"ip-adapter-plus_sdxl_vit-h.safetensors"</span><span class="p">),</span>
|
|
</span><span id="__span-17-26"><a id="__codelineno-17-26" name="__codelineno-17-26" href="#__codelineno-17-26"></a> <span class="n">scale</span><span class="o">=</span><span class="mf">1.0</span><span class="p">,</span>
|
|
</span><span id="__span-17-27"><a id="__codelineno-17-27" name="__codelineno-17-27" href="#__codelineno-17-27"></a> <span class="n">fine_grained</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="c1"># Use fine-grained IP-Adapter (IP-Adapter Plus)</span>
|
|
</span><span id="__span-17-28"><a id="__codelineno-17-28" name="__codelineno-17-28" href="#__codelineno-17-28"></a><span class="p">)</span>
|
|
</span><span id="__span-17-29"><a id="__codelineno-17-29" name="__codelineno-17-29" href="#__codelineno-17-29"></a><span class="n">ip_adapter</span><span class="o">.</span><span class="n">clip_image_encoder</span><span class="o">.</span><span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"CLIPImageEncoderH.safetensors"</span><span class="p">)</span>
|
|
</span><span id="__span-17-30"><a id="__codelineno-17-30" name="__codelineno-17-30" href="#__codelineno-17-30"></a><span class="n">ip_adapter</span><span class="o">.</span><span class="n">inject</span><span class="p">()</span>
|
|
</span><span id="__span-17-31"><a id="__codelineno-17-31" name="__codelineno-17-31" href="#__codelineno-17-31"></a>
|
|
</span><span id="__span-17-32"><a id="__codelineno-17-32" name="__codelineno-17-32" href="#__codelineno-17-32"></a><span class="c1"># Load T2I-Adapter</span>
|
|
</span><span id="__span-17-33"><a id="__codelineno-17-33" name="__codelineno-17-33" href="#__codelineno-17-33"></a><span class="n">t2i_adapter</span> <span class="o">=</span> <span class="n">SDXLT2IAdapter</span><span class="p">(</span>
|
|
</span><span id="__span-17-34"><a id="__codelineno-17-34" name="__codelineno-17-34" href="#__codelineno-17-34"></a> <span class="n">target</span><span class="o">=</span><span class="n">sdxl</span><span class="o">.</span><span class="n">unet</span><span class="p">,</span>
|
|
</span><span id="__span-17-35"><a id="__codelineno-17-35" name="__codelineno-17-35" href="#__codelineno-17-35"></a> <span class="n">name</span><span class="o">=</span><span class="s2">"zoe-depth"</span><span class="p">,</span>
|
|
</span><span id="__span-17-36"><a id="__codelineno-17-36" name="__codelineno-17-36" href="#__codelineno-17-36"></a> <span class="n">weights</span><span class="o">=</span><span class="n">load_from_safetensors</span><span class="p">(</span><span class="s2">"t2i_depth_zoe_xl.safetensors"</span><span class="p">),</span>
|
|
</span><span id="__span-17-37"><a id="__codelineno-17-37" name="__codelineno-17-37" href="#__codelineno-17-37"></a> <span class="n">scale</span><span class="o">=</span><span class="mf">0.72</span><span class="p">,</span>
|
|
</span><span id="__span-17-38"><a id="__codelineno-17-38" name="__codelineno-17-38" href="#__codelineno-17-38"></a><span class="p">)</span><span class="o">.</span><span class="n">inject</span><span class="p">()</span>
|
|
</span><span id="__span-17-39"><a id="__codelineno-17-39" name="__codelineno-17-39" href="#__codelineno-17-39"></a>
|
|
</span><span id="__span-17-40"><a id="__codelineno-17-40" name="__codelineno-17-40" href="#__codelineno-17-40"></a><span class="c1"># Hyperparameters</span>
|
|
</span><span id="__span-17-41"><a id="__codelineno-17-41" name="__codelineno-17-41" href="#__codelineno-17-41"></a><span class="n">prompt</span> <span class="o">=</span> <span class="s2">"a futuristic castle surrounded by a forest, mountains in the background"</span>
|
|
</span><span id="__span-17-42"><a id="__codelineno-17-42" name="__codelineno-17-42" href="#__codelineno-17-42"></a><span class="n">image_prompt</span> <span class="o">=</span> <span class="n">Image</span><span class="o">.</span><span class="n">open</span><span class="p">(</span><span class="s2">"german-castle.jpg"</span><span class="p">)</span>
|
|
</span><span id="__span-17-43"><a id="__codelineno-17-43" name="__codelineno-17-43" href="#__codelineno-17-43"></a><span class="n">image_depth_condition</span> <span class="o">=</span> <span class="n">Image</span><span class="o">.</span><span class="n">open</span><span class="p">(</span><span class="s2">"zoe-depth-map-german-castle.png"</span><span class="p">)</span>
|
|
</span><span id="__span-17-44"><a id="__codelineno-17-44" name="__codelineno-17-44" href="#__codelineno-17-44"></a><span class="n">seed</span> <span class="o">=</span> <span class="mi">42</span>
|
|
</span><span id="__span-17-45"><a id="__codelineno-17-45" name="__codelineno-17-45" href="#__codelineno-17-45"></a><span class="n">sdxl</span><span class="o">.</span><span class="n">set_inference_steps</span><span class="p">(</span><span class="mi">50</span><span class="p">,</span> <span class="n">first_step</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
|
</span><span id="__span-17-46"><a id="__codelineno-17-46" name="__codelineno-17-46" href="#__codelineno-17-46"></a><span class="n">sdxl</span><span class="o">.</span><span class="n">set_self_attention_guidance</span><span class="p">(</span>
|
|
</span><span id="__span-17-47"><a id="__codelineno-17-47" name="__codelineno-17-47" href="#__codelineno-17-47"></a> <span class="n">enable</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">scale</span><span class="o">=</span><span class="mf">0.75</span>
|
|
</span><span id="__span-17-48"><a id="__codelineno-17-48" name="__codelineno-17-48" href="#__codelineno-17-48"></a><span class="p">)</span> <span class="c1"># Enable self-attention guidance to enhance the quality of the generated images</span>
|
|
</span><span id="__span-17-49"><a id="__codelineno-17-49" name="__codelineno-17-49" href="#__codelineno-17-49"></a>
|
|
</span><span id="__span-17-50"><a id="__codelineno-17-50" name="__codelineno-17-50" href="#__codelineno-17-50"></a><span class="k">with</span> <span class="n">no_grad</span><span class="p">():</span>
|
|
</span><span id="__span-17-51"><a id="__codelineno-17-51" name="__codelineno-17-51" href="#__codelineno-17-51"></a> <span class="n">clip_text_embedding</span><span class="p">,</span> <span class="n">pooled_text_embedding</span> <span class="o">=</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">compute_clip_text_embedding</span><span class="p">(</span>
|
|
</span><span id="__span-17-52"><a id="__codelineno-17-52" name="__codelineno-17-52" href="#__codelineno-17-52"></a> <span class="n">text</span><span class="o">=</span><span class="n">prompt</span> <span class="o">+</span> <span class="s2">", best quality, high quality"</span><span class="p">,</span>
|
|
</span><span id="__span-17-53"><a id="__codelineno-17-53" name="__codelineno-17-53" href="#__codelineno-17-53"></a> <span class="n">negative_text</span><span class="o">=</span><span class="s2">"monochrome, lowres, bad anatomy, worst quality, low quality"</span><span class="p">,</span>
|
|
</span><span id="__span-17-54"><a id="__codelineno-17-54" name="__codelineno-17-54" href="#__codelineno-17-54"></a> <span class="p">)</span>
|
|
</span><span id="__span-17-55"><a id="__codelineno-17-55" name="__codelineno-17-55" href="#__codelineno-17-55"></a> <span class="n">time_ids</span> <span class="o">=</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">default_time_ids</span>
|
|
</span><span id="__span-17-56"><a id="__codelineno-17-56" name="__codelineno-17-56" href="#__codelineno-17-56"></a>
|
|
</span><span id="__span-17-57"><a id="__codelineno-17-57" name="__codelineno-17-57" href="#__codelineno-17-57"></a> <span class="n">clip_image_embedding</span> <span class="o">=</span> <span class="n">ip_adapter</span><span class="o">.</span><span class="n">compute_clip_image_embedding</span><span class="p">(</span><span class="n">ip_adapter</span><span class="o">.</span><span class="n">preprocess_image</span><span class="p">(</span><span class="n">image_prompt</span><span class="p">))</span>
|
|
</span><span id="__span-17-58"><a id="__codelineno-17-58" name="__codelineno-17-58" href="#__codelineno-17-58"></a> <span class="n">ip_adapter</span><span class="o">.</span><span class="n">set_clip_image_embedding</span><span class="p">(</span><span class="n">clip_image_embedding</span><span class="p">)</span>
|
|
</span><span id="__span-17-59"><a id="__codelineno-17-59" name="__codelineno-17-59" href="#__codelineno-17-59"></a>
|
|
</span><span id="__span-17-60"><a id="__codelineno-17-60" name="__codelineno-17-60" href="#__codelineno-17-60"></a> <span class="c1"># Spatial dimensions should be divisible by default downscale factor (=16 for T2IAdapter ConditionEncoder)</span>
|
|
</span><span id="__span-17-61"><a id="__codelineno-17-61" name="__codelineno-17-61" href="#__codelineno-17-61"></a> <span class="n">condition</span> <span class="o">=</span> <span class="n">image_to_tensor</span><span class="p">(</span><span class="n">image_depth_condition</span><span class="o">.</span><span class="n">convert</span><span class="p">(</span><span class="s2">"RGB"</span><span class="p">)</span><span class="o">.</span><span class="n">resize</span><span class="p">((</span><span class="mi">1024</span><span class="p">,</span> <span class="mi">1024</span><span class="p">)),</span> <span class="n">device</span><span class="o">=</span><span class="n">sdxl</span><span class="o">.</span><span class="n">device</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">sdxl</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span>
|
|
</span><span id="__span-17-62"><a id="__codelineno-17-62" name="__codelineno-17-62" href="#__codelineno-17-62"></a> <span class="n">t2i_adapter</span><span class="o">.</span><span class="n">set_condition_features</span><span class="p">(</span><span class="n">features</span><span class="o">=</span><span class="n">t2i_adapter</span><span class="o">.</span><span class="n">compute_condition_features</span><span class="p">(</span><span class="n">condition</span><span class="p">))</span>
|
|
</span><span id="__span-17-63"><a id="__codelineno-17-63" name="__codelineno-17-63" href="#__codelineno-17-63"></a>
|
|
</span><span id="__span-17-64"><a id="__codelineno-17-64" name="__codelineno-17-64" href="#__codelineno-17-64"></a> <span class="n">manual_seed</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
|
|
</span><span id="__span-17-65"><a id="__codelineno-17-65" name="__codelineno-17-65" href="#__codelineno-17-65"></a> <span class="n">x</span> <span class="o">=</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">init_latents</span><span class="p">((</span><span class="mi">1024</span><span class="p">,</span> <span class="mi">1024</span><span class="p">))</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">sdxl</span><span class="o">.</span><span class="n">device</span><span class="p">,</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span>
|
|
</span><span id="__span-17-66"><a id="__codelineno-17-66" name="__codelineno-17-66" href="#__codelineno-17-66"></a>
|
|
</span><span id="__span-17-67"><a id="__codelineno-17-67" name="__codelineno-17-67" href="#__codelineno-17-67"></a> <span class="c1"># Diffusion process</span>
|
|
</span><span id="__span-17-68"><a id="__codelineno-17-68" name="__codelineno-17-68" href="#__codelineno-17-68"></a> <span class="k">for</span> <span class="n">step</span> <span class="ow">in</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">steps</span><span class="p">:</span>
|
|
</span><span id="__span-17-69"><a id="__codelineno-17-69" name="__codelineno-17-69" href="#__codelineno-17-69"></a> <span class="k">if</span> <span class="n">step</span> <span class="o">%</span> <span class="mi">10</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
|
</span><span id="__span-17-70"><a id="__codelineno-17-70" name="__codelineno-17-70" href="#__codelineno-17-70"></a> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Step </span><span class="si">{</span><span class="n">step</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
|
</span><span id="__span-17-71"><a id="__codelineno-17-71" name="__codelineno-17-71" href="#__codelineno-17-71"></a> <span class="n">x</span> <span class="o">=</span> <span class="n">sdxl</span><span class="p">(</span>
|
|
</span><span id="__span-17-72"><a id="__codelineno-17-72" name="__codelineno-17-72" href="#__codelineno-17-72"></a> <span class="n">x</span><span class="p">,</span>
|
|
</span><span id="__span-17-73"><a id="__codelineno-17-73" name="__codelineno-17-73" href="#__codelineno-17-73"></a> <span class="n">step</span><span class="o">=</span><span class="n">step</span><span class="p">,</span>
|
|
</span><span id="__span-17-74"><a id="__codelineno-17-74" name="__codelineno-17-74" href="#__codelineno-17-74"></a> <span class="n">clip_text_embedding</span><span class="o">=</span><span class="n">clip_text_embedding</span><span class="p">,</span>
|
|
</span><span id="__span-17-75"><a id="__codelineno-17-75" name="__codelineno-17-75" href="#__codelineno-17-75"></a> <span class="n">pooled_text_embedding</span><span class="o">=</span><span class="n">pooled_text_embedding</span><span class="p">,</span>
|
|
</span><span id="__span-17-76"><a id="__codelineno-17-76" name="__codelineno-17-76" href="#__codelineno-17-76"></a> <span class="n">time_ids</span><span class="o">=</span><span class="n">time_ids</span><span class="p">,</span>
|
|
</span><span id="__span-17-77"><a id="__codelineno-17-77" name="__codelineno-17-77" href="#__codelineno-17-77"></a> <span class="p">)</span>
|
|
</span><span id="__span-17-78"><a id="__codelineno-17-78" name="__codelineno-17-78" href="#__codelineno-17-78"></a> <span class="n">predicted_image</span> <span class="o">=</span> <span class="n">sdxl</span><span class="o">.</span><span class="n">lda</span><span class="o">.</span><span class="n">decode_latents</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
|
|
</span><span id="__span-17-79"><a id="__codelineno-17-79" name="__codelineno-17-79" href="#__codelineno-17-79"></a>
|
|
</span><span id="__span-17-80"><a id="__codelineno-17-80" name="__codelineno-17-80" href="#__codelineno-17-80"></a><span class="n">predicted_image</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="s2">"scifi_pixel_IP_T2I_sdxl.png"</span><span class="p">)</span>
|
|
</span></code></pre></div>
|
|
</details>
|
|
<p>The results look convincing: the depth and proportions of the initial castle are reproduced more faithfully, while our <em>futuristic, pixel-art style</em> is preserved!</p>
|
|
<figure>
|
|
<img src="scifi_pixel_IP_T2I_sdxl.webp" alt="Generated image in sci-fi, pixel art style, using IP and T2I Adapters" width="400">
|
|
<figcaption>Generated image in sci-fi, pixel art style, using IP and T2I Adapters.</figcaption>
|
|
</figure>
|
|
|
|
<h2 id="wrap-up">Wrap up<a class="headerlink" href="#wrap-up" title="Permanent link">¶</a></h2>
|
|
<p>As you can see in this guide, composing Adapters on top of foundation models is pretty seamless in Refiners, allowing practitioners to quickly test out different combinations of Adapters for their needs. We encourage you to try out different ones, and even train some yourselves!</p>
|
|
<div class="footnote">
|
|
<hr />
|
|
<ol>
|
|
<li id="fn:1">
|
|
<p>Mou, C., Wang, X., Xie, L., Zhang, J., Qi, Z., Shan, Y., &amp; Qie, X. (2023). T2i-adapter: Learning adapters to dig out more controllable ability for text-to-image diffusion models. <a class="footnote-backref" href="#fnref:1" title="Jump back to footnote 1 in the text">↩</a></p>
|
|
</li>
|
|
</ol>
|
|
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
</article>
|
|
</div>
|
|
|
|
|
|
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
|
|
</div>
|
|
|
|
<button type="button" class="md-top md-icon" data-md-component="top" hidden>
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8z"/></svg>
|
|
Back to top
|
|
</button>
|
|
|
|
</main>
|
|
|
|
<footer class="md-footer">
|
|
|
|
<div class="md-footer-meta md-typeset">
|
|
<div class="md-footer-meta__inner md-grid">
|
|
<div class="md-copyright">
|
|
|
|
<div class="md-copyright__highlight">
|
|
© Lagon Technologies
|
|
</div>
|
|
|
|
|
|
Made with
|
|
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
|
|
Material for MkDocs
|
|
</a>
|
|
|
|
</div>
|
|
|
|
<div class="md-social">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<a href="https://discord.gg/mCmjNUVV7d" target="_blank" rel="noopener" title="discord.gg" class="md-social__link">
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 640 512"><!--! Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M524.531 69.836a1.5 1.5 0 0 0-.764-.7A485 485 0 0 0 404.081 32.03a1.82 1.82 0 0 0-1.923.91 338 338 0 0 0-14.9 30.6 447.9 447.9 0 0 0-134.426 0 310 310 0 0 0-15.135-30.6 1.89 1.89 0 0 0-1.924-.91 483.7 483.7 0 0 0-119.688 37.107 1.7 1.7 0 0 0-.788.676C39.068 183.651 18.186 294.69 28.43 404.354a2.02 2.02 0 0 0 .765 1.375 487.7 487.7 0 0 0 146.825 74.189 1.9 1.9 0 0 0 2.063-.676A348 348 0 0 0 208.12 430.4a1.86 1.86 0 0 0-1.019-2.588 321 321 0 0 1-45.868-21.853 1.885 1.885 0 0 1-.185-3.126 251 251 0 0 0 9.109-7.137 1.82 1.82 0 0 1 1.9-.256c96.229 43.917 200.41 43.917 295.5 0a1.81 1.81 0 0 1 1.924.233 235 235 0 0 0 9.132 7.16 1.884 1.884 0 0 1-.162 3.126 301.4 301.4 0 0 1-45.89 21.83 1.875 1.875 0 0 0-1 2.611 391 391 0 0 0 30.014 48.815 1.86 1.86 0 0 0 2.063.7A486 486 0 0 0 610.7 405.729a1.88 1.88 0 0 0 .765-1.352c12.264-126.783-20.532-236.912-86.934-334.541M222.491 337.58c-28.972 0-52.844-26.587-52.844-59.239s23.409-59.241 52.844-59.241c29.665 0 53.306 26.82 52.843 59.239 0 32.654-23.41 59.241-52.843 59.241m195.38 0c-28.971 0-52.843-26.587-52.843-59.239s23.409-59.241 52.843-59.241c29.667 0 53.307 26.82 52.844 59.239 0 32.654-23.177 59.241-52.844 59.241"/></svg>
|
|
</a>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<a href="https://github.com/finegrain-ai/refiners" target="_blank" rel="noopener" title="github.com" class="md-social__link">
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 496 512"><!--! Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M165.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M244.8 8C106.1 8 0 113.3 0 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C428.2 457.8 496 362.9 496 252 496 113.3 383.5 8 244.8 8M97.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
|
|
</a>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<a href="https://twitter.com/finegrain_ai" target="_blank" rel="noopener" title="twitter.com" class="md-social__link">
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M459.37 151.716c.325 4.548.325 9.097.325 13.645 0 138.72-105.583 298.558-298.558 298.558-59.452 0-114.68-17.219-161.137-47.106 8.447.974 16.568 1.299 25.34 1.299 49.055 0 94.213-16.568 130.274-44.832-46.132-.975-84.792-31.188-98.112-72.772 6.498.974 12.995 1.624 19.818 1.624 9.421 0 18.843-1.3 27.614-3.573-48.081-9.747-84.143-51.98-84.143-102.985v-1.299c13.969 7.797 30.214 12.67 47.431 13.319-28.264-18.843-46.781-51.005-46.781-87.391 0-19.492 5.197-37.36 14.294-52.954 51.655 63.675 129.3 105.258 216.365 109.807-1.624-7.797-2.599-15.918-2.599-24.04 0-57.828 46.782-104.934 104.934-104.934 30.213 0 57.502 12.67 76.67 33.137 23.715-4.548 46.456-13.32 66.599-25.34-7.798 24.366-24.366 44.833-46.132 57.827 21.117-2.273 41.584-8.122 60.426-16.243-14.292 20.791-32.161 39.308-52.628 54.253"/></svg>
|
|
</a>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<a href="https://www.linkedin.com/company/finegrain-ai/" target="_blank" rel="noopener" title="www.linkedin.com" class="md-social__link">
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M416 32H31.9C14.3 32 0 46.5 0 64.3v383.4C0 465.5 14.3 480 31.9 480H416c17.6 0 32-14.5 32-32.3V64.3c0-17.8-14.4-32.3-32-32.3M135.4 416H69V202.2h66.5V416zm-33.2-243c-21.3 0-38.5-17.3-38.5-38.5S80.9 96 102.2 96c21.2 0 38.5 17.3 38.5 38.5 0 21.3-17.2 38.5-38.5 38.5m282.1 243h-66.4V312c0-24.8-.5-56.7-34.5-56.7-34.6 0-39.9 27-39.9 54.9V416h-66.4V202.2h63.7v29.2h.9c8.9-16.8 30.6-34.5 62.9-34.5 67.2 0 79.7 44.3 79.7 101.9z"/></svg>
|
|
</a>
|
|
|
|
</div>
|
|
|
|
</div>
|
|
</div>
|
|
</footer>
|
|
|
|
</div>
|
|
<div class="md-dialog" data-md-component="dialog">
|
|
<div class="md-dialog__inner md-typeset"></div>
|
|
</div>
|
|
|
|
|
|
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.tabs", "navigation.sections", "navigation.top", "navigation.tracking", "navigation.expand", "navigation.path", "toc.follow", "navigation.tabs.sticky", "content.code.copy", "announce.dismiss"], "search": "../../assets/javascripts/workers/search.07f07601.min.js", "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}}</script>
|
|
|
|
|
|
<script src="../../assets/javascripts/bundle.56dfad97.min.js"></script>
|
|
|
|
|
|
</body>
|
|
</html> |