<!--
refiners/reference/foundationals/segment_anything/index.html

4160 lines
242 KiB
HTML
Raw Normal View History
-->

<!doctype html>
<html lang="en" class="no-js">
<head>
<meta charset="utf-8">
<!-- Document metadata: responsive viewport and the site description. -->
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="description" content="A micro framework on top of PyTorch with first class citizen APIs for foundation model adaptation">
<!-- Sequential navigation hints: the pages before and after this one. -->
<link rel="prev" href="../latent_diffusion/">
<link rel="next" href="../swin/">
<link rel="icon" href="../../../assets/favicon.svg">
<meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.5.44">
<title>Segment Anything - Refiners</title>
<!-- Theme stylesheets. The hashed file names are presumably emitted by the generator named above; confirm before editing them by hand. -->
<link rel="stylesheet" href="../../../assets/stylesheets/main.0253249f.min.css">
<link rel="stylesheet" href="../../../assets/stylesheets/palette.06af60db.min.css">
<!-- Web fonts: early connection to the font file host, then the Roboto / Roboto Mono stylesheet. -->
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
<!-- Exposes the two font families above as CSS custom properties. -->
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
<!-- API documentation styles, then site-specific overrides. -->
<link rel="stylesheet" href="../../../assets/_mkdocstrings.css">
<link rel="stylesheet" href="../../../stylesheets/extra.css">
<!-- Inline bootstrap helpers, defined as globals by the first script in the document:
     __md_scope  URL of "../../.." resolved against the current location (presumably the site root for this page depth).
     __md_hash   folds the character codes of a string with h = (h << 5) - h + code, starting from 0.
     __md_get    JSON-parses the storage entry keyed by scope pathname + "." + key (localStorage by default).
     __md_set    JSON-stringifies a value under the same key scheme; errors thrown by setItem are silently ignored. -->
<script>__md_scope=new URL("../../..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
</head>
<body dir="ltr" data-md-color-scheme="default" data-md-color-primary="deep-orange" data-md-color-accent="deep-orange">
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
<label class="md-overlay" for="__drawer"></label>
<!-- Skip link: lets keyboard users bypass the header and navigation.
     The target is the first documented heading (the HQSAMAdapter h2), so the page-level h1 that precedes it is skipped as well.
     NOTE(review): confirm that landing below the h1 is intended. -->
<div data-md-component="skip">
<a href="#refiners.foundationals.segment_anything.HQSAMAdapter" class="md-skip">
Skip to content
</a>
</div>
<!-- Announcement banner.
     The inline script below hashes the innerHTML of the .md-typeset element with __md_hash and hides the whole
     announce container when that hash equals the value stored under "__announce" (read through __md_get).
     Any change to the markup inside .md-banner__inner, including whitespace or comments, therefore changes the hash
     and makes a previously hidden banner visible again. -->
<div data-md-component="announce">
<aside class="md-banner">
<div class="md-banner__inner md-grid md-typeset">
<button class="md-banner__button md-icon" aria-label="Don't show this again">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
</button>
Check out our <a href="https://finegrain.ai/bounties">Bounty Program</a> 💰!
</div>
<script>var el=document.querySelector("[data-md-component=announce]");if(el){var content=el.querySelector(".md-typeset");__md_hash(content.innerHTML)===__md_get("__announce")&&(el.hidden=!0)}</script>
</aside>
</div>
<header class="md-header md-header--shadow md-header--lifted" data-md-component="header">
<nav class="md-header__inner md-grid" aria-label="Header">
<a href="../../.." title="Refiners" class="md-header__button md-logo" aria-label="Refiners" data-md-component="logo">
<img src="../../../assets/favicon.svg" alt="logo">
</a>
<label class="md-header__button md-icon" for="__drawer">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
</label>
<div class="md-header__title" data-md-component="header-title">
<div class="md-header__ellipsis">
<div class="md-header__topic">
<span class="md-ellipsis">
Refiners
</span>
</div>
<div class="md-header__topic" data-md-component="header-topic">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Segment Anything
</span>
</div>
</div>
</div>
<label class="md-header__button md-icon" for="__search">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
</label>
<!-- Search overlay. An element with role="dialog" must expose an accessible name, supplied here through aria-label. -->
<div class="md-search" data-md-component="search" role="dialog" aria-label="Search">
<label class="md-search__overlay" for="__search"></label>
<div class="md-search__inner" role="search">
<form class="md-search__form" name="search">
<input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
<label class="md-search__icon md-icon" for="__search">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
</label>
<nav class="md-search__options" aria-label="Search">
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
</button>
</nav>
</form>
<div class="md-search__output">
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
<div class="md-search-result" data-md-component="search-result">
<div class="md-search-result__meta">
Initializing search
</div>
<ol class="md-search-result__list" role="presentation"></ol>
</div>
</div>
</div>
</div>
</div>
<div class="md-header__source">
<a href="https://github.com/finegrain-ai/refiners" title="Go to repository" class="md-source" data-md-component="source">
<div class="md-source__icon md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg>
</div>
<div class="md-source__repository">
Refiners
</div>
</a>
</div>
</nav>
<nav class="md-tabs" aria-label="Tabs" data-md-component="tabs">
<div class="md-grid">
<ul class="md-tabs__list">
<li class="md-tabs__item">
<a href="../../.." class="md-tabs__link">
Home
</a>
</li>
<li class="md-tabs__item">
<a href="../../../getting-started/recommended/" class="md-tabs__link">
Getting started
</a>
</li>
<li class="md-tabs__item">
<a href="../../../concepts/chain/" class="md-tabs__link">
Key Concepts
</a>
</li>
<li class="md-tabs__item">
<a href="../../../guides/adapting_sdxl/" class="md-tabs__link">
Guides
</a>
</li>
<li class="md-tabs__item md-tabs__item--active">
<a href="../../fluxion/adapters/" class="md-tabs__link">
API Reference
</a>
</li>
</ul>
</div>
</nav>
</header>
<div class="md-container" data-md-component="container">
<main class="md-main" data-md-component="main">
<div class="md-main__inner md-grid">
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0">
<label class="md-nav__title" for="__drawer">
<a href="../../.." title="Refiners" class="md-nav__button md-logo" aria-label="Refiners" data-md-component="logo">
<img src="../../../assets/favicon.svg" alt="logo">
</a>
Refiners
</label>
<div class="md-nav__source">
<a href="https://github.com/finegrain-ai/refiners" title="Go to repository" class="md-source" data-md-component="source">
<div class="md-source__icon md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg>
</div>
<div class="md-source__repository">
Refiners
</div>
</a>
</div>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_1" >
<label class="md-nav__link" for="__nav_1" id="__nav_1_label" tabindex="0">
<span class="md-ellipsis">
Home
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_1">
<span class="md-nav__icon md-icon"></span>
Home
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../.." class="md-nav__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m12 3.77-.75.84S9.97 6.06 8.68 7.94 6 12.07 6 14.23a6 6 0 0 0 6 6 6 6 0 0 0 6-6c0-2.16-1.39-4.41-2.68-6.29s-2.57-3.33-2.57-3.33zm0 3.13c.44.52.84.95 1.68 2.17 1.21 1.76 2.32 4 2.32 5.16 0 2.22-1.78 4-4 4s-4-1.78-4-4c0-1.16 1.11-3.4 2.32-5.16.84-1.22 1.24-1.65 1.68-2.17"/></svg>
<span class="md-ellipsis">
Welcome
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../home/why/" class="md-nav__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11h3v2h-3zM1 11h3v2H1zM13 1v3h-2V1zM4.92 3.5l2.13 2.14-1.42 1.41L3.5 4.93zm12.03 2.13 2.12-2.13 1.43 1.43-2.13 2.12zM12 6a6 6 0 0 1 6 6c0 2.22-1.21 4.16-3 5.2V19a1 1 0 0 1-1 1h-4a1 1 0 0 1-1-1v-1.8c-1.79-1.04-3-2.98-3-5.2a6 6 0 0 1 6-6m2 15v1a1 1 0 0 1-1 1h-2a1 1 0 0 1-1-1v-1zm-3-3h2v-2.13c1.73-.44 3-2.01 3-3.87a4 4 0 0 0-4-4 4 4 0 0 0-4 4c0 1.86 1.27 3.43 3 3.87z"/></svg>
<span class="md-ellipsis">
Manifesto
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_2" >
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
<span class="md-ellipsis">
Getting started
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_2">
<span class="md-nav__icon md-icon"></span>
Getting started
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../../getting-started/recommended/" class="md-nav__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m12 15.39-3.76 2.27.99-4.28-3.32-2.88 4.38-.37L12 6.09l1.71 4.04 4.38.37-3.32 2.88.99 4.28M22 9.24l-7.19-.61L12 2 9.19 8.63 2 9.24l5.45 4.73L5.82 21 12 17.27 18.18 21l-1.64-7.03z"/></svg>
<span class="md-ellipsis">
Recommended usage
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../getting-started/advanced/" class="md-nav__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9 1.09V6H7V1.09C4.16 1.57 2 4.03 2 7c0 2.22 1.21 4.15 3 5.19V21c0 .55.45 1 1 1h4c.55 0 1-.45 1-1v-8.81c1.79-1.04 3-2.97 3-5.19 0-2.97-2.16-5.43-5-5.91m1 9.37-1 .58V20H7v-8.96l-1-.58C4.77 9.74 4 8.42 4 7c0-1 .37-1.94 1-2.65V8h6V4.35c.63.71 1 1.65 1 2.65 0 1.42-.77 2.74-2 3.46m10.94 7.48a3.3 3.3 0 0 0 0-.89l.97-.73a.22.22 0 0 0 .06-.29l-.92-1.56c-.05-.1-.18-.14-.29-.1l-1.15.45c-.24-.17-.49-.32-.78-.44l-.17-1.19a.235.235 0 0 0-.23-.19h-1.85c-.12 0-.22.08-.24.19l-.17 1.19c-.29.12-.54.27-.78.44l-1.15-.45c-.1-.04-.24 0-.28.1l-.93 1.56c-.06.1-.03.22.06.29l.97.73c-.01.15-.03.3-.03.45s.02.29.03.44l-.97.74a.22.22 0 0 0-.06.29l.93 1.56c.04.1.18.13.28.1l1.15-.46c.24.18.49.33.78.45l.17 1.19c.02.11.12.19.24.19h1.85c.11 0 .21-.08.23-.19l.17-1.19c.29-.12.54-.27.78-.45l1.15.46c.11.03.24 0 .29-.1l.92-1.56a.22.22 0 0 0-.06-.29zM17.5 19c-.83 0-1.5-.67-1.5-1.5s.67-1.5 1.5-1.5 1.5.67 1.5 1.5-.67 1.5-1.5 1.5"/></svg>
<span class="md-ellipsis">
Advanced usage
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_3" >
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
<span class="md-ellipsis">
Key Concepts
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3">
<span class="md-nav__icon md-icon"></span>
Key Concepts
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../../concepts/chain/" class="md-nav__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 1a2.5 2.5 0 0 0-2.5 2.5A2.5 2.5 0 0 0 11 5.79V7H7a2 2 0 0 0-2 2v.71A2.5 2.5 0 0 0 3.5 12 2.5 2.5 0 0 0 5 14.29V15H4a2 2 0 0 0-2 2v1.21A2.5 2.5 0 0 0 .5 20.5 2.5 2.5 0 0 0 3 23a2.5 2.5 0 0 0 2.5-2.5A2.5 2.5 0 0 0 4 18.21V17h4v1.21a2.5 2.5 0 0 0-1.5 2.29A2.5 2.5 0 0 0 9 23a2.5 2.5 0 0 0 2.5-2.5 2.5 2.5 0 0 0-1.5-2.29V17a2 2 0 0 0-2-2H7v-.71A2.5 2.5 0 0 0 8.5 12 2.5 2.5 0 0 0 7 9.71V9h10v.71A2.5 2.5 0 0 0 15.5 12a2.5 2.5 0 0 0 1.5 2.29V15h-1a2 2 0 0 0-2 2v1.21a2.5 2.5 0 0 0-1.5 2.29A2.5 2.5 0 0 0 15 23a2.5 2.5 0 0 0 2.5-2.5 2.5 2.5 0 0 0-1.5-2.29V17h4v1.21a2.5 2.5 0 0 0-1.5 2.29A2.5 2.5 0 0 0 21 23a2.5 2.5 0 0 0 2.5-2.5 2.5 2.5 0 0 0-1.5-2.29V17a2 2 0 0 0-2-2h-1v-.71A2.5 2.5 0 0 0 20.5 12 2.5 2.5 0 0 0 19 9.71V9a2 2 0 0 0-2-2h-4V5.79a2.5 2.5 0 0 0 1.5-2.29A2.5 2.5 0 0 0 12 1m0 1.5a1 1 0 0 1 1 1 1 1 0 0 1-1 1 1 1 0 0 1-1-1 1 1 0 0 1 1-1M6 11a1 1 0 0 1 1 1 1 1 0 0 1-1 1 1 1 0 0 1-1-1 1 1 0 0 1 1-1m12 0a1 1 0 0 1 1 1 1 1 0 0 1-1 1 1 1 0 0 1-1-1 1 1 0 0 1 1-1M3 19.5a1 1 0 0 1 1 1 1 1 0 0 1-1 1 1 1 0 0 1-1-1 1 1 0 0 1 1-1m6 0a1 1 0 0 1 1 1 1 1 0 0 1-1 1 1 1 0 0 1-1-1 1 1 0 0 1 1-1m6 0a1 1 0 0 1 1 1 1 1 0 0 1-1 1 1 1 0 0 1-1-1 1 1 0 0 1 1-1m6 0a1 1 0 0 1 1 1 1 1 0 0 1-1 1 1 1 0 0 1-1-1 1 1 0 0 1 1-1"/></svg>
<span class="md-ellipsis">
Chain
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../concepts/context/" class="md-nav__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9 22a1 1 0 0 1-1-1v-3H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h16a2 2 0 0 1 2 2v12a2 2 0 0 1-2 2h-6.1l-3.7 3.71c-.2.19-.45.29-.7.29zm1-6v3.08L13.08 16H20V4H4v12zm3-6h-2V6h2zm0 4h-2v-2h2z"/></svg>
<span class="md-ellipsis">
Context
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../concepts/adapter/" class="md-nav__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M2 12h2v5h16v-5h2v5a2 2 0 0 1-2 2H4a2 2 0 0 1-2-2m9-12h2v3h3v2h-3v3h-2v-3H8V8h3Z"/></svg>
<span class="md-ellipsis">
Adapter
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_4" >
<label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="0">
<span class="md-ellipsis">
Guides
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_4">
<span class="md-nav__icon md-icon"></span>
Guides
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../../guides/adapting_sdxl/" class="md-nav__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M2 13h2v2h2v-2h2v2h2v-2h2v2h2v-5l3-3V1h2l4 2-4 2v2l3 3v12H11v-3a2 2 0 0 0-2-2 2 2 0 0 0-2 2v3H2zm16-3c-.55 0-1 .54-1 1.2V13h2v-1.8c0-.66-.45-1.2-1-1.2"/></svg>
<span class="md-ellipsis">
Adapting SDXL
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../guides/training_101/" class="md-nav__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M18 22a2 2 0 0 0 2-2V4a2 2 0 0 0-2-2h-6v7L9.5 7.5 7 9V2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2z"/></svg>
<span class="md-ellipsis">
Training 101
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_5" checked>
<!-- Section label. An empty tabindex is not a valid integer and is ignored by browsers; omitting the attribute keeps the label out of the tab order as before. -->
<label class="md-nav__link" for="__nav_5" id="__nav_5_label">
<span class="md-ellipsis">
API Reference
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_5_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_5">
<span class="md-nav__icon md-icon"></span>
API Reference
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_5_1" checked>
<!-- Section label. An empty tabindex is not a valid integer and is ignored by browsers; omitting the attribute keeps the label out of the tab order as before. -->
<label class="md-nav__link" for="__nav_5_1" id="__nav_5_1_label">
<span class="md-ellipsis">
Refiners
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_5_1_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_5_1">
<span class="md-nav__icon md-icon"></span>
Refiners
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_5_1_1" >
<label class="md-nav__link" for="__nav_5_1_1" id="__nav_5_1_1_label" tabindex="0">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Fluxion
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="3" aria-labelledby="__nav_5_1_1_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_5_1_1">
<span class="md-nav__icon md-icon"></span>
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Fluxion
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../fluxion/adapters/" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Adapters
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../fluxion/layers/" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Layers
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../fluxion/context/" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Context
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../fluxion/utils/" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Utils
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--active md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_5_1_2" checked>
<label class="md-nav__link" for="__nav_5_1_2" id="__nav_5_1_2_label" tabindex="0">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Foundation Models
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="3" aria-labelledby="__nav_5_1_2_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_5_1_2">
<span class="md-nav__icon md-icon"></span>
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Foundation Models
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../clip/" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> CLIP
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../dinov2/" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> DINOv2
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../latent_diffusion/" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Latent Diffusion
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--active">
<input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
<label class="md-nav__link md-nav__link--active" for="__toc">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Segment Anything
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<a href="./" class="md-nav__link md-nav__link--active">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Segment Anything
</span>
</a>
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
<label class="md-nav__title" for="__toc">
<span class="md-nav__icon md-icon"></span>
Table of contents
</label>
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.HQSAMAdapter" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-class"></code>&nbsp;HQSAMAdapter
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-class"></code>&nbsp;SegmentAnything
</span>
</a>
<!-- Nested table of contents for the members of SegmentAnything; the label no longer starts with a stray space. -->
<nav class="md-nav" aria-label="SegmentAnything">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.image_encoder" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-attribute"></code>&nbsp;image_encoder
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.image_encoder_resolution" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-attribute"></code>&nbsp;image_encoder_resolution
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.mask_decoder" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-attribute"></code>&nbsp;mask_decoder
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.mask_encoder" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-attribute"></code>&nbsp;mask_encoder
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.point_encoder" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-attribute"></code>&nbsp;point_encoder
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.compute_image_embedding" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-method"></code>&nbsp;compute_image_embedding
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.normalize" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-method"></code>&nbsp;normalize
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.postprocess_masks" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-method"></code>&nbsp;postprocess_masks
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.predict" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-method"></code>&nbsp;predict
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.preprocess_image" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-method"></code>&nbsp;preprocess_image
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnythingH" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-class"></code>&nbsp;SegmentAnythingH
</span>
</a>
<!-- Nested table of contents for the members of SegmentAnythingH; the label no longer starts with a stray space. -->
<nav class="md-nav" aria-label="SegmentAnythingH">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnythingH.image_encoder" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-attribute"></code>&nbsp;image_encoder
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.utils.compute_scaled_size" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-function"></code>&nbsp;compute_scaled_size
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.utils.image_to_scaled_tensor" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-function"></code>&nbsp;image_to_scaled_tensor
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.utils.normalize_coordinates" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-function"></code>&nbsp;normalize_coordinates
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.utils.pad_image_tensor" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-function"></code>&nbsp;pad_image_tensor
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.utils.postprocess_masks" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-function"></code>&nbsp;postprocess_masks
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.utils.preprocess_image" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-function"></code>&nbsp;preprocess_image
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../swin/" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Swin Transformers
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
<label class="md-nav__title" for="__toc">
<span class="md-nav__icon md-icon"></span>
Table of contents
</label>
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.HQSAMAdapter" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-class"></code>&nbsp;HQSAMAdapter
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-class"></code>&nbsp;SegmentAnything
</span>
</a>
<!-- Nested table of contents for the members of SegmentAnything; the label no longer starts with a stray space. -->
<nav class="md-nav" aria-label="SegmentAnything">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.image_encoder" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-attribute"></code>&nbsp;image_encoder
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.image_encoder_resolution" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-attribute"></code>&nbsp;image_encoder_resolution
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.mask_decoder" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-attribute"></code>&nbsp;mask_decoder
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.mask_encoder" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-attribute"></code>&nbsp;mask_encoder
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.point_encoder" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-attribute"></code>&nbsp;point_encoder
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.compute_image_embedding" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-method"></code>&nbsp;compute_image_embedding
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.normalize" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-method"></code>&nbsp;normalize
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.postprocess_masks" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-method"></code>&nbsp;postprocess_masks
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.predict" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-method"></code>&nbsp;predict
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.preprocess_image" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-method"></code>&nbsp;preprocess_image
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnythingH" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-class"></code>&nbsp;SegmentAnythingH
</span>
</a>
<!-- Nested table of contents for the members of SegmentAnythingH; the label no longer starts with a stray space. -->
<nav class="md-nav" aria-label="SegmentAnythingH">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnythingH.image_encoder" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-attribute"></code>&nbsp;image_encoder
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.utils.compute_scaled_size" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-function"></code>&nbsp;compute_scaled_size
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.utils.image_to_scaled_tensor" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-function"></code>&nbsp;image_to_scaled_tensor
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.utils.normalize_coordinates" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-function"></code>&nbsp;normalize_coordinates
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.utils.pad_image_tensor" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-function"></code>&nbsp;pad_image_tensor
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.utils.postprocess_masks" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-function"></code>&nbsp;postprocess_masks
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.utils.preprocess_image" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-function"></code>&nbsp;preprocess_image
</span>
</a>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-content" data-md-component="content">
<article class="md-content__inner md-typeset">
<h1><code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Segment Anything</h1>
<div class="doc doc-object doc-module">
<div class="doc doc-contents first">
<div class="doc doc-children">
<div class="doc doc-object doc-class">
<h2 id="refiners.foundationals.segment_anything.HQSAMAdapter" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-class"></code> <span class="doc doc-object-name doc-class-name">HQSAMAdapter</span>
<a href="#refiners.foundationals.segment_anything.HQSAMAdapter" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="nf">HQSAMAdapter</span><span class="p">(</span>
</span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a> <span class="n">target</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-internal" title="refiners.foundationals.segment_anything.model.SegmentAnything" href="#refiners.foundationals.segment_anything.SegmentAnything">SegmentAnything</a></span><span class="p">,</span>
</span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a> <span class="n">hq_mask_only</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#bool">bool</a></span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
</span><span id="__span-0-4"><a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a> <span class="n">weights</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#dict">dict</a></span><span class="p">[</span><span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#str">str</a></span><span class="p">,</span> <span class="n"><a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a></span><span class="p">]</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-5"><a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a><span class="p">)</span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p class="doc doc-class-bases">
Bases: <code><a class="autorefs autorefs-internal" title="refiners.fluxion.layers.Chain" href="../../fluxion/layers/#refiners.fluxion.layers.Chain">Chain</a></code>, <code><a class="autorefs autorefs-internal" title="refiners.fluxion.adapters.Adapter" href="../../fluxion/adapters/#refiners.fluxion.adapters.Adapter">Adapter</a>[<a class="autorefs autorefs-internal" title="refiners.foundationals.segment_anything.model.SegmentAnything" href="#refiners.foundationals.segment_anything.SegmentAnything">SegmentAnything</a>]</code></p>
<p>Adapter for SAM introducing HQ features.</p>
<p>See <a href="https://arxiv.org/abs/2306.01567">[arXiv:2306.01567] Segment Anything in High Quality</a> for details.</p>
<details class="example" open>
<summary>Example</summary>
<div class="language-py highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="kn">from</span> <span class="nn">refiners.fluxion.utils</span> <span class="kn">import</span> <span class="n">load_from_safetensors</span>
</span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a>
</span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a><span class="c1"># Tip: run scripts/prepare_test_weights.py to download the weights</span>
</span><span id="__span-0-4"><a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a><span class="n">tensor_path</span> <span class="o">=</span> <span class="s2">&quot;./tests/weights/refiners-sam-hq-vit-h.safetensors&quot;</span>
</span><span id="__span-0-5"><a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a><span class="n">weights</span> <span class="o">=</span> <span class="n">load_from_safetensors</span><span class="p">(</span><span class="n">tensor_path</span><span class="p">)</span>
</span><span id="__span-0-6"><a id="__codelineno-0-6" name="__codelineno-0-6" href="#__codelineno-0-6"></a>
</span><span id="__span-0-7"><a id="__codelineno-0-7" name="__codelineno-0-7" href="#__codelineno-0-7"></a><span class="n">hq_sam_adapter</span> <span class="o">=</span> <span class="n">HQSAMAdapter</span><span class="p">(</span><span class="n">sam_h</span><span class="p">,</span> <span class="n">weights</span><span class="o">=</span><span class="n">weights</span><span class="p">)</span>
</span><span id="__span-0-8"><a id="__codelineno-0-8" name="__codelineno-0-8" href="#__codelineno-0-8"></a><span class="n">hq_sam_adapter</span><span class="o">.</span><span class="n">inject</span><span class="p">()</span> <span class="c1"># then use SAM as usual</span>
</span></code></pre></div>
</details>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>target</code>
</td>
<td>
<code><a class="autorefs autorefs-internal" title="refiners.foundationals.segment_anything.model.SegmentAnything" href="#refiners.foundationals.segment_anything.SegmentAnything">SegmentAnything</a></code>
</td>
<td>
<div class="doc-md-description">
<p>The SegmentAnything model to adapt.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>hq_mask_only</code>
</td>
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#bool">bool</a></code>
</td>
<td>
<div class="doc-md-description">
<p>Whether to output only the high-quality mask or use it for mask correction (by summing it with the base SAM mask).</p>
</div>
</td>
<td>
<code>False</code>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>weights</code>
</td>
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#dict">dict</a>[<a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#str">str</a>, <a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a>] | None</code>
</td>
<td>
<div class="doc-md-description">
<p>The weights of the HQSAMAdapter.</p>
</div>
</td>
<td>
<code>None</code>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>src/refiners/foundationals/segment_anything/hq_sam.py</code></summary>
<div class="language-python highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-313">313</a></span>
<span class="normal"><a href="#__codelineno-0-314">314</a></span>
<span class="normal"><a href="#__codelineno-0-315">315</a></span>
<span class="normal"><a href="#__codelineno-0-316">316</a></span>
<span class="normal"><a href="#__codelineno-0-317">317</a></span>
<span class="normal"><a href="#__codelineno-0-318">318</a></span>
<span class="normal"><a href="#__codelineno-0-319">319</a></span>
<span class="normal"><a href="#__codelineno-0-320">320</a></span>
<span class="normal"><a href="#__codelineno-0-321">321</a></span>
<span class="normal"><a href="#__codelineno-0-322">322</a></span>
<span class="normal"><a href="#__codelineno-0-323">323</a></span>
<span class="normal"><a href="#__codelineno-0-324">324</a></span>
<span class="normal"><a href="#__codelineno-0-325">325</a></span>
<span class="normal"><a href="#__codelineno-0-326">326</a></span>
<span class="normal"><a href="#__codelineno-0-327">327</a></span>
<span class="normal"><a href="#__codelineno-0-328">328</a></span>
<span class="normal"><a href="#__codelineno-0-329">329</a></span>
<span class="normal"><a href="#__codelineno-0-330">330</a></span>
<span class="normal"><a href="#__codelineno-0-331">331</a></span>
<span class="normal"><a href="#__codelineno-0-332">332</a></span>
<span class="normal"><a href="#__codelineno-0-333">333</a></span>
<span class="normal"><a href="#__codelineno-0-334">334</a></span>
<span class="normal"><a href="#__codelineno-0-335">335</a></span>
<span class="normal"><a href="#__codelineno-0-336">336</a></span>
<span class="normal"><a href="#__codelineno-0-337">337</a></span>
<span class="normal"><a href="#__codelineno-0-338">338</a></span>
<span class="normal"><a href="#__codelineno-0-339">339</a></span>
<span class="normal"><a href="#__codelineno-0-340">340</a></span>
<span class="normal"><a href="#__codelineno-0-341">341</a></span>
<span class="normal"><a href="#__codelineno-0-342">342</a></span>
<span class="normal"><a href="#__codelineno-0-343">343</a></span>
<span class="normal"><a href="#__codelineno-0-344">344</a></span>
<span class="normal"><a href="#__codelineno-0-345">345</a></span>
<span class="normal"><a href="#__codelineno-0-346">346</a></span>
<span class="normal"><a href="#__codelineno-0-347">347</a></span>
<span class="normal"><a href="#__codelineno-0-348">348</a></span>
<span class="normal"><a href="#__codelineno-0-349">349</a></span>
<span class="normal"><a href="#__codelineno-0-350">350</a></span>
<span class="normal"><a href="#__codelineno-0-351">351</a></span>
<span class="normal"><a href="#__codelineno-0-352">352</a></span>
<span class="normal"><a href="#__codelineno-0-353">353</a></span>
<span class="normal"><a href="#__codelineno-0-354">354</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-313"><a id="__codelineno-0-313" name="__codelineno-0-313"></a><span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
</span><span id="__span-0-314"><a id="__codelineno-0-314" name="__codelineno-0-314"></a> <span class="bp">self</span><span class="p">,</span>
</span><span id="__span-0-315"><a id="__codelineno-0-315" name="__codelineno-0-315"></a> <span class="n">target</span><span class="p">:</span> <span class="n">SegmentAnything</span><span class="p">,</span>
</span><span id="__span-0-316"><a id="__codelineno-0-316" name="__codelineno-0-316"></a> <span class="n">hq_mask_only</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
</span><span id="__span-0-317"><a id="__codelineno-0-317" name="__codelineno-0-317"></a> <span class="n">weights</span><span class="p">:</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">]</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-318"><a id="__codelineno-0-318" name="__codelineno-0-318"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
</span><span id="__span-0-319"><a id="__codelineno-0-319" name="__codelineno-0-319"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Initialize the adapter.</span>
</span><span id="__span-0-320"><a id="__codelineno-0-320" name="__codelineno-0-320"></a>
</span><span id="__span-0-321"><a id="__codelineno-0-321" name="__codelineno-0-321"></a><span class="sd"> Args:</span>
</span><span id="__span-0-322"><a id="__codelineno-0-322" name="__codelineno-0-322"></a><span class="sd"> target: The SegmentAnything model to adapt.</span>
</span><span id="__span-0-323"><a id="__codelineno-0-323" name="__codelineno-0-323"></a><span class="sd"> hq_mask_only: Whether to output only the high-quality mask or use it for mask correction (by summing it with the base SAM mask).</span>
</span><span id="__span-0-324"><a id="__codelineno-0-324" name="__codelineno-0-324"></a><span class="sd"> weights: The weights of the HQSAMAdapter.</span>
</span><span id="__span-0-325"><a id="__codelineno-0-325" name="__codelineno-0-325"></a><span class="sd"> &quot;&quot;&quot;</span>
</span><span id="__span-0-326"><a id="__codelineno-0-326" name="__codelineno-0-326"></a> <span class="bp">self</span><span class="o">.</span><span class="n">vit_embedding_dim</span> <span class="o">=</span> <span class="n">target</span><span class="o">.</span><span class="n">image_encoder</span><span class="o">.</span><span class="n">embedding_dim</span>
</span><span id="__span-0-327"><a id="__codelineno-0-327" name="__codelineno-0-327"></a> <span class="bp">self</span><span class="o">.</span><span class="n">target_num_mask_tokens</span> <span class="o">=</span> <span class="n">target</span><span class="o">.</span><span class="n">mask_decoder</span><span class="o">.</span><span class="n">num_multimask_outputs</span> <span class="o">+</span> <span class="mi">2</span>
</span><span id="__span-0-328"><a id="__codelineno-0-328" name="__codelineno-0-328"></a>
</span><span id="__span-0-329"><a id="__codelineno-0-329" name="__codelineno-0-329"></a> <span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">setup_adapter</span><span class="p">(</span><span class="n">target</span><span class="p">):</span>
</span><span id="__span-0-330"><a id="__codelineno-0-330" name="__codelineno-0-330"></a> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">target</span><span class="p">)</span>
</span><span id="__span-0-331"><a id="__codelineno-0-331" name="__codelineno-0-331"></a>
</span><span id="__span-0-332"><a id="__codelineno-0-332" name="__codelineno-0-332"></a> <span class="k">if</span> <span class="n">target</span><span class="o">.</span><span class="n">mask_decoder</span><span class="o">.</span><span class="n">multimask_output</span><span class="p">:</span>
</span><span id="__span-0-333"><a id="__codelineno-0-333" name="__codelineno-0-333"></a> <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;Multi-mask mode is not supported in HQSAMAdapter.&quot;</span><span class="p">)</span>
</span><span id="__span-0-334"><a id="__codelineno-0-334" name="__codelineno-0-334"></a>
</span><span id="__span-0-335"><a id="__codelineno-0-335" name="__codelineno-0-335"></a> <span class="n">mask_prediction</span> <span class="o">=</span> <span class="n">target</span><span class="o">.</span><span class="n">mask_decoder</span><span class="o">.</span><span class="n">ensure_find</span><span class="p">(</span><span class="n">MaskPrediction</span><span class="p">)</span>
</span><span id="__span-0-336"><a id="__codelineno-0-336" name="__codelineno-0-336"></a>
</span><span id="__span-0-337"><a id="__codelineno-0-337" name="__codelineno-0-337"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_mask_prediction_adapter</span> <span class="o">=</span> <span class="p">[</span>
</span><span id="__span-0-338"><a id="__codelineno-0-338" name="__codelineno-0-338"></a> <span class="n">MaskPredictionAdapter</span><span class="p">(</span>
</span><span id="__span-0-339"><a id="__codelineno-0-339" name="__codelineno-0-339"></a> <span class="n">mask_prediction</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">vit_embedding_dim</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">target_num_mask_tokens</span><span class="p">,</span> <span class="n">target</span><span class="o">.</span><span class="n">device</span><span class="p">,</span> <span class="n">target</span><span class="o">.</span><span class="n">dtype</span>
</span><span id="__span-0-340"><a id="__codelineno-0-340" name="__codelineno-0-340"></a> <span class="p">)</span>
</span><span id="__span-0-341"><a id="__codelineno-0-341" name="__codelineno-0-341"></a> <span class="p">]</span>
</span><span id="__span-0-342"><a id="__codelineno-0-342" name="__codelineno-0-342"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_register_adapter_module</span><span class="p">(</span><span class="s2">&quot;Chain.HQSAMMaskPrediction&quot;</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">mask_prediction_adapter</span><span class="o">.</span><span class="n">hq_sam_mask_prediction</span><span class="p">)</span>
</span><span id="__span-0-343"><a id="__codelineno-0-343" name="__codelineno-0-343"></a>
</span><span id="__span-0-344"><a id="__codelineno-0-344" name="__codelineno-0-344"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_image_encoder_adapter</span> <span class="o">=</span> <span class="p">[</span><span class="n">SAMViTAdapter</span><span class="p">(</span><span class="n">target</span><span class="o">.</span><span class="n">image_encoder</span><span class="p">)]</span>
</span><span id="__span-0-345"><a id="__codelineno-0-345" name="__codelineno-0-345"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_predictions_post_proc</span> <span class="o">=</span> <span class="p">[</span><span class="n">PredictionsPostProc</span><span class="p">(</span><span class="n">hq_mask_only</span><span class="p">)]</span>
</span><span id="__span-0-346"><a id="__codelineno-0-346" name="__codelineno-0-346"></a>
</span><span id="__span-0-347"><a id="__codelineno-0-347" name="__codelineno-0-347"></a> <span class="n">mask_decoder_tokens</span> <span class="o">=</span> <span class="n">target</span><span class="o">.</span><span class="n">mask_decoder</span><span class="o">.</span><span class="n">ensure_find</span><span class="p">(</span><span class="n">MaskDecoderTokens</span><span class="p">)</span>
</span><span id="__span-0-348"><a id="__codelineno-0-348" name="__codelineno-0-348"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_mask_decoder_tokens_extender</span> <span class="o">=</span> <span class="p">[</span><span class="n">MaskDecoderTokensExtender</span><span class="p">(</span><span class="n">mask_decoder_tokens</span><span class="p">)]</span>
</span><span id="__span-0-349"><a id="__codelineno-0-349" name="__codelineno-0-349"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_register_adapter_module</span><span class="p">(</span><span class="s2">&quot;MaskDecoderTokensExtender.hq_token&quot;</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">mask_decoder_tokens_extender</span><span class="o">.</span><span class="n">hq_token</span><span class="p">)</span>
</span><span id="__span-0-350"><a id="__codelineno-0-350" name="__codelineno-0-350"></a>
</span><span id="__span-0-351"><a id="__codelineno-0-351" name="__codelineno-0-351"></a> <span class="k">if</span> <span class="n">weights</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
</span><span id="__span-0-352"><a id="__codelineno-0-352" name="__codelineno-0-352"></a> <span class="bp">self</span><span class="o">.</span><span class="n">load_weights</span><span class="p">(</span><span class="n">weights</span><span class="p">)</span>
</span><span id="__span-0-353"><a id="__codelineno-0-353" name="__codelineno-0-353"></a>
</span><span id="__span-0-354"><a id="__codelineno-0-354" name="__codelineno-0-354"></a> <span class="bp">self</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">device</span><span class="o">=</span><span class="n">target</span><span class="o">.</span><span class="n">device</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">target</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span>
</span></code></pre></div></td></tr></table></div>
</details>
<div class="doc doc-children">
</div>
</div>
</div>
<div class="doc doc-object doc-class">
<h2 id="refiners.foundationals.segment_anything.SegmentAnything" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-class"></code> <span class="doc doc-object-name doc-class-name">SegmentAnything</span>
<a href="#refiners.foundationals.segment_anything.SegmentAnything" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="nf">SegmentAnything</span><span class="p">(</span>
</span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a> <span class="n">image_encoder</span><span class="p">:</span> <span class="n"><span title="refiners.foundationals.segment_anything.image_encoder.SAMViT">SAMViT</span></span><span class="p">,</span>
</span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a> <span class="n">point_encoder</span><span class="p">:</span> <span class="n"><span title="refiners.foundationals.segment_anything.prompt_encoder.PointEncoder">PointEncoder</span></span><span class="p">,</span>
</span><span id="__span-0-4"><a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a> <span class="n">mask_encoder</span><span class="p">:</span> <span class="n"><span title="refiners.foundationals.segment_anything.prompt_encoder.MaskEncoder">MaskEncoder</span></span><span class="p">,</span>
</span><span id="__span-0-5"><a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a> <span class="n">mask_decoder</span><span class="p">:</span> <span class="n"><span title="refiners.foundationals.segment_anything.mask_decoder.MaskDecoder">MaskDecoder</span></span><span class="p">,</span>
</span><span id="__span-0-6"><a id="__codelineno-0-6" name="__codelineno-0-6" href="#__codelineno-0-6"></a> <span class="n">device</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" title="torch.device" href="https://pytorch.org/docs/main/tensor_attributes.html#torch.device">device</a></span> <span class="o">|</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#str">str</a></span> <span class="o">=</span> <span class="s2">&quot;cpu&quot;</span><span class="p">,</span>
</span><span id="__span-0-7"><a id="__codelineno-0-7" name="__codelineno-0-7" href="#__codelineno-0-7"></a> <span class="n">dtype</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" title="torch.dtype" href="https://pytorch.org/docs/main/tensor_attributes.html#torch.dtype">dtype</a></span> <span class="o">=</span> <span class="n"><span title="torch.float32">float32</span></span><span class="p">,</span>
</span><span id="__span-0-8"><a id="__codelineno-0-8" name="__codelineno-0-8" href="#__codelineno-0-8"></a><span class="p">)</span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p class="doc doc-class-bases">
Bases: <code><a class="autorefs autorefs-internal" title="refiners.fluxion.layers.Chain" href="../../fluxion/layers/#refiners.fluxion.layers.Chain">Chain</a></code></p>
<p>SegmentAnything model.</p>
<p>See <a href="https://arxiv.org/abs/2304.02643">[arXiv:2304.02643] Segment Anything</a> for details.</p>
<p>See <a class="autorefs autorefs-internal" href="#refiners.foundationals.segment_anything.SegmentAnythingH"><code>SegmentAnythingH</code></a> for a usage example.</p>
<p><span class="doc-section-title">Attributes:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td><code><span title="refiners.foundationals.segment_anything.SegmentAnything.mask_threshold">mask_threshold</span></code></td>
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#float">float</a></code>
</td>
<td>
<div class="doc-md-description">
<p>0.0</p>
</div>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>image_encoder</code>
</td>
<td>
<code><span title="refiners.foundationals.segment_anything.image_encoder.SAMViT">SAMViT</span></code>
</td>
<td>
<div class="doc-md-description">
<p>The image encoder to use.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>point_encoder</code>
</td>
<td>
<code><span title="refiners.foundationals.segment_anything.prompt_encoder.PointEncoder">PointEncoder</span></code>
</td>
<td>
<div class="doc-md-description">
<p>The point encoder to use.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>mask_encoder</code>
</td>
<td>
<code><span title="refiners.foundationals.segment_anything.prompt_encoder.MaskEncoder">MaskEncoder</span></code>
</td>
<td>
<div class="doc-md-description">
<p>The mask encoder to use.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>mask_decoder</code>
</td>
<td>
<code><span title="refiners.foundationals.segment_anything.mask_decoder.MaskDecoder">MaskDecoder</span></code>
</td>
<td>
<div class="doc-md-description">
<p>The mask decoder to use.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>src/refiners/foundationals/segment_anything/model.py</code></summary>
<div class="language-python highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-40">40</a></span>
<span class="normal"><a href="#__codelineno-0-41">41</a></span>
<span class="normal"><a href="#__codelineno-0-42">42</a></span>
<span class="normal"><a href="#__codelineno-0-43">43</a></span>
<span class="normal"><a href="#__codelineno-0-44">44</a></span>
<span class="normal"><a href="#__codelineno-0-45">45</a></span>
<span class="normal"><a href="#__codelineno-0-46">46</a></span>
<span class="normal"><a href="#__codelineno-0-47">47</a></span>
<span class="normal"><a href="#__codelineno-0-48">48</a></span>
<span class="normal"><a href="#__codelineno-0-49">49</a></span>
<span class="normal"><a href="#__codelineno-0-50">50</a></span>
<span class="normal"><a href="#__codelineno-0-51">51</a></span>
<span class="normal"><a href="#__codelineno-0-52">52</a></span>
<span class="normal"><a href="#__codelineno-0-53">53</a></span>
<span class="normal"><a href="#__codelineno-0-54">54</a></span>
<span class="normal"><a href="#__codelineno-0-55">55</a></span>
<span class="normal"><a href="#__codelineno-0-56">56</a></span>
<span class="normal"><a href="#__codelineno-0-57">57</a></span>
<span class="normal"><a href="#__codelineno-0-58">58</a></span>
<span class="normal"><a href="#__codelineno-0-59">59</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-40"><a id="__codelineno-0-40" name="__codelineno-0-40"></a><span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
</span><span id="__span-0-41"><a id="__codelineno-0-41" name="__codelineno-0-41"></a> <span class="bp">self</span><span class="p">,</span>
</span><span id="__span-0-42"><a id="__codelineno-0-42" name="__codelineno-0-42"></a> <span class="n">image_encoder</span><span class="p">:</span> <span class="n">SAMViT</span><span class="p">,</span>
</span><span id="__span-0-43"><a id="__codelineno-0-43" name="__codelineno-0-43"></a> <span class="n">point_encoder</span><span class="p">:</span> <span class="n">PointEncoder</span><span class="p">,</span>
</span><span id="__span-0-44"><a id="__codelineno-0-44" name="__codelineno-0-44"></a> <span class="n">mask_encoder</span><span class="p">:</span> <span class="n">MaskEncoder</span><span class="p">,</span>
</span><span id="__span-0-45"><a id="__codelineno-0-45" name="__codelineno-0-45"></a> <span class="n">mask_decoder</span><span class="p">:</span> <span class="n">MaskDecoder</span><span class="p">,</span>
</span><span id="__span-0-46"><a id="__codelineno-0-46" name="__codelineno-0-46"></a> <span class="n">device</span><span class="p">:</span> <span class="n">Device</span> <span class="o">|</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;cpu&quot;</span><span class="p">,</span>
</span><span id="__span-0-47"><a id="__codelineno-0-47" name="__codelineno-0-47"></a> <span class="n">dtype</span><span class="p">:</span> <span class="n">DType</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">float32</span><span class="p">,</span>
</span><span id="__span-0-48"><a id="__codelineno-0-48" name="__codelineno-0-48"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
</span><span id="__span-0-49"><a id="__codelineno-0-49" name="__codelineno-0-49"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Initialize SegmentAnything model.</span>
</span><span id="__span-0-50"><a id="__codelineno-0-50" name="__codelineno-0-50"></a>
</span><span id="__span-0-51"><a id="__codelineno-0-51" name="__codelineno-0-51"></a><span class="sd"> Args:</span>
</span><span id="__span-0-52"><a id="__codelineno-0-52" name="__codelineno-0-52"></a><span class="sd"> image_encoder: The image encoder to use.</span>
</span><span id="__span-0-53"><a id="__codelineno-0-53" name="__codelineno-0-53"></a><span class="sd"> point_encoder: The point encoder to use.</span>
</span><span id="__span-0-54"><a id="__codelineno-0-54" name="__codelineno-0-54"></a><span class="sd"> mask_encoder: The mask encoder to use.</span>
</span><span id="__span-0-55"><a id="__codelineno-0-55" name="__codelineno-0-55"></a><span class="sd"> mask_decoder: The mask decoder to use.</span>
</span><span id="__span-0-56"><a id="__codelineno-0-56" name="__codelineno-0-56"></a><span class="sd"> &quot;&quot;&quot;</span>
</span><span id="__span-0-57"><a id="__codelineno-0-57" name="__codelineno-0-57"></a> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">image_encoder</span><span class="p">,</span> <span class="n">point_encoder</span><span class="p">,</span> <span class="n">mask_encoder</span><span class="p">,</span> <span class="n">mask_decoder</span><span class="p">)</span>
</span><span id="__span-0-58"><a id="__codelineno-0-58" name="__codelineno-0-58"></a>
</span><span id="__span-0-59"><a id="__codelineno-0-59" name="__codelineno-0-59"></a> <span class="bp">self</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">device</span><span class="o">=</span><span class="n">device</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span>
</span></code></pre></div></td></tr></table></div>
</details>
<div class="doc doc-children">
<div class="doc doc-object doc-attribute">
<h3 id="refiners.foundationals.segment_anything.SegmentAnything.image_encoder" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">image_encoder</span>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-property"><code>property</code></small>
</span>
<a href="#refiners.foundationals.segment_anything.SegmentAnything.image_encoder" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="n">image_encoder</span><span class="p">:</span> <span class="n"><span title="refiners.foundationals.segment_anything.image_encoder.SAMViT">SAMViT</span></span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p>The image encoder.</p>
</div>
</div>
<div class="doc doc-object doc-attribute">
<h3 id="refiners.foundationals.segment_anything.SegmentAnything.image_encoder_resolution" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">image_encoder_resolution</span>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-property"><code>property</code></small>
</span>
<a href="#refiners.foundationals.segment_anything.SegmentAnything.image_encoder_resolution" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="n">image_encoder_resolution</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p>The resolution of the image encoder.</p>
</div>
</div>
<div class="doc doc-object doc-attribute">
<h3 id="refiners.foundationals.segment_anything.SegmentAnything.mask_decoder" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">mask_decoder</span>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-property"><code>property</code></small>
</span>
<a href="#refiners.foundationals.segment_anything.SegmentAnything.mask_decoder" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="n">mask_decoder</span><span class="p">:</span> <span class="n"><span title="refiners.foundationals.segment_anything.mask_decoder.MaskDecoder">MaskDecoder</span></span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p>The mask decoder.</p>
</div>
</div>
<div class="doc doc-object doc-attribute">
<h3 id="refiners.foundationals.segment_anything.SegmentAnything.mask_encoder" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">mask_encoder</span>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-property"><code>property</code></small>
</span>
<a href="#refiners.foundationals.segment_anything.SegmentAnything.mask_encoder" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="n">mask_encoder</span><span class="p">:</span> <span class="n"><span title="refiners.foundationals.segment_anything.prompt_encoder.MaskEncoder">MaskEncoder</span></span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p>The mask encoder.</p>
</div>
</div>
<div class="doc doc-object doc-attribute">
<h3 id="refiners.foundationals.segment_anything.SegmentAnything.point_encoder" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">point_encoder</span>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-property"><code>property</code></small>
</span>
<a href="#refiners.foundationals.segment_anything.SegmentAnything.point_encoder" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="n">point_encoder</span><span class="p">:</span> <span class="n"><span title="refiners.foundationals.segment_anything.prompt_encoder.PointEncoder">PointEncoder</span></span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p>The point encoder.</p>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="refiners.foundationals.segment_anything.SegmentAnything.compute_image_embedding" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">compute_image_embedding</span>
<a href="#refiners.foundationals.segment_anything.SegmentAnything.compute_image_embedding" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="nf">compute_image_embedding</span><span class="p">(</span><span class="n">image</span><span class="p">:</span> <span class="n"><span title="PIL.Image.Image">Image</span></span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n"><span title="refiners.foundationals.segment_anything.model.ImageEmbedding">ImageEmbedding</span></span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p>Compute the embedding of an image.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>image</code>
</td>
<td>
<code><span title="PIL.Image.Image">Image</span></code>
</td>
<td>
<div class="doc-md-description">
<p>The image to compute the embedding of.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Returns:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><span title="refiners.foundationals.segment_anything.model.ImageEmbedding">ImageEmbedding</span></code>
</td>
<td>
<div class="doc-md-description">
<p>The computed image embedding.</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>src/refiners/foundationals/segment_anything/model.py</code></summary>
<div class="language-python highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-81">81</a></span>
<span class="normal"><a href="#__codelineno-0-82">82</a></span>
<span class="normal"><a href="#__codelineno-0-83">83</a></span>
<span class="normal"><a href="#__codelineno-0-84">84</a></span>
<span class="normal"><a href="#__codelineno-0-85">85</a></span>
<span class="normal"><a href="#__codelineno-0-86">86</a></span>
<span class="normal"><a href="#__codelineno-0-87">87</a></span>
<span class="normal"><a href="#__codelineno-0-88">88</a></span>
<span class="normal"><a href="#__codelineno-0-89">89</a></span>
<span class="normal"><a href="#__codelineno-0-90">90</a></span>
<span class="normal"><a href="#__codelineno-0-91">91</a></span>
<span class="normal"><a href="#__codelineno-0-92">92</a></span>
<span class="normal"><a href="#__codelineno-0-93">93</a></span>
<span class="normal"><a href="#__codelineno-0-94">94</a></span>
<span class="normal"><a href="#__codelineno-0-95">95</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-81"><a id="__codelineno-0-81" name="__codelineno-0-81"></a><span class="nd">@no_grad</span><span class="p">()</span>
</span><span id="__span-0-82"><a id="__codelineno-0-82" name="__codelineno-0-82"></a><span class="k">def</span> <span class="nf">compute_image_embedding</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">image</span><span class="p">:</span> <span class="n">Image</span><span class="o">.</span><span class="n">Image</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">ImageEmbedding</span><span class="p">:</span>
</span><span id="__span-0-83"><a id="__codelineno-0-83" name="__codelineno-0-83"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Compute the embedding of an image.</span>
</span><span id="__span-0-84"><a id="__codelineno-0-84" name="__codelineno-0-84"></a>
</span><span id="__span-0-85"><a id="__codelineno-0-85" name="__codelineno-0-85"></a><span class="sd"> Args:</span>
</span><span id="__span-0-86"><a id="__codelineno-0-86" name="__codelineno-0-86"></a><span class="sd"> image: The image to compute the embedding of.</span>
</span><span id="__span-0-87"><a id="__codelineno-0-87" name="__codelineno-0-87"></a>
</span><span id="__span-0-88"><a id="__codelineno-0-88" name="__codelineno-0-88"></a><span class="sd"> Returns:</span>
</span><span id="__span-0-89"><a id="__codelineno-0-89" name="__codelineno-0-89"></a><span class="sd"> The computed image embedding.</span>
</span><span id="__span-0-90"><a id="__codelineno-0-90" name="__codelineno-0-90"></a><span class="sd"> &quot;&quot;&quot;</span>
</span><span id="__span-0-91"><a id="__codelineno-0-91" name="__codelineno-0-91"></a> <span class="n">original_size</span> <span class="o">=</span> <span class="p">(</span><span class="n">image</span><span class="o">.</span><span class="n">height</span><span class="p">,</span> <span class="n">image</span><span class="o">.</span><span class="n">width</span><span class="p">)</span>
</span><span id="__span-0-92"><a id="__codelineno-0-92" name="__codelineno-0-92"></a> <span class="k">return</span> <span class="n">ImageEmbedding</span><span class="p">(</span>
</span><span id="__span-0-93"><a id="__codelineno-0-93" name="__codelineno-0-93"></a> <span class="n">features</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">image_encoder</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">preprocess_image</span><span class="p">(</span><span class="n">image</span><span class="p">)),</span>
</span><span id="__span-0-94"><a id="__codelineno-0-94" name="__codelineno-0-94"></a> <span class="n">original_image_size</span><span class="o">=</span><span class="n">original_size</span><span class="p">,</span>
</span><span id="__span-0-95"><a id="__codelineno-0-95" name="__codelineno-0-95"></a> <span class="p">)</span>
</span></code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="refiners.foundationals.segment_anything.SegmentAnything.normalize" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">normalize</span>
<a href="#refiners.foundationals.segment_anything.SegmentAnything.normalize" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="nf">normalize</span><span class="p">(</span>
</span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a> <span class="n">coordinates</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a></span><span class="p">,</span> <span class="n">original_size</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a></span><span class="p">[</span><span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">,</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">]</span>
</span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="n"><a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a></span>
</span></code></pre></div>
<div class="doc doc-contents ">
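<p>See <a class="autorefs autorefs-internal" href="#refiners.foundationals.segment_anything.utils.normalize_coordinates"><code>normalize_coordinates</code></a>.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>coordinates</code>
</td>
<td>
<code><a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a></code>
</td>
<td>
<div class="doc-md-description">
<p>A tensor of coordinates.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>original_size</code>
</td>
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a>[<a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a>, <a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a>]</code>
</td>
<td>
<div class="doc-md-description">
<p>(h, w) the original size of the image.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Returns:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a></code>
</td>
<td>
<div class="doc-md-description">
<p>The [0,1] normalized coordinates tensor.</p>
</div>
</td>
</tr>
</tbody>
</table>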
<details class="quote">
<summary>Source code in <code>src/refiners/foundationals/segment_anything/model.py</code></summary>
<div class="language-python highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-179">179</a></span>
<span class="normal"><a href="#__codelineno-0-180">180</a></span>
<span class="normal"><a href="#__codelineno-0-181">181</a></span>
<span class="normal"><a href="#__codelineno-0-182">182</a></span>
<span class="normal"><a href="#__codelineno-0-183">183</a></span>
<span class="normal"><a href="#__codelineno-0-184">184</a></span>
<span class="normal"><a href="#__codelineno-0-185">185</a></span>
<span class="normal"><a href="#__codelineno-0-186">186</a></span>
<span class="normal"><a href="#__codelineno-0-187">187</a></span>
<span class="normal"><a href="#__codelineno-0-188">188</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-179"><a id="__codelineno-0-179" name="__codelineno-0-179"></a><span class="k">def</span> <span class="nf">normalize</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">coordinates</span><span class="p">:</span> <span class="n">Tensor</span><span class="p">,</span> <span class="n">original_size</span><span class="p">:</span> <span class="nb">tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">Tensor</span><span class="p">:</span>
</span><span id="__span-0-180"><a id="__codelineno-0-180" name="__codelineno-0-180"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
</span><span id="__span-0-181"><a id="__codelineno-0-181" name="__codelineno-0-181"></a><span class="sd"> See [`normalize_coordinates`][refiners.foundationals.segment_anything.utils.normalize_coordinates]</span>
</span><span id="__span-0-182"><a id="__codelineno-0-182" name="__codelineno-0-182"></a><span class="sd"> Args:</span>
</span><span id="__span-0-183"><a id="__codelineno-0-183" name="__codelineno-0-183"></a><span class="sd"> coordinates: a tensor of coordinates.</span>
</span><span id="__span-0-184"><a id="__codelineno-0-184" name="__codelineno-0-184"></a><span class="sd"> original_size: (h, w) the original size of the image.</span>
</span><span id="__span-0-185"><a id="__codelineno-0-185" name="__codelineno-0-185"></a><span class="sd"> Returns:</span>
</span><span id="__span-0-186"><a id="__codelineno-0-186" name="__codelineno-0-186"></a><span class="sd"> The [0,1] normalized coordinates tensor.</span>
</span><span id="__span-0-187"><a id="__codelineno-0-187" name="__codelineno-0-187"></a><span class="sd"> &quot;&quot;&quot;</span>
</span><span id="__span-0-188"><a id="__codelineno-0-188" name="__codelineno-0-188"></a> <span class="k">return</span> <span class="n">normalize_coordinates</span><span class="p">(</span><span class="n">coordinates</span><span class="p">,</span> <span class="n">original_size</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">image_encoder_resolution</span><span class="p">)</span>
</span></code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="refiners.foundationals.segment_anything.SegmentAnything.postprocess_masks" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">postprocess_masks</span>
<a href="#refiners.foundationals.segment_anything.SegmentAnything.postprocess_masks" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="nf">postprocess_masks</span><span class="p">(</span>
</span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a> <span class="n">low_res_masks</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a></span><span class="p">,</span> <span class="n">original_size</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a></span><span class="p">[</span><span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">,</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">]</span>
</span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="n"><a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a></span>
</span></code></pre></div>
<div class="doc doc-contents ">
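<p>See <a class="autorefs autorefs-internal" href="#refiners.foundationals.segment_anything.utils.postprocess_masks"><code>postprocess_masks</code></a>.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>low_res_masks</code>
</td>
<td>
<code><a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a></code>
</td>
<td>
<div class="doc-md-description">
<p>A mask tensor of size (N, 1, 256, 256).</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>original_size</code>
</td>
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a>[<a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a>, <a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a>]</code>
</td>
<td>
<div class="doc-md-description">
<p>(h, w) the original size of the image.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Returns:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a></code>
</td>
<td>
<div class="doc-md-description">
<p>The mask of shape (N, 1, H, W).</p>
</div>
</td>
</tr>
</tbody>
</table>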
<details class="quote">
<summary>Source code in <code>src/refiners/foundationals/segment_anything/model.py</code></summary>
<div class="language-python highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-190">190</a></span>
<span class="normal"><a href="#__codelineno-0-191">191</a></span>
<span class="normal"><a href="#__codelineno-0-192">192</a></span>
<span class="normal"><a href="#__codelineno-0-193">193</a></span>
<span class="normal"><a href="#__codelineno-0-194">194</a></span>
<span class="normal"><a href="#__codelineno-0-195">195</a></span>
<span class="normal"><a href="#__codelineno-0-196">196</a></span>
<span class="normal"><a href="#__codelineno-0-197">197</a></span>
<span class="normal"><a href="#__codelineno-0-198">198</a></span>
<span class="normal"><a href="#__codelineno-0-199">199</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-190"><a id="__codelineno-0-190" name="__codelineno-0-190"></a><span class="k">def</span> <span class="nf">postprocess_masks</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">low_res_masks</span><span class="p">:</span> <span class="n">Tensor</span><span class="p">,</span> <span class="n">original_size</span><span class="p">:</span> <span class="nb">tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">Tensor</span><span class="p">:</span>
</span><span id="__span-0-191"><a id="__codelineno-0-191" name="__codelineno-0-191"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
</span><span id="__span-0-192"><a id="__codelineno-0-192" name="__codelineno-0-192"></a><span class="sd"> See [`postprocess_masks`][refiners.foundationals.segment_anything.utils.postprocess_masks]</span>
</span><span id="__span-0-193"><a id="__codelineno-0-193" name="__codelineno-0-193"></a><span class="sd"> Args:</span>
</span><span id="__span-0-194"><a id="__codelineno-0-194" name="__codelineno-0-194"></a><span class="sd"> low_res_masks: a mask tensor of size (N, 1, 256, 256)</span>
</span><span id="__span-0-195"><a id="__codelineno-0-195" name="__codelineno-0-195"></a><span class="sd"> original_size: (h, w) the original size of the image.</span>
</span><span id="__span-0-196"><a id="__codelineno-0-196" name="__codelineno-0-196"></a><span class="sd"> Returns:</span>
</span><span id="__span-0-197"><a id="__codelineno-0-197" name="__codelineno-0-197"></a><span class="sd"> The mask of shape (N, 1, H, W)</span>
</span><span id="__span-0-198"><a id="__codelineno-0-198" name="__codelineno-0-198"></a><span class="sd"> &quot;&quot;&quot;</span>
</span><span id="__span-0-199"><a id="__codelineno-0-199" name="__codelineno-0-199"></a> <span class="k">return</span> <span class="n">postprocess_masks</span><span class="p">(</span><span class="n">low_res_masks</span><span class="p">,</span> <span class="n">original_size</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">image_encoder_resolution</span><span class="p">)</span>
</span></code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="refiners.foundationals.segment_anything.SegmentAnything.predict" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">predict</span>
<a href="#refiners.foundationals.segment_anything.SegmentAnything.predict" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="nf">predict</span><span class="p">(</span>
</span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a> <span class="nb">input</span><span class="p">:</span> <span class="n"><span title="PIL.Image.Image">Image</span></span> <span class="o">|</span> <span class="n"><span title="refiners.foundationals.segment_anything.model.ImageEmbedding">ImageEmbedding</span></span><span class="p">,</span>
</span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a> <span class="n">foreground_points</span><span class="p">:</span> <span class="p">(</span>
</span><span id="__span-0-4"><a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a> <span class="n"><a class="autorefs autorefs-external" title="typing.Sequence" href="https://docs.python.org/3/library/typing.html#typing.Sequence">Sequence</a></span><span class="p">[</span><span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a></span><span class="p">[</span><span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#float">float</a></span><span class="p">,</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#float">float</a></span><span class="p">]]</span> <span class="o">|</span> <span class="kc">None</span>
</span><span id="__span-0-5"><a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a> <span class="p">)</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-6"><a id="__codelineno-0-6" name="__codelineno-0-6" href="#__codelineno-0-6"></a> <span class="n">background_points</span><span class="p">:</span> <span class="p">(</span>
</span><span id="__span-0-7"><a id="__codelineno-0-7" name="__codelineno-0-7" href="#__codelineno-0-7"></a> <span class="n"><a class="autorefs autorefs-external" title="typing.Sequence" href="https://docs.python.org/3/library/typing.html#typing.Sequence">Sequence</a></span><span class="p">[</span><span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a></span><span class="p">[</span><span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#float">float</a></span><span class="p">,</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#float">float</a></span><span class="p">]]</span> <span class="o">|</span> <span class="kc">None</span>
</span><span id="__span-0-8"><a id="__codelineno-0-8" name="__codelineno-0-8" href="#__codelineno-0-8"></a> <span class="p">)</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-9"><a id="__codelineno-0-9" name="__codelineno-0-9" href="#__codelineno-0-9"></a> <span class="n">box_points</span><span class="p">:</span> <span class="p">(</span>
</span><span id="__span-0-10"><a id="__codelineno-0-10" name="__codelineno-0-10" href="#__codelineno-0-10"></a> <span class="n"><a class="autorefs autorefs-external" title="typing.Sequence" href="https://docs.python.org/3/library/typing.html#typing.Sequence">Sequence</a></span><span class="p">[</span><span class="n"><a class="autorefs autorefs-external" title="typing.Sequence" href="https://docs.python.org/3/library/typing.html#typing.Sequence">Sequence</a></span><span class="p">[</span><span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a></span><span class="p">[</span><span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#float">float</a></span><span class="p">,</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#float">float</a></span><span class="p">]]]</span> <span class="o">|</span> <span class="kc">None</span>
</span><span id="__span-0-11"><a id="__codelineno-0-11" name="__codelineno-0-11" href="#__codelineno-0-11"></a> <span class="p">)</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-12"><a id="__codelineno-0-12" name="__codelineno-0-12" href="#__codelineno-0-12"></a> <span class="n">low_res_mask</span><span class="p">:</span> <span class="p">(</span>
</span><span id="__span-0-13"><a id="__codelineno-0-13" name="__codelineno-0-13" href="#__codelineno-0-13"></a> <span class="n"><span title="jaxtyping.Float">Float</span></span><span class="p">[</span><span class="n"><a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a></span><span class="p">,</span> <span class="s2">&quot;1 1 256 256&quot;</span><span class="p">]</span> <span class="o">|</span> <span class="kc">None</span>
</span><span id="__span-0-14"><a id="__codelineno-0-14" name="__codelineno-0-14" href="#__codelineno-0-14"></a> <span class="p">)</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-15"><a id="__codelineno-0-15" name="__codelineno-0-15" href="#__codelineno-0-15"></a> <span class="n">binarize</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#bool">bool</a></span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
</span><span id="__span-0-16"><a id="__codelineno-0-16" name="__codelineno-0-16" href="#__codelineno-0-16"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a></span><span class="p">[</span><span class="n"><a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a></span><span class="p">,</span> <span class="n"><a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a></span><span class="p">,</span> <span class="n"><a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a></span><span class="p">]</span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p>Predict the masks of the input image.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>input</code>
</td>
<td>
<code><span title="PIL.Image.Image">Image</span> | <span title="refiners.foundationals.segment_anything.model.ImageEmbedding">ImageEmbedding</span></code>
</td>
<td>
<div class="doc-md-description">
<p>The input image or its embedding.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>foreground_points</code>
</td>
<td>
<code><a class="autorefs autorefs-external" title="typing.Sequence" href="https://docs.python.org/3/library/typing.html#typing.Sequence">Sequence</a>[<a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a>[<a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#float">float</a>, <a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#float">float</a>]] | None</code>
</td>
<td>
<div class="doc-md-description">
<p>The points of the foreground.</p>
</div>
</td>
<td>
<code>None</code>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>background_points</code>
</td>
<td>
<code><a class="autorefs autorefs-external" title="typing.Sequence" href="https://docs.python.org/3/library/typing.html#typing.Sequence">Sequence</a>[<a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a>[<a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#float">float</a>, <a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#float">float</a>]] | None</code>
</td>
<td>
<div class="doc-md-description">
<p>The points of the background.</p>
</div>
</td>
<td>
<code>None</code>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>box_points</code>
</td>
<td>
<code><a class="autorefs autorefs-external" title="typing.Sequence" href="https://docs.python.org/3/library/typing.html#typing.Sequence">Sequence</a>[<a class="autorefs autorefs-external" title="typing.Sequence" href="https://docs.python.org/3/library/typing.html#typing.Sequence">Sequence</a>[<a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a>[<a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#float">float</a>, <a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#float">float</a>]]] | None</code>
</td>
<td>
<div class="doc-md-description">
<p>The points of the box.</p>
</div>
</td>
<td>
<code>None</code>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>low_res_mask</code>
</td>
<td>
<code><span title="jaxtyping.Float">Float</span>[<a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a>, &#39;1 1 256 256&#39;] | None</code>
</td>
<td>
<div class="doc-md-description">
<p>The low resolution mask.</p>
</div>
</td>
<td>
<code>None</code>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>binarize</code>
</td>
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#bool">bool</a></code>
</td>
<td>
<div class="doc-md-description">
<p>Whether to binarize the masks.</p>
</div>
</td>
<td>
<code>True</code>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Returns:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a></code>
</td>
<td>
<div class="doc-md-description">
<p>The predicted masks.</p>
</div>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code><a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a></code>
</td>
<td>
<div class="doc-md-description">
<p>The IOU prediction.</p>
</div>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code><a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a></code>
</td>
<td>
<div class="doc-md-description">
<p>The low resolution masks.</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>src/refiners/foundationals/segment_anything/model.py</code></summary>
<div class="language-python highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-97"> 97</a></span>
<span class="normal"><a href="#__codelineno-0-98"> 98</a></span>
<span class="normal"><a href="#__codelineno-0-99"> 99</a></span>
<span class="normal"><a href="#__codelineno-0-100">100</a></span>
<span class="normal"><a href="#__codelineno-0-101">101</a></span>
<span class="normal"><a href="#__codelineno-0-102">102</a></span>
<span class="normal"><a href="#__codelineno-0-103">103</a></span>
<span class="normal"><a href="#__codelineno-0-104">104</a></span>
<span class="normal"><a href="#__codelineno-0-105">105</a></span>
<span class="normal"><a href="#__codelineno-0-106">106</a></span>
<span class="normal"><a href="#__codelineno-0-107">107</a></span>
<span class="normal"><a href="#__codelineno-0-108">108</a></span>
<span class="normal"><a href="#__codelineno-0-109">109</a></span>
<span class="normal"><a href="#__codelineno-0-110">110</a></span>
<span class="normal"><a href="#__codelineno-0-111">111</a></span>
<span class="normal"><a href="#__codelineno-0-112">112</a></span>
<span class="normal"><a href="#__codelineno-0-113">113</a></span>
<span class="normal"><a href="#__codelineno-0-114">114</a></span>
<span class="normal"><a href="#__codelineno-0-115">115</a></span>
<span class="normal"><a href="#__codelineno-0-116">116</a></span>
<span class="normal"><a href="#__codelineno-0-117">117</a></span>
<span class="normal"><a href="#__codelineno-0-118">118</a></span>
<span class="normal"><a href="#__codelineno-0-119">119</a></span>
<span class="normal"><a href="#__codelineno-0-120">120</a></span>
<span class="normal"><a href="#__codelineno-0-121">121</a></span>
<span class="normal"><a href="#__codelineno-0-122">122</a></span>
<span class="normal"><a href="#__codelineno-0-123">123</a></span>
<span class="normal"><a href="#__codelineno-0-124">124</a></span>
<span class="normal"><a href="#__codelineno-0-125">125</a></span>
<span class="normal"><a href="#__codelineno-0-126">126</a></span>
<span class="normal"><a href="#__codelineno-0-127">127</a></span>
<span class="normal"><a href="#__codelineno-0-128">128</a></span>
<span class="normal"><a href="#__codelineno-0-129">129</a></span>
<span class="normal"><a href="#__codelineno-0-130">130</a></span>
<span class="normal"><a href="#__codelineno-0-131">131</a></span>
<span class="normal"><a href="#__codelineno-0-132">132</a></span>
<span class="normal"><a href="#__codelineno-0-133">133</a></span>
<span class="normal"><a href="#__codelineno-0-134">134</a></span>
<span class="normal"><a href="#__codelineno-0-135">135</a></span>
<span class="normal"><a href="#__codelineno-0-136">136</a></span>
<span class="normal"><a href="#__codelineno-0-137">137</a></span>
<span class="normal"><a href="#__codelineno-0-138">138</a></span>
<span class="normal"><a href="#__codelineno-0-139">139</a></span>
<span class="normal"><a href="#__codelineno-0-140">140</a></span>
<span class="normal"><a href="#__codelineno-0-141">141</a></span>
<span class="normal"><a href="#__codelineno-0-142">142</a></span>
<span class="normal"><a href="#__codelineno-0-143">143</a></span>
<span class="normal"><a href="#__codelineno-0-144">144</a></span>
<span class="normal"><a href="#__codelineno-0-145">145</a></span>
<span class="normal"><a href="#__codelineno-0-146">146</a></span>
<span class="normal"><a href="#__codelineno-0-147">147</a></span>
<span class="normal"><a href="#__codelineno-0-148">148</a></span>
<span class="normal"><a href="#__codelineno-0-149">149</a></span>
<span class="normal"><a href="#__codelineno-0-150">150</a></span>
<span class="normal"><a href="#__codelineno-0-151">151</a></span>
<span class="normal"><a href="#__codelineno-0-152">152</a></span>
<span class="normal"><a href="#__codelineno-0-153">153</a></span>
<span class="normal"><a href="#__codelineno-0-154">154</a></span>
<span class="normal"><a href="#__codelineno-0-155">155</a></span>
<span class="normal"><a href="#__codelineno-0-156">156</a></span>
<span class="normal"><a href="#__codelineno-0-157">157</a></span>
<span class="normal"><a href="#__codelineno-0-158">158</a></span>
<span class="normal"><a href="#__codelineno-0-159">159</a></span>
<span class="normal"><a href="#__codelineno-0-160">160</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-97"><a id="__codelineno-0-97" name="__codelineno-0-97"></a><span class="nd">@no_grad</span><span class="p">()</span>
</span><span id="__span-0-98"><a id="__codelineno-0-98" name="__codelineno-0-98"></a><span class="k">def</span> <span class="nf">predict</span><span class="p">(</span>
</span><span id="__span-0-99"><a id="__codelineno-0-99" name="__codelineno-0-99"></a> <span class="bp">self</span><span class="p">,</span>
</span><span id="__span-0-100"><a id="__codelineno-0-100" name="__codelineno-0-100"></a> <span class="nb">input</span><span class="p">:</span> <span class="n">Image</span><span class="o">.</span><span class="n">Image</span> <span class="o">|</span> <span class="n">ImageEmbedding</span><span class="p">,</span>
</span><span id="__span-0-101"><a id="__codelineno-0-101" name="__codelineno-0-101"></a> <span class="n">foreground_points</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="nb">tuple</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="nb">float</span><span class="p">]]</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-102"><a id="__codelineno-0-102" name="__codelineno-0-102"></a> <span class="n">background_points</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="nb">tuple</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="nb">float</span><span class="p">]]</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-103"><a id="__codelineno-0-103" name="__codelineno-0-103"></a> <span class="n">box_points</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="n">Sequence</span><span class="p">[</span><span class="nb">tuple</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="nb">float</span><span class="p">]]]</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-104"><a id="__codelineno-0-104" name="__codelineno-0-104"></a> <span class="n">low_res_mask</span><span class="p">:</span> <span class="n">Float</span><span class="p">[</span><span class="n">Tensor</span><span class="p">,</span> <span class="s2">&quot;1 1 256 256&quot;</span><span class="p">]</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-105"><a id="__codelineno-0-105" name="__codelineno-0-105"></a> <span class="n">binarize</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
</span><span id="__span-0-106"><a id="__codelineno-0-106" name="__codelineno-0-106"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">tuple</span><span class="p">[</span><span class="n">Tensor</span><span class="p">,</span> <span class="n">Tensor</span><span class="p">,</span> <span class="n">Tensor</span><span class="p">]:</span>
</span><span id="__span-0-107"><a id="__codelineno-0-107" name="__codelineno-0-107"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Predict the masks of the input image.</span>
</span><span id="__span-0-108"><a id="__codelineno-0-108" name="__codelineno-0-108"></a>
</span><span id="__span-0-109"><a id="__codelineno-0-109" name="__codelineno-0-109"></a><span class="sd"> Args:</span>
</span><span id="__span-0-110"><a id="__codelineno-0-110" name="__codelineno-0-110"></a><span class="sd"> input: The input image or its embedding.</span>
</span><span id="__span-0-111"><a id="__codelineno-0-111" name="__codelineno-0-111"></a><span class="sd"> foreground_points: The points of the foreground.</span>
</span><span id="__span-0-112"><a id="__codelineno-0-112" name="__codelineno-0-112"></a><span class="sd"> background_points: The points of the background.</span>
</span><span id="__span-0-113"><a id="__codelineno-0-113" name="__codelineno-0-113"></a><span class="sd"> box_points: The points of the box.</span>
</span><span id="__span-0-114"><a id="__codelineno-0-114" name="__codelineno-0-114"></a><span class="sd"> low_res_mask: The low resolution mask.</span>
</span><span id="__span-0-115"><a id="__codelineno-0-115" name="__codelineno-0-115"></a><span class="sd"> binarize: Whether to binarize the masks.</span>
</span><span id="__span-0-116"><a id="__codelineno-0-116" name="__codelineno-0-116"></a>
</span><span id="__span-0-117"><a id="__codelineno-0-117" name="__codelineno-0-117"></a><span class="sd"> Returns:</span>
</span><span id="__span-0-118"><a id="__codelineno-0-118" name="__codelineno-0-118"></a><span class="sd"> The predicted masks.</span>
</span><span id="__span-0-119"><a id="__codelineno-0-119" name="__codelineno-0-119"></a><span class="sd"> The IOU prediction.</span>
</span><span id="__span-0-120"><a id="__codelineno-0-120" name="__codelineno-0-120"></a><span class="sd"> The low resolution masks.</span>
</span><span id="__span-0-121"><a id="__codelineno-0-121" name="__codelineno-0-121"></a><span class="sd"> &quot;&quot;&quot;</span>
</span><span id="__span-0-122"><a id="__codelineno-0-122" name="__codelineno-0-122"></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span> <span class="n">ImageEmbedding</span><span class="p">):</span>
</span><span id="__span-0-123"><a id="__codelineno-0-123" name="__codelineno-0-123"></a> <span class="n">original_size</span> <span class="o">=</span> <span class="nb">input</span><span class="o">.</span><span class="n">original_image_size</span>
</span><span id="__span-0-124"><a id="__codelineno-0-124" name="__codelineno-0-124"></a> <span class="n">image_embedding</span> <span class="o">=</span> <span class="nb">input</span><span class="o">.</span><span class="n">features</span>
</span><span id="__span-0-125"><a id="__codelineno-0-125" name="__codelineno-0-125"></a> <span class="k">else</span><span class="p">:</span>
</span><span id="__span-0-126"><a id="__codelineno-0-126" name="__codelineno-0-126"></a> <span class="n">original_size</span> <span class="o">=</span> <span class="p">(</span><span class="nb">input</span><span class="o">.</span><span class="n">height</span><span class="p">,</span> <span class="nb">input</span><span class="o">.</span><span class="n">width</span><span class="p">)</span>
</span><span id="__span-0-127"><a id="__codelineno-0-127" name="__codelineno-0-127"></a> <span class="n">image_embedding</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">image_encoder</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">preprocess_image</span><span class="p">(</span><span class="nb">input</span><span class="p">))</span>
</span><span id="__span-0-128"><a id="__codelineno-0-128" name="__codelineno-0-128"></a>
</span><span id="__span-0-129"><a id="__codelineno-0-129" name="__codelineno-0-129"></a> <span class="n">coordinates</span><span class="p">,</span> <span class="n">type_mask</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">point_encoder</span><span class="o">.</span><span class="n">points_to_tensor</span><span class="p">(</span>
</span><span id="__span-0-130"><a id="__codelineno-0-130" name="__codelineno-0-130"></a> <span class="n">foreground_points</span><span class="o">=</span><span class="n">foreground_points</span><span class="p">,</span>
</span><span id="__span-0-131"><a id="__codelineno-0-131" name="__codelineno-0-131"></a> <span class="n">background_points</span><span class="o">=</span><span class="n">background_points</span><span class="p">,</span>
</span><span id="__span-0-132"><a id="__codelineno-0-132" name="__codelineno-0-132"></a> <span class="n">box_points</span><span class="o">=</span><span class="n">box_points</span><span class="p">,</span>
</span><span id="__span-0-133"><a id="__codelineno-0-133" name="__codelineno-0-133"></a> <span class="p">)</span>
</span><span id="__span-0-134"><a id="__codelineno-0-134" name="__codelineno-0-134"></a> <span class="bp">self</span><span class="o">.</span><span class="n">point_encoder</span><span class="o">.</span><span class="n">set_type_mask</span><span class="p">(</span><span class="n">type_mask</span><span class="o">=</span><span class="n">type_mask</span><span class="p">)</span>
</span><span id="__span-0-135"><a id="__codelineno-0-135" name="__codelineno-0-135"></a>
</span><span id="__span-0-136"><a id="__codelineno-0-136" name="__codelineno-0-136"></a> <span class="k">if</span> <span class="n">low_res_mask</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
</span><span id="__span-0-137"><a id="__codelineno-0-137" name="__codelineno-0-137"></a> <span class="n">mask_embedding</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">mask_encoder</span><span class="p">(</span><span class="n">low_res_mask</span><span class="p">)</span>
</span><span id="__span-0-138"><a id="__codelineno-0-138" name="__codelineno-0-138"></a> <span class="k">else</span><span class="p">:</span>
</span><span id="__span-0-139"><a id="__codelineno-0-139" name="__codelineno-0-139"></a> <span class="n">mask_embedding</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">mask_encoder</span><span class="o">.</span><span class="n">get_no_mask_dense_embedding</span><span class="p">(</span>
</span><span id="__span-0-140"><a id="__codelineno-0-140" name="__codelineno-0-140"></a> <span class="n">image_embedding_size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">image_encoder</span><span class="o">.</span><span class="n">image_embedding_size</span>
</span><span id="__span-0-141"><a id="__codelineno-0-141" name="__codelineno-0-141"></a> <span class="p">)</span>
</span><span id="__span-0-142"><a id="__codelineno-0-142" name="__codelineno-0-142"></a>
</span><span id="__span-0-143"><a id="__codelineno-0-143" name="__codelineno-0-143"></a> <span class="n">point_embedding</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">point_encoder</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">normalize</span><span class="p">(</span><span class="n">coordinates</span><span class="p">,</span> <span class="n">original_size</span><span class="o">=</span><span class="n">original_size</span><span class="p">))</span>
</span><span id="__span-0-144"><a id="__codelineno-0-144" name="__codelineno-0-144"></a> <span class="n">dense_positional_embedding</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">point_encoder</span><span class="o">.</span><span class="n">get_dense_positional_embedding</span><span class="p">(</span>
</span><span id="__span-0-145"><a id="__codelineno-0-145" name="__codelineno-0-145"></a> <span class="n">image_embedding_size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">image_encoder</span><span class="o">.</span><span class="n">image_embedding_size</span>
</span><span id="__span-0-146"><a id="__codelineno-0-146" name="__codelineno-0-146"></a> <span class="p">)</span>
</span><span id="__span-0-147"><a id="__codelineno-0-147" name="__codelineno-0-147"></a>
</span><span id="__span-0-148"><a id="__codelineno-0-148" name="__codelineno-0-148"></a> <span class="bp">self</span><span class="o">.</span><span class="n">mask_decoder</span><span class="o">.</span><span class="n">set_image_embedding</span><span class="p">(</span><span class="n">image_embedding</span><span class="o">=</span><span class="n">image_embedding</span><span class="p">)</span>
</span><span id="__span-0-149"><a id="__codelineno-0-149" name="__codelineno-0-149"></a> <span class="bp">self</span><span class="o">.</span><span class="n">mask_decoder</span><span class="o">.</span><span class="n">set_mask_embedding</span><span class="p">(</span><span class="n">mask_embedding</span><span class="o">=</span><span class="n">mask_embedding</span><span class="p">)</span>
</span><span id="__span-0-150"><a id="__codelineno-0-150" name="__codelineno-0-150"></a> <span class="bp">self</span><span class="o">.</span><span class="n">mask_decoder</span><span class="o">.</span><span class="n">set_point_embedding</span><span class="p">(</span><span class="n">point_embedding</span><span class="o">=</span><span class="n">point_embedding</span><span class="p">)</span>
</span><span id="__span-0-151"><a id="__codelineno-0-151" name="__codelineno-0-151"></a> <span class="bp">self</span><span class="o">.</span><span class="n">mask_decoder</span><span class="o">.</span><span class="n">set_dense_positional_embedding</span><span class="p">(</span><span class="n">dense_positional_embedding</span><span class="o">=</span><span class="n">dense_positional_embedding</span><span class="p">)</span>
</span><span id="__span-0-152"><a id="__codelineno-0-152" name="__codelineno-0-152"></a>
</span><span id="__span-0-153"><a id="__codelineno-0-153" name="__codelineno-0-153"></a> <span class="n">low_res_masks</span><span class="p">,</span> <span class="n">iou_predictions</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">mask_decoder</span><span class="p">()</span>
</span><span id="__span-0-154"><a id="__codelineno-0-154" name="__codelineno-0-154"></a>
</span><span id="__span-0-155"><a id="__codelineno-0-155" name="__codelineno-0-155"></a> <span class="n">high_res_masks</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">postprocess_masks</span><span class="p">(</span><span class="n">low_res_masks</span><span class="p">,</span> <span class="n">original_size</span><span class="p">)</span>
</span><span id="__span-0-156"><a id="__codelineno-0-156" name="__codelineno-0-156"></a>
</span><span id="__span-0-157"><a id="__codelineno-0-157" name="__codelineno-0-157"></a> <span class="k">if</span> <span class="n">binarize</span><span class="p">:</span>
</span><span id="__span-0-158"><a id="__codelineno-0-158" name="__codelineno-0-158"></a> <span class="n">high_res_masks</span> <span class="o">=</span> <span class="n">high_res_masks</span> <span class="o">&gt;</span> <span class="bp">self</span><span class="o">.</span><span class="n">mask_threshold</span>
</span><span id="__span-0-159"><a id="__codelineno-0-159" name="__codelineno-0-159"></a>
</span><span id="__span-0-160"><a id="__codelineno-0-160" name="__codelineno-0-160"></a> <span class="k">return</span> <span class="n">high_res_masks</span><span class="p">,</span> <span class="n">iou_predictions</span><span class="p">,</span> <span class="n">low_res_masks</span>
</span></code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="refiners.foundationals.segment_anything.SegmentAnything.preprocess_image" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">preprocess_image</span>
<a href="#refiners.foundationals.segment_anything.SegmentAnything.preprocess_image" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="nf">preprocess_image</span><span class="p">(</span><span class="n">image</span><span class="p">:</span> <span class="n"><span title="PIL.Image.Image">Image</span></span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n"><a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a></span>
</span></code></pre></div>
<div class="doc doc-contents ">
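<p>See <a class="autorefs autorefs-internal" href="#refiners.foundationals.segment_anything.utils.preprocess_image"><code>preprocess_image</code></a>.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>image</code>
</td>
<td>
<code><span title="PIL.Image.Image">Image</span></code>
</td>
<td>
<div class="doc-md-description">
<p>The image to preprocess.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Returns:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a></code>
</td>
<td>
<div class="doc-md-description">
<p>The preprocessed tensor.</p>
</div>
</td>
</tr>
</tbody>
</table>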
<details class="quote">
<summary>Source code in <code>src/refiners/foundationals/segment_anything/model.py</code></summary>
<div class="language-python highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-169">169</a></span>
<span class="normal"><a href="#__codelineno-0-170">170</a></span>
<span class="normal"><a href="#__codelineno-0-171">171</a></span>
<span class="normal"><a href="#__codelineno-0-172">172</a></span>
<span class="normal"><a href="#__codelineno-0-173">173</a></span>
<span class="normal"><a href="#__codelineno-0-174">174</a></span>
<span class="normal"><a href="#__codelineno-0-175">175</a></span>
<span class="normal"><a href="#__codelineno-0-176">176</a></span>
<span class="normal"><a href="#__codelineno-0-177">177</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-169"><a id="__codelineno-0-169" name="__codelineno-0-169"></a><span class="k">def</span> <span class="nf">preprocess_image</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">image</span><span class="p">:</span> <span class="n">Image</span><span class="o">.</span><span class="n">Image</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Tensor</span><span class="p">:</span>
</span><span id="__span-0-170"><a id="__codelineno-0-170" name="__codelineno-0-170"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
</span><span id="__span-0-171"><a id="__codelineno-0-171" name="__codelineno-0-171"></a><span class="sd"> See [`preprocess_image`][refiners.foundationals.segment_anything.utils.preprocess_image]</span>
</span><span id="__span-0-172"><a id="__codelineno-0-172" name="__codelineno-0-172"></a><span class="sd"> Args:</span>
</span><span id="__span-0-173"><a id="__codelineno-0-173" name="__codelineno-0-173"></a><span class="sd"> image: The image to preprocess.</span>
</span><span id="__span-0-174"><a id="__codelineno-0-174" name="__codelineno-0-174"></a><span class="sd"> Returns:</span>
</span><span id="__span-0-175"><a id="__codelineno-0-175" name="__codelineno-0-175"></a><span class="sd"> The preprocessed tensor.</span>
</span><span id="__span-0-176"><a id="__codelineno-0-176" name="__codelineno-0-176"></a><span class="sd"> &quot;&quot;&quot;</span>
</span><span id="__span-0-177"><a id="__codelineno-0-177" name="__codelineno-0-177"></a> <span class="k">return</span> <span class="n">preprocess_image</span><span class="p">(</span><span class="n">image</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">image_encoder_resolution</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">device</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span>
</span></code></pre></div></td></tr></table></div>
</details>
</div>
</div>
</div>
</div>
</div>
<div class="doc doc-object doc-class">
<h2 id="refiners.foundationals.segment_anything.SegmentAnythingH" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-class"></code> <span class="doc doc-object-name doc-class-name">SegmentAnythingH</span>
<a href="#refiners.foundationals.segment_anything.SegmentAnythingH" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="nf">SegmentAnythingH</span><span class="p">(</span>
</span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a> <span class="n">image_encoder</span><span class="p">:</span> <span class="n"><span title="refiners.foundationals.segment_anything.image_encoder.SAMViTH">SAMViTH</span></span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a> <span class="n">point_encoder</span><span class="p">:</span> <span class="n"><span title="refiners.foundationals.segment_anything.prompt_encoder.PointEncoder">PointEncoder</span></span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-4"><a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a> <span class="n">mask_encoder</span><span class="p">:</span> <span class="n"><span title="refiners.foundationals.segment_anything.prompt_encoder.MaskEncoder">MaskEncoder</span></span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-5"><a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a> <span class="n">mask_decoder</span><span class="p">:</span> <span class="n"><span title="refiners.foundationals.segment_anything.mask_decoder.MaskDecoder">MaskDecoder</span></span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-6"><a id="__codelineno-0-6" name="__codelineno-0-6" href="#__codelineno-0-6"></a> <span class="n">multimask_output</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#bool">bool</a></span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-7"><a id="__codelineno-0-7" name="__codelineno-0-7" href="#__codelineno-0-7"></a> <span class="n">device</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" title="torch.device" href="https://pytorch.org/docs/main/tensor_attributes.html#torch.device">device</a></span> <span class="o">|</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#str">str</a></span> <span class="o">=</span> <span class="s2">&quot;cpu&quot;</span><span class="p">,</span>
</span><span id="__span-0-8"><a id="__codelineno-0-8" name="__codelineno-0-8" href="#__codelineno-0-8"></a> <span class="n">dtype</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" title="torch.dtype" href="https://pytorch.org/docs/main/tensor_attributes.html#torch.dtype">dtype</a></span> <span class="o">=</span> <span class="n"><span title="torch.float32">float32</span></span><span class="p">,</span>
</span><span id="__span-0-9"><a id="__codelineno-0-9" name="__codelineno-0-9" href="#__codelineno-0-9"></a><span class="p">)</span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p class="doc doc-class-bases">
Bases: <code><a class="autorefs autorefs-internal" title="refiners.foundationals.segment_anything.model.SegmentAnything" href="#refiners.foundationals.segment_anything.SegmentAnything">SegmentAnything</a></code></p>
<p>SegmentAnything huge model.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>image_encoder</code>
</td>
<td>
<code><span title="refiners.foundationals.segment_anything.image_encoder.SAMViTH">SAMViTH</span> | None</code>
</td>
<td>
<div class="doc-md-description">
<p>The image encoder to use.</p>
</div>
</td>
<td>
<code>None</code>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>point_encoder</code>
</td>
<td>
<code><span title="refiners.foundationals.segment_anything.prompt_encoder.PointEncoder">PointEncoder</span> | None</code>
</td>
<td>
<div class="doc-md-description">
<p>The point encoder to use.</p>
</div>
</td>
<td>
<code>None</code>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>mask_encoder</code>
</td>
<td>
<code><span title="refiners.foundationals.segment_anything.prompt_encoder.MaskEncoder">MaskEncoder</span> | None</code>
</td>
<td>
<div class="doc-md-description">
<p>The mask encoder to use.</p>
</div>
</td>
<td>
<code>None</code>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>mask_decoder</code>
</td>
<td>
<code><span title="refiners.foundationals.segment_anything.mask_decoder.MaskDecoder">MaskDecoder</span> | None</code>
</td>
<td>
<div class="doc-md-description">
<p>The mask decoder to use.</p>
</div>
</td>
<td>
<code>None</code>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>multimask_output</code>
</td>
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#bool">bool</a> | None</code>
</td>
<td>
<div class="doc-md-description">
<p>Whether to use multimask output.</p>
</div>
</td>
<td>
<code>None</code>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>device</code>
</td>
<td>
<code><a class="autorefs autorefs-external" title="torch.device" href="https://pytorch.org/docs/main/tensor_attributes.html#torch.device">device</a> | <a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#str">str</a></code>
</td>
<td>
<div class="doc-md-description">
<p>The PyTorch device to use.</p>
</div>
</td>
<td>
<code>&#39;cpu&#39;</code>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>dtype</code>
</td>
<td>
<code><a class="autorefs autorefs-external" title="torch.dtype" href="https://pytorch.org/docs/main/tensor_attributes.html#torch.dtype">dtype</a></code>
</td>
<td>
<div class="doc-md-description">
<p>The PyTorch data type to use.</p>
</div>
</td>
<td>
<code><span title="torch.float32">float32</span></code>
</td>
</tr>
</tbody>
</table>
<details class="example" open>
<summary>Example</summary>
<div class="language-py highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="n">device</span><span class="o">=</span><span class="s2">&quot;cuda&quot;</span> <span class="k">if</span> <span class="n">torch</span><span class="o">.</span><span class="n">cuda</span><span class="o">.</span><span class="n">is_available</span><span class="p">()</span> <span class="k">else</span> <span class="s2">&quot;cpu&quot;</span>
</span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a>
</span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a><span class="c1"># multimask_output=True is recommended for ambiguous prompts such as a single point.</span>
</span><span id="__span-0-4"><a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a><span class="c1"># Below, a box prompt is passed, so just use multimask_output=False which will return a single mask</span>
</span><span id="__span-0-5"><a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a><span class="n">sam_h</span> <span class="o">=</span> <span class="n">SegmentAnythingH</span><span class="p">(</span><span class="n">multimask_output</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">device</span><span class="o">=</span><span class="n">device</span><span class="p">)</span>
</span><span id="__span-0-6"><a id="__codelineno-0-6" name="__codelineno-0-6" href="#__codelineno-0-6"></a>
</span><span id="__span-0-7"><a id="__codelineno-0-7" name="__codelineno-0-7" href="#__codelineno-0-7"></a><span class="c1"># Tips: run scripts/prepare_test_weights.py to download the weights</span>
</span><span id="__span-0-8"><a id="__codelineno-0-8" name="__codelineno-0-8" href="#__codelineno-0-8"></a><span class="n">tensors_path</span> <span class="o">=</span> <span class="s2">&quot;./tests/weights/segment-anything-h.safetensors&quot;</span>
</span><span id="__span-0-9"><a id="__codelineno-0-9" name="__codelineno-0-9" href="#__codelineno-0-9"></a><span class="n">sam_h</span><span class="o">.</span><span class="n">load_from_safetensors</span><span class="p">(</span><span class="n">tensors_path</span><span class="o">=</span><span class="n">tensors_path</span><span class="p">)</span>
</span><span id="__span-0-10"><a id="__codelineno-0-10" name="__codelineno-0-10" href="#__codelineno-0-10"></a>
</span><span id="__span-0-11"><a id="__codelineno-0-11" name="__codelineno-0-11" href="#__codelineno-0-11"></a><span class="kn">from</span> <span class="nn">PIL</span> <span class="kn">import</span> <span class="n">Image</span>
</span><span id="__span-0-12"><a id="__codelineno-0-12" name="__codelineno-0-12" href="#__codelineno-0-12"></a><span class="n">image</span> <span class="o">=</span> <span class="n">Image</span><span class="o">.</span><span class="n">open</span><span class="p">(</span><span class="s2">&quot;image.png&quot;</span><span class="p">)</span>
</span><span id="__span-0-13"><a id="__codelineno-0-13" name="__codelineno-0-13" href="#__codelineno-0-13"></a>
</span><span id="__span-0-14"><a id="__codelineno-0-14" name="__codelineno-0-14" href="#__codelineno-0-14"></a><span class="n">masks</span><span class="p">,</span> <span class="o">*</span><span class="n">_</span> <span class="o">=</span> <span class="n">sam_h</span><span class="o">.</span><span class="n">predict</span><span class="p">(</span><span class="n">image</span><span class="p">,</span> <span class="n">box_points</span><span class="o">=</span><span class="p">[[(</span><span class="n">x1</span><span class="p">,</span> <span class="n">y1</span><span class="p">),</span> <span class="p">(</span><span class="n">x2</span><span class="p">,</span> <span class="n">y2</span><span class="p">)]])</span>
</span><span id="__span-0-15"><a id="__codelineno-0-15" name="__codelineno-0-15" href="#__codelineno-0-15"></a>
</span><span id="__span-0-16"><a id="__codelineno-0-16" name="__codelineno-0-16" href="#__codelineno-0-16"></a><span class="k">assert</span> <span class="n">masks</span><span class="o">.</span><span class="n">shape</span> <span class="o">==</span> <span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">image</span><span class="o">.</span><span class="n">height</span><span class="p">,</span> <span class="n">image</span><span class="o">.</span><span class="n">width</span><span class="p">)</span>
</span><span id="__span-0-17"><a id="__codelineno-0-17" name="__codelineno-0-17" href="#__codelineno-0-17"></a><span class="k">assert</span> <span class="n">masks</span><span class="o">.</span><span class="n">dtype</span> <span class="o">==</span> <span class="n">torch</span><span class="o">.</span><span class="n">bool</span>
</span><span id="__span-0-18"><a id="__codelineno-0-18" name="__codelineno-0-18" href="#__codelineno-0-18"></a>
</span><span id="__span-0-19"><a id="__codelineno-0-19" name="__codelineno-0-19" href="#__codelineno-0-19"></a><span class="c1"># convert it to [0,255] uint8 ndarray of shape (H, W)</span>
</span><span id="__span-0-20"><a id="__codelineno-0-20" name="__codelineno-0-20" href="#__codelineno-0-20"></a><span class="n">mask</span> <span class="o">=</span> <span class="n">masks</span><span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">cpu</span><span class="p">()</span><span class="o">.</span><span class="n">numpy</span><span class="p">()</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="s2">&quot;uint8&quot;</span><span class="p">)</span> <span class="o">*</span> <span class="mi">255</span>
</span><span id="__span-0-21"><a id="__codelineno-0-21" name="__codelineno-0-21" href="#__codelineno-0-21"></a>
</span><span id="__span-0-22"><a id="__codelineno-0-22" name="__codelineno-0-22" href="#__codelineno-0-22"></a><span class="n">Image</span><span class="o">.</span><span class="n">fromarray</span><span class="p">(</span><span class="n">mask</span><span class="p">)</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="s2">&quot;mask_image.png&quot;</span><span class="p">)</span>
</span></code></pre></div>
</details>
<details class="quote">
<summary>Source code in <code>src/refiners/foundationals/segment_anything/model.py</code></summary>
<div class="language-python highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-205">205</a></span>
<span class="normal"><a href="#__codelineno-0-206">206</a></span>
<span class="normal"><a href="#__codelineno-0-207">207</a></span>
<span class="normal"><a href="#__codelineno-0-208">208</a></span>
<span class="normal"><a href="#__codelineno-0-209">209</a></span>
<span class="normal"><a href="#__codelineno-0-210">210</a></span>
<span class="normal"><a href="#__codelineno-0-211">211</a></span>
<span class="normal"><a href="#__codelineno-0-212">212</a></span>
<span class="normal"><a href="#__codelineno-0-213">213</a></span>
<span class="normal"><a href="#__codelineno-0-214">214</a></span>
<span class="normal"><a href="#__codelineno-0-215">215</a></span>
<span class="normal"><a href="#__codelineno-0-216">216</a></span>
<span class="normal"><a href="#__codelineno-0-217">217</a></span>
<span class="normal"><a href="#__codelineno-0-218">218</a></span>
<span class="normal"><a href="#__codelineno-0-219">219</a></span>
<span class="normal"><a href="#__codelineno-0-220">220</a></span>
<span class="normal"><a href="#__codelineno-0-221">221</a></span>
<span class="normal"><a href="#__codelineno-0-222">222</a></span>
<span class="normal"><a href="#__codelineno-0-223">223</a></span>
<span class="normal"><a href="#__codelineno-0-224">224</a></span>
<span class="normal"><a href="#__codelineno-0-225">225</a></span>
<span class="normal"><a href="#__codelineno-0-226">226</a></span>
<span class="normal"><a href="#__codelineno-0-227">227</a></span>
<span class="normal"><a href="#__codelineno-0-228">228</a></span>
<span class="normal"><a href="#__codelineno-0-229">229</a></span>
<span class="normal"><a href="#__codelineno-0-230">230</a></span>
<span class="normal"><a href="#__codelineno-0-231">231</a></span>
<span class="normal"><a href="#__codelineno-0-232">232</a></span>
<span class="normal"><a href="#__codelineno-0-233">233</a></span>
<span class="normal"><a href="#__codelineno-0-234">234</a></span>
<span class="normal"><a href="#__codelineno-0-235">235</a></span>
<span class="normal"><a href="#__codelineno-0-236">236</a></span>
<span class="normal"><a href="#__codelineno-0-237">237</a></span>
<span class="normal"><a href="#__codelineno-0-238">238</a></span>
<span class="normal"><a href="#__codelineno-0-239">239</a></span>
<span class="normal"><a href="#__codelineno-0-240">240</a></span>
<span class="normal"><a href="#__codelineno-0-241">241</a></span>
<span class="normal"><a href="#__codelineno-0-242">242</a></span>
<span class="normal"><a href="#__codelineno-0-243">243</a></span>
<span class="normal"><a href="#__codelineno-0-244">244</a></span>
<span class="normal"><a href="#__codelineno-0-245">245</a></span>
<span class="normal"><a href="#__codelineno-0-246">246</a></span>
<span class="normal"><a href="#__codelineno-0-247">247</a></span>
<span class="normal"><a href="#__codelineno-0-248">248</a></span>
<span class="normal"><a href="#__codelineno-0-249">249</a></span>
<span class="normal"><a href="#__codelineno-0-250">250</a></span>
<span class="normal"><a href="#__codelineno-0-251">251</a></span>
<span class="normal"><a href="#__codelineno-0-252">252</a></span>
<span class="normal"><a href="#__codelineno-0-253">253</a></span>
<span class="normal"><a href="#__codelineno-0-254">254</a></span>
<span class="normal"><a href="#__codelineno-0-255">255</a></span>
<span class="normal"><a href="#__codelineno-0-256">256</a></span>
<span class="normal"><a href="#__codelineno-0-257">257</a></span>
<span class="normal"><a href="#__codelineno-0-258">258</a></span>
<span class="normal"><a href="#__codelineno-0-259">259</a></span>
<span class="normal"><a href="#__codelineno-0-260">260</a></span>
<span class="normal"><a href="#__codelineno-0-261">261</a></span>
<span class="normal"><a href="#__codelineno-0-262">262</a></span>
<span class="normal"><a href="#__codelineno-0-263">263</a></span>
<span class="normal"><a href="#__codelineno-0-264">264</a></span>
<span class="normal"><a href="#__codelineno-0-265">265</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-205"><a id="__codelineno-0-205" name="__codelineno-0-205"></a><span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
</span><span id="__span-0-206"><a id="__codelineno-0-206" name="__codelineno-0-206"></a> <span class="bp">self</span><span class="p">,</span>
</span><span id="__span-0-207"><a id="__codelineno-0-207" name="__codelineno-0-207"></a> <span class="n">image_encoder</span><span class="p">:</span> <span class="n">SAMViTH</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-208"><a id="__codelineno-0-208" name="__codelineno-0-208"></a> <span class="n">point_encoder</span><span class="p">:</span> <span class="n">PointEncoder</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-209"><a id="__codelineno-0-209" name="__codelineno-0-209"></a> <span class="n">mask_encoder</span><span class="p">:</span> <span class="n">MaskEncoder</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-210"><a id="__codelineno-0-210" name="__codelineno-0-210"></a> <span class="n">mask_decoder</span><span class="p">:</span> <span class="n">MaskDecoder</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-211"><a id="__codelineno-0-211" name="__codelineno-0-211"></a> <span class="n">multimask_output</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-212"><a id="__codelineno-0-212" name="__codelineno-0-212"></a> <span class="n">device</span><span class="p">:</span> <span class="n">Device</span> <span class="o">|</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;cpu&quot;</span><span class="p">,</span>
</span><span id="__span-0-213"><a id="__codelineno-0-213" name="__codelineno-0-213"></a> <span class="n">dtype</span><span class="p">:</span> <span class="n">DType</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">float32</span><span class="p">,</span>
</span><span id="__span-0-214"><a id="__codelineno-0-214" name="__codelineno-0-214"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
</span><span id="__span-0-215"><a id="__codelineno-0-215" name="__codelineno-0-215"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Initialize SegmentAnything huge model.</span>
</span><span id="__span-0-216"><a id="__codelineno-0-216" name="__codelineno-0-216"></a>
</span><span id="__span-0-217"><a id="__codelineno-0-217" name="__codelineno-0-217"></a><span class="sd"> Args:</span>
</span><span id="__span-0-218"><a id="__codelineno-0-218" name="__codelineno-0-218"></a><span class="sd"> image_encoder: The image encoder to use.</span>
</span><span id="__span-0-219"><a id="__codelineno-0-219" name="__codelineno-0-219"></a><span class="sd"> point_encoder: The point encoder to use.</span>
</span><span id="__span-0-220"><a id="__codelineno-0-220" name="__codelineno-0-220"></a><span class="sd"> mask_encoder: The mask encoder to use.</span>
</span><span id="__span-0-221"><a id="__codelineno-0-221" name="__codelineno-0-221"></a><span class="sd"> mask_decoder: The mask decoder to use.</span>
</span><span id="__span-0-222"><a id="__codelineno-0-222" name="__codelineno-0-222"></a><span class="sd"> multimask_output: Whether to use multimask output.</span>
</span><span id="__span-0-223"><a id="__codelineno-0-223" name="__codelineno-0-223"></a><span class="sd"> device: The PyTorch device to use.</span>
</span><span id="__span-0-224"><a id="__codelineno-0-224" name="__codelineno-0-224"></a><span class="sd"> dtype: The PyTorch data type to use.</span>
</span><span id="__span-0-225"><a id="__codelineno-0-225" name="__codelineno-0-225"></a>
</span><span id="__span-0-226"><a id="__codelineno-0-226" name="__codelineno-0-226"></a><span class="sd"> Example:</span>
</span><span id="__span-0-227"><a id="__codelineno-0-227" name="__codelineno-0-227"></a><span class="sd"> ```py</span>
</span><span id="__span-0-228"><a id="__codelineno-0-228" name="__codelineno-0-228"></a><span class="sd"> device=&quot;cuda&quot; if torch.cuda.is_available() else &quot;cpu&quot;</span>
</span><span id="__span-0-229"><a id="__codelineno-0-229" name="__codelineno-0-229"></a>
</span><span id="__span-0-230"><a id="__codelineno-0-230" name="__codelineno-0-230"></a><span class="sd"> # multimask_output=True is recommended for ambiguous prompts such as a single point.</span>
</span><span id="__span-0-231"><a id="__codelineno-0-231" name="__codelineno-0-231"></a><span class="sd"> # Below, a box prompt is passed, so just use multimask_output=False which will return a single mask</span>
</span><span id="__span-0-232"><a id="__codelineno-0-232" name="__codelineno-0-232"></a><span class="sd"> sam_h = SegmentAnythingH(multimask_output=False, device=device)</span>
</span><span id="__span-0-233"><a id="__codelineno-0-233" name="__codelineno-0-233"></a>
</span><span id="__span-0-234"><a id="__codelineno-0-234" name="__codelineno-0-234"></a><span class="sd"> # Tips: run scripts/prepare_test_weights.py to download the weights</span>
</span><span id="__span-0-235"><a id="__codelineno-0-235" name="__codelineno-0-235"></a><span class="sd"> tensors_path = &quot;./tests/weights/segment-anything-h.safetensors&quot;</span>
</span><span id="__span-0-236"><a id="__codelineno-0-236" name="__codelineno-0-236"></a><span class="sd"> sam_h.load_from_safetensors(tensors_path=tensors_path)</span>
</span><span id="__span-0-237"><a id="__codelineno-0-237" name="__codelineno-0-237"></a>
</span><span id="__span-0-238"><a id="__codelineno-0-238" name="__codelineno-0-238"></a><span class="sd"> from PIL import Image</span>
</span><span id="__span-0-239"><a id="__codelineno-0-239" name="__codelineno-0-239"></a><span class="sd"> image = Image.open(&quot;image.png&quot;)</span>
</span><span id="__span-0-240"><a id="__codelineno-0-240" name="__codelineno-0-240"></a>
</span><span id="__span-0-241"><a id="__codelineno-0-241" name="__codelineno-0-241"></a><span class="sd"> masks, *_ = sam_h.predict(image, box_points=[[(x1, y1), (x2, y2)]])</span>
</span><span id="__span-0-242"><a id="__codelineno-0-242" name="__codelineno-0-242"></a>
</span><span id="__span-0-243"><a id="__codelineno-0-243" name="__codelineno-0-243"></a><span class="sd"> assert masks.shape == (1, 1, image.height, image.width)</span>
</span><span id="__span-0-244"><a id="__codelineno-0-244" name="__codelineno-0-244"></a><span class="sd"> assert masks.dtype == torch.bool</span>
</span><span id="__span-0-245"><a id="__codelineno-0-245" name="__codelineno-0-245"></a>
</span><span id="__span-0-246"><a id="__codelineno-0-246" name="__codelineno-0-246"></a><span class="sd"> # convert it to [0,255] uint8 ndarray of shape (H, W)</span>
</span><span id="__span-0-247"><a id="__codelineno-0-247" name="__codelineno-0-247"></a><span class="sd"> mask = masks[0, 0].cpu().numpy().astype(&quot;uint8&quot;) * 255</span>
</span><span id="__span-0-248"><a id="__codelineno-0-248" name="__codelineno-0-248"></a>
</span><span id="__span-0-249"><a id="__codelineno-0-249" name="__codelineno-0-249"></a><span class="sd"> Image.fromarray(mask).save(&quot;mask_image.png&quot;)</span>
</span><span id="__span-0-250"><a id="__codelineno-0-250" name="__codelineno-0-250"></a><span class="sd"> ```</span>
</span><span id="__span-0-251"><a id="__codelineno-0-251" name="__codelineno-0-251"></a><span class="sd"> &quot;&quot;&quot;</span>
</span><span id="__span-0-252"><a id="__codelineno-0-252" name="__codelineno-0-252"></a> <span class="n">image_encoder</span> <span class="o">=</span> <span class="n">image_encoder</span> <span class="ow">or</span> <span class="n">SAMViTH</span><span class="p">()</span>
</span><span id="__span-0-253"><a id="__codelineno-0-253" name="__codelineno-0-253"></a> <span class="n">point_encoder</span> <span class="o">=</span> <span class="n">point_encoder</span> <span class="ow">or</span> <span class="n">PointEncoder</span><span class="p">()</span>
</span><span id="__span-0-254"><a id="__codelineno-0-254" name="__codelineno-0-254"></a> <span class="n">mask_encoder</span> <span class="o">=</span> <span class="n">mask_encoder</span> <span class="ow">or</span> <span class="n">MaskEncoder</span><span class="p">()</span>
</span><span id="__span-0-255"><a id="__codelineno-0-255" name="__codelineno-0-255"></a>
</span><span id="__span-0-256"><a id="__codelineno-0-256" name="__codelineno-0-256"></a> <span class="k">if</span> <span class="n">mask_decoder</span><span class="p">:</span>
</span><span id="__span-0-257"><a id="__codelineno-0-257" name="__codelineno-0-257"></a> <span class="k">assert</span> <span class="p">(</span>
</span><span id="__span-0-258"><a id="__codelineno-0-258" name="__codelineno-0-258"></a> <span class="n">multimask_output</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">mask_decoder</span><span class="o">.</span><span class="n">multimask_output</span> <span class="o">==</span> <span class="n">multimask_output</span>
</span><span id="__span-0-259"><a id="__codelineno-0-259" name="__codelineno-0-259"></a> <span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;mask_decoder.multimask_output </span><span class="si">{</span><span class="n">mask_decoder</span><span class="o">.</span><span class="n">multimask_output</span><span class="si">}</span><span class="s2"> should match multimask_output (</span><span class="si">{</span><span class="n">multimask_output</span><span class="si">}</span><span class="s2">)&quot;</span>
</span><span id="__span-0-260"><a id="__codelineno-0-260" name="__codelineno-0-260"></a> <span class="k">else</span><span class="p">:</span>
</span><span id="__span-0-261"><a id="__codelineno-0-261" name="__codelineno-0-261"></a> <span class="n">mask_decoder</span> <span class="o">=</span> <span class="n">MaskDecoder</span><span class="p">(</span><span class="n">multimask_output</span><span class="p">)</span> <span class="k">if</span> <span class="n">multimask_output</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="k">else</span> <span class="n">MaskDecoder</span><span class="p">()</span>
</span><span id="__span-0-262"><a id="__codelineno-0-262" name="__codelineno-0-262"></a>
</span><span id="__span-0-263"><a id="__codelineno-0-263" name="__codelineno-0-263"></a> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">image_encoder</span><span class="p">,</span> <span class="n">point_encoder</span><span class="p">,</span> <span class="n">mask_encoder</span><span class="p">,</span> <span class="n">mask_decoder</span><span class="p">)</span>
</span><span id="__span-0-264"><a id="__codelineno-0-264" name="__codelineno-0-264"></a>
</span><span id="__span-0-265"><a id="__codelineno-0-265" name="__codelineno-0-265"></a> <span class="bp">self</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">device</span><span class="o">=</span><span class="n">device</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span>
</span></code></pre></div></td></tr></table></div>
</details>
<div class="doc doc-children">
<div class="doc doc-object doc-attribute">
<h3 id="refiners.foundationals.segment_anything.SegmentAnythingH.image_encoder" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">image_encoder</span>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-property"><code>property</code></small>
</span>
<a href="#refiners.foundationals.segment_anything.SegmentAnythingH.image_encoder" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="n">image_encoder</span><span class="p">:</span> <span class="n"><span title="refiners.foundationals.segment_anything.image_encoder.SAMViTH">SAMViTH</span></span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p>The image encoder.</p>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
<div class="doc doc-object doc-module">
<div class="doc doc-contents first">
<div class="doc doc-children">
<div class="doc doc-object doc-function">
<h2 id="refiners.foundationals.segment_anything.utils.compute_scaled_size" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-function"></code> <span class="doc doc-object-name doc-function-name">compute_scaled_size</span>
<a href="#refiners.foundationals.segment_anything.utils.compute_scaled_size" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="nf">compute_scaled_size</span><span class="p">(</span>
</span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a> <span class="n">size</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a></span><span class="p">[</span><span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">,</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">],</span> <span class="n">image_encoder_resolution</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span>
</span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a></span><span class="p">[</span><span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">,</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">]</span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p>Compute the scaled size as expected by the image encoder.
The computed size keeps the aspect ratio of the input image and scales it to fit inside the (image_encoder_resolution, image_encoder_resolution) square of the image encoder.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>size</code>
</td>
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a>[<a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a>, <a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a>]</code>
</td>
<td>
<div class="doc-md-description">
<p>The size (h, w) of the input image.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>image_encoder_resolution</code>
</td>
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></code>
</td>
<td>
<div class="doc-md-description">
<p>Image encoder resolution.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Returns:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></code>
</td>
<td>
<div class="doc-md-description">
<p>The target height.</p>
</div>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></code>
</td>
<td>
<div class="doc-md-description">
<p>The target width.</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>src/refiners/foundationals/segment_anything/utils.py</code></summary>
<div class="language-python highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-7"> 7</a></span>
<span class="normal"><a href="#__codelineno-0-8"> 8</a></span>
<span class="normal"><a href="#__codelineno-0-9"> 9</a></span>
<span class="normal"><a href="#__codelineno-0-10">10</a></span>
<span class="normal"><a href="#__codelineno-0-11">11</a></span>
<span class="normal"><a href="#__codelineno-0-12">12</a></span>
<span class="normal"><a href="#__codelineno-0-13">13</a></span>
<span class="normal"><a href="#__codelineno-0-14">14</a></span>
<span class="normal"><a href="#__codelineno-0-15">15</a></span>
<span class="normal"><a href="#__codelineno-0-16">16</a></span>
<span class="normal"><a href="#__codelineno-0-17">17</a></span>
<span class="normal"><a href="#__codelineno-0-18">18</a></span>
<span class="normal"><a href="#__codelineno-0-19">19</a></span>
<span class="normal"><a href="#__codelineno-0-20">20</a></span>
<span class="normal"><a href="#__codelineno-0-21">21</a></span>
<span class="normal"><a href="#__codelineno-0-22">22</a></span>
<span class="normal"><a href="#__codelineno-0-23">23</a></span>
<span class="normal"><a href="#__codelineno-0-24">24</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-7"><a id="__codelineno-0-7" name="__codelineno-0-7"></a><span class="k">def</span> <span class="nf">compute_scaled_size</span><span class="p">(</span><span class="n">size</span><span class="p">:</span> <span class="nb">tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">],</span> <span class="n">image_encoder_resolution</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">]:</span>
</span><span id="__span-0-8"><a id="__codelineno-0-8" name="__codelineno-0-8"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Compute the scaled size as expected by the image encoder.</span>
</span><span id="__span-0-9"><a id="__codelineno-0-9" name="__codelineno-0-9"></a><span class="sd"> This computed size keep the ratio of the input image, and scale it to fit inside the square (image_encoder_resolution, image_encoder_resolution) of image encoder.</span>
</span><span id="__span-0-10"><a id="__codelineno-0-10" name="__codelineno-0-10"></a>
</span><span id="__span-0-11"><a id="__codelineno-0-11" name="__codelineno-0-11"></a><span class="sd"> Args:</span>
</span><span id="__span-0-12"><a id="__codelineno-0-12" name="__codelineno-0-12"></a><span class="sd"> size: The size (h, w) of the input image.</span>
</span><span id="__span-0-13"><a id="__codelineno-0-13" name="__codelineno-0-13"></a><span class="sd"> image_encoder_resolution: Image encoder resolution.</span>
</span><span id="__span-0-14"><a id="__codelineno-0-14" name="__codelineno-0-14"></a>
</span><span id="__span-0-15"><a id="__codelineno-0-15" name="__codelineno-0-15"></a><span class="sd"> Returns:</span>
</span><span id="__span-0-16"><a id="__codelineno-0-16" name="__codelineno-0-16"></a><span class="sd"> The target height.</span>
</span><span id="__span-0-17"><a id="__codelineno-0-17" name="__codelineno-0-17"></a><span class="sd"> The target width.</span>
</span><span id="__span-0-18"><a id="__codelineno-0-18" name="__codelineno-0-18"></a><span class="sd"> &quot;&quot;&quot;</span>
</span><span id="__span-0-19"><a id="__codelineno-0-19" name="__codelineno-0-19"></a> <span class="n">oldh</span><span class="p">,</span> <span class="n">oldw</span> <span class="o">=</span> <span class="n">size</span>
</span><span id="__span-0-20"><a id="__codelineno-0-20" name="__codelineno-0-20"></a> <span class="n">scale</span> <span class="o">=</span> <span class="n">image_encoder_resolution</span> <span class="o">*</span> <span class="mf">1.0</span> <span class="o">/</span> <span class="nb">max</span><span class="p">(</span><span class="n">oldh</span><span class="p">,</span> <span class="n">oldw</span><span class="p">)</span>
</span><span id="__span-0-21"><a id="__codelineno-0-21" name="__codelineno-0-21"></a> <span class="n">newh</span><span class="p">,</span> <span class="n">neww</span> <span class="o">=</span> <span class="n">oldh</span> <span class="o">*</span> <span class="n">scale</span><span class="p">,</span> <span class="n">oldw</span> <span class="o">*</span> <span class="n">scale</span>
</span><span id="__span-0-22"><a id="__codelineno-0-22" name="__codelineno-0-22"></a> <span class="n">neww</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">neww</span> <span class="o">+</span> <span class="mf">0.5</span><span class="p">)</span>
</span><span id="__span-0-23"><a id="__codelineno-0-23" name="__codelineno-0-23"></a> <span class="n">newh</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">newh</span> <span class="o">+</span> <span class="mf">0.5</span><span class="p">)</span>
</span><span id="__span-0-24"><a id="__codelineno-0-24" name="__codelineno-0-24"></a> <span class="k">return</span> <span class="p">(</span><span class="n">newh</span><span class="p">,</span> <span class="n">neww</span><span class="p">)</span>
</span></code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h2 id="refiners.foundationals.segment_anything.utils.image_to_scaled_tensor" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-function"></code> <span class="doc doc-object-name doc-function-name">image_to_scaled_tensor</span>
<a href="#refiners.foundationals.segment_anything.utils.image_to_scaled_tensor" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="nf">image_to_scaled_tensor</span><span class="p">(</span>
</span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a> <span class="n">image</span><span class="p">:</span> <span class="n"><span title="PIL.Image.Image">Image</span></span><span class="p">,</span>
</span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a> <span class="n">scaled_size</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a></span><span class="p">[</span><span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">,</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">],</span>
</span><span id="__span-0-4"><a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a> <span class="n">device</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" title="torch.device" href="https://pytorch.org/docs/main/tensor_attributes.html#torch.device">device</a></span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-5"><a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a> <span class="n">dtype</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" title="torch.dtype" href="https://pytorch.org/docs/main/tensor_attributes.html#torch.dtype">dtype</a></span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-6"><a id="__codelineno-0-6" name="__codelineno-0-6" href="#__codelineno-0-6"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="n"><a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a></span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p>Resize the image to <code>scaled_size</code> and convert it to a tensor.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>image</code>
</td>
<td>
<code><span title="PIL.Image.Image">Image</span></code>
</td>
<td>
<div class="doc-md-description">
<p>The image.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>scaled_size</code>
</td>
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a>[<a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a>, <a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a>]</code>
</td>
<td>
<div class="doc-md-description">
<p>The target size (h, w).</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>device</code>
</td>
<td>
<code><a class="autorefs autorefs-external" title="torch.device" href="https://pytorch.org/docs/main/tensor_attributes.html#torch.device">device</a> | None</code>
</td>
<td>
<div class="doc-md-description">
<p>Tensor device.</p>
</div>
</td>
<td>
<code>None</code>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>dtype</code>
</td>
<td>
<code><a class="autorefs autorefs-external" title="torch.dtype" href="https://pytorch.org/docs/main/tensor_attributes.html#torch.dtype">dtype</a> | None</code>
</td>
<td>
<div class="doc-md-description">
<p>Tensor dtype.</p>
</div>
</td>
<td>
<code>None</code>
</td>
</tr>
</tbody>
</table>
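<p><span class="doc-section-title">Returns:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a></code>
</td>
<td>
<div class="doc-md-description">
<p>A tensor of shape (1, c, h, w).</p>
</div>
</td>
</tr>
</tbody>
</table>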
<details class="quote">
<summary>Source code in <code>src/refiners/foundationals/segment_anything/utils.py</code></summary>
<div class="language-python highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-27">27</a></span>
<span class="normal"><a href="#__codelineno-0-28">28</a></span>
<span class="normal"><a href="#__codelineno-0-29">29</a></span>
<span class="normal"><a href="#__codelineno-0-30">30</a></span>
<span class="normal"><a href="#__codelineno-0-31">31</a></span>
<span class="normal"><a href="#__codelineno-0-32">32</a></span>
<span class="normal"><a href="#__codelineno-0-33">33</a></span>
<span class="normal"><a href="#__codelineno-0-34">34</a></span>
<span class="normal"><a href="#__codelineno-0-35">35</a></span>
<span class="normal"><a href="#__codelineno-0-36">36</a></span>
<span class="normal"><a href="#__codelineno-0-37">37</a></span>
<span class="normal"><a href="#__codelineno-0-38">38</a></span>
<span class="normal"><a href="#__codelineno-0-39">39</a></span>
<span class="normal"><a href="#__codelineno-0-40">40</a></span>
<span class="normal"><a href="#__codelineno-0-41">41</a></span>
<span class="normal"><a href="#__codelineno-0-42">42</a></span>
<span class="normal"><a href="#__codelineno-0-43">43</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-27"><a id="__codelineno-0-27" name="__codelineno-0-27"></a><span class="k">def</span> <span class="nf">image_to_scaled_tensor</span><span class="p">(</span>
</span><span id="__span-0-28"><a id="__codelineno-0-28" name="__codelineno-0-28"></a> <span class="n">image</span><span class="p">:</span> <span class="n">Image</span><span class="o">.</span><span class="n">Image</span><span class="p">,</span> <span class="n">scaled_size</span><span class="p">:</span> <span class="nb">tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">],</span> <span class="n">device</span><span class="p">:</span> <span class="n">Device</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">dtype</span><span class="p">:</span> <span class="n">DType</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span>
</span><span id="__span-0-29"><a id="__codelineno-0-29" name="__codelineno-0-29"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Tensor</span><span class="p">:</span>
</span><span id="__span-0-30"><a id="__codelineno-0-30" name="__codelineno-0-30"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Resize the image to `scaled_size` and convert it to a tensor.</span>
</span><span id="__span-0-31"><a id="__codelineno-0-31" name="__codelineno-0-31"></a>
</span><span id="__span-0-32"><a id="__codelineno-0-32" name="__codelineno-0-32"></a><span class="sd"> Args:</span>
</span><span id="__span-0-33"><a id="__codelineno-0-33" name="__codelineno-0-33"></a><span class="sd"> image: The image.</span>
</span><span id="__span-0-34"><a id="__codelineno-0-34" name="__codelineno-0-34"></a><span class="sd"> scaled_size: The target size (h, w).</span>
</span><span id="__span-0-35"><a id="__codelineno-0-35" name="__codelineno-0-35"></a><span class="sd"> device: Tensor device.</span>
</span><span id="__span-0-36"><a id="__codelineno-0-36" name="__codelineno-0-36"></a><span class="sd"> dtype: Tensor dtype.</span>
</span><span id="__span-0-37"><a id="__codelineno-0-37" name="__codelineno-0-37"></a><span class="sd"> Returns:</span>
</span><span id="__span-0-38"><a id="__codelineno-0-38" name="__codelineno-0-38"></a><span class="sd"> a Tensor of shape (1, c, h, w)</span>
</span><span id="__span-0-39"><a id="__codelineno-0-39" name="__codelineno-0-39"></a><span class="sd"> &quot;&quot;&quot;</span>
</span><span id="__span-0-40"><a id="__codelineno-0-40" name="__codelineno-0-40"></a> <span class="n">h</span><span class="p">,</span> <span class="n">w</span> <span class="o">=</span> <span class="n">scaled_size</span>
</span><span id="__span-0-41"><a id="__codelineno-0-41" name="__codelineno-0-41"></a> <span class="n">resized</span> <span class="o">=</span> <span class="n">image</span><span class="o">.</span><span class="n">resize</span><span class="p">((</span><span class="n">w</span><span class="p">,</span> <span class="n">h</span><span class="p">),</span> <span class="n">resample</span><span class="o">=</span><span class="n">Image</span><span class="o">.</span><span class="n">Resampling</span><span class="o">.</span><span class="n">BILINEAR</span><span class="p">)</span> <span class="c1"># type: ignore</span>
</span><span id="__span-0-42"><a id="__codelineno-0-42" name="__codelineno-0-42"></a>
</span><span id="__span-0-43"><a id="__codelineno-0-43" name="__codelineno-0-43"></a> <span class="k">return</span> <span class="n">image_to_tensor</span><span class="p">(</span><span class="n">resized</span><span class="p">,</span> <span class="n">device</span><span class="o">=</span><span class="n">device</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span> <span class="o">*</span> <span class="mf">255.0</span>
</span></code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h2 id="refiners.foundationals.segment_anything.utils.normalize_coordinates" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-function"></code> <span class="doc doc-object-name doc-function-name">normalize_coordinates</span>
<a href="#refiners.foundationals.segment_anything.utils.normalize_coordinates" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="nf">normalize_coordinates</span><span class="p">(</span>
</span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a> <span class="n">coordinates</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a></span><span class="p">,</span>
</span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a> <span class="n">original_size</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a></span><span class="p">[</span><span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">,</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">],</span>
</span><span id="__span-0-4"><a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a> <span class="n">image_encoder_resolution</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">,</span>
</span><span id="__span-0-5"><a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="n"><a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a></span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p>Normalize the coordinates to the [0, 1] range.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>coordinates</code>
</td>
<td>
<code><a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a></code>
</td>
<td>
<div class="doc-md-description">
<p>The coordinates to normalize.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>original_size</code>
</td>
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a>[<a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a>, <a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a>]</code>
</td>
<td>
<div class="doc-md-description">
<p>The original image size.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>image_encoder_resolution</code>
</td>
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></code>
</td>
<td>
<div class="doc-md-description">
<p>Image encoder resolution.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Returns:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a></code>
</td>
<td>
<div class="doc-md-description">
<p>The normalized coordinates.</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>src/refiners/foundationals/segment_anything/utils.py</code></summary>
<div class="language-python highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-111">111</a></span>
<span class="normal"><a href="#__codelineno-0-112">112</a></span>
<span class="normal"><a href="#__codelineno-0-113">113</a></span>
<span class="normal"><a href="#__codelineno-0-114">114</a></span>
<span class="normal"><a href="#__codelineno-0-115">115</a></span>
<span class="normal"><a href="#__codelineno-0-116">116</a></span>
<span class="normal"><a href="#__codelineno-0-117">117</a></span>
<span class="normal"><a href="#__codelineno-0-118">118</a></span>
<span class="normal"><a href="#__codelineno-0-119">119</a></span>
<span class="normal"><a href="#__codelineno-0-120">120</a></span>
<span class="normal"><a href="#__codelineno-0-121">121</a></span>
<span class="normal"><a href="#__codelineno-0-122">122</a></span>
<span class="normal"><a href="#__codelineno-0-123">123</a></span>
<span class="normal"><a href="#__codelineno-0-124">124</a></span>
<span class="normal"><a href="#__codelineno-0-125">125</a></span>
<span class="normal"><a href="#__codelineno-0-126">126</a></span>
<span class="normal"><a href="#__codelineno-0-127">127</a></span>
<span class="normal"><a href="#__codelineno-0-128">128</a></span>
<span class="normal"><a href="#__codelineno-0-129">129</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-111"><a id="__codelineno-0-111" name="__codelineno-0-111"></a><span class="k">def</span> <span class="nf">normalize_coordinates</span><span class="p">(</span><span class="n">coordinates</span><span class="p">:</span> <span class="n">Tensor</span><span class="p">,</span> <span class="n">original_size</span><span class="p">:</span> <span class="nb">tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">],</span> <span class="n">image_encoder_resolution</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Tensor</span><span class="p">:</span>
</span><span id="__span-0-112"><a id="__codelineno-0-112" name="__codelineno-0-112"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Normalize the coordinates in the [0,1] range</span>
</span><span id="__span-0-113"><a id="__codelineno-0-113" name="__codelineno-0-113"></a>
</span><span id="__span-0-114"><a id="__codelineno-0-114" name="__codelineno-0-114"></a><span class="sd"> Args:</span>
</span><span id="__span-0-115"><a id="__codelineno-0-115" name="__codelineno-0-115"></a><span class="sd"> coordinates: The coordinates to normalize.</span>
</span><span id="__span-0-116"><a id="__codelineno-0-116" name="__codelineno-0-116"></a><span class="sd"> original_size: The original image size.</span>
</span><span id="__span-0-117"><a id="__codelineno-0-117" name="__codelineno-0-117"></a><span class="sd"> image_encoder_resolution: Image encoder resolution.</span>
</span><span id="__span-0-118"><a id="__codelineno-0-118" name="__codelineno-0-118"></a>
</span><span id="__span-0-119"><a id="__codelineno-0-119" name="__codelineno-0-119"></a><span class="sd"> Returns:</span>
</span><span id="__span-0-120"><a id="__codelineno-0-120" name="__codelineno-0-120"></a><span class="sd"> The normalized coordinates.</span>
</span><span id="__span-0-121"><a id="__codelineno-0-121" name="__codelineno-0-121"></a><span class="sd"> &quot;&quot;&quot;</span>
</span><span id="__span-0-122"><a id="__codelineno-0-122" name="__codelineno-0-122"></a> <span class="n">scaled_size</span> <span class="o">=</span> <span class="n">compute_scaled_size</span><span class="p">(</span><span class="n">original_size</span><span class="p">,</span> <span class="n">image_encoder_resolution</span><span class="p">)</span>
</span><span id="__span-0-123"><a id="__codelineno-0-123" name="__codelineno-0-123"></a> <span class="n">coordinates</span><span class="p">[:,</span> <span class="p">:,</span> <span class="mi">0</span><span class="p">]</span> <span class="o">=</span> <span class="p">(</span>
</span><span id="__span-0-124"><a id="__codelineno-0-124" name="__codelineno-0-124"></a> <span class="p">(</span><span class="n">coordinates</span><span class="p">[:,</span> <span class="p">:,</span> <span class="mi">0</span><span class="p">]</span> <span class="o">*</span> <span class="p">(</span><span class="n">scaled_size</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">/</span> <span class="n">original_size</span><span class="p">[</span><span class="mi">1</span><span class="p">]))</span> <span class="o">+</span> <span class="mf">0.5</span>
</span><span id="__span-0-125"><a id="__codelineno-0-125" name="__codelineno-0-125"></a> <span class="p">)</span> <span class="o">/</span> <span class="n">image_encoder_resolution</span>
</span><span id="__span-0-126"><a id="__codelineno-0-126" name="__codelineno-0-126"></a> <span class="n">coordinates</span><span class="p">[:,</span> <span class="p">:,</span> <span class="mi">1</span><span class="p">]</span> <span class="o">=</span> <span class="p">(</span>
</span><span id="__span-0-127"><a id="__codelineno-0-127" name="__codelineno-0-127"></a> <span class="p">(</span><span class="n">coordinates</span><span class="p">[:,</span> <span class="p">:,</span> <span class="mi">1</span><span class="p">]</span> <span class="o">*</span> <span class="p">(</span><span class="n">scaled_size</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">/</span> <span class="n">original_size</span><span class="p">[</span><span class="mi">0</span><span class="p">]))</span> <span class="o">+</span> <span class="mf">0.5</span>
</span><span id="__span-0-128"><a id="__codelineno-0-128" name="__codelineno-0-128"></a> <span class="p">)</span> <span class="o">/</span> <span class="n">image_encoder_resolution</span>
</span><span id="__span-0-129"><a id="__codelineno-0-129" name="__codelineno-0-129"></a> <span class="k">return</span> <span class="n">coordinates</span>
</span></code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h2 id="refiners.foundationals.segment_anything.utils.pad_image_tensor" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-function"></code> <span class="doc doc-object-name doc-function-name">pad_image_tensor</span>
<a href="#refiners.foundationals.segment_anything.utils.pad_image_tensor" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="nf">pad_image_tensor</span><span class="p">(</span>
</span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a> <span class="n">image_tensor</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a></span><span class="p">,</span>
</span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a> <span class="n">scaled_size</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a></span><span class="p">[</span><span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">,</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">],</span>
</span><span id="__span-0-4"><a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a> <span class="n">image_encoder_resolution</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">,</span>
</span><span id="__span-0-5"><a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="n"><a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a></span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p>Pad an image with zeros to make it square.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>image_tensor</code>
</td>
<td>
<code><a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a></code>
</td>
<td>
<div class="doc-md-description">
<p>The image tensor to pad.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>scaled_size</code>
</td>
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a>[<a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a>, <a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a>]</code>
</td>
<td>
<div class="doc-md-description">
<p>The scaled size (h, w).</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>image_encoder_resolution</code>
</td>
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></code>
</td>
<td>
<div class="doc-md-description">
<p>Image encoder resolution.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Returns:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a></code>
</td>
<td>
<div class="doc-md-description">
<p>The padded image.</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>src/refiners/foundationals/segment_anything/utils.py</code></summary>
<div class="language-python highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-72">72</a></span>
<span class="normal"><a href="#__codelineno-0-73">73</a></span>
<span class="normal"><a href="#__codelineno-0-74">74</a></span>
<span class="normal"><a href="#__codelineno-0-75">75</a></span>
<span class="normal"><a href="#__codelineno-0-76">76</a></span>
<span class="normal"><a href="#__codelineno-0-77">77</a></span>
<span class="normal"><a href="#__codelineno-0-78">78</a></span>
<span class="normal"><a href="#__codelineno-0-79">79</a></span>
<span class="normal"><a href="#__codelineno-0-80">80</a></span>
<span class="normal"><a href="#__codelineno-0-81">81</a></span>
<span class="normal"><a href="#__codelineno-0-82">82</a></span>
<span class="normal"><a href="#__codelineno-0-83">83</a></span>
<span class="normal"><a href="#__codelineno-0-84">84</a></span>
<span class="normal"><a href="#__codelineno-0-85">85</a></span>
<span class="normal"><a href="#__codelineno-0-86">86</a></span>
<span class="normal"><a href="#__codelineno-0-87">87</a></span>
<span class="normal"><a href="#__codelineno-0-88">88</a></span>
<span class="normal"><a href="#__codelineno-0-89">89</a></span>
<span class="normal"><a href="#__codelineno-0-90">90</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-72"><a id="__codelineno-0-72" name="__codelineno-0-72"></a><span class="k">def</span> <span class="nf">pad_image_tensor</span><span class="p">(</span><span class="n">image_tensor</span><span class="p">:</span> <span class="n">Tensor</span><span class="p">,</span> <span class="n">scaled_size</span><span class="p">:</span> <span class="nb">tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">],</span> <span class="n">image_encoder_resolution</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Tensor</span><span class="p">:</span>
</span><span id="__span-0-73"><a id="__codelineno-0-73" name="__codelineno-0-73"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Pad an image with zeros to make it square.</span>
</span><span id="__span-0-74"><a id="__codelineno-0-74" name="__codelineno-0-74"></a>
</span><span id="__span-0-75"><a id="__codelineno-0-75" name="__codelineno-0-75"></a><span class="sd"> Args:</span>
</span><span id="__span-0-76"><a id="__codelineno-0-76" name="__codelineno-0-76"></a><span class="sd"> image_tensor: The image tensor to pad.</span>
</span><span id="__span-0-77"><a id="__codelineno-0-77" name="__codelineno-0-77"></a><span class="sd"> scaled_size: The scaled size (h, w).</span>
</span><span id="__span-0-78"><a id="__codelineno-0-78" name="__codelineno-0-78"></a><span class="sd"> image_encoder_resolution: Image encoder resolution.</span>
</span><span id="__span-0-79"><a id="__codelineno-0-79" name="__codelineno-0-79"></a>
</span><span id="__span-0-80"><a id="__codelineno-0-80" name="__codelineno-0-80"></a><span class="sd"> Returns:</span>
</span><span id="__span-0-81"><a id="__codelineno-0-81" name="__codelineno-0-81"></a><span class="sd"> The padded image.</span>
</span><span id="__span-0-82"><a id="__codelineno-0-82" name="__codelineno-0-82"></a><span class="sd"> &quot;&quot;&quot;</span>
</span><span id="__span-0-83"><a id="__codelineno-0-83" name="__codelineno-0-83"></a> <span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">image_tensor</span><span class="o">.</span><span class="n">shape</span><span class="p">)</span> <span class="o">==</span> <span class="mi">4</span>
</span><span id="__span-0-84"><a id="__codelineno-0-84" name="__codelineno-0-84"></a> <span class="k">assert</span> <span class="n">image_tensor</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span> <span class="o">&lt;=</span> <span class="n">image_encoder_resolution</span>
</span><span id="__span-0-85"><a id="__codelineno-0-85" name="__codelineno-0-85"></a> <span class="k">assert</span> <span class="n">image_tensor</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">3</span><span class="p">]</span> <span class="o">&lt;=</span> <span class="n">image_encoder_resolution</span>
</span><span id="__span-0-86"><a id="__codelineno-0-86" name="__codelineno-0-86"></a>
</span><span id="__span-0-87"><a id="__codelineno-0-87" name="__codelineno-0-87"></a> <span class="n">h</span><span class="p">,</span> <span class="n">w</span> <span class="o">=</span> <span class="n">scaled_size</span>
</span><span id="__span-0-88"><a id="__codelineno-0-88" name="__codelineno-0-88"></a> <span class="n">padh</span> <span class="o">=</span> <span class="n">image_encoder_resolution</span> <span class="o">-</span> <span class="n">h</span>
</span><span id="__span-0-89"><a id="__codelineno-0-89" name="__codelineno-0-89"></a> <span class="n">padw</span> <span class="o">=</span> <span class="n">image_encoder_resolution</span> <span class="o">-</span> <span class="n">w</span>
</span><span id="__span-0-90"><a id="__codelineno-0-90" name="__codelineno-0-90"></a> <span class="k">return</span> <span class="n">pad</span><span class="p">(</span><span class="n">image_tensor</span><span class="p">,</span> <span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">padw</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">padh</span><span class="p">))</span>
</span></code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h2 id="refiners.foundationals.segment_anything.utils.postprocess_masks" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-function"></code> <span class="doc doc-object-name doc-function-name">postprocess_masks</span>
<a href="#refiners.foundationals.segment_anything.utils.postprocess_masks" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="nf">postprocess_masks</span><span class="p">(</span>
</span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a> <span class="n">low_res_masks</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a></span><span class="p">,</span>
</span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a> <span class="n">original_size</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a></span><span class="p">[</span><span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">,</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">],</span>
</span><span id="__span-0-4"><a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a> <span class="n">image_encoder_resolution</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">,</span>
</span><span id="__span-0-5"><a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="n"><a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a></span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p>Postprocess the masks to fit the original image size and remove zero-padding (if any).</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>low_res_masks</code>
</td>
<td>
<code><a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a></code>
</td>
<td>
<div class="doc-md-description">
<p>The masks to postprocess.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>original_size</code>
</td>
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a>[<a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a>, <a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a>]</code>
</td>
<td>
<div class="doc-md-description">
<p>The original size (h, w).</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>image_encoder_resolution</code>
</td>
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></code>
</td>
<td>
<div class="doc-md-description">
<p>Image encoder resolution.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Returns:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a></code>
</td>
<td>
<div class="doc-md-description">
<p>The postprocessed masks.</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>src/refiners/foundationals/segment_anything/utils.py</code></summary>
<div class="language-python highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-93"> 93</a></span>
<span class="normal"><a href="#__codelineno-0-94"> 94</a></span>
<span class="normal"><a href="#__codelineno-0-95"> 95</a></span>
<span class="normal"><a href="#__codelineno-0-96"> 96</a></span>
<span class="normal"><a href="#__codelineno-0-97"> 97</a></span>
<span class="normal"><a href="#__codelineno-0-98"> 98</a></span>
<span class="normal"><a href="#__codelineno-0-99"> 99</a></span>
<span class="normal"><a href="#__codelineno-0-100">100</a></span>
<span class="normal"><a href="#__codelineno-0-101">101</a></span>
<span class="normal"><a href="#__codelineno-0-102">102</a></span>
<span class="normal"><a href="#__codelineno-0-103">103</a></span>
<span class="normal"><a href="#__codelineno-0-104">104</a></span>
<span class="normal"><a href="#__codelineno-0-105">105</a></span>
<span class="normal"><a href="#__codelineno-0-106">106</a></span>
<span class="normal"><a href="#__codelineno-0-107">107</a></span>
<span class="normal"><a href="#__codelineno-0-108">108</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-93"><a id="__codelineno-0-93" name="__codelineno-0-93"></a><span class="k">def</span> <span class="nf">postprocess_masks</span><span class="p">(</span><span class="n">low_res_masks</span><span class="p">:</span> <span class="n">Tensor</span><span class="p">,</span> <span class="n">original_size</span><span class="p">:</span> <span class="nb">tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">],</span> <span class="n">image_encoder_resolution</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Tensor</span><span class="p">:</span>
</span><span id="__span-0-94"><a id="__codelineno-0-94" name="__codelineno-0-94"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Postprocess the masks to fit the original image size and remove zero-padding (if any).</span>
</span><span id="__span-0-95"><a id="__codelineno-0-95" name="__codelineno-0-95"></a>
</span><span id="__span-0-96"><a id="__codelineno-0-96" name="__codelineno-0-96"></a><span class="sd"> Args:</span>
</span><span id="__span-0-97"><a id="__codelineno-0-97" name="__codelineno-0-97"></a><span class="sd"> low_res_masks: The masks to postprocess.</span>
</span><span id="__span-0-98"><a id="__codelineno-0-98" name="__codelineno-0-98"></a><span class="sd"> original_size: The original size (h, w).</span>
</span><span id="__span-0-99"><a id="__codelineno-0-99" name="__codelineno-0-99"></a><span class="sd"> image_encoder_resolution: Image encoder resolution.</span>
</span><span id="__span-0-100"><a id="__codelineno-0-100" name="__codelineno-0-100"></a>
</span><span id="__span-0-101"><a id="__codelineno-0-101" name="__codelineno-0-101"></a><span class="sd"> Returns:</span>
</span><span id="__span-0-102"><a id="__codelineno-0-102" name="__codelineno-0-102"></a><span class="sd"> The postprocessed masks.</span>
</span><span id="__span-0-103"><a id="__codelineno-0-103" name="__codelineno-0-103"></a><span class="sd"> &quot;&quot;&quot;</span>
</span><span id="__span-0-104"><a id="__codelineno-0-104" name="__codelineno-0-104"></a> <span class="n">scaled_size</span> <span class="o">=</span> <span class="n">compute_scaled_size</span><span class="p">(</span><span class="n">original_size</span><span class="p">,</span> <span class="n">image_encoder_resolution</span><span class="p">)</span>
</span><span id="__span-0-105"><a id="__codelineno-0-105" name="__codelineno-0-105"></a> <span class="n">masks</span> <span class="o">=</span> <span class="n">interpolate</span><span class="p">(</span><span class="n">low_res_masks</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="n">Size</span><span class="p">((</span><span class="n">image_encoder_resolution</span><span class="p">,</span> <span class="n">image_encoder_resolution</span><span class="p">)),</span> <span class="n">mode</span><span class="o">=</span><span class="s2">&quot;bilinear&quot;</span><span class="p">)</span>
</span><span id="__span-0-106"><a id="__codelineno-0-106" name="__codelineno-0-106"></a> <span class="n">masks</span> <span class="o">=</span> <span class="n">masks</span><span class="p">[</span><span class="o">...</span><span class="p">,</span> <span class="p">:</span> <span class="n">scaled_size</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="p">:</span> <span class="n">scaled_size</span><span class="p">[</span><span class="mi">1</span><span class="p">]]</span> <span class="c1"># remove padding added at `preprocess_image` time</span>
</span><span id="__span-0-107"><a id="__codelineno-0-107" name="__codelineno-0-107"></a> <span class="n">masks</span> <span class="o">=</span> <span class="n">interpolate</span><span class="p">(</span><span class="n">masks</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="n">Size</span><span class="p">(</span><span class="n">original_size</span><span class="p">),</span> <span class="n">mode</span><span class="o">=</span><span class="s2">&quot;bilinear&quot;</span><span class="p">)</span>
</span><span id="__span-0-108"><a id="__codelineno-0-108" name="__codelineno-0-108"></a> <span class="k">return</span> <span class="n">masks</span>
</span></code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h2 id="refiners.foundationals.segment_anything.utils.preprocess_image" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-function"></code> <span class="doc doc-object-name doc-function-name">preprocess_image</span>
<a href="#refiners.foundationals.segment_anything.utils.preprocess_image" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="nf">preprocess_image</span><span class="p">(</span>
</span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a> <span class="n">image</span><span class="p">:</span> <span class="n"><span title="PIL.Image.Image">Image</span></span><span class="p">,</span>
</span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a> <span class="n">image_encoder_resolution</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">,</span>
</span><span id="__span-0-4"><a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a> <span class="n">device</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" title="torch.device" href="https://pytorch.org/docs/main/tensor_attributes.html#torch.device">device</a></span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-5"><a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a> <span class="n">dtype</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" title="torch.dtype" href="https://pytorch.org/docs/main/tensor_attributes.html#torch.dtype">dtype</a></span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-6"><a id="__codelineno-0-6" name="__codelineno-0-6" href="#__codelineno-0-6"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="n"><a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a></span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p>Preprocess an image without distorting its aspect ratio.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>image</code>
</td>
<td>
<code><span title="PIL.Image.Image">Image</span></code>
</td>
<td>
<div class="doc-md-description">
<p>The image to preprocess before calling the image encoder.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>image_encoder_resolution</code>
</td>
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></code>
</td>
<td>
<div class="doc-md-description">
<p>Image encoder resolution.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>device</code>
</td>
<td>
<code><a class="autorefs autorefs-external" title="torch.device" href="https://pytorch.org/docs/main/tensor_attributes.html#torch.device">device</a> | None</code>
</td>
<td>
<div class="doc-md-description">
<p>Tensor device (None by default).</p>
</div>
</td>
<td>
<code>None</code>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>dtype</code>
</td>
<td>
<code><a class="autorefs autorefs-external" title="torch.dtype" href="https://pytorch.org/docs/main/tensor_attributes.html#torch.dtype">dtype</a> | None</code>
</td>
<td>
<div class="doc-md-description">
<p>Tensor dtype (None by default).</p>
</div>
</td>
<td>
<code>None</code>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Returns:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><a class="autorefs autorefs-external" title="torch.Tensor" href="https://pytorch.org/docs/main/tensors.html#torch.Tensor">Tensor</a></code>
</td>
<td>
<div class="doc-md-description">
<p>The preprocessed image.</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>src/refiners/foundationals/segment_anything/utils.py</code></summary>
<div class="language-python highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-46">46</a></span>
<span class="normal"><a href="#__codelineno-0-47">47</a></span>
<span class="normal"><a href="#__codelineno-0-48">48</a></span>
<span class="normal"><a href="#__codelineno-0-49">49</a></span>
<span class="normal"><a href="#__codelineno-0-50">50</a></span>
<span class="normal"><a href="#__codelineno-0-51">51</a></span>
<span class="normal"><a href="#__codelineno-0-52">52</a></span>
<span class="normal"><a href="#__codelineno-0-53">53</a></span>
<span class="normal"><a href="#__codelineno-0-54">54</a></span>
<span class="normal"><a href="#__codelineno-0-55">55</a></span>
<span class="normal"><a href="#__codelineno-0-56">56</a></span>
<span class="normal"><a href="#__codelineno-0-57">57</a></span>
<span class="normal"><a href="#__codelineno-0-58">58</a></span>
<span class="normal"><a href="#__codelineno-0-59">59</a></span>
<span class="normal"><a href="#__codelineno-0-60">60</a></span>
<span class="normal"><a href="#__codelineno-0-61">61</a></span>
<span class="normal"><a href="#__codelineno-0-62">62</a></span>
<span class="normal"><a href="#__codelineno-0-63">63</a></span>
<span class="normal"><a href="#__codelineno-0-64">64</a></span>
<span class="normal"><a href="#__codelineno-0-65">65</a></span>
<span class="normal"><a href="#__codelineno-0-66">66</a></span>
<span class="normal"><a href="#__codelineno-0-67">67</a></span>
<span class="normal"><a href="#__codelineno-0-68">68</a></span>
<span class="normal"><a href="#__codelineno-0-69">69</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-46"><a id="__codelineno-0-46" name="__codelineno-0-46"></a><span class="k">def</span> <span class="nf">preprocess_image</span><span class="p">(</span>
</span><span id="__span-0-47"><a id="__codelineno-0-47" name="__codelineno-0-47"></a> <span class="n">image</span><span class="p">:</span> <span class="n">Image</span><span class="o">.</span><span class="n">Image</span><span class="p">,</span> <span class="n">image_encoder_resolution</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">device</span><span class="p">:</span> <span class="n">Device</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">dtype</span><span class="p">:</span> <span class="n">DType</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span>
</span><span id="__span-0-48"><a id="__codelineno-0-48" name="__codelineno-0-48"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Tensor</span><span class="p">:</span>
</span><span id="__span-0-49"><a id="__codelineno-0-49" name="__codelineno-0-49"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Preprocess an image without distorting its aspect ratio.</span>
</span><span id="__span-0-50"><a id="__codelineno-0-50" name="__codelineno-0-50"></a>
</span><span id="__span-0-51"><a id="__codelineno-0-51" name="__codelineno-0-51"></a><span class="sd"> Args:</span>
</span><span id="__span-0-52"><a id="__codelineno-0-52" name="__codelineno-0-52"></a><span class="sd"> image: The image to preprocess before calling the image encoder.</span>
</span><span id="__span-0-53"><a id="__codelineno-0-53" name="__codelineno-0-53"></a><span class="sd"> image_encoder_resolution: Image encoder resolution.</span>
</span><span id="__span-0-54"><a id="__codelineno-0-54" name="__codelineno-0-54"></a><span class="sd"> device: Tensor device (None by default).</span>
</span><span id="__span-0-55"><a id="__codelineno-0-55" name="__codelineno-0-55"></a><span class="sd"> dtype: Tensor dtype (None by default).</span>
</span><span id="__span-0-56"><a id="__codelineno-0-56" name="__codelineno-0-56"></a>
</span><span id="__span-0-57"><a id="__codelineno-0-57" name="__codelineno-0-57"></a><span class="sd"> Returns:</span>
</span><span id="__span-0-58"><a id="__codelineno-0-58" name="__codelineno-0-58"></a><span class="sd"> The preprocessed image.</span>
</span><span id="__span-0-59"><a id="__codelineno-0-59" name="__codelineno-0-59"></a><span class="sd"> &quot;&quot;&quot;</span>
</span><span id="__span-0-60"><a id="__codelineno-0-60" name="__codelineno-0-60"></a>
</span><span id="__span-0-61"><a id="__codelineno-0-61" name="__codelineno-0-61"></a> <span class="n">scaled_size</span> <span class="o">=</span> <span class="n">compute_scaled_size</span><span class="p">((</span><span class="n">image</span><span class="o">.</span><span class="n">height</span><span class="p">,</span> <span class="n">image</span><span class="o">.</span><span class="n">width</span><span class="p">),</span> <span class="n">image_encoder_resolution</span><span class="p">)</span>
</span><span id="__span-0-62"><a id="__codelineno-0-62" name="__codelineno-0-62"></a>
</span><span id="__span-0-63"><a id="__codelineno-0-63" name="__codelineno-0-63"></a> <span class="n">image_tensor</span> <span class="o">=</span> <span class="n">image_to_scaled_tensor</span><span class="p">(</span><span class="n">image</span><span class="p">,</span> <span class="n">scaled_size</span><span class="p">,</span> <span class="n">device</span><span class="o">=</span><span class="n">device</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span>
</span><span id="__span-0-64"><a id="__codelineno-0-64" name="__codelineno-0-64"></a>
</span><span id="__span-0-65"><a id="__codelineno-0-65" name="__codelineno-0-65"></a> <span class="k">return</span> <span class="n">pad_image_tensor</span><span class="p">(</span>
</span><span id="__span-0-66"><a id="__codelineno-0-66" name="__codelineno-0-66"></a> <span class="n">normalize</span><span class="p">(</span><span class="n">image_tensor</span><span class="p">,</span> <span class="n">mean</span><span class="o">=</span><span class="p">[</span><span class="mf">123.675</span><span class="p">,</span> <span class="mf">116.28</span><span class="p">,</span> <span class="mf">103.53</span><span class="p">],</span> <span class="n">std</span><span class="o">=</span><span class="p">[</span><span class="mf">58.395</span><span class="p">,</span> <span class="mf">57.12</span><span class="p">,</span> <span class="mf">57.375</span><span class="p">]),</span>
</span><span id="__span-0-67"><a id="__codelineno-0-67" name="__codelineno-0-67"></a> <span class="n">scaled_size</span><span class="p">,</span>
</span><span id="__span-0-68"><a id="__codelineno-0-68" name="__codelineno-0-68"></a> <span class="n">image_encoder_resolution</span><span class="p">,</span>
</span><span id="__span-0-69"><a id="__codelineno-0-69" name="__codelineno-0-69"></a> <span class="p">)</span>
</span></code></pre></div></td></tr></table></div>
</details>
</div>
</div>
</div>
</div>
</div>
</article>
</div>
<script>
  // Look up the element whose id equals the current URL fragment (without the
  // leading "#"). When such an element exists and carries a non-empty `name`,
  // set its `checked` flag to whether that name begins with "__tabbed_".
  // Presumably this pre-selects a content tab addressed by a deep link.
  // `target` is intentionally declared with `var` at script top level so the
  // binding stays global, exactly as in the minified original.
  var target = document.getElementById(location.hash.slice(1));
  if (target && target.name) {
    target.checked = target.name.startsWith("__tabbed_");
  }
</script>
</div>
<button type="button" class="md-top md-icon" data-md-component="top" hidden>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8z"/></svg>
Back to top
</button>
</main>
<footer class="md-footer">
<div class="md-footer-meta md-typeset">
<div class="md-footer-meta__inner md-grid">
<div class="md-copyright">
<div class="md-copyright__highlight">
© Lagon Technologies
</div>
Made with
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
Material for MkDocs
</a>
</div>
<div class="md-social">
<a href="https://discord.gg/mCmjNUVV7d" target="_blank" rel="noopener" title="discord.gg" class="md-social__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 640 512"><!--! Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M524.531 69.836a1.5 1.5 0 0 0-.764-.7A485 485 0 0 0 404.081 32.03a1.82 1.82 0 0 0-1.923.91 338 338 0 0 0-14.9 30.6 447.9 447.9 0 0 0-134.426 0 310 310 0 0 0-15.135-30.6 1.89 1.89 0 0 0-1.924-.91 483.7 483.7 0 0 0-119.688 37.107 1.7 1.7 0 0 0-.788.676C39.068 183.651 18.186 294.69 28.43 404.354a2.02 2.02 0 0 0 .765 1.375 487.7 487.7 0 0 0 146.825 74.189 1.9 1.9 0 0 0 2.063-.676A348 348 0 0 0 208.12 430.4a1.86 1.86 0 0 0-1.019-2.588 321 321 0 0 1-45.868-21.853 1.885 1.885 0 0 1-.185-3.126 251 251 0 0 0 9.109-7.137 1.82 1.82 0 0 1 1.9-.256c96.229 43.917 200.41 43.917 295.5 0a1.81 1.81 0 0 1 1.924.233 235 235 0 0 0 9.132 7.16 1.884 1.884 0 0 1-.162 3.126 301.4 301.4 0 0 1-45.89 21.83 1.875 1.875 0 0 0-1 2.611 391 391 0 0 0 30.014 48.815 1.86 1.86 0 0 0 2.063.7A486 486 0 0 0 610.7 405.729a1.88 1.88 0 0 0 .765-1.352c12.264-126.783-20.532-236.912-86.934-334.541M222.491 337.58c-28.972 0-52.844-26.587-52.844-59.239s23.409-59.241 52.844-59.241c29.665 0 53.306 26.82 52.843 59.239 0 32.654-23.41 59.241-52.843 59.241m195.38 0c-28.971 0-52.843-26.587-52.843-59.239s23.409-59.241 52.843-59.241c29.667 0 53.307 26.82 52.844 59.239 0 32.654-23.177 59.241-52.844 59.241"/></svg>
</a>
<a href="https://github.com/finegrain-ai/refiners" target="_blank" rel="noopener" title="github.com" class="md-social__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 496 512"><!--! Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M165.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M244.8 8C106.1 8 0 113.3 0 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C428.2 457.8 496 362.9 496 252 496 113.3 383.5 8 244.8 8M97.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
</a>
<a href="https://twitter.com/finegrain_ai" target="_blank" rel="noopener" title="twitter.com" class="md-social__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M459.37 151.716c.325 4.548.325 9.097.325 13.645 0 138.72-105.583 298.558-298.558 298.558-59.452 0-114.68-17.219-161.137-47.106 8.447.974 16.568 1.299 25.34 1.299 49.055 0 94.213-16.568 130.274-44.832-46.132-.975-84.792-31.188-98.112-72.772 6.498.974 12.995 1.624 19.818 1.624 9.421 0 18.843-1.3 27.614-3.573-48.081-9.747-84.143-51.98-84.143-102.985v-1.299c13.969 7.797 30.214 12.67 47.431 13.319-28.264-18.843-46.781-51.005-46.781-87.391 0-19.492 5.197-37.36 14.294-52.954 51.655 63.675 129.3 105.258 216.365 109.807-1.624-7.797-2.599-15.918-2.599-24.04 0-57.828 46.782-104.934 104.934-104.934 30.213 0 57.502 12.67 76.67 33.137 23.715-4.548 46.456-13.32 66.599-25.34-7.798 24.366-24.366 44.833-46.132 57.827 21.117-2.273 41.584-8.122 60.426-16.243-14.292 20.791-32.161 39.308-52.628 54.253"/></svg>
</a>
<a href="https://www.linkedin.com/company/finegrain-ai/" target="_blank" rel="noopener" title="www.linkedin.com" class="md-social__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M416 32H31.9C14.3 32 0 46.5 0 64.3v383.4C0 465.5 14.3 480 31.9 480H416c17.6 0 32-14.5 32-32.3V64.3c0-17.8-14.4-32.3-32-32.3M135.4 416H69V202.2h66.5V416zm-33.2-243c-21.3 0-38.5-17.3-38.5-38.5S80.9 96 102.2 96c21.2 0 38.5 17.3 38.5 38.5 0 21.3-17.2 38.5-38.5 38.5m282.1 243h-66.4V312c0-24.8-.5-56.7-34.5-56.7-34.6 0-39.9 27-39.9 54.9V416h-66.4V202.2h63.7v29.2h.9c8.9-16.8 30.6-34.5 62.9-34.5 67.2 0 79.7 44.3 79.7 101.9z"/></svg>
</a>
</div>
</div>
</div>
</footer>
</div>
<div class="md-dialog" data-md-component="dialog">
<div class="md-dialog__inner md-typeset"></div>
</div>
<script id="__config" type="application/json">{"base": "../../..", "features": ["navigation.tabs", "navigation.sections", "navigation.top", "navigation.tracking", "navigation.expand", "navigation.path", "toc.follow", "navigation.tabs.sticky", "content.code.copy", "announce.dismiss"], "search": "../../../assets/javascripts/workers/search.6ce7567c.min.js", "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}}</script>
<script src="../../../assets/javascripts/bundle.83f73b43.min.js"></script>
</body>
</html>