refiners/reference/foundationals/segment_anything/index.html
2024-09-18 08:46:35 +00:00

4076 lines
237 KiB
HTML
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!doctype html>
<html lang="en" class="no-js">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="description" content="A micro framework on top of PyTorch with first class citizen APIs for foundation model adaptation">
<link rel="prev" href="../latent_diffusion/">
<link rel="next" href="../swin/">
<link rel="icon" href="../../../assets/favicon.svg">
<meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.5.35">
<title>Segment Anything - Refiners</title>
<link rel="stylesheet" href="../../../assets/stylesheets/main.35f28582.min.css">
<link rel="stylesheet" href="../../../assets/stylesheets/palette.06af60db.min.css">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
<link rel="stylesheet" href="../../../assets/_mkdocstrings.css">
<link rel="stylesheet" href="../../../stylesheets/extra.css">
<script>__md_scope=new URL("../../..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
</head>
<body dir="ltr" data-md-color-scheme="default" data-md-color-primary="deep-orange" data-md-color-accent="deep-orange">
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
<label class="md-overlay" for="__drawer"></label>
<div data-md-component="skip">
<a href="#refiners.foundationals.segment_anything.HQSAMAdapter" class="md-skip">
Skip to content
</a>
</div>
<div data-md-component="announce">
<aside class="md-banner">
<div class="md-banner__inner md-grid md-typeset">
<button class="md-banner__button md-icon" aria-label="Don't show this again">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
</button>
Check out our brand new <a href="https://finegrain.ai/bounties">Bounty Program</a> 💰!
</div>
<script>var el=document.querySelector("[data-md-component=announce]");if(el){var content=el.querySelector(".md-typeset");__md_hash(content.innerHTML)===__md_get("__announce")&&(el.hidden=!0)}</script>
</aside>
</div>
<header class="md-header md-header--shadow md-header--lifted" data-md-component="header">
<nav class="md-header__inner md-grid" aria-label="Header">
<a href="../../.." title="Refiners" class="md-header__button md-logo" aria-label="Refiners" data-md-component="logo">
<img src="../../../assets/favicon.svg" alt="logo">
</a>
<label class="md-header__button md-icon" for="__drawer">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
</label>
<div class="md-header__title" data-md-component="header-title">
<div class="md-header__ellipsis">
<div class="md-header__topic">
<span class="md-ellipsis">
Refiners
</span>
</div>
<div class="md-header__topic" data-md-component="header-topic">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Segment Anything
</span>
</div>
</div>
</div>
<label class="md-header__button md-icon" for="__search">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
</label>
<div class="md-search" data-md-component="search" role="dialog">
<label class="md-search__overlay" for="__search"></label>
<div class="md-search__inner" role="search">
<form class="md-search__form" name="search">
<input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
<label class="md-search__icon md-icon" for="__search">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
</label>
<nav class="md-search__options" aria-label="Search">
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
</button>
</nav>
</form>
<div class="md-search__output">
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
<div class="md-search-result" data-md-component="search-result">
<div class="md-search-result__meta">
Initializing search
</div>
<ol class="md-search-result__list" role="presentation"></ol>
</div>
</div>
</div>
</div>
</div>
<div class="md-header__source">
<a href="https://github.com/finegrain-ai/refiners" title="Go to repository" class="md-source" data-md-component="source">
<div class="md-source__icon md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg>
</div>
<div class="md-source__repository">
Refiners
</div>
</a>
</div>
</nav>
<nav class="md-tabs" aria-label="Tabs" data-md-component="tabs">
<div class="md-grid">
<ul class="md-tabs__list">
<li class="md-tabs__item">
<a href="../../.." class="md-tabs__link">
Home
</a>
</li>
<li class="md-tabs__item">
<a href="../../../getting-started/recommended/" class="md-tabs__link">
Getting started
</a>
</li>
<li class="md-tabs__item">
<a href="../../../concepts/chain/" class="md-tabs__link">
Key Concepts
</a>
</li>
<li class="md-tabs__item">
<a href="../../../guides/adapting_sdxl/" class="md-tabs__link">
Guides
</a>
</li>
<li class="md-tabs__item md-tabs__item--active">
<a href="../../fluxion/adapters/" class="md-tabs__link">
API Reference
</a>
</li>
</ul>
</div>
</nav>
</header>
<div class="md-container" data-md-component="container">
<main class="md-main" data-md-component="main">
<div class="md-main__inner md-grid">
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0">
<label class="md-nav__title" for="__drawer">
<a href="../../.." title="Refiners" class="md-nav__button md-logo" aria-label="Refiners" data-md-component="logo">
<img src="../../../assets/favicon.svg" alt="logo">
</a>
Refiners
</label>
<div class="md-nav__source">
<a href="https://github.com/finegrain-ai/refiners" title="Go to repository" class="md-source" data-md-component="source">
<div class="md-source__icon md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg>
</div>
<div class="md-source__repository">
Refiners
</div>
</a>
</div>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_1" >
<label class="md-nav__link" for="__nav_1" id="__nav_1_label" tabindex="0">
<span class="md-ellipsis">
Home
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_1">
<span class="md-nav__icon md-icon"></span>
Home
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../.." class="md-nav__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m12 3.77-.75.84S9.97 6.06 8.68 7.94 6 12.07 6 14.23a6 6 0 0 0 6 6 6 6 0 0 0 6-6c0-2.16-1.39-4.41-2.68-6.29s-2.57-3.33-2.57-3.33zm0 3.13c.44.52.84.95 1.68 2.17 1.21 1.76 2.32 4 2.32 5.16 0 2.22-1.78 4-4 4s-4-1.78-4-4c0-1.16 1.11-3.4 2.32-5.16.84-1.22 1.24-1.65 1.68-2.17"/></svg>
<span class="md-ellipsis">
Welcome
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../home/why/" class="md-nav__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11h3v2h-3zM1 11h3v2H1zM13 1v3h-2V1zM4.92 3.5l2.13 2.14-1.42 1.41L3.5 4.93zm12.03 2.13 2.12-2.13 1.43 1.43-2.13 2.12zM12 6a6 6 0 0 1 6 6c0 2.22-1.21 4.16-3 5.2V19a1 1 0 0 1-1 1h-4a1 1 0 0 1-1-1v-1.8c-1.79-1.04-3-2.98-3-5.2a6 6 0 0 1 6-6m2 15v1a1 1 0 0 1-1 1h-2a1 1 0 0 1-1-1v-1zm-3-3h2v-2.13c1.73-.44 3-2.01 3-3.87a4 4 0 0 0-4-4 4 4 0 0 0-4 4c0 1.86 1.27 3.43 3 3.87z"/></svg>
<span class="md-ellipsis">
Manifesto
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_2" >
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
<span class="md-ellipsis">
Getting started
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_2">
<span class="md-nav__icon md-icon"></span>
Getting started
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../../getting-started/recommended/" class="md-nav__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m12 15.39-3.76 2.27.99-4.28-3.32-2.88 4.38-.37L12 6.09l1.71 4.04 4.38.37-3.32 2.88.99 4.28M22 9.24l-7.19-.61L12 2 9.19 8.63 2 9.24l5.45 4.73L5.82 21 12 17.27 18.18 21l-1.64-7.03z"/></svg>
<span class="md-ellipsis">
Recommended usage
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../getting-started/advanced/" class="md-nav__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9 1.09V6H7V1.09C4.16 1.57 2 4.03 2 7c0 2.22 1.21 4.15 3 5.19V21c0 .55.45 1 1 1h4c.55 0 1-.45 1-1v-8.81c1.79-1.04 3-2.97 3-5.19 0-2.97-2.16-5.43-5-5.91m1 9.37-1 .58V20H7v-8.96l-1-.58C4.77 9.74 4 8.42 4 7c0-1 .37-1.94 1-2.65V8h6V4.35c.63.71 1 1.65 1 2.65 0 1.42-.77 2.74-2 3.46m10.94 7.48a3.3 3.3 0 0 0 0-.89l.97-.73a.22.22 0 0 0 .06-.29l-.92-1.56c-.05-.1-.18-.14-.29-.1l-1.15.45c-.24-.17-.49-.32-.78-.44l-.17-1.19a.235.235 0 0 0-.23-.19h-1.85c-.12 0-.22.08-.24.19l-.17 1.19c-.29.12-.54.27-.78.44l-1.15-.45c-.1-.04-.24 0-.28.1l-.93 1.56c-.06.1-.03.22.06.29l.97.73c-.01.15-.03.3-.03.45s.02.29.03.44l-.97.74a.22.22 0 0 0-.06.29l.93 1.56c.04.1.18.13.28.1l1.15-.46c.24.18.49.33.78.45l.17 1.19c.02.11.12.19.24.19h1.85c.11 0 .21-.08.23-.19l.17-1.19c.29-.12.54-.27.78-.45l1.15.46c.11.03.24 0 .29-.1l.92-1.56a.22.22 0 0 0-.06-.29zM17.5 19c-.83 0-1.5-.67-1.5-1.5s.67-1.5 1.5-1.5 1.5.67 1.5 1.5-.67 1.5-1.5 1.5"/></svg>
<span class="md-ellipsis">
Advanced usage
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_3" >
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
<span class="md-ellipsis">
Key Concepts
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3">
<span class="md-nav__icon md-icon"></span>
Key Concepts
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../../concepts/chain/" class="md-nav__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 1a2.5 2.5 0 0 0-2.5 2.5A2.5 2.5 0 0 0 11 5.79V7H7a2 2 0 0 0-2 2v.71A2.5 2.5 0 0 0 3.5 12 2.5 2.5 0 0 0 5 14.29V15H4a2 2 0 0 0-2 2v1.21A2.5 2.5 0 0 0 .5 20.5 2.5 2.5 0 0 0 3 23a2.5 2.5 0 0 0 2.5-2.5A2.5 2.5 0 0 0 4 18.21V17h4v1.21a2.5 2.5 0 0 0-1.5 2.29A2.5 2.5 0 0 0 9 23a2.5 2.5 0 0 0 2.5-2.5 2.5 2.5 0 0 0-1.5-2.29V17a2 2 0 0 0-2-2H7v-.71A2.5 2.5 0 0 0 8.5 12 2.5 2.5 0 0 0 7 9.71V9h10v.71A2.5 2.5 0 0 0 15.5 12a2.5 2.5 0 0 0 1.5 2.29V15h-1a2 2 0 0 0-2 2v1.21a2.5 2.5 0 0 0-1.5 2.29A2.5 2.5 0 0 0 15 23a2.5 2.5 0 0 0 2.5-2.5 2.5 2.5 0 0 0-1.5-2.29V17h4v1.21a2.5 2.5 0 0 0-1.5 2.29A2.5 2.5 0 0 0 21 23a2.5 2.5 0 0 0 2.5-2.5 2.5 2.5 0 0 0-1.5-2.29V17a2 2 0 0 0-2-2h-1v-.71A2.5 2.5 0 0 0 20.5 12 2.5 2.5 0 0 0 19 9.71V9a2 2 0 0 0-2-2h-4V5.79a2.5 2.5 0 0 0 1.5-2.29A2.5 2.5 0 0 0 12 1m0 1.5a1 1 0 0 1 1 1 1 1 0 0 1-1 1 1 1 0 0 1-1-1 1 1 0 0 1 1-1M6 11a1 1 0 0 1 1 1 1 1 0 0 1-1 1 1 1 0 0 1-1-1 1 1 0 0 1 1-1m12 0a1 1 0 0 1 1 1 1 1 0 0 1-1 1 1 1 0 0 1-1-1 1 1 0 0 1 1-1M3 19.5a1 1 0 0 1 1 1 1 1 0 0 1-1 1 1 1 0 0 1-1-1 1 1 0 0 1 1-1m6 0a1 1 0 0 1 1 1 1 1 0 0 1-1 1 1 1 0 0 1-1-1 1 1 0 0 1 1-1m6 0a1 1 0 0 1 1 1 1 1 0 0 1-1 1 1 1 0 0 1-1-1 1 1 0 0 1 1-1m6 0a1 1 0 0 1 1 1 1 1 0 0 1-1 1 1 1 0 0 1-1-1 1 1 0 0 1 1-1"/></svg>
<span class="md-ellipsis">
Chain
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../concepts/context/" class="md-nav__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9 22a1 1 0 0 1-1-1v-3H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h16a2 2 0 0 1 2 2v12a2 2 0 0 1-2 2h-6.1l-3.7 3.71c-.2.19-.45.29-.7.29zm1-6v3.08L13.08 16H20V4H4v12zm3-6h-2V6h2zm0 4h-2v-2h2z"/></svg>
<span class="md-ellipsis">
Context
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../concepts/adapter/" class="md-nav__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M2 12h2v5h16v-5h2v5a2 2 0 0 1-2 2H4a2 2 0 0 1-2-2m9-12h2v3h3v2h-3v3h-2v-3H8V8h3Z"/></svg>
<span class="md-ellipsis">
Adapter
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_4" >
<label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="0">
<span class="md-ellipsis">
Guides
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_4">
<span class="md-nav__icon md-icon"></span>
Guides
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../../guides/adapting_sdxl/" class="md-nav__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M2 13h2v2h2v-2h2v2h2v-2h2v2h2v-5l3-3V1h2l4 2-4 2v2l3 3v12H11v-3a2 2 0 0 0-2-2 2 2 0 0 0-2 2v3H2zm16-3c-.55 0-1 .54-1 1.2V13h2v-1.8c0-.66-.45-1.2-1-1.2"/></svg>
<span class="md-ellipsis">
Adapting SDXL
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../guides/training_101/" class="md-nav__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M18 22a2 2 0 0 0 2-2V4a2 2 0 0 0-2-2h-6v7L9.5 7.5 7 9V2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2z"/></svg>
<span class="md-ellipsis">
Training 101
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_5" checked>
<label class="md-nav__link" for="__nav_5" id="__nav_5_label" tabindex="">
<span class="md-ellipsis">
API Reference
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_5_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_5">
<span class="md-nav__icon md-icon"></span>
API Reference
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_5_1" checked>
<label class="md-nav__link" for="__nav_5_1" id="__nav_5_1_label" tabindex="">
<span class="md-ellipsis">
Refiners
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_5_1_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_5_1">
<span class="md-nav__icon md-icon"></span>
Refiners
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_5_1_1" >
<label class="md-nav__link" for="__nav_5_1_1" id="__nav_5_1_1_label" tabindex="0">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Fluxion
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="3" aria-labelledby="__nav_5_1_1_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_5_1_1">
<span class="md-nav__icon md-icon"></span>
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Fluxion
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../fluxion/adapters/" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Adapters
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../fluxion/context/" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Context
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../fluxion/layers/" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Layers
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../fluxion/model_converter/" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Model Converter
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../fluxion/utils/" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Utils
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--active md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_5_1_2" checked>
<label class="md-nav__link" for="__nav_5_1_2" id="__nav_5_1_2_label" tabindex="0">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Foundation Models
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="3" aria-labelledby="__nav_5_1_2_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_5_1_2">
<span class="md-nav__icon md-icon"></span>
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Foundation Models
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../clip/" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> CLIP
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../dinov2/" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> DINOv2
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../latent_diffusion/" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Latent Diffusion
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--active">
<input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
<label class="md-nav__link md-nav__link--active" for="__toc">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Segment Anything
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<a href="./" class="md-nav__link md-nav__link--active">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Segment Anything
</span>
</a>
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
<label class="md-nav__title" for="__toc">
<span class="md-nav__icon md-icon"></span>
Table of contents
</label>
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.HQSAMAdapter" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-class"></code>&nbsp;HQSAMAdapter
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-class"></code>&nbsp;SegmentAnything
</span>
</a>
<nav class="md-nav" aria-label=" SegmentAnything">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.image_encoder" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-attribute"></code>&nbsp;image_encoder
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.image_encoder_resolution" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-attribute"></code>&nbsp;image_encoder_resolution
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.mask_decoder" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-attribute"></code>&nbsp;mask_decoder
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.mask_encoder" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-attribute"></code>&nbsp;mask_encoder
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.point_encoder" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-attribute"></code>&nbsp;point_encoder
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.compute_image_embedding" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-method"></code>&nbsp;compute_image_embedding
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.normalize" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-method"></code>&nbsp;normalize
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.postprocess_masks" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-method"></code>&nbsp;postprocess_masks
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.predict" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-method"></code>&nbsp;predict
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.preprocess_image" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-method"></code>&nbsp;preprocess_image
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnythingH" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-class"></code>&nbsp;SegmentAnythingH
</span>
</a>
<nav class="md-nav" aria-label=" SegmentAnythingH">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnythingH.image_encoder" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-attribute"></code>&nbsp;image_encoder
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.utils.compute_scaled_size" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-function"></code>&nbsp;compute_scaled_size
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.utils.image_to_scaled_tensor" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-function"></code>&nbsp;image_to_scaled_tensor
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.utils.normalize_coordinates" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-function"></code>&nbsp;normalize_coordinates
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.utils.pad_image_tensor" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-function"></code>&nbsp;pad_image_tensor
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.utils.postprocess_masks" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-function"></code>&nbsp;postprocess_masks
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.utils.preprocess_image" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-function"></code>&nbsp;preprocess_image
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../swin/" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Swin Transformers
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
<label class="md-nav__title" for="__toc">
<span class="md-nav__icon md-icon"></span>
Table of contents
</label>
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.HQSAMAdapter" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-class"></code>&nbsp;HQSAMAdapter
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-class"></code>&nbsp;SegmentAnything
</span>
</a>
<nav class="md-nav" aria-label=" SegmentAnything">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.image_encoder" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-attribute"></code>&nbsp;image_encoder
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.image_encoder_resolution" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-attribute"></code>&nbsp;image_encoder_resolution
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.mask_decoder" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-attribute"></code>&nbsp;mask_decoder
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.mask_encoder" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-attribute"></code>&nbsp;mask_encoder
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.point_encoder" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-attribute"></code>&nbsp;point_encoder
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.compute_image_embedding" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-method"></code>&nbsp;compute_image_embedding
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.normalize" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-method"></code>&nbsp;normalize
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.postprocess_masks" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-method"></code>&nbsp;postprocess_masks
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.predict" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-method"></code>&nbsp;predict
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnything.preprocess_image" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-method"></code>&nbsp;preprocess_image
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnythingH" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-class"></code>&nbsp;SegmentAnythingH
</span>
</a>
<nav class="md-nav" aria-label=" SegmentAnythingH">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.SegmentAnythingH.image_encoder" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-attribute"></code>&nbsp;image_encoder
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.utils.compute_scaled_size" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-function"></code>&nbsp;compute_scaled_size
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.utils.image_to_scaled_tensor" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-function"></code>&nbsp;image_to_scaled_tensor
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.utils.normalize_coordinates" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-function"></code>&nbsp;normalize_coordinates
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.utils.pad_image_tensor" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-function"></code>&nbsp;pad_image_tensor
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.utils.postprocess_masks" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-function"></code>&nbsp;postprocess_masks
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#refiners.foundationals.segment_anything.utils.preprocess_image" class="md-nav__link">
<span class="md-ellipsis">
<code class="doc-symbol doc-symbol-toc doc-symbol-function"></code>&nbsp;preprocess_image
</span>
</a>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-content" data-md-component="content">
<article class="md-content__inner md-typeset">
<h1><code class="doc-symbol doc-symbol-nav doc-symbol-module"></code> Segment Anything</h1>
<div class="doc doc-object doc-module">
<div class="doc doc-contents first">
<div class="doc doc-children">
<div class="doc doc-object doc-class">
<h2 id="refiners.foundationals.segment_anything.HQSAMAdapter" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-class"></code> <span class="doc doc-object-name doc-class-name">HQSAMAdapter</span>
<a href="#refiners.foundationals.segment_anything.HQSAMAdapter" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="nf">HQSAMAdapter</span><span class="p">(</span>
</span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a> <span class="n">target</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-internal" title="refiners.foundationals.segment_anything.model.SegmentAnything" href="#refiners.foundationals.segment_anything.SegmentAnything">SegmentAnything</a></span><span class="p">,</span>
</span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a> <span class="n">hq_mask_only</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#bool">bool</a></span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
</span><span id="__span-0-4"><a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a> <span class="n">weights</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#dict">dict</a></span><span class="p">[</span><span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#str">str</a></span><span class="p">,</span> <span class="n"><span title="torch.Tensor">Tensor</span></span><span class="p">]</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-5"><a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a><span class="p">)</span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p class="doc doc-class-bases">
Bases: <code><a class="autorefs autorefs-internal" title="refiners.fluxion.layers.Chain" href="../../fluxion/layers/#refiners.fluxion.layers.Chain">Chain</a></code>, <code><a class="autorefs autorefs-internal" title="refiners.fluxion.adapters.Adapter" href="../../fluxion/adapters/#refiners.fluxion.adapters.Adapter">Adapter</a>[<a class="autorefs autorefs-internal" title="refiners.foundationals.segment_anything.model.SegmentAnything" href="#refiners.foundationals.segment_anything.SegmentAnything">SegmentAnything</a>]</code></p>
<p>Adapter for SAM introducing HQ features.</p>
<p>See <a href="https://arxiv.org/abs/2306.01567">[arXiv:2306.01567] Segment Anything in High Quality</a> for details.</p>
<details class="example" open>
<summary>Example</summary>
<div class="language-py highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="kn">from</span> <span class="nn">refiners.fluxion.utils</span> <span class="kn">import</span> <span class="n">load_from_safetensors</span>
</span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a>
</span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a><span class="c1"># Tips: run scripts/prepare_test_weights.py to download the weights</span>
</span><span id="__span-0-4"><a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a><span class="n">tensor_path</span> <span class="o">=</span> <span class="s2">&quot;./tests/weights/refiners-sam-hq-vit-h.safetensors&quot;</span>
</span><span id="__span-0-5"><a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a><span class="n">weights</span> <span class="o">=</span> <span class="n">load_from_safetensors</span><span class="p">(</span><span class="n">tensor_path</span><span class="p">)</span>
</span><span id="__span-0-6"><a id="__codelineno-0-6" name="__codelineno-0-6" href="#__codelineno-0-6"></a>
</span><span id="__span-0-7"><a id="__codelineno-0-7" name="__codelineno-0-7" href="#__codelineno-0-7"></a><span class="n">hq_sam_adapter</span> <span class="o">=</span> <span class="n">HQSAMAdapter</span><span class="p">(</span><span class="n">sam_h</span><span class="p">,</span> <span class="n">weights</span><span class="o">=</span><span class="n">weights</span><span class="p">)</span>
</span><span id="__span-0-8"><a id="__codelineno-0-8" name="__codelineno-0-8" href="#__codelineno-0-8"></a><span class="n">hq_sam_adapter</span><span class="o">.</span><span class="n">inject</span><span class="p">()</span> <span class="c1"># then use SAM as usual</span>
</span></code></pre></div>
</details>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td><code>target</code></td>
<td>
<code><a class="autorefs autorefs-internal" title="refiners.foundationals.segment_anything.model.SegmentAnything" href="#refiners.foundationals.segment_anything.SegmentAnything">SegmentAnything</a></code>
</td>
<td>
<div class="doc-md-description">
<p>The SegmentAnything model to adapt.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td><code>hq_mask_only</code></td>
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#bool">bool</a></code>
</td>
<td>
<div class="doc-md-description">
<p>Whether to output only the high-quality mask or use it for mask correction (by summing it with the base SAM mask).</p>
</div>
</td>
<td>
<code>False</code>
</td>
</tr>
<tr class="doc-section-item">
<td><code>weights</code></td>
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#dict">dict</a>[<a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#str">str</a>, <span title="torch.Tensor">Tensor</span>] | None</code>
</td>
<td>
<div class="doc-md-description">
<p>The weights of the HQSAMAdapter.</p>
</div>
</td>
<td>
<code>None</code>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>src/refiners/foundationals/segment_anything/hq_sam.py</code></summary>
<div class="language-python highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-313">313</a></span>
<span class="normal"><a href="#__codelineno-0-314">314</a></span>
<span class="normal"><a href="#__codelineno-0-315">315</a></span>
<span class="normal"><a href="#__codelineno-0-316">316</a></span>
<span class="normal"><a href="#__codelineno-0-317">317</a></span>
<span class="normal"><a href="#__codelineno-0-318">318</a></span>
<span class="normal"><a href="#__codelineno-0-319">319</a></span>
<span class="normal"><a href="#__codelineno-0-320">320</a></span>
<span class="normal"><a href="#__codelineno-0-321">321</a></span>
<span class="normal"><a href="#__codelineno-0-322">322</a></span>
<span class="normal"><a href="#__codelineno-0-323">323</a></span>
<span class="normal"><a href="#__codelineno-0-324">324</a></span>
<span class="normal"><a href="#__codelineno-0-325">325</a></span>
<span class="normal"><a href="#__codelineno-0-326">326</a></span>
<span class="normal"><a href="#__codelineno-0-327">327</a></span>
<span class="normal"><a href="#__codelineno-0-328">328</a></span>
<span class="normal"><a href="#__codelineno-0-329">329</a></span>
<span class="normal"><a href="#__codelineno-0-330">330</a></span>
<span class="normal"><a href="#__codelineno-0-331">331</a></span>
<span class="normal"><a href="#__codelineno-0-332">332</a></span>
<span class="normal"><a href="#__codelineno-0-333">333</a></span>
<span class="normal"><a href="#__codelineno-0-334">334</a></span>
<span class="normal"><a href="#__codelineno-0-335">335</a></span>
<span class="normal"><a href="#__codelineno-0-336">336</a></span>
<span class="normal"><a href="#__codelineno-0-337">337</a></span>
<span class="normal"><a href="#__codelineno-0-338">338</a></span>
<span class="normal"><a href="#__codelineno-0-339">339</a></span>
<span class="normal"><a href="#__codelineno-0-340">340</a></span>
<span class="normal"><a href="#__codelineno-0-341">341</a></span>
<span class="normal"><a href="#__codelineno-0-342">342</a></span>
<span class="normal"><a href="#__codelineno-0-343">343</a></span>
<span class="normal"><a href="#__codelineno-0-344">344</a></span>
<span class="normal"><a href="#__codelineno-0-345">345</a></span>
<span class="normal"><a href="#__codelineno-0-346">346</a></span>
<span class="normal"><a href="#__codelineno-0-347">347</a></span>
<span class="normal"><a href="#__codelineno-0-348">348</a></span>
<span class="normal"><a href="#__codelineno-0-349">349</a></span>
<span class="normal"><a href="#__codelineno-0-350">350</a></span>
<span class="normal"><a href="#__codelineno-0-351">351</a></span>
<span class="normal"><a href="#__codelineno-0-352">352</a></span>
<span class="normal"><a href="#__codelineno-0-353">353</a></span>
<span class="normal"><a href="#__codelineno-0-354">354</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-313"><a id="__codelineno-0-313" name="__codelineno-0-313"></a><span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
</span><span id="__span-0-314"><a id="__codelineno-0-314" name="__codelineno-0-314"></a> <span class="bp">self</span><span class="p">,</span>
</span><span id="__span-0-315"><a id="__codelineno-0-315" name="__codelineno-0-315"></a> <span class="n">target</span><span class="p">:</span> <span class="n">SegmentAnything</span><span class="p">,</span>
</span><span id="__span-0-316"><a id="__codelineno-0-316" name="__codelineno-0-316"></a> <span class="n">hq_mask_only</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
</span><span id="__span-0-317"><a id="__codelineno-0-317" name="__codelineno-0-317"></a> <span class="n">weights</span><span class="p">:</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">]</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-318"><a id="__codelineno-0-318" name="__codelineno-0-318"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
</span><span id="__span-0-319"><a id="__codelineno-0-319" name="__codelineno-0-319"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Initialize the adapter.</span>
</span><span id="__span-0-320"><a id="__codelineno-0-320" name="__codelineno-0-320"></a>
</span><span id="__span-0-321"><a id="__codelineno-0-321" name="__codelineno-0-321"></a><span class="sd"> Args:</span>
</span><span id="__span-0-322"><a id="__codelineno-0-322" name="__codelineno-0-322"></a><span class="sd"> target: The SegmentAnything model to adapt.</span>
</span><span id="__span-0-323"><a id="__codelineno-0-323" name="__codelineno-0-323"></a><span class="sd"> hq_mask_only: Whether to output only the high-quality mask or use it for mask correction (by summing it with the base SAM mask).</span>
</span><span id="__span-0-324"><a id="__codelineno-0-324" name="__codelineno-0-324"></a><span class="sd"> weights: The weights of the HQSAMAdapter.</span>
</span><span id="__span-0-325"><a id="__codelineno-0-325" name="__codelineno-0-325"></a><span class="sd"> &quot;&quot;&quot;</span>
</span><span id="__span-0-326"><a id="__codelineno-0-326" name="__codelineno-0-326"></a> <span class="bp">self</span><span class="o">.</span><span class="n">vit_embedding_dim</span> <span class="o">=</span> <span class="n">target</span><span class="o">.</span><span class="n">image_encoder</span><span class="o">.</span><span class="n">embedding_dim</span>
</span><span id="__span-0-327"><a id="__codelineno-0-327" name="__codelineno-0-327"></a> <span class="bp">self</span><span class="o">.</span><span class="n">target_num_mask_tokens</span> <span class="o">=</span> <span class="n">target</span><span class="o">.</span><span class="n">mask_decoder</span><span class="o">.</span><span class="n">num_multimask_outputs</span> <span class="o">+</span> <span class="mi">2</span>
</span><span id="__span-0-328"><a id="__codelineno-0-328" name="__codelineno-0-328"></a>
</span><span id="__span-0-329"><a id="__codelineno-0-329" name="__codelineno-0-329"></a> <span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">setup_adapter</span><span class="p">(</span><span class="n">target</span><span class="p">):</span>
</span><span id="__span-0-330"><a id="__codelineno-0-330" name="__codelineno-0-330"></a> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">target</span><span class="p">)</span>
</span><span id="__span-0-331"><a id="__codelineno-0-331" name="__codelineno-0-331"></a>
</span><span id="__span-0-332"><a id="__codelineno-0-332" name="__codelineno-0-332"></a> <span class="k">if</span> <span class="n">target</span><span class="o">.</span><span class="n">mask_decoder</span><span class="o">.</span><span class="n">multimask_output</span><span class="p">:</span>
</span><span id="__span-0-333"><a id="__codelineno-0-333" name="__codelineno-0-333"></a> <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;Multi-mask mode is not supported in HQSAMAdapter.&quot;</span><span class="p">)</span>
</span><span id="__span-0-334"><a id="__codelineno-0-334" name="__codelineno-0-334"></a>
</span><span id="__span-0-335"><a id="__codelineno-0-335" name="__codelineno-0-335"></a> <span class="n">mask_prediction</span> <span class="o">=</span> <span class="n">target</span><span class="o">.</span><span class="n">mask_decoder</span><span class="o">.</span><span class="n">ensure_find</span><span class="p">(</span><span class="n">MaskPrediction</span><span class="p">)</span>
</span><span id="__span-0-336"><a id="__codelineno-0-336" name="__codelineno-0-336"></a>
</span><span id="__span-0-337"><a id="__codelineno-0-337" name="__codelineno-0-337"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_mask_prediction_adapter</span> <span class="o">=</span> <span class="p">[</span>
</span><span id="__span-0-338"><a id="__codelineno-0-338" name="__codelineno-0-338"></a> <span class="n">MaskPredictionAdapter</span><span class="p">(</span>
</span><span id="__span-0-339"><a id="__codelineno-0-339" name="__codelineno-0-339"></a> <span class="n">mask_prediction</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">vit_embedding_dim</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">target_num_mask_tokens</span><span class="p">,</span> <span class="n">target</span><span class="o">.</span><span class="n">device</span><span class="p">,</span> <span class="n">target</span><span class="o">.</span><span class="n">dtype</span>
</span><span id="__span-0-340"><a id="__codelineno-0-340" name="__codelineno-0-340"></a> <span class="p">)</span>
</span><span id="__span-0-341"><a id="__codelineno-0-341" name="__codelineno-0-341"></a> <span class="p">]</span>
</span><span id="__span-0-342"><a id="__codelineno-0-342" name="__codelineno-0-342"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_register_adapter_module</span><span class="p">(</span><span class="s2">&quot;Chain.HQSAMMaskPrediction&quot;</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">mask_prediction_adapter</span><span class="o">.</span><span class="n">hq_sam_mask_prediction</span><span class="p">)</span>
</span><span id="__span-0-343"><a id="__codelineno-0-343" name="__codelineno-0-343"></a>
</span><span id="__span-0-344"><a id="__codelineno-0-344" name="__codelineno-0-344"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_image_encoder_adapter</span> <span class="o">=</span> <span class="p">[</span><span class="n">SAMViTAdapter</span><span class="p">(</span><span class="n">target</span><span class="o">.</span><span class="n">image_encoder</span><span class="p">)]</span>
</span><span id="__span-0-345"><a id="__codelineno-0-345" name="__codelineno-0-345"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_predictions_post_proc</span> <span class="o">=</span> <span class="p">[</span><span class="n">PredictionsPostProc</span><span class="p">(</span><span class="n">hq_mask_only</span><span class="p">)]</span>
</span><span id="__span-0-346"><a id="__codelineno-0-346" name="__codelineno-0-346"></a>
</span><span id="__span-0-347"><a id="__codelineno-0-347" name="__codelineno-0-347"></a> <span class="n">mask_decoder_tokens</span> <span class="o">=</span> <span class="n">target</span><span class="o">.</span><span class="n">mask_decoder</span><span class="o">.</span><span class="n">ensure_find</span><span class="p">(</span><span class="n">MaskDecoderTokens</span><span class="p">)</span>
</span><span id="__span-0-348"><a id="__codelineno-0-348" name="__codelineno-0-348"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_mask_decoder_tokens_extender</span> <span class="o">=</span> <span class="p">[</span><span class="n">MaskDecoderTokensExtender</span><span class="p">(</span><span class="n">mask_decoder_tokens</span><span class="p">)]</span>
</span><span id="__span-0-349"><a id="__codelineno-0-349" name="__codelineno-0-349"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_register_adapter_module</span><span class="p">(</span><span class="s2">&quot;MaskDecoderTokensExtender.hq_token&quot;</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">mask_decoder_tokens_extender</span><span class="o">.</span><span class="n">hq_token</span><span class="p">)</span>
</span><span id="__span-0-350"><a id="__codelineno-0-350" name="__codelineno-0-350"></a>
</span><span id="__span-0-351"><a id="__codelineno-0-351" name="__codelineno-0-351"></a> <span class="k">if</span> <span class="n">weights</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
</span><span id="__span-0-352"><a id="__codelineno-0-352" name="__codelineno-0-352"></a> <span class="bp">self</span><span class="o">.</span><span class="n">load_weights</span><span class="p">(</span><span class="n">weights</span><span class="p">)</span>
</span><span id="__span-0-353"><a id="__codelineno-0-353" name="__codelineno-0-353"></a>
</span><span id="__span-0-354"><a id="__codelineno-0-354" name="__codelineno-0-354"></a> <span class="bp">self</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">device</span><span class="o">=</span><span class="n">target</span><span class="o">.</span><span class="n">device</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">target</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span>
</span></code></pre></div></td></tr></table></div>
</details>
<div class="doc doc-children">
</div>
</div>
</div>
<div class="doc doc-object doc-class">
<h2 id="refiners.foundationals.segment_anything.SegmentAnything" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-class"></code> <span class="doc doc-object-name doc-class-name">SegmentAnything</span>
<a href="#refiners.foundationals.segment_anything.SegmentAnything" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="nf">SegmentAnything</span><span class="p">(</span>
</span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a> <span class="n">image_encoder</span><span class="p">:</span> <span class="n"><span title="refiners.foundationals.segment_anything.image_encoder.SAMViT">SAMViT</span></span><span class="p">,</span>
</span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a> <span class="n">point_encoder</span><span class="p">:</span> <span class="n"><span title="refiners.foundationals.segment_anything.prompt_encoder.PointEncoder">PointEncoder</span></span><span class="p">,</span>
</span><span id="__span-0-4"><a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a> <span class="n">mask_encoder</span><span class="p">:</span> <span class="n"><span title="refiners.foundationals.segment_anything.prompt_encoder.MaskEncoder">MaskEncoder</span></span><span class="p">,</span>
</span><span id="__span-0-5"><a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a> <span class="n">mask_decoder</span><span class="p">:</span> <span class="n"><span title="refiners.foundationals.segment_anything.mask_decoder.MaskDecoder">MaskDecoder</span></span><span class="p">,</span>
</span><span id="__span-0-6"><a id="__codelineno-0-6" name="__codelineno-0-6" href="#__codelineno-0-6"></a> <span class="n">device</span><span class="p">:</span> <span class="n"><span title="torch.device">device</span></span> <span class="o">|</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#str">str</a></span> <span class="o">=</span> <span class="s2">&quot;cpu&quot;</span><span class="p">,</span>
</span><span id="__span-0-7"><a id="__codelineno-0-7" name="__codelineno-0-7" href="#__codelineno-0-7"></a> <span class="n">dtype</span><span class="p">:</span> <span class="n"><span title="torch.dtype">dtype</span></span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">float32</span><span class="p">,</span>
</span><span id="__span-0-8"><a id="__codelineno-0-8" name="__codelineno-0-8" href="#__codelineno-0-8"></a><span class="p">)</span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p class="doc doc-class-bases">
Bases: <code><a class="autorefs autorefs-internal" title="refiners.fluxion.layers.Chain" href="../../fluxion/layers/#refiners.fluxion.layers.Chain">Chain</a></code></p>
<p>SegmentAnything model.</p>
<p>See <a href="https://arxiv.org/abs/2304.02643">[arXiv:2304.02643] Segment Anything</a></p>
<p>E.g. see <a class="autorefs autorefs-internal" href="#refiners.foundationals.segment_anything.SegmentAnythingH"><code>SegmentAnythingH</code></a> for usage.</p>
<p><span class="doc-section-title">Attributes:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td><code><span title="refiners.foundationals.segment_anything.SegmentAnything.mask_threshold">mask_threshold</span></code></td>
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#float">float</a></code>
</td>
<td>
<div class="doc-md-description">
<p>0.0</p>
</div>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td><code>image_encoder</code></td>
<td>
<code><span title="refiners.foundationals.segment_anything.image_encoder.SAMViT">SAMViT</span></code>
</td>
<td>
<div class="doc-md-description">
<p>The image encoder to use.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td><code>point_encoder</code></td>
<td>
<code><span title="refiners.foundationals.segment_anything.prompt_encoder.PointEncoder">PointEncoder</span></code>
</td>
<td>
<div class="doc-md-description">
<p>The point encoder to use.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td><code>mask_encoder</code></td>
<td>
<code><span title="refiners.foundationals.segment_anything.prompt_encoder.MaskEncoder">MaskEncoder</span></code>
</td>
<td>
<div class="doc-md-description">
<p>The mask encoder to use.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td><code>mask_decoder</code></td>
<td>
<code><span title="refiners.foundationals.segment_anything.mask_decoder.MaskDecoder">MaskDecoder</span></code>
</td>
<td>
<div class="doc-md-description">
<p>The mask decoder to use.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>src/refiners/foundationals/segment_anything/model.py</code></summary>
<div class="language-python highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-40">40</a></span>
<span class="normal"><a href="#__codelineno-0-41">41</a></span>
<span class="normal"><a href="#__codelineno-0-42">42</a></span>
<span class="normal"><a href="#__codelineno-0-43">43</a></span>
<span class="normal"><a href="#__codelineno-0-44">44</a></span>
<span class="normal"><a href="#__codelineno-0-45">45</a></span>
<span class="normal"><a href="#__codelineno-0-46">46</a></span>
<span class="normal"><a href="#__codelineno-0-47">47</a></span>
<span class="normal"><a href="#__codelineno-0-48">48</a></span>
<span class="normal"><a href="#__codelineno-0-49">49</a></span>
<span class="normal"><a href="#__codelineno-0-50">50</a></span>
<span class="normal"><a href="#__codelineno-0-51">51</a></span>
<span class="normal"><a href="#__codelineno-0-52">52</a></span>
<span class="normal"><a href="#__codelineno-0-53">53</a></span>
<span class="normal"><a href="#__codelineno-0-54">54</a></span>
<span class="normal"><a href="#__codelineno-0-55">55</a></span>
<span class="normal"><a href="#__codelineno-0-56">56</a></span>
<span class="normal"><a href="#__codelineno-0-57">57</a></span>
<span class="normal"><a href="#__codelineno-0-58">58</a></span>
<span class="normal"><a href="#__codelineno-0-59">59</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-40"><a id="__codelineno-0-40" name="__codelineno-0-40"></a><span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
</span><span id="__span-0-41"><a id="__codelineno-0-41" name="__codelineno-0-41"></a> <span class="bp">self</span><span class="p">,</span>
</span><span id="__span-0-42"><a id="__codelineno-0-42" name="__codelineno-0-42"></a> <span class="n">image_encoder</span><span class="p">:</span> <span class="n">SAMViT</span><span class="p">,</span>
</span><span id="__span-0-43"><a id="__codelineno-0-43" name="__codelineno-0-43"></a> <span class="n">point_encoder</span><span class="p">:</span> <span class="n">PointEncoder</span><span class="p">,</span>
</span><span id="__span-0-44"><a id="__codelineno-0-44" name="__codelineno-0-44"></a> <span class="n">mask_encoder</span><span class="p">:</span> <span class="n">MaskEncoder</span><span class="p">,</span>
</span><span id="__span-0-45"><a id="__codelineno-0-45" name="__codelineno-0-45"></a> <span class="n">mask_decoder</span><span class="p">:</span> <span class="n">MaskDecoder</span><span class="p">,</span>
</span><span id="__span-0-46"><a id="__codelineno-0-46" name="__codelineno-0-46"></a> <span class="n">device</span><span class="p">:</span> <span class="n">Device</span> <span class="o">|</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;cpu&quot;</span><span class="p">,</span>
</span><span id="__span-0-47"><a id="__codelineno-0-47" name="__codelineno-0-47"></a> <span class="n">dtype</span><span class="p">:</span> <span class="n">DType</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">float32</span><span class="p">,</span>
</span><span id="__span-0-48"><a id="__codelineno-0-48" name="__codelineno-0-48"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
</span><span id="__span-0-49"><a id="__codelineno-0-49" name="__codelineno-0-49"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Initialize SegmentAnything model.</span>
</span><span id="__span-0-50"><a id="__codelineno-0-50" name="__codelineno-0-50"></a>
</span><span id="__span-0-51"><a id="__codelineno-0-51" name="__codelineno-0-51"></a><span class="sd"> Args:</span>
</span><span id="__span-0-52"><a id="__codelineno-0-52" name="__codelineno-0-52"></a><span class="sd"> image_encoder: The image encoder to use.</span>
</span><span id="__span-0-53"><a id="__codelineno-0-53" name="__codelineno-0-53"></a><span class="sd"> point_encoder: The point encoder to use.</span>
</span><span id="__span-0-54"><a id="__codelineno-0-54" name="__codelineno-0-54"></a><span class="sd"> mask_encoder: The mask encoder to use.</span>
</span><span id="__span-0-55"><a id="__codelineno-0-55" name="__codelineno-0-55"></a><span class="sd"> mask_decoder: The mask decoder to use.</span>
</span><span id="__span-0-56"><a id="__codelineno-0-56" name="__codelineno-0-56"></a><span class="sd"> &quot;&quot;&quot;</span>
</span><span id="__span-0-57"><a id="__codelineno-0-57" name="__codelineno-0-57"></a> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">image_encoder</span><span class="p">,</span> <span class="n">point_encoder</span><span class="p">,</span> <span class="n">mask_encoder</span><span class="p">,</span> <span class="n">mask_decoder</span><span class="p">)</span>
</span><span id="__span-0-58"><a id="__codelineno-0-58" name="__codelineno-0-58"></a>
</span><span id="__span-0-59"><a id="__codelineno-0-59" name="__codelineno-0-59"></a> <span class="bp">self</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">device</span><span class="o">=</span><span class="n">device</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span>
</span></code></pre></div></td></tr></table></div>
</details>
<div class="doc doc-children">
<div class="doc doc-object doc-attribute">
<h3 id="refiners.foundationals.segment_anything.SegmentAnything.image_encoder" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">image_encoder</span>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-property"><code>property</code></small>
</span>
<a href="#refiners.foundationals.segment_anything.SegmentAnything.image_encoder" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="n">image_encoder</span><span class="p">:</span> <span class="n"><span title="refiners.foundationals.segment_anything.image_encoder.SAMViT">SAMViT</span></span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p>The image encoder.</p>
</div>
</div>
<div class="doc doc-object doc-attribute">
<h3 id="refiners.foundationals.segment_anything.SegmentAnything.image_encoder_resolution" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">image_encoder_resolution</span>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-property"><code>property</code></small>
</span>
<a href="#refiners.foundationals.segment_anything.SegmentAnything.image_encoder_resolution" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="n">image_encoder_resolution</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p>The resolution of the image encoder.</p>
</div>
</div>
<div class="doc doc-object doc-attribute">
<h3 id="refiners.foundationals.segment_anything.SegmentAnything.mask_decoder" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">mask_decoder</span>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-property"><code>property</code></small>
</span>
<a href="#refiners.foundationals.segment_anything.SegmentAnything.mask_decoder" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="n">mask_decoder</span><span class="p">:</span> <span class="n"><span title="refiners.foundationals.segment_anything.mask_decoder.MaskDecoder">MaskDecoder</span></span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p>The mask decoder.</p>
</div>
</div>
<div class="doc doc-object doc-attribute">
<h3 id="refiners.foundationals.segment_anything.SegmentAnything.mask_encoder" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">mask_encoder</span>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-property"><code>property</code></small>
</span>
<a href="#refiners.foundationals.segment_anything.SegmentAnything.mask_encoder" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="n">mask_encoder</span><span class="p">:</span> <span class="n"><span title="refiners.foundationals.segment_anything.prompt_encoder.MaskEncoder">MaskEncoder</span></span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p>The mask encoder.</p>
</div>
</div>
<div class="doc doc-object doc-attribute">
<h3 id="refiners.foundationals.segment_anything.SegmentAnything.point_encoder" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">point_encoder</span>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-property"><code>property</code></small>
</span>
<a href="#refiners.foundationals.segment_anything.SegmentAnything.point_encoder" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="n">point_encoder</span><span class="p">:</span> <span class="n"><span title="refiners.foundationals.segment_anything.prompt_encoder.PointEncoder">PointEncoder</span></span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p>The point encoder.</p>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="refiners.foundationals.segment_anything.SegmentAnything.compute_image_embedding" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">compute_image_embedding</span>
<a href="#refiners.foundationals.segment_anything.SegmentAnything.compute_image_embedding" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="nf">compute_image_embedding</span><span class="p">(</span><span class="n">image</span><span class="p">:</span> <span class="n"><span title="PIL.Image.Image">Image</span></span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n"><span title="refiners.foundationals.segment_anything.model.ImageEmbedding">ImageEmbedding</span></span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p>Compute the emmbedding of an image.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td><code>image</code></td>
<td>
<code><span title="PIL.Image.Image">Image</span></code>
</td>
<td>
<div class="doc-md-description">
<p>The image to compute the embedding of.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Returns:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><span title="refiners.foundationals.segment_anything.model.ImageEmbedding">ImageEmbedding</span></code>
</td>
<td>
<div class="doc-md-description">
<p>The computed image embedding.</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>src/refiners/foundationals/segment_anything/model.py</code></summary>
<div class="language-python highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-81">81</a></span>
<span class="normal"><a href="#__codelineno-0-82">82</a></span>
<span class="normal"><a href="#__codelineno-0-83">83</a></span>
<span class="normal"><a href="#__codelineno-0-84">84</a></span>
<span class="normal"><a href="#__codelineno-0-85">85</a></span>
<span class="normal"><a href="#__codelineno-0-86">86</a></span>
<span class="normal"><a href="#__codelineno-0-87">87</a></span>
<span class="normal"><a href="#__codelineno-0-88">88</a></span>
<span class="normal"><a href="#__codelineno-0-89">89</a></span>
<span class="normal"><a href="#__codelineno-0-90">90</a></span>
<span class="normal"><a href="#__codelineno-0-91">91</a></span>
<span class="normal"><a href="#__codelineno-0-92">92</a></span>
<span class="normal"><a href="#__codelineno-0-93">93</a></span>
<span class="normal"><a href="#__codelineno-0-94">94</a></span>
<span class="normal"><a href="#__codelineno-0-95">95</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-81"><a id="__codelineno-0-81" name="__codelineno-0-81"></a><span class="nd">@no_grad</span><span class="p">()</span>
</span><span id="__span-0-82"><a id="__codelineno-0-82" name="__codelineno-0-82"></a><span class="k">def</span> <span class="nf">compute_image_embedding</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">image</span><span class="p">:</span> <span class="n">Image</span><span class="o">.</span><span class="n">Image</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">ImageEmbedding</span><span class="p">:</span>
</span><span id="__span-0-83"><a id="__codelineno-0-83" name="__codelineno-0-83"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Compute the emmbedding of an image.</span>
</span><span id="__span-0-84"><a id="__codelineno-0-84" name="__codelineno-0-84"></a>
</span><span id="__span-0-85"><a id="__codelineno-0-85" name="__codelineno-0-85"></a><span class="sd"> Args:</span>
</span><span id="__span-0-86"><a id="__codelineno-0-86" name="__codelineno-0-86"></a><span class="sd"> image: The image to compute the embedding of.</span>
</span><span id="__span-0-87"><a id="__codelineno-0-87" name="__codelineno-0-87"></a>
</span><span id="__span-0-88"><a id="__codelineno-0-88" name="__codelineno-0-88"></a><span class="sd"> Returns:</span>
</span><span id="__span-0-89"><a id="__codelineno-0-89" name="__codelineno-0-89"></a><span class="sd"> The computed image embedding.</span>
</span><span id="__span-0-90"><a id="__codelineno-0-90" name="__codelineno-0-90"></a><span class="sd"> &quot;&quot;&quot;</span>
</span><span id="__span-0-91"><a id="__codelineno-0-91" name="__codelineno-0-91"></a> <span class="n">original_size</span> <span class="o">=</span> <span class="p">(</span><span class="n">image</span><span class="o">.</span><span class="n">height</span><span class="p">,</span> <span class="n">image</span><span class="o">.</span><span class="n">width</span><span class="p">)</span>
</span><span id="__span-0-92"><a id="__codelineno-0-92" name="__codelineno-0-92"></a> <span class="k">return</span> <span class="n">ImageEmbedding</span><span class="p">(</span>
</span><span id="__span-0-93"><a id="__codelineno-0-93" name="__codelineno-0-93"></a> <span class="n">features</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">image_encoder</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">preprocess_image</span><span class="p">(</span><span class="n">image</span><span class="p">)),</span>
</span><span id="__span-0-94"><a id="__codelineno-0-94" name="__codelineno-0-94"></a> <span class="n">original_image_size</span><span class="o">=</span><span class="n">original_size</span><span class="p">,</span>
</span><span id="__span-0-95"><a id="__codelineno-0-95" name="__codelineno-0-95"></a> <span class="p">)</span>
</span></code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="refiners.foundationals.segment_anything.SegmentAnything.normalize" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">normalize</span>
<a href="#refiners.foundationals.segment_anything.SegmentAnything.normalize" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="nf">normalize</span><span class="p">(</span>
</span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a> <span class="n">coordinates</span><span class="p">:</span> <span class="n"><span title="torch.Tensor">Tensor</span></span><span class="p">,</span> <span class="n">original_size</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a></span><span class="p">[</span><span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">,</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">]</span>
</span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="n"><span title="torch.Tensor">Tensor</span></span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p>See <a class="autorefs autorefs-internal" href="#refiners.foundationals.segment_anything.utils.normalize_coordinates"><code>normalize_coordinates</code></a>
Args:
coordinates: a tensor of coordinates.
original_size: (h, w) the original size of the image.
Returns:
The [0,1] normalized coordinates tensor.</p>
<details class="quote">
<summary>Source code in <code>src/refiners/foundationals/segment_anything/model.py</code></summary>
<div class="language-python highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-179">179</a></span>
<span class="normal"><a href="#__codelineno-0-180">180</a></span>
<span class="normal"><a href="#__codelineno-0-181">181</a></span>
<span class="normal"><a href="#__codelineno-0-182">182</a></span>
<span class="normal"><a href="#__codelineno-0-183">183</a></span>
<span class="normal"><a href="#__codelineno-0-184">184</a></span>
<span class="normal"><a href="#__codelineno-0-185">185</a></span>
<span class="normal"><a href="#__codelineno-0-186">186</a></span>
<span class="normal"><a href="#__codelineno-0-187">187</a></span>
<span class="normal"><a href="#__codelineno-0-188">188</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-179"><a id="__codelineno-0-179" name="__codelineno-0-179"></a><span class="k">def</span> <span class="nf">normalize</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">coordinates</span><span class="p">:</span> <span class="n">Tensor</span><span class="p">,</span> <span class="n">original_size</span><span class="p">:</span> <span class="nb">tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">Tensor</span><span class="p">:</span>
</span><span id="__span-0-180"><a id="__codelineno-0-180" name="__codelineno-0-180"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
</span><span id="__span-0-181"><a id="__codelineno-0-181" name="__codelineno-0-181"></a><span class="sd"> See [`normalize_coordinates`][refiners.foundationals.segment_anything.utils.normalize_coordinates]</span>
</span><span id="__span-0-182"><a id="__codelineno-0-182" name="__codelineno-0-182"></a><span class="sd"> Args:</span>
</span><span id="__span-0-183"><a id="__codelineno-0-183" name="__codelineno-0-183"></a><span class="sd"> coordinates: a tensor of coordinates.</span>
</span><span id="__span-0-184"><a id="__codelineno-0-184" name="__codelineno-0-184"></a><span class="sd"> original_size: (h, w) the original size of the image.</span>
</span><span id="__span-0-185"><a id="__codelineno-0-185" name="__codelineno-0-185"></a><span class="sd"> Returns:</span>
</span><span id="__span-0-186"><a id="__codelineno-0-186" name="__codelineno-0-186"></a><span class="sd"> The [0,1] normalized coordinates tensor.</span>
</span><span id="__span-0-187"><a id="__codelineno-0-187" name="__codelineno-0-187"></a><span class="sd"> &quot;&quot;&quot;</span>
</span><span id="__span-0-188"><a id="__codelineno-0-188" name="__codelineno-0-188"></a> <span class="k">return</span> <span class="n">normalize_coordinates</span><span class="p">(</span><span class="n">coordinates</span><span class="p">,</span> <span class="n">original_size</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">image_encoder_resolution</span><span class="p">)</span>
</span></code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="refiners.foundationals.segment_anything.SegmentAnything.postprocess_masks" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">postprocess_masks</span>
<a href="#refiners.foundationals.segment_anything.SegmentAnything.postprocess_masks" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="nf">postprocess_masks</span><span class="p">(</span>
</span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a> <span class="n">low_res_masks</span><span class="p">:</span> <span class="n"><span title="torch.Tensor">Tensor</span></span><span class="p">,</span> <span class="n">original_size</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a></span><span class="p">[</span><span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">,</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">]</span>
</span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="n"><span title="torch.Tensor">Tensor</span></span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p>See <a class="autorefs autorefs-internal" href="#refiners.foundationals.segment_anything.utils.postprocess_masks"><code>postprocess_masks</code></a>
Args:
low_res_masks: a mask tensor of size (N, 1, 256, 256)
original_size: (h, w) the original size of the image.
Returns:
The mask of shape (N, 1, H, W)</p>
<details class="quote">
<summary>Source code in <code>src/refiners/foundationals/segment_anything/model.py</code></summary>
<div class="language-python highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-190">190</a></span>
<span class="normal"><a href="#__codelineno-0-191">191</a></span>
<span class="normal"><a href="#__codelineno-0-192">192</a></span>
<span class="normal"><a href="#__codelineno-0-193">193</a></span>
<span class="normal"><a href="#__codelineno-0-194">194</a></span>
<span class="normal"><a href="#__codelineno-0-195">195</a></span>
<span class="normal"><a href="#__codelineno-0-196">196</a></span>
<span class="normal"><a href="#__codelineno-0-197">197</a></span>
<span class="normal"><a href="#__codelineno-0-198">198</a></span>
<span class="normal"><a href="#__codelineno-0-199">199</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-190"><a id="__codelineno-0-190" name="__codelineno-0-190"></a><span class="k">def</span> <span class="nf">postprocess_masks</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">low_res_masks</span><span class="p">:</span> <span class="n">Tensor</span><span class="p">,</span> <span class="n">original_size</span><span class="p">:</span> <span class="nb">tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">Tensor</span><span class="p">:</span>
</span><span id="__span-0-191"><a id="__codelineno-0-191" name="__codelineno-0-191"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
</span><span id="__span-0-192"><a id="__codelineno-0-192" name="__codelineno-0-192"></a><span class="sd"> See [`postprocess_masks`][refiners.foundationals.segment_anything.utils.postprocess_masks]</span>
</span><span id="__span-0-193"><a id="__codelineno-0-193" name="__codelineno-0-193"></a><span class="sd"> Args:</span>
</span><span id="__span-0-194"><a id="__codelineno-0-194" name="__codelineno-0-194"></a><span class="sd"> low_res_masks: a mask tensor of size (N, 1, 256, 256)</span>
</span><span id="__span-0-195"><a id="__codelineno-0-195" name="__codelineno-0-195"></a><span class="sd"> original_size: (h, w) the original size of the image.</span>
</span><span id="__span-0-196"><a id="__codelineno-0-196" name="__codelineno-0-196"></a><span class="sd"> Returns:</span>
</span><span id="__span-0-197"><a id="__codelineno-0-197" name="__codelineno-0-197"></a><span class="sd"> The mask of shape (N, 1, H, W)</span>
</span><span id="__span-0-198"><a id="__codelineno-0-198" name="__codelineno-0-198"></a><span class="sd"> &quot;&quot;&quot;</span>
</span><span id="__span-0-199"><a id="__codelineno-0-199" name="__codelineno-0-199"></a> <span class="k">return</span> <span class="n">postprocess_masks</span><span class="p">(</span><span class="n">low_res_masks</span><span class="p">,</span> <span class="n">original_size</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">image_encoder_resolution</span><span class="p">)</span>
</span></code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="refiners.foundationals.segment_anything.SegmentAnything.predict" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">predict</span>
<a href="#refiners.foundationals.segment_anything.SegmentAnything.predict" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="nf">predict</span><span class="p">(</span>
</span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a> <span class="nb">input</span><span class="p">:</span> <span class="n"><span title="PIL.Image.Image">Image</span></span> <span class="o">|</span> <span class="n"><span title="refiners.foundationals.segment_anything.model.ImageEmbedding">ImageEmbedding</span></span><span class="p">,</span>
</span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a> <span class="n">foreground_points</span><span class="p">:</span> <span class="p">(</span>
</span><span id="__span-0-4"><a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a> <span class="n"><a class="autorefs autorefs-external" title="typing.Sequence" href="https://docs.python.org/3/library/typing.html#typing.Sequence">Sequence</a></span><span class="p">[</span><span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a></span><span class="p">[</span><span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#float">float</a></span><span class="p">,</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#float">float</a></span><span class="p">]]</span> <span class="o">|</span> <span class="kc">None</span>
</span><span id="__span-0-5"><a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a> <span class="p">)</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-6"><a id="__codelineno-0-6" name="__codelineno-0-6" href="#__codelineno-0-6"></a> <span class="n">background_points</span><span class="p">:</span> <span class="p">(</span>
</span><span id="__span-0-7"><a id="__codelineno-0-7" name="__codelineno-0-7" href="#__codelineno-0-7"></a> <span class="n"><a class="autorefs autorefs-external" title="typing.Sequence" href="https://docs.python.org/3/library/typing.html#typing.Sequence">Sequence</a></span><span class="p">[</span><span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a></span><span class="p">[</span><span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#float">float</a></span><span class="p">,</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#float">float</a></span><span class="p">]]</span> <span class="o">|</span> <span class="kc">None</span>
</span><span id="__span-0-8"><a id="__codelineno-0-8" name="__codelineno-0-8" href="#__codelineno-0-8"></a> <span class="p">)</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-9"><a id="__codelineno-0-9" name="__codelineno-0-9" href="#__codelineno-0-9"></a> <span class="n">box_points</span><span class="p">:</span> <span class="p">(</span>
</span><span id="__span-0-10"><a id="__codelineno-0-10" name="__codelineno-0-10" href="#__codelineno-0-10"></a> <span class="n"><a class="autorefs autorefs-external" title="typing.Sequence" href="https://docs.python.org/3/library/typing.html#typing.Sequence">Sequence</a></span><span class="p">[</span><span class="n"><a class="autorefs autorefs-external" title="typing.Sequence" href="https://docs.python.org/3/library/typing.html#typing.Sequence">Sequence</a></span><span class="p">[</span><span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a></span><span class="p">[</span><span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#float">float</a></span><span class="p">,</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#float">float</a></span><span class="p">]]]</span> <span class="o">|</span> <span class="kc">None</span>
</span><span id="__span-0-11"><a id="__codelineno-0-11" name="__codelineno-0-11" href="#__codelineno-0-11"></a> <span class="p">)</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-12"><a id="__codelineno-0-12" name="__codelineno-0-12" href="#__codelineno-0-12"></a> <span class="n">low_res_mask</span><span class="p">:</span> <span class="p">(</span>
</span><span id="__span-0-13"><a id="__codelineno-0-13" name="__codelineno-0-13" href="#__codelineno-0-13"></a> <span class="n"><span title="jaxtyping.Float">Float</span></span><span class="p">[</span><span class="n"><span title="torch.Tensor">Tensor</span></span><span class="p">,</span> <span class="s2">&quot;1 1 256 256&quot;</span><span class="p">]</span> <span class="o">|</span> <span class="kc">None</span>
</span><span id="__span-0-14"><a id="__codelineno-0-14" name="__codelineno-0-14" href="#__codelineno-0-14"></a> <span class="p">)</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-15"><a id="__codelineno-0-15" name="__codelineno-0-15" href="#__codelineno-0-15"></a> <span class="n">binarize</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#bool">bool</a></span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
</span><span id="__span-0-16"><a id="__codelineno-0-16" name="__codelineno-0-16" href="#__codelineno-0-16"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a></span><span class="p">[</span><span class="n"><span title="torch.Tensor">Tensor</span></span><span class="p">,</span> <span class="n"><span title="torch.Tensor">Tensor</span></span><span class="p">,</span> <span class="n"><span title="torch.Tensor">Tensor</span></span><span class="p">]</span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p>Predict the masks of the input image.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td><code>input</code></td>
<td>
<code><span title="PIL.Image.Image">Image</span> | <span title="refiners.foundationals.segment_anything.model.ImageEmbedding">ImageEmbedding</span></code>
</td>
<td>
<div class="doc-md-description">
<p>The input image or its embedding.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td><code>foreground_points</code></td>
<td>
<code><a class="autorefs autorefs-external" title="typing.Sequence" href="https://docs.python.org/3/library/typing.html#typing.Sequence">Sequence</a>[<a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a>[<a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#float">float</a>, <a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#float">float</a>]] | None</code>
</td>
<td>
<div class="doc-md-description">
<p>The points of the foreground.</p>
</div>
</td>
<td>
<code>None</code>
</td>
</tr>
<tr class="doc-section-item">
<td><code>background_points</code></td>
<td>
<code><a class="autorefs autorefs-external" title="typing.Sequence" href="https://docs.python.org/3/library/typing.html#typing.Sequence">Sequence</a>[<a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a>[<a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#float">float</a>, <a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#float">float</a>]] | None</code>
</td>
<td>
<div class="doc-md-description">
<p>The points of the background.</p>
</div>
</td>
<td>
<code>None</code>
</td>
</tr>
<tr class="doc-section-item">
<td><code>box_points</code></td>
<td>
<code><a class="autorefs autorefs-external" title="typing.Sequence" href="https://docs.python.org/3/library/typing.html#typing.Sequence">Sequence</a>[<a class="autorefs autorefs-external" title="typing.Sequence" href="https://docs.python.org/3/library/typing.html#typing.Sequence">Sequence</a>[<a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a>[<a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#float">float</a>, <a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#float">float</a>]]] | None</code>
</td>
<td>
<div class="doc-md-description">
<p>The points of the box.</p>
</div>
</td>
<td>
<code>None</code>
</td>
</tr>
<tr class="doc-section-item">
<td><code>low_res_mask</code></td>
<td>
<code><span title="jaxtyping.Float">Float</span>[<span title="torch.Tensor">Tensor</span>, &#39;1 1 256 256&#39;] | None</code>
</td>
<td>
<div class="doc-md-description">
<p>The low resolution mask.</p>
</div>
</td>
<td>
<code>None</code>
</td>
</tr>
<tr class="doc-section-item">
<td><code>binarize</code></td>
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#bool">bool</a></code>
</td>
<td>
<div class="doc-md-description">
<p>Whether to binarize the masks.</p>
</div>
</td>
<td>
<code>True</code>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Returns:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><span title="torch.Tensor">Tensor</span></code>
</td>
<td>
<div class="doc-md-description">
<p>The predicted masks.</p>
</div>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code><span title="torch.Tensor">Tensor</span></code>
</td>
<td>
<div class="doc-md-description">
<p>The IOU prediction.</p>
</div>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code><span title="torch.Tensor">Tensor</span></code>
</td>
<td>
<div class="doc-md-description">
<p>The low resolution masks.</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>src/refiners/foundationals/segment_anything/model.py</code></summary>
<div class="language-python highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-97"> 97</a></span>
<span class="normal"><a href="#__codelineno-0-98"> 98</a></span>
<span class="normal"><a href="#__codelineno-0-99"> 99</a></span>
<span class="normal"><a href="#__codelineno-0-100">100</a></span>
<span class="normal"><a href="#__codelineno-0-101">101</a></span>
<span class="normal"><a href="#__codelineno-0-102">102</a></span>
<span class="normal"><a href="#__codelineno-0-103">103</a></span>
<span class="normal"><a href="#__codelineno-0-104">104</a></span>
<span class="normal"><a href="#__codelineno-0-105">105</a></span>
<span class="normal"><a href="#__codelineno-0-106">106</a></span>
<span class="normal"><a href="#__codelineno-0-107">107</a></span>
<span class="normal"><a href="#__codelineno-0-108">108</a></span>
<span class="normal"><a href="#__codelineno-0-109">109</a></span>
<span class="normal"><a href="#__codelineno-0-110">110</a></span>
<span class="normal"><a href="#__codelineno-0-111">111</a></span>
<span class="normal"><a href="#__codelineno-0-112">112</a></span>
<span class="normal"><a href="#__codelineno-0-113">113</a></span>
<span class="normal"><a href="#__codelineno-0-114">114</a></span>
<span class="normal"><a href="#__codelineno-0-115">115</a></span>
<span class="normal"><a href="#__codelineno-0-116">116</a></span>
<span class="normal"><a href="#__codelineno-0-117">117</a></span>
<span class="normal"><a href="#__codelineno-0-118">118</a></span>
<span class="normal"><a href="#__codelineno-0-119">119</a></span>
<span class="normal"><a href="#__codelineno-0-120">120</a></span>
<span class="normal"><a href="#__codelineno-0-121">121</a></span>
<span class="normal"><a href="#__codelineno-0-122">122</a></span>
<span class="normal"><a href="#__codelineno-0-123">123</a></span>
<span class="normal"><a href="#__codelineno-0-124">124</a></span>
<span class="normal"><a href="#__codelineno-0-125">125</a></span>
<span class="normal"><a href="#__codelineno-0-126">126</a></span>
<span class="normal"><a href="#__codelineno-0-127">127</a></span>
<span class="normal"><a href="#__codelineno-0-128">128</a></span>
<span class="normal"><a href="#__codelineno-0-129">129</a></span>
<span class="normal"><a href="#__codelineno-0-130">130</a></span>
<span class="normal"><a href="#__codelineno-0-131">131</a></span>
<span class="normal"><a href="#__codelineno-0-132">132</a></span>
<span class="normal"><a href="#__codelineno-0-133">133</a></span>
<span class="normal"><a href="#__codelineno-0-134">134</a></span>
<span class="normal"><a href="#__codelineno-0-135">135</a></span>
<span class="normal"><a href="#__codelineno-0-136">136</a></span>
<span class="normal"><a href="#__codelineno-0-137">137</a></span>
<span class="normal"><a href="#__codelineno-0-138">138</a></span>
<span class="normal"><a href="#__codelineno-0-139">139</a></span>
<span class="normal"><a href="#__codelineno-0-140">140</a></span>
<span class="normal"><a href="#__codelineno-0-141">141</a></span>
<span class="normal"><a href="#__codelineno-0-142">142</a></span>
<span class="normal"><a href="#__codelineno-0-143">143</a></span>
<span class="normal"><a href="#__codelineno-0-144">144</a></span>
<span class="normal"><a href="#__codelineno-0-145">145</a></span>
<span class="normal"><a href="#__codelineno-0-146">146</a></span>
<span class="normal"><a href="#__codelineno-0-147">147</a></span>
<span class="normal"><a href="#__codelineno-0-148">148</a></span>
<span class="normal"><a href="#__codelineno-0-149">149</a></span>
<span class="normal"><a href="#__codelineno-0-150">150</a></span>
<span class="normal"><a href="#__codelineno-0-151">151</a></span>
<span class="normal"><a href="#__codelineno-0-152">152</a></span>
<span class="normal"><a href="#__codelineno-0-153">153</a></span>
<span class="normal"><a href="#__codelineno-0-154">154</a></span>
<span class="normal"><a href="#__codelineno-0-155">155</a></span>
<span class="normal"><a href="#__codelineno-0-156">156</a></span>
<span class="normal"><a href="#__codelineno-0-157">157</a></span>
<span class="normal"><a href="#__codelineno-0-158">158</a></span>
<span class="normal"><a href="#__codelineno-0-159">159</a></span>
<span class="normal"><a href="#__codelineno-0-160">160</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-97"><a id="__codelineno-0-97" name="__codelineno-0-97"></a><span class="nd">@no_grad</span><span class="p">()</span>
</span><span id="__span-0-98"><a id="__codelineno-0-98" name="__codelineno-0-98"></a><span class="k">def</span> <span class="nf">predict</span><span class="p">(</span>
</span><span id="__span-0-99"><a id="__codelineno-0-99" name="__codelineno-0-99"></a> <span class="bp">self</span><span class="p">,</span>
</span><span id="__span-0-100"><a id="__codelineno-0-100" name="__codelineno-0-100"></a> <span class="nb">input</span><span class="p">:</span> <span class="n">Image</span><span class="o">.</span><span class="n">Image</span> <span class="o">|</span> <span class="n">ImageEmbedding</span><span class="p">,</span>
</span><span id="__span-0-101"><a id="__codelineno-0-101" name="__codelineno-0-101"></a> <span class="n">foreground_points</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="nb">tuple</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="nb">float</span><span class="p">]]</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-102"><a id="__codelineno-0-102" name="__codelineno-0-102"></a> <span class="n">background_points</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="nb">tuple</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="nb">float</span><span class="p">]]</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-103"><a id="__codelineno-0-103" name="__codelineno-0-103"></a> <span class="n">box_points</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="n">Sequence</span><span class="p">[</span><span class="nb">tuple</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="nb">float</span><span class="p">]]]</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-104"><a id="__codelineno-0-104" name="__codelineno-0-104"></a> <span class="n">low_res_mask</span><span class="p">:</span> <span class="n">Float</span><span class="p">[</span><span class="n">Tensor</span><span class="p">,</span> <span class="s2">&quot;1 1 256 256&quot;</span><span class="p">]</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-105"><a id="__codelineno-0-105" name="__codelineno-0-105"></a> <span class="n">binarize</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
</span><span id="__span-0-106"><a id="__codelineno-0-106" name="__codelineno-0-106"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">tuple</span><span class="p">[</span><span class="n">Tensor</span><span class="p">,</span> <span class="n">Tensor</span><span class="p">,</span> <span class="n">Tensor</span><span class="p">]:</span>
</span><span id="__span-0-107"><a id="__codelineno-0-107" name="__codelineno-0-107"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Predict the masks of the input image.</span>
</span><span id="__span-0-108"><a id="__codelineno-0-108" name="__codelineno-0-108"></a>
</span><span id="__span-0-109"><a id="__codelineno-0-109" name="__codelineno-0-109"></a><span class="sd"> Args:</span>
</span><span id="__span-0-110"><a id="__codelineno-0-110" name="__codelineno-0-110"></a><span class="sd"> input: The input image or its embedding.</span>
</span><span id="__span-0-111"><a id="__codelineno-0-111" name="__codelineno-0-111"></a><span class="sd"> foreground_points: The points of the foreground.</span>
</span><span id="__span-0-112"><a id="__codelineno-0-112" name="__codelineno-0-112"></a><span class="sd"> background_points: The points of the background.</span>
</span><span id="__span-0-113"><a id="__codelineno-0-113" name="__codelineno-0-113"></a><span class="sd"> box_points: The points of the box.</span>
</span><span id="__span-0-114"><a id="__codelineno-0-114" name="__codelineno-0-114"></a><span class="sd"> low_res_mask: The low resolution mask.</span>
</span><span id="__span-0-115"><a id="__codelineno-0-115" name="__codelineno-0-115"></a><span class="sd"> binarize: Whether to binarize the masks.</span>
</span><span id="__span-0-116"><a id="__codelineno-0-116" name="__codelineno-0-116"></a>
</span><span id="__span-0-117"><a id="__codelineno-0-117" name="__codelineno-0-117"></a><span class="sd"> Returns:</span>
</span><span id="__span-0-118"><a id="__codelineno-0-118" name="__codelineno-0-118"></a><span class="sd"> The predicted masks.</span>
</span><span id="__span-0-119"><a id="__codelineno-0-119" name="__codelineno-0-119"></a><span class="sd"> The IOU prediction.</span>
</span><span id="__span-0-120"><a id="__codelineno-0-120" name="__codelineno-0-120"></a><span class="sd"> The low resolution masks.</span>
</span><span id="__span-0-121"><a id="__codelineno-0-121" name="__codelineno-0-121"></a><span class="sd"> &quot;&quot;&quot;</span>
</span><span id="__span-0-122"><a id="__codelineno-0-122" name="__codelineno-0-122"></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span> <span class="n">ImageEmbedding</span><span class="p">):</span>
</span><span id="__span-0-123"><a id="__codelineno-0-123" name="__codelineno-0-123"></a> <span class="n">original_size</span> <span class="o">=</span> <span class="nb">input</span><span class="o">.</span><span class="n">original_image_size</span>
</span><span id="__span-0-124"><a id="__codelineno-0-124" name="__codelineno-0-124"></a> <span class="n">image_embedding</span> <span class="o">=</span> <span class="nb">input</span><span class="o">.</span><span class="n">features</span>
</span><span id="__span-0-125"><a id="__codelineno-0-125" name="__codelineno-0-125"></a> <span class="k">else</span><span class="p">:</span>
</span><span id="__span-0-126"><a id="__codelineno-0-126" name="__codelineno-0-126"></a> <span class="n">original_size</span> <span class="o">=</span> <span class="p">(</span><span class="nb">input</span><span class="o">.</span><span class="n">height</span><span class="p">,</span> <span class="nb">input</span><span class="o">.</span><span class="n">width</span><span class="p">)</span>
</span><span id="__span-0-127"><a id="__codelineno-0-127" name="__codelineno-0-127"></a> <span class="n">image_embedding</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">image_encoder</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">preprocess_image</span><span class="p">(</span><span class="nb">input</span><span class="p">))</span>
</span><span id="__span-0-128"><a id="__codelineno-0-128" name="__codelineno-0-128"></a>
</span><span id="__span-0-129"><a id="__codelineno-0-129" name="__codelineno-0-129"></a> <span class="n">coordinates</span><span class="p">,</span> <span class="n">type_mask</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">point_encoder</span><span class="o">.</span><span class="n">points_to_tensor</span><span class="p">(</span>
</span><span id="__span-0-130"><a id="__codelineno-0-130" name="__codelineno-0-130"></a> <span class="n">foreground_points</span><span class="o">=</span><span class="n">foreground_points</span><span class="p">,</span>
</span><span id="__span-0-131"><a id="__codelineno-0-131" name="__codelineno-0-131"></a> <span class="n">background_points</span><span class="o">=</span><span class="n">background_points</span><span class="p">,</span>
</span><span id="__span-0-132"><a id="__codelineno-0-132" name="__codelineno-0-132"></a> <span class="n">box_points</span><span class="o">=</span><span class="n">box_points</span><span class="p">,</span>
</span><span id="__span-0-133"><a id="__codelineno-0-133" name="__codelineno-0-133"></a> <span class="p">)</span>
</span><span id="__span-0-134"><a id="__codelineno-0-134" name="__codelineno-0-134"></a> <span class="bp">self</span><span class="o">.</span><span class="n">point_encoder</span><span class="o">.</span><span class="n">set_type_mask</span><span class="p">(</span><span class="n">type_mask</span><span class="o">=</span><span class="n">type_mask</span><span class="p">)</span>
</span><span id="__span-0-135"><a id="__codelineno-0-135" name="__codelineno-0-135"></a>
</span><span id="__span-0-136"><a id="__codelineno-0-136" name="__codelineno-0-136"></a> <span class="k">if</span> <span class="n">low_res_mask</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
</span><span id="__span-0-137"><a id="__codelineno-0-137" name="__codelineno-0-137"></a> <span class="n">mask_embedding</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">mask_encoder</span><span class="p">(</span><span class="n">low_res_mask</span><span class="p">)</span>
</span><span id="__span-0-138"><a id="__codelineno-0-138" name="__codelineno-0-138"></a> <span class="k">else</span><span class="p">:</span>
</span><span id="__span-0-139"><a id="__codelineno-0-139" name="__codelineno-0-139"></a> <span class="n">mask_embedding</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">mask_encoder</span><span class="o">.</span><span class="n">get_no_mask_dense_embedding</span><span class="p">(</span>
</span><span id="__span-0-140"><a id="__codelineno-0-140" name="__codelineno-0-140"></a> <span class="n">image_embedding_size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">image_encoder</span><span class="o">.</span><span class="n">image_embedding_size</span>
</span><span id="__span-0-141"><a id="__codelineno-0-141" name="__codelineno-0-141"></a> <span class="p">)</span>
</span><span id="__span-0-142"><a id="__codelineno-0-142" name="__codelineno-0-142"></a>
</span><span id="__span-0-143"><a id="__codelineno-0-143" name="__codelineno-0-143"></a> <span class="n">point_embedding</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">point_encoder</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">normalize</span><span class="p">(</span><span class="n">coordinates</span><span class="p">,</span> <span class="n">original_size</span><span class="o">=</span><span class="n">original_size</span><span class="p">))</span>
</span><span id="__span-0-144"><a id="__codelineno-0-144" name="__codelineno-0-144"></a> <span class="n">dense_positional_embedding</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">point_encoder</span><span class="o">.</span><span class="n">get_dense_positional_embedding</span><span class="p">(</span>
</span><span id="__span-0-145"><a id="__codelineno-0-145" name="__codelineno-0-145"></a> <span class="n">image_embedding_size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">image_encoder</span><span class="o">.</span><span class="n">image_embedding_size</span>
</span><span id="__span-0-146"><a id="__codelineno-0-146" name="__codelineno-0-146"></a> <span class="p">)</span>
</span><span id="__span-0-147"><a id="__codelineno-0-147" name="__codelineno-0-147"></a>
</span><span id="__span-0-148"><a id="__codelineno-0-148" name="__codelineno-0-148"></a> <span class="bp">self</span><span class="o">.</span><span class="n">mask_decoder</span><span class="o">.</span><span class="n">set_image_embedding</span><span class="p">(</span><span class="n">image_embedding</span><span class="o">=</span><span class="n">image_embedding</span><span class="p">)</span>
</span><span id="__span-0-149"><a id="__codelineno-0-149" name="__codelineno-0-149"></a> <span class="bp">self</span><span class="o">.</span><span class="n">mask_decoder</span><span class="o">.</span><span class="n">set_mask_embedding</span><span class="p">(</span><span class="n">mask_embedding</span><span class="o">=</span><span class="n">mask_embedding</span><span class="p">)</span>
</span><span id="__span-0-150"><a id="__codelineno-0-150" name="__codelineno-0-150"></a> <span class="bp">self</span><span class="o">.</span><span class="n">mask_decoder</span><span class="o">.</span><span class="n">set_point_embedding</span><span class="p">(</span><span class="n">point_embedding</span><span class="o">=</span><span class="n">point_embedding</span><span class="p">)</span>
</span><span id="__span-0-151"><a id="__codelineno-0-151" name="__codelineno-0-151"></a> <span class="bp">self</span><span class="o">.</span><span class="n">mask_decoder</span><span class="o">.</span><span class="n">set_dense_positional_embedding</span><span class="p">(</span><span class="n">dense_positional_embedding</span><span class="o">=</span><span class="n">dense_positional_embedding</span><span class="p">)</span>
</span><span id="__span-0-152"><a id="__codelineno-0-152" name="__codelineno-0-152"></a>
</span><span id="__span-0-153"><a id="__codelineno-0-153" name="__codelineno-0-153"></a> <span class="n">low_res_masks</span><span class="p">,</span> <span class="n">iou_predictions</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">mask_decoder</span><span class="p">()</span>
</span><span id="__span-0-154"><a id="__codelineno-0-154" name="__codelineno-0-154"></a>
</span><span id="__span-0-155"><a id="__codelineno-0-155" name="__codelineno-0-155"></a> <span class="n">high_res_masks</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">postprocess_masks</span><span class="p">(</span><span class="n">low_res_masks</span><span class="p">,</span> <span class="n">original_size</span><span class="p">)</span>
</span><span id="__span-0-156"><a id="__codelineno-0-156" name="__codelineno-0-156"></a>
</span><span id="__span-0-157"><a id="__codelineno-0-157" name="__codelineno-0-157"></a> <span class="k">if</span> <span class="n">binarize</span><span class="p">:</span>
</span><span id="__span-0-158"><a id="__codelineno-0-158" name="__codelineno-0-158"></a> <span class="n">high_res_masks</span> <span class="o">=</span> <span class="n">high_res_masks</span> <span class="o">&gt;</span> <span class="bp">self</span><span class="o">.</span><span class="n">mask_threshold</span>
</span><span id="__span-0-159"><a id="__codelineno-0-159" name="__codelineno-0-159"></a>
</span><span id="__span-0-160"><a id="__codelineno-0-160" name="__codelineno-0-160"></a> <span class="k">return</span> <span class="n">high_res_masks</span><span class="p">,</span> <span class="n">iou_predictions</span><span class="p">,</span> <span class="n">low_res_masks</span>
</span></code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="refiners.foundationals.segment_anything.SegmentAnything.preprocess_image" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-method"></code> <span class="doc doc-object-name doc-function-name">preprocess_image</span>
<a href="#refiners.foundationals.segment_anything.SegmentAnything.preprocess_image" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="nf">preprocess_image</span><span class="p">(</span><span class="n">image</span><span class="p">:</span> <span class="n"><span title="PIL.Image.Image">Image</span></span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n"><span title="torch.Tensor">Tensor</span></span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p>See <a class="autorefs autorefs-internal" href="#refiners.foundationals.segment_anything.utils.preprocess_image"><code>preprocess_image</code></a>
Args:
image: The image to preprocess.
Returns:
The preprocessed tensor.</p>
<details class="quote">
<summary>Source code in <code>src/refiners/foundationals/segment_anything/model.py</code></summary>
<div class="language-python highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-169">169</a></span>
<span class="normal"><a href="#__codelineno-0-170">170</a></span>
<span class="normal"><a href="#__codelineno-0-171">171</a></span>
<span class="normal"><a href="#__codelineno-0-172">172</a></span>
<span class="normal"><a href="#__codelineno-0-173">173</a></span>
<span class="normal"><a href="#__codelineno-0-174">174</a></span>
<span class="normal"><a href="#__codelineno-0-175">175</a></span>
<span class="normal"><a href="#__codelineno-0-176">176</a></span>
<span class="normal"><a href="#__codelineno-0-177">177</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-169"><a id="__codelineno-0-169" name="__codelineno-0-169"></a><span class="k">def</span> <span class="nf">preprocess_image</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">image</span><span class="p">:</span> <span class="n">Image</span><span class="o">.</span><span class="n">Image</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Tensor</span><span class="p">:</span>
</span><span id="__span-0-170"><a id="__codelineno-0-170" name="__codelineno-0-170"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
</span><span id="__span-0-171"><a id="__codelineno-0-171" name="__codelineno-0-171"></a><span class="sd"> See [`preprocess_image`][refiners.foundationals.segment_anything.utils.preprocess_image]</span>
</span><span id="__span-0-172"><a id="__codelineno-0-172" name="__codelineno-0-172"></a><span class="sd"> Args:</span>
</span><span id="__span-0-173"><a id="__codelineno-0-173" name="__codelineno-0-173"></a><span class="sd"> image: The image to preprocess.</span>
</span><span id="__span-0-174"><a id="__codelineno-0-174" name="__codelineno-0-174"></a><span class="sd"> Returns:</span>
</span><span id="__span-0-175"><a id="__codelineno-0-175" name="__codelineno-0-175"></a><span class="sd"> The preprocessed tensor.</span>
</span><span id="__span-0-176"><a id="__codelineno-0-176" name="__codelineno-0-176"></a><span class="sd"> &quot;&quot;&quot;</span>
</span><span id="__span-0-177"><a id="__codelineno-0-177" name="__codelineno-0-177"></a> <span class="k">return</span> <span class="n">preprocess_image</span><span class="p">(</span><span class="n">image</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">image_encoder_resolution</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">device</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span>
</span></code></pre></div></td></tr></table></div>
</details>
</div>
</div>
</div>
</div>
</div>
<div class="doc doc-object doc-class">
<h2 id="refiners.foundationals.segment_anything.SegmentAnythingH" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-class"></code> <span class="doc doc-object-name doc-class-name">SegmentAnythingH</span>
<a href="#refiners.foundationals.segment_anything.SegmentAnythingH" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="nf">SegmentAnythingH</span><span class="p">(</span>
</span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a> <span class="n">image_encoder</span><span class="p">:</span> <span class="n"><span title="refiners.foundationals.segment_anything.image_encoder.SAMViTH">SAMViTH</span></span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a> <span class="n">point_encoder</span><span class="p">:</span> <span class="n"><span title="refiners.foundationals.segment_anything.prompt_encoder.PointEncoder">PointEncoder</span></span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-4"><a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a> <span class="n">mask_encoder</span><span class="p">:</span> <span class="n"><span title="refiners.foundationals.segment_anything.prompt_encoder.MaskEncoder">MaskEncoder</span></span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-5"><a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a> <span class="n">mask_decoder</span><span class="p">:</span> <span class="n"><span title="refiners.foundationals.segment_anything.mask_decoder.MaskDecoder">MaskDecoder</span></span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-6"><a id="__codelineno-0-6" name="__codelineno-0-6" href="#__codelineno-0-6"></a> <span class="n">multimask_output</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#bool">bool</a></span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-7"><a id="__codelineno-0-7" name="__codelineno-0-7" href="#__codelineno-0-7"></a> <span class="n">device</span><span class="p">:</span> <span class="n"><span title="torch.device">device</span></span> <span class="o">|</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#str">str</a></span> <span class="o">=</span> <span class="s2">&quot;cpu&quot;</span><span class="p">,</span>
</span><span id="__span-0-8"><a id="__codelineno-0-8" name="__codelineno-0-8" href="#__codelineno-0-8"></a> <span class="n">dtype</span><span class="p">:</span> <span class="n"><span title="torch.dtype">dtype</span></span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">float32</span><span class="p">,</span>
</span><span id="__span-0-9"><a id="__codelineno-0-9" name="__codelineno-0-9" href="#__codelineno-0-9"></a><span class="p">)</span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p class="doc doc-class-bases">
Bases: <code><a class="autorefs autorefs-internal" title="refiners.foundationals.segment_anything.model.SegmentAnything" href="#refiners.foundationals.segment_anything.SegmentAnything">SegmentAnything</a></code></p>
<p>SegmentAnything huge model.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td><code>image_encoder</code></td>
<td>
<code><span title="refiners.foundationals.segment_anything.image_encoder.SAMViTH">SAMViTH</span> | None</code>
</td>
<td>
<div class="doc-md-description">
<p>The image encoder to use.</p>
</div>
</td>
<td>
<code>None</code>
</td>
</tr>
<tr class="doc-section-item">
<td><code>point_encoder</code></td>
<td>
<code><span title="refiners.foundationals.segment_anything.prompt_encoder.PointEncoder">PointEncoder</span> | None</code>
</td>
<td>
<div class="doc-md-description">
<p>The point encoder to use.</p>
</div>
</td>
<td>
<code>None</code>
</td>
</tr>
<tr class="doc-section-item">
<td><code>mask_encoder</code></td>
<td>
<code><span title="refiners.foundationals.segment_anything.prompt_encoder.MaskEncoder">MaskEncoder</span> | None</code>
</td>
<td>
<div class="doc-md-description">
<p>The mask encoder to use.</p>
</div>
</td>
<td>
<code>None</code>
</td>
</tr>
<tr class="doc-section-item">
<td><code>mask_decoder</code></td>
<td>
<code><span title="refiners.foundationals.segment_anything.mask_decoder.MaskDecoder">MaskDecoder</span> | None</code>
</td>
<td>
<div class="doc-md-description">
<p>The mask decoder to use.</p>
</div>
</td>
<td>
<code>None</code>
</td>
</tr>
<tr class="doc-section-item">
<td><code>multimask_output</code></td>
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#bool">bool</a> | None</code>
</td>
<td>
<div class="doc-md-description">
<p>Whether to use multimask output.</p>
</div>
</td>
<td>
<code>None</code>
</td>
</tr>
<tr class="doc-section-item">
<td><code>device</code></td>
<td>
<code><span title="torch.device">device</span> | <a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#str">str</a></code>
</td>
<td>
<div class="doc-md-description">
<p>The PyTorch device to use.</p>
</div>
</td>
<td>
<code>&#39;cpu&#39;</code>
</td>
</tr>
<tr class="doc-section-item">
<td><code>dtype</code></td>
<td>
<code><span title="torch.dtype">dtype</span></code>
</td>
<td>
<div class="doc-md-description">
<p>The PyTorch data type to use.</p>
</div>
</td>
<td>
<code><span title="torch.float32">float32</span></code>
</td>
</tr>
</tbody>
</table>
<details class="example" open>
<summary>Example</summary>
<div class="language-py highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="n">device</span><span class="o">=</span><span class="s2">&quot;cuda&quot;</span> <span class="k">if</span> <span class="n">torch</span><span class="o">.</span><span class="n">cuda</span><span class="o">.</span><span class="n">is_available</span><span class="p">()</span> <span class="k">else</span> <span class="s2">&quot;cpu&quot;</span>
</span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a>
</span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a><span class="c1"># multimask_output=True is recommended for ambiguous prompts such as a single point.</span>
</span><span id="__span-0-4"><a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a><span class="c1"># Below, a box prompt is passed, so just use multimask_output=False which will return a single mask</span>
</span><span id="__span-0-5"><a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a><span class="n">sam_h</span> <span class="o">=</span> <span class="n">SegmentAnythingH</span><span class="p">(</span><span class="n">multimask_output</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">device</span><span class="o">=</span><span class="n">device</span><span class="p">)</span>
</span><span id="__span-0-6"><a id="__codelineno-0-6" name="__codelineno-0-6" href="#__codelineno-0-6"></a>
</span><span id="__span-0-7"><a id="__codelineno-0-7" name="__codelineno-0-7" href="#__codelineno-0-7"></a><span class="c1"># Tips: run scripts/prepare_test_weights.py to download the weights</span>
</span><span id="__span-0-8"><a id="__codelineno-0-8" name="__codelineno-0-8" href="#__codelineno-0-8"></a><span class="n">tensors_path</span> <span class="o">=</span> <span class="s2">&quot;./tests/weights/segment-anything-h.safetensors&quot;</span>
</span><span id="__span-0-9"><a id="__codelineno-0-9" name="__codelineno-0-9" href="#__codelineno-0-9"></a><span class="n">sam_h</span><span class="o">.</span><span class="n">load_from_safetensors</span><span class="p">(</span><span class="n">tensors_path</span><span class="o">=</span><span class="n">tensors_path</span><span class="p">)</span>
</span><span id="__span-0-10"><a id="__codelineno-0-10" name="__codelineno-0-10" href="#__codelineno-0-10"></a>
</span><span id="__span-0-11"><a id="__codelineno-0-11" name="__codelineno-0-11" href="#__codelineno-0-11"></a><span class="kn">from</span> <span class="nn">PIL</span> <span class="kn">import</span> <span class="n">Image</span>
</span><span id="__span-0-12"><a id="__codelineno-0-12" name="__codelineno-0-12" href="#__codelineno-0-12"></a><span class="n">image</span> <span class="o">=</span> <span class="n">Image</span><span class="o">.</span><span class="n">open</span><span class="p">(</span><span class="s2">&quot;image.png&quot;</span><span class="p">)</span>
</span><span id="__span-0-13"><a id="__codelineno-0-13" name="__codelineno-0-13" href="#__codelineno-0-13"></a>
</span><span id="__span-0-14"><a id="__codelineno-0-14" name="__codelineno-0-14" href="#__codelineno-0-14"></a><span class="n">masks</span><span class="p">,</span> <span class="o">*</span><span class="n">_</span> <span class="o">=</span> <span class="n">sam_h</span><span class="o">.</span><span class="n">predict</span><span class="p">(</span><span class="n">image</span><span class="p">,</span> <span class="n">box_points</span><span class="o">=</span><span class="p">[[(</span><span class="n">x1</span><span class="p">,</span> <span class="n">y1</span><span class="p">),</span> <span class="p">(</span><span class="n">x2</span><span class="p">,</span> <span class="n">y2</span><span class="p">)]])</span>
</span><span id="__span-0-15"><a id="__codelineno-0-15" name="__codelineno-0-15" href="#__codelineno-0-15"></a>
</span><span id="__span-0-16"><a id="__codelineno-0-16" name="__codelineno-0-16" href="#__codelineno-0-16"></a><span class="k">assert</span> <span class="n">masks</span><span class="o">.</span><span class="n">shape</span> <span class="o">==</span> <span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">image</span><span class="o">.</span><span class="n">height</span><span class="p">,</span> <span class="n">image</span><span class="o">.</span><span class="n">width</span><span class="p">)</span>
</span><span id="__span-0-17"><a id="__codelineno-0-17" name="__codelineno-0-17" href="#__codelineno-0-17"></a><span class="k">assert</span> <span class="n">masks</span><span class="o">.</span><span class="n">dtype</span> <span class="o">==</span> <span class="n">torch</span><span class="o">.</span><span class="n">bool</span>
</span><span id="__span-0-18"><a id="__codelineno-0-18" name="__codelineno-0-18" href="#__codelineno-0-18"></a>
</span><span id="__span-0-19"><a id="__codelineno-0-19" name="__codelineno-0-19" href="#__codelineno-0-19"></a><span class="c1"># convert it to [0,255] uint8 ndarray of shape (H, W)</span>
</span><span id="__span-0-20"><a id="__codelineno-0-20" name="__codelineno-0-20" href="#__codelineno-0-20"></a><span class="n">mask</span> <span class="o">=</span> <span class="n">masks</span><span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">cpu</span><span class="p">()</span><span class="o">.</span><span class="n">numpy</span><span class="p">()</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="s2">&quot;uint8&quot;</span><span class="p">)</span> <span class="o">*</span> <span class="mi">255</span>
</span><span id="__span-0-21"><a id="__codelineno-0-21" name="__codelineno-0-21" href="#__codelineno-0-21"></a>
</span><span id="__span-0-22"><a id="__codelineno-0-22" name="__codelineno-0-22" href="#__codelineno-0-22"></a><span class="n">Image</span><span class="o">.</span><span class="n">fromarray</span><span class="p">(</span><span class="n">mask</span><span class="p">)</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="s2">&quot;mask_image.png&quot;</span><span class="p">)</span>
</span></code></pre></div>
</details>
<details class="quote">
<summary>Source code in <code>src/refiners/foundationals/segment_anything/model.py</code></summary>
<div class="language-python highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-205">205</a></span>
<span class="normal"><a href="#__codelineno-0-206">206</a></span>
<span class="normal"><a href="#__codelineno-0-207">207</a></span>
<span class="normal"><a href="#__codelineno-0-208">208</a></span>
<span class="normal"><a href="#__codelineno-0-209">209</a></span>
<span class="normal"><a href="#__codelineno-0-210">210</a></span>
<span class="normal"><a href="#__codelineno-0-211">211</a></span>
<span class="normal"><a href="#__codelineno-0-212">212</a></span>
<span class="normal"><a href="#__codelineno-0-213">213</a></span>
<span class="normal"><a href="#__codelineno-0-214">214</a></span>
<span class="normal"><a href="#__codelineno-0-215">215</a></span>
<span class="normal"><a href="#__codelineno-0-216">216</a></span>
<span class="normal"><a href="#__codelineno-0-217">217</a></span>
<span class="normal"><a href="#__codelineno-0-218">218</a></span>
<span class="normal"><a href="#__codelineno-0-219">219</a></span>
<span class="normal"><a href="#__codelineno-0-220">220</a></span>
<span class="normal"><a href="#__codelineno-0-221">221</a></span>
<span class="normal"><a href="#__codelineno-0-222">222</a></span>
<span class="normal"><a href="#__codelineno-0-223">223</a></span>
<span class="normal"><a href="#__codelineno-0-224">224</a></span>
<span class="normal"><a href="#__codelineno-0-225">225</a></span>
<span class="normal"><a href="#__codelineno-0-226">226</a></span>
<span class="normal"><a href="#__codelineno-0-227">227</a></span>
<span class="normal"><a href="#__codelineno-0-228">228</a></span>
<span class="normal"><a href="#__codelineno-0-229">229</a></span>
<span class="normal"><a href="#__codelineno-0-230">230</a></span>
<span class="normal"><a href="#__codelineno-0-231">231</a></span>
<span class="normal"><a href="#__codelineno-0-232">232</a></span>
<span class="normal"><a href="#__codelineno-0-233">233</a></span>
<span class="normal"><a href="#__codelineno-0-234">234</a></span>
<span class="normal"><a href="#__codelineno-0-235">235</a></span>
<span class="normal"><a href="#__codelineno-0-236">236</a></span>
<span class="normal"><a href="#__codelineno-0-237">237</a></span>
<span class="normal"><a href="#__codelineno-0-238">238</a></span>
<span class="normal"><a href="#__codelineno-0-239">239</a></span>
<span class="normal"><a href="#__codelineno-0-240">240</a></span>
<span class="normal"><a href="#__codelineno-0-241">241</a></span>
<span class="normal"><a href="#__codelineno-0-242">242</a></span>
<span class="normal"><a href="#__codelineno-0-243">243</a></span>
<span class="normal"><a href="#__codelineno-0-244">244</a></span>
<span class="normal"><a href="#__codelineno-0-245">245</a></span>
<span class="normal"><a href="#__codelineno-0-246">246</a></span>
<span class="normal"><a href="#__codelineno-0-247">247</a></span>
<span class="normal"><a href="#__codelineno-0-248">248</a></span>
<span class="normal"><a href="#__codelineno-0-249">249</a></span>
<span class="normal"><a href="#__codelineno-0-250">250</a></span>
<span class="normal"><a href="#__codelineno-0-251">251</a></span>
<span class="normal"><a href="#__codelineno-0-252">252</a></span>
<span class="normal"><a href="#__codelineno-0-253">253</a></span>
<span class="normal"><a href="#__codelineno-0-254">254</a></span>
<span class="normal"><a href="#__codelineno-0-255">255</a></span>
<span class="normal"><a href="#__codelineno-0-256">256</a></span>
<span class="normal"><a href="#__codelineno-0-257">257</a></span>
<span class="normal"><a href="#__codelineno-0-258">258</a></span>
<span class="normal"><a href="#__codelineno-0-259">259</a></span>
<span class="normal"><a href="#__codelineno-0-260">260</a></span>
<span class="normal"><a href="#__codelineno-0-261">261</a></span>
<span class="normal"><a href="#__codelineno-0-262">262</a></span>
<span class="normal"><a href="#__codelineno-0-263">263</a></span>
<span class="normal"><a href="#__codelineno-0-264">264</a></span>
<span class="normal"><a href="#__codelineno-0-265">265</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-205"><a id="__codelineno-0-205" name="__codelineno-0-205"></a><span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
</span><span id="__span-0-206"><a id="__codelineno-0-206" name="__codelineno-0-206"></a> <span class="bp">self</span><span class="p">,</span>
</span><span id="__span-0-207"><a id="__codelineno-0-207" name="__codelineno-0-207"></a> <span class="n">image_encoder</span><span class="p">:</span> <span class="n">SAMViTH</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-208"><a id="__codelineno-0-208" name="__codelineno-0-208"></a> <span class="n">point_encoder</span><span class="p">:</span> <span class="n">PointEncoder</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-209"><a id="__codelineno-0-209" name="__codelineno-0-209"></a> <span class="n">mask_encoder</span><span class="p">:</span> <span class="n">MaskEncoder</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-210"><a id="__codelineno-0-210" name="__codelineno-0-210"></a> <span class="n">mask_decoder</span><span class="p">:</span> <span class="n">MaskDecoder</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-211"><a id="__codelineno-0-211" name="__codelineno-0-211"></a> <span class="n">multimask_output</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-212"><a id="__codelineno-0-212" name="__codelineno-0-212"></a> <span class="n">device</span><span class="p">:</span> <span class="n">Device</span> <span class="o">|</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;cpu&quot;</span><span class="p">,</span>
</span><span id="__span-0-213"><a id="__codelineno-0-213" name="__codelineno-0-213"></a> <span class="n">dtype</span><span class="p">:</span> <span class="n">DType</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">float32</span><span class="p">,</span>
</span><span id="__span-0-214"><a id="__codelineno-0-214" name="__codelineno-0-214"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
</span><span id="__span-0-215"><a id="__codelineno-0-215" name="__codelineno-0-215"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Initialize SegmentAnything huge model.</span>
</span><span id="__span-0-216"><a id="__codelineno-0-216" name="__codelineno-0-216"></a>
</span><span id="__span-0-217"><a id="__codelineno-0-217" name="__codelineno-0-217"></a><span class="sd"> Args:</span>
</span><span id="__span-0-218"><a id="__codelineno-0-218" name="__codelineno-0-218"></a><span class="sd"> image_encoder: The image encoder to use.</span>
</span><span id="__span-0-219"><a id="__codelineno-0-219" name="__codelineno-0-219"></a><span class="sd"> point_encoder: The point encoder to use.</span>
</span><span id="__span-0-220"><a id="__codelineno-0-220" name="__codelineno-0-220"></a><span class="sd"> mask_encoder: The mask encoder to use.</span>
</span><span id="__span-0-221"><a id="__codelineno-0-221" name="__codelineno-0-221"></a><span class="sd"> mask_decoder: The mask decoder to use.</span>
</span><span id="__span-0-222"><a id="__codelineno-0-222" name="__codelineno-0-222"></a><span class="sd"> multimask_output: Whether to use multimask output.</span>
</span><span id="__span-0-223"><a id="__codelineno-0-223" name="__codelineno-0-223"></a><span class="sd"> device: The PyTorch device to use.</span>
</span><span id="__span-0-224"><a id="__codelineno-0-224" name="__codelineno-0-224"></a><span class="sd"> dtype: The PyTorch data type to use.</span>
</span><span id="__span-0-225"><a id="__codelineno-0-225" name="__codelineno-0-225"></a>
</span><span id="__span-0-226"><a id="__codelineno-0-226" name="__codelineno-0-226"></a><span class="sd"> Example:</span>
</span><span id="__span-0-227"><a id="__codelineno-0-227" name="__codelineno-0-227"></a><span class="sd"> ```py</span>
</span><span id="__span-0-228"><a id="__codelineno-0-228" name="__codelineno-0-228"></a><span class="sd"> device=&quot;cuda&quot; if torch.cuda.is_available() else &quot;cpu&quot;</span>
</span><span id="__span-0-229"><a id="__codelineno-0-229" name="__codelineno-0-229"></a>
</span><span id="__span-0-230"><a id="__codelineno-0-230" name="__codelineno-0-230"></a><span class="sd"> # multimask_output=True is recommended for ambiguous prompts such as a single point.</span>
</span><span id="__span-0-231"><a id="__codelineno-0-231" name="__codelineno-0-231"></a><span class="sd"> # Below, a box prompt is passed, so just use multimask_output=False which will return a single mask</span>
</span><span id="__span-0-232"><a id="__codelineno-0-232" name="__codelineno-0-232"></a><span class="sd"> sam_h = SegmentAnythingH(multimask_output=False, device=device)</span>
</span><span id="__span-0-233"><a id="__codelineno-0-233" name="__codelineno-0-233"></a>
</span><span id="__span-0-234"><a id="__codelineno-0-234" name="__codelineno-0-234"></a><span class="sd"> # Tips: run scripts/prepare_test_weights.py to download the weights</span>
</span><span id="__span-0-235"><a id="__codelineno-0-235" name="__codelineno-0-235"></a><span class="sd"> tensors_path = &quot;./tests/weights/segment-anything-h.safetensors&quot;</span>
</span><span id="__span-0-236"><a id="__codelineno-0-236" name="__codelineno-0-236"></a><span class="sd"> sam_h.load_from_safetensors(tensors_path=tensors_path)</span>
</span><span id="__span-0-237"><a id="__codelineno-0-237" name="__codelineno-0-237"></a>
</span><span id="__span-0-238"><a id="__codelineno-0-238" name="__codelineno-0-238"></a><span class="sd"> from PIL import Image</span>
</span><span id="__span-0-239"><a id="__codelineno-0-239" name="__codelineno-0-239"></a><span class="sd"> image = Image.open(&quot;image.png&quot;)</span>
</span><span id="__span-0-240"><a id="__codelineno-0-240" name="__codelineno-0-240"></a>
</span><span id="__span-0-241"><a id="__codelineno-0-241" name="__codelineno-0-241"></a><span class="sd"> masks, *_ = sam_h.predict(image, box_points=[[(x1, y1), (x2, y2)]])</span>
</span><span id="__span-0-242"><a id="__codelineno-0-242" name="__codelineno-0-242"></a>
</span><span id="__span-0-243"><a id="__codelineno-0-243" name="__codelineno-0-243"></a><span class="sd"> assert masks.shape == (1, 1, image.height, image.width)</span>
</span><span id="__span-0-244"><a id="__codelineno-0-244" name="__codelineno-0-244"></a><span class="sd"> assert masks.dtype == torch.bool</span>
</span><span id="__span-0-245"><a id="__codelineno-0-245" name="__codelineno-0-245"></a>
</span><span id="__span-0-246"><a id="__codelineno-0-246" name="__codelineno-0-246"></a><span class="sd"> # convert it to [0,255] uint8 ndarray of shape (H, W)</span>
</span><span id="__span-0-247"><a id="__codelineno-0-247" name="__codelineno-0-247"></a><span class="sd"> mask = masks[0, 0].cpu().numpy().astype(&quot;uint8&quot;) * 255</span>
</span><span id="__span-0-248"><a id="__codelineno-0-248" name="__codelineno-0-248"></a>
</span><span id="__span-0-249"><a id="__codelineno-0-249" name="__codelineno-0-249"></a><span class="sd"> Image.fromarray(mask).save(&quot;mask_image.png&quot;)</span>
</span><span id="__span-0-250"><a id="__codelineno-0-250" name="__codelineno-0-250"></a><span class="sd"> ```</span>
</span><span id="__span-0-251"><a id="__codelineno-0-251" name="__codelineno-0-251"></a><span class="sd"> &quot;&quot;&quot;</span>
</span><span id="__span-0-252"><a id="__codelineno-0-252" name="__codelineno-0-252"></a> <span class="n">image_encoder</span> <span class="o">=</span> <span class="n">image_encoder</span> <span class="ow">or</span> <span class="n">SAMViTH</span><span class="p">()</span>
</span><span id="__span-0-253"><a id="__codelineno-0-253" name="__codelineno-0-253"></a> <span class="n">point_encoder</span> <span class="o">=</span> <span class="n">point_encoder</span> <span class="ow">or</span> <span class="n">PointEncoder</span><span class="p">()</span>
</span><span id="__span-0-254"><a id="__codelineno-0-254" name="__codelineno-0-254"></a> <span class="n">mask_encoder</span> <span class="o">=</span> <span class="n">mask_encoder</span> <span class="ow">or</span> <span class="n">MaskEncoder</span><span class="p">()</span>
</span><span id="__span-0-255"><a id="__codelineno-0-255" name="__codelineno-0-255"></a>
</span><span id="__span-0-256"><a id="__codelineno-0-256" name="__codelineno-0-256"></a> <span class="k">if</span> <span class="n">mask_decoder</span><span class="p">:</span>
</span><span id="__span-0-257"><a id="__codelineno-0-257" name="__codelineno-0-257"></a> <span class="k">assert</span> <span class="p">(</span>
</span><span id="__span-0-258"><a id="__codelineno-0-258" name="__codelineno-0-258"></a> <span class="n">multimask_output</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">mask_decoder</span><span class="o">.</span><span class="n">multimask_output</span> <span class="o">==</span> <span class="n">multimask_output</span>
</span><span id="__span-0-259"><a id="__codelineno-0-259" name="__codelineno-0-259"></a> <span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;mask_decoder.multimask_output </span><span class="si">{</span><span class="n">mask_decoder</span><span class="o">.</span><span class="n">multimask_output</span><span class="si">}</span><span class="s2"> should match multimask_output (</span><span class="si">{</span><span class="n">multimask_output</span><span class="si">}</span><span class="s2">)&quot;</span>
</span><span id="__span-0-260"><a id="__codelineno-0-260" name="__codelineno-0-260"></a> <span class="k">else</span><span class="p">:</span>
</span><span id="__span-0-261"><a id="__codelineno-0-261" name="__codelineno-0-261"></a> <span class="n">mask_decoder</span> <span class="o">=</span> <span class="n">MaskDecoder</span><span class="p">(</span><span class="n">multimask_output</span><span class="p">)</span> <span class="k">if</span> <span class="n">multimask_output</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="k">else</span> <span class="n">MaskDecoder</span><span class="p">()</span>
</span><span id="__span-0-262"><a id="__codelineno-0-262" name="__codelineno-0-262"></a>
</span><span id="__span-0-263"><a id="__codelineno-0-263" name="__codelineno-0-263"></a> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">image_encoder</span><span class="p">,</span> <span class="n">point_encoder</span><span class="p">,</span> <span class="n">mask_encoder</span><span class="p">,</span> <span class="n">mask_decoder</span><span class="p">)</span>
</span><span id="__span-0-264"><a id="__codelineno-0-264" name="__codelineno-0-264"></a>
</span><span id="__span-0-265"><a id="__codelineno-0-265" name="__codelineno-0-265"></a> <span class="bp">self</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">device</span><span class="o">=</span><span class="n">device</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span>
</span></code></pre></div></td></tr></table></div>
</details>
<div class="doc doc-children">
<div class="doc doc-object doc-attribute">
<h3 id="refiners.foundationals.segment_anything.SegmentAnythingH.image_encoder" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-attribute"></code> <span class="doc doc-object-name doc-attribute-name">image_encoder</span>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-property"><code>property</code></small>
</span>
<a href="#refiners.foundationals.segment_anything.SegmentAnythingH.image_encoder" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="n">image_encoder</span><span class="p">:</span> <span class="n"><span title="refiners.foundationals.segment_anything.image_encoder.SAMViTH">SAMViTH</span></span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p>The image encoder.</p>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
<div class="doc doc-object doc-module">
<div class="doc doc-contents first">
<div class="doc doc-children">
<div class="doc doc-object doc-function">
<h2 id="refiners.foundationals.segment_anything.utils.compute_scaled_size" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-function"></code> <span class="doc doc-object-name doc-function-name">compute_scaled_size</span>
<a href="#refiners.foundationals.segment_anything.utils.compute_scaled_size" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="nf">compute_scaled_size</span><span class="p">(</span>
</span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a> <span class="n">size</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a></span><span class="p">[</span><span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">,</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">],</span> <span class="n">image_encoder_resolution</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span>
</span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a></span><span class="p">[</span><span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">,</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">]</span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p>Compute the scaled size as expected by the image encoder.
This computed size keep the ratio of the input image, and scale it to fit inside the square (image_encoder_resolution, image_encoder_resolution) of image encoder.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td><code>size</code></td>
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a>[<a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a>, <a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a>]</code>
</td>
<td>
<div class="doc-md-description">
<p>The size (h, w) of the input image.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td><code>image_encoder_resolution</code></td>
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></code>
</td>
<td>
<div class="doc-md-description">
<p>Image encoder resolution.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Returns:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></code>
</td>
<td>
<div class="doc-md-description">
<p>The target height.</p>
</div>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></code>
</td>
<td>
<div class="doc-md-description">
<p>The target width.</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>src/refiners/foundationals/segment_anything/utils.py</code></summary>
<div class="language-python highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-7"> 7</a></span>
<span class="normal"><a href="#__codelineno-0-8"> 8</a></span>
<span class="normal"><a href="#__codelineno-0-9"> 9</a></span>
<span class="normal"><a href="#__codelineno-0-10">10</a></span>
<span class="normal"><a href="#__codelineno-0-11">11</a></span>
<span class="normal"><a href="#__codelineno-0-12">12</a></span>
<span class="normal"><a href="#__codelineno-0-13">13</a></span>
<span class="normal"><a href="#__codelineno-0-14">14</a></span>
<span class="normal"><a href="#__codelineno-0-15">15</a></span>
<span class="normal"><a href="#__codelineno-0-16">16</a></span>
<span class="normal"><a href="#__codelineno-0-17">17</a></span>
<span class="normal"><a href="#__codelineno-0-18">18</a></span>
<span class="normal"><a href="#__codelineno-0-19">19</a></span>
<span class="normal"><a href="#__codelineno-0-20">20</a></span>
<span class="normal"><a href="#__codelineno-0-21">21</a></span>
<span class="normal"><a href="#__codelineno-0-22">22</a></span>
<span class="normal"><a href="#__codelineno-0-23">23</a></span>
<span class="normal"><a href="#__codelineno-0-24">24</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-7"><a id="__codelineno-0-7" name="__codelineno-0-7"></a><span class="k">def</span> <span class="nf">compute_scaled_size</span><span class="p">(</span><span class="n">size</span><span class="p">:</span> <span class="nb">tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">],</span> <span class="n">image_encoder_resolution</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">]:</span>
</span><span id="__span-0-8"><a id="__codelineno-0-8" name="__codelineno-0-8"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Compute the scaled size as expected by the image encoder.</span>
</span><span id="__span-0-9"><a id="__codelineno-0-9" name="__codelineno-0-9"></a><span class="sd"> This computed size keep the ratio of the input image, and scale it to fit inside the square (image_encoder_resolution, image_encoder_resolution) of image encoder.</span>
</span><span id="__span-0-10"><a id="__codelineno-0-10" name="__codelineno-0-10"></a>
</span><span id="__span-0-11"><a id="__codelineno-0-11" name="__codelineno-0-11"></a><span class="sd"> Args:</span>
</span><span id="__span-0-12"><a id="__codelineno-0-12" name="__codelineno-0-12"></a><span class="sd"> size: The size (h, w) of the input image.</span>
</span><span id="__span-0-13"><a id="__codelineno-0-13" name="__codelineno-0-13"></a><span class="sd"> image_encoder_resolution: Image encoder resolution.</span>
</span><span id="__span-0-14"><a id="__codelineno-0-14" name="__codelineno-0-14"></a>
</span><span id="__span-0-15"><a id="__codelineno-0-15" name="__codelineno-0-15"></a><span class="sd"> Returns:</span>
</span><span id="__span-0-16"><a id="__codelineno-0-16" name="__codelineno-0-16"></a><span class="sd"> The target height.</span>
</span><span id="__span-0-17"><a id="__codelineno-0-17" name="__codelineno-0-17"></a><span class="sd"> The target width.</span>
</span><span id="__span-0-18"><a id="__codelineno-0-18" name="__codelineno-0-18"></a><span class="sd"> &quot;&quot;&quot;</span>
</span><span id="__span-0-19"><a id="__codelineno-0-19" name="__codelineno-0-19"></a> <span class="n">oldh</span><span class="p">,</span> <span class="n">oldw</span> <span class="o">=</span> <span class="n">size</span>
</span><span id="__span-0-20"><a id="__codelineno-0-20" name="__codelineno-0-20"></a> <span class="n">scale</span> <span class="o">=</span> <span class="n">image_encoder_resolution</span> <span class="o">*</span> <span class="mf">1.0</span> <span class="o">/</span> <span class="nb">max</span><span class="p">(</span><span class="n">oldh</span><span class="p">,</span> <span class="n">oldw</span><span class="p">)</span>
</span><span id="__span-0-21"><a id="__codelineno-0-21" name="__codelineno-0-21"></a> <span class="n">newh</span><span class="p">,</span> <span class="n">neww</span> <span class="o">=</span> <span class="n">oldh</span> <span class="o">*</span> <span class="n">scale</span><span class="p">,</span> <span class="n">oldw</span> <span class="o">*</span> <span class="n">scale</span>
</span><span id="__span-0-22"><a id="__codelineno-0-22" name="__codelineno-0-22"></a> <span class="n">neww</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">neww</span> <span class="o">+</span> <span class="mf">0.5</span><span class="p">)</span>
</span><span id="__span-0-23"><a id="__codelineno-0-23" name="__codelineno-0-23"></a> <span class="n">newh</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">newh</span> <span class="o">+</span> <span class="mf">0.5</span><span class="p">)</span>
</span><span id="__span-0-24"><a id="__codelineno-0-24" name="__codelineno-0-24"></a> <span class="k">return</span> <span class="p">(</span><span class="n">newh</span><span class="p">,</span> <span class="n">neww</span><span class="p">)</span>
</span></code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h2 id="refiners.foundationals.segment_anything.utils.image_to_scaled_tensor" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-function"></code> <span class="doc doc-object-name doc-function-name">image_to_scaled_tensor</span>
<a href="#refiners.foundationals.segment_anything.utils.image_to_scaled_tensor" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="nf">image_to_scaled_tensor</span><span class="p">(</span>
</span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a> <span class="n">image</span><span class="p">:</span> <span class="n"><span title="PIL.Image.Image">Image</span></span><span class="p">,</span>
</span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a> <span class="n">scaled_size</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a></span><span class="p">[</span><span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">,</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">],</span>
</span><span id="__span-0-4"><a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a> <span class="n">device</span><span class="p">:</span> <span class="n"><span title="torch.device">device</span></span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-5"><a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a> <span class="n">dtype</span><span class="p">:</span> <span class="n"><span title="torch.dtype">dtype</span></span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-6"><a id="__codelineno-0-6" name="__codelineno-0-6" href="#__codelineno-0-6"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="n"><span title="torch.Tensor">Tensor</span></span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p>Resize the image to <code>scaled_size</code> and convert it to a tensor.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td><code>image</code></td>
<td>
<code><span title="PIL.Image.Image">Image</span></code>
</td>
<td>
<div class="doc-md-description">
<p>The image.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td><code>scaled_size</code></td>
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a>[<a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a>, <a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a>]</code>
</td>
<td>
<div class="doc-md-description">
<p>The target size (h, w).</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td><code>device</code></td>
<td>
<code><span title="torch.device">device</span> | None</code>
</td>
<td>
<div class="doc-md-description">
<p>Tensor device.</p>
</div>
</td>
<td>
<code>None</code>
</td>
</tr>
<tr class="doc-section-item">
<td><code>dtype</code></td>
<td>
<code><span title="torch.dtype">dtype</span> | None</code>
</td>
<td>
<div class="doc-md-description">
<p>Tensor dtype.</p>
</div>
</td>
<td>
<code>None</code>
</td>
</tr>
</tbody>
</table>
<p>Returns:
a Tensor of shape (1, c, h, w)</p>
<details class="quote">
<summary>Source code in <code>src/refiners/foundationals/segment_anything/utils.py</code></summary>
<div class="language-python highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-27">27</a></span>
<span class="normal"><a href="#__codelineno-0-28">28</a></span>
<span class="normal"><a href="#__codelineno-0-29">29</a></span>
<span class="normal"><a href="#__codelineno-0-30">30</a></span>
<span class="normal"><a href="#__codelineno-0-31">31</a></span>
<span class="normal"><a href="#__codelineno-0-32">32</a></span>
<span class="normal"><a href="#__codelineno-0-33">33</a></span>
<span class="normal"><a href="#__codelineno-0-34">34</a></span>
<span class="normal"><a href="#__codelineno-0-35">35</a></span>
<span class="normal"><a href="#__codelineno-0-36">36</a></span>
<span class="normal"><a href="#__codelineno-0-37">37</a></span>
<span class="normal"><a href="#__codelineno-0-38">38</a></span>
<span class="normal"><a href="#__codelineno-0-39">39</a></span>
<span class="normal"><a href="#__codelineno-0-40">40</a></span>
<span class="normal"><a href="#__codelineno-0-41">41</a></span>
<span class="normal"><a href="#__codelineno-0-42">42</a></span>
<span class="normal"><a href="#__codelineno-0-43">43</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-27"><a id="__codelineno-0-27" name="__codelineno-0-27"></a><span class="k">def</span> <span class="nf">image_to_scaled_tensor</span><span class="p">(</span>
</span><span id="__span-0-28"><a id="__codelineno-0-28" name="__codelineno-0-28"></a> <span class="n">image</span><span class="p">:</span> <span class="n">Image</span><span class="o">.</span><span class="n">Image</span><span class="p">,</span> <span class="n">scaled_size</span><span class="p">:</span> <span class="nb">tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">],</span> <span class="n">device</span><span class="p">:</span> <span class="n">Device</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">dtype</span><span class="p">:</span> <span class="n">DType</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span>
</span><span id="__span-0-29"><a id="__codelineno-0-29" name="__codelineno-0-29"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Tensor</span><span class="p">:</span>
</span><span id="__span-0-30"><a id="__codelineno-0-30" name="__codelineno-0-30"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Resize the image to `scaled_size` and convert it to a tensor.</span>
</span><span id="__span-0-31"><a id="__codelineno-0-31" name="__codelineno-0-31"></a>
</span><span id="__span-0-32"><a id="__codelineno-0-32" name="__codelineno-0-32"></a><span class="sd"> Args:</span>
</span><span id="__span-0-33"><a id="__codelineno-0-33" name="__codelineno-0-33"></a><span class="sd"> image: The image.</span>
</span><span id="__span-0-34"><a id="__codelineno-0-34" name="__codelineno-0-34"></a><span class="sd"> scaled_size: The target size (h, w).</span>
</span><span id="__span-0-35"><a id="__codelineno-0-35" name="__codelineno-0-35"></a><span class="sd"> device: Tensor device.</span>
</span><span id="__span-0-36"><a id="__codelineno-0-36" name="__codelineno-0-36"></a><span class="sd"> dtype: Tensor dtype.</span>
</span><span id="__span-0-37"><a id="__codelineno-0-37" name="__codelineno-0-37"></a><span class="sd"> Returns:</span>
</span><span id="__span-0-38"><a id="__codelineno-0-38" name="__codelineno-0-38"></a><span class="sd"> a Tensor of shape (1, c, h, w)</span>
</span><span id="__span-0-39"><a id="__codelineno-0-39" name="__codelineno-0-39"></a><span class="sd"> &quot;&quot;&quot;</span>
</span><span id="__span-0-40"><a id="__codelineno-0-40" name="__codelineno-0-40"></a> <span class="n">h</span><span class="p">,</span> <span class="n">w</span> <span class="o">=</span> <span class="n">scaled_size</span>
</span><span id="__span-0-41"><a id="__codelineno-0-41" name="__codelineno-0-41"></a> <span class="n">resized</span> <span class="o">=</span> <span class="n">image</span><span class="o">.</span><span class="n">resize</span><span class="p">((</span><span class="n">w</span><span class="p">,</span> <span class="n">h</span><span class="p">),</span> <span class="n">resample</span><span class="o">=</span><span class="n">Image</span><span class="o">.</span><span class="n">Resampling</span><span class="o">.</span><span class="n">BILINEAR</span><span class="p">)</span> <span class="c1"># type: ignore</span>
</span><span id="__span-0-42"><a id="__codelineno-0-42" name="__codelineno-0-42"></a>
</span><span id="__span-0-43"><a id="__codelineno-0-43" name="__codelineno-0-43"></a> <span class="k">return</span> <span class="n">image_to_tensor</span><span class="p">(</span><span class="n">resized</span><span class="p">,</span> <span class="n">device</span><span class="o">=</span><span class="n">device</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span> <span class="o">*</span> <span class="mf">255.0</span>
</span></code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h2 id="refiners.foundationals.segment_anything.utils.normalize_coordinates" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-function"></code> <span class="doc doc-object-name doc-function-name">normalize_coordinates</span>
<a href="#refiners.foundationals.segment_anything.utils.normalize_coordinates" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="nf">normalize_coordinates</span><span class="p">(</span>
</span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a> <span class="n">coordinates</span><span class="p">:</span> <span class="n"><span title="torch.Tensor">Tensor</span></span><span class="p">,</span>
</span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a> <span class="n">original_size</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a></span><span class="p">[</span><span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">,</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">],</span>
</span><span id="__span-0-4"><a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a> <span class="n">image_encoder_resolution</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">,</span>
</span><span id="__span-0-5"><a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="n"><span title="torch.Tensor">Tensor</span></span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p>Normalize the coordinates in the [0,1] range</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td><code>coordinates</code></td>
<td>
<code><span title="torch.Tensor">Tensor</span></code>
</td>
<td>
<div class="doc-md-description">
<p>The coordinates to normalize.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td><code>original_size</code></td>
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a>[<a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a>, <a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a>]</code>
</td>
<td>
<div class="doc-md-description">
<p>The original image size.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td><code>image_encoder_resolution</code></td>
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></code>
</td>
<td>
<div class="doc-md-description">
<p>Image encoder resolution.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Returns:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><span title="torch.Tensor">Tensor</span></code>
</td>
<td>
<div class="doc-md-description">
<p>The normalized coordinates.</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>src/refiners/foundationals/segment_anything/utils.py</code></summary>
<div class="language-python highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-111">111</a></span>
<span class="normal"><a href="#__codelineno-0-112">112</a></span>
<span class="normal"><a href="#__codelineno-0-113">113</a></span>
<span class="normal"><a href="#__codelineno-0-114">114</a></span>
<span class="normal"><a href="#__codelineno-0-115">115</a></span>
<span class="normal"><a href="#__codelineno-0-116">116</a></span>
<span class="normal"><a href="#__codelineno-0-117">117</a></span>
<span class="normal"><a href="#__codelineno-0-118">118</a></span>
<span class="normal"><a href="#__codelineno-0-119">119</a></span>
<span class="normal"><a href="#__codelineno-0-120">120</a></span>
<span class="normal"><a href="#__codelineno-0-121">121</a></span>
<span class="normal"><a href="#__codelineno-0-122">122</a></span>
<span class="normal"><a href="#__codelineno-0-123">123</a></span>
<span class="normal"><a href="#__codelineno-0-124">124</a></span>
<span class="normal"><a href="#__codelineno-0-125">125</a></span>
<span class="normal"><a href="#__codelineno-0-126">126</a></span>
<span class="normal"><a href="#__codelineno-0-127">127</a></span>
<span class="normal"><a href="#__codelineno-0-128">128</a></span>
<span class="normal"><a href="#__codelineno-0-129">129</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-111"><a id="__codelineno-0-111" name="__codelineno-0-111"></a><span class="k">def</span> <span class="nf">normalize_coordinates</span><span class="p">(</span><span class="n">coordinates</span><span class="p">:</span> <span class="n">Tensor</span><span class="p">,</span> <span class="n">original_size</span><span class="p">:</span> <span class="nb">tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">],</span> <span class="n">image_encoder_resolution</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Tensor</span><span class="p">:</span>
</span><span id="__span-0-112"><a id="__codelineno-0-112" name="__codelineno-0-112"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Normalize the coordinates in the [0,1] range</span>
</span><span id="__span-0-113"><a id="__codelineno-0-113" name="__codelineno-0-113"></a>
</span><span id="__span-0-114"><a id="__codelineno-0-114" name="__codelineno-0-114"></a><span class="sd"> Args:</span>
</span><span id="__span-0-115"><a id="__codelineno-0-115" name="__codelineno-0-115"></a><span class="sd"> coordinates: The coordinates to normalize.</span>
</span><span id="__span-0-116"><a id="__codelineno-0-116" name="__codelineno-0-116"></a><span class="sd"> original_size: The original image size.</span>
</span><span id="__span-0-117"><a id="__codelineno-0-117" name="__codelineno-0-117"></a><span class="sd"> image_encoder_resolution: Image encoder resolution.</span>
</span><span id="__span-0-118"><a id="__codelineno-0-118" name="__codelineno-0-118"></a>
</span><span id="__span-0-119"><a id="__codelineno-0-119" name="__codelineno-0-119"></a><span class="sd"> Returns:</span>
</span><span id="__span-0-120"><a id="__codelineno-0-120" name="__codelineno-0-120"></a><span class="sd"> The normalized coordinates.</span>
</span><span id="__span-0-121"><a id="__codelineno-0-121" name="__codelineno-0-121"></a><span class="sd"> &quot;&quot;&quot;</span>
</span><span id="__span-0-122"><a id="__codelineno-0-122" name="__codelineno-0-122"></a> <span class="n">scaled_size</span> <span class="o">=</span> <span class="n">compute_scaled_size</span><span class="p">(</span><span class="n">original_size</span><span class="p">,</span> <span class="n">image_encoder_resolution</span><span class="p">)</span>
</span><span id="__span-0-123"><a id="__codelineno-0-123" name="__codelineno-0-123"></a> <span class="n">coordinates</span><span class="p">[:,</span> <span class="p">:,</span> <span class="mi">0</span><span class="p">]</span> <span class="o">=</span> <span class="p">(</span>
</span><span id="__span-0-124"><a id="__codelineno-0-124" name="__codelineno-0-124"></a> <span class="p">(</span><span class="n">coordinates</span><span class="p">[:,</span> <span class="p">:,</span> <span class="mi">0</span><span class="p">]</span> <span class="o">*</span> <span class="p">(</span><span class="n">scaled_size</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">/</span> <span class="n">original_size</span><span class="p">[</span><span class="mi">1</span><span class="p">]))</span> <span class="o">+</span> <span class="mf">0.5</span>
</span><span id="__span-0-125"><a id="__codelineno-0-125" name="__codelineno-0-125"></a> <span class="p">)</span> <span class="o">/</span> <span class="n">image_encoder_resolution</span>
</span><span id="__span-0-126"><a id="__codelineno-0-126" name="__codelineno-0-126"></a> <span class="n">coordinates</span><span class="p">[:,</span> <span class="p">:,</span> <span class="mi">1</span><span class="p">]</span> <span class="o">=</span> <span class="p">(</span>
</span><span id="__span-0-127"><a id="__codelineno-0-127" name="__codelineno-0-127"></a> <span class="p">(</span><span class="n">coordinates</span><span class="p">[:,</span> <span class="p">:,</span> <span class="mi">1</span><span class="p">]</span> <span class="o">*</span> <span class="p">(</span><span class="n">scaled_size</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">/</span> <span class="n">original_size</span><span class="p">[</span><span class="mi">0</span><span class="p">]))</span> <span class="o">+</span> <span class="mf">0.5</span>
</span><span id="__span-0-128"><a id="__codelineno-0-128" name="__codelineno-0-128"></a> <span class="p">)</span> <span class="o">/</span> <span class="n">image_encoder_resolution</span>
</span><span id="__span-0-129"><a id="__codelineno-0-129" name="__codelineno-0-129"></a> <span class="k">return</span> <span class="n">coordinates</span>
</span></code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h2 id="refiners.foundationals.segment_anything.utils.pad_image_tensor" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-function"></code> <span class="doc doc-object-name doc-function-name">pad_image_tensor</span>
<a href="#refiners.foundationals.segment_anything.utils.pad_image_tensor" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="nf">pad_image_tensor</span><span class="p">(</span>
</span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a> <span class="n">image_tensor</span><span class="p">:</span> <span class="n"><span title="torch.Tensor">Tensor</span></span><span class="p">,</span>
</span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a> <span class="n">scaled_size</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a></span><span class="p">[</span><span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">,</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">],</span>
</span><span id="__span-0-4"><a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a> <span class="n">image_encoder_resolution</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">,</span>
</span><span id="__span-0-5"><a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="n"><span title="torch.Tensor">Tensor</span></span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p>Pad an image with zeros to make it square.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td><code>image_tensor</code></td>
<td>
<code><span title="torch.Tensor">Tensor</span></code>
</td>
<td>
<div class="doc-md-description">
<p>The image tensor to pad.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td><code>scaled_size</code></td>
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a>[<a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a>, <a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a>]</code>
</td>
<td>
<div class="doc-md-description">
<p>The scaled size (h, w).</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td><code>image_encoder_resolution</code></td>
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></code>
</td>
<td>
<div class="doc-md-description">
<p>Image encoder resolution.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Returns:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><span title="torch.Tensor">Tensor</span></code>
</td>
<td>
<div class="doc-md-description">
<p>The padded image.</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>src/refiners/foundationals/segment_anything/utils.py</code></summary>
<div class="language-python highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-72">72</a></span>
<span class="normal"><a href="#__codelineno-0-73">73</a></span>
<span class="normal"><a href="#__codelineno-0-74">74</a></span>
<span class="normal"><a href="#__codelineno-0-75">75</a></span>
<span class="normal"><a href="#__codelineno-0-76">76</a></span>
<span class="normal"><a href="#__codelineno-0-77">77</a></span>
<span class="normal"><a href="#__codelineno-0-78">78</a></span>
<span class="normal"><a href="#__codelineno-0-79">79</a></span>
<span class="normal"><a href="#__codelineno-0-80">80</a></span>
<span class="normal"><a href="#__codelineno-0-81">81</a></span>
<span class="normal"><a href="#__codelineno-0-82">82</a></span>
<span class="normal"><a href="#__codelineno-0-83">83</a></span>
<span class="normal"><a href="#__codelineno-0-84">84</a></span>
<span class="normal"><a href="#__codelineno-0-85">85</a></span>
<span class="normal"><a href="#__codelineno-0-86">86</a></span>
<span class="normal"><a href="#__codelineno-0-87">87</a></span>
<span class="normal"><a href="#__codelineno-0-88">88</a></span>
<span class="normal"><a href="#__codelineno-0-89">89</a></span>
<span class="normal"><a href="#__codelineno-0-90">90</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-72"><a id="__codelineno-0-72" name="__codelineno-0-72"></a><span class="k">def</span> <span class="nf">pad_image_tensor</span><span class="p">(</span><span class="n">image_tensor</span><span class="p">:</span> <span class="n">Tensor</span><span class="p">,</span> <span class="n">scaled_size</span><span class="p">:</span> <span class="nb">tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">],</span> <span class="n">image_encoder_resolution</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Tensor</span><span class="p">:</span>
</span><span id="__span-0-73"><a id="__codelineno-0-73" name="__codelineno-0-73"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Pad an image with zeros to make it square.</span>
</span><span id="__span-0-74"><a id="__codelineno-0-74" name="__codelineno-0-74"></a>
</span><span id="__span-0-75"><a id="__codelineno-0-75" name="__codelineno-0-75"></a><span class="sd"> Args:</span>
</span><span id="__span-0-76"><a id="__codelineno-0-76" name="__codelineno-0-76"></a><span class="sd"> image_tensor: The image tensor to pad.</span>
</span><span id="__span-0-77"><a id="__codelineno-0-77" name="__codelineno-0-77"></a><span class="sd"> scaled_size: The scaled size (h, w).</span>
</span><span id="__span-0-78"><a id="__codelineno-0-78" name="__codelineno-0-78"></a><span class="sd"> image_encoder_resolution: Image encoder resolution.</span>
</span><span id="__span-0-79"><a id="__codelineno-0-79" name="__codelineno-0-79"></a>
</span><span id="__span-0-80"><a id="__codelineno-0-80" name="__codelineno-0-80"></a><span class="sd"> Returns:</span>
</span><span id="__span-0-81"><a id="__codelineno-0-81" name="__codelineno-0-81"></a><span class="sd"> The padded image.</span>
</span><span id="__span-0-82"><a id="__codelineno-0-82" name="__codelineno-0-82"></a><span class="sd"> &quot;&quot;&quot;</span>
</span><span id="__span-0-83"><a id="__codelineno-0-83" name="__codelineno-0-83"></a> <span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">image_tensor</span><span class="o">.</span><span class="n">shape</span><span class="p">)</span> <span class="o">==</span> <span class="mi">4</span>
</span><span id="__span-0-84"><a id="__codelineno-0-84" name="__codelineno-0-84"></a> <span class="k">assert</span> <span class="n">image_tensor</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span> <span class="o">&lt;=</span> <span class="n">image_encoder_resolution</span>
</span><span id="__span-0-85"><a id="__codelineno-0-85" name="__codelineno-0-85"></a> <span class="k">assert</span> <span class="n">image_tensor</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">3</span><span class="p">]</span> <span class="o">&lt;=</span> <span class="n">image_encoder_resolution</span>
</span><span id="__span-0-86"><a id="__codelineno-0-86" name="__codelineno-0-86"></a>
</span><span id="__span-0-87"><a id="__codelineno-0-87" name="__codelineno-0-87"></a> <span class="n">h</span><span class="p">,</span> <span class="n">w</span> <span class="o">=</span> <span class="n">scaled_size</span>
</span><span id="__span-0-88"><a id="__codelineno-0-88" name="__codelineno-0-88"></a> <span class="n">padh</span> <span class="o">=</span> <span class="n">image_encoder_resolution</span> <span class="o">-</span> <span class="n">h</span>
</span><span id="__span-0-89"><a id="__codelineno-0-89" name="__codelineno-0-89"></a> <span class="n">padw</span> <span class="o">=</span> <span class="n">image_encoder_resolution</span> <span class="o">-</span> <span class="n">w</span>
</span><span id="__span-0-90"><a id="__codelineno-0-90" name="__codelineno-0-90"></a> <span class="k">return</span> <span class="n">pad</span><span class="p">(</span><span class="n">image_tensor</span><span class="p">,</span> <span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">padw</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">padh</span><span class="p">))</span>
</span></code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h2 id="refiners.foundationals.segment_anything.utils.postprocess_masks" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-function"></code> <span class="doc doc-object-name doc-function-name">postprocess_masks</span>
<a href="#refiners.foundationals.segment_anything.utils.postprocess_masks" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="nf">postprocess_masks</span><span class="p">(</span>
</span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a> <span class="n">low_res_masks</span><span class="p">:</span> <span class="n"><span title="torch.Tensor">Tensor</span></span><span class="p">,</span>
</span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a> <span class="n">original_size</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a></span><span class="p">[</span><span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">,</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">],</span>
</span><span id="__span-0-4"><a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a> <span class="n">image_encoder_resolution</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">,</span>
</span><span id="__span-0-5"><a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="n"><span title="torch.Tensor">Tensor</span></span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p>Postprocess the masks to fit the original image size and remove zero-padding (if any).</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td><code>low_res_masks</code></td>
<td>
<code><span title="torch.Tensor">Tensor</span></code>
</td>
<td>
<div class="doc-md-description">
<p>The masks to postprocess.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td><code>original_size</code></td>
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/stdtypes.html#tuple">tuple</a>[<a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a>, <a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a>]</code>
</td>
<td>
<div class="doc-md-description">
<p>The original size (h, w).</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td><code>image_encoder_resolution</code></td>
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></code>
</td>
<td>
<div class="doc-md-description">
<p>Image encoder resolution.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Returns:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><span title="torch.Tensor">Tensor</span></code>
</td>
<td>
<div class="doc-md-description">
<p>The postprocessed masks.</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>src/refiners/foundationals/segment_anything/utils.py</code></summary>
<div class="language-python highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-93"> 93</a></span>
<span class="normal"><a href="#__codelineno-0-94"> 94</a></span>
<span class="normal"><a href="#__codelineno-0-95"> 95</a></span>
<span class="normal"><a href="#__codelineno-0-96"> 96</a></span>
<span class="normal"><a href="#__codelineno-0-97"> 97</a></span>
<span class="normal"><a href="#__codelineno-0-98"> 98</a></span>
<span class="normal"><a href="#__codelineno-0-99"> 99</a></span>
<span class="normal"><a href="#__codelineno-0-100">100</a></span>
<span class="normal"><a href="#__codelineno-0-101">101</a></span>
<span class="normal"><a href="#__codelineno-0-102">102</a></span>
<span class="normal"><a href="#__codelineno-0-103">103</a></span>
<span class="normal"><a href="#__codelineno-0-104">104</a></span>
<span class="normal"><a href="#__codelineno-0-105">105</a></span>
<span class="normal"><a href="#__codelineno-0-106">106</a></span>
<span class="normal"><a href="#__codelineno-0-107">107</a></span>
<span class="normal"><a href="#__codelineno-0-108">108</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-93"><a id="__codelineno-0-93" name="__codelineno-0-93"></a><span class="k">def</span> <span class="nf">postprocess_masks</span><span class="p">(</span><span class="n">low_res_masks</span><span class="p">:</span> <span class="n">Tensor</span><span class="p">,</span> <span class="n">original_size</span><span class="p">:</span> <span class="nb">tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">],</span> <span class="n">image_encoder_resolution</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Tensor</span><span class="p">:</span>
</span><span id="__span-0-94"><a id="__codelineno-0-94" name="__codelineno-0-94"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Postprocess the masks to fit the original image size and remove zero-padding (if any).</span>
</span><span id="__span-0-95"><a id="__codelineno-0-95" name="__codelineno-0-95"></a>
</span><span id="__span-0-96"><a id="__codelineno-0-96" name="__codelineno-0-96"></a><span class="sd"> Args:</span>
</span><span id="__span-0-97"><a id="__codelineno-0-97" name="__codelineno-0-97"></a><span class="sd"> low_res_masks: The masks to postprocess.</span>
</span><span id="__span-0-98"><a id="__codelineno-0-98" name="__codelineno-0-98"></a><span class="sd"> original_size: The original size (h, w).</span>
</span><span id="__span-0-99"><a id="__codelineno-0-99" name="__codelineno-0-99"></a><span class="sd"> image_encoder_resolution: Image encoder resolution.</span>
</span><span id="__span-0-100"><a id="__codelineno-0-100" name="__codelineno-0-100"></a>
</span><span id="__span-0-101"><a id="__codelineno-0-101" name="__codelineno-0-101"></a><span class="sd"> Returns:</span>
</span><span id="__span-0-102"><a id="__codelineno-0-102" name="__codelineno-0-102"></a><span class="sd"> The postprocessed masks.</span>
</span><span id="__span-0-103"><a id="__codelineno-0-103" name="__codelineno-0-103"></a><span class="sd"> &quot;&quot;&quot;</span>
</span><span id="__span-0-104"><a id="__codelineno-0-104" name="__codelineno-0-104"></a> <span class="n">scaled_size</span> <span class="o">=</span> <span class="n">compute_scaled_size</span><span class="p">(</span><span class="n">original_size</span><span class="p">,</span> <span class="n">image_encoder_resolution</span><span class="p">)</span>
</span><span id="__span-0-105"><a id="__codelineno-0-105" name="__codelineno-0-105"></a> <span class="n">masks</span> <span class="o">=</span> <span class="n">interpolate</span><span class="p">(</span><span class="n">low_res_masks</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="n">Size</span><span class="p">((</span><span class="n">image_encoder_resolution</span><span class="p">,</span> <span class="n">image_encoder_resolution</span><span class="p">)),</span> <span class="n">mode</span><span class="o">=</span><span class="s2">&quot;bilinear&quot;</span><span class="p">)</span>
</span><span id="__span-0-106"><a id="__codelineno-0-106" name="__codelineno-0-106"></a> <span class="n">masks</span> <span class="o">=</span> <span class="n">masks</span><span class="p">[</span><span class="o">...</span><span class="p">,</span> <span class="p">:</span> <span class="n">scaled_size</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="p">:</span> <span class="n">scaled_size</span><span class="p">[</span><span class="mi">1</span><span class="p">]]</span> <span class="c1"># remove padding added at `preprocess_image` time</span>
</span><span id="__span-0-107"><a id="__codelineno-0-107" name="__codelineno-0-107"></a> <span class="n">masks</span> <span class="o">=</span> <span class="n">interpolate</span><span class="p">(</span><span class="n">masks</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="n">Size</span><span class="p">(</span><span class="n">original_size</span><span class="p">),</span> <span class="n">mode</span><span class="o">=</span><span class="s2">&quot;bilinear&quot;</span><span class="p">)</span>
</span><span id="__span-0-108"><a id="__codelineno-0-108" name="__codelineno-0-108"></a> <span class="k">return</span> <span class="n">masks</span>
</span></code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h2 id="refiners.foundationals.segment_anything.utils.preprocess_image" class="doc doc-heading">
<code class="doc-symbol doc-symbol-heading doc-symbol-function"></code> <span class="doc doc-object-name doc-function-name">preprocess_image</span>
<a href="#refiners.foundationals.segment_anything.utils.preprocess_image" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="language-python doc-signature highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="nf">preprocess_image</span><span class="p">(</span>
</span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a> <span class="n">image</span><span class="p">:</span> <span class="n"><span title="PIL.Image.Image">Image</span></span><span class="p">,</span>
</span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a> <span class="n">image_encoder_resolution</span><span class="p">:</span> <span class="n"><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></span><span class="p">,</span>
</span><span id="__span-0-4"><a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a> <span class="n">device</span><span class="p">:</span> <span class="n"><span title="torch.device">device</span></span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-5"><a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a> <span class="n">dtype</span><span class="p">:</span> <span class="n"><span title="torch.dtype">dtype</span></span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
</span><span id="__span-0-6"><a id="__codelineno-0-6" name="__codelineno-0-6" href="#__codelineno-0-6"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="n"><span title="torch.Tensor">Tensor</span></span>
</span></code></pre></div>
<div class="doc doc-contents ">
<p>Preprocess an image without distorting its aspect ratio.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td><code>image</code></td>
<td>
<code><span title="PIL.Image.Image">Image</span></code>
</td>
<td>
<div class="doc-md-description">
<p>The image to preprocess before calling the image encoder.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td><code>image_encoder_resolution</code></td>
<td>
<code><a class="autorefs autorefs-external" href="https://docs.python.org/3/library/functions.html#int">int</a></code>
</td>
<td>
<div class="doc-md-description">
<p>Image encoder resolution.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td><code>device</code></td>
<td>
<code><span title="torch.device">device</span> | None</code>
</td>
<td>
<div class="doc-md-description">
<p>Tensor device (None by default).</p>
</div>
</td>
<td>
<code>None</code>
</td>
</tr>
<tr class="doc-section-item">
<td><code>dtype</code></td>
<td>
<code><span title="torch.dtype">dtype</span> | None</code>
</td>
<td>
<div class="doc-md-description">
<p>Tensor dtype (None by default).</p>
</div>
</td>
<td>
<code>None</code>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Returns:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><span title="torch.Tensor">Tensor</span></code>
</td>
<td>
<div class="doc-md-description">
<p>The preprocessed image.</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>src/refiners/foundationals/segment_anything/utils.py</code></summary>
<div class="language-python highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-46">46</a></span>
<span class="normal"><a href="#__codelineno-0-47">47</a></span>
<span class="normal"><a href="#__codelineno-0-48">48</a></span>
<span class="normal"><a href="#__codelineno-0-49">49</a></span>
<span class="normal"><a href="#__codelineno-0-50">50</a></span>
<span class="normal"><a href="#__codelineno-0-51">51</a></span>
<span class="normal"><a href="#__codelineno-0-52">52</a></span>
<span class="normal"><a href="#__codelineno-0-53">53</a></span>
<span class="normal"><a href="#__codelineno-0-54">54</a></span>
<span class="normal"><a href="#__codelineno-0-55">55</a></span>
<span class="normal"><a href="#__codelineno-0-56">56</a></span>
<span class="normal"><a href="#__codelineno-0-57">57</a></span>
<span class="normal"><a href="#__codelineno-0-58">58</a></span>
<span class="normal"><a href="#__codelineno-0-59">59</a></span>
<span class="normal"><a href="#__codelineno-0-60">60</a></span>
<span class="normal"><a href="#__codelineno-0-61">61</a></span>
<span class="normal"><a href="#__codelineno-0-62">62</a></span>
<span class="normal"><a href="#__codelineno-0-63">63</a></span>
<span class="normal"><a href="#__codelineno-0-64">64</a></span>
<span class="normal"><a href="#__codelineno-0-65">65</a></span>
<span class="normal"><a href="#__codelineno-0-66">66</a></span>
<span class="normal"><a href="#__codelineno-0-67">67</a></span>
<span class="normal"><a href="#__codelineno-0-68">68</a></span>
<span class="normal"><a href="#__codelineno-0-69">69</a></span></pre></div></td><td class="code"><div><pre><span></span><code><span id="__span-0-46"><a id="__codelineno-0-46" name="__codelineno-0-46"></a><span class="k">def</span> <span class="nf">preprocess_image</span><span class="p">(</span>
</span><span id="__span-0-47"><a id="__codelineno-0-47" name="__codelineno-0-47"></a> <span class="n">image</span><span class="p">:</span> <span class="n">Image</span><span class="o">.</span><span class="n">Image</span><span class="p">,</span> <span class="n">image_encoder_resolution</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">device</span><span class="p">:</span> <span class="n">Device</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">dtype</span><span class="p">:</span> <span class="n">DType</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span>
</span><span id="__span-0-48"><a id="__codelineno-0-48" name="__codelineno-0-48"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Tensor</span><span class="p">:</span>
</span><span id="__span-0-49"><a id="__codelineno-0-49" name="__codelineno-0-49"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Preprocess an image without distorting its aspect ratio.</span>
</span><span id="__span-0-50"><a id="__codelineno-0-50" name="__codelineno-0-50"></a>
</span><span id="__span-0-51"><a id="__codelineno-0-51" name="__codelineno-0-51"></a><span class="sd"> Args:</span>
</span><span id="__span-0-52"><a id="__codelineno-0-52" name="__codelineno-0-52"></a><span class="sd"> image: The image to preprocess before calling the image encoder.</span>
</span><span id="__span-0-53"><a id="__codelineno-0-53" name="__codelineno-0-53"></a><span class="sd"> image_encoder_resolution: Image encoder resolution.</span>
</span><span id="__span-0-54"><a id="__codelineno-0-54" name="__codelineno-0-54"></a><span class="sd"> device: Tensor device (None by default).</span>
</span><span id="__span-0-55"><a id="__codelineno-0-55" name="__codelineno-0-55"></a><span class="sd"> dtype: Tensor dtype (None by default).</span>
</span><span id="__span-0-56"><a id="__codelineno-0-56" name="__codelineno-0-56"></a>
</span><span id="__span-0-57"><a id="__codelineno-0-57" name="__codelineno-0-57"></a><span class="sd"> Returns:</span>
</span><span id="__span-0-58"><a id="__codelineno-0-58" name="__codelineno-0-58"></a><span class="sd"> The preprocessed image.</span>
</span><span id="__span-0-59"><a id="__codelineno-0-59" name="__codelineno-0-59"></a><span class="sd"> &quot;&quot;&quot;</span>
</span><span id="__span-0-60"><a id="__codelineno-0-60" name="__codelineno-0-60"></a>
</span><span id="__span-0-61"><a id="__codelineno-0-61" name="__codelineno-0-61"></a> <span class="n">scaled_size</span> <span class="o">=</span> <span class="n">compute_scaled_size</span><span class="p">((</span><span class="n">image</span><span class="o">.</span><span class="n">height</span><span class="p">,</span> <span class="n">image</span><span class="o">.</span><span class="n">width</span><span class="p">),</span> <span class="n">image_encoder_resolution</span><span class="p">)</span>
</span><span id="__span-0-62"><a id="__codelineno-0-62" name="__codelineno-0-62"></a>
</span><span id="__span-0-63"><a id="__codelineno-0-63" name="__codelineno-0-63"></a> <span class="n">image_tensor</span> <span class="o">=</span> <span class="n">image_to_scaled_tensor</span><span class="p">(</span><span class="n">image</span><span class="p">,</span> <span class="n">scaled_size</span><span class="p">,</span> <span class="n">device</span><span class="o">=</span><span class="n">device</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span>
</span><span id="__span-0-64"><a id="__codelineno-0-64" name="__codelineno-0-64"></a>
</span><span id="__span-0-65"><a id="__codelineno-0-65" name="__codelineno-0-65"></a> <span class="k">return</span> <span class="n">pad_image_tensor</span><span class="p">(</span>
</span><span id="__span-0-66"><a id="__codelineno-0-66" name="__codelineno-0-66"></a> <span class="n">normalize</span><span class="p">(</span><span class="n">image_tensor</span><span class="p">,</span> <span class="n">mean</span><span class="o">=</span><span class="p">[</span><span class="mf">123.675</span><span class="p">,</span> <span class="mf">116.28</span><span class="p">,</span> <span class="mf">103.53</span><span class="p">],</span> <span class="n">std</span><span class="o">=</span><span class="p">[</span><span class="mf">58.395</span><span class="p">,</span> <span class="mf">57.12</span><span class="p">,</span> <span class="mf">57.375</span><span class="p">]),</span>
</span><span id="__span-0-67"><a id="__codelineno-0-67" name="__codelineno-0-67"></a> <span class="n">scaled_size</span><span class="p">,</span>
</span><span id="__span-0-68"><a id="__codelineno-0-68" name="__codelineno-0-68"></a> <span class="n">image_encoder_resolution</span><span class="p">,</span>
</span><span id="__span-0-69"><a id="__codelineno-0-69" name="__codelineno-0-69"></a> <span class="p">)</span>
</span></code></pre></div></td></tr></table></div>
</details>
</div>
</div>
</div>
</div>
</div>
</article>
</div>
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
</div>
<button type="button" class="md-top md-icon" data-md-component="top" hidden>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8z"/></svg>
Back to top
</button>
</main>
<footer class="md-footer">
<div class="md-footer-meta md-typeset">
<div class="md-footer-meta__inner md-grid">
<div class="md-copyright">
<div class="md-copyright__highlight">
© Lagon Technologies
</div>
Made with
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
Material for MkDocs
</a>
</div>
<div class="md-social">
<a href="https://discord.gg/mCmjNUVV7d" target="_blank" rel="noopener" title="discord.gg" class="md-social__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 640 512"><!--! Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M524.531 69.836a1.5 1.5 0 0 0-.764-.7A485 485 0 0 0 404.081 32.03a1.82 1.82 0 0 0-1.923.91 338 338 0 0 0-14.9 30.6 447.9 447.9 0 0 0-134.426 0 310 310 0 0 0-15.135-30.6 1.89 1.89 0 0 0-1.924-.91 483.7 483.7 0 0 0-119.688 37.107 1.7 1.7 0 0 0-.788.676C39.068 183.651 18.186 294.69 28.43 404.354a2.02 2.02 0 0 0 .765 1.375 487.7 487.7 0 0 0 146.825 74.189 1.9 1.9 0 0 0 2.063-.676A348 348 0 0 0 208.12 430.4a1.86 1.86 0 0 0-1.019-2.588 321 321 0 0 1-45.868-21.853 1.885 1.885 0 0 1-.185-3.126 251 251 0 0 0 9.109-7.137 1.82 1.82 0 0 1 1.9-.256c96.229 43.917 200.41 43.917 295.5 0a1.81 1.81 0 0 1 1.924.233 235 235 0 0 0 9.132 7.16 1.884 1.884 0 0 1-.162 3.126 301.4 301.4 0 0 1-45.89 21.83 1.875 1.875 0 0 0-1 2.611 391 391 0 0 0 30.014 48.815 1.86 1.86 0 0 0 2.063.7A486 486 0 0 0 610.7 405.729a1.88 1.88 0 0 0 .765-1.352c12.264-126.783-20.532-236.912-86.934-334.541M222.491 337.58c-28.972 0-52.844-26.587-52.844-59.239s23.409-59.241 52.844-59.241c29.665 0 53.306 26.82 52.843 59.239 0 32.654-23.41 59.241-52.843 59.241m195.38 0c-28.971 0-52.843-26.587-52.843-59.239s23.409-59.241 52.843-59.241c29.667 0 53.307 26.82 52.844 59.239 0 32.654-23.177 59.241-52.844 59.241"/></svg>
</a>
<a href="https://github.com/finegrain-ai/refiners" target="_blank" rel="noopener" title="github.com" class="md-social__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 496 512"><!--! Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M165.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M244.8 8C106.1 8 0 113.3 0 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C428.2 457.8 496 362.9 496 252 496 113.3 383.5 8 244.8 8M97.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
</a>
<a href="https://twitter.com/finegrain_ai" target="_blank" rel="noopener" title="twitter.com" class="md-social__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M459.37 151.716c.325 4.548.325 9.097.325 13.645 0 138.72-105.583 298.558-298.558 298.558-59.452 0-114.68-17.219-161.137-47.106 8.447.974 16.568 1.299 25.34 1.299 49.055 0 94.213-16.568 130.274-44.832-46.132-.975-84.792-31.188-98.112-72.772 6.498.974 12.995 1.624 19.818 1.624 9.421 0 18.843-1.3 27.614-3.573-48.081-9.747-84.143-51.98-84.143-102.985v-1.299c13.969 7.797 30.214 12.67 47.431 13.319-28.264-18.843-46.781-51.005-46.781-87.391 0-19.492 5.197-37.36 14.294-52.954 51.655 63.675 129.3 105.258 216.365 109.807-1.624-7.797-2.599-15.918-2.599-24.04 0-57.828 46.782-104.934 104.934-104.934 30.213 0 57.502 12.67 76.67 33.137 23.715-4.548 46.456-13.32 66.599-25.34-7.798 24.366-24.366 44.833-46.132 57.827 21.117-2.273 41.584-8.122 60.426-16.243-14.292 20.791-32.161 39.308-52.628 54.253"/></svg>
</a>
<a href="https://www.linkedin.com/company/finegrain-ai/" target="_blank" rel="noopener" title="www.linkedin.com" class="md-social__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M416 32H31.9C14.3 32 0 46.5 0 64.3v383.4C0 465.5 14.3 480 31.9 480H416c17.6 0 32-14.5 32-32.3V64.3c0-17.8-14.4-32.3-32-32.3M135.4 416H69V202.2h66.5V416zm-33.2-243c-21.3 0-38.5-17.3-38.5-38.5S80.9 96 102.2 96c21.2 0 38.5 17.3 38.5 38.5 0 21.3-17.2 38.5-38.5 38.5m282.1 243h-66.4V312c0-24.8-.5-56.7-34.5-56.7-34.6 0-39.9 27-39.9 54.9V416h-66.4V202.2h63.7v29.2h.9c8.9-16.8 30.6-34.5 62.9-34.5 67.2 0 79.7 44.3 79.7 101.9z"/></svg>
</a>
</div>
</div>
</div>
</footer>
</div>
<div class="md-dialog" data-md-component="dialog">
<div class="md-dialog__inner md-typeset"></div>
</div>
<script id="__config" type="application/json">{"base": "../../..", "features": ["navigation.tabs", "navigation.sections", "navigation.top", "navigation.tracking", "navigation.expand", "navigation.path", "toc.follow", "navigation.tabs.sticky", "content.code.copy", "announce.dismiss"], "search": "../../../assets/javascripts/workers/search.6ce7567c.min.js", "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}}</script>
<script src="../../../assets/javascripts/bundle.56dfad97.min.js"></script>
</body>
</html>