refiners/tests/foundationals/segment_anything/utils.py

from collections.abc import Sequence
from dataclasses import dataclass
from typing import Any, TypedDict

import numpy as np
import numpy.typing as npt
import torch
from jaxtyping import Bool
from torch import Tensor, nn

NDArrayUInt8 = npt.NDArray[np.uint8]
NDArray = npt.NDArray[Any]


class SAMInput(TypedDict):
    image: Tensor
    original_size: tuple[int, int]
    point_coords: Tensor | None
    point_labels: Tensor | None
    boxes: Tensor | None
    mask_inputs: Tensor | None


class SAMOutput(TypedDict):
    masks: Tensor
    iou_predictions: Tensor
    low_res_logits: Tensor


class FacebookSAM(nn.Module):
    image_encoder: nn.Module
    prompt_encoder: nn.Module
    mask_decoder: nn.Module

    def __call__(self, batched_input: list[SAMInput], multimask_output: bool) -> list[SAMOutput]: ...

    @property
    def device(self) -> Any: ...


class FacebookSAMPredictor:
    model: FacebookSAM

    def set_image(self, image: NDArrayUInt8, image_format: str = "RGB") -> None: ...

    def predict(
        self,
        point_coords: NDArray | None = None,
        point_labels: NDArray | None = None,
        box: NDArray | None = None,
        mask_input: NDArray | None = None,
        multimask_output: bool = True,
        return_logits: bool = False,
    ) -> tuple[NDArray, NDArray, NDArray]: ...
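
# Note: `FacebookSAM` and `FacebookSAMPredictor` above are typing stubs for the
# reference implementation: as far as these tests need, they mirror the interfaces
# of `Sam` and `SamPredictor` from the official `segment_anything` package, which
# provides the actual objects at runtime.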

@dataclass
class SAMPrompt:
    foreground_points: Sequence[tuple[float, float]] | None = None
    background_points: Sequence[tuple[float, float]] | None = None
    box_points: Sequence[Sequence[tuple[float, float]]] | None = None
    low_res_mask: Tensor | None = None

    def facebook_predict_kwargs(self) -> dict[str, NDArray]:
        prompt: dict[str, NDArray] = {}
        # Note: the order matters, since `points_to_tensor` processes points in the
        # same order (background points first, then foreground points).
        if self.background_points:
            # SAM's convention: label 0 marks a background point, label 1 a foreground point.
            prompt["point_coords"] = np.array(self.background_points)
            prompt["point_labels"] = np.array([0] * len(self.background_points))
        if self.foreground_points:
            coords = np.array(self.foreground_points)
            prompt["point_coords"] = (
                coords if "point_coords" not in prompt else np.concatenate((prompt["point_coords"], coords))
            )
            labels = np.array([1] * len(self.foreground_points))
            prompt["point_labels"] = (
                labels if "point_labels" not in prompt else np.concatenate((prompt["point_labels"], labels))
            )
        if self.box_points:
            # Flatten each box (a sequence of (x, y) corners) into one XYXY row per box.
            prompt["box"] = np.array([coord for batch in self.box_points for xy in batch for coord in xy]).reshape(
                len(self.box_points), 4
            )
        if self.low_res_mask is not None:
            prompt["mask_input"] = np.array(self.low_res_mask)
        return prompt
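
    # Example (a minimal sketch, not from the original file): converting a prompt into
    # keyword arguments for `FacebookSAMPredictor.predict`. The `predictor` below is
    # hypothetical and must already have an image set via `set_image`; the coordinates
    # are made up for illustration.
    #
    #     prompt = SAMPrompt(
    #         foreground_points=[(320.0, 240.0)],
    #         background_points=[(16.0, 16.0)],
    #     )
    #     masks, iou_predictions, low_res_logits = predictor.predict(
    #         **prompt.facebook_predict_kwargs(),
    #         multimask_output=True,
    #     )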

    def facebook_prompt_encoder_kwargs(
        self, device: torch.device | None = None
    ) -> dict[str, Tensor | tuple[Tensor, Tensor | None] | None]:
        prompt = self.facebook_predict_kwargs()
        # Same prompt as above, converted to batched tensors (`unsqueeze(0)` adds the
        # batch dimension), keyed as the prompt encoder expects.
        coords: Tensor | None = None
        labels: Tensor | None = None
        boxes: Tensor | None = None
        masks: Tensor | None = None
        if "point_coords" in prompt:
            coords = torch.as_tensor(prompt["point_coords"], dtype=torch.float, device=device).unsqueeze(0)
        if "point_labels" in prompt:
            labels = torch.as_tensor(prompt["point_labels"], dtype=torch.int, device=device).unsqueeze(0)
        if "box" in prompt:
            boxes = torch.as_tensor(prompt["box"], dtype=torch.float, device=device).unsqueeze(0)
        points = (coords, labels) if coords is not None else None
        if "mask_input" in prompt:
            masks = torch.as_tensor(prompt["mask_input"], dtype=torch.float, device=device).unsqueeze(0)
        return {"points": points, "boxes": boxes, "masks": masks}
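
# Example (a sketch, not from the original file): feeding the kwargs above directly to
# the prompt encoder of a `FacebookSAM` model. In the reference `segment_anything`
# package, `prompt_encoder` takes `points`, `boxes` and `masks` and returns the sparse
# and dense prompt embeddings consumed by the mask decoder.
#
#     sam: FacebookSAM = ...  # hypothetical, e.g. obtained via `build_sam`
#     sparse_embeddings, dense_embeddings = sam.prompt_encoder(
#         **prompt.facebook_prompt_encoder_kwargs(device=sam.device)
#     )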


def intersection_over_union(
    input_mask: Bool[Tensor, "height width"], other_mask: Bool[Tensor, "height width"]
) -> float:
    inter = (input_mask & other_mask).sum(dtype=torch.float32).item()
    union = (input_mask | other_mask).sum(dtype=torch.float32).item()
    # Two empty masks have an empty union; treat them as a perfect match (IoU = 1.0).
    return inter / union if union > 0 else 1.0
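
# Example: with a 2x2 predicted block inside a 2x4 target strip, the intersection is
# 4 pixels and the union 8, so the IoU is 0.5 (made-up masks for illustration):
#
#     pred = torch.zeros(4, 4, dtype=torch.bool)
#     pred[:2, :2] = True
#     target = torch.zeros(4, 4, dtype=torch.bool)
#     target[:2, :] = True
#     assert intersection_over_union(pred, target) == 0.5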