add comfyui custom nodes

2024-11-24 23:28:45 +00:00 · 2024-09-05 09:30:48 +00:00 · 2024-09-05 09:30:48 +00:00 · b67efb26df
parent b6f547d14a
commit b67efb26df
9 changed files with 510 additions and 0 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@ -92,6 +92,7 @@ dev-dependencies = [
    "pytest>=8.0.0",
    "coverage>=7.4.1",
    "typos>=1.18.2",
    "comfy-cli>=1.1.6",
 ]
--- a/src/comfyui-refiners/LICENSE
+++ b/src/comfyui-refiners/LICENSE
@ -0,0 +1 @@
 ../../LICENSE
--- a/src/comfyui-refiners/README.md
+++ b/src/comfyui-refiners/README.md
@ -0,0 +1,30 @@
 <div align="center">
 <picture>
  <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/finegrain-ai/refiners/main/assets/logo_dark.png">
  <source media="(prefers-color-scheme: light)" srcset="https://raw.githubusercontent.com/finegrain-ai/refiners/main/assets/logo_light.png">
  <img alt="Finegrain Refiners Library" width="352" height="128" style="max-width: 100%;">
 </picture>
 **The simplest way to train and run adapters on top of foundation models**
 [**Manifesto**](https://refine.rs/home/why/) |
 [**Docs**](https://refine.rs) |
 [**Guides**](https://refine.rs/guides/adapting_sdxl/) |
 [**Discussions**](https://github.com/finegrain-ai/refiners/discussions) |
 [**Discord**](https://discord.gg/mCmjNUVV7d)
 </div>
 ## Installation
 The nodes are published at https://registry.comfy.org/publishers/finegrain/nodes/comfyui-refiners.
 To easily install the nodes, run the following command:
 ```bash
 comfy node registry-install comfyui-refiners
 ```
 You may also download the nodes by clicking the "Download Latest" button and unzipping the content of the archive into your custom_nodes directory.
 See https://docs.comfy.org/registry/overview for more information.
--- a/src/comfyui-refiners/init.py
+++ b/src/comfyui-refiners/init.py
@ -0,0 +1,15 @@
 from typing import Any
 from .box_segmenter import NODE_CLASS_MAPPINGS as box_segmenter_mappings
 from .grounding_dino import NODE_CLASS_MAPPINGS as grounding_dino_mappings
 from .huggingface import NODE_CLASS_MAPPINGS as huggingface_mappings
 from .utils import NODE_CLASS_MAPPINGS as utils_mappings
 # Merge the per-module registries into one; on a key clash the module listed last wins.
 NODE_CLASS_MAPPINGS: dict[str, Any] = {
    **box_segmenter_mappings,
    **grounding_dino_mappings,
    **huggingface_mappings,
    **utils_mappings,
 }
 # Every node is displayed under the name of its class.
 NODE_DISPLAY_NAME_MAPPINGS = {key: node.__name__ for key, node in NODE_CLASS_MAPPINGS.items()}
 __all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"]
--- a/src/comfyui-refiners/box_segmenter.py
+++ b/src/comfyui-refiners/box_segmenter.py
@ -0,0 +1,105 @@
 from typing import Any
 import torch
 from refiners.fluxion.utils import image_to_tensor, no_grad, tensor_to_image
 from refiners.solutions import BoxSegmenter as _BoxSegmenter
 from refiners.solutions.box_segmenter import BoundingBox
 class LoadBoxSegmenter:
    """Node that instantiates a refiners BoxSegmenter from a checkpoint."""
    RETURN_TYPES = ("MODEL",)
    RETURN_NAMES = ("model",)
    DESCRIPTION = "Load a BoxSegmenter refiners model."
    CATEGORY = "Refiners/Solutions"
    FUNCTION = "load"
    @classmethod
    def INPUT_TYPES(cls) -> dict[str, Any]:
        """Describe the node inputs: checkpoint path, bbox margin and torch device."""
        margin_options = {
            "default": 0.05,
            "min": 0.0,
            "max": 1.0,
            "step": 0.01,
        }
        required = {
            "checkpoint": ("PATH", {}),
            "margin": ("FLOAT", margin_options),
            "device": ("STRING", {"default": "cuda"}),
        }
        return {"required": required}
    def load(self, checkpoint: str, margin: float, device: str) -> tuple[_BoxSegmenter]:
        """Build a BoxSegmenter refiners model.
        Args:
            checkpoint: The path to the checkpoint file, forwarded as the model weights.
            margin: The bbox margin to use when processing images.
            device: The torch device to load the model on.
        Returns:
            A one-element tuple holding the BoxSegmenter model instance.
        """
        segmenter = _BoxSegmenter(weights=checkpoint, margin=margin, device=device)
        return (segmenter,)
 class BoxSegmenter:
    """Node that runs a BoxSegmenter model on an image, optionally restricted to a bbox."""
    RETURN_TYPES = ("MASK",)
    RETURN_NAMES = ("mask",)
    DESCRIPTION = "Segment an image using a BoxSegmenter model and a bbox."
    CATEGORY = "Refiners/Solutions"
    FUNCTION = "process"
    @classmethod
    def INPUT_TYPES(cls) -> dict[str, Any]:
        """Describe the node inputs: a model and an image, plus an optional bbox."""
        required = {
            "model": ("MODEL", {}),
            "image": ("IMAGE", {}),
        }
        optional = {
            "bbox": ("BOUNDING_BOX", {}),
        }
        return {"required": required, "optional": optional}
    @no_grad()
    def process(
        self,
        model: _BoxSegmenter,
        image: torch.Tensor,
        bbox: BoundingBox | None = None,
    ) -> tuple[torch.Tensor]:
        """Compute the segmentation mask of an image with a BoxSegmenter model.
        Args:
            model: The BoxSegmenter model to use.
            image: The input image to process.
            bbox: Where in the image to apply the model, if anywhere in particular.
        Returns:
            A one-element tuple holding the mask of the segmented object.
        """
        # Move the last axis to position 1 before the PIL conversion
        # (presumably channels-last -> channels-first; confirm against the caller).
        channels_first = image.permute(0, 3, 1, 2)
        segmented = model(img=tensor_to_image(channels_first), box_prompt=bbox)
        # Drop axis 1 of the converted mask so it is returned without that dimension.
        return (image_to_tensor(segmented).squeeze(1),)
 # Nodes exported by this module, registered under their class names.
 NODE_CLASS_MAPPINGS: dict[str, Any] = {node.__name__: node for node in (BoxSegmenter, LoadBoxSegmenter)}
--- a/src/comfyui-refiners/grounding_dino.py
+++ b/src/comfyui-refiners/grounding_dino.py
@ -0,0 +1,191 @@
 from typing import Any, Sequence
 import torch
 from transformers import GroundingDinoForObjectDetection, GroundingDinoProcessor  # type: ignore
 from refiners.fluxion.utils import no_grad, tensor_to_image
 from .utils import BoundingBox, get_dtype
 class LoadGroundingDino:
    """Node that loads a grounding dino processor and model from a checkpoint folder."""
    RETURN_TYPES = ("PROCESSOR", "MODEL")
    RETURN_NAMES = ("processor", "model")
    DESCRIPTION = "Load a grounding dino model."
    CATEGORY = "Refiners/Solutions"
    FUNCTION = "load"
    @classmethod
    def INPUT_TYPES(cls) -> dict[str, Any]:
        """Describe the node inputs: checkpoint path, dtype name and torch device."""
        required = {
            "checkpoint": ("PATH", {}),
            "dtype": ("STRING", {"default": "float32"}),
            "device": ("STRING", {"default": "cuda"}),
        }
        return {"required": required}
    def load(
        self,
        checkpoint: str,
        dtype: str,
        device: str,
    ) -> tuple[GroundingDinoProcessor, GroundingDinoForObjectDetection]:
        """Instantiate the grounding dino processor and model.
        Args:
            checkpoint: The path to the checkpoint folder.
            dtype: The name of the torch data type to use.
            device: The torch device to load the model on.
        Returns:
            The grounding dino processor and model instances, in that order.
        """
        processor = GroundingDinoProcessor.from_pretrained(checkpoint)  # type: ignore
        assert isinstance(processor, GroundingDinoProcessor)
        # The dtype name is resolved only once the processor is loaded, then the weights are moved.
        torch_dtype = get_dtype(dtype)
        detector = GroundingDinoForObjectDetection.from_pretrained(checkpoint, torch_dtype=torch_dtype)  # type: ignore
        detector = detector.to(device=device)  # type: ignore
        assert isinstance(detector, GroundingDinoForObjectDetection)
        return (processor, detector)
 # NOTE: not yet natively supported in Refiners, hence the transformers dependency
 class GroundingDino:
    """Node that detects what a text prompt describes and returns the union of the found boxes."""
    @classmethod
    def INPUT_TYPES(cls) -> dict[str, Any]:
        """Describe the node inputs: processor, model, image, prompt and both score thresholds."""
        return {
            "required": {
                "processor": ("PROCESSOR", {}),
                "model": ("MODEL", {}),
                "image": ("IMAGE", {}),
                "prompt": ("STRING", {}),
                "box_threshold": (
                    "FLOAT",
                    {
                        "default": 0.25,
                        "min": 0.0,
                        "max": 1.0,
                        "step": 0.01,
                    },
                ),
                "text_threshold": (
                    "FLOAT",
                    {
                        "default": 0.25,
                        "min": 0.0,
                        "max": 1.0,
                        "step": 0.01,
                    },
                ),
            },
        }
    RETURN_TYPES = ("BOUNDING_BOX",)
    RETURN_NAMES = ("bbox",)
    DESCRIPTION = "Detect an object in an image using a GroundingDino model."
    CATEGORY = "Refiners/Solutions"
    FUNCTION = "process"
    @staticmethod
    def _normalize_prompt(prompt: str) -> str:
        """Format a text query as lower case, trimmed, and ending with exactly one added dot at most.
        Mirrors the caption preprocessing of the reference implementation:
        https://github.com/IDEA-Research/GroundingDINO/blob/856dde2/groundingdino/util/inference.py#L22-L26
        """
        query = prompt.lower().strip()
        # Only append the dot when it is missing, so "cat." does not become "cat..".
        return query if query.endswith(".") else f"{query}."
    @staticmethod
    def corners_to_pixels_format(
        bboxes: torch.Tensor,
        width: int,
        height: int,
    ) -> torch.Tensor:
        """Round corner coordinates to int32 and clamp them to the image extent.
        Args:
            bboxes: Boxes whose last dimension holds (x1, y1, x2, y2).
            width: Upper bound for the x coordinates.
            height: Upper bound for the y coordinates.
        Returns:
            The boxes as an int32 tensor, x clamped to [0, width] and y to [0, height].
        """
        x1, y1, x2, y2 = bboxes.round().to(torch.int32).unbind(-1)
        return torch.stack(
            tensors=(
                x1.clamp_(0, width),
                y1.clamp_(0, height),
                x2.clamp_(0, width),
                y2.clamp_(0, height),
            ),
            dim=-1,
        )
    @staticmethod
    def bbox_union(bboxes: Sequence[list[int]]) -> BoundingBox | None:
        """Return the smallest box enclosing every given box, or None when there are none."""
        if not bboxes:
            return None
        for bbox in bboxes:
            assert len(bbox) == 4
            assert all(isinstance(x, int) for x in bbox)
        return (
            min(bbox[0] for bbox in bboxes),
            min(bbox[1] for bbox in bboxes),
            max(bbox[2] for bbox in bboxes),
            max(bbox[3] for bbox in bboxes),
        )
    @no_grad()
    def process(
        self,
        processor: GroundingDinoProcessor,
        model: GroundingDinoForObjectDetection,
        image: torch.Tensor,
        prompt: str,
        box_threshold: float,
        text_threshold: float,
    ) -> tuple[BoundingBox]:
        """Detect an object in an image using a GroundingDino model and a text prompt.
        Args:
            processor: The image processor to use.
            model: The grounding dino model to use.
            image: The input image to detect in.
            prompt: The text prompt of what to detect in the image.
            box_threshold: The score threshold for the bounding boxes.
            text_threshold: The score threshold for the text.
        Returns:
            The union of the bounding boxes found in the image.
        Raises:
            AssertionError: If no bounding box passes the thresholds.
        """
        # prepare the inputs
        pil_image = tensor_to_image(image.permute(0, 3, 1, 2))
        # NOTE: queries must be in lower case + end with a dot. See:
        # https://github.com/IDEA-Research/GroundingDINO/blob/856dde2/groundingdino/util/inference.py#L22-L26
        inputs = processor(
            images=pil_image,
            text=self._normalize_prompt(prompt),
            return_tensors="pt",
        ).to(device=model.device)
        # get the model's prediction
        outputs = model(**inputs)
        # post-process the model's prediction
        results: dict[str, Any] = processor.post_process_grounded_object_detection(  # type: ignore
            outputs=outputs,
            input_ids=inputs["input_ids"],
            target_sizes=[(pil_image.height, pil_image.width)],
            box_threshold=box_threshold,
            text_threshold=text_threshold,
        )[0]
        # retrieve the bounding boxes
        assert "boxes" in results
        bboxes = results["boxes"].cpu()  # type: ignore
        assert isinstance(bboxes, torch.Tensor)
        assert bboxes.shape[0] != 0, "No bounding boxes found. Try adjusting the thresholds or pick another prompt."
        bboxes = self.corners_to_pixels_format(bboxes, pil_image.width, pil_image.height)  # type: ignore
        # compute the union of the bounding boxes
        bbox = self.bbox_union(bboxes.numpy().tolist())  # type: ignore
        assert bbox is not None
        return (bbox,)
 # Nodes exported by this module, registered under their class names.
 NODE_CLASS_MAPPINGS: dict[str, Any] = {node.__name__: node for node in (GroundingDino, LoadGroundingDino)}
--- a/src/comfyui-refiners/huggingface.py
+++ b/src/comfyui-refiners/huggingface.py
@ -0,0 +1,63 @@
 from pathlib import Path
 from typing import Any
 from huggingface_hub import hf_hub_download, snapshot_download  # type: ignore
 class HfHubDownload:
    """Node that downloads a single file, or a whole repository, from the HuggingFace Hub."""
    @classmethod
    def INPUT_TYPES(cls) -> dict[str, Any]:
        """Describe the node inputs: a required repo id, plus optional filename and revision."""
        return {
            "required": {
                "repo_id": ("STRING", {}),
            },
            "optional": {
                "filename": ("STRING", {}),
                "revision": (
                    "STRING",
                    {
                        "default": "main",
                    },
                ),
            },
        }
    RETURN_TYPES = ("PATH",)
    RETURN_NAMES = ("path",)
    DESCRIPTION = "Download file(s) from the HuggingFace Hub."
    CATEGORY = "Refiners/HuggingFace"
    FUNCTION = "download"
    def download(
        self,
        repo_id: str,
        filename: str = "",
        revision: str = "main",
    ) -> tuple[Path]:
        """Download file(s) from the HuggingFace Hub.
        Args:
            repo_id: The HuggingFace repository ID.
            filename: The filename to download, if empty, the entire repository will be downloaded.
            revision: The git revision to download.
        Returns:
            The path to the downloaded file(s).
        """
        # `filename` and `revision` are declared optional in INPUT_TYPES, so they carry
        # defaults here; otherwise omitting them would fail with a missing-argument error.
        if not filename:
            # No specific file requested: fetch the whole repository.
            path = snapshot_download(
                repo_id=repo_id,
                revision=revision,
            )
        else:
            path = hf_hub_download(
                repo_id=repo_id,
                filename=filename,
                revision=revision,
            )
        return (Path(path),)
 # Nodes exported by this module, registered under their class names.
 NODE_CLASS_MAPPINGS: dict[str, Any] = {node.__name__: node for node in (HfHubDownload,)}
--- a/src/comfyui-refiners/pyproject.toml
+++ b/src/comfyui-refiners/pyproject.toml
@ -0,0 +1,18 @@
 [project]
 name = "comfyui-refiners"
 description = "ComfyUI custom nodes for refiners models"
 version = "1.0.1"
 license = { file = "LICENSE" }
 dependencies = [
    "refiners @ git+https://github.com/finegrain-ai/refiners.git",
    "huggingface_hub",
    "transformers",
 ]
 [project.urls]
 Repository = "https://github.com/finegrain-ai/refiners"
 [tool.comfy]
 PublisherId = "finegrain"
 DisplayName = "refiners"
 Icon = "https://raw.githubusercontent.com/finegrain-ai/refiners/main/assets/dropy.png"
--- a/src/comfyui-refiners/utils.py
+++ b/src/comfyui-refiners/utils.py
@ -0,0 +1,86 @@
 from typing import Any
 import torch
 from PIL import ImageDraw
 from refiners.fluxion.utils import image_to_tensor, tensor_to_image
 BoundingBox = tuple[int, int, int, int]
 class DrawBoundingBox:
    """Node that outlines a bounding box on top of an image."""
    RETURN_TYPES = ("IMAGE",)
    RETURN_NAMES = ("image",)
    DESCRIPTION = "Draw a bounding box on an image."
    CATEGORY = "Refiners/Helpers"
    FUNCTION = "process"
    @classmethod
    def INPUT_TYPES(cls) -> dict[str, Any]:
        """Describe the node inputs: image, bbox, outline color and outline width."""
        required = {
            "image": ("IMAGE", {}),
            "bbox": ("BOUNDING_BOX", {}),
            "color": ("STRING", {"default": "red"}),
            "width": ("INT", {"default": 3}),
        }
        return {"required": required}
    def process(
        self,
        image: torch.Tensor,
        bbox: BoundingBox,
        color: str,
        width: int,
    ) -> tuple[torch.Tensor]:
        """Outline a bounding box on an image.
        Args:
            image: The image to draw on.
            bbox: The bounding box to draw.
            color: The color of the bounding box outline.
            width: The width of the bounding box outline.
        Returns:
            A one-element tuple holding the image with the box drawn on it.
        """
        # Convert to a PIL image (axes reordered first), draw in place, then convert back.
        canvas = tensor_to_image(image.permute(0, 3, 1, 2))
        ImageDraw.Draw(canvas).rectangle(bbox, outline=color, width=width)
        # Undo the axis reordering applied before the conversion.
        return (image_to_tensor(canvas).permute(0, 2, 3, 1),)
 def get_dtype(dtype: str) -> torch.dtype:
    """Translate a dtype name, or one of its aliases, into the matching torch.dtype.
    See also https://pytorch.org/docs/stable/tensor_attributes.html#torch-dtype
    Args:
        dtype: The dtype name, e.g. "float32", "half" or "long".
    Returns:
        The corresponding torch.dtype.
    Raises:
        ValueError: If the name is not one of the supported names.
    """
    by_name: dict[str, torch.dtype] = {
        "float32": torch.float32,
        "float": torch.float32,
        "float64": torch.float64,
        "double": torch.float64,
        "complex64": torch.complex64,
        "cfloat": torch.complex64,
        "complex128": torch.complex128,
        "cdouble": torch.complex128,
        "float16": torch.float16,
        "half": torch.float16,
        "bfloat16": torch.bfloat16,
        "uint8": torch.uint8,
        "int8": torch.int8,
        "int16": torch.int16,
        "short": torch.int16,
        "int32": torch.int32,
        "int": torch.int32,
        "int64": torch.int64,
        "long": torch.int64,
        "bool": torch.bool,
    }
    # Non-string values can never name a dtype; treat them like any unknown name.
    resolved = by_name.get(dtype) if isinstance(dtype, str) else None
    if resolved is None:
        raise ValueError(f"Unknown dtype: {dtype}")
    return resolved
 # Nodes exported by this module, registered under their class names.
 NODE_CLASS_MAPPINGS: dict[str, Any] = {node.__name__: node for node in (DrawBoundingBox,)}