# REVA-QCAV/src/utils/dataset.py

import logging
from os import listdir
from os.path import splitext
from pathlib import Path
import albumentations as A
import numpy as np
import torch
from PIL import Image
from torch.utils.data import Dataset
class SphereDataset(Dataset):
    """Dataset of sphere images and (optionally) their segmentation masks.

    Pairs each image in ``images_dir`` with the mask whose filename is the
    image stem plus ``mask_suffix`` in ``masks_dir``. Items are returned as
    ``{"image": float CHW tensor in [0, 1], "mask": long label tensor}``.
    """

    def __init__(
        self,
        images_dir: str,
        transform: "A.Compose",
        masks_dir: str = None,
        scale: float = 1.0,
        mask_suffix: str = "",
    ):
        """
        Args:
            images_dir: directory containing the input images.
            transform: albumentations pipeline; stored for callers
                (not applied inside ``__getitem__`` in the visible code).
            masks_dir: directory containing the masks; required to index
                the dataset via ``__getitem__``.
            scale: resize factor used by ``preprocess`` (must be > 0).
            mask_suffix: suffix appended to an image stem when globbing
                for its mask file.

        Raises:
            RuntimeError: if ``images_dir`` contains no usable files.
        """
        self.images_dir = Path(images_dir)
        self.masks_dir = Path(masks_dir) if masks_dir else None
        # BUG FIX: transform/scale/mask_suffix were accepted (or implicitly
        # required) but never stored, so __getitem__ crashed with
        # AttributeError on self.scale / self.mask_suffix.
        self.transform = transform
        self.scale = scale
        self.mask_suffix = mask_suffix
        # One ID per non-hidden file; the extension is resolved later by glob.
        self.ids = [splitext(file)[0] for file in listdir(images_dir) if not file.startswith(".")]
        if not self.ids:
            raise RuntimeError(f"No input file found in {images_dir}, make sure you put your images there")
        logging.info(f"Creating dataset with {len(self.ids)} examples")

    def __len__(self):
        """Number of image/mask pairs."""
        return len(self.ids)

    @staticmethod
    def preprocess(pil_img, scale, is_mask):
        """Resize a PIL image by ``scale`` and convert it to a numpy array.

        Masks are resized with nearest-neighbour interpolation (to keep
        label values intact) and returned as integer labels; images are
        resized bicubically, converted to CHW layout and normalised to
        [0, 1].

        Raises:
            ValueError: if the scaled size would have zero pixels.
        """
        w, h = pil_img.size
        new_w, new_h = int(scale * w), int(scale * h)
        if new_w <= 0 or new_h <= 0:
            # Raise instead of assert: asserts are stripped under `python -O`.
            raise ValueError("Scale is too small, resized images would have no pixel")
        pil_img = pil_img.resize((new_w, new_h), resample=Image.NEAREST if is_mask else Image.BICUBIC)
        img_ndarray = np.asarray(pil_img)
        if not is_mask:
            if img_ndarray.ndim == 2:
                # Grayscale: add an explicit channel axis -> (1, H, W).
                img_ndarray = img_ndarray[np.newaxis, ...]
            else:
                # HWC -> CHW.
                img_ndarray = img_ndarray.transpose((2, 0, 1))
            img_ndarray = img_ndarray / 255
        return img_ndarray

    @staticmethod
    def load(filename):
        """Open ``filename`` as a PIL image.

        Supports plain image files, ``.npy`` arrays and ``.pt``/``.pth``
        torch tensors.
        """
        ext = splitext(filename)[1]
        if ext in [".npz", ".npy"]:
            # NOTE(review): np.load on a .npz returns an NpzFile archive, not
            # an ndarray, so Image.fromarray would fail for .npz — presumably
            # only .npy is actually used; confirm against the data on disk.
            return Image.fromarray(np.load(filename))
        elif ext in [".pt", ".pth"]:
            return Image.fromarray(torch.load(filename).numpy())
        else:
            return Image.open(filename)

    def __getitem__(self, idx):
        """Load, validate and preprocess the ``idx``-th image/mask pair."""
        if self.masks_dir is None:
            # Clear failure instead of AttributeError on None.glob below.
            raise RuntimeError("masks_dir is required to index this dataset")
        name = self.ids[idx]
        mask_file = list(self.masks_dir.glob(name + self.mask_suffix + ".*"))
        img_file = list(self.images_dir.glob(name + ".*"))
        assert len(img_file) == 1, f"Either no image or multiple images found for the ID {name}: {img_file}"
        assert len(mask_file) == 1, f"Either no mask or multiple masks found for the ID {name}: {mask_file}"
        mask = self.load(mask_file[0])
        img = self.load(img_file[0])
        assert (
            img.size == mask.size
        ), f"Image and mask {name} should be the same size, but are {img.size} and {mask.size}"
        img = self.preprocess(img, self.scale, is_mask=False)
        mask = self.preprocess(mask, self.scale, is_mask=True)
        # .copy() guarantees a writable, owned buffer before tensor conversion.
        return {
            "image": torch.as_tensor(img.copy()).float().contiguous(),
            "mask": torch.as_tensor(mask.copy()).long().contiguous(),
        }