# REVA-QCAV/src/data/dataset.py

from pathlib import Path
import albumentations as A
import numpy as np
from albumentations.pytorch import ToTensorV2
from PIL import Image
from torch.utils.data import Dataset

class SyntheticDataset(Dataset):
    def __init__(self, image_dir, transform):
        self.images = list(Path(image_dir).glob("**/*.jpg"))
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, index):
        # open and convert image
        image = np.array(Image.open(self.images[index]).convert("RGB"), dtype=np.uint8)
        # create empty mask of same size
        mask = np.zeros((image.shape[0], image.shape[1]), dtype=np.uint8)
        # augment image and mask
        augmentations = self.transform(image=image, mask=mask)
        image = augmentations["image"]
        mask = augmentations["mask"]
        # convert image & mask to Tensor float in [0, 1]
        post_process = A.Compose(
            [
                A.ToFloat(max_value=255),
                ToTensorV2(),
            ],
        )
        augmentations = post_process(image=image, mask=mask)
        image = augmentations["image"]
        mask = augmentations["mask"]
        # make sure image and mask are floats
        image = image.float()
        mask = mask.float()
        return image, mask

class LabeledDataset(Dataset):
    def __init__(self, image_dir):
        self.images = list(Path(image_dir).glob("**/*.jpg"))

    def __len__(self):
        return len(self.images)

    def __getitem__(self, index):
        # open and convert image
        image = np.array(Image.open(self.images[index]).convert("RGB"), dtype=np.uint8)
        # open and convert mask (stored next to the image, binarised to {0, 1})
        mask_path = self.images[index].parent.joinpath("MASK.PNG")
        mask = np.array(Image.open(mask_path).convert("L"), dtype=np.uint8) // 255
        # resize, then convert image & mask to Tensor float in [0, 1]
        post_process = A.Compose(
            [
                A.SmallestMaxSize(1024),
                A.ToFloat(max_value=255),
                ToTensorV2(),
            ],
        )
        augmentations = post_process(image=image, mask=mask)
        image = augmentations["image"]
        mask = augmentations["mask"]
        # make sure image and mask are floats
        # TODO: move this into post_process; ToFloat only applies to the image
        image = image.float()
        mask = mask.float()
        return image, mask

class LabeledDataset2(Dataset):
    """Same as LabeledDataset, but keeps images at their original resolution (no SmallestMaxSize)."""

    def __init__(self, image_dir):
        self.images = list(Path(image_dir).glob("**/*.jpg"))

    def __len__(self):
        return len(self.images)

    def __getitem__(self, index):
        # open and convert image
        image = np.array(Image.open(self.images[index]).convert("RGB"), dtype=np.uint8)
        # open and convert mask (stored next to the image, binarised to {0, 1})
        mask_path = self.images[index].parent.joinpath("MASK.PNG")
        mask = np.array(Image.open(mask_path).convert("L"), dtype=np.uint8) // 255
        # convert image & mask to Tensor float in [0, 1]
        post_process = A.Compose(
            [
                A.ToFloat(max_value=255),
                ToTensorV2(),
            ],
        )
        augmentations = post_process(image=image, mask=mask)
        image = augmentations["image"]
        mask = augmentations["mask"]
        # make sure image and mask are floats
        # TODO: move this into post_process; ToFloat only applies to the image
        image = image.float()
        mask = mask.float()
        return image, mask
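

# --- Usage sketch (illustrative, not part of the original module) ---
# A minimal example of wiring these datasets into PyTorch DataLoaders.
# The directory paths and the augmentation pipeline passed to SyntheticDataset
# below are assumptions for illustration; the project's training script defines its own.
if __name__ == "__main__":
    from torch.utils.data import DataLoader

    # crop to a fixed size so samples can be batched together
    train_transform = A.Compose(
        [
            A.SmallestMaxSize(1024),
            A.RandomCrop(512, 512),
            A.HorizontalFlip(p=0.5),
        ],
    )
    synthetic_ds = SyntheticDataset("data/synthetic", transform=train_transform)
    labeled_ds = LabeledDataset("data/labeled")

    train_loader = DataLoader(synthetic_ds, batch_size=4, shuffle=True)
    # LabeledDataset only resizes the smallest side, so image sizes vary: keep batch_size=1
    val_loader = DataLoader(labeled_ds, batch_size=1, shuffle=False)

    images, masks = next(iter(train_loader))
    print(images.shape, masks.shape)  # e.g. torch.Size([4, 3, 512, 512]), torch.Size([4, 512, 512])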