From 2cc47bbb9e1b62ec60f8151e8fd5502e0389f9a9 Mon Sep 17 00:00:00 2001 From: Laurent Fainsin Date: Sat, 1 Apr 2023 18:31:24 +0200 Subject: [PATCH] feat: cleanup dataset loaders a bit --- ...{spheres.py => spheres_antoine_laurent.py} | 29 ++++--------------- src/spheres_illumination.py | 21 ++++---------- src/spheres_predict.py | 21 ++------------ src/spheres_synth.py | 17 ++++------- 4 files changed, 20 insertions(+), 68 deletions(-) rename src/{spheres.py => spheres_antoine_laurent.py} (93%) diff --git a/src/spheres.py b/src/spheres_antoine_laurent.py similarity index 93% rename from src/spheres.py rename to src/spheres_antoine_laurent.py index d62e027..5ac39d6 100644 --- a/src/spheres.py +++ b/src/spheres_antoine_laurent.py @@ -1,5 +1,3 @@ -"""Dataset class AI or NOT HuggingFace competition.""" - import json import pathlib @@ -8,8 +6,8 @@ import datasets import numpy as np prefix = "/data/local-files/?d=spheres/" -dataset_path = pathlib.Path("./dataset3/spheres/") -annotation_path = pathlib.Path("./annotations2.json") +dataset_path = pathlib.Path("./dataset_antoine_laurent/") +annotation_path = dataset_path / "annotations.json" _VERSION = "1.0.0" @@ -20,20 +18,13 @@ _HOMEPAGE = "" _LICENSE = "" _NAMES = [ - # "White", - # "Black", - # "Grey", - # "Red", - # "Chrome", "Matte", "Shiny", "Chrome", ] -class spheres(datasets.GeneratorBasedBuilder): - """spheres image dataset.""" - +class SphereAntoineLaurent(datasets.GeneratorBasedBuilder): def _info(self): return datasets.DatasetInfo( description=_DESCRIPTION, @@ -83,10 +74,6 @@ class spheres(datasets.GeneratorBasedBuilder): image_name = image_name[len(prefix) :] image_name = pathlib.Path(image_name) - # skip shitty images - # if "Soulages" in str(image_name): - # continue - # check image_name exists assert (dataset_path / image_name).is_file() @@ -117,7 +104,7 @@ class spheres(datasets.GeneratorBasedBuilder): label = annotations[0]["value"]["keypointlabels"][0] for annotation in annotations: assert annotation["value"]["keypointlabels"][0] == label - + if label == "White": label = "Matte" elif label == "Black": @@ -202,7 +189,7 @@ if __name__ == "__main__": # load dataset dataset = datasets.load_dataset("src/spheres.py", split="train") - print("a") + print("dataset loaded") labels = dataset.features["objects"][0]["category_id"].names id2label = {k: v for k, v in enumerate(labels)} @@ -214,16 +201,12 @@ if __name__ == "__main__": print() idx = 0 - while True: image = dataset[idx]["image"] if "DSC_4234" in image.filename: break idx += 1 - if idx > 10000: - break - print(f"image path: {image.filename}") print(f"data: {dataset[idx]}") @@ -239,4 +222,4 @@ if __name__ == "__main__": draw.text(bbox[:2], text=id2label[obj["category_id"]], fill="black") # save image - image.save("example.jpg") + image.save("example_antoine_laurent.jpg") diff --git a/src/spheres_illumination.py b/src/spheres_illumination.py index 90fbb4b..5c6828c 100644 --- a/src/spheres_illumination.py +++ b/src/spheres_illumination.py @@ -1,12 +1,9 @@ -"""Dataset class AI or NOT HuggingFace competition.""" - +import json import pathlib -import json import datasets -dataset_path_train = pathlib.Path("/home/laurent/proj-long/dataset_illumination/") -dataset_path_test = pathlib.Path("/home/laurent/proj-long/dataset_illumination_test/") +dataset_path_train = pathlib.Path("./dataset_illumination/") _VERSION = "1.0.0" @@ -23,9 +20,7 @@ _NAMES = [ ] -class spheresSynth(datasets.GeneratorBasedBuilder): - """spheres image dataset.""" - +class SphereIllumination(datasets.GeneratorBasedBuilder): def _info(self): return datasets.DatasetInfo( description=_DESCRIPTION, @@ -60,12 +55,6 @@ class spheresSynth(datasets.GeneratorBasedBuilder): "dataset_path": dataset_path_train, }, ), - datasets.SplitGenerator( - name=datasets.Split.TEST, - gen_kwargs={ - "dataset_path": dataset_path_test, - }, - ), ] def _generate_examples(self, dataset_path: pathlib.Path): @@ -75,7 +64,7 @@ class spheresSynth(datasets.GeneratorBasedBuilder): original_width = 6020 original_height = 4024 - + # create png iterator object_index = 0 jpgs = dataset_path.rglob("*.jpg") @@ -172,4 +161,4 @@ if __name__ == "__main__": draw.text(bbox[:2], text=id2label[obj["category_id"]], fill="black") # save image - image.save(f"example_{idx}.jpg") + image.save(f"example_illumination_{idx}.jpg") diff --git a/src/spheres_predict.py b/src/spheres_predict.py index 1ad8b9f..79e75b3 100644 --- a/src/spheres_predict.py +++ b/src/spheres_predict.py @@ -1,10 +1,8 @@ -"""Dataset class AI or NOT HuggingFace competition.""" - import pathlib import datasets -dataset_path = pathlib.Path("/home/laurent/proj-long/dataset_predict/") +dataset_path = pathlib.Path("./dataset_predict/") _VERSION = "1.0.0" @@ -21,9 +19,7 @@ _NAMES = [ ] -class spheresSynth(datasets.GeneratorBasedBuilder): - """spheres image dataset.""" - +class SpherePredict(datasets.GeneratorBasedBuilder): def _info(self): return datasets.DatasetInfo( description=_DESCRIPTION, @@ -98,16 +94,5 @@ if __name__ == "__main__": print(f"image path: {image.filename}") print(f"data: {dataset[idx]}") - draw = ImageDraw.Draw(image) - for obj in dataset[idx]["objects"]: - bbox = ( - obj["bbox"][0], - obj["bbox"][1], - obj["bbox"][0] + obj["bbox"][2], - obj["bbox"][1] + obj["bbox"][3], - ) - draw.rectangle(bbox, outline="red", width=3) - draw.text(bbox[:2], text=id2label[obj["category_id"]], fill="black") - # save image - image.save(f"example_{idx}.jpg") + image.save(f"example_predict_{idx}.jpg") diff --git a/src/spheres_synth.py b/src/spheres_synth.py index 0d7c28a..ed1402d 100644 --- a/src/spheres_synth.py +++ b/src/spheres_synth.py @@ -1,12 +1,8 @@ -"""Dataset class AI or NOT HuggingFace competition.""" - import pathlib -import cv2 import datasets -import numpy as np -dataset_path = pathlib.Path("/home/laurent/proj-long/dataset_render/") +dataset_path = pathlib.Path("./dataset_render/") _VERSION = "1.0.0" @@ -23,8 +19,7 @@ _NAMES = [ ] -class spheresSynth(datasets.GeneratorBasedBuilder): - """spheres image dataset.""" +class SphereSynth(datasets.GeneratorBasedBuilder): def _info(self): return datasets.DatasetInfo( @@ -113,7 +108,7 @@ class spheresSynth(datasets.GeneratorBasedBuilder): category = "Chrome" elif category == "Cyan": category = "Shiny" - + categories.append(category) # generate data @@ -156,8 +151,8 @@ if __name__ == "__main__": for idx in range(10): image = dataset[idx]["image"] - # print(f"image path: {image.filename}") - # print(f"data: {dataset[idx]}") + print(f"image path: {image.filename}") + print(f"data: {dataset[idx]}") draw = ImageDraw.Draw(image) for obj in dataset[idx]["objects"]: @@ -171,4 +166,4 @@ if __name__ == "__main__": draw.text(bbox[:2], text=id2label[obj["category_id"]], fill="black") # save image - image.save(f"example_{idx}.jpg") + image.save(f"example_synth_{idx}.jpg")