from pathlib import Path

import datasets
import h5py
import numpy as np

DATASET_DIR = Path("/gpfs_new/cold-data/InputData/public_datasets/rotor37/rotor37_1200/")
H5FILE_TRAIN = DATASET_DIR / "h5" / "blade_meshes_train.h5"
H5FILE_TEST = DATASET_DIR / "h5" / "blade_meshes_test.h5"
N_POINTS = 29773

_VERSION = "1.0.0"

_DESCRIPTION = """
|
|
This dataset is a collection of 1200 pointclouds, each representing a blade of a wind turbine.
|
|
The dataset is split into 2 subsets: train and test, with 1000 and 200 clouds respectively.
|
|
Each pointcloud has 29773 points, each point has 3D coordinates, 3D normals and physical properties.
|
|
"""
|
|
|
|
|
|
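# Expected layout of each HDF5 file (inferred from the reads in
# `_generate_examples` below; an assumption, not an official schema):
#   points:         float array of shape (n_clouds, N_POINTS, 3) - point coordinates
#   normals:        float array of shape (n_clouds, N_POINTS, 3) - per-point normals
#   output_fields:  float array of shape (n_clouds, N_POINTS, 4) - per-point physical fields

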
class Rotor37(datasets.GeneratorBasedBuilder):
    """Rotor37 dataset."""

    def _info(self):
        return datasets.DatasetInfo(
            version=_VERSION,
            description=_DESCRIPTION,
            features=datasets.Features(
                {
                    "positions": datasets.Array2D(shape=(N_POINTS, 3), dtype="float32"),
                    "normals": datasets.Array2D(shape=(N_POINTS, 3), dtype="float32"),
                    "features": datasets.Array2D(shape=(N_POINTS, 4), dtype="float32"),
                }
            ),
        )

    def _split_generators(self, dl_manager):
        # Both splits are read directly from local HDF5 files on GPFS,
        # so `dl_manager` is not used and nothing is downloaded.
        return [
            datasets.SplitGenerator(
                name=datasets.Split.TEST,  # type: ignore
                gen_kwargs={
                    "h5file": H5FILE_TEST,
                },
            ),
            datasets.SplitGenerator(
                name=datasets.Split.TRAIN,  # type: ignore
                gen_kwargs={
                    "h5file": H5FILE_TRAIN,
                },
            ),
        ]

    def _generate_examples(self, h5file: Path):
        with h5py.File(h5file, "r") as f:
            # Compute the per-axis mean and std of the positions over the whole split.
            positions = np.asarray(f["points"])
            positions_mean = positions.mean(axis=(0, 1))
            positions_std = positions.std(axis=(0, 1))

            # Normalize positions to zero mean and unit variance per axis.
            positions = (positions - positions_mean) / positions_std

            # Iterate over the clouds of the split, pairing each normalized
            # position array with its normals and output fields.
            attributes = zip(
                positions,
                f["normals"],  # type: ignore
                f["output_fields"],  # type: ignore
            )

            for index, (cloud_positions, normals, fields) in enumerate(attributes):
                yield index, {
                    "positions": cloud_positions,
                    "normals": normals,
                    "features": fields,
                }
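

# A minimal usage sketch (not part of the builder): it assumes this file is
# used as a local `datasets` loading script and that the HDF5 files under
# DATASET_DIR are reachable from the current machine. Recent `datasets`
# releases may additionally require `trust_remote_code=True` for
# script-based datasets.
if __name__ == "__main__":
    rotor37_train = datasets.load_dataset(__file__, split="train")
    print(len(rotor37_train))                               # expected: 1000 clouds
    print(np.asarray(rotor37_train[0]["positions"]).shape)  # expected: (29773, 3)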