feat: add huggingface rotor37 dataset
This commit is contained in:
parent
a97c2e87f9
commit
df48f8272a
38
.vscode/launch.json
vendored
38
.vscode/launch.json
vendored
|
@ -1,20 +1,20 @@
|
|||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Python: Current File",
|
||||
"type": "python",
|
||||
"request": "launch",
|
||||
"program": "${file}",
|
||||
"console": "integratedTerminal",
|
||||
"justMyCode": true,
|
||||
"args": [
|
||||
"--category",
|
||||
"car",
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Python: Current File",
|
||||
"type": "python",
|
||||
"request": "launch",
|
||||
"program": "${file}",
|
||||
"console": "integratedTerminal",
|
||||
"justMyCode": true,
|
||||
"args": [
|
||||
"--category",
|
||||
"car",
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
|
@ -1,194 +1,65 @@
|
|||
from pathlib import Path
|
||||
|
||||
import datasets
|
||||
import h5py
|
||||
import numpy as np
|
||||
import pyvista as pv
|
||||
import torch
|
||||
from rich.progress import track
|
||||
from torch.utils.data import Dataset
|
||||
|
||||
DATASET_DIR = Path("/gpfs_new/cold-data/InputData/public_datasets/rotor37/rotor37_1200/")
|
||||
VTKFILE_NOMINAL = DATASET_DIR / "ncs" / "nominal_blade.vtk"
|
||||
H5FILE_TRAIN = DATASET_DIR / "h5" / "blade_meshes_train.h5"
|
||||
H5FILE_TEST = DATASET_DIR / "h5" / "blade_meshes_test.h5"
|
||||
CARDINALITY_TRAIN = 1000
|
||||
CARDINALITY_TEST = 200
|
||||
N_POINTS = 29773
|
||||
|
||||
_VERSION = "1.0.0"
|
||||
|
||||
_DESCRIPTION = """
|
||||
This dataset is a collection of 1200 pointclouds, each representing a blade of a wind turbine.
|
||||
The dataset is split into 2 subsets: train and test, with 1000 and 200 clouds respectively.
|
||||
Each pointcloud has 29773 points, each point has 3D coordinates, 3D normals and physical properties.
|
||||
"""
|
||||
|
||||
|
||||
def rotate_nominal_blade(blade: pv.PolyData) -> None:
|
||||
"""Rotate nominal blade points.
|
||||
class Rotor37(datasets.GeneratorBasedBuilder):
|
||||
"""Rotor37 dataset."""
|
||||
|
||||
The nominal blade must be rotated to match the orientation of the other blades.
|
||||
Rotations applied (sequentially) are:
|
||||
- -90° around z-axis
|
||||
- -90° around y-axis
|
||||
def _info(self):
|
||||
return datasets.DatasetInfo(
|
||||
version=_VERSION,
|
||||
description=_DESCRIPTION,
|
||||
features=datasets.Features(
|
||||
{
|
||||
"positions": datasets.Array2D(shape=(N_POINTS, 3), dtype="float32"),
|
||||
"normals": datasets.Array2D(shape=(N_POINTS, 3), dtype="float32"),
|
||||
"features": datasets.Array2D(shape=(N_POINTS, 4), dtype="float32"),
|
||||
}
|
||||
),
|
||||
)
|
||||
|
||||
Args:
|
||||
blade (pyvista.PolyData): blade to rotate
|
||||
"""
|
||||
THETA = -90
|
||||
PHI = -90
|
||||
|
||||
RZ = np.array(
|
||||
[
|
||||
[np.cos(np.deg2rad(THETA)), -np.sin(np.deg2rad(THETA)), 0],
|
||||
[np.sin(np.deg2rad(THETA)), np.cos(np.deg2rad(THETA)), 0],
|
||||
[0, 0, 1],
|
||||
def _split_generators(self, dl_manager):
|
||||
return [
|
||||
datasets.SplitGenerator(
|
||||
name=datasets.Split.TEST, # type: ignore
|
||||
gen_kwargs={
|
||||
"h5file": H5FILE_TEST,
|
||||
},
|
||||
),
|
||||
datasets.SplitGenerator(
|
||||
name=datasets.Split.TRAIN, # type: ignore
|
||||
gen_kwargs={
|
||||
"h5file": H5FILE_TRAIN,
|
||||
},
|
||||
),
|
||||
]
|
||||
)
|
||||
|
||||
RY = np.array(
|
||||
[
|
||||
[np.cos(np.deg2rad(PHI)), 0, np.sin(np.deg2rad(PHI))],
|
||||
[0, 1, 0],
|
||||
[-np.sin(np.deg2rad(PHI)), 0, np.cos(np.deg2rad(PHI))],
|
||||
]
|
||||
)
|
||||
|
||||
# rotation of θ° around z-axis
|
||||
blade.points = np.asarray(blade.points) @ RZ
|
||||
blade.point_data["Normals"] = np.asarray(blade.point_normals) @ RZ
|
||||
|
||||
# rotation of φ° around y-axis
|
||||
blade.points = np.asarray(blade.points) @ RY
|
||||
blade.point_data["Normals"] = np.asarray(blade.point_normals) @ RY
|
||||
|
||||
|
||||
class Rotor37Dataset(Dataset):
|
||||
"""Rotor37 dataset.
|
||||
|
||||
This dataset is a collection of 1200 graphs, each representing a blade of a wind turbine.
|
||||
The dataset is split into 2 subsets: train and test, with 1000 and 200 graphs respectively.
|
||||
Each graph is a 3D mesh, with 3D deformations from a nominal blade, 3D normals, 3D faces and physical properties.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
root: str,
|
||||
split: str = "train",
|
||||
):
|
||||
"""Initialize a new Rotor37 dataset instance.
|
||||
|
||||
Args:
|
||||
root (str): root directory of the dataset
|
||||
split (str): split of the dataset, either "train" or "test"
|
||||
"""
|
||||
# set split
|
||||
assert split in ("train", "test")
|
||||
self.split = split
|
||||
|
||||
# set cardinality and h5file according to split
|
||||
self.cardinality = CARDINALITY_TRAIN if split == "train" else CARDINALITY_TEST
|
||||
self.h5file = H5FILE_TRAIN if split == "train" else H5FILE_TEST
|
||||
|
||||
super().__init__(root, transform, pre_transform)
|
||||
|
||||
@property
|
||||
def raw_file_names(self) -> list[str]:
|
||||
"""No raw files."""
|
||||
return []
|
||||
|
||||
@property
|
||||
def processed_file_names(self) -> list[str]:
|
||||
"""Processed files are named data_{split}_{idx:04d}.pt, where idx is the index of the graph."""
|
||||
return [f"data_{self.split}_{idx:04d}.pt" for idx in range(self.cardinality)]
|
||||
|
||||
def download(self):
|
||||
"""No need to download, data already in cluster."""
|
||||
pass
|
||||
|
||||
def process(self) -> None:
|
||||
"""Process the dataset.
|
||||
|
||||
The dataset is processed by loading the nominal blade, and then loading all deformed blades.
|
||||
For each deformed blade, the following attributes are computed and stored in a `Data` object:
|
||||
- delta: deformed blade - nominal blade
|
||||
- fields: physical properties of the blade
|
||||
- normals: normals of the blade
|
||||
- edges: edges of the blade
|
||||
- faces: faces of the blade
|
||||
|
||||
The `Data` object is then saved to disk.
|
||||
"""
|
||||
# load nominal blade
|
||||
vtk_reader = pv.get_reader(VTKFILE_NOMINAL)
|
||||
nominal = vtk_reader.read()
|
||||
rotate_nominal_blade(nominal)
|
||||
nominal_positions = torch.as_tensor(nominal.points, dtype=torch.float32)
|
||||
|
||||
# load all deformed blades
|
||||
with h5py.File(self.h5file, "r") as h5file:
|
||||
# NB: torch.as_tensor(np.asarray(data)) is a bit ugly
|
||||
# but torch.as_tensor(data) complains about data being an array of numpy arrays, and is also slower
|
||||
|
||||
# common edges and faces matrix for each graph
|
||||
edges = torch.as_tensor(np.asarray(h5file["adj"]), dtype=torch.int64).transpose(0, 1)
|
||||
faces = torch.as_tensor(np.asarray(h5file["faces"]), dtype=torch.int64).transpose(0, 1)
|
||||
|
||||
# attributes specific to each graph
|
||||
def _generate_examples(self, h5file: Path):
|
||||
with h5py.File(h5file, "r") as f:
|
||||
attributes = zip(
|
||||
h5file["points"], # type: ignore
|
||||
h5file["normals"], # type: ignore
|
||||
h5file["output_fields"], # type: ignore
|
||||
f["points"], # type: ignore
|
||||
f["normals"], # type: ignore
|
||||
f["output_fields"], # type: ignore
|
||||
)
|
||||
|
||||
# for each graph
|
||||
for idx, (positions, normals, fields) in track(
|
||||
enumerate(attributes),
|
||||
total=self.cardinality,
|
||||
):
|
||||
# convert to torch tensors
|
||||
positions = torch.as_tensor(np.asarray(positions), dtype=torch.float32)
|
||||
fields = torch.as_tensor(np.asarray(fields), dtype=torch.float32)
|
||||
normals = torch.as_tensor(np.asarray(normals), dtype=torch.float32)
|
||||
delta = positions - nominal_positions
|
||||
|
||||
# save data to disk
|
||||
|
||||
def len(self) -> int:
|
||||
"""Return the cardinality of the dataset."""
|
||||
return self.cardinality
|
||||
|
||||
def get(self, idx) -> Data:
|
||||
"""Load and return the graph `Data`.
|
||||
|
||||
Args:
|
||||
idx (int): index of the graph to return
|
||||
|
||||
Returns:
|
||||
Data: graph at index `idx`
|
||||
"""
|
||||
return torch.load(self.processed_dir / f"data_{self.split}_{idx:04d}.pt")
|
||||
|
||||
def __repr__(self) -> str:
|
||||
"""Return a string representation of the dataset."""
|
||||
return f"{self.__class__.__name__}({self.split}, {len(self)})"
|
||||
|
||||
@property
|
||||
def processed_dir(self) -> Path:
|
||||
"""Wrap processed_dir to return a Path instead of a str."""
|
||||
return Path(super().processed_dir)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from torch_geometric.loader import DataLoader
|
||||
|
||||
# load test split
|
||||
ds_test = Rotor37Dataset(root="./datasets/Rotor37/", split="test")
|
||||
print(ds_test)
|
||||
print(ds_test[0])
|
||||
|
||||
# create test data loader
|
||||
ld_test = DataLoader(ds_test, batch_size=8, shuffle=True)
|
||||
print(ld_test)
|
||||
print(next(iter(ld_test)))
|
||||
|
||||
# load train split
|
||||
ds_train = Rotor37Dataset(root="./datasets/Rotor37/", split="train")
|
||||
print(ds_train)
|
||||
print(ds_train[0])
|
||||
|
||||
# create train data loader
|
||||
ld_train = DataLoader(ds_train, batch_size=8, shuffle=True)
|
||||
print(ld_train)
|
||||
print(next(iter(ld_train)))
|
||||
for index, (positions, normals, fields) in enumerate(attributes):
|
||||
yield index, {
|
||||
"positions": positions,
|
||||
"normals": normals,
|
||||
"features": fields,
|
||||
}
|
||||
|
|
11
dataset/test_rotor37_data.py
Normal file
11
dataset/test_rotor37_data.py
Normal file
|
@ -0,0 +1,11 @@
|
|||
import datasets
|
||||
|
||||
train_ds = datasets.load_dataset("dataset/rotor37_data.py", split="train")
|
||||
train_ds = train_ds.with_format("torch")
|
||||
print(train_ds)
|
||||
|
||||
test_ds = datasets.load_dataset("dataset/rotor37_data.py", split="test")
|
||||
test_ds = test_ds.with_format("torch")
|
||||
print(test_ds)
|
||||
|
||||
print("yay")
|
|
@ -18,7 +18,6 @@ dependencies:
|
|||
- trimesh
|
||||
- scipy
|
||||
- scikit-learn
|
||||
- h5py
|
||||
- pyvista
|
||||
- datasets
|
||||
#---# toolings
|
||||
|
|
Loading…
Reference in a new issue