PVD/dataset/rotor37_data.py


from pathlib import Path

import h5py
import numpy as np
import pyvista as pv
import torch
from rich.progress import track

# `Data` and `Dataset` must come from PyTorch Geometric: the class below relies
# on the PyG dataset API (root/process/get/processed_dir), not on
# torch.utils.data.Dataset.
from torch_geometric.data import Data, Dataset

DATASET_DIR = Path("/gpfs_new/cold-data/InputData/public_datasets/rotor37/rotor37_1200/")
VTKFILE_NOMINAL = DATASET_DIR / "ncs" / "nominal_blade.vtk"
H5FILE_TRAIN = DATASET_DIR / "h5" / "blade_meshes_train.h5"
H5FILE_TEST = DATASET_DIR / "h5" / "blade_meshes_test.h5"

CARDINALITY_TRAIN = 1000
CARDINALITY_TEST = 200
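
# The H5 files are expected to expose at least the datasets read in
# `Rotor37Dataset.process` below: "adj", "faces", "points", "normals" and
# "output_fields". A minimal layout check could look like this (illustrative
# sketch, assuming the paths above are reachable):
#
#   with h5py.File(H5FILE_TRAIN, "r") as f:
#       for key in ("adj", "faces", "points", "normals", "output_fields"):
#           assert key in f, f"missing dataset: {key}"
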
def rotate_nominal_blade(blade: pv.PolyData) -> None:
    """Rotate nominal blade points.

    The nominal blade must be rotated to match the orientation of the other blades.
    Rotations applied (sequentially) are:
    - -90° around z-axis
    - -90° around y-axis

    Args:
        blade (pyvista.PolyData): blade to rotate
    """
    THETA = -90
    PHI = -90
    RZ = np.array(
        [
            [np.cos(np.deg2rad(THETA)), -np.sin(np.deg2rad(THETA)), 0],
            [np.sin(np.deg2rad(THETA)), np.cos(np.deg2rad(THETA)), 0],
            [0, 0, 1],
        ]
    )
    RY = np.array(
        [
            [np.cos(np.deg2rad(PHI)), 0, np.sin(np.deg2rad(PHI))],
            [0, 1, 0],
            [-np.sin(np.deg2rad(PHI)), 0, np.cos(np.deg2rad(PHI))],
        ]
    )
    # rotation of θ° around z-axis
    blade.points = np.asarray(blade.points) @ RZ
    blade.point_data["Normals"] = np.asarray(blade.point_normals) @ RZ
    # rotation of φ° around y-axis
    blade.points = np.asarray(blade.points) @ RY
    blade.point_data["Normals"] = np.asarray(blade.point_normals) @ RY
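
# A quick sanity check of the composed rotations (illustrative sketch, not part
# of the original pipeline): with the row-vector convention `points @ R` used
# above, the two -90° rotations map x → y, y → z and z → x.
#
#   probe = pv.PolyData(np.eye(3))
#   probe.point_data["Normals"] = np.eye(3)
#   rotate_nominal_blade(probe)
#   np.testing.assert_allclose(
#       probe.points, np.array([[0, 1, 0], [0, 0, 1], [1, 0, 0]]), atol=1e-12
#   )
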
class Rotor37Dataset(Dataset):
    """Rotor37 dataset.

    This dataset is a collection of 1200 graphs, each representing a blade of an
    axial compressor rotor. The dataset is split into 2 subsets, train and test,
    with 1000 and 200 graphs respectively.

    Each graph is a 3D mesh, with 3D deformations from a nominal blade, 3D
    normals, faces and physical properties.
    """
    def __init__(
        self,
        root: str,
        split: str = "train",
        transform=None,
        pre_transform=None,
    ):
        """Initialize a new Rotor37 dataset instance.

        Args:
            root (str): root directory of the dataset
            split (str): split of the dataset, either "train" or "test"
            transform: optional transform applied each time a graph is accessed
            pre_transform: optional transform applied once, during processing
        """
        # set split
        assert split in ("train", "test")
        self.split = split
        # set cardinality and h5file according to split
        self.cardinality = CARDINALITY_TRAIN if split == "train" else CARDINALITY_TEST
        self.h5file = H5FILE_TRAIN if split == "train" else H5FILE_TEST
        super().__init__(root, transform, pre_transform)
    @property
    def raw_file_names(self) -> list[str]:
        """No raw files."""
        return []

    @property
    def processed_file_names(self) -> list[str]:
        """Processed files are named data_{split}_{idx:04d}.pt, where idx is the index of the graph."""
        return [f"data_{self.split}_{idx:04d}.pt" for idx in range(self.cardinality)]

    def download(self):
        """No need to download, data already in cluster."""
        pass
    def process(self) -> None:
        """Process the dataset.

        The dataset is processed by loading the nominal blade, then loading all deformed blades.
        For each deformed blade, the following attributes are computed and stored in a `Data` object:

        - delta: deformed blade - nominal blade
        - fields: physical properties of the blade
        - normals: normals of the blade
        - edges: edges of the blade
        - faces: faces of the blade

        The `Data` object is then saved to disk.
        """
        # load nominal blade
        vtk_reader = pv.get_reader(VTKFILE_NOMINAL)
        nominal = vtk_reader.read()
        rotate_nominal_blade(nominal)
        nominal_positions = torch.as_tensor(nominal.points, dtype=torch.float32)
        # load all deformed blades
        with h5py.File(self.h5file, "r") as h5file:
            # NB: torch.as_tensor(np.asarray(data)) is a bit ugly,
            # but torch.as_tensor(data) complains about data being an array of numpy arrays, and is also slower
            # edges and faces matrices are common to every graph
            edges = torch.as_tensor(np.asarray(h5file["adj"]), dtype=torch.int64).transpose(0, 1)
            faces = torch.as_tensor(np.asarray(h5file["faces"]), dtype=torch.int64).transpose(0, 1)
            # attributes specific to each graph
            attributes = zip(
                h5file["points"],  # type: ignore
                h5file["normals"],  # type: ignore
                h5file["output_fields"],  # type: ignore
            )
            # for each graph
            for idx, (positions, normals, fields) in track(
                enumerate(attributes),
                total=self.cardinality,
            ):
                # convert to torch tensors
                positions = torch.as_tensor(np.asarray(positions), dtype=torch.float32)
                fields = torch.as_tensor(np.asarray(fields), dtype=torch.float32)
                normals = torch.as_tensor(np.asarray(normals), dtype=torch.float32)
                delta = positions - nominal_positions
                # pack the attributes listed in the docstring into a `Data` object;
                # PyG's `edge_index`/`face` names are used so batching renumbers node indices
                data = Data(delta=delta, fields=fields, normals=normals, edge_index=edges, face=faces)
                if self.pre_transform is not None:
                    data = self.pre_transform(data)
                # save data to disk, following the naming scheme of `processed_file_names`
                torch.save(data, self.processed_dir / f"data_{self.split}_{idx:04d}.pt")
    def len(self) -> int:
        """Return the cardinality of the dataset."""
        return self.cardinality

    def get(self, idx: int) -> Data:
        """Load and return the graph `Data`.

        Args:
            idx (int): index of the graph to return

        Returns:
            Data: graph at index `idx`
        """
        return torch.load(self.processed_dir / f"data_{self.split}_{idx:04d}.pt")

    def __repr__(self) -> str:
        """Return a string representation of the dataset."""
        return f"{self.__class__.__name__}({self.split}, {len(self)})"

    @property
    def processed_dir(self) -> Path:
        """Wrap processed_dir to return a Path instead of a str."""
        return Path(super().processed_dir)
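
# Example of plugging a PyG transform into the dataset (hypothetical sketch:
# `concat_node_features` is not part of the original module). A `pre_transform`
# is applied once in `process`, before the graphs are written to disk, so the
# extra feature matrix is computed only at processing time:
#
#   def concat_node_features(data: Data) -> Data:
#       # use the per-node deformation and normals as the feature matrix `x`
#       data.x = torch.cat([data.delta, data.normals], dim=-1)
#       return data
#
#   ds = Rotor37Dataset(
#       root="./datasets/Rotor37/", split="train", pre_transform=concat_node_features
#   )
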
if __name__ == "__main__":
    from torch_geometric.loader import DataLoader

    # load test split
    ds_test = Rotor37Dataset(root="./datasets/Rotor37/", split="test")
    print(ds_test)
    print(ds_test[0])
    # create test data loader
    ld_test = DataLoader(ds_test, batch_size=8, shuffle=True)
    print(ld_test)
    print(next(iter(ld_test)))

    # load train split
    ds_train = Rotor37Dataset(root="./datasets/Rotor37/", split="train")
    print(ds_train)
    print(ds_train[0])
    # create train data loader
    ld_train = DataLoader(ds_train, batch_size=8, shuffle=True)
    print(ld_train)
    print(next(iter(ld_train)))