from pathlib import Path

import h5py
import numpy as np
import pyvista as pv
import torch
from rich.progress import track
from torch_geometric.data import Data, Dataset

DATASET_DIR = Path("/gpfs_new/cold-data/InputData/public_datasets/rotor37/rotor37_1200/")
VTKFILE_NOMINAL = DATASET_DIR / "ncs" / "nominal_blade.vtk"
H5FILE_TRAIN = DATASET_DIR / "h5" / "blade_meshes_train.h5"
H5FILE_TEST = DATASET_DIR / "h5" / "blade_meshes_test.h5"

CARDINALITY_TRAIN = 1000
CARDINALITY_TEST = 200


def rotate_nominal_blade(blade: pv.PolyData) -> None:
    """Rotate the nominal blade in place.

    The nominal blade must be rotated to match the orientation of the other
    blades. Rotations applied (sequentially) are:

    - -90° around z-axis
    - -90° around y-axis

    Args:
        blade (pyvista.PolyData): blade to rotate
    """
    THETA = -90
    PHI = -90

    RZ = np.array(
        [
            [np.cos(np.deg2rad(THETA)), -np.sin(np.deg2rad(THETA)), 0],
            [np.sin(np.deg2rad(THETA)), np.cos(np.deg2rad(THETA)), 0],
            [0, 0, 1],
        ]
    )

    RY = np.array(
        [
            [np.cos(np.deg2rad(PHI)), 0, np.sin(np.deg2rad(PHI))],
            [0, 1, 0],
            [-np.sin(np.deg2rad(PHI)), 0, np.cos(np.deg2rad(PHI))],
        ]
    )

    # NB: points and normals are stored as row vectors, hence the
    # right-multiplication by the rotation matrices

    # rotation of θ° around z-axis
    blade.points = np.asarray(blade.points) @ RZ
    blade.point_data["Normals"] = np.asarray(blade.point_normals) @ RZ

    # rotation of φ° around y-axis
    blade.points = np.asarray(blade.points) @ RY
    blade.point_data["Normals"] = np.asarray(blade.point_normals) @ RY


class Rotor37Dataset(Dataset):
    """Rotor37 dataset.

    This dataset is a collection of 1200 graphs, each representing a compressor
    rotor blade. The dataset is split into two subsets: train and test, with
    1000 and 200 graphs respectively.

    Each graph is a 3D mesh carrying the 3D deformations from a nominal blade,
    3D normals, faces and physical properties.
    """

    def __init__(
        self,
        root: str,
        split: str = "train",
        transform=None,
        pre_transform=None,
    ):
        """Initialize a new Rotor37 dataset instance.

        Args:
            root (str): root directory of the dataset
            split (str): split of the dataset, either "train" or "test"
            transform: transform applied on-the-fly at access time
            pre_transform: transform applied once, during processing
        """
        # set split
        assert split in ("train", "test")
        self.split = split

        # set cardinality and h5file according to split
        self.cardinality = CARDINALITY_TRAIN if split == "train" else CARDINALITY_TEST
        self.h5file = H5FILE_TRAIN if split == "train" else H5FILE_TEST

        super().__init__(root, transform, pre_transform)

    @property
    def raw_file_names(self) -> list[str]:
        """No raw files."""
        return []

    @property
    def processed_file_names(self) -> list[str]:
        """Processed files are named data_{split}_{idx:04d}.pt, where idx is the index of the graph."""
        return [f"data_{self.split}_{idx:04d}.pt" for idx in range(self.cardinality)]

    def download(self):
        """No need to download, data is already on the cluster."""
        pass

    def process(self) -> None:
        """Process the dataset.

        The dataset is processed by loading the nominal blade, then loading all
        deformed blades. For each deformed blade, the following attributes are
        computed and stored in a `Data` object:

        - delta: deformed blade - nominal blade
        - fields: physical properties of the blade
        - normals: normals of the blade
        - edges: edges of the blade
        - faces: faces of the blade

        The `Data` object is then saved to disk.
""" # load nominal blade vtk_reader = pv.get_reader(VTKFILE_NOMINAL) nominal = vtk_reader.read() rotate_nominal_blade(nominal) nominal_positions = torch.as_tensor(nominal.points, dtype=torch.float32) # load all deformed blades with h5py.File(self.h5file, "r") as h5file: # NB: torch.as_tensor(np.asarray(data)) is a bit ugly # but torch torch.as_tensor(data) complains about data being an array of numpy arrays, and is also slower # common edges and faces matrix for each graph edges = torch.as_tensor(np.asarray(h5file["adj"]), dtype=torch.int64).transpose(0, 1) faces = torch.as_tensor(np.asarray(h5file["faces"]), dtype=torch.int64).transpose(0, 1) # attributes specific to each graph attributes = zip( h5file["points"], # type: ignore h5file["normals"], # type: ignore h5file["output_fields"], # type: ignore ) # for each graph for idx, (positions, normals, fields) in track( enumerate(attributes), total=self.cardinality, ): # convert to torch tensors positions = torch.as_tensor(np.asarray(positions), dtype=torch.float32) fields = torch.as_tensor(np.asarray(fields), dtype=torch.float32) normals = torch.as_tensor(np.asarray(normals), dtype=torch.float32) delta = positions - nominal_positions # save data to disk def len(self) -> int: """Return the cardinality of the dataset.""" return self.cardinality def get(self, idx) -> Data: """Load and return the graph `Data`. Args: idx (int): index of the graph to return Returns: Data: graph at index `idx` """ return torch.load(self.processed_dir / f"data_{self.split}_{idx:04d}.pt") def __repr__(self) -> str: """Return a string representation of the dataset.""" return f"{self.__class__.__name__}({self.split}, {len(self)})" @property def processed_dir(self) -> Path: """Wrap processed_dir to return a Path instead of a str.""" return Path(super().processed_dir) if __name__ == "__main__": from torch_geometric.loader import DataLoader # load test split ds_test = Rotor37Dataset(root="./datasets/Rotor37/", split="test") print(ds_test) print(ds_test[0]) # create test data loader ld_test = DataLoader(ds_test, batch_size=8, shuffle=True) print(ld_test) print(next(iter(ld_test))) # load train split ds_train = Rotor37Dataset(root="./datasets/Rotor37/", split="train") print(ds_train) print(ds_train[0]) # create train data loader ld_train = DataLoader(ds_train, batch_size=8, shuffle=True) print(ld_train) print(next(iter(ld_train)))