diff --git a/datasets/__init__.py b/dataset/__init__.py
similarity index 100%
rename from datasets/__init__.py
rename to dataset/__init__.py
diff --git a/datasets/partnet.py b/dataset/partnet.py
similarity index 100%
rename from datasets/partnet.py
rename to dataset/partnet.py
diff --git a/dataset/rotor37_data.py b/dataset/rotor37_data.py
new file mode 100644
index 0000000..b15b265
--- /dev/null
+++ b/dataset/rotor37_data.py
@@ -0,0 +1,194 @@
+from pathlib import Path
+
+import h5py
+import numpy as np
+import pyvista as pv
+import torch
+from rich.progress import track
+from torch_geometric.data import Data, Dataset
+
+DATASET_DIR = Path("/gpfs_new/cold-data/InputData/public_datasets/rotor37/rotor37_1200/")
+VTKFILE_NOMINAL = DATASET_DIR / "ncs" / "nominal_blade.vtk"
+H5FILE_TRAIN = DATASET_DIR / "h5" / "blade_meshes_train.h5"
+H5FILE_TEST = DATASET_DIR / "h5" / "blade_meshes_test.h5"
+CARDINALITY_TRAIN = 1000
+CARDINALITY_TEST = 200
+
+
+def rotate_nominal_blade(blade: pv.PolyData) -> None:
+    """Rotate nominal blade points.
+
+    The nominal blade must be rotated to match the orientation of the other blades.
+    Rotations applied (sequentially) are:
+    - -90° around z-axis
+    - -90° around y-axis
+
+    Args:
+        blade (pyvista.PolyData): blade to rotate
+    """
+    THETA = -90
+    PHI = -90
+
+    RZ = np.array(
+        [
+            [np.cos(np.deg2rad(THETA)), -np.sin(np.deg2rad(THETA)), 0],
+            [np.sin(np.deg2rad(THETA)), np.cos(np.deg2rad(THETA)), 0],
+            [0, 0, 1],
+        ]
+    )
+
+    RY = np.array(
+        [
+            [np.cos(np.deg2rad(PHI)), 0, np.sin(np.deg2rad(PHI))],
+            [0, 1, 0],
+            [-np.sin(np.deg2rad(PHI)), 0, np.cos(np.deg2rad(PHI))],
+        ]
+    )
+
+    # rotation of θ° around z-axis
+    blade.points = np.asarray(blade.points) @ RZ
+    blade.point_data["Normals"] = np.asarray(blade.point_normals) @ RZ
+
+    # rotation of φ° around y-axis
+    blade.points = np.asarray(blade.points) @ RY
+    blade.point_data["Normals"] = np.asarray(blade.point_normals) @ RY
+
+
+class Rotor37Dataset(Dataset):
+    """Rotor37 dataset.
+
+    This dataset is a collection of 1200 graphs, each representing a compressor blade.
+    The dataset is split into 2 subsets: train and test, with 1000 and 200 graphs respectively.
+    Each graph is a 3D mesh carrying per-node deformations from a nominal blade, per-node normals,
+    triangular faces and physical output fields.
+    """
+
+    def __init__(
+        self,
+        root: str,
+        split: str = "train",
+        transform=None,
+        pre_transform=None,
+    ):
+        """Initialize a new Rotor37 dataset instance.
+
+        Args:
+            root (str): root directory of the dataset
+            split (str): split of the dataset, either "train" or "test"
+            transform: per-access transform, forwarded to `torch_geometric.data.Dataset`
+            pre_transform: one-off transform applied before saving, forwarded to `torch_geometric.data.Dataset`
+        """
+        # set split
+        assert split in ("train", "test")
+        self.split = split
+
+        # set cardinality and h5file according to split
+        self.cardinality = CARDINALITY_TRAIN if split == "train" else CARDINALITY_TEST
+        self.h5file = H5FILE_TRAIN if split == "train" else H5FILE_TEST
+
+        super().__init__(root, transform, pre_transform)
+
+    @property
+    def raw_file_names(self) -> list[str]:
+        """No raw files."""
+        return []
+
+    @property
+    def processed_file_names(self) -> list[str]:
+        """Processed files are named data_{split}_{idx:04d}.pt, where idx is the index of the graph."""
+        return [f"data_{self.split}_{idx:04d}.pt" for idx in range(self.cardinality)]
+
+    def download(self):
+        """No need to download, data already in cluster."""
+        pass
+
+    def process(self) -> None:
+        """Process the dataset.
+
+        The dataset is processed by loading the nominal blade, and then loading all deformed blades.
+        For each deformed blade, the following attributes are computed and stored in a `Data` object:
+        - delta: deformed blade - nominal blade
+        - fields: physical properties of the blade
+        - normals: normals of the blade
+        - edge_index: edges of the blade
+        - face: faces of the blade
+
+        The `Data` object is then saved to disk.
+        """
+        # load nominal blade
+        vtk_reader = pv.get_reader(VTKFILE_NOMINAL)
+        nominal = vtk_reader.read()
+        rotate_nominal_blade(nominal)
+        nominal_positions = torch.as_tensor(nominal.points, dtype=torch.float32)
+
+        # load all deformed blades
+        with h5py.File(self.h5file, "r") as h5file:
+            # NB: torch.as_tensor(np.asarray(data)) is a bit ugly,
+            # but torch.as_tensor(data) complains about data being an array of numpy arrays, and is also slower
+
+            # common edges and faces matrix for each graph
+            edges = torch.as_tensor(np.asarray(h5file["adj"]), dtype=torch.int64).transpose(0, 1)
+            faces = torch.as_tensor(np.asarray(h5file["faces"]), dtype=torch.int64).transpose(0, 1)
+
+            # attributes specific to each graph
+            attributes = zip(
+                h5file["points"],  # type: ignore
+                h5file["normals"],  # type: ignore
+                h5file["output_fields"],  # type: ignore
+            )
+
+            # for each graph
+            for idx, (positions, normals, fields) in track(
+                enumerate(attributes),
+                total=self.cardinality,
+            ):
+                # convert to torch tensors
+                positions = torch.as_tensor(np.asarray(positions), dtype=torch.float32)
+                fields = torch.as_tensor(np.asarray(fields), dtype=torch.float32)
+                normals = torch.as_tensor(np.asarray(normals), dtype=torch.float32)
+                delta = positions - nominal_positions
+
+                # pack the attributes into a `Data` object
+                # (`edge_index` and `face` follow the torch_geometric naming convention)
+                data = Data(
+                    delta=delta,
+                    fields=fields,
+                    normals=normals,
+                    edge_index=edges,
+                    face=faces,
+                )
+                if self.pre_transform is not None:
+                    data = self.pre_transform(data)
+
+                # save data to disk
+                torch.save(data, self.processed_dir / f"data_{self.split}_{idx:04d}.pt")
+
+    def len(self) -> int:
+        """Return the cardinality of the dataset."""
+        return self.cardinality
+
+    def get(self, idx) -> Data:
+        """Load and return the graph `Data`.
+
+        Args:
+            idx (int): index of the graph to return
+
+        Returns:
+            Data: graph at index `idx`
+        """
+        return torch.load(self.processed_dir / f"data_{self.split}_{idx:04d}.pt")
+
+    def __repr__(self) -> str:
+        """Return a string representation of the dataset."""
+        return f"{self.__class__.__name__}({self.split}, {len(self)})"
+
+    @property
+    def processed_dir(self) -> Path:
+        """Wrap processed_dir to return a Path instead of a str."""
+        return Path(super().processed_dir)
+
+
+if __name__ == "__main__":
+    from torch_geometric.loader import DataLoader
+
+    # load test split
+    ds_test = Rotor37Dataset(root="./datasets/Rotor37/", split="test")
+    print(ds_test)
+    print(ds_test[0])
+
+    # create test data loader
+    ld_test = DataLoader(ds_test, batch_size=8, shuffle=True)
+    print(ld_test)
+    print(next(iter(ld_test)))
+
+    # load train split
+    ds_train = Rotor37Dataset(root="./datasets/Rotor37/", split="train")
+    print(ds_train)
+    print(ds_train[0])
+
+    # create train data loader
+    ld_train = DataLoader(ds_train, batch_size=8, shuffle=True)
+    print(ld_train)
+    print(next(iter(ld_train)))
diff --git a/datasets/shapenet_data_pc.py b/dataset/shapenet_data_pc.py
similarity index 99%
rename from datasets/shapenet_data_pc.py
rename to dataset/shapenet_data_pc.py
index 0e8ff8a..6330304 100644
--- a/datasets/shapenet_data_pc.py
+++ b/dataset/shapenet_data_pc.py
@@ -102,6 +102,7 @@ class Uniform15KPC(Dataset):
         self.all_cate_mids = []
         self.cate_idx_lst = []
         self.all_points = []
+
         for cate_idx, subd in enumerate(self.subdirs):
             # NOTE: [subd] here is synset id
             sub_path = os.path.join(root_dir, subd, self.split)
@@ -158,7 +159,6 @@
             self.all_points_std = self.all_points.max(axis=1).reshape(B, 1, input_dim) - self.all_points.min(
                 axis=1
             ).reshape(B, 1, input_dim)
-
         else:  # normalize across the dataset
             self.all_points_mean = self.all_points.reshape(-1, input_dim).mean(axis=0).reshape(1, 1, input_dim)
             if normalize_std_per_axis:
@@ -292,6 +292,3 @@
             input_dim=3,
             use_mask=use_mask,
         )
-
-
-####################################################################################
diff --git a/datasets/shapenet_data_sv.py b/dataset/shapenet_data_sv.py
similarity index 100%
rename from datasets/shapenet_data_sv.py
rename to dataset/shapenet_data_sv.py
diff --git a/environment.yml b/environment.yml
index 49ea1b5..78a0b86 100644
--- a/environment.yml
+++ b/environment.yml
@@ -18,8 +18,12 @@ dependencies:
   - trimesh
   - scipy
   - scikit-learn
-#---# tooling (linting, typing...)
+  - h5py
+  - pyvista
+  - datasets
+#---# tooling (linting, typing, security...)
   - ruff
   - mypy
   - black
   - isort
+  - bandit
diff --git a/model/pvcnn_completion.py b/model/pvcnn_completion.py
index 7603f86..cd5f2c5 100644
--- a/model/pvcnn_completion.py
+++ b/model/pvcnn_completion.py
@@ -4,7 +4,15 @@
 import numpy as np
 import torch
 import torch.nn as nn
-from modules import Attention, PointNetAModule, PointNetFPModule, PointNetSAModule, PVConv, SharedMLP, Swish
+from modules import (
+    Attention,
+    PointNetAModule,
+    PointNetFPModule,
+    PointNetSAModule,
+    PVConv,
+    SharedMLP,
+    Swish,
+)


 def _linear_gn_relu(in_channels, out_channels):
diff --git a/model/pvcnn_generation.py b/model/pvcnn_generation.py
index 99720d7..32180bf 100644
--- a/model/pvcnn_generation.py
+++ b/model/pvcnn_generation.py
@@ -4,7 +4,15 @@
 import numpy as np
 import torch
 import torch.nn as nn
-from modules import Attention, PointNetAModule, PointNetFPModule, PointNetSAModule, PVConv, SharedMLP, Swish
+from modules import (
+    Attention,
+    PointNetAModule,
+    PointNetFPModule,
+    PointNetSAModule,
+    PVConv,
+    SharedMLP,
+    Swish,
+)


 def _linear_gn_relu(in_channels, out_channels):
diff --git a/test_completion.py b/test_completion.py
index 0f76d35..ccf48e0 100644
--- a/test_completion.py
+++ b/test_completion.py
@@ -5,8 +5,8 @@
 import torch.nn as nn
 import torch.utils.data
 from torch.distributions import Normal
-from datasets.shapenet_data_pc import ShapeNet15kPointClouds
-from datasets.shapenet_data_sv import *
+from dataset.shapenet_data_pc import ShapeNet15kPointClouds
+from dataset.shapenet_data_sv import *
 from metrics.evaluation_metrics import EMD_CD, compute_all_metrics
 from model.pvcnn_completion import PVCNN2Base
 from utils.file_utils import *
diff --git a/test_generation.py b/test_generation.py
index 40aafde..58f0574 100644
--- a/test_generation.py
+++ b/test_generation.py
@@ -7,7 +7,7 @@
 import torch.utils.data
 from torch.distributions import Normal
 from tqdm import tqdm
-from datasets.shapenet_data_pc import ShapeNet15kPointClouds
+from dataset.shapenet_data_pc import ShapeNet15kPointClouds
 from metrics.evaluation_metrics import compute_all_metrics
 from metrics.evaluation_metrics import jsd_between_point_cloud_sets as JSD
 from model.pvcnn_generation import PVCNN2Base
diff --git a/train_completion.py b/train_completion.py
index e5a5afc..e048092 100644
--- a/train_completion.py
+++ b/train_completion.py
@@ -7,8 +7,8 @@
 import torch.optim as optim
 import torch.utils.data
 from torch.distributions import Normal
-from datasets.shapenet_data_pc import ShapeNet15kPointClouds
-from datasets.shapenet_data_sv import ShapeNet_Multiview_Points
+from dataset.shapenet_data_pc import ShapeNet15kPointClouds
+from dataset.shapenet_data_sv import ShapeNet_Multiview_Points
 from model.pvcnn_completion import PVCNN2Base
 from utils.file_utils import *
 from utils.visualize import *
diff --git a/train_generation.py b/train_generation.py
index e248334..d87c78a 100644
--- a/train_generation.py
+++ b/train_generation.py
@@ -8,7 +8,7 @@
 import torch.optim as optim
 import torch.utils.data
 from torch.distributions import Normal
-from datasets.shapenet_data_pc import ShapeNet15kPointClouds
+from dataset.shapenet_data_pc import ShapeNet15kPointClouds
 from model.pvcnn_generation import PVCNN2Base
 from utils.file_utils import *
 from utils.visualize import *
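
Reviewer note, not part of the patch: a minimal sketch of how the new `Rotor37Dataset` graphs are meant to be consumed downstream. `Rotor37Dataset`, `rotate_nominal_blade`, `VTKFILE_NOMINAL` and the `delta`/`fields` attributes all come from `dataset/rotor37_data.py` above; the batch-level reconstruction of absolute positions from `delta` is an assumption, based on `delta = positions - nominal_positions` in `process()` and on PyG's default node-wise concatenation when batching.

# sketch only -- assumes the rotor37 VTK/HDF5 data is reachable as in the patch
import pyvista as pv
import torch
from torch_geometric.loader import DataLoader

from dataset.rotor37_data import VTKFILE_NOMINAL, Rotor37Dataset, rotate_nominal_blade

# nominal blade, rotated exactly as in Rotor37Dataset.process()
nominal = pv.get_reader(VTKFILE_NOMINAL).read()
rotate_nominal_blade(nominal)
nominal_positions = torch.as_tensor(nominal.points, dtype=torch.float32)

ds = Rotor37Dataset(root="./datasets/Rotor37/", split="train")
loader = DataLoader(ds, batch_size=8, shuffle=True)
batch = next(iter(loader))

# PyG concatenates node attributes along dim 0, and every blade shares the
# nominal topology, so tiling the nominal positions recovers absolute coordinates
positions = batch.delta + nominal_positions.repeat(batch.num_graphs, 1)
print(positions.shape, batch.fields.shape)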