# # # 0=================================0 # | Kernel Point Convolutions | # 0=================================0 # # # ---------------------------------------------------------------------------------------------------------------------- # # Class handling ModelNet40 dataset. # Implements a Dataset, a Sampler, and a collate_fn # # ---------------------------------------------------------------------------------------------------------------------- # # Hugues THOMAS - 11/06/2018 # # ---------------------------------------------------------------------------------------------------------------------- # # Imports and global variables # \**********************************/ # # Common libs import time import numpy as np import pickle import torch # OS functions from os.path import exists, join # Dataset parent class from datasetss.common import PointCloudDataset from torch.utils.data import Sampler, get_worker_info from utils.mayavi_visu import * from datasetss.common import grid_subsampling from utils.config import bcolors # ---------------------------------------------------------------------------------------------------------------------- # # Dataset class definition # \******************************/ class ModelNet40Dataset(PointCloudDataset): """Class to handle Modelnet 40 dataset.""" def __init__(self, config, train=True, orient_correction=True): """ This dataset is small enough to be stored in-memory, so load all point clouds here """ PointCloudDataset.__init__(self, "ModelNet40") ############ # Parameters ############ # Dict from labels to names self.label_to_names = { 0: "airplane", 1: "bathtub", 2: "bed", 3: "bench", 4: "bookshelf", 5: "bottle", 6: "bowl", 7: "car", 8: "chair", 9: "cone", 10: "cup", 11: "curtain", 12: "desk", 13: "door", 14: "dresser", 15: "flower_pot", 16: "glass_box", 17: "guitar", 18: "keyboard", 19: "lamp", 20: "laptop", 21: "mantel", 22: "monitor", 23: "night_stand", 24: "person", 25: "piano", 26: "plant", 27: "radio", 28: "range_hood", 29: "sink", 30: "sofa", 31: "stairs", 32: "stool", 33: "table", 34: "tent", 35: "toilet", 36: "tv_stand", 37: "vase", 38: "wardrobe", 39: "xbox", } # Initialize a bunch of variables concerning class labels self.init_labels() # List of classes ignored during training (can be empty) self.ignored_labels = np.array([]) # Dataset folder self.path = "./Data/ModelNet40" # Type of task conducted on this dataset self.dataset_task = "classification" # Update number of class and data task in configuration config.num_classes = self.num_classes config.dataset_task = self.dataset_task # Parameters from config self.config = config # Training or test set self.train = train # Number of models and models used per epoch if self.train: self.num_models = 9843 if ( config.epoch_steps and config.epoch_steps * config.batch_num < self.num_models ): self.epoch_n = config.epoch_steps * config.batch_num else: self.epoch_n = self.num_models else: self.num_models = 2468 self.epoch_n = min( self.num_models, config.validation_size * config.batch_num ) ############# # Load models ############# if 0 < self.config.first_subsampling_dl <= 0.01: raise ValueError("subsampling_parameter too low (should be over 1 cm") ( self.input_points, self.input_normals, self.input_labels, ) = self.load_subsampled_clouds(orient_correction) return def __len__(self): """ Return the length of data here """ return self.num_models def __getitem__(self, idx_list): """ The main thread gives a list of indices to load a batch. Each worker is going to work in parallel to load a different list of indices. """ ################### # Gather batch data ################### tp_list = [] tn_list = [] tl_list = [] ti_list = [] s_list = [] R_list = [] for p_i in idx_list: # Get points and labels points = self.input_points[p_i].astype(np.float32) normals = self.input_normals[p_i].astype(np.float32) label = self.label_to_idx[self.input_labels[p_i]] # Data augmentation points, normals, scale, R = self.augmentation_transform(points, normals) # Stack batch tp_list += [points] tn_list += [normals] tl_list += [label] ti_list += [p_i] s_list += [scale] R_list += [R] ################### # Concatenate batch ################### # show_ModelNet_examples(tp_list, cloud_normals=tn_list) stacked_points = np.concatenate(tp_list, axis=0) stacked_normals = np.concatenate(tn_list, axis=0) labels = np.array(tl_list, dtype=np.int64) model_inds = np.array(ti_list, dtype=np.int32) stack_lengths = np.array([tp.shape[0] for tp in tp_list], dtype=np.int32) scales = np.array(s_list, dtype=np.float32) rots = np.stack(R_list, axis=0) # Input features stacked_features = np.ones_like(stacked_points[:, :1], dtype=np.float32) if self.config.in_features_dim == 1: pass elif self.config.in_features_dim == 4: stacked_features = np.hstack((stacked_features, stacked_normals)) else: raise ValueError( "Only accepted input dimensions are 1, 4 and 7 (without and with XYZ)" ) ####################### # Create network inputs ####################### # # Points, neighbors, pooling indices for each layers # # Get the whole input list input_list = self.classification_inputs( stacked_points, stacked_features, labels, stack_lengths ) # Add scale and rotation for testing input_list += [scales, rots, model_inds] return input_list def load_subsampled_clouds(self, orient_correction): # Restart timer t0 = time.time() # Load wanted points if possible if self.train: split = "training" else: split = "test" print( "\nLoading {:s} points subsampled at {:.3f}".format( split, self.config.first_subsampling_dl ) ) filename = join( self.path, "{:s}_{:.3f}_record.pkl".format(split, self.config.first_subsampling_dl), ) if exists(filename): with open(filename, "rb") as file: input_points, input_normals, input_labels = pickle.load(file) # Else compute them from original points else: # Collect training file names if self.train: names = np.loadtxt( join(self.path, "modelnet40_train.txt"), dtype=np.str ) else: names = np.loadtxt(join(self.path, "modelnet40_test.txt"), dtype=np.str) # Initialize containers input_points = [] input_normals = [] # Advanced display N = len(names) progress_n = 30 fmt_str = "[{:<" + str(progress_n) + "}] {:5.1f}%" # Collect point clouds for i, cloud_name in enumerate(names): # Read points class_folder = "_".join(cloud_name.split("_")[:-1]) txt_file = join(self.path, class_folder, cloud_name) + ".txt" data = np.loadtxt(txt_file, delimiter=",", dtype=np.float32) # Subsample them if self.config.first_subsampling_dl > 0: points, normals = grid_subsampling( data[:, :3], features=data[:, 3:], sampleDl=self.config.first_subsampling_dl, ) else: points = data[:, :3] normals = data[:, 3:] print("", end="\r") print( fmt_str.format("#" * ((i * progress_n) // N), 100 * i / N), end="", flush=True, ) # Add to list input_points += [points] input_normals += [normals] print("", end="\r") print(fmt_str.format("#" * progress_n, 100), end="", flush=True) print() # Get labels label_names = ["_".join(name.split("_")[:-1]) for name in names] input_labels = np.array([self.name_to_label[name] for name in label_names]) # Save for later use with open(filename, "wb") as file: pickle.dump((input_points, input_normals, input_labels), file) lengths = [p.shape[0] for p in input_points] sizes = [l * 4 * 6 for l in lengths] print( "{:.1f} MB loaded in {:.1f}s".format(np.sum(sizes) * 1e-6, time.time() - t0) ) if orient_correction: input_points = [pp[:, [0, 2, 1]] for pp in input_points] input_normals = [nn[:, [0, 2, 1]] for nn in input_normals] return input_points, input_normals, input_labels # ---------------------------------------------------------------------------------------------------------------------- # # Utility classes definition # \********************************/ class ModelNet40Sampler(Sampler): """Sampler for ModelNet40""" def __init__( self, dataset: ModelNet40Dataset, use_potential=True, balance_labels=False ): Sampler.__init__(self, dataset) # Does the sampler use potential for regular sampling self.use_potential = use_potential # Should be balance the classes when sampling self.balance_labels = balance_labels # Dataset used by the sampler (no copy is made in memory) self.dataset = dataset # Create potentials if self.use_potential: self.potentials = np.random.rand(len(dataset.input_labels)) * 0.1 + 0.1 else: self.potentials = None # Initialize value for batch limit (max number of points per batch). self.batch_limit = 10000 return def __iter__(self): """ Yield next batch indices here """ ########################################## # Initialize the list of generated indices ########################################## if self.use_potential: if self.balance_labels: gen_indices = [] pick_n = self.dataset.epoch_n // self.dataset.num_classes + 1 for i, l in enumerate(self.dataset.label_values): # Get the potentials of the objects of this class label_inds = np.where(np.equal(self.dataset.input_labels, l))[0] class_potentials = self.potentials[label_inds] # Get the indices to generate thanks to potentials if pick_n < class_potentials.shape[0]: pick_indices = np.argpartition(class_potentials, pick_n)[ :pick_n ] else: pick_indices = np.random.permutation(class_potentials.shape[0]) class_indices = label_inds[pick_indices] gen_indices.append(class_indices) # Stack the chosen indices of all classes gen_indices = np.random.permutation(np.hstack(gen_indices)) else: # Get indices with the minimum potential if self.dataset.epoch_n < self.potentials.shape[0]: gen_indices = np.argpartition( self.potentials, self.dataset.epoch_n )[: self.dataset.epoch_n] else: gen_indices = np.random.permutation(self.potentials.shape[0]) gen_indices = np.random.permutation(gen_indices) # Update potentials (Change the order for the next epoch) self.potentials[gen_indices] = np.ceil(self.potentials[gen_indices]) self.potentials[gen_indices] += ( np.random.rand(gen_indices.shape[0]) * 0.1 + 0.1 ) else: if self.balance_labels: pick_n = self.dataset.epoch_n // self.dataset.num_classes + 1 gen_indices = [] for l in self.dataset.label_values: label_inds = np.where(np.equal(self.dataset.input_labels, l))[0] rand_inds = np.random.choice(label_inds, size=pick_n, replace=True) gen_indices += [rand_inds] gen_indices = np.random.permutation(np.hstack(gen_indices)) else: gen_indices = np.random.permutation(self.dataset.num_models)[ : self.dataset.epoch_n ] ################ # Generator loop ################ # Initialize concatenation lists ti_list = [] batch_n = 0 # Generator loop for p_i in gen_indices: # Size of picked cloud n = self.dataset.input_points[p_i].shape[0] # In case batch is full, yield it and reset it if batch_n + n > self.batch_limit and batch_n > 0: yield np.array(ti_list, dtype=np.int32) ti_list = [] batch_n = 0 # Add data to current batch ti_list += [p_i] # Update batch size batch_n += n yield np.array(ti_list, dtype=np.int32) return 0 def __len__(self): """ The number of yielded samples is variable """ return None def calibration(self, dataloader, untouched_ratio=0.9, verbose=False): """ Method performing batch and neighbors calibration. Batch calibration: Set "batch_limit" (the maximum number of points allowed in every batch) so that the average batch size (number of stacked pointclouds) is the one asked. Neighbors calibration: Set the "neighborhood_limits" (the maximum number of neighbors allowed in convolutions) so that 90% of the neighborhoods remain untouched. There is a limit for each layer. """ ############################## # Previously saved calibration ############################## print("\nStarting Calibration (use verbose=True for more details)") t0 = time.time() redo = False # Batch limit # *********** # Load batch_limit dictionary batch_lim_file = join(self.dataset.path, "batch_limits.pkl") if exists(batch_lim_file): with open(batch_lim_file, "rb") as file: batch_lim_dict = pickle.load(file) else: batch_lim_dict = {} # Check if the batch limit associated with current parameters exists key = "{:.3f}_{:d}".format( self.dataset.config.first_subsampling_dl, self.dataset.config.batch_num ) if key in batch_lim_dict: self.batch_limit = batch_lim_dict[key] else: redo = True if verbose: print("\nPrevious calibration found:") print("Check batch limit dictionary") if key in batch_lim_dict: color = bcolors.OKGREEN v = str(int(batch_lim_dict[key])) else: color = bcolors.FAIL v = "?" print('{:}"{:s}": {:s}{:}'.format(color, key, v, bcolors.ENDC)) # Neighbors limit # *************** # Load neighb_limits dictionary neighb_lim_file = join(self.dataset.path, "neighbors_limits.pkl") if exists(neighb_lim_file): with open(neighb_lim_file, "rb") as file: neighb_lim_dict = pickle.load(file) else: neighb_lim_dict = {} # Check if the limit associated with current parameters exists (for each layer) neighb_limits = [] for layer_ind in range(self.dataset.config.num_layers): dl = self.dataset.config.first_subsampling_dl * (2**layer_ind) if self.dataset.config.deform_layers[layer_ind]: r = dl * self.dataset.config.deform_radius else: r = dl * self.dataset.config.conv_radius key = "{:.3f}_{:.3f}".format(dl, r) if key in neighb_lim_dict: neighb_limits += [neighb_lim_dict[key]] if len(neighb_limits) == self.dataset.config.num_layers: self.dataset.neighborhood_limits = neighb_limits else: redo = True if verbose: print("Check neighbors limit dictionary") for layer_ind in range(self.dataset.config.num_layers): dl = self.dataset.config.first_subsampling_dl * (2**layer_ind) if self.dataset.config.deform_layers[layer_ind]: r = dl * self.dataset.config.deform_radius else: r = dl * self.dataset.config.conv_radius key = "{:.3f}_{:.3f}".format(dl, r) if key in neighb_lim_dict: color = bcolors.OKGREEN v = str(neighb_lim_dict[key]) else: color = bcolors.FAIL v = "?" print('{:}"{:s}": {:s}{:}'.format(color, key, v, bcolors.ENDC)) if redo: ############################ # Neighbors calib parameters ############################ # From config parameter, compute higher bound of neighbors number in a neighborhood hist_n = int( np.ceil(4 / 3 * np.pi * (self.dataset.config.conv_radius + 1) ** 3) ) # Histogram of neighborhood sizes neighb_hists = np.zeros( (self.dataset.config.num_layers, hist_n), dtype=np.int32 ) ######################## # Batch calib parameters ######################## # Estimated average batch size and target value estim_b = 0 target_b = self.dataset.config.batch_num # Calibration parameters low_pass_T = 10 Kp = 100.0 finer = False # Convergence parameters smooth_errors = [] converge_threshold = 0.1 # Loop parameters last_display = time.time() i = 0 breaking = False ##################### # Perform calibration ##################### for epoch in range(10): for batch_i, batch in enumerate(dataloader): # Update neighborhood histogram counts = [ np.sum(neighb_mat.numpy() < neighb_mat.shape[0], axis=1) for neighb_mat in batch.neighbors ] hists = [np.bincount(c, minlength=hist_n)[:hist_n] for c in counts] neighb_hists += np.vstack(hists) # batch length b = len(batch.labels) # Update estim_b (low pass filter) estim_b += (b - estim_b) / low_pass_T # Estimate error (noisy) error = target_b - b # Save smooth errors for convergene check smooth_errors.append(target_b - estim_b) if len(smooth_errors) > 10: smooth_errors = smooth_errors[1:] # Update batch limit with P controller self.batch_limit += Kp * error # finer low pass filter when closing in if not finer and np.abs(estim_b - target_b) < 1: low_pass_T = 100 finer = True # Convergence if finer and np.max(np.abs(smooth_errors)) < converge_threshold: breaking = True break i += 1 t = time.time() # Console display (only one per second) if verbose and (t - last_display) > 1.0: last_display = t message = "Step {:5d} estim_b ={:5.2f} batch_limit ={:7d}" print(message.format(i, estim_b, int(self.batch_limit))) if breaking: break # Use collected neighbor histogram to get neighbors limit cumsum = np.cumsum(neighb_hists.T, axis=0) percentiles = np.sum( cumsum < (untouched_ratio * cumsum[hist_n - 1, :]), axis=0 ) self.dataset.neighborhood_limits = percentiles if verbose: # Crop histogram while np.sum(neighb_hists[:, -1]) == 0: neighb_hists = neighb_hists[:, :-1] hist_n = neighb_hists.shape[1] print("\n**************************************************\n") line0 = "neighbors_num " for layer in range(neighb_hists.shape[0]): line0 += "| layer {:2d} ".format(layer) print(line0) for neighb_size in range(hist_n): line0 = " {:4d} ".format(neighb_size) for layer in range(neighb_hists.shape[0]): if neighb_size > percentiles[layer]: color = bcolors.FAIL else: color = bcolors.OKGREEN line0 += "|{:}{:10d}{:} ".format( color, neighb_hists[layer, neighb_size], bcolors.ENDC ) print(line0) print("\n**************************************************\n") print("\nchosen neighbors limits: ", percentiles) print() # Save batch_limit dictionary key = "{:.3f}_{:d}".format( self.dataset.config.first_subsampling_dl, self.dataset.config.batch_num ) batch_lim_dict[key] = self.batch_limit with open(batch_lim_file, "wb") as file: pickle.dump(batch_lim_dict, file) # Save neighb_limit dictionary for layer_ind in range(self.dataset.config.num_layers): dl = self.dataset.config.first_subsampling_dl * (2**layer_ind) if self.dataset.config.deform_layers[layer_ind]: r = dl * self.dataset.config.deform_radius else: r = dl * self.dataset.config.conv_radius key = "{:.3f}_{:.3f}".format(dl, r) neighb_lim_dict[key] = self.dataset.neighborhood_limits[layer_ind] with open(neighb_lim_file, "wb") as file: pickle.dump(neighb_lim_dict, file) print("Calibration done in {:.1f}s\n".format(time.time() - t0)) return class ModelNet40CustomBatch: """Custom batch definition with memory pinning for ModelNet40""" def __init__(self, input_list): # Get rid of batch dimension input_list = input_list[0] # Number of layers L = (len(input_list) - 5) // 4 # Extract input tensors from the list of numpy array ind = 0 self.points = [ torch.from_numpy(nparray) for nparray in input_list[ind : ind + L] ] ind += L self.neighbors = [ torch.from_numpy(nparray) for nparray in input_list[ind : ind + L] ] ind += L self.pools = [ torch.from_numpy(nparray) for nparray in input_list[ind : ind + L] ] ind += L self.lengths = [ torch.from_numpy(nparray) for nparray in input_list[ind : ind + L] ] ind += L self.features = torch.from_numpy(input_list[ind]) ind += 1 self.labels = torch.from_numpy(input_list[ind]) ind += 1 self.scales = torch.from_numpy(input_list[ind]) ind += 1 self.rots = torch.from_numpy(input_list[ind]) ind += 1 self.model_inds = torch.from_numpy(input_list[ind]) return def pin_memory(self): """ Manual pinning of the memory """ self.points = [in_tensor.pin_memory() for in_tensor in self.points] self.neighbors = [in_tensor.pin_memory() for in_tensor in self.neighbors] self.pools = [in_tensor.pin_memory() for in_tensor in self.pools] self.lengths = [in_tensor.pin_memory() for in_tensor in self.lengths] self.features = self.features.pin_memory() self.labels = self.labels.pin_memory() self.scales = self.scales.pin_memory() self.rots = self.rots.pin_memory() self.model_inds = self.model_inds.pin_memory() return self def to(self, device): self.points = [in_tensor.to(device) for in_tensor in self.points] self.neighbors = [in_tensor.to(device) for in_tensor in self.neighbors] self.pools = [in_tensor.to(device) for in_tensor in self.pools] self.lengths = [in_tensor.to(device) for in_tensor in self.lengths] self.features = self.features.to(device) self.labels = self.labels.to(device) self.scales = self.scales.to(device) self.rots = self.rots.to(device) self.model_inds = self.model_inds.to(device) return self def unstack_points(self, layer=None): """Unstack the points""" return self.unstack_elements("points", layer) def unstack_neighbors(self, layer=None): """Unstack the neighbors indices""" return self.unstack_elements("neighbors", layer) def unstack_pools(self, layer=None): """Unstack the pooling indices""" return self.unstack_elements("pools", layer) def unstack_elements(self, element_name, layer=None, to_numpy=True): """ Return a list of the stacked elements in the batch at a certain layer. If no layer is given, then return all layers """ if element_name == "points": elements = self.points elif element_name == "neighbors": elements = self.neighbors elif element_name == "pools": elements = self.pools[:-1] else: raise ValueError("Unknown element name: {:s}".format(element_name)) all_p_list = [] for layer_i, layer_elems in enumerate(elements): if layer is None or layer == layer_i: i0 = 0 p_list = [] if element_name == "pools": lengths = self.lengths[layer_i + 1] else: lengths = self.lengths[layer_i] for b_i, length in enumerate(lengths): elem = layer_elems[i0 : i0 + length] if element_name == "neighbors": elem[elem >= self.points[layer_i].shape[0]] = -1 elem[elem >= 0] -= i0 elif element_name == "pools": elem[elem >= self.points[layer_i].shape[0]] = -1 elem[elem >= 0] -= torch.sum(self.lengths[layer_i][:b_i]) i0 += length if to_numpy: p_list.append(elem.numpy()) else: p_list.append(elem) if layer == layer_i: return p_list all_p_list.append(p_list) return all_p_list def ModelNet40Collate(batch_data): return ModelNet40CustomBatch(batch_data) # ---------------------------------------------------------------------------------------------------------------------- # # Debug functions # \*********************/ def debug_sampling(dataset, sampler, loader): """Shows which labels are sampled according to strategy chosen""" label_sum = np.zeros((dataset.num_classes), dtype=np.int32) for epoch in range(10): for batch_i, (points, normals, labels, indices, in_sizes) in enumerate(loader): # print(batch_i, tuple(points.shape), tuple(normals.shape), labels, indices, in_sizes) label_sum += np.bincount(labels.numpy(), minlength=dataset.num_classes) print(label_sum) # print(sampler.potentials[:6]) print("******************") print("*******************************************") _, counts = np.unique(dataset.input_labels, return_counts=True) print(counts) def debug_timing(dataset, sampler, loader): """Timing of generator function""" t = [time.time()] last_display = time.time() mean_dt = np.zeros(2) estim_b = dataset.config.batch_num for epoch in range(10): for batch_i, batch in enumerate(loader): # print(batch_i, tuple(points.shape), tuple(normals.shape), labels, indices, in_sizes) # New time t = t[-1:] t += [time.time()] # Update estim_b (low pass filter) estim_b += (len(batch.labels) - estim_b) / 100 # Pause simulating computations time.sleep(0.050) t += [time.time()] # Average timing mean_dt = 0.9 * mean_dt + 0.1 * (np.array(t[1:]) - np.array(t[:-1])) # Console display (only one per second) if (t[-1] - last_display) > -1.0: last_display = t[-1] message = "Step {:08d} -> (ms/batch) {:8.2f} {:8.2f} / batch = {:.2f}" print( message.format( batch_i, 1000 * mean_dt[0], 1000 * mean_dt[1], estim_b ) ) print("************* Epoch ended *************") _, counts = np.unique(dataset.input_labels, return_counts=True) print(counts) def debug_show_clouds(dataset, sampler, loader): for epoch in range(10): L = dataset.config.num_layers for batch_i, batch in enumerate(loader): # Print characteristics of input tensors print("\nPoints tensors") for i in range(L): print(batch.points[i].dtype, batch.points[i].shape) print("\nNeigbors tensors") for i in range(L): print(batch.neighbors[i].dtype, batch.neighbors[i].shape) print("\nPools tensors") for i in range(L): print(batch.pools[i].dtype, batch.pools[i].shape) print("\nStack lengths") for i in range(L): print(batch.lengths[i].dtype, batch.lengths[i].shape) print("\nFeatures") print(batch.features.dtype, batch.features.shape) print("\nLabels") print(batch.labels.dtype, batch.labels.shape) print("\nAugment Scales") print(batch.scales.dtype, batch.scales.shape) print("\nAugment Rotations") print(batch.rots.dtype, batch.rots.shape) print("\nModel indices") print(batch.model_inds.dtype, batch.model_inds.shape) print("\nAre input tensors pinned") print(batch.neighbors[0].is_pinned()) print(batch.neighbors[-1].is_pinned()) print(batch.points[0].is_pinned()) print(batch.points[-1].is_pinned()) print(batch.labels.is_pinned()) print(batch.scales.is_pinned()) print(batch.rots.is_pinned()) print(batch.model_inds.is_pinned()) show_input_batch(batch) print("*******************************************") _, counts = np.unique(dataset.input_labels, return_counts=True) print(counts) def debug_batch_and_neighbors_calib(dataset, sampler, loader): """Timing of generator function""" t = [time.time()] last_display = time.time() mean_dt = np.zeros(2) for epoch in range(10): for batch_i, input_list in enumerate(loader): # print(batch_i, tuple(points.shape), tuple(normals.shape), labels, indices, in_sizes) # New time t = t[-1:] t += [time.time()] # Pause simulating computations time.sleep(0.01) t += [time.time()] # Average timing mean_dt = 0.9 * mean_dt + 0.1 * (np.array(t[1:]) - np.array(t[:-1])) # Console display (only one per second) if (t[-1] - last_display) > 1.0: last_display = t[-1] message = "Step {:08d} -> Average timings (ms/batch) {:8.2f} {:8.2f} " print(message.format(batch_i, 1000 * mean_dt[0], 1000 * mean_dt[1])) print("************* Epoch ended *************") _, counts = np.unique(dataset.input_labels, return_counts=True) print(counts) class ModelNet40WorkerInitDebug: """Callable class that Initializes workers.""" def __init__(self, dataset): self.dataset = dataset return def __call__(self, worker_id): # Print workers info worker_info = get_worker_info() print(worker_info) # Get associated dataset dataset = worker_info.dataset # the dataset copy in this worker process # In windows, each worker has its own copy of the dataset. In Linux, this is shared in memory print(dataset.input_labels.__array_interface__["data"]) print(worker_info.dataset.input_labels.__array_interface__["data"]) print(self.dataset.input_labels.__array_interface__["data"]) # configure the dataset to only process the split workload return