#
#
#      0=================================0
#      |    Kernel Point Convolutions    |
#      0=================================0
#
#
# ----------------------------------------------------------------------------------------------------------------------
#
#      Class handling ModelNet40 dataset.
#      Implements a Dataset, a Sampler, and a collate_fn
#
# ----------------------------------------------------------------------------------------------------------------------
#
#      Hugues THOMAS - 11/06/2018
#


# ----------------------------------------------------------------------------------------------------------------------
#
#           Imports and global variables
#       \**********************************/
#

# Common libs
import time
import numpy as np
import pickle
import torch

# OS functions
from os.path import exists, join

# Dataset parent class
from datasetss.common import PointCloudDataset
from torch.utils.data import Sampler, get_worker_info
from utils.mayavi_visu import *

from datasetss.common import grid_subsampling
from utils.config import bcolors

# ----------------------------------------------------------------------------------------------------------------------
#
#           Dataset class definition
#       \******************************/


class ModelNet40Dataset(PointCloudDataset):
    """Class to handle Modelnet 40 dataset."""

    def __init__(self, config, train=True, orient_correction=True):
        """
        This dataset is small enough to be stored in-memory, so load all point clouds here
        """
        PointCloudDataset.__init__(self, "ModelNet40")

        ############
        # Parameters
        ############

        # Dict from labels to names
        self.label_to_names = {
            0: "airplane",
            1: "bathtub",
            2: "bed",
            3: "bench",
            4: "bookshelf",
            5: "bottle",
            6: "bowl",
            7: "car",
            8: "chair",
            9: "cone",
            10: "cup",
            11: "curtain",
            12: "desk",
            13: "door",
            14: "dresser",
            15: "flower_pot",
            16: "glass_box",
            17: "guitar",
            18: "keyboard",
            19: "lamp",
            20: "laptop",
            21: "mantel",
            22: "monitor",
            23: "night_stand",
            24: "person",
            25: "piano",
            26: "plant",
            27: "radio",
            28: "range_hood",
            29: "sink",
            30: "sofa",
            31: "stairs",
            32: "stool",
            33: "table",
            34: "tent",
            35: "toilet",
            36: "tv_stand",
            37: "vase",
            38: "wardrobe",
            39: "xbox",
        }

        # Initialize a bunch of variables concerning class labels
        self.init_labels()

        # List of classes ignored during training (can be empty)
        self.ignored_labels = np.array([])

        # Dataset folder
        self.path = "./Data/ModelNet40"

        # Type of task conducted on this dataset
        self.dataset_task = "classification"

        # Update number of classes and dataset task in configuration
        config.num_classes = self.num_classes
        config.dataset_task = self.dataset_task

        # Parameters from config
        self.config = config

        # Training or test set
        self.train = train

        # Number of models and models used per epoch
        if self.train:
            self.num_models = 9843
            if (
                config.epoch_steps
                and config.epoch_steps * config.batch_num < self.num_models
            ):
                self.epoch_n = config.epoch_steps * config.batch_num
            else:
                self.epoch_n = self.num_models
        else:
            self.num_models = 2468
            self.epoch_n = min(
                self.num_models, config.validation_size * config.batch_num
            )

        #############
        # Load models
        #############

        if 0 < self.config.first_subsampling_dl <= 0.01:
            raise ValueError("subsampling_parameter too low (should be over 1 cm)")

        (
            self.input_points,
            self.input_normals,
            self.input_labels,
        ) = self.load_subsampled_clouds(orient_correction)

        return

    def __len__(self):
        """
        Return the length of data here
        """
        return self.num_models

    def __getitem__(self, idx_list):
        """
        The main thread gives a list of indices to load a batch. Each worker is going to work in parallel to load a
        different list of indices.
        """

        ###################
        # Gather batch data
        ###################

        tp_list = []
        tn_list = []
        tl_list = []
        ti_list = []
        s_list = []
        R_list = []

        for p_i in idx_list:
            # Get points and labels
            points = self.input_points[p_i].astype(np.float32)
            normals = self.input_normals[p_i].astype(np.float32)
            label = self.label_to_idx[self.input_labels[p_i]]

            # Data augmentation
            points, normals, scale, R = self.augmentation_transform(points, normals)

            # Stack batch
            tp_list += [points]
            tn_list += [normals]
            tl_list += [label]
            ti_list += [p_i]
            s_list += [scale]
            R_list += [R]

        ###################
        # Concatenate batch
        ###################

        # show_ModelNet_examples(tp_list, cloud_normals=tn_list)

        stacked_points = np.concatenate(tp_list, axis=0)
        stacked_normals = np.concatenate(tn_list, axis=0)
        labels = np.array(tl_list, dtype=np.int64)
        model_inds = np.array(ti_list, dtype=np.int32)
        stack_lengths = np.array([tp.shape[0] for tp in tp_list], dtype=np.int32)
        scales = np.array(s_list, dtype=np.float32)
        rots = np.stack(R_list, axis=0)

        # Input features
        stacked_features = np.ones_like(stacked_points[:, :1], dtype=np.float32)
        if self.config.in_features_dim == 1:
            pass
        elif self.config.in_features_dim == 4:
            stacked_features = np.hstack((stacked_features, stacked_normals))
        else:
            raise ValueError(
                "Only accepted input dimensions are 1, 4 and 7 (without and with XYZ)"
            )

        #######################
        # Create network inputs
        #######################
        #
        # Points, neighbors, pooling indices for each layer
        #

        # Get the whole input list
        input_list = self.classification_inputs(
            stacked_points, stacked_features, labels, stack_lengths
        )

        # Add scale and rotation for testing
        input_list += [scales, rots, model_inds]

        return input_list

    def load_subsampled_clouds(self, orient_correction):
        # Restart timer
        t0 = time.time()

        # Load wanted points if possible
        if self.train:
            split = "training"
        else:
            split = "test"

        print(
            "\nLoading {:s} points subsampled at {:.3f}".format(
                split, self.config.first_subsampling_dl
            )
        )
        filename = join(
            self.path,
            "{:s}_{:.3f}_record.pkl".format(split, self.config.first_subsampling_dl),
        )

        if exists(filename):
            with open(filename, "rb") as file:
                input_points, input_normals, input_labels = pickle.load(file)

        # Else compute them from original points
        else:
            # Collect training file names
            if self.train:
                names = np.loadtxt(
                    join(self.path, "modelnet40_train.txt"), dtype=str
                )
            else:
                names = np.loadtxt(join(self.path, "modelnet40_test.txt"), dtype=str)

            # Initialize containers
            input_points = []
            input_normals = []

            # Advanced display
            N = len(names)
            progress_n = 30
            fmt_str = "[{:<" + str(progress_n) + "}] {:5.1f}%"

            # Collect point clouds
            for i, cloud_name in enumerate(names):
                # Read points
                class_folder = "_".join(cloud_name.split("_")[:-1])
                txt_file = join(self.path, class_folder, cloud_name) + ".txt"
                data = np.loadtxt(txt_file, delimiter=",", dtype=np.float32)

                # Subsample them
                if self.config.first_subsampling_dl > 0:
                    points, normals = grid_subsampling(
                        data[:, :3],
                        features=data[:, 3:],
                        sampleDl=self.config.first_subsampling_dl,
                    )
                else:
                    points = data[:, :3]
                    normals = data[:, 3:]

                print("", end="\r")
                print(
                    fmt_str.format("#" * ((i * progress_n) // N), 100 * i / N),
                    end="",
                    flush=True,
                )

                # Add to list
                input_points += [points]
                input_normals += [normals]

            print("", end="\r")
            print(fmt_str.format("#" * progress_n, 100), end="", flush=True)
            print()

            # Get labels
            label_names = ["_".join(name.split("_")[:-1]) for name in names]
            input_labels = np.array([self.name_to_label[name] for name in label_names])

            # Save for later use
            with open(filename, "wb") as file:
                pickle.dump((input_points, input_normals, input_labels), file)

        lengths = [p.shape[0] for p in input_points]
        sizes = [l * 4 * 6 for l in lengths]
        print(
            "{:.1f} MB loaded in {:.1f}s".format(np.sum(sizes) * 1e-6, time.time() - t0)
        )

        if orient_correction:
            input_points = [pp[:, [0, 2, 1]] for pp in input_points]
            input_normals = [nn[:, [0, 2, 1]] for nn in input_normals]

        return input_points, input_normals, input_labels


# ----------------------------------------------------------------------------------------------------------------------
#
#           Utility classes definition
#       \********************************/


class ModelNet40Sampler(Sampler):
    """Sampler for ModelNet40"""

    def __init__(
        self, dataset: ModelNet40Dataset, use_potential=True, balance_labels=False
    ):
        Sampler.__init__(self, dataset)

        # Does the sampler use potential for regular sampling
        self.use_potential = use_potential

        # Should the classes be balanced when sampling
        self.balance_labels = balance_labels

        # Dataset used by the sampler (no copy is made in memory)
        self.dataset = dataset

        # Create potentials
        if self.use_potential:
            self.potentials = np.random.rand(len(dataset.input_labels)) * 0.1 + 0.1
        else:
            self.potentials = None

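        # Note: potentials act as sampling priorities. __iter__ picks the models with the
        # lowest potential first, then raises the potential of the picked models so that
        # the other models are favored in the following epochs.
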
        # Initialize value for batch limit (max number of points per batch).
        self.batch_limit = 10000

        return

    def __iter__(self):
        """
        Yield next batch indices here
        """

        ##########################################
        # Initialize the list of generated indices
        ##########################################

        if self.use_potential:
            if self.balance_labels:
                gen_indices = []
                pick_n = self.dataset.epoch_n // self.dataset.num_classes + 1
                for i, l in enumerate(self.dataset.label_values):
                    # Get the potentials of the objects of this class
                    label_inds = np.where(np.equal(self.dataset.input_labels, l))[0]
                    class_potentials = self.potentials[label_inds]

                    # Get the indices to generate thanks to potentials
                    if pick_n < class_potentials.shape[0]:
                        pick_indices = np.argpartition(class_potentials, pick_n)[
                            :pick_n
                        ]
                    else:
                        pick_indices = np.random.permutation(class_potentials.shape[0])
                    class_indices = label_inds[pick_indices]
                    gen_indices.append(class_indices)

                # Stack the chosen indices of all classes
                gen_indices = np.random.permutation(np.hstack(gen_indices))

            else:
                # Get indices with the minimum potential
                if self.dataset.epoch_n < self.potentials.shape[0]:
                    gen_indices = np.argpartition(
                        self.potentials, self.dataset.epoch_n
                    )[: self.dataset.epoch_n]
                else:
                    gen_indices = np.random.permutation(self.potentials.shape[0])
                gen_indices = np.random.permutation(gen_indices)

            # Update potentials (Change the order for the next epoch)
            self.potentials[gen_indices] = np.ceil(self.potentials[gen_indices])
            self.potentials[gen_indices] += (
                np.random.rand(gen_indices.shape[0]) * 0.1 + 0.1
            )

        else:
            if self.balance_labels:
                pick_n = self.dataset.epoch_n // self.dataset.num_classes + 1
                gen_indices = []
                for l in self.dataset.label_values:
                    label_inds = np.where(np.equal(self.dataset.input_labels, l))[0]
                    rand_inds = np.random.choice(label_inds, size=pick_n, replace=True)
                    gen_indices += [rand_inds]
                gen_indices = np.random.permutation(np.hstack(gen_indices))
            else:
                gen_indices = np.random.permutation(self.dataset.num_models)[
                    : self.dataset.epoch_n
                ]

        ################
        # Generator loop
        ################

        # Initialize concatenation lists
        ti_list = []
        batch_n = 0

        # Generator loop
        for p_i in gen_indices:
            # Size of picked cloud
            n = self.dataset.input_points[p_i].shape[0]

            # In case batch is full, yield it and reset it
            if batch_n + n > self.batch_limit and batch_n > 0:
                yield np.array(ti_list, dtype=np.int32)
                ti_list = []
                batch_n = 0

            # Add data to current batch
            ti_list += [p_i]

            # Update batch size
            batch_n += n

        yield np.array(ti_list, dtype=np.int32)

        return 0

    def __len__(self):
        """
        The number of yielded samples is variable
        """
        return None

    def calibration(self, dataloader, untouched_ratio=0.9, verbose=False):
        """
        Method performing batch and neighbors calibration.
        Batch calibration: Set "batch_limit" (the maximum number of points allowed in every batch) so that the
        average batch size (number of stacked pointclouds) is the one asked.
        Neighbors calibration: Set the "neighborhood_limits" (the maximum number of neighbors allowed in convolutions)
        so that 90% of the neighborhoods remain untouched. There is a limit for each layer.
        """

        ##############################
        # Previously saved calibration
        ##############################

        print("\nStarting Calibration (use verbose=True for more details)")
        t0 = time.time()

        redo = False

        # Batch limit
        # ***********

        # Load batch_limit dictionary
        batch_lim_file = join(self.dataset.path, "batch_limits.pkl")
        if exists(batch_lim_file):
            with open(batch_lim_file, "rb") as file:
                batch_lim_dict = pickle.load(file)
        else:
            batch_lim_dict = {}

        # Check if the batch limit associated with current parameters exists
        key = "{:.3f}_{:d}".format(
            self.dataset.config.first_subsampling_dl, self.dataset.config.batch_num
        )
        if key in batch_lim_dict:
            self.batch_limit = batch_lim_dict[key]
        else:
            redo = True

        if verbose:
            print("\nPrevious calibration found:")
            print("Check batch limit dictionary")
            if key in batch_lim_dict:
                color = bcolors.OKGREEN
                v = str(int(batch_lim_dict[key]))
            else:
                color = bcolors.FAIL
                v = "?"
            print('{:}"{:s}": {:s}{:}'.format(color, key, v, bcolors.ENDC))

        # Neighbors limit
        # ***************

        # Load neighb_limits dictionary
        neighb_lim_file = join(self.dataset.path, "neighbors_limits.pkl")
        if exists(neighb_lim_file):
            with open(neighb_lim_file, "rb") as file:
                neighb_lim_dict = pickle.load(file)
        else:
            neighb_lim_dict = {}

        # Check if the limit associated with current parameters exists (for each layer)
        neighb_limits = []
        for layer_ind in range(self.dataset.config.num_layers):
            dl = self.dataset.config.first_subsampling_dl * (2**layer_ind)
            if self.dataset.config.deform_layers[layer_ind]:
                r = dl * self.dataset.config.deform_radius
            else:
                r = dl * self.dataset.config.conv_radius

            key = "{:.3f}_{:.3f}".format(dl, r)
            if key in neighb_lim_dict:
                neighb_limits += [neighb_lim_dict[key]]

        if len(neighb_limits) == self.dataset.config.num_layers:
            self.dataset.neighborhood_limits = neighb_limits
        else:
            redo = True

        if verbose:
            print("Check neighbors limit dictionary")
            for layer_ind in range(self.dataset.config.num_layers):
                dl = self.dataset.config.first_subsampling_dl * (2**layer_ind)
                if self.dataset.config.deform_layers[layer_ind]:
                    r = dl * self.dataset.config.deform_radius
                else:
                    r = dl * self.dataset.config.conv_radius
                key = "{:.3f}_{:.3f}".format(dl, r)

                if key in neighb_lim_dict:
                    color = bcolors.OKGREEN
                    v = str(neighb_lim_dict[key])
                else:
                    color = bcolors.FAIL
                    v = "?"
                print('{:}"{:s}": {:s}{:}'.format(color, key, v, bcolors.ENDC))

        if redo:
            ############################
            # Neighbors calib parameters
            ############################

            # From config parameter, compute higher bound of neighbors number in a neighborhood
            hist_n = int(
                np.ceil(4 / 3 * np.pi * (self.dataset.config.conv_radius + 1) ** 3)
            )

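            # (This bound assumes at most one point per subsampling cell: a ball of radius
            # conv_radius + 1, measured in grid units, can then hold at most about
            # 4/3 * pi * (conv_radius + 1)^3 points.)
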
            # Histogram of neighborhood sizes
            neighb_hists = np.zeros(
                (self.dataset.config.num_layers, hist_n), dtype=np.int32
            )

            ########################
            # Batch calib parameters
            ########################

            # Estimated average batch size and target value
            estim_b = 0
            target_b = self.dataset.config.batch_num

            # Calibration parameters
            low_pass_T = 10
            Kp = 100.0
            finer = False

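            # The loop below behaves like a proportional controller: every batch, the error
            # between the target batch size and the observed one is scaled by the gain Kp and
            # added to batch_limit, while estim_b keeps a low-pass filtered estimate of the
            # actual batch size (time constant low_pass_T) that is used to check convergence.
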
            # Convergence parameters
            smooth_errors = []
            converge_threshold = 0.1

            # Loop parameters
            last_display = time.time()
            i = 0
            breaking = False

            #####################
            # Perform calibration
            #####################

            for epoch in range(10):
                for batch_i, batch in enumerate(dataloader):
                    # Update neighborhood histogram
                    counts = [
                        np.sum(neighb_mat.numpy() < neighb_mat.shape[0], axis=1)
                        for neighb_mat in batch.neighbors
                    ]
                    hists = [np.bincount(c, minlength=hist_n)[:hist_n] for c in counts]
                    neighb_hists += np.vstack(hists)

                    # batch length
                    b = len(batch.labels)

                    # Update estim_b (low pass filter)
                    estim_b += (b - estim_b) / low_pass_T

                    # Estimate error (noisy)
                    error = target_b - b

                    # Save smooth errors for convergence check
                    smooth_errors.append(target_b - estim_b)
                    if len(smooth_errors) > 10:
                        smooth_errors = smooth_errors[1:]

                    # Update batch limit with P controller
                    self.batch_limit += Kp * error

                    # finer low pass filter when closing in
                    if not finer and np.abs(estim_b - target_b) < 1:
                        low_pass_T = 100
                        finer = True

                    # Convergence
                    if finer and np.max(np.abs(smooth_errors)) < converge_threshold:
                        breaking = True
                        break

                    i += 1
                    t = time.time()

                    # Console display (only one per second)
                    if verbose and (t - last_display) > 1.0:
                        last_display = t
                        message = "Step {:5d} estim_b ={:5.2f} batch_limit ={:7d}"
                        print(message.format(i, estim_b, int(self.batch_limit)))

                if breaking:
                    break

            # Use collected neighbor histogram to get neighbors limit
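            # For each layer, this keeps the smallest neighborhood size below which a fraction
            # untouched_ratio (90% by default) of all observed neighborhoods fall; larger
            # neighborhoods will be cropped to that limit.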
            cumsum = np.cumsum(neighb_hists.T, axis=0)
            percentiles = np.sum(
                cumsum < (untouched_ratio * cumsum[hist_n - 1, :]), axis=0
            )
            self.dataset.neighborhood_limits = percentiles

            if verbose:
                # Crop histogram
                while np.sum(neighb_hists[:, -1]) == 0:
                    neighb_hists = neighb_hists[:, :-1]
                hist_n = neighb_hists.shape[1]

                print("\n**************************************************\n")
                line0 = "neighbors_num "
                for layer in range(neighb_hists.shape[0]):
                    line0 += "| layer {:2d} ".format(layer)
                print(line0)
                for neighb_size in range(hist_n):
                    line0 = " {:4d} ".format(neighb_size)
                    for layer in range(neighb_hists.shape[0]):
                        if neighb_size > percentiles[layer]:
                            color = bcolors.FAIL
                        else:
                            color = bcolors.OKGREEN
                        line0 += "|{:}{:10d}{:} ".format(
                            color, neighb_hists[layer, neighb_size], bcolors.ENDC
                        )

                    print(line0)

                print("\n**************************************************\n")
                print("\nchosen neighbors limits: ", percentiles)
                print()

            # Save batch_limit dictionary
            key = "{:.3f}_{:d}".format(
                self.dataset.config.first_subsampling_dl, self.dataset.config.batch_num
            )
            batch_lim_dict[key] = self.batch_limit
            with open(batch_lim_file, "wb") as file:
                pickle.dump(batch_lim_dict, file)

            # Save neighb_limit dictionary
            for layer_ind in range(self.dataset.config.num_layers):
                dl = self.dataset.config.first_subsampling_dl * (2**layer_ind)
                if self.dataset.config.deform_layers[layer_ind]:
                    r = dl * self.dataset.config.deform_radius
                else:
                    r = dl * self.dataset.config.conv_radius
                key = "{:.3f}_{:.3f}".format(dl, r)
                neighb_lim_dict[key] = self.dataset.neighborhood_limits[layer_ind]
            with open(neighb_lim_file, "wb") as file:
                pickle.dump(neighb_lim_dict, file)

        print("Calibration done in {:.1f}s\n".format(time.time() - t0))
        return


class ModelNet40CustomBatch:
    """Custom batch definition with memory pinning for ModelNet40"""

    def __init__(self, input_list):
        # Get rid of batch dimension
        input_list = input_list[0]

        # Number of layers
        L = (len(input_list) - 5) // 4

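        # The flat list produced by ModelNet40Dataset.__getitem__ contains, in order:
        # L point arrays, L neighbor arrays, L pooling arrays and L stack-length arrays
        # (one per layer), followed by features, labels, scales, rotations and model
        # indices, hence a total length of 4 * L + 5.
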
        # Extract input tensors from the list of numpy array
        ind = 0
        self.points = [
            torch.from_numpy(nparray) for nparray in input_list[ind : ind + L]
        ]
        ind += L
        self.neighbors = [
            torch.from_numpy(nparray) for nparray in input_list[ind : ind + L]
        ]
        ind += L
        self.pools = [
            torch.from_numpy(nparray) for nparray in input_list[ind : ind + L]
        ]
        ind += L
        self.lengths = [
            torch.from_numpy(nparray) for nparray in input_list[ind : ind + L]
        ]
        ind += L
        self.features = torch.from_numpy(input_list[ind])
        ind += 1
        self.labels = torch.from_numpy(input_list[ind])
        ind += 1
        self.scales = torch.from_numpy(input_list[ind])
        ind += 1
        self.rots = torch.from_numpy(input_list[ind])
        ind += 1
        self.model_inds = torch.from_numpy(input_list[ind])

        return

    def pin_memory(self):
        """
        Manual pinning of the memory
        """

        self.points = [in_tensor.pin_memory() for in_tensor in self.points]
        self.neighbors = [in_tensor.pin_memory() for in_tensor in self.neighbors]
        self.pools = [in_tensor.pin_memory() for in_tensor in self.pools]
        self.lengths = [in_tensor.pin_memory() for in_tensor in self.lengths]
        self.features = self.features.pin_memory()
        self.labels = self.labels.pin_memory()
        self.scales = self.scales.pin_memory()
        self.rots = self.rots.pin_memory()
        self.model_inds = self.model_inds.pin_memory()

        return self

    def to(self, device):
        self.points = [in_tensor.to(device) for in_tensor in self.points]
        self.neighbors = [in_tensor.to(device) for in_tensor in self.neighbors]
        self.pools = [in_tensor.to(device) for in_tensor in self.pools]
        self.lengths = [in_tensor.to(device) for in_tensor in self.lengths]
        self.features = self.features.to(device)
        self.labels = self.labels.to(device)
        self.scales = self.scales.to(device)
        self.rots = self.rots.to(device)
        self.model_inds = self.model_inds.to(device)

        return self

    def unstack_points(self, layer=None):
        """Unstack the points"""
        return self.unstack_elements("points", layer)

    def unstack_neighbors(self, layer=None):
        """Unstack the neighbors indices"""
        return self.unstack_elements("neighbors", layer)

    def unstack_pools(self, layer=None):
        """Unstack the pooling indices"""
        return self.unstack_elements("pools", layer)

    def unstack_elements(self, element_name, layer=None, to_numpy=True):
        """
        Return a list of the stacked elements in the batch at a certain layer. If no layer is given, then return all
        layers
        """

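        # Note: in the stacked format, indices that point past the end of a layer's point
        # tensor are "shadow" padding entries; they are set to -1 below and the remaining
        # indices are shifted back to each cloud's local numbering.
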
        if element_name == "points":
            elements = self.points
        elif element_name == "neighbors":
            elements = self.neighbors
        elif element_name == "pools":
            elements = self.pools[:-1]
        else:
            raise ValueError("Unknown element name: {:s}".format(element_name))

        all_p_list = []
        for layer_i, layer_elems in enumerate(elements):
            if layer is None or layer == layer_i:
                i0 = 0
                p_list = []
                if element_name == "pools":
                    lengths = self.lengths[layer_i + 1]
                else:
                    lengths = self.lengths[layer_i]

                for b_i, length in enumerate(lengths):
                    elem = layer_elems[i0 : i0 + length]
                    if element_name == "neighbors":
                        elem[elem >= self.points[layer_i].shape[0]] = -1
                        elem[elem >= 0] -= i0
                    elif element_name == "pools":
                        elem[elem >= self.points[layer_i].shape[0]] = -1
                        elem[elem >= 0] -= torch.sum(self.lengths[layer_i][:b_i])
                    i0 += length

                    if to_numpy:
                        p_list.append(elem.numpy())
                    else:
                        p_list.append(elem)

                if layer == layer_i:
                    return p_list

                all_p_list.append(p_list)

        return all_p_list


def ModelNet40Collate(batch_data):
    return ModelNet40CustomBatch(batch_data)

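
# Example wiring (sketch only, kept as a comment so nothing runs on import): the dataset,
# sampler and collate function above are meant to be combined in a training script along
# these lines. The `config` object is assumed to be a KPConv-style configuration providing
# at least batch_num, first_subsampling_dl and input_threads.
#
#     from torch.utils.data import DataLoader
#
#     train_dataset = ModelNet40Dataset(config, train=True)
#     train_sampler = ModelNet40Sampler(train_dataset, balance_labels=True)
#     train_loader = DataLoader(train_dataset,
#                               batch_size=1,
#                               sampler=train_sampler,
#                               collate_fn=ModelNet40Collate,
#                               num_workers=config.input_threads,
#                               pin_memory=True)
#
#     # Tune batch_limit and neighborhood_limits before training
#     train_sampler.calibration(train_loader)
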
# ----------------------------------------------------------------------------------------------------------------------
#
#           Debug functions
#       \*********************/


def debug_sampling(dataset, sampler, loader):
    """Shows which labels are sampled according to strategy chosen"""
    label_sum = np.zeros((dataset.num_classes), dtype=np.int32)
    for epoch in range(10):
        for batch_i, (points, normals, labels, indices, in_sizes) in enumerate(loader):
            # print(batch_i, tuple(points.shape), tuple(normals.shape), labels, indices, in_sizes)

            label_sum += np.bincount(labels.numpy(), minlength=dataset.num_classes)
            print(label_sum)
            # print(sampler.potentials[:6])

            print("******************")
        print("*******************************************")

    _, counts = np.unique(dataset.input_labels, return_counts=True)
    print(counts)


def debug_timing(dataset, sampler, loader):
    """Timing of generator function"""

    t = [time.time()]
    last_display = time.time()
    mean_dt = np.zeros(2)
    estim_b = dataset.config.batch_num

    for epoch in range(10):
        for batch_i, batch in enumerate(loader):
            # print(batch_i, tuple(points.shape), tuple(normals.shape), labels, indices, in_sizes)

            # New time
            t = t[-1:]
            t += [time.time()]

            # Update estim_b (low pass filter)
            estim_b += (len(batch.labels) - estim_b) / 100

            # Pause simulating computations
            time.sleep(0.050)
            t += [time.time()]

            # Average timing
            mean_dt = 0.9 * mean_dt + 0.1 * (np.array(t[1:]) - np.array(t[:-1]))

            # Console display (only one per second)
            if (t[-1] - last_display) > -1.0:
                last_display = t[-1]
                message = "Step {:08d} -> (ms/batch) {:8.2f} {:8.2f} / batch = {:.2f}"
                print(
                    message.format(
                        batch_i, 1000 * mean_dt[0], 1000 * mean_dt[1], estim_b
                    )
                )

        print("************* Epoch ended *************")

    _, counts = np.unique(dataset.input_labels, return_counts=True)
    print(counts)


def debug_show_clouds(dataset, sampler, loader):
    for epoch in range(10):
        L = dataset.config.num_layers

        for batch_i, batch in enumerate(loader):
            # Print characteristics of input tensors
            print("\nPoints tensors")
            for i in range(L):
                print(batch.points[i].dtype, batch.points[i].shape)
print("\nNeigbors tensors")
|
|
            for i in range(L):
                print(batch.neighbors[i].dtype, batch.neighbors[i].shape)
            print("\nPools tensors")
            for i in range(L):
                print(batch.pools[i].dtype, batch.pools[i].shape)
            print("\nStack lengths")
            for i in range(L):
                print(batch.lengths[i].dtype, batch.lengths[i].shape)
            print("\nFeatures")
            print(batch.features.dtype, batch.features.shape)
            print("\nLabels")
            print(batch.labels.dtype, batch.labels.shape)
            print("\nAugment Scales")
            print(batch.scales.dtype, batch.scales.shape)
            print("\nAugment Rotations")
            print(batch.rots.dtype, batch.rots.shape)
            print("\nModel indices")
            print(batch.model_inds.dtype, batch.model_inds.shape)

            print("\nAre input tensors pinned")
            print(batch.neighbors[0].is_pinned())
            print(batch.neighbors[-1].is_pinned())
            print(batch.points[0].is_pinned())
            print(batch.points[-1].is_pinned())
            print(batch.labels.is_pinned())
            print(batch.scales.is_pinned())
            print(batch.rots.is_pinned())
            print(batch.model_inds.is_pinned())

            show_input_batch(batch)

        print("*******************************************")

    _, counts = np.unique(dataset.input_labels, return_counts=True)
    print(counts)


def debug_batch_and_neighbors_calib(dataset, sampler, loader):
    """Timing of generator function"""

    t = [time.time()]
    last_display = time.time()
    mean_dt = np.zeros(2)

    for epoch in range(10):
        for batch_i, input_list in enumerate(loader):
            # print(batch_i, tuple(points.shape), tuple(normals.shape), labels, indices, in_sizes)

            # New time
            t = t[-1:]
            t += [time.time()]

            # Pause simulating computations
            time.sleep(0.01)
            t += [time.time()]

            # Average timing
            mean_dt = 0.9 * mean_dt + 0.1 * (np.array(t[1:]) - np.array(t[:-1]))

            # Console display (only one per second)
            if (t[-1] - last_display) > 1.0:
                last_display = t[-1]
                message = "Step {:08d} -> Average timings (ms/batch) {:8.2f} {:8.2f} "
                print(message.format(batch_i, 1000 * mean_dt[0], 1000 * mean_dt[1]))

        print("************* Epoch ended *************")

    _, counts = np.unique(dataset.input_labels, return_counts=True)
    print(counts)


class ModelNet40WorkerInitDebug:
    """Callable class that initializes workers."""

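    # Intended to be passed to a DataLoader as worker_init_fn=ModelNet40WorkerInitDebug(dataset)
    # so that each worker prints where its copy of the dataset labels lives in memory.
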
    def __init__(self, dataset):
        self.dataset = dataset
        return

    def __call__(self, worker_id):
        # Print workers info
        worker_info = get_worker_info()
        print(worker_info)

        # Get associated dataset
        dataset = worker_info.dataset  # the dataset copy in this worker process

        # On Windows, each worker has its own copy of the dataset. On Linux, it is shared in memory
        print(dataset.input_labels.__array_interface__["data"])
        print(worker_info.dataset.input_labels.__array_interface__["data"])
        print(self.dataset.input_labels.__array_interface__["data"])

        # Configure the dataset to only process the split workload

        return