#
#
#      0=================================0
#      |    Kernel Point Convolutions    |
#      0=================================0
#
#
# ----------------------------------------------------------------------------------------------------------------------
#
#      Class handling ModelNet40 dataset.
#      Implements a Dataset, a Sampler, and a collate_fn
#
# ----------------------------------------------------------------------------------------------------------------------
#
#      Hugues THOMAS - 11/06/2018
#


# ----------------------------------------------------------------------------------------------------------------------
#
#           Imports and global variables
#       \**********************************/
#

# Common libs
import time
import numpy as np
import pickle
import torch
import math

# OS functions
from os import listdir
from os.path import exists, join

# Dataset parent class
from datasets.common import PointCloudDataset
from torch.utils.data import Sampler, get_worker_info
from utils.mayavi_visu import *

from datasets.common import grid_subsampling
from utils.config import bcolors


# ----------------------------------------------------------------------------------------------------------------------
#
#           Dataset class definition
#       \******************************/


class ModelNet40Dataset(PointCloudDataset):
    """Class to handle Modelnet 40 dataset."""

    def __init__(self, config, train=True, orient_correction=True):
        """
        This dataset is small enough to be stored in-memory, so load all point clouds here
        """
        PointCloudDataset.__init__(self, 'ModelNet40')

        ############
        # Parameters
        ############

        # Dict from labels to names
        self.label_to_names = {0: 'airplane',
                               1: 'bathtub',
                               2: 'bed',
                               3: 'bench',
                               4: 'bookshelf',
                               5: 'bottle',
                               6: 'bowl',
                               7: 'car',
                               8: 'chair',
                               9: 'cone',
                               10: 'cup',
                               11: 'curtain',
                               12: 'desk',
                               13: 'door',
                               14: 'dresser',
                               15: 'flower_pot',
                               16: 'glass_box',
                               17: 'guitar',
                               18: 'keyboard',
                               19: 'lamp',
                               20: 'laptop',
                               21: 'mantel',
                               22: 'monitor',
                               23: 'night_stand',
                               24: 'person',
                               25: 'piano',
                               26: 'plant',
                               27: 'radio',
                               28: 'range_hood',
                               29: 'sink',
                               30: 'sofa',
                               31: 'stairs',
                               32: 'stool',
                               33: 'table',
                               34: 'tent',
                               35: 'toilet',
                               36: 'tv_stand',
                               37: 'vase',
                               38: 'wardrobe',
                               39: 'xbox'}

        # Initialize a bunch of variables concerning class labels
        self.init_labels()

        # List of classes ignored during training (can be empty)
        self.ignored_labels = np.array([])

        # Dataset folder
        self.path = '../../Data/ModelNet40'

        # Type of task conducted on this dataset
        self.dataset_task = 'classification'

        # Update number of classes and dataset task in configuration
        config.num_classes = self.num_classes
        config.dataset_task = self.dataset_task

        # Parameters from config
        self.config = config

        # Training or test set
        self.train = train

        # Number of models and models used per epoch
        if self.train:
            self.num_models = 9843
            if config.epoch_steps and config.epoch_steps * config.batch_num < self.num_models:
                self.epoch_n = config.epoch_steps * config.batch_num
            else:
                self.epoch_n = self.num_models
        else:
            self.num_models = 2468
            self.epoch_n = min(self.num_models, config.validation_size * config.batch_num)

        #############
        # Load models
        #############

        if 0 < self.config.first_subsampling_dl <= 0.01:
            raise ValueError('subsampling_parameter too low (should be over 1 cm)')

        self.input_points, self.input_normals, self.input_labels = self.load_subsampled_clouds(orient_correction)

        return

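    # Worked example of the epoch sizing above (numbers are hypothetical, not taken from any config
    # file): with config.epoch_steps = 300 and config.batch_num = 10, a training epoch uses
    # epoch_n = 300 * 10 = 3000 models (since 3000 < 9843); with epoch_steps left at 0 or None,
    # the full set of 9843 training models is used.
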
    def __len__(self):
        """
        Return the length of data here
        """
        return self.num_models

    def __getitem__(self, idx_list):
        """
        The main thread gives a list of indices to load a batch. Each worker is going to work in parallel to load a
        different list of indices.
        """

        ###################
        # Gather batch data
        ###################

        tp_list = []
        tn_list = []
        tl_list = []
        ti_list = []
        s_list = []
        R_list = []

        for p_i in idx_list:

            # Get points and labels
            points = self.input_points[p_i].astype(np.float32)
            normals = self.input_normals[p_i].astype(np.float32)
            label = self.label_to_idx[self.input_labels[p_i]]

            # Data augmentation
            points, normals, scale, R = self.augmentation_transform(points, normals)

            # Stack batch
            tp_list += [points]
            tn_list += [normals]
            tl_list += [label]
            ti_list += [p_i]
            s_list += [scale]
            R_list += [R]

        ###################
        # Concatenate batch
        ###################

        #show_ModelNet_examples(tp_list, cloud_normals=tn_list)

        stacked_points = np.concatenate(tp_list, axis=0)
        stacked_normals = np.concatenate(tn_list, axis=0)
        labels = np.array(tl_list, dtype=np.int64)
        model_inds = np.array(ti_list, dtype=np.int32)
        stack_lengths = np.array([tp.shape[0] for tp in tp_list], dtype=np.int32)
        scales = np.array(s_list, dtype=np.float32)
        rots = np.stack(R_list, axis=0)

        # Input features
        stacked_features = np.ones_like(stacked_points[:, :1], dtype=np.float32)
        if self.config.in_features_dim == 1:
            pass
        elif self.config.in_features_dim == 4:
            stacked_features = np.hstack((stacked_features, stacked_normals))
        else:
            raise ValueError('Only accepted input dimensions are 1, 4 and 7 (without and with XYZ)')

        #######################
        # Create network inputs
        #######################
        #
        #     Points, neighbors, pooling indices for each layer
        #

        # Get the whole input list
        input_list = self.classification_inputs(stacked_points,
                                                stacked_features,
                                                labels,
                                                stack_lengths)

        # Add scale and rotation for testing
        input_list += [scales, rots, model_inds]

        return input_list

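    # Shape sketch for the feature block above (sizes are illustrative only): with
    # in_features_dim == 1 the features are a constant column of ones, with
    # in_features_dim == 4 the three normal components are appended to that column.
    #
    #     stacked_points.shape   == (2048, 3)
    #     stacked_features.shape == (2048, 1)    # in_features_dim == 1
    #     stacked_features.shape == (2048, 4)    # in_features_dim == 4 (ones + normals)
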
    def load_subsampled_clouds(self, orient_correction):

        # Restart timer
        t0 = time.time()

        # Load wanted points if possible
        if self.train:
            split = 'training'
        else:
            split = 'test'

        print('\nLoading {:s} points subsampled at {:.3f}'.format(split, self.config.first_subsampling_dl))
        filename = join(self.path, '{:s}_{:.3f}_record.pkl'.format(split, self.config.first_subsampling_dl))

        if exists(filename):
            with open(filename, 'rb') as file:
                input_points, input_normals, input_labels = pickle.load(file)

        # Else compute them from original points
        else:

            # Collect training file names
            if self.train:
                names = np.loadtxt(join(self.path, 'modelnet40_train.txt'), dtype=str)
            else:
                names = np.loadtxt(join(self.path, 'modelnet40_test.txt'), dtype=str)

            # Initialize containers
            input_points = []
            input_normals = []

            # Advanced display
            N = len(names)
            progress_n = 30
            fmt_str = '[{:<' + str(progress_n) + '}] {:5.1f}%'

            # Collect point clouds
            for i, cloud_name in enumerate(names):

                # Read points
                class_folder = '_'.join(cloud_name.split('_')[:-1])
                txt_file = join(self.path, class_folder, cloud_name) + '.txt'
                data = np.loadtxt(txt_file, delimiter=',', dtype=np.float32)

                # Subsample them
                if self.config.first_subsampling_dl > 0:
                    points, normals = grid_subsampling(data[:, :3],
                                                       features=data[:, 3:],
                                                       sampleDl=self.config.first_subsampling_dl)
                else:
                    points = data[:, :3]
                    normals = data[:, 3:]

                print('', end='\r')
                print(fmt_str.format('#' * ((i * progress_n) // N), 100 * i / N), end='', flush=True)

                # Add to list
                input_points += [points]
                input_normals += [normals]

            print('', end='\r')
            print(fmt_str.format('#' * progress_n, 100), end='', flush=True)
            print()

            # Get labels
            label_names = ['_'.join(name.split('_')[:-1]) for name in names]
            input_labels = np.array([self.name_to_label[name] for name in label_names])

            # Save for later use
            with open(filename, 'wb') as file:
                pickle.dump((input_points,
                             input_normals,
                             input_labels), file)

        lengths = [p.shape[0] for p in input_points]
        sizes = [l * 4 * 6 for l in lengths]
        print('{:.1f} MB loaded in {:.1f}s'.format(np.sum(sizes) * 1e-6, time.time() - t0))

        if orient_correction:
            input_points = [pp[:, [0, 2, 1]] for pp in input_points]
            input_normals = [nn[:, [0, 2, 1]] for nn in input_normals]

        return input_points, input_normals, input_labels

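# Minimal construction sketch for the dataset above. The config attributes used are the ones this
# class actually reads (first_subsampling_dl, batch_num, epoch_steps, validation_size,
# in_features_dim, ...); the 'Modelnet40Config' import is an assumption borrowed from the training
# script and may differ in your setup. Kept as a comment so importing this module has no side effects:
#
#     from train_ModelNet40 import Modelnet40Config    # assumed location of the config class
#
#     config = Modelnet40Config()
#     train_dataset = ModelNet40Dataset(config, train=True)    # loads (or reuses cached) subsampled clouds
#     test_dataset = ModelNet40Dataset(config, train=False)
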

# ----------------------------------------------------------------------------------------------------------------------
#
#           Utility classes definition
#       \********************************/


class ModelNet40Sampler(Sampler):
    """Sampler for ModelNet40"""

    def __init__(self, dataset: ModelNet40Dataset, use_potential=True, balance_labels=False):
        Sampler.__init__(self, dataset)

        # Does the sampler use potentials for regular sampling
        self.use_potential = use_potential

        # Should the classes be balanced when sampling
        self.balance_labels = balance_labels

        # Dataset used by the sampler (no copy is made in memory)
        self.dataset = dataset

        # Create potentials
        if self.use_potential:
            self.potentials = np.random.rand(len(dataset.input_labels)) * 0.1 + 0.1
        else:
            self.potentials = None

        # Initialize value for batch limit (max number of points per batch).
        self.batch_limit = 10000

        return

    def __iter__(self):
        """
        Yield next batch indices here
        """

        ##########################################
        # Initialize the list of generated indices
        ##########################################

        if self.use_potential:
            if self.balance_labels:

                gen_indices = []
                pick_n = self.dataset.epoch_n // self.dataset.num_classes + 1
                for i, l in enumerate(self.dataset.label_values):

                    # Get the potentials of the objects of this class
                    label_inds = np.where(np.equal(self.dataset.input_labels, l))[0]
                    class_potentials = self.potentials[label_inds]

                    # Get the indices to generate thanks to potentials
                    if pick_n < class_potentials.shape[0]:
                        pick_indices = np.argpartition(class_potentials, pick_n)[:pick_n]
                    else:
                        pick_indices = np.random.permutation(class_potentials.shape[0])
                    class_indices = label_inds[pick_indices]
                    gen_indices.append(class_indices)

                # Stack the chosen indices of all classes
                gen_indices = np.random.permutation(np.hstack(gen_indices))

            else:

                # Get indices with the minimum potential
                if self.dataset.epoch_n < self.potentials.shape[0]:
                    gen_indices = np.argpartition(self.potentials, self.dataset.epoch_n)[:self.dataset.epoch_n]
                else:
                    gen_indices = np.random.permutation(self.potentials.shape[0])
                gen_indices = np.random.permutation(gen_indices)

            # Update potentials (change the order for the next epoch)
            self.potentials[gen_indices] = np.ceil(self.potentials[gen_indices])
            self.potentials[gen_indices] += np.random.rand(gen_indices.shape[0]) * 0.1 + 0.1

        else:
            if self.balance_labels:
                pick_n = self.dataset.epoch_n // self.dataset.num_classes + 1
                gen_indices = []
                for l in self.dataset.label_values:
                    label_inds = np.where(np.equal(self.dataset.input_labels, l))[0]
                    rand_inds = np.random.choice(label_inds, size=pick_n, replace=True)
                    gen_indices += [rand_inds]
                gen_indices = np.random.permutation(np.hstack(gen_indices))
            else:
                gen_indices = np.random.permutation(self.dataset.num_models)[:self.dataset.epoch_n]

        ################
        # Generator loop
        ################

        # Initialize concatenation lists
        ti_list = []
        batch_n = 0

        # Generator loop
        for p_i in gen_indices:

            # Size of picked cloud
            n = self.dataset.input_points[p_i].shape[0]

            # In case batch is full, yield it and reset it
            if batch_n + n > self.batch_limit and batch_n > 0:
                yield np.array(ti_list, dtype=np.int32)
                ti_list = []
                batch_n = 0

            # Add data to current batch
            ti_list += [p_i]

            # Update batch size
            batch_n += n

        yield np.array(ti_list, dtype=np.int32)

        return 0

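    # Sketch of the potential mechanism used above, with made-up sizes; it only restates what
    # __iter__ does and is kept in comments so nothing runs at import time:
    #
    #     potentials = np.random.rand(100) * 0.1 + 0.1          # initial potentials in [0.1, 0.2)
    #     epoch_n = 20
    #
    #     # pick the epoch_n objects with the smallest potentials
    #     picked = np.argpartition(potentials, epoch_n)[:epoch_n]
    #
    #     # bump the picked potentials so other objects are preferred at the next epoch
    #     potentials[picked] = np.ceil(potentials[picked])
    #     potentials[picked] += np.random.rand(picked.shape[0]) * 0.1 + 0.1
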
    def __len__(self):
        """
        The number of yielded samples is variable
        """
        return None

    def calibration(self, dataloader, untouched_ratio=0.9, verbose=False):
        """
        Method performing batch and neighbors calibration.
            Batch calibration: Set "batch_limit" (the maximum number of points allowed in every batch) so that the
                               average batch size (number of stacked pointclouds) is the one asked.
        Neighbors calibration: Set the "neighborhood_limits" (the maximum number of neighbors allowed in convolutions)
                               so that 90% of the neighborhoods remain untouched. There is a limit for each layer.
        """

        ##############################
        # Previously saved calibration
        ##############################

        print('\nStarting Calibration (use verbose=True for more details)')
        t0 = time.time()

        redo = False

        # Batch limit
        # ***********

        # Load batch_limit dictionary
        batch_lim_file = join(self.dataset.path, 'batch_limits.pkl')
        if exists(batch_lim_file):
            with open(batch_lim_file, 'rb') as file:
                batch_lim_dict = pickle.load(file)
        else:
            batch_lim_dict = {}

        # Check if the batch limit associated with current parameters exists
        key = '{:.3f}_{:d}'.format(self.dataset.config.first_subsampling_dl,
                                   self.dataset.config.batch_num)
        if key in batch_lim_dict:
            self.batch_limit = batch_lim_dict[key]
        else:
            redo = True

        if verbose:
            print('\nPrevious calibration found:')
            print('Check batch limit dictionary')
            if key in batch_lim_dict:
                color = bcolors.OKGREEN
                v = str(int(batch_lim_dict[key]))
            else:
                color = bcolors.FAIL
                v = '?'
            print('{:}\"{:s}\": {:s}{:}'.format(color, key, v, bcolors.ENDC))

        # Neighbors limit
        # ***************

        # Load neighb_limits dictionary
        neighb_lim_file = join(self.dataset.path, 'neighbors_limits.pkl')
        if exists(neighb_lim_file):
            with open(neighb_lim_file, 'rb') as file:
                neighb_lim_dict = pickle.load(file)
        else:
            neighb_lim_dict = {}

        # Check if the limit associated with current parameters exists (for each layer)
        neighb_limits = []
        for layer_ind in range(self.dataset.config.num_layers):

            dl = self.dataset.config.first_subsampling_dl * (2**layer_ind)
            if self.dataset.config.deform_layers[layer_ind]:
                r = dl * self.dataset.config.deform_radius
            else:
                r = dl * self.dataset.config.conv_radius

            key = '{:.3f}_{:.3f}'.format(dl, r)
            if key in neighb_lim_dict:
                neighb_limits += [neighb_lim_dict[key]]

        if len(neighb_limits) == self.dataset.config.num_layers:
            self.dataset.neighborhood_limits = neighb_limits
        else:
            redo = True

        if verbose:
            print('Check neighbors limit dictionary')
            for layer_ind in range(self.dataset.config.num_layers):
                dl = self.dataset.config.first_subsampling_dl * (2**layer_ind)
                if self.dataset.config.deform_layers[layer_ind]:
                    r = dl * self.dataset.config.deform_radius
                else:
                    r = dl * self.dataset.config.conv_radius
                key = '{:.3f}_{:.3f}'.format(dl, r)

                if key in neighb_lim_dict:
                    color = bcolors.OKGREEN
                    v = str(neighb_lim_dict[key])
                else:
                    color = bcolors.FAIL
                    v = '?'
                print('{:}\"{:s}\": {:s}{:}'.format(color, key, v, bcolors.ENDC))

        if redo:

            ############################
            # Neighbors calib parameters
            ############################

            # From config parameter, compute higher bound of neighbors number in a neighborhood
            hist_n = int(np.ceil(4 / 3 * np.pi * (self.dataset.config.conv_radius + 1) ** 3))

            # Histogram of neighborhood sizes
            neighb_hists = np.zeros((self.dataset.config.num_layers, hist_n), dtype=np.int32)

            ########################
            # Batch calib parameters
            ########################

            # Estimated average batch size and target value
            estim_b = 0
            target_b = self.dataset.config.batch_num

            # Calibration parameters
            low_pass_T = 10
            Kp = 100.0
            finer = False

            # Convergence parameters
            smooth_errors = []
            converge_threshold = 0.1

            # Loop parameters
            last_display = time.time()
            i = 0
            breaking = False

            #####################
            # Perform calibration
            #####################

            for epoch in range(10):
                for batch_i, batch in enumerate(dataloader):

                    # Update neighborhood histogram
                    counts = [np.sum(neighb_mat.numpy() < neighb_mat.shape[0], axis=1) for neighb_mat in batch.neighbors]
                    hists = [np.bincount(c, minlength=hist_n)[:hist_n] for c in counts]
                    neighb_hists += np.vstack(hists)

                    # batch length
                    b = len(batch.labels)

                    # Update estim_b (low pass filter)
                    estim_b += (b - estim_b) / low_pass_T

                    # Estimate error (noisy)
                    error = target_b - b

                    # Save smooth errors for convergence check
                    smooth_errors.append(target_b - estim_b)
                    if len(smooth_errors) > 10:
                        smooth_errors = smooth_errors[1:]

                    # Update batch limit with P controller
                    self.batch_limit += Kp * error

                    # Finer low pass filter when closing in
                    if not finer and np.abs(estim_b - target_b) < 1:
                        low_pass_T = 100
                        finer = True

                    # Convergence
                    if finer and np.max(np.abs(smooth_errors)) < converge_threshold:
                        breaking = True
                        break

                    i += 1
                    t = time.time()

                    # Console display (only one per second)
                    if verbose and (t - last_display) > 1.0:
                        last_display = t
                        message = 'Step {:5d}  estim_b ={:5.2f} batch_limit ={:7d}'
                        print(message.format(i,
                                             estim_b,
                                             int(self.batch_limit)))

                if breaking:
                    break

            # Use collected neighbor histogram to get neighbors limit
            cumsum = np.cumsum(neighb_hists.T, axis=0)
            percentiles = np.sum(cumsum < (untouched_ratio * cumsum[hist_n - 1, :]), axis=0)
            self.dataset.neighborhood_limits = percentiles

            if verbose:

                # Crop histogram
                while np.sum(neighb_hists[:, -1]) == 0:
                    neighb_hists = neighb_hists[:, :-1]
                hist_n = neighb_hists.shape[1]

                print('\n**************************************************\n')
                line0 = 'neighbors_num '
                for layer in range(neighb_hists.shape[0]):
                    line0 += '|  layer {:2d}  '.format(layer)
                print(line0)
                for neighb_size in range(hist_n):
                    line0 = '     {:4d}     '.format(neighb_size)
                    for layer in range(neighb_hists.shape[0]):
                        if neighb_size > percentiles[layer]:
                            color = bcolors.FAIL
                        else:
                            color = bcolors.OKGREEN
                        line0 += '|{:}{:10d}{:}  '.format(color,
                                                          neighb_hists[layer, neighb_size],
                                                          bcolors.ENDC)

                    print(line0)

                print('\n**************************************************\n')
                print('\nchosen neighbors limits: ', percentiles)
                print()

            # Save batch_limit dictionary
            key = '{:.3f}_{:d}'.format(self.dataset.config.first_subsampling_dl,
                                       self.dataset.config.batch_num)
            batch_lim_dict[key] = self.batch_limit
            with open(batch_lim_file, 'wb') as file:
                pickle.dump(batch_lim_dict, file)

            # Save neighb_limit dictionary
            for layer_ind in range(self.dataset.config.num_layers):
                dl = self.dataset.config.first_subsampling_dl * (2 ** layer_ind)
                if self.dataset.config.deform_layers[layer_ind]:
                    r = dl * self.dataset.config.deform_radius
                else:
                    r = dl * self.dataset.config.conv_radius
                key = '{:.3f}_{:.3f}'.format(dl, r)
                neighb_lim_dict[key] = self.dataset.neighborhood_limits[layer_ind]
            with open(neighb_lim_file, 'wb') as file:
                pickle.dump(neighb_lim_dict, file)

        print('Calibration done in {:.1f}s\n'.format(time.time() - t0))
        return

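# Worked example of the neighbor-limit rule in calibration() above (numbers are invented):
# suppose one layer collects the neighborhood-size histogram [0, 5, 10, 30, 40, 10, 5]
# (sizes 0..6, 100 neighborhoods in total) and untouched_ratio = 0.9. Then
#
#     cumsum = [0, 5, 15, 45, 85, 95, 100]
#     limit  = number of cumsum entries below 0.9 * 100 = 90  ->  5
#
# so neighborhoods are cropped to at most 5 neighbors, which leaves at least 90% of them untouched.
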

class ModelNet40CustomBatch:
    """Custom batch definition with memory pinning for ModelNet40"""

    def __init__(self, input_list):

        # Get rid of batch dimension
        input_list = input_list[0]

        # Number of layers
        L = (len(input_list) - 5) // 4

        # Extract input tensors from the list of numpy arrays
        ind = 0
        self.points = [torch.from_numpy(nparray) for nparray in input_list[ind:ind+L]]
        ind += L
        self.neighbors = [torch.from_numpy(nparray) for nparray in input_list[ind:ind+L]]
        ind += L
        self.pools = [torch.from_numpy(nparray) for nparray in input_list[ind:ind+L]]
        ind += L
        self.lengths = [torch.from_numpy(nparray) for nparray in input_list[ind:ind+L]]
        ind += L
        self.features = torch.from_numpy(input_list[ind])
        ind += 1
        self.labels = torch.from_numpy(input_list[ind])
        ind += 1
        self.scales = torch.from_numpy(input_list[ind])
        ind += 1
        self.rots = torch.from_numpy(input_list[ind])
        ind += 1
        self.model_inds = torch.from_numpy(input_list[ind])

        return

    def pin_memory(self):
        """
        Manual pinning of the memory
        """

        self.points = [in_tensor.pin_memory() for in_tensor in self.points]
        self.neighbors = [in_tensor.pin_memory() for in_tensor in self.neighbors]
        self.pools = [in_tensor.pin_memory() for in_tensor in self.pools]
        self.lengths = [in_tensor.pin_memory() for in_tensor in self.lengths]
        self.features = self.features.pin_memory()
        self.labels = self.labels.pin_memory()
        self.scales = self.scales.pin_memory()
        self.rots = self.rots.pin_memory()
        self.model_inds = self.model_inds.pin_memory()

        return self

    def to(self, device):

        self.points = [in_tensor.to(device) for in_tensor in self.points]
        self.neighbors = [in_tensor.to(device) for in_tensor in self.neighbors]
        self.pools = [in_tensor.to(device) for in_tensor in self.pools]
        self.lengths = [in_tensor.to(device) for in_tensor in self.lengths]
        self.features = self.features.to(device)
        self.labels = self.labels.to(device)
        self.scales = self.scales.to(device)
        self.rots = self.rots.to(device)
        self.model_inds = self.model_inds.to(device)

        return self

    def unstack_points(self, layer=None):
        """Unstack the points"""
        return self.unstack_elements('points', layer)

    def unstack_neighbors(self, layer=None):
        """Unstack the neighbors indices"""
        return self.unstack_elements('neighbors', layer)

    def unstack_pools(self, layer=None):
        """Unstack the pooling indices"""
        return self.unstack_elements('pools', layer)

    def unstack_elements(self, element_name, layer=None, to_numpy=True):
        """
        Return a list of the stacked elements in the batch at a certain layer. If no layer is given, then return all
        layers
        """

        if element_name == 'points':
            elements = self.points
        elif element_name == 'neighbors':
            elements = self.neighbors
        elif element_name == 'pools':
            elements = self.pools[:-1]
        else:
            raise ValueError('Unknown element name: {:s}'.format(element_name))

        all_p_list = []
        for layer_i, layer_elems in enumerate(elements):

            if layer is None or layer == layer_i:

                i0 = 0
                p_list = []
                if element_name == 'pools':
                    lengths = self.lengths[layer_i+1]
                else:
                    lengths = self.lengths[layer_i]

                for b_i, length in enumerate(lengths):

                    elem = layer_elems[i0:i0 + length]
                    if element_name == 'neighbors':
                        elem[elem >= self.points[layer_i].shape[0]] = -1
                        elem[elem >= 0] -= i0
                    elif element_name == 'pools':
                        elem[elem >= self.points[layer_i].shape[0]] = -1
                        elem[elem >= 0] -= torch.sum(self.lengths[layer_i][:b_i])
                    i0 += length

                    if to_numpy:
                        p_list.append(elem.numpy())
                    else:
                        p_list.append(elem)

                if layer == layer_i:
                    return p_list

                all_p_list.append(p_list)

        return all_p_list

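# Example of recovering per-cloud tensors from a stacked batch (shapes are illustrative):
#
#     per_cloud_points = batch.unstack_points(layer=0)        # list of (Ni, 3) numpy arrays
#     per_cloud_neighb = batch.unstack_neighbors(layer=0)     # per-cloud neighbor indices,
#                                                             # -1 marks out-of-range (shadow) neighbors
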

def ModelNet40Collate(batch_data):
    return ModelNet40CustomBatch(batch_data)

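# Typical wiring of the pieces defined in this file, as done in the training scripts (argument
# values are placeholders, and config.input_threads is assumed to exist). Kept in comments so the
# module stays import-safe:
#
#     from torch.utils.data import DataLoader
#
#     train_sampler = ModelNet40Sampler(train_dataset, balance_labels=True)
#     train_loader = DataLoader(train_dataset,
#                               batch_size=1,                      # real batching is done by the sampler
#                               sampler=train_sampler,
#                               collate_fn=ModelNet40Collate,
#                               num_workers=config.input_threads,
#                               pin_memory=True)
#
#     # Calibrate the maximum number of points per batch and the per-layer neighbor limits
#     train_sampler.calibration(train_loader)
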

class ModelNet40WorkerInitDebug:
    """Callable class that initializes workers."""

    def __init__(self, dataset):
        self.dataset = dataset
        return

    def __call__(self, worker_id):

        # Print workers info
        worker_info = get_worker_info()
        print(worker_info)

        # Get associated dataset
        dataset = worker_info.dataset  # the dataset copy in this worker process

        # On Windows, each worker has its own copy of the dataset; on Linux, it is shared in memory
        print(dataset.input_labels.__array_interface__['data'])
        print(worker_info.dataset.input_labels.__array_interface__['data'])
        print(self.dataset.input_labels.__array_interface__['data'])

        # Configure the dataset to only process the split workload

        return


# ----------------------------------------------------------------------------------------------------------------------
#
#           Debug functions
#       \*********************/


def debug_sampling(dataset, sampler, loader):
    """Shows which labels are sampled according to strategy chosen"""
    label_sum = np.zeros((dataset.num_classes), dtype=np.int32)
    for epoch in range(10):

        for batch_i, (points, normals, labels, indices, in_sizes) in enumerate(loader):
            # print(batch_i, tuple(points.shape), tuple(normals.shape), labels, indices, in_sizes)

            label_sum += np.bincount(labels.numpy(), minlength=dataset.num_classes)
            print(label_sum)
            #print(sampler.potentials[:6])

            print('******************')
        print('*******************************************')

    _, counts = np.unique(dataset.input_labels, return_counts=True)
    print(counts)


def debug_timing(dataset, sampler, loader):
    """Timing of generator function"""

    t = [time.time()]
    last_display = time.time()
    mean_dt = np.zeros(2)
    estim_b = dataset.config.batch_num

    for epoch in range(10):

        for batch_i, batch in enumerate(loader):
            # print(batch_i, tuple(points.shape), tuple(normals.shape), labels, indices, in_sizes)

            # New time
            t = t[-1:]
            t += [time.time()]

            # Update estim_b (low pass filter)
            estim_b += (len(batch.labels) - estim_b) / 100

            # Pause simulating computations
            time.sleep(0.050)
            t += [time.time()]

            # Average timing
            mean_dt = 0.9 * mean_dt + 0.1 * (np.array(t[1:]) - np.array(t[:-1]))

            # Console display (only one per second)
            if (t[-1] - last_display) > 1.0:
                last_display = t[-1]
                message = 'Step {:08d} -> (ms/batch) {:8.2f} {:8.2f} / batch = {:.2f}'
                print(message.format(batch_i,
                                     1000 * mean_dt[0],
                                     1000 * mean_dt[1],
                                     estim_b))

        print('************* Epoch ended *************')

    _, counts = np.unique(dataset.input_labels, return_counts=True)
    print(counts)


def debug_show_clouds(dataset, sampler, loader):

    for epoch in range(10):

        clouds = []
        cloud_normals = []
        cloud_labels = []

        L = dataset.config.num_layers

        for batch_i, batch in enumerate(loader):

            # Print characteristics of input tensors
            print('\nPoints tensors')
            for i in range(L):
                print(batch.points[i].dtype, batch.points[i].shape)
            print('\nNeighbors tensors')
            for i in range(L):
                print(batch.neighbors[i].dtype, batch.neighbors[i].shape)
            print('\nPools tensors')
            for i in range(L):
                print(batch.pools[i].dtype, batch.pools[i].shape)
            print('\nStack lengths')
            for i in range(L):
                print(batch.lengths[i].dtype, batch.lengths[i].shape)
            print('\nFeatures')
            print(batch.features.dtype, batch.features.shape)
            print('\nLabels')
            print(batch.labels.dtype, batch.labels.shape)
            print('\nAugment Scales')
            print(batch.scales.dtype, batch.scales.shape)
            print('\nAugment Rotations')
            print(batch.rots.dtype, batch.rots.shape)
            print('\nModel indices')
            print(batch.model_inds.dtype, batch.model_inds.shape)

            print('\nAre input tensors pinned')
            print(batch.neighbors[0].is_pinned())
            print(batch.neighbors[-1].is_pinned())
            print(batch.points[0].is_pinned())
            print(batch.points[-1].is_pinned())
            print(batch.labels.is_pinned())
            print(batch.scales.is_pinned())
            print(batch.rots.is_pinned())
            print(batch.model_inds.is_pinned())

            show_input_batch(batch)

        print('*******************************************')

    _, counts = np.unique(dataset.input_labels, return_counts=True)
    print(counts)


def debug_batch_and_neighbors_calib(dataset, sampler, loader):
    """Timing of generator function"""

    t = [time.time()]
    last_display = time.time()
    mean_dt = np.zeros(2)

    for epoch in range(10):

        for batch_i, input_list in enumerate(loader):
            # print(batch_i, tuple(points.shape), tuple(normals.shape), labels, indices, in_sizes)

            # New time
            t = t[-1:]
            t += [time.time()]

            # Pause simulating computations
            time.sleep(0.01)
            t += [time.time()]

            # Average timing
            mean_dt = 0.9 * mean_dt + 0.1 * (np.array(t[1:]) - np.array(t[:-1]))

            # Console display (only one per second)
            if (t[-1] - last_display) > 1.0:
                last_display = t[-1]
                message = 'Step {:08d} -> Average timings (ms/batch) {:8.2f} {:8.2f} '
                print(message.format(batch_i,
                                     1000 * mean_dt[0],
                                     1000 * mean_dt[1]))

        print('************* Epoch ended *************')

    _, counts = np.unique(dataset.input_labels, return_counts=True)
    print(counts)