KPConv-PyTorch/datasetss/S3DIS.py

#
#
# 0=================================0
# | Kernel Point Convolutions |
# 0=================================0
#
#
# ----------------------------------------------------------------------------------------------------------------------
#
# Class handling S3DIS dataset.
# Implements a Dataset, a Sampler, and a collate_fn
#
# ----------------------------------------------------------------------------------------------------------------------
#
# Hugues THOMAS - 11/06/2018
#
# ----------------------------------------------------------------------------------------------------------------------
#
# Imports and global variables
# \**********************************/
#
# Common libs
import time
import numpy as np
import pickle
import torch
import warnings
from multiprocessing import Lock

# OS functions
from os import listdir, makedirs
from os.path import exists, join, isdir

# Dataset parent class
from datasetss.common import PointCloudDataset
from torch.utils.data import Sampler, get_worker_info
from utils.mayavi_visu import *
from datasetss.common import grid_subsampling
from utils.config import bcolors

# Explicit import for the neighborhood search structure used below
# (may also be provided by the wildcard import above)
from sklearn.neighbors import KDTree
# ----------------------------------------------------------------------------------------------------------------------
#
# Dataset class definition
# \******************************/
class S3DISDataset(PointCloudDataset):
"""Class to handle S3DIS dataset."""
def __init__(self, config, set="training", use_potentials=True, load_data=True):
"""
This dataset is small enough to be stored in-memory, so load all point clouds here
"""
PointCloudDataset.__init__(self, "S3DIS")
############
# Parameters
############
# Dict from labels to names
self.label_to_names = {
0: "ceiling",
1: "floor",
2: "wall",
3: "beam",
4: "column",
5: "window",
6: "door",
7: "chair",
8: "table",
9: "bookcase",
10: "sofa",
11: "board",
12: "clutter",
}
# Initialize a bunch of variables concerning class labels
self.init_labels()
# List of classes ignored during training (can be empty)
self.ignored_labels = np.array([])
# Dataset folder
self.path = "./Data/S3DIS"
# Type of task conducted on this dataset
self.dataset_task = "cloud_segmentation"
# Update number of classes and dataset task in configuration
config.num_classes = self.num_classes - len(self.ignored_labels)
config.dataset_task = self.dataset_task
# Parameters from config
self.config = config
# Training or test set
self.set = set
# Using potential or random epoch generation
self.use_potentials = use_potentials
# Path of the training files
self.train_path = "original_ply"
# List of files to process
ply_path = join(self.path, self.train_path)
# Proportion of validation scenes
self.cloud_names = ["Area_1", "Area_2", "Area_3", "Area_4", "Area_5", "Area_6"]
self.all_splits = [0, 1, 2, 3, 4, 5]
self.validation_split = 4
# Number of models used per epoch
if self.set == "training":
self.epoch_n = config.epoch_steps * config.batch_num
elif self.set in ["validation", "test", "ERF"]:
self.epoch_n = config.validation_size * config.batch_num
else:
raise ValueError("Unknown set for S3DIS data: ", self.set)
# Stop here if data is not needed
if not load_data:
return
###################
# Prepare ply files
###################
self.prepare_S3DIS_ply()
################
# Load ply files
################
# List of training files
self.files = []
for i, f in enumerate(self.cloud_names):
if self.set == "training":
if self.all_splits[i] != self.validation_split:
self.files += [join(ply_path, f + ".ply")]
elif self.set in ["validation", "test", "ERF"]:
if self.all_splits[i] == self.validation_split:
self.files += [join(ply_path, f + ".ply")]
else:
raise ValueError("Unknown set for S3DIS data: ", self.set)
if self.set == "training":
self.cloud_names = [
f
for i, f in enumerate(self.cloud_names)
if self.all_splits[i] != self.validation_split
]
elif self.set in ["validation", "test", "ERF"]:
self.cloud_names = [
f
for i, f in enumerate(self.cloud_names)
if self.all_splits[i] == self.validation_split
]
if 0 < self.config.first_subsampling_dl <= 0.01:
raise ValueError("subsampling_parameter too low (should be over 1 cm)")
# Initiate containers
self.input_trees = []
self.input_colors = []
self.input_labels = []
self.pot_trees = []
self.num_clouds = 0
self.test_proj = []
self.validation_labels = []
# Start loading
self.load_subsampled_clouds()
############################
# Batch selection parameters
############################
# Initialize value for batch limit (max number of points per batch).
self.batch_limit = torch.tensor([1], dtype=torch.float32)
self.batch_limit.share_memory_()
# Initialize potentials
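# Potential-based sampling: each point of the coarse "potential" clouds carries a
# scalar potential. The next input sphere is centered on the point with the lowest
# potential over all clouds, and potentials inside the picked sphere are then
# increased, so sphere centers spread evenly over the dataset during an epoch.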
if use_potentials:
self.potentials = []
self.min_potentials = []
self.argmin_potentials = []
for i, tree in enumerate(self.pot_trees):
self.potentials += [
torch.from_numpy(np.random.rand(tree.data.shape[0]) * 1e-3)
]
min_ind = int(torch.argmin(self.potentials[-1]))
self.argmin_potentials += [min_ind]
self.min_potentials += [float(self.potentials[-1][min_ind])]
# Share potential memory
self.argmin_potentials = torch.from_numpy(
np.array(self.argmin_potentials, dtype=np.int64)
)
self.min_potentials = torch.from_numpy(
np.array(self.min_potentials, dtype=np.float64)
)
self.argmin_potentials.share_memory_()
self.min_potentials.share_memory_()
for i, _ in enumerate(self.pot_trees):
self.potentials[i].share_memory_()
self.worker_waiting = torch.tensor(
[0 for _ in range(config.input_threads)], dtype=torch.int32
)
self.worker_waiting.share_memory_()
self.epoch_inds = None
self.epoch_i = 0
else:
self.potentials = None
self.min_potentials = None
self.argmin_potentials = None
self.epoch_inds = torch.from_numpy(
np.zeros((2, self.epoch_n), dtype=np.int64)
)
self.epoch_i = torch.from_numpy(np.zeros((1,), dtype=np.int64))
self.epoch_i.share_memory_()
self.epoch_inds.share_memory_()
self.worker_lock = Lock()
# For ERF visualization, we want only one cloud per batch and no randomness
if self.set == "ERF":
self.batch_limit = torch.tensor([1], dtype=torch.float32)
self.batch_limit.share_memory_()
np.random.seed(42)
return
def __len__(self):
"""
Return the length of data here
"""
return len(self.cloud_names)
def __getitem__(self, batch_i):
"""
The main thread gives a list of indices to load a batch. Each worker is going to work in parallel to load a
different list of indices.
"""
if self.use_potentials:
return self.potential_item(batch_i)
else:
return self.random_item(batch_i)
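# potential_item picks sphere centers at the current minimum of the potentials
# (spatially regularized sampling), while random_item consumes the class-balanced
# center indices pre-computed by S3DISSampler in self.epoch_inds.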
def potential_item(self, batch_i, debug_workers=False):
t = [time.time()]
# Initiate concatenation lists
p_list = []
f_list = []
l_list = []
i_list = []
pi_list = []
ci_list = []
s_list = []
R_list = []
batch_n = 0
failed_attempts = 0
info = get_worker_info()
if info is not None:
wid = info.id
else:
wid = None
while True:
t += [time.time()]
if debug_workers:
message = ""
for wi in range(info.num_workers):
if wi == wid:
message += " {:}X{:} ".format(bcolors.FAIL, bcolors.ENDC)
elif self.worker_waiting[wi] == 0:
message += " "
elif self.worker_waiting[wi] == 1:
message += " | "
elif self.worker_waiting[wi] == 2:
message += " o "
print(message)
self.worker_waiting[wid] = 0
with self.worker_lock:
if debug_workers:
message = ""
for wi in range(info.num_workers):
if wi == wid:
message += " {:}v{:} ".format(bcolors.OKGREEN, bcolors.ENDC)
elif self.worker_waiting[wi] == 0:
message += " "
elif self.worker_waiting[wi] == 1:
message += " | "
elif self.worker_waiting[wi] == 2:
message += " o "
print(message)
self.worker_waiting[wid] = 1
# Get potential minimum
cloud_ind = int(torch.argmin(self.min_potentials))
point_ind = int(self.argmin_potentials[cloud_ind])
# Get potential points from tree structure
pot_points = np.array(self.pot_trees[cloud_ind].data, copy=False)
# Center point of input region
center_point = pot_points[point_ind, :].reshape(1, -1)
# Add a small noise to center point
if self.set != "ERF":
center_point += np.random.normal(
scale=self.config.in_radius / 10, size=center_point.shape
)
# Indices of points in input region
pot_inds, dists = self.pot_trees[cloud_ind].query_radius(
center_point, r=self.config.in_radius, return_distance=True
)
d2s = np.square(dists[0])
pot_inds = pot_inds[0]
# Update potentials (Tukey weights)
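# Tukey weights: w = (1 - d^2 / R^2)^2 for d < R (with R = in_radius), 0 otherwise,
# so points near the picked center receive the largest potential increase and are
# less likely to be picked again soon.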
if self.set != "ERF":
tukeys = np.square(1 - d2s / np.square(self.config.in_radius))
tukeys[d2s > np.square(self.config.in_radius)] = 0
self.potentials[cloud_ind][pot_inds] += tukeys
min_ind = torch.argmin(self.potentials[cloud_ind])
self.min_potentials[[cloud_ind]] = self.potentials[cloud_ind][
min_ind
]
self.argmin_potentials[[cloud_ind]] = min_ind
t += [time.time()]
# Get points from tree structure
points = np.array(self.input_trees[cloud_ind].data, copy=False)
# Indices of points in input region
input_inds = self.input_trees[cloud_ind].query_radius(
center_point, r=self.config.in_radius
)[0]
t += [time.time()]
# Number collected
n = input_inds.shape[0]
# Safe check for empty spheres
if n < 2:
failed_attempts += 1
if failed_attempts > 100 * self.config.batch_num:
raise ValueError(
"It seems this dataset only contains empty input spheres"
)
t += [time.time()]
t += [time.time()]
continue
# Collect labels and colors
input_points = (points[input_inds] - center_point).astype(np.float32)
input_colors = self.input_colors[cloud_ind][input_inds]
if self.set in ["test", "ERF"]:
input_labels = np.zeros(input_points.shape[0])
else:
input_labels = self.input_labels[cloud_ind][input_inds]
input_labels = np.array([self.label_to_idx[l] for l in input_labels])
t += [time.time()]
# Data augmentation
input_points, scale, R = self.augmentation_transform(input_points)
# Color augmentation
if np.random.rand() > self.config.augment_color:
input_colors *= 0
# Get original height as additional feature
input_features = np.hstack(
(input_colors, input_points[:, 2:] + center_point[:, 2:])
).astype(np.float32)
t += [time.time()]
# Stack batch
p_list += [input_points]
f_list += [input_features]
l_list += [input_labels]
pi_list += [input_inds]
i_list += [point_ind]
ci_list += [cloud_ind]
s_list += [scale]
R_list += [R]
# Update batch size
batch_n += n
# In case batch is full, stop
if batch_n > int(self.batch_limit):
break
# Randomly drop some points (act as an augmentation process and a safety for GPU memory consumption)
# if n > int(self.batch_limit):
# input_inds = np.random.choice(input_inds, size=int(self.batch_limit) - 1, replace=False)
# n = input_inds.shape[0]
###################
# Concatenate batch
###################
stacked_points = np.concatenate(p_list, axis=0)
features = np.concatenate(f_list, axis=0)
labels = np.concatenate(l_list, axis=0)
point_inds = np.array(i_list, dtype=np.int32)
cloud_inds = np.array(ci_list, dtype=np.int32)
input_inds = np.concatenate(pi_list, axis=0)
stack_lengths = np.array([pp.shape[0] for pp in p_list], dtype=np.int32)
scales = np.array(s_list, dtype=np.float32)
rots = np.stack(R_list, axis=0)
# Input features
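# Feature composition depends on config.in_features_dim:
# 1: constant one only, 4: one + RGB, 5: one + RGB + original point height (z).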
stacked_features = np.ones_like(stacked_points[:, :1], dtype=np.float32)
if self.config.in_features_dim == 1:
pass
elif self.config.in_features_dim == 4:
stacked_features = np.hstack((stacked_features, features[:, :3]))
elif self.config.in_features_dim == 5:
stacked_features = np.hstack((stacked_features, features))
else:
raise ValueError(
"Only accepted input feature dimensions are 1, 4 and 5"
)
#######################
# Create network inputs
#######################
#
# Points, neighbors, pooling indices for each layer
#
t += [time.time()]
# Get the whole input list
input_list = self.segmentation_inputs(
stacked_points, stacked_features, labels, stack_lengths
)
t += [time.time()]
# Add scale and rotation for testing
input_list += [scales, rots, cloud_inds, point_inds, input_inds]
if debug_workers:
message = ""
for wi in range(info.num_workers):
if wi == wid:
message += " {:}0{:} ".format(bcolors.OKBLUE, bcolors.ENDC)
elif self.worker_waiting[wi] == 0:
message += " "
elif self.worker_waiting[wi] == 1:
message += " | "
elif self.worker_waiting[wi] == 2:
message += " o "
print(message)
self.worker_waiting[wid] = 2
t += [time.time()]
# Display timings
debugT = False
if debugT:
print("\n************************\n")
print("Timings:")
ti = 0
N = 5
mess = "Init ...... {:5.1f}ms /"
loop_times = [
1000 * (t[ti + N * i + 1] - t[ti + N * i])
for i in range(len(stack_lengths))
]
for dt in loop_times:
mess += " {:5.1f}".format(dt)
print(mess.format(np.sum(loop_times)))
ti += 1
mess = "Pots ...... {:5.1f}ms /"
loop_times = [
1000 * (t[ti + N * i + 1] - t[ti + N * i])
for i in range(len(stack_lengths))
]
for dt in loop_times:
mess += " {:5.1f}".format(dt)
print(mess.format(np.sum(loop_times)))
ti += 1
mess = "Sphere .... {:5.1f}ms /"
loop_times = [
1000 * (t[ti + N * i + 1] - t[ti + N * i])
for i in range(len(stack_lengths))
]
for dt in loop_times:
mess += " {:5.1f}".format(dt)
print(mess.format(np.sum(loop_times)))
ti += 1
mess = "Collect ... {:5.1f}ms /"
loop_times = [
1000 * (t[ti + N * i + 1] - t[ti + N * i])
for i in range(len(stack_lengths))
]
for dt in loop_times:
mess += " {:5.1f}".format(dt)
print(mess.format(np.sum(loop_times)))
ti += 1
mess = "Augment ... {:5.1f}ms /"
loop_times = [
1000 * (t[ti + N * i + 1] - t[ti + N * i])
for i in range(len(stack_lengths))
]
for dt in loop_times:
mess += " {:5.1f}".format(dt)
print(mess.format(np.sum(loop_times)))
ti += N * (len(stack_lengths) - 1) + 1
print("concat .... {:5.1f}ms".format(1000 * (t[ti + 1] - t[ti])))
ti += 1
print("input ..... {:5.1f}ms".format(1000 * (t[ti + 1] - t[ti])))
ti += 1
print("stack ..... {:5.1f}ms".format(1000 * (t[ti + 1] - t[ti])))
ti += 1
print("\n************************\n")
return input_list
def random_item(self, batch_i):
# Initiate concatenation lists
p_list = []
f_list = []
l_list = []
i_list = []
pi_list = []
ci_list = []
s_list = []
R_list = []
batch_n = 0
failed_attempts = 0
while True:
with self.worker_lock:
# Get next sphere center index for this epoch
cloud_ind = int(self.epoch_inds[0, self.epoch_i])
point_ind = int(self.epoch_inds[1, self.epoch_i])
# Update epoch index
self.epoch_i += 1
if self.epoch_i >= int(self.epoch_inds.shape[1]):
self.epoch_i -= int(self.epoch_inds.shape[1])
# Get points from tree structure
points = np.array(self.input_trees[cloud_ind].data, copy=False)
# Center point of input region
center_point = points[point_ind, :].reshape(1, -1)
# Add a small noise to center point
if self.set != "ERF":
center_point += np.random.normal(
scale=self.config.in_radius / 10, size=center_point.shape
)
# Indices of points in input region
input_inds = self.input_trees[cloud_ind].query_radius(
center_point, r=self.config.in_radius
)[0]
# Number collected
n = input_inds.shape[0]
# Safe check for empty spheres
if n < 2:
failed_attempts += 1
if failed_attempts > 100 * self.config.batch_num:
raise ValueError(
"It seems this dataset only contains empty input spheres"
)
continue
# Collect labels and colors
input_points = (points[input_inds] - center_point).astype(np.float32)
input_colors = self.input_colors[cloud_ind][input_inds]
if self.set in ["test", "ERF"]:
input_labels = np.zeros(input_points.shape[0])
else:
input_labels = self.input_labels[cloud_ind][input_inds]
input_labels = np.array([self.label_to_idx[l] for l in input_labels])
# Data augmentation
input_points, scale, R = self.augmentation_transform(input_points)
# Color augmentation
if np.random.rand() > self.config.augment_color:
input_colors *= 0
# Get original height as additional feature
input_features = np.hstack(
(input_colors, input_points[:, 2:] + center_point[:, 2:])
).astype(np.float32)
# Stack batch
p_list += [input_points]
f_list += [input_features]
l_list += [input_labels]
pi_list += [input_inds]
i_list += [point_ind]
ci_list += [cloud_ind]
s_list += [scale]
R_list += [R]
# Update batch size
batch_n += n
# In case batch is full, stop
if batch_n > int(self.batch_limit):
break
# Randomly drop some points (act as an augmentation process and a safety for GPU memory consumption)
# if n > int(self.batch_limit):
# input_inds = np.random.choice(input_inds, size=int(self.batch_limit) - 1, replace=False)
# n = input_inds.shape[0]
###################
# Concatenate batch
###################
stacked_points = np.concatenate(p_list, axis=0)
features = np.concatenate(f_list, axis=0)
labels = np.concatenate(l_list, axis=0)
point_inds = np.array(i_list, dtype=np.int32)
cloud_inds = np.array(ci_list, dtype=np.int32)
input_inds = np.concatenate(pi_list, axis=0)
stack_lengths = np.array([pp.shape[0] for pp in p_list], dtype=np.int32)
scales = np.array(s_list, dtype=np.float32)
rots = np.stack(R_list, axis=0)
# Input features
stacked_features = np.ones_like(stacked_points[:, :1], dtype=np.float32)
if self.config.in_features_dim == 1:
pass
elif self.config.in_features_dim == 4:
stacked_features = np.hstack((stacked_features, features[:, :3]))
elif self.config.in_features_dim == 5:
stacked_features = np.hstack((stacked_features, features))
else:
raise ValueError(
"Only accepted input feature dimensions are 1, 4 and 5"
)
#######################
# Create network inputs
#######################
#
# Points, neighbors, pooling indices for each layer
#
# Get the whole input list
input_list = self.segmentation_inputs(
stacked_points, stacked_features, labels, stack_lengths
)
# Add scale and rotation for testing
input_list += [scales, rots, cloud_inds, point_inds, input_inds]
return input_list
def prepare_S3DIS_ply(self):
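# Converts the raw S3DIS annotations (one text file per object, grouped by room
# under <Area>/<room>/Annotations/) into a single PLY file per Area with
# x, y, z, red, green, blue and class fields, written to the original_ply folder.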
print("\nPreparing ply files")
t0 = time.time()
# Folder for the ply files
ply_path = join(self.path, self.train_path)
if not exists(ply_path):
makedirs(ply_path)
for cloud_name in self.cloud_names:
# Pass if the cloud has already been computed
cloud_file = join(ply_path, cloud_name + ".ply")
if exists(cloud_file):
continue
# Get rooms of the current cloud
cloud_folder = join(self.path, cloud_name)
room_folders = [
join(cloud_folder, room)
for room in listdir(cloud_folder)
if isdir(join(cloud_folder, room))
]
# Initiate containers
cloud_points = np.empty((0, 3), dtype=np.float32)
cloud_colors = np.empty((0, 3), dtype=np.uint8)
cloud_classes = np.empty((0, 1), dtype=np.int32)
# Loop over rooms
for i, room_folder in enumerate(room_folders):
print(
"Cloud %s - Room %d/%d : %s"
% (cloud_name, i + 1, len(room_folders), room_folder.split("/")[-1])
)
for object_name in listdir(join(room_folder, "Annotations")):
if object_name[-4:] == ".txt":
# Text file containing points of the object
object_file = join(room_folder, "Annotations", object_name)
# Object class and ID
tmp = object_name[:-4].split("_")[0]
if tmp in self.name_to_label:
object_class = self.name_to_label[tmp]
elif tmp in ["stairs"]:
object_class = self.name_to_label["clutter"]
else:
raise ValueError("Unknown object name: " + str(tmp))
# Correct bug in S3DIS dataset
if object_name == "ceiling_1.txt":
with open(object_file, "r") as f:
lines = f.readlines()
for l_i, line in enumerate(lines):
if "103.0\x100000" in line:
lines[l_i] = line.replace(
"103.0\x100000", "103.000000"
)
with open(object_file, "w") as f:
f.writelines(lines)
# Read object points and colors
object_data = np.loadtxt(object_file, dtype=np.float32)
# Stack all data
cloud_points = np.vstack(
(cloud_points, object_data[:, 0:3].astype(np.float32))
)
cloud_colors = np.vstack(
(cloud_colors, object_data[:, 3:6].astype(np.uint8))
)
object_classes = np.full(
(object_data.shape[0], 1), object_class, dtype=np.int32
)
cloud_classes = np.vstack((cloud_classes, object_classes))
# Save as ply
write_ply(
cloud_file,
(cloud_points, cloud_colors, cloud_classes),
["x", "y", "z", "red", "green", "blue", "class"],
)
print("Done in {:.1f}s".format(time.time() - t0))
return
def load_subsampled_clouds(self):
# Parameter
dl = self.config.first_subsampling_dl
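# dl is the grid size (in meters) of the first subsampling; the subsampled clouds
# and their KDTrees are cached in an input_{dl}/ folder so that they are only
# computed once per subsampling value.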
# Create path for files
tree_path = join(self.path, "input_{:.3f}".format(dl))
if not exists(tree_path):
makedirs(tree_path)
##############
# Load KDTrees
##############
for i, file_path in enumerate(self.files):
# Restart timer
t0 = time.time()
# Get cloud name
cloud_name = self.cloud_names[i]
# Name of the input files
KDTree_file = join(tree_path, "{:s}_KDTree.pkl".format(cloud_name))
sub_ply_file = join(tree_path, "{:s}.ply".format(cloud_name))
# Check if inputs have already been computed
if exists(KDTree_file):
print(
"\nFound KDTree for cloud {:s}, subsampled at {:.3f}".format(
cloud_name, dl
)
)
# read ply with data
data = read_ply(sub_ply_file)
sub_colors = np.vstack((data["red"], data["green"], data["blue"])).T
sub_labels = data["class"]
# Read pkl with search tree
with open(KDTree_file, "rb") as f:
search_tree = pickle.load(f)
else:
print(
"\nPreparing KDTree for cloud {:s}, subsampled at {:.3f}".format(
cloud_name, dl
)
)
# Read ply file
data = read_ply(file_path)
points = np.vstack((data["x"], data["y"], data["z"])).T
colors = np.vstack((data["red"], data["green"], data["blue"])).T
labels = data["class"]
# Subsample cloud
sub_points, sub_colors, sub_labels = grid_subsampling(
points, features=colors, labels=labels, sampleDl=dl
)
# Rescale float color and squeeze label
sub_colors = sub_colors / 255
sub_labels = np.squeeze(sub_labels)
# Get chosen neighborhoods
search_tree = KDTree(sub_points, leaf_size=10)
# search_tree = nnfln.KDTree(n_neighbors=1, metric='L2', leaf_size=10)
# search_tree.fit(sub_points)
# Save KDTree
with open(KDTree_file, "wb") as f:
pickle.dump(search_tree, f)
# Save ply
write_ply(
sub_ply_file,
[sub_points, sub_colors, sub_labels],
["x", "y", "z", "red", "green", "blue", "class"],
)
# Fill data containers
self.input_trees += [search_tree]
self.input_colors += [sub_colors]
self.input_labels += [sub_labels]
size = sub_colors.shape[0] * 4 * 7
print("{:.1f} MB loaded in {:.1f}s".format(size * 1e-6, time.time() - t0))
############################
# Coarse potential locations
############################
# Only necessary when potentials are used
if self.use_potentials:
print("\nPreparing potentials")
# Restart timer
t0 = time.time()
pot_dl = self.config.in_radius / 10
cloud_ind = 0
for i, file_path in enumerate(self.files):
# Get cloud name
cloud_name = self.cloud_names[i]
# Name of the input files
coarse_KDTree_file = join(
tree_path, "{:s}_coarse_KDTree.pkl".format(cloud_name)
)
# Check if inputs have already been computed
if exists(coarse_KDTree_file):
# Read pkl with search tree
with open(coarse_KDTree_file, "rb") as f:
search_tree = pickle.load(f)
else:
# Subsample cloud
sub_points = np.array(self.input_trees[cloud_ind].data, copy=False)
coarse_points = grid_subsampling(
sub_points.astype(np.float32), sampleDl=pot_dl
)
# Get chosen neighborhoods
search_tree = KDTree(coarse_points, leaf_size=10)
# Save KDTree
with open(coarse_KDTree_file, "wb") as f:
pickle.dump(search_tree, f)
# Fill data containers
self.pot_trees += [search_tree]
cloud_ind += 1
print("Done in {:.1f}s".format(time.time() - t0))
######################
# Reprojection indices
######################
# Get number of clouds
self.num_clouds = len(self.input_trees)
# Only necessary for validation and test sets
if self.set in ["validation", "test"]:
print("\nPreparing reprojection indices for testing")
# Get validation/test reprojection indices
for i, file_path in enumerate(self.files):
# Restart timer
t0 = time.time()
# Get info on this cloud
cloud_name = self.cloud_names[i]
# File name for saving
proj_file = join(tree_path, "{:s}_proj.pkl".format(cloud_name))
# Try to load previous indices
if exists(proj_file):
with open(proj_file, "rb") as f:
proj_inds, labels = pickle.load(f)
else:
data = read_ply(file_path)
points = np.vstack((data["x"], data["y"], data["z"])).T
labels = data["class"]
# Compute projection inds
idxs = self.input_trees[i].query(points, return_distance=False)
# dists, idxs = self.input_trees[i_cloud].kneighbors(points)
proj_inds = np.squeeze(idxs).astype(np.int32)
# Save
with open(proj_file, "wb") as f:
pickle.dump([proj_inds, labels], f)
self.test_proj += [proj_inds]
self.validation_labels += [labels]
print("{:s} done in {:.1f}s".format(cloud_name, time.time() - t0))
print()
return
def load_evaluation_points(self, file_path):
"""
Load points (from test or validation split) on which the metrics should be evaluated
"""
# Get original points
data = read_ply(file_path)
return np.vstack((data["x"], data["y"], data["z"])).T
# ----------------------------------------------------------------------------------------------------------------------
#
# Utility classes definition
# \********************************/
class S3DISSampler(Sampler):
"""Sampler for S3DIS"""
def __init__(self, dataset: S3DISDataset):
Sampler.__init__(self, dataset)
# Dataset used by the sampler (no copy is made in memory)
self.dataset = dataset
# Number of steps per epoch
if dataset.set == "training":
self.N = dataset.config.epoch_steps
else:
self.N = dataset.config.validation_size
return
def __iter__(self):
"""
Yield next batch indices here. In this dataset, this is a dummy sampler that yields the index of a batch element
(input sphere) in the epoch instead of a list of point indices
"""
if not self.dataset.use_potentials:
# Initiate current epoch ind
self.dataset.epoch_i *= 0
self.dataset.epoch_inds *= 0
# Initiate container for indices
all_epoch_inds = np.zeros((2, 0), dtype=np.int64)
# Number of sphere centers taken per class in each cloud
num_centers = self.N * self.dataset.config.batch_num
random_pick_n = int(np.ceil(num_centers / self.dataset.config.num_classes))
# Choose random points of each class for each cloud
for label_ind, label in enumerate(self.dataset.label_values):
if label not in self.dataset.ignored_labels:
# Gather indices of the points with this label in all the input clouds
all_label_indices = []
for cloud_ind, cloud_labels in enumerate(self.dataset.input_labels):
label_indices = np.where(np.equal(cloud_labels, label))[0]
all_label_indices.append(
np.vstack(
(
np.full(
label_indices.shape, cloud_ind, dtype=np.int64
),
label_indices,
)
)
)
# Stack them: [2, N1+N2+...]
all_label_indices = np.hstack(all_label_indices)
# Select a random number amongst them
N_inds = all_label_indices.shape[1]
if N_inds < random_pick_n:
chosen_label_inds = np.zeros((2, 0), dtype=np.int64)
while chosen_label_inds.shape[1] < random_pick_n:
chosen_label_inds = np.hstack(
(
chosen_label_inds,
all_label_indices[:, np.random.permutation(N_inds)],
)
)
warnings.warn(
"When choosing random epoch indices (use_potentials=False), \
class {:d}: {:s} only had {:d} available points, while we \
needed {:d}. Repeating indices in the same epoch".format(
label,
self.dataset.label_names[label_ind],
N_inds,
random_pick_n,
)
)
elif N_inds < 50 * random_pick_n:
rand_inds = np.random.choice(
N_inds, size=random_pick_n, replace=False
)
chosen_label_inds = all_label_indices[:, rand_inds]
else:
chosen_label_inds = np.zeros((2, 0), dtype=np.int64)
while chosen_label_inds.shape[1] < random_pick_n:
rand_inds = np.unique(
np.random.choice(
N_inds, size=2 * random_pick_n, replace=True
)
)
chosen_label_inds = np.hstack(
(chosen_label_inds, all_label_indices[:, rand_inds])
)
chosen_label_inds = chosen_label_inds[:, :random_pick_n]
# Stack for each label
all_epoch_inds = np.hstack((all_epoch_inds, chosen_label_inds))
# Random permutation of the indices
random_order = np.random.permutation(all_epoch_inds.shape[1])[:num_centers]
all_epoch_inds = all_epoch_inds[:, random_order].astype(np.int64)
# Update epoch inds
self.dataset.epoch_inds += torch.from_numpy(all_epoch_inds)
# Generator loop
for i in range(self.N):
yield i
def __len__(self):
"""
The number of yielded samples is variable
"""
return self.N
def fast_calib(self):
"""
This method calibrates the batch sizes while ensuring the potentials are well initialized. Indeed on a dataset
like Semantic3D, before potential have been updated over the dataset, there are cahnces that all the dense area
are picked in the begining and in the end, we will have very large batch of small point clouds
:return:
"""
# Estimated average batch size and target value
estim_b = 0
target_b = self.dataset.config.batch_num
# Calibration parameters
low_pass_T = 10
Kp = 100.0
finer = False
breaking = False
# Convergence parameters
smooth_errors = []
converge_threshold = 0.1
t = [time.time()]
last_display = time.time()
mean_dt = np.zeros(2)
for epoch in range(10):
for i, test in enumerate(self):
# New time
t = t[-1:]
t += [time.time()]
# batch length
b = len(test)
# Update estim_b (low pass filter)
estim_b += (b - estim_b) / low_pass_T
# Estimate error (noisy)
error = target_b - b
# Save smooth errors for convergence check
smooth_errors.append(target_b - estim_b)
if len(smooth_errors) > 10:
smooth_errors = smooth_errors[1:]
# Update batch limit with P controller
self.dataset.batch_limit += Kp * error
# finer low pass filter when closing in
if not finer and np.abs(estim_b - target_b) < 1:
low_pass_T = 100
finer = True
# Convergence
if finer and np.max(np.abs(smooth_errors)) < converge_threshold:
breaking = True
break
# Average timing
t += [time.time()]
mean_dt = 0.9 * mean_dt + 0.1 * (np.array(t[1:]) - np.array(t[:-1]))
# Console display (only one per second)
if (t[-1] - last_display) > 1.0:
last_display = t[-1]
message = "Step {:5d} estim_b ={:5.2f} batch_limit ={:7d}, // {:.1f}ms {:.1f}ms"
print(
message.format(
i,
estim_b,
int(self.dataset.batch_limit),
1000 * mean_dt[0],
1000 * mean_dt[1],
)
)
if breaking:
break
def calibration(
self, dataloader, untouched_ratio=0.9, verbose=False, force_redo=False
):
"""
Method performing batch and neighbors calibration.
Batch calibration: Set "batch_limit" (the maximum number of points allowed in every batch) so that the
average batch size (number of stacked pointclouds) is the one asked.
Neighbors calibration: Set the "neighborhood_limits" (the maximum number of neighbors allowed in convolutions)
so that 90% of the neighborhoods remain untouched. There is a limit for each layer.
"""
##############################
# Previously saved calibration
##############################
print("\nStarting Calibration (use verbose=True for more details)")
t0 = time.time()
redo = force_redo
# Batch limit
# ***********
# Load batch_limit dictionary
batch_lim_file = join(self.dataset.path, "batch_limits.pkl")
if exists(batch_lim_file):
with open(batch_lim_file, "rb") as file:
batch_lim_dict = pickle.load(file)
else:
batch_lim_dict = {}
# Check if the batch limit associated with current parameters exists
if self.dataset.use_potentials:
sampler_method = "potentials"
else:
sampler_method = "random"
key = "{:s}_{:.3f}_{:.3f}_{:d}".format(
sampler_method,
self.dataset.config.in_radius,
self.dataset.config.first_subsampling_dl,
self.dataset.config.batch_num,
)
if not redo and key in batch_lim_dict:
self.dataset.batch_limit[0] = batch_lim_dict[key]
else:
redo = True
if verbose:
print("\nPrevious calibration found:")
print("Check batch limit dictionary")
if key in batch_lim_dict:
color = bcolors.OKGREEN
v = str(int(batch_lim_dict[key]))
else:
color = bcolors.FAIL
v = "?"
print('{:}"{:s}": {:s}{:}'.format(color, key, v, bcolors.ENDC))
# Neighbors limit
# ***************
# Load neighb_limits dictionary
neighb_lim_file = join(self.dataset.path, "neighbors_limits.pkl")
if exists(neighb_lim_file):
with open(neighb_lim_file, "rb") as file:
neighb_lim_dict = pickle.load(file)
else:
neighb_lim_dict = {}
# Check if the limit associated with current parameters exists (for each layer)
neighb_limits = []
for layer_ind in range(self.dataset.config.num_layers):
dl = self.dataset.config.first_subsampling_dl * (2**layer_ind)
if self.dataset.config.deform_layers[layer_ind]:
r = dl * self.dataset.config.deform_radius
else:
r = dl * self.dataset.config.conv_radius
key = "{:.3f}_{:.3f}".format(dl, r)
if key in neighb_lim_dict:
neighb_limits += [neighb_lim_dict[key]]
if not redo and len(neighb_limits) == self.dataset.config.num_layers:
self.dataset.neighborhood_limits = neighb_limits
else:
redo = True
if verbose:
print("Check neighbors limit dictionary")
for layer_ind in range(self.dataset.config.num_layers):
dl = self.dataset.config.first_subsampling_dl * (2**layer_ind)
if self.dataset.config.deform_layers[layer_ind]:
r = dl * self.dataset.config.deform_radius
else:
r = dl * self.dataset.config.conv_radius
key = "{:.3f}_{:.3f}".format(dl, r)
if key in neighb_lim_dict:
color = bcolors.OKGREEN
v = str(neighb_lim_dict[key])
else:
color = bcolors.FAIL
v = "?"
print('{:}"{:s}": {:s}{:}'.format(color, key, v, bcolors.ENDC))
if redo:
############################
# Neighbors calib parameters
############################
# From config parameter, compute higher bound of neighbors number in a neighborhood
hist_n = int(
np.ceil(4 / 3 * np.pi * (self.dataset.config.deform_radius + 1) ** 3)
)
# Histogram of neighborhood sizes
neighb_hists = np.zeros(
(self.dataset.config.num_layers, hist_n), dtype=np.int32
)
########################
# Batch calib parameters
########################
# Estimated average batch size and target value
estim_b = 0
target_b = self.dataset.config.batch_num
# Expected number of points per batch (order of magnitude)
expected_N = 100000
# Calibration parameters. Higher means faster but can also become unstable
# Reduce Kp and Kd if your GPU is small, as the total number of points per batch will be smaller
low_pass_T = 100
Kp = expected_N / 200
Ki = 0.001 * Kp
Kd = 5 * Kp
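# PID update used below: batch_limit += Kp*e + Ki*sum(e) + Kd*(e - e_prev), where
# e = batch_num - current batch size. batch_limit is the maximum number of points
# allowed in a batch, so the controller drives the average batch size to batch_num.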
finer = False
stabilized = False
# Convergence parameters
smooth_errors = []
converge_threshold = 0.1
# Loop parameters
last_display = time.time()
i = 0
breaking = False
error_I = 0
error_D = 0
last_error = 0
debug_in = []
debug_out = []
debug_b = []
debug_estim_b = []
#####################
# Perform calibration
#####################
# Number of batches to sample for calibration
sample_batches = 999
for epoch in range((sample_batches // self.N) + 1):
for batch_i, batch in enumerate(dataloader):
# Update neighborhood histogram
counts = [
np.sum(neighb_mat.numpy() < neighb_mat.shape[0], axis=1)
for neighb_mat in batch.neighbors
]
hists = [np.bincount(c, minlength=hist_n)[:hist_n] for c in counts]
neighb_hists += np.vstack(hists)
# batch length
b = len(batch.cloud_inds)
# Update estim_b (low pass filter)
estim_b += (b - estim_b) / low_pass_T
# Estimate error (noisy)
error = target_b - b
error_I += error
error_D = error - last_error
last_error = error
# Save smooth errors for convergence check
smooth_errors.append(target_b - estim_b)
if len(smooth_errors) > 30:
smooth_errors = smooth_errors[1:]
# Update batch limit with PID controller
self.dataset.batch_limit += Kp * error + Ki * error_I + Kd * error_D
# Instability detection
if not stabilized and self.dataset.batch_limit < 0:
Kp *= 0.1
Ki *= 0.1
Kd *= 0.1
stabilized = True
# finer low pass filter when closing in
if not finer and np.abs(estim_b - target_b) < 1:
low_pass_T = 100
finer = True
# Convergence
if finer and np.max(np.abs(smooth_errors)) < converge_threshold:
breaking = True
break
i += 1
t = time.time()
# Console display (only one per second)
if verbose and (t - last_display) > 1.0:
last_display = t
message = "Step {:5d} estim_b ={:5.2f} batch_limit ={:7d}"
print(message.format(i, estim_b, int(self.dataset.batch_limit)))
# Debug plots
debug_in.append(int(batch.points[0].shape[0]))
debug_out.append(int(self.dataset.batch_limit))
debug_b.append(b)
debug_estim_b.append(estim_b)
if breaking:
break
# Plot in case we did not reach convergence
if not breaking:
import matplotlib.pyplot as plt
print(
"ERROR: It seems that the calibration has not reached convergence. Here are some plots to understand why:"
)
print("If you notice instability, reduce the expected_N value")
print("If convergence is too slow, increase the expected_N value")
plt.figure()
plt.plot(debug_in)
plt.plot(debug_out)
plt.figure()
plt.plot(debug_b)
plt.plot(debug_estim_b)
plt.show()
# Use collected neighbor histogram to get neighbors limit
cumsum = np.cumsum(neighb_hists.T, axis=0)
percentiles = np.sum(
cumsum < (untouched_ratio * cumsum[hist_n - 1, :]), axis=0
)
self.dataset.neighborhood_limits = percentiles
if verbose:
# Crop histogram
while np.sum(neighb_hists[:, -1]) == 0:
neighb_hists = neighb_hists[:, :-1]
hist_n = neighb_hists.shape[1]
print("\n**************************************************\n")
line0 = "neighbors_num "
for layer in range(neighb_hists.shape[0]):
line0 += "| layer {:2d} ".format(layer)
print(line0)
for neighb_size in range(hist_n):
line0 = " {:4d} ".format(neighb_size)
2020-03-31 19:42:35 +00:00
for layer in range(neighb_hists.shape[0]):
if neighb_size > percentiles[layer]:
color = bcolors.FAIL
else:
color = bcolors.OKGREEN
line0 += "|{:}{:10d}{:} ".format(
color, neighb_hists[layer, neighb_size], bcolors.ENDC
)
print(line0)
print("\n**************************************************\n")
print("\nchosen neighbors limits: ", percentiles)
print()
# Save batch_limit dictionary
if self.dataset.use_potentials:
sampler_method = "potentials"
else:
sampler_method = "random"
key = "{:s}_{:.3f}_{:.3f}_{:d}".format(
sampler_method,
self.dataset.config.in_radius,
self.dataset.config.first_subsampling_dl,
self.dataset.config.batch_num,
)
batch_lim_dict[key] = float(self.dataset.batch_limit)
with open(batch_lim_file, "wb") as file:
pickle.dump(batch_lim_dict, file)
# Save neighb_limit dictionary
for layer_ind in range(self.dataset.config.num_layers):
dl = self.dataset.config.first_subsampling_dl * (2**layer_ind)
if self.dataset.config.deform_layers[layer_ind]:
r = dl * self.dataset.config.deform_radius
else:
r = dl * self.dataset.config.conv_radius
key = "{:.3f}_{:.3f}".format(dl, r)
neighb_lim_dict[key] = self.dataset.neighborhood_limits[layer_ind]
with open(neighb_lim_file, "wb") as file:
pickle.dump(neighb_lim_dict, file)
print("Calibration done in {:.1f}s\n".format(time.time() - t0))
return
class S3DISCustomBatch:
"""Custom batch definition with memory pinning for S3DIS"""
def __init__(self, input_list):
# Get rid of batch dimension
input_list = input_list[0]
# Number of layers
L = (len(input_list) - 7) // 5
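# input_list holds 5 per-layer lists (points, neighbors, pools, upsamples, lengths)
# followed by 7 single tensors (features, labels, scales, rots, cloud_inds,
# center_inds, input_inds), hence L = (len(input_list) - 7) // 5 layers.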
# Extract input tensors from the list of numpy array
ind = 0
self.points = [
torch.from_numpy(nparray) for nparray in input_list[ind : ind + L]
]
ind += L
self.neighbors = [
torch.from_numpy(nparray) for nparray in input_list[ind : ind + L]
]
ind += L
self.pools = [
torch.from_numpy(nparray) for nparray in input_list[ind : ind + L]
]
ind += L
self.upsamples = [
torch.from_numpy(nparray) for nparray in input_list[ind : ind + L]
]
ind += L
self.lengths = [
torch.from_numpy(nparray) for nparray in input_list[ind : ind + L]
]
ind += L
self.features = torch.from_numpy(input_list[ind])
ind += 1
self.labels = torch.from_numpy(input_list[ind])
ind += 1
self.scales = torch.from_numpy(input_list[ind])
ind += 1
self.rots = torch.from_numpy(input_list[ind])
ind += 1
self.cloud_inds = torch.from_numpy(input_list[ind])
ind += 1
self.center_inds = torch.from_numpy(input_list[ind])
ind += 1
self.input_inds = torch.from_numpy(input_list[ind])
return
def pin_memory(self):
"""
Manual pinning of the memory
"""
self.points = [in_tensor.pin_memory() for in_tensor in self.points]
self.neighbors = [in_tensor.pin_memory() for in_tensor in self.neighbors]
self.pools = [in_tensor.pin_memory() for in_tensor in self.pools]
self.upsamples = [in_tensor.pin_memory() for in_tensor in self.upsamples]
self.lengths = [in_tensor.pin_memory() for in_tensor in self.lengths]
self.features = self.features.pin_memory()
self.labels = self.labels.pin_memory()
self.scales = self.scales.pin_memory()
self.rots = self.rots.pin_memory()
self.cloud_inds = self.cloud_inds.pin_memory()
self.center_inds = self.center_inds.pin_memory()
self.input_inds = self.input_inds.pin_memory()
return self
def to(self, device):
self.points = [in_tensor.to(device) for in_tensor in self.points]
self.neighbors = [in_tensor.to(device) for in_tensor in self.neighbors]
self.pools = [in_tensor.to(device) for in_tensor in self.pools]
self.upsamples = [in_tensor.to(device) for in_tensor in self.upsamples]
self.lengths = [in_tensor.to(device) for in_tensor in self.lengths]
self.features = self.features.to(device)
self.labels = self.labels.to(device)
self.scales = self.scales.to(device)
self.rots = self.rots.to(device)
self.cloud_inds = self.cloud_inds.to(device)
self.center_inds = self.center_inds.to(device)
self.input_inds = self.input_inds.to(device)
return self
def unstack_points(self, layer=None):
"""Unstack the points"""
return self.unstack_elements("points", layer)
def unstack_neighbors(self, layer=None):
"""Unstack the neighbors indices"""
return self.unstack_elements("neighbors", layer)
def unstack_pools(self, layer=None):
"""Unstack the pooling indices"""
return self.unstack_elements("pools", layer)
def unstack_elements(self, element_name, layer=None, to_numpy=True):
"""
Return a list of the stacked elements in the batch at a certain layer. If no layer is given, then return all
layers
"""
if element_name == "points":
elements = self.points
elif element_name == "neighbors":
elements = self.neighbors
elif element_name == "pools":
elements = self.pools[:-1]
else:
raise ValueError("Unknown element name: {:s}".format(element_name))
all_p_list = []
for layer_i, layer_elems in enumerate(elements):
if layer is None or layer == layer_i:
i0 = 0
p_list = []
if element_name == "pools":
lengths = self.lengths[layer_i + 1]
else:
lengths = self.lengths[layer_i]
for b_i, length in enumerate(lengths):
elem = layer_elems[i0 : i0 + length]
if element_name == "neighbors":
elem[elem >= self.points[layer_i].shape[0]] = -1
elem[elem >= 0] -= i0
elif element_name == "pools":
elem[elem >= self.points[layer_i].shape[0]] = -1
elem[elem >= 0] -= torch.sum(self.lengths[layer_i][:b_i])
i0 += length
if to_numpy:
p_list.append(elem.numpy())
else:
p_list.append(elem)
if layer == layer_i:
return p_list
all_p_list.append(p_list)
return all_p_list
def S3DISCollate(batch_data):
return S3DISCustomBatch(batch_data)
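# Typical wiring of the dataset, sampler and collate function with a PyTorch
# DataLoader (a minimal sketch, assuming a `config` object like the one used by
# the training scripts; exact arguments may differ):
#
#   from torch.utils.data import DataLoader
#   train_set = S3DISDataset(config, set='training', use_potentials=True)
#   train_sampler = S3DISSampler(train_set)
#   train_loader = DataLoader(train_set,
#                             batch_size=1,
#                             sampler=train_sampler,
#                             collate_fn=S3DISCollate,
#                             num_workers=config.input_threads,
#                             pin_memory=True)
#   train_sampler.calibration(train_loader, verbose=True)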
# ----------------------------------------------------------------------------------------------------------------------
#
# Debug functions
# \*********************/
def debug_upsampling(dataset, loader):
"""Shows which labels are sampled according to strategy chosen"""
for epoch in range(10):
for batch_i, batch in enumerate(loader):
pc1 = batch.points[1].numpy()
pc2 = batch.points[2].numpy()
up1 = batch.upsamples[1].numpy()
print(pc1.shape, "=>", pc2.shape)
print(up1.shape, np.max(up1))
pc2 = np.vstack((pc2, np.zeros_like(pc2[:1, :])))
# Get neighbors distance
p0 = pc1[10, :]
neighbs0 = up1[10, :]
neighbs0 = pc2[neighbs0, :] - p0
d2 = np.sum(neighbs0**2, axis=1)
print(neighbs0.shape)
print(neighbs0[:5])
print(d2[:5])
print("******************")
print("*******************************************")
_, counts = np.unique(dataset.input_labels, return_counts=True)
print(counts)
def debug_timing(dataset, loader):
"""Timing of generator function"""
t = [time.time()]
last_display = time.time()
mean_dt = np.zeros(2)
estim_b = dataset.config.batch_num
estim_N = 0
for epoch in range(10):
for batch_i, batch in enumerate(loader):
# print(batch_i, tuple(points.shape), tuple(normals.shape), labels, indices, in_sizes)
# New time
t = t[-1:]
t += [time.time()]
# Update estim_b (low pass filter)
estim_b += (len(batch.cloud_inds) - estim_b) / 100
estim_N += (batch.features.shape[0] - estim_N) / 10
# Pause simulating computations
time.sleep(0.05)
t += [time.time()]
# Average timing
mean_dt = 0.9 * mean_dt + 0.1 * (np.array(t[1:]) - np.array(t[:-1]))
# Console display (only one per second)
if (t[-1] - last_display) > -1.0:
last_display = t[-1]
message = "Step {:08d} -> (ms/batch) {:8.2f} {:8.2f} / batch = {:.2f} - {:.0f}"
print(
message.format(
batch_i, 1000 * mean_dt[0], 1000 * mean_dt[1], estim_b, estim_N
)
)
print("************* Epoch ended *************")
_, counts = np.unique(dataset.input_labels, return_counts=True)
print(counts)
def debug_show_clouds(dataset, loader):
for epoch in range(10):
L = dataset.config.num_layers
for batch_i, batch in enumerate(loader):
# Print characteristics of input tensors
print("\nPoints tensors")
for i in range(L):
print(batch.points[i].dtype, batch.points[i].shape)
print("\nNeigbors tensors")
for i in range(L):
print(batch.neighbors[i].dtype, batch.neighbors[i].shape)
print("\nPools tensors")
for i in range(L):
print(batch.pools[i].dtype, batch.pools[i].shape)
print("\nStack lengths")
for i in range(L):
print(batch.lengths[i].dtype, batch.lengths[i].shape)
print("\nFeatures")
print(batch.features.dtype, batch.features.shape)
print("\nLabels")
print(batch.labels.dtype, batch.labels.shape)
print("\nAugment Scales")
print(batch.scales.dtype, batch.scales.shape)
print("\nAugment Rotations")
print(batch.rots.dtype, batch.rots.shape)
print("\nModel indices")
2020-03-31 19:42:35 +00:00
print(batch.model_inds.dtype, batch.model_inds.shape)
print("\nAre input tensors pinned")
print(batch.neighbors[0].is_pinned())
print(batch.neighbors[-1].is_pinned())
print(batch.points[0].is_pinned())
print(batch.points[-1].is_pinned())
print(batch.labels.is_pinned())
print(batch.scales.is_pinned())
print(batch.rots.is_pinned())
print(batch.cloud_inds.is_pinned())
show_input_batch(batch)
print("*******************************************")
_, counts = np.unique(dataset.input_labels, return_counts=True)
print(counts)
def debug_batch_and_neighbors_calib(dataset, loader):
"""Timing of generator function"""
t = [time.time()]
last_display = time.time()
mean_dt = np.zeros(2)
for epoch in range(10):
for batch_i, input_list in enumerate(loader):
# print(batch_i, tuple(points.shape), tuple(normals.shape), labels, indices, in_sizes)
# New time
t = t[-1:]
t += [time.time()]
# Pause simulating computations
time.sleep(0.01)
t += [time.time()]
# Average timing
mean_dt = 0.9 * mean_dt + 0.1 * (np.array(t[1:]) - np.array(t[:-1]))
# Console display (only one per second)
if (t[-1] - last_display) > 1.0:
last_display = t[-1]
message = "Step {:08d} -> Average timings (ms/batch) {:8.2f} {:8.2f} "
print(message.format(batch_i, 1000 * mean_dt[0], 1000 * mean_dt[1]))
print("************* Epoch ended *************")
_, counts = np.unique(dataset.input_labels, return_counts=True)
print(counts)