# KPConv-PyTorch/train_S3DIS.py

#
#
#      0=================================0
#      |    Kernel Point Convolutions    |
#      0=================================0
#
#
# ----------------------------------------------------------------------------------------------------------------------
#
#      Callable script to start a training on S3DIS dataset
#
# ----------------------------------------------------------------------------------------------------------------------
#
#      Hugues THOMAS - 06/03/2020
#


# ----------------------------------------------------------------------------------------------------------------------
#
#           Imports and global variables
#       \**********************************/
#

# Common libs
import os
import signal
import sys
import time

# Dataset
from datasetss.S3DIS import *
from torch.utils.data import DataLoader

from utils.config import Config
from utils.trainer import ModelTrainer
from models.architectures import KPFCNN


# ----------------------------------------------------------------------------------------------------------------------
#
#           Config Class
#       \******************/
#


class S3DISConfig(Config):
    """
    Training configuration for the S3DIS dataset.

    Every value below is a class-level attribute layered on top of the base
    ``Config`` class: override the parameters you want to modify for this dataset.
    """

    ####################
    # Dataset parameters
    ####################

    # Dataset name
    dataset = "S3DIS"

    # Number of classes in the dataset (this value is overwritten by the dataset class when initializing the dataset).
    num_classes = None

    # Type of task performed on this dataset (also overwritten)
    dataset_task = ""

    # Number of CPU threads for the input pipeline
    # (passed as `num_workers` to both DataLoaders in the main script below)
    input_threads = 10

    #########################
    # Architecture definition
    #########################

    # Define layers (active definition: the last five convolution blocks are the deformable variants)
    architecture = [
        "simple",
        "resnetb",
        "resnetb_strided",
        "resnetb",
        "resnetb",
        "resnetb_strided",
        "resnetb",
        "resnetb",
        "resnetb_strided",
        "resnetb_deformable",
        "resnetb_deformable",
        "resnetb_deformable_strided",
        "resnetb_deformable",
        "resnetb_deformable",
        "nearest_upsample",
        "unary",
        "nearest_upsample",
        "unary",
        "nearest_upsample",
        "unary",
        "nearest_upsample",
        "unary",
    ]

    # Alternative layer definition, kept for reference: same layout as above but with
    # the non-deformable 'resnetb' / 'resnetb_strided' blocks in place of the deformable ones.
    # architecture = ['simple',
    #                 'resnetb',
    #                 'resnetb_strided',
    #                 'resnetb',
    #                 'resnetb',
    #                 'resnetb_strided',
    #                 'resnetb',
    #                 'resnetb',
    #                 'resnetb_strided',
    #                 'resnetb',
    #                 'resnetb',
    #                 'resnetb_strided',
    #                 'resnetb',
    #                 'resnetb',
    #                 'nearest_upsample',
    #                 'unary',
    #                 'nearest_upsample',
    #                 'unary',
    #                 'nearest_upsample',
    #                 'unary',
    #                 'nearest_upsample',
    #                 'unary']

    ###################
    # KPConv parameters
    ###################

    # Number of kernel points
    num_kernel_points = 15

    # Radius of the input sphere (decrease value to reduce memory cost)
    in_radius = 1.2

    # Size of the first subsampling grid in meters (increase value to reduce memory cost)
    first_subsampling_dl = 0.03

    # Radius of convolution in "number grid cell". (2.5 is the standard value)
    conv_radius = 2.5

    # Radius of deformable convolution in "number grid cell". Larger so that deformed kernel can spread out
    deform_radius = 5.0

    # Radius of the area of influence of each kernel point in "number grid cell". (1.0 is the standard value)
    KP_extent = 1.2

    # Behavior of convolutions in ('constant', 'linear', 'gaussian')
    KP_influence = "linear"

    # Aggregation function of KPConv in ('closest', 'sum')
    aggregation_mode = "sum"

    # Choice of input features
    first_features_dim = 128
    in_features_dim = 5

    # Can the network learn modulations
    modulated = False

    # Batch normalization parameters
    use_batch_norm = True
    batch_norm_momentum = 0.02

    # Deformable offset loss
    # 'point2point' fitting geometry by penalizing distance from deform point to input points
    # 'point2plane' fitting geometry by penalizing distance from deform point to input point triplet (not implemented)
    deform_fitting_mode = "point2point"
    deform_fitting_power = 1.0  # Multiplier for the fitting/repulsive loss
    deform_lr_factor = 0.1  # Multiplier for learning rate applied to the deformations
    repulse_extent = 1.2  # Distance of repulsion for deformed kernel points

    #####################
    # Training parameters
    #####################

    # Maximal number of epochs
    max_epoch = 500

    # Learning rate management
    learning_rate = 1e-2
    momentum = 0.98
    # Maps every epoch index in [1, max_epoch) to the same factor 0.1 ** (1 / 150).
    # NOTE(review): presumably the trainer multiplies the learning rate by this factor at
    # each listed epoch, i.e. the rate is divided by 10 every 150 epochs -- confirm in utils.trainer.
    lr_decays = {i: 0.1 ** (1 / 150) for i in range(1, max_epoch)}
    grad_clip_norm = 100.0

    # Number of batches (decrease to reduce memory cost, but it should remain > 3 for stability)
    batch_num = 6

    # Number of steps per epoch
    epoch_steps = 500

    # Number of validation examples per epoch
    validation_size = 50

    # Number of epochs between each checkpoint
    checkpoint_gap = 50

    # Augmentations
    augment_scale_anisotropic = True
    augment_symmetries = [True, False, False]
    augment_rotation = "vertical"
    augment_scale_min = 0.9
    augment_scale_max = 1.1
    augment_noise = 0.001
    augment_color = 0.8

    # The way we balance segmentation loss
    #   > 'none': Each point in the whole batch has the same contribution.
    #   > 'class': Each class has the same contribution (points are weighted according to class balance)
    #   > 'batch': Each cloud in the batch has the same contribution (points are weighted according to cloud sizes)
    segloss_balance = "none"

    # Do we need to save convergence
    saving = True
    # Output folder; None by default, replaced by the first command-line argument
    # in the main script below when one is given.
    saving_path = None


# ----------------------------------------------------------------------------------------------------------------------
#
#           Main Call
#       \***************/
#

if __name__ == "__main__":
    # Script entry point: pick an optional checkpoint to resume from, build the
    # S3DIS datasets / samplers / loaders, build the KPFCNN network, then train.
    # Usage: python train_S3DIS.py [saving_path]

    ############################
    # Initialize the environment
    ############################

    # Set which gpu is going to be used
    GPU_ID = "0"

    # Set GPU visible device
    os.environ["CUDA_VISIBLE_DEVICES"] = GPU_ID

    ###############
    # Previous chkp
    ###############

    # Choose here if you want to start training from a previous snapshot
    # (leave empty, or set to None, for a new training)
    # previous_training_path = 'Log_2020-03-19_19-53-27'
    previous_training_path = ""

    # Choose index of checkpoint to start from. If None, uses the latest chkp
    chkp_idx = None
    if previous_training_path:
        chkp_path = os.path.join("results", previous_training_path, "checkpoints")

        # Find which snapshot to restore
        if chkp_idx is None:
            chosen_chkp = "current_chkp.tar"
        else:
            # The folder only needs to be listed when a specific snapshot is requested.
            # Built-in sorted() orders the file names lexicographically, exactly as
            # np.sort did, without depending on numpy leaking in through the star import.
            chkps = sorted(f for f in os.listdir(chkp_path) if f.startswith("chkp"))
            chosen_chkp = chkps[chkp_idx]
        chosen_chkp = os.path.join(chkp_path, chosen_chkp)

    else:
        chosen_chkp = None

    ##############
    # Prepare Data
    ##############

    print()
    print("Data Preparation")
    print("****************")

    # Initialize configuration class
    config = S3DISConfig()
    if previous_training_path:
        # Reload the parameters saved with the previous run, but do not reuse its
        # output folder: saving_path is reset to None here.
        config.load(os.path.join("results", previous_training_path))
        config.saving_path = None

    # Get path from argument if given
    if len(sys.argv) > 1:
        config.saving_path = sys.argv[1]

    # Initialize datasets
    training_dataset = S3DISDataset(config, set="training", use_potentials=True)
    test_dataset = S3DISDataset(config, set="validation", use_potentials=True)

    # Initialize samplers
    training_sampler = S3DISSampler(training_dataset)
    test_sampler = S3DISSampler(test_dataset)

    # Initialize the dataloaders (batch_size=1 with the custom S3DISCollate function;
    # NOTE(review): presumably the dataset assembles whole batches itself -- confirm in datasetss.S3DIS)
    training_loader = DataLoader(
        training_dataset,
        batch_size=1,
        sampler=training_sampler,
        collate_fn=S3DISCollate,
        num_workers=config.input_threads,
        pin_memory=True,
    )
    test_loader = DataLoader(
        test_dataset,
        batch_size=1,
        sampler=test_sampler,
        collate_fn=S3DISCollate,
        num_workers=config.input_threads,
        pin_memory=True,
    )

    # Calibrate samplers
    training_sampler.calibration(training_loader, verbose=True)
    test_sampler.calibration(test_loader, verbose=True)

    # Optional debug functions
    # debug_timing(training_dataset, training_loader)
    # debug_timing(test_dataset, test_loader)
    # debug_upsampling(training_dataset, training_loader)

    print("\nModel Preparation")
    print("*****************")

    # Define network model
    t1 = time.time()
    net = KPFCNN(config, training_dataset.label_values, training_dataset.ignored_labels)

    # Flip to True to print the network, the shape of every trainable parameter
    # and the total number of trainable parameters.
    debug = False
    if debug:
        print("\n*************************************\n")
        print(net)
        print("\n*************************************\n")
        for param in net.parameters():
            if param.requires_grad:
                print(param.shape)
        print("\n*************************************\n")
        print(
            "Model size %i"
            % sum(param.numel() for param in net.parameters() if param.requires_grad)
        )
        print("\n*************************************\n")

    # Define a trainer class
    trainer = ModelTrainer(net, config, chkp_path=chosen_chkp)
    print("Done in {:.1f}s\n".format(time.time() - t1))

    print("\nStart training")
    print("**************")

    # Training
    trainer.train(net, training_loader, test_loader, config)

    # Send SIGINT to our own process once training returns.
    # NOTE(review): presumably needed so lingering DataLoader workers do not keep
    # the process alive -- confirm before replacing with a plain exit.
    print("Forcing exit now")
    os.kill(os.getpid(), signal.SIGINT)
Initial commit 2020-03-31 19:42:35 +00:00			`#`
			`#`
			`# 0=================================0`
			`# \| Kernel Point Convolutions \|`
			`# 0=================================0`
			`#`
			`#`
			`# ----------------------------------------------------------------------------------------------------------------------`
			`#`
			`# Callable script to start a training on S3DIS dataset`
			`#`
			`# ----------------------------------------------------------------------------------------------------------------------`
			`#`
			`# Hugues THOMAS - 06/03/2020`
			`#`


			`# ----------------------------------------------------------------------------------------------------------------------`
			`#`
			`# Imports and global variables`
			`# \**********************************/`
			`#`

			`# Common libs`
			`import signal`
			`import os`

			`# Dataset`
♻️ rename datasets to datasetss since it interferes with hugginface's module + ../../Data -> ./Data 2023-05-15 14:22:48 +00:00			`from datasetss.S3DIS import *`
Initial commit 2020-03-31 19:42:35 +00:00			`from torch.utils.data import DataLoader`

			`from utils.config import Config`
			`from utils.trainer import ModelTrainer`
			`from models.architectures import KPFCNN`


			`# ----------------------------------------------------------------------------------------------------------------------`
			`#`
			`# Config Class`
			`# \******************/`
			`#`

🎨 black + ruff 2023-05-15 15:18:10 +00:00
Initial commit 2020-03-31 19:42:35 +00:00			`class S3DISConfig(Config):`
			`"""`
			`Override the parameters you want to modify for this dataset`
			`"""`

			`####################`
			`# Dataset parameters`
			`####################`

			`# Dataset name`
🎨 black + ruff 2023-05-15 15:18:10 +00:00			`dataset = "S3DIS"`
Initial commit 2020-03-31 19:42:35 +00:00
			`# Number of classes in the dataset (This value is overwritten by dataset class when initializing dataset).`
			`num_classes = None`

			`# Type of task performed on this dataset (also overwritten)`
🎨 black + ruff 2023-05-15 15:18:10 +00:00			`dataset_task = ""`
Initial commit 2020-03-31 19:42:35 +00:00
			`# Number of CPU threads for the input pipeline`
Corrections 2020-04-27 22:01:40 +00:00			`input_threads = 10`
Initial commit 2020-03-31 19:42:35 +00:00
			`#########################`
			`# Architecture definition`
			`#########################`

correction of use_potentials=False on validation 2021-07-29 15:49:30 +00:00			`# # Define layers`
🎨 black + ruff 2023-05-15 15:18:10 +00:00			`architecture = [`
			`"simple",`
			`"resnetb",`
			`"resnetb_strided",`
			`"resnetb",`
			`"resnetb",`
			`"resnetb_strided",`
			`"resnetb",`
			`"resnetb",`
			`"resnetb_strided",`
			`"resnetb_deformable",`
			`"resnetb_deformable",`
			`"resnetb_deformable_strided",`
			`"resnetb_deformable",`
			`"resnetb_deformable",`
			`"nearest_upsample",`
			`"unary",`
			`"nearest_upsample",`
			`"unary",`
			`"nearest_upsample",`
			`"unary",`
			`"nearest_upsample",`
			`"unary",`
			`]`
Initial commit 2020-03-31 19:42:35 +00:00
. 2021-08-04 15:01:56 +00:00			`# Define layers`
			`# architecture = ['simple',`
			`# 'resnetb',`
			`# 'resnetb_strided',`
			`# 'resnetb',`
			`# 'resnetb',`
			`# 'resnetb_strided',`
			`# 'resnetb',`
			`# 'resnetb',`
			`# 'resnetb_strided',`
			`# 'resnetb',`
			`# 'resnetb',`
			`# 'resnetb_strided',`
			`# 'resnetb',`
			`# 'resnetb',`
			`# 'nearest_upsample',`
			`# 'unary',`
			`# 'nearest_upsample',`
			`# 'unary',`
			`# 'nearest_upsample',`
			`# 'unary',`
			`# 'nearest_upsample',`
			`# 'unary']`

Initial commit 2020-03-31 19:42:35 +00:00			`###################`
			`# KPConv parameters`
			`###################`

			`# Number of kernel points`
			`num_kernel_points = 15`

. 2021-07-29 13:00:25 +00:00			`# Radius of the input sphere (decrease value to reduce memory cost)`
. 2021-08-04 15:01:56 +00:00			`in_radius = 1.2`
. 2021-07-29 13:00:25 +00:00
			`# Size of the first subsampling grid in meter (increase value to reduce memory cost)`
. 2021-07-29 16:49:08 +00:00			`first_subsampling_dl = 0.03`
Initial commit 2020-03-31 19:42:35 +00:00
			`# Radius of convolution in "number grid cell". (2.5 is the standard value)`
			`conv_radius = 2.5`

			`# Radius of deformable convolution in "number grid cell". Larger so that deformed kernel can spread out`
. 2021-08-04 15:01:56 +00:00			`deform_radius = 5.0`
Initial commit 2020-03-31 19:42:35 +00:00
			`# Radius of the area of influence of each kernel point in "number grid cell". (1.0 is the standard value)`
Corrections 2020-04-27 22:01:40 +00:00			`KP_extent = 1.2`
Initial commit 2020-03-31 19:42:35 +00:00
			`# Behavior of convolutions in ('constant', 'linear', 'gaussian')`
🎨 black + ruff 2023-05-15 15:18:10 +00:00			`KP_influence = "linear"`
Initial commit 2020-03-31 19:42:35 +00:00
			`# Aggregation function of KPConv in ('closest', 'sum')`
🎨 black + ruff 2023-05-15 15:18:10 +00:00			`aggregation_mode = "sum"`
Initial commit 2020-03-31 19:42:35 +00:00
			`# Choice of input features`
Initial commit 2020-04-09 21:13:27 +00:00			`first_features_dim = 128`
Initial commit 2020-03-31 19:42:35 +00:00			`in_features_dim = 5`

			`# Can the network learn modulations`
Initial commit 2020-04-02 21:31:35 +00:00			`modulated = False`
Initial commit 2020-03-31 19:42:35 +00:00
			`# Batch normalization parameters`
			`use_batch_norm = True`
Initial commit 2020-04-02 21:31:35 +00:00			`batch_norm_momentum = 0.02`
Initial commit 2020-03-31 19:42:35 +00:00
Corrections 2020-04-27 22:01:40 +00:00			`# Deformable offset loss`
Corrections 2020-04-24 16:00:11 +00:00			`# 'point2point' fitting geometry by penalizing distance from deform point to input points`
Corrections 2020-04-27 22:01:40 +00:00			`# 'point2plane' fitting geometry by penalizing distance from deform point to input point triplet (not implemented)`
🎨 black + ruff 2023-05-15 15:18:10 +00:00			`deform_fitting_mode = "point2point"`
			`deform_fitting_power = 1.0 # Multiplier for the fitting/repulsive loss`
			`deform_lr_factor = 0.1 # Multiplier for learning rate applied to the deformations`
			`repulse_extent = 1.2 # Distance of repulsion for deformed kernel points`
Initial commit 2020-03-31 19:42:35 +00:00
			`#####################`
			`# Training parameters`
			`#####################`

			`# Maximal number of epochs`
			`max_epoch = 500`

			`# Learning rate management`
			`learning_rate = 1e-2`
			`momentum = 0.98`
Initial commit 2020-04-09 21:13:27 +00:00			`lr_decays = {i: 0.1 ** (1 / 150) for i in range(1, max_epoch)}`
Initial commit 2020-03-31 19:42:35 +00:00			`grad_clip_norm = 100.0`

. 2021-07-29 13:01:48 +00:00			`# Number of batch (decrease to reduce memory cost, but it should remain > 3 for stability)`
Corrections 2020-04-23 13:51:16 +00:00			`batch_num = 6`
Initial commit 2020-03-31 19:42:35 +00:00
			`# Number of steps per epochs`
			`epoch_steps = 500`

			`# Number of validation examples per epoch`
Initial commit 2020-04-09 21:13:27 +00:00			`validation_size = 50`
Initial commit 2020-03-31 19:42:35 +00:00
			`# Number of epoch between each checkpoint`
			`checkpoint_gap = 50`

			`# Augmentations`
			`augment_scale_anisotropic = True`
			`augment_symmetries = [True, False, False]`
🎨 black + ruff 2023-05-15 15:18:10 +00:00			`augment_rotation = "vertical"`
. 2021-07-29 16:49:08 +00:00			`augment_scale_min = 0.9`
			`augment_scale_max = 1.1`
Initial commit 2020-03-31 19:42:35 +00:00			`augment_noise = 0.001`
Initial commit 2020-04-02 21:31:35 +00:00			`augment_color = 0.8`
Initial commit 2020-03-31 19:42:35 +00:00
Initial commit 2020-04-10 19:38:24 +00:00			`# The way we balance segmentation loss`
Initial commit 2020-03-31 19:42:35 +00:00			`# > 'none': Each point in the whole batch has the same contribution.`
			`# > 'class': Each class has the same contribution (points are weighted according to class balance)`
			`# > 'batch': Each cloud in the batch has the same contribution (points are weighted according cloud sizes)`
🎨 black + ruff 2023-05-15 15:18:10 +00:00			`segloss_balance = "none"`
Initial commit 2020-03-31 19:42:35 +00:00
			`# Do we need to save convergence`
			`saving = True`
			`saving_path = None`


			`# ----------------------------------------------------------------------------------------------------------------------`
			`#`
			`# Main Call`
			`# \***************/`
			`#`

🎨 black + ruff 2023-05-15 15:18:10 +00:00			`if __name__ == "__main__":`
Initial commit 2020-03-31 19:42:35 +00:00			`############################`
			`# Initialize the environment`
			`############################`

			`# Set which gpu is going to be used`
🎨 black + ruff 2023-05-15 15:18:10 +00:00			`GPU_ID = "0"`
Initial commit 2020-03-31 19:42:35 +00:00
			`# Set GPU visible device`
🎨 black + ruff 2023-05-15 15:18:10 +00:00			`os.environ["CUDA_VISIBLE_DEVICES"] = GPU_ID`
Initial commit 2020-03-31 19:42:35 +00:00
			`###############`
			`# Previous chkp`
			`###############`

			`# Choose here if you want to start training from a previous snapshot (None for new training)`
Initial commit 2020-04-09 21:13:27 +00:00			`# previous_training_path = 'Log_2020-03-19_19-53-27'`
🎨 black + ruff 2023-05-15 15:18:10 +00:00			`previous_training_path = ""`
Initial commit 2020-03-31 19:42:35 +00:00
			`# Choose index of checkpoint to start from. If None, uses the latest chkp`
			`chkp_idx = None`
			`if previous_training_path:`
			`# Find all snapshot in the chosen training folder`
🎨 black + ruff 2023-05-15 15:18:10 +00:00			`chkp_path = os.path.join("results", previous_training_path, "checkpoints")`
			`chkps = [f for f in os.listdir(chkp_path) if f[:4] == "chkp"]`
Initial commit 2020-03-31 19:42:35 +00:00
			`# Find which snapshot to restore`
			`if chkp_idx is None:`
🎨 black + ruff 2023-05-15 15:18:10 +00:00			`chosen_chkp = "current_chkp.tar"`
Initial commit 2020-03-31 19:42:35 +00:00			`else:`
			`chosen_chkp = np.sort(chkps)[chkp_idx]`
🎨 black + ruff 2023-05-15 15:18:10 +00:00			`chosen_chkp = os.path.join(`
			`"results", previous_training_path, "checkpoints", chosen_chkp`
			`)`
Initial commit 2020-03-31 19:42:35 +00:00
			`else:`
			`chosen_chkp = None`

			`##############`
			`# Prepare Data`
			`##############`

			`print()`
🎨 black + ruff 2023-05-15 15:18:10 +00:00			`print("Data Preparation")`
			`print("****************")`
Initial commit 2020-03-31 19:42:35 +00:00
			`# Initialize configuration class`
			`config = S3DISConfig()`
			`if previous_training_path:`
🎨 black + ruff 2023-05-15 15:18:10 +00:00			`config.load(os.path.join("results", previous_training_path))`
Initial commit 2020-03-31 19:42:35 +00:00			`config.saving_path = None`

			`# Get path from argument if given`
			`if len(sys.argv) > 1:`
			`config.saving_path = sys.argv[1]`

			`# Initialize datasets`
🎨 black + ruff 2023-05-15 15:18:10 +00:00			`training_dataset = S3DISDataset(config, set="training", use_potentials=True)`
			`test_dataset = S3DISDataset(config, set="validation", use_potentials=True)`
Initial commit 2020-03-31 19:42:35 +00:00
			`# Initialize samplers`
			`training_sampler = S3DISSampler(training_dataset)`
			`test_sampler = S3DISSampler(test_dataset)`

			`# Initialize the dataloader`
🎨 black + ruff 2023-05-15 15:18:10 +00:00			`training_loader = DataLoader(`
			`training_dataset,`
			`batch_size=1,`
			`sampler=training_sampler,`
			`collate_fn=S3DISCollate,`
			`num_workers=config.input_threads,`
			`pin_memory=True,`
			`)`
			`test_loader = DataLoader(`
			`test_dataset,`
			`batch_size=1,`
			`sampler=test_sampler,`
			`collate_fn=S3DISCollate,`
			`num_workers=config.input_threads,`
			`pin_memory=True,`
			`)`
Initial commit 2020-03-31 19:42:35 +00:00
			`# Calibrate samplers`
			`training_sampler.calibration(training_loader, verbose=True)`
			`test_sampler.calibration(test_loader, verbose=True)`

Corrections 2020-04-24 16:19:12 +00:00			`# Optional debug functions`
Initial commit 2020-04-09 21:13:27 +00:00			`# debug_timing(training_dataset, training_loader)`
			`# debug_timing(test_dataset, test_loader)`
			`# debug_upsampling(training_dataset, training_loader)`
Initial commit 2020-03-31 19:42:35 +00:00
🎨 black + ruff 2023-05-15 15:18:10 +00:00			`print("\nModel Preparation")`
			`print("*****************")`
Initial commit 2020-03-31 19:42:35 +00:00
			`# Define network model`
			`t1 = time.time()`
Initial commit 2020-04-09 21:13:27 +00:00			`net = KPFCNN(config, training_dataset.label_values, training_dataset.ignored_labels)`
Initial commit 2020-04-03 15:22:57 +00:00
			`debug = False`
			`if debug:`
🎨 black + ruff 2023-05-15 15:18:10 +00:00			`print("\n*************************************\n")`
Initial commit 2020-04-03 15:22:57 +00:00			`print(net)`
🎨 black + ruff 2023-05-15 15:18:10 +00:00			`print("\n*************************************\n")`
Initial commit 2020-04-03 15:22:57 +00:00			`for param in net.parameters():`
			`if param.requires_grad:`
			`print(param.shape)`
🎨 black + ruff 2023-05-15 15:18:10 +00:00			`print("\n*************************************\n")`
			`print(`
			`"Model size %i"`
			`% sum(param.numel() for param in net.parameters() if param.requires_grad)`
			`)`
			`print("\n*************************************\n")`
Initial commit 2020-03-31 19:42:35 +00:00
			`# Define a trainer class`
			`trainer = ModelTrainer(net, config, chkp_path=chosen_chkp)`
🎨 black + ruff 2023-05-15 15:18:10 +00:00			`print("Done in {:.1f}s\n".format(time.time() - t1))`
Initial commit 2020-03-31 19:42:35 +00:00
🎨 black + ruff 2023-05-15 15:18:10 +00:00			`print("\nStart training")`
			`print("**************")`
Initial commit 2020-03-31 19:42:35 +00:00
			`# Training`
Initial commit 2020-04-09 21:13:27 +00:00			`trainer.train(net, training_loader, test_loader, config)`
Initial commit 2020-03-31 19:42:35 +00:00
🎨 black + ruff 2023-05-15 15:18:10 +00:00			`print("Forcing exit now")`
Initial commit 2020-03-31 19:42:35 +00:00			`os.kill(os.getpid(), signal.SIGINT)`