REVA-QCAV/src/train.py
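
"""Training script for the sphere-segmentation U-Net: Albumentations augmentation
(including synthetic sphere pasting), mixed-precision training, and Weights & Biases logging."""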


import argparse
import logging
from pathlib import Path

import albumentations as A
import torch
import torch.nn as nn
import torch.nn.functional as F
from albumentations.pytorch import ToTensorV2
from torch import optim
from torch.utils.data import DataLoader
from tqdm import tqdm
import wandb

from evaluate import evaluate
from utils.dataset import SphereDataset
from utils.dice import dice_loss
from unet import UNet
from utils.paste import RandomPaste
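
# Hard-coded checkpoint and dataset locations (adjust to your environment)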
CHECKPOINT_DIR = Path("./checkpoints/")
DIR_TRAIN_IMG = Path("/home/lilian/data_disk/lfainsin/train2017")
DIR_VALID_IMG = Path("/home/lilian/data_disk/lfainsin/val2017/")
# DIR_VALID_MASK = Path("/home/lilian/data_disk/lfainsin/val2017mask/")
DIR_SPHERE_IMG = Path("/home/lilian/data_disk/lfainsin/spheres/Images/")
DIR_SPHERE_MASK = Path("/home/lilian/data_disk/lfainsin/spheres/Masks/")


def train_net(
    net,
    device,
    epochs: int = 5,
    batch_size: int = 1,
    learning_rate: float = 1e-5,
    save_checkpoint: bool = True,
    amp: bool = False,
):
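    """Train `net` on the sphere dataset, evaluating periodically and saving a checkpoint each epoch."""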
    # 1. Create transforms
    tf_train = A.Compose(
        [
            A.Flip(),
            A.ColorJitter(),
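            # RandomPaste is the project-specific augmentation (utils/paste.py) that pastes
            # sphere crops and their masks from DIR_SPHERE_IMG / DIR_SPHERE_MASK onto the sample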
            RandomPaste(5, 0.2, DIR_SPHERE_IMG, DIR_SPHERE_MASK),
            A.ISONoise(),
            A.ToFloat(max_value=255),
            ToTensorV2(),
        ],
    )

    tf_valid = A.Compose(
        [
            RandomPaste(5, 0.2, DIR_SPHERE_IMG, DIR_SPHERE_MASK),
            A.ToFloat(max_value=255),
            ToTensorV2(),
        ],
    )

    # 2. Create datasets
    ds_train = SphereDataset(images_dir=DIR_TRAIN_IMG, transform=tf_train)
    # ds_valid = SphereDataset(images_dir=DIR_VALID_IMG, masks_dir=DIR_VALID_MASK, transform=tf_valid)
    ds_valid = SphereDataset(images_dir=DIR_VALID_IMG, transform=tf_valid)

    # 3. Create data loaders
    loader_args = dict(batch_size=batch_size, num_workers=4, pin_memory=True)
    train_loader = DataLoader(ds_train, shuffle=True, **loader_args)
    val_loader = DataLoader(ds_valid, shuffle=False, drop_last=True, **loader_args)

    # (Initialize logging)
    experiment = wandb.init(
        project="U-Net",
        config=dict(
            epochs=epochs,
            batch_size=batch_size,
            learning_rate=learning_rate,
            save_checkpoint=save_checkpoint,
            amp=amp,
        ),
    )
    logging.info(
        f"""Starting training:
        Epochs: {epochs}
        Batch size: {batch_size}
        Learning rate: {learning_rate}
        Training size: {len(ds_train)}
        Validation size: {len(ds_valid)}
        Checkpoints: {save_checkpoint}
        Device: {device.type}
        Mixed Precision: {amp}
        """
    )

    # 4. Set up the optimizer, the loss, the learning rate scheduler and the loss scaling for AMP
    optimizer = optim.RMSprop(net.parameters(), lr=learning_rate, weight_decay=1e-8, momentum=0.9)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, "max", patience=2)  # goal: maximize Dice score
    grad_scaler = torch.cuda.amp.GradScaler(enabled=amp)
    criterion = nn.CrossEntropyLoss()
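    # NOTE: CrossEntropyLoss and the one_hot() call below assume a multi-class setup (net.n_classes > 1)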
    global_step = 0

    # 5. Begin training
    for epoch in range(1, epochs + 1):
        net.train()
        epoch_loss = 0

        with tqdm(total=len(ds_train), desc=f"Epoch {epoch}/{epochs}", unit="img") as pbar:
            for batch in train_loader:
                images = batch["image"]
                true_masks = batch["mask"]

                assert images.shape[1] == net.n_channels, (
                    f"Network has been defined with {net.n_channels} input channels, "
                    f"but loaded images have {images.shape[1]} channels. Please check that "
                    "the images are loaded correctly."
                )

                images = images.to(device=device, dtype=torch.float32)
                true_masks = true_masks.to(device=device, dtype=torch.long)

                with torch.cuda.amp.autocast(enabled=amp):
                    masks_pred = net(images)
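                    # combined objective: pixel-wise cross-entropy plus a soft multi-class Dice loss
                    # computed on softmax probabilities against one-hot encoded ground-truth masks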
                    loss = criterion(masks_pred, true_masks) + dice_loss(
                        F.softmax(masks_pred, dim=1).float(),
                        F.one_hot(true_masks, net.n_classes).permute(0, 3, 1, 2).float(),
                        multiclass=True,
                    )

                optimizer.zero_grad(set_to_none=True)
                grad_scaler.scale(loss).backward()
                grad_scaler.step(optimizer)
                grad_scaler.update()

                pbar.update(images.shape[0])
                global_step += 1
                epoch_loss += loss.item()
                experiment.log({"train loss": loss.item(), "step": global_step, "epoch": epoch})
                pbar.set_postfix(**{"loss (batch)": loss.item()})

                # Evaluation round
                division_step = len(ds_train) // (10 * batch_size)
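                # (evaluates roughly ten times per epoch, since global_step counts batches)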
                if division_step > 0:
                    if global_step % division_step == 0:
                        histograms = {}
                        for tag, value in net.named_parameters():
                            tag = tag.replace("/", ".")
                            histograms["Weights/" + tag] = wandb.Histogram(value.data.cpu())
                            histograms["Gradients/" + tag] = wandb.Histogram(value.grad.data.cpu())

                        val_score = evaluate(net, val_loader, device)
                        scheduler.step(val_score)

                        logging.info("Validation Dice score: {}".format(val_score))
                        experiment.log(
                            {
                                "learning rate": optimizer.param_groups[0]["lr"],
                                "validation Dice": val_score,
                                "images": wandb.Image(images[0].cpu()),
                                "masks": {
                                    "true": wandb.Image(true_masks[0].float().cpu()),
                                    "pred": wandb.Image(
                                        torch.softmax(masks_pred, dim=1).argmax(dim=1)[0].float().cpu()
                                    ),
                                },
                                "step": global_step,
                                "epoch": epoch,
                                **histograms,
                            }
                        )

        if save_checkpoint:
            Path(CHECKPOINT_DIR).mkdir(parents=True, exist_ok=True)
            torch.save(net.state_dict(), str(CHECKPOINT_DIR / "checkpoint_epoch{}.pth".format(epoch)))
            logging.info(f"Checkpoint {epoch} saved!")


def get_args():
    parser = argparse.ArgumentParser(
        description="Train the UNet on images and target masks",
    )
    parser.add_argument(
        "--epochs",
        "-e",
        metavar="E",
        type=int,
        default=5,
        help="Number of epochs",
    )
    parser.add_argument(
        "--batch-size",
        "-b",
        dest="batch_size",
        metavar="B",
        type=int,
        default=32,
        help="Batch size",
    )
    parser.add_argument(
        "--learning-rate",
        "-l",
        metavar="LR",
        type=float,
        default=1e-5,
        help="Learning rate",
        dest="lr",
    )
    parser.add_argument(
        "--load",
        "-f",
        type=str,
        default=False,
        help="Load model from a .pth file",
    )
    parser.add_argument(
        "--amp",
        action="store_true",
        default=True,
        help="Use mixed precision",
    )
    parser.add_argument(
        "--classes",
        "-c",
        type=int,
        default=1,
        help="Number of classes",
    )

    return parser.parse_args()


if __name__ == "__main__":
    args = get_args()

    logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    logging.info(f"Using device {device}")

    net = UNet(n_channels=3, n_classes=args.classes)
    logging.info(
        f"""Network:
        \t{net.n_channels} input channels
        \t{net.n_classes} output channels (classes)
        """
    )

    if args.load:
        net.load_state_dict(torch.load(args.load, map_location=device))
        logging.info(f"Model loaded from {args.load}")

    net.to(device=device)

    try:
        train_net(
            net=net,
            epochs=args.epochs,
            batch_size=args.batch_size,
            learning_rate=args.lr,
            device=device,
            amp=args.amp,
        )
    except KeyboardInterrupt:
        torch.save(net.state_dict(), "INTERRUPTED.pth")
        logging.info("Saved interrupt")
        raise