From 6438b1dcddfb1ae481909c8d5e54961b21d6239c Mon Sep 17 00:00:00 2001 From: Arka Date: Sun, 24 Oct 2021 17:07:54 -0400 Subject: [PATCH] bug fixes for i/o and low val set Former-commit-id: bdcad8300e3c930b43b976ccd2562f27c9867892 --- README.md | 2 +- evaluate.py | 4 ++++ train.py | 44 +++++++++++++++++++++++--------------------- 3 files changed, 28 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index 3620a45..3516e10 100644 --- a/README.md +++ b/README.md @@ -154,7 +154,7 @@ You can also download it using the helper script: bash scripts/download_data.sh ``` -The input images and target masks should be in the `data/imgs` and `data/masks` folders respectively. For Carvana, images are RGB and masks are black and white. +The input images and target masks should be in the `data/imgs` and `data/masks` folders respectively (note that the `imgs` and `masks` folder should not contain any sub-folder or any other files, due to the greedy data-loader). For Carvana, images are RGB and masks are black and white. You can use your own dataset as long as you make sure it is loaded properly in `utils/data_loading.py`. diff --git a/evaluate.py b/evaluate.py index 504432f..2b4ebf9 100644 --- a/evaluate.py +++ b/evaluate.py @@ -35,4 +35,8 @@ def evaluate(net, dataloader, device): net.train() + + # Fixes a potential division by zero error + if num_val_batches == 0: + return dice_score return dice_score / num_val_batches diff --git a/train.py b/train.py index dc04e96..5cf568c 100644 --- a/train.py +++ b/train.py @@ -111,29 +111,31 @@ def train_net(net, pbar.set_postfix(**{'loss (batch)': loss.item()}) # Evaluation round - if global_step % (n_train // (10 * batch_size)) == 0: - histograms = {} - for tag, value in net.named_parameters(): - tag = tag.replace('/', '.') - histograms['Weights/' + tag] = wandb.Histogram(value.data.cpu()) - histograms['Gradients/' + tag] = wandb.Histogram(value.grad.data.cpu()) + division_step = (n_train // (10 * batch_size)) + if division_step > 0: + if global_step % division_step == 0: + histograms = {} + for tag, value in net.named_parameters(): + tag = tag.replace('/', '.') + histograms['Weights/' + tag] = wandb.Histogram(value.data.cpu()) + histograms['Gradients/' + tag] = wandb.Histogram(value.grad.data.cpu()) - val_score = evaluate(net, val_loader, device) - scheduler.step(val_score) + val_score = evaluate(net, val_loader, device) + scheduler.step(val_score) - logging.info('Validation Dice score: {}'.format(val_score)) - experiment.log({ - 'learning rate': optimizer.param_groups[0]['lr'], - 'validation Dice': val_score, - 'images': wandb.Image(images[0].cpu()), - 'masks': { - 'true': wandb.Image(true_masks[0].float().cpu()), - 'pred': wandb.Image(torch.softmax(masks_pred, dim=1)[0].float().cpu()), - }, - 'step': global_step, - 'epoch': epoch, - **histograms - }) + logging.info('Validation Dice score: {}'.format(val_score)) + experiment.log({ + 'learning rate': optimizer.param_groups[0]['lr'], + 'validation Dice': val_score, + 'images': wandb.Image(images[0].cpu()), + 'masks': { + 'true': wandb.Image(true_masks[0].float().cpu()), + 'pred': wandb.Image(torch.softmax(masks_pred, dim=1)[0].float().cpu()), + }, + 'step': global_step, + 'epoch': epoch, + **histograms + }) if save_checkpoint: Path(dir_checkpoint).mkdir(parents=True, exist_ok=True)