diff --git a/comp.ipynb.REMOVED.git-id b/comp.ipynb.REMOVED.git-id
index b1ef6f5..b439b71 100644
--- a/comp.ipynb.REMOVED.git-id
+++ b/comp.ipynb.REMOVED.git-id
@@ -1 +1 @@
-fb39f9a23b728fadb88ce579f78bb419ff0eaab6
\ No newline at end of file
+9cbd3cff7e664a80a5a1fa1404898b7bba3cae0d
\ No newline at end of file
diff --git a/src/evaluate.py b/src/evaluate.py
deleted file mode 100644
index cf39e98..0000000
--- a/src/evaluate.py
+++ /dev/null
@@ -1,45 +0,0 @@
-import numpy as np
-import torch
-from tqdm import tqdm
-
-import wandb
-from src.utils.dice import dice_coeff
-
-class_labels = {
-    1: "sphere",
-}
-
-
-def evaluate(net, dataloader, device):
-    net.eval()
-    num_val_batches = len(dataloader)
-    dice_score = 0
-
-    # iterate over the validation set
-    with tqdm(dataloader, total=len(dataloader.dataset), desc="val", unit="img", leave=False) as pbar:
-        for images, masks_true in dataloader:
-            # move images and labels to correct device
-            images = images.to(device=device)
-            masks_true = masks_true.unsqueeze(1).float().to(device=device)
-
-            # forward, predict the mask
-            with torch.inference_mode():
-                masks_pred = net(images)
-                masks_pred_bin = (torch.sigmoid(masks_pred) > 0.5).float()
-
-                # compute the Dice score
-                dice_score += dice_coeff(masks_pred_bin, masks_true, reduce_batch_first=False)
-
-            # update progress bar
-            pbar.update(images.shape[0])
-
-    # save some images to wandb
-    table = wandb.Table(columns=["ID", "image", "ground truth", "prediction"])
-    for i, (img, mask, pred) in enumerate(zip(images.to("cpu"), masks_true.to("cpu"), masks_pred.to("cpu"))):
-        table.add_data(i, wandb.Image(img), wandb.Image(mask), wandb.Image(pred))
-    wandb.log({"predictions_table": table})
-
-    net.train()
-
-    # Fixes a potential division by zero error
-    return dice_score / num_val_batches if num_val_batches else dice_score
diff --git a/src/train.py b/src/train.py
index 5feb01c..8d4dab1 100644
--- a/src/train.py
+++ b/src/train.py
@@ -8,9 +8,9 @@ from torch.utils.data import DataLoader
 from tqdm import tqdm
 
 import wandb
-from evaluate import evaluate
 from src.utils.dataset import SphereDataset
 from unet import UNet
+from utils.dice import dice_coeff
 from utils.paste import RandomPaste
 
 
@@ -22,7 +22,7 @@ def main():
     wandb.init(
         project="U-Net",
         config=dict(
-            DIR_TRAIN_IMG="/home/lilian/data_disk/lfainsin/val2017",
+            DIR_TRAIN_IMG="/home/lilian/data_disk/lfainsin/smolval2017",
             DIR_VALID_IMG="/home/lilian/data_disk/lfainsin/smoltrain2017/",
             DIR_SPHERE_IMG="/home/lilian/data_disk/lfainsin/spheres/Images/",
             DIR_SPHERE_MASK="/home/lilian/data_disk/lfainsin/spheres/Masks/",
@@ -51,7 +51,7 @@ def main():
     # 0. Create network
     net = UNet(n_channels=wandb.config.N_CHANNELS, n_classes=wandb.config.N_CLASSES, features=wandb.config.FEATURES)
     wandb.config.PARAMETERS = sum(p.numel() for p in net.parameters() if p.requires_grad)
-    wandb.watch(net, log_freq=100)
+    wandb.watch(net, log_freq=100)  # TODO: 1/4 epochs
 
     # transfer network to device
     net.to(device=device)
@@ -110,10 +110,6 @@ def main():
     grad_scaler = torch.cuda.amp.GradScaler(enabled=wandb.config.AMP)
     criterion = torch.nn.BCEWithLogitsLoss()
 
-    # accuracy stuff
-    mse = torch.nn.MSELoss()
-    mae = torch.nn.L1Loss()
-
     # save model.pth
     torch.save(net.state_dict(), "checkpoints/model-0.pth")
     artifact = wandb.Artifact("pth", type="model")
@@ -136,6 +132,9 @@ def main():
         """
     )
 
+    # setup wandb table for saving images
+    table = wandb.Table(columns=["ID", "image", "ground truth", "prediction"])
+
     try:
         for epoch in range(1, wandb.config.EPOCHS + 1):
             with tqdm(total=len(ds_train), desc=f"{epoch}/{wandb.config.EPOCHS}", unit="img") as pbar:
@@ -164,9 +163,9 @@ def main():
                     grad_scaler.update()
 
                     # compute metrics
-                    accuracy = (true_masks == pred_masks).float().mean()
-                    mse = torch.nn.functional.mse_loss(pred_masks, true_masks)
-                    mae = torch.nn.functional.l1_loss(pred_masks, true_masks)
+                    pred_masks_bin = (torch.sigmoid(pred_masks) > 0.5).float()
+                    accuracy = (true_masks == pred_masks_bin).float().mean()
+                    mae = torch.nn.functional.l1_loss(pred_masks_bin, true_masks)
 
                     # update tqdm progress bar
                     pbar.update(images.shape[0])
@@ -177,23 +176,64 @@ def main():
                         {
                             "train/epoch": epoch - 1 + step / len(train_loader),
                             "train/accuracy": accuracy,
-                            "train/loss": train_loss,
-                            "train/mse": mse,
+                            "train/bce": train_loss,
                             "train/mae": mae,
                         }
                     )
 
                 # Evaluation round
-                val_score = evaluate(net, val_loader, device)
-                scheduler.step(val_score)
+                net.eval()
+                accuracy = 0
+                dice = 0
+                mae = 0
+                with tqdm(val_loader, total=len(ds_valid), desc="val", unit="img", leave=False) as pbar:
+                    for images, masks_true in val_loader:
+
+                        # transfer images to device
+                        images = images.to(device=device)
+                        masks_true = masks_true.unsqueeze(1).to(device=device)
+
+                        # forward
+                        with torch.inference_mode():
+                            masks_pred = net(images)
+
+                        # compute metrics on THIS validation batch (batch means weighted by batch size)
+                        masks_pred_bin = (torch.sigmoid(masks_pred) > 0.5).float()
+                        accuracy += (masks_true == masks_pred_bin).float().mean() * images.shape[0]
+                        dice += dice_coeff(masks_pred_bin, masks_true, reduce_batch_first=False)
+                        mae += torch.nn.functional.l1_loss(masks_pred_bin, masks_true) * images.shape[0]
+
+                        # update progress bar
+                        pbar.update(images.shape[0])
+
+                accuracy /= len(ds_valid)  # size-weighted batch means -> per-image average, same scale as train/accuracy
+                dice /= len(val_loader)  # TODO: fix dice_coeff to not average
+                mae /= len(ds_valid)  # size-weighted batch means -> per-image average, same scale as train/mae
+
+                # save the last validation batch to table
+                for i, (img, mask, pred) in enumerate(
+                    zip(
+                        images.to("cpu"),
+                        masks_true.to("cpu"),
+                        masks_pred.to("cpu"),
+                    )
+                ):
+                    table.add_data(i, wandb.Image(img), wandb.Image(mask), wandb.Image(pred))
 
                 # log validation metrics
                 wandb.log(
                     {
-                        "val/val_score": val_score,
+                        "val/predictions": table,
+                        "val/accuracy": accuracy,
+                        "val/dice": dice,
+                        "val/mae": mae,
                     }
                 )
 
+                # update hyperparameters
+                net.train()
+                scheduler.step(dice)
+
             # save weights when epoch end
             torch.save(net.state_dict(), f"checkpoints/model-{epoch}.pth")
             artifact = wandb.Artifact("pth", type="model")
diff --git a/src/utils/dataset.py b/src/utils/dataset.py
index f1cb2be..a798731 100644
--- a/src/utils/dataset.py
+++ b/src/utils/dataset.py
@@ -1,4 +1,3 @@
-import logging
 import os
 
 import numpy as np
@@ -24,6 +23,10 @@ class SphereDataset(Dataset):
         if self.transform is not None:
             augmentations = self.transform(image=image, mask=mask)
             image = augmentations["image"]
-            mask = augmentations["mask"].float()
+            mask = augmentations["mask"]
+
+        # make sure image and mask are floats
+        image = image.float()
+        mask = mask.float()
 
         return image, mask
diff --git a/src/utils/paste.py b/src/utils/paste.py
index 8d5f904..486a8ec 100644
--- a/src/utils/paste.py
+++ b/src/utils/paste.py
@@ -42,6 +42,7 @@ class RandomPaste(A.DualTransform):
         # convert img to Image, needed for `paste` function
         img = Image.fromarray(img)
 
+        # paste spheres
         for pos in positions:
             img.paste(paste_img, pos, paste_mask)
 
@@ -51,8 +52,12 @@ class RandomPaste(A.DualTransform):
         # convert mask to Image, needed for `paste` function
         mask = Image.fromarray(mask)
 
+        # binarize the mask -> {0, 1}. NOTE(review): PIL's paste() reads an "L" mask as 0..255 opacity, so a value of 1 blends in only ~1/255 of the pasted image -- confirm paste_mask's mode and that this blend is the intended way to write 1s
+        paste_mask_bin = paste_mask.point(lambda p: 1 if p > 10 else 0)
+
+        # paste spheres
         for pos in positions:
-            mask.paste(paste_mask, pos, paste_mask)
+            mask.paste(paste_mask, pos, paste_mask_bin)
 
         return np.asarray(mask.convert("L"))