Added CLI for predict, cleaned up code, updated README

Former-commit-id: 77555ccc0925a8fba796ce7e42843d95b6e9dce0
2024-09-19 19:45:28 +00:00 · 2017-11-30 06:45:19 +01:00 · 2017-11-30 06:45:19 +01:00 · 7ea54febec
parent e1bf150da3
commit 7ea54febec
10 changed files with 122 additions and 11 deletions
--- a/.gitignore
+++ b/.gitignore
@ -3,4 +3,5 @@ data/
 __pycache__/
 checkpoints/
 *.pth
-
+*.jpg
 SUBMISSION*
--- a/README.md
+++ b/README.md
@ -5,7 +5,21 @@ This model scored a [dice coefficient](https://en.wikipedia.org/wiki/S%C3%B8rens
 The model used for the last submission is stored in the `MODEL.pth` file, if you wish to play with it. The data is available on the [Kaggle website](https://www.kaggle.com/c/carvana-image-masking-challenge/data).
 ## Usage
 ### Prediction
 You can easily test the output masks on your images via the CLI.
 To see all options:
 `python predict.py -h`
 To predict a single image and save it:
 `python predict.py -i image.jpg -o ouput.jpg
 To predict a multiple images and show them without saving them:
 `python predict.py -i image1.jpg image2.jpg --viz --no-save`
 You can use the cpu-only version with `--cpu`.
 You can specify which model file to use with `--model MODEL.pth`.
 ## Note
 The code and the overall project architecture is a big mess for now, as I left it abandoned when the challenge finished. I will clean it Soon<sup>TM</sup>.
--- a/main.py
+++ b/main.py
@ -1,5 +1,5 @@
 #models
-from unet_model import UNet
+from unet import UNet
 from myloss import *
 import torch
 from torch.autograd import Variable
--- a/predict.py
+++ b/predict.py
@ -1,12 +1,16 @@
 import torch
 from utils import *
 import torch.nn.functional as F
 from PIL import Image
 from unet_model import UNet
 from torch.autograd import Variable
 import matplotlib.pyplot as plt
 import numpy
 from PIL import Image
 import argparse
 import os
 from utils import *
 from crf import dense_crf
 from unet import UNet
 def predict_img(net, full_img, gpu=False):
    img = resize_and_crop(full_img)
@ -39,3 +43,76 @@ def predict_img(net, full_img, gpu=False):
    yy = dense_crf(np.array(full_img).astype(np.uint8), y)
    return yy > 0.5
 if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', '-m', default='MODEL.pth',
                        metavar='FILE',
                        help="Specify the file in which is stored the model"
                        " (default : 'MODEL.pth')")
    parser.add_argument('--input', '-i', metavar='INPUT', nargs='+',
                        help='filenames of input images', required=True)
    parser.add_argument('--output', '-o', metavar='INPUT', nargs='+',
                        help='filenames of ouput images')
    parser.add_argument('--cpu', '-c', action='store_true',
                        help="Do not use the cuda version of the net",
                        default=False)
    parser.add_argument('--viz', '-v', action='store_true',
                        help="Visualize the images as they are processed",
                        default=False)
    parser.add_argument('--no-save', '-n', action='store_false',
                        help="Do not save the output masks",
                        default=False)
    args = parser.parse_args()
    print("Using model file : {}".format(args.model))
    net = UNet(3, 1)
    if not args.cpu:
        print("Using CUDA version of the net, prepare your GPU !")
        net.cuda()
    else:
        net.cpu()
        print("Using CPU version of the net, this may be very slow")
    in_files = args.input
    out_files = []
    if not args.output:
        for f in in_files:
            pathsplit = os.path.splitext(f)
            out_files.append("{}_OUT{}".format(pathsplit[0], pathsplit[1]))
    elif len(in_files) != len(args.output):
        print("Error : Input files and output files are not of the same length")
        raise SystemExit()
    else:
        out_files = args.output
    print("Loading model ...")
    net.load_state_dict(torch.load(args.model))
    print("Model loaded !")
    for i, fn in enumerate(in_files):
        print("\nPredicting image {} ...".format(fn))
        img = Image.open(fn)
        out = predict_img(net, img, not args.cpu)
        if args.viz:
            print("Vizualising results for image {}, close to continue ..."
                  .format(fn))
            fig = plt.figure()
            a = fig.add_subplot(1, 2, 1)
            a.set_title('Input image')
            plt.imshow(img)
            b = fig.add_subplot(1, 2, 2)
            b.set_title('Output mask')
            plt.imshow(out)
            plt.show()
        if not args.no_save:
            out_fn = out_files[i]
            result = Image.fromarray((out * 255).astype(numpy.uint8))
            result.save(out_files[i])
            print("Mask saved to {}".format(out_files[i]))
--- a/submit.py
+++ b/submit.py
@ -3,7 +3,7 @@ import os
 from PIL import Image
 from predict import *
 from utils import encode
-from unet_model import UNet
+from unet import UNet
 def submit(net, gpu=False):
    dir = 'data/test/'
--- a/train.py
+++ b/train.py
@ -8,12 +8,13 @@ from data_vis import *
 from utils import split_train_val, batch
 from myloss import DiceLoss
 from eval import eval_net
-from unet_model import UNet
+from unet import UNet
 from torch.autograd import Variable
 from torch import optim
 from optparse import OptionParser
 import sys
 import os
 import argparse
 def train_net(net, epochs=5, batch_size=2, lr=0.1, val_percent=0.05,
--- a/unet/init.py
+++ b/unet/init.py
@ -0,0 +1 @@
 from .unet_model import UNet
--- a/unet/unet_model.py
+++ b/unet/unet_model.py
@ -1,8 +1,12 @@
 #!/usr/bin/python
 # full assembly of the sub-parts to form the complete net
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from unet_parts import *
+# python 3 confusing imports :(
 from .unet_parts import *
 class UNet(nn.Module):
--- a/unet/unet_parts.py
+++ b/unet/unet_parts.py
@ -1,3 +1,5 @@
 #!/usr/bin/python
 # sub-parts of the U-Net model
 import torch
@ -6,6 +8,7 @@ import torch.nn.functional as F
 class double_conv(nn.Module):
    '''(conv => BN => ReLU) * 2'''
    def __init__(self, in_ch, out_ch):
        super(double_conv, self).__init__()
        self.conv = nn.Sequential(
@ -46,10 +49,16 @@ class down(nn.Module):
 class up(nn.Module):
-    def __init__(self, in_ch, out_ch):
+    def __init__(self, in_ch, out_ch, bilinear=True):
        super(up, self).__init__()
-        self.up = nn.UpsamplingBilinear2d(scale_factor=2)
+
-        # self.up = nn.ConvTranspose2d(in_ch, out_ch, 2, stride=2)
+        #  would be a nice idea if the upsampling could be learned too,
        #  but my machine do not have enough memory to handle all those weights
        if bilinear:
            self.up = nn.UpsamplingBilinear2d(scale_factor=2)
        else:
            self.up = nn.ConvTranspose2d(in_ch, out_ch, 2, stride=2)
        self.conv = double_conv(in_ch, out_ch)
    def forward(self, x1, x2):
--- a/utils.py
+++ b/utils.py
@ -119,3 +119,7 @@ def rle_encode(mask_image):
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 2
    runs[1::2] = runs[1::2] - runs[:-1:2]
    return runs
 def full_process(filename):
    im = PIL.Image.open(filename)
    im = resize_and_crop(im)