diff --git a/.gitignore b/.gitignore index 025cf91..eeeacd2 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,5 @@ data/ __pycache__/ checkpoints/ *.pth - +*.jpg +SUBMISSION* diff --git a/README.md b/README.md index f70eb88..2fa6e10 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,21 @@ This model scored a [dice coefficient](https://en.wikipedia.org/wiki/S%C3%B8rens The model used for the last submission is stored in the `MODEL.pth` file, if you wish to play with it. The data is available on the [Kaggle website](https://www.kaggle.com/c/carvana-image-masking-challenge/data). +## Usage +### Prediction +You can easily test the output masks on your images via the CLI. +To see all options: +`python predict.py -h` + +To predict a single image and save it: +`python predict.py -i image.jpg -o output.jpg` + +To predict multiple images and show them without saving them: +`python predict.py -i image1.jpg image2.jpg --viz --no-save` + +You can use the cpu-only version with `--cpu`. +You can specify which model file to use with `--model MODEL.pth`. ## Note The code and the overall project architecture is a big mess for now, as I left it abandoned when the challenge finished. I will clean it SoonTM. 
diff --git a/main.py b/main.py index 6796781..3e27096 100644 --- a/main.py +++ b/main.py @@ -1,5 +1,5 @@ #models -from unet_model import UNet +from unet import UNet from myloss import * import torch from torch.autograd import Variable diff --git a/predict.py b/predict.py index 26fdeff..7a4c792 100644 --- a/predict.py +++ b/predict.py @@ -1,12 +1,16 @@ import torch -from utils import * import torch.nn.functional as F -from PIL import Image -from unet_model import UNet from torch.autograd import Variable import matplotlib.pyplot as plt +import numpy +from PIL import Image +import argparse +import os + +from utils import * from crf import dense_crf +from unet import UNet def predict_img(net, full_img, gpu=False): img = resize_and_crop(full_img) @@ -39,3 +43,76 @@ def predict_img(net, full_img, gpu=False): yy = dense_crf(np.array(full_img).astype(np.uint8), y) return yy > 0.5 + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--model', '-m', default='MODEL.pth', + metavar='FILE', + help="Specify the file in which is stored the model" + " (default : 'MODEL.pth')") + parser.add_argument('--input', '-i', metavar='INPUT', nargs='+', + help='filenames of input images', required=True) + parser.add_argument('--output', '-o', metavar='OUTPUT', nargs='+', + help='filenames of output images') + parser.add_argument('--cpu', '-c', action='store_true', + help="Do not use the cuda version of the net", + default=False) + parser.add_argument('--viz', '-v', action='store_true', + help="Visualize the images as they are processed", + default=False) + parser.add_argument('--no-save', '-n', action='store_true', + help="Do not save the output masks", + default=False) + + args = parser.parse_args() + print("Using model file : {}".format(args.model)) + net = UNet(3, 1) + if not args.cpu: + print("Using CUDA version of the net, prepare your GPU !") + net.cuda() + else: + net.cpu() + print("Using CPU version of the net, this may be very slow") + + 
in_files = args.input + out_files = [] + if not args.output: + for f in in_files: + pathsplit = os.path.splitext(f) + out_files.append("{}_OUT{}".format(pathsplit[0], pathsplit[1])) + elif len(in_files) != len(args.output): + print("Error : Input files and output files are not of the same length") + raise SystemExit(1) + else: + out_files = args.output + + print("Loading model ...") + net.load_state_dict(torch.load(args.model, map_location=(lambda storage, loc: storage) if args.cpu else None)) + print("Model loaded !") + + for i, fn in enumerate(in_files): + print("\nPredicting image {} ...".format(fn)) + img = Image.open(fn) + out = predict_img(net, img, not args.cpu) + + if args.viz: + print("Vizualising results for image {}, close to continue ..." + .format(fn)) + + fig = plt.figure() + a = fig.add_subplot(1, 2, 1) + a.set_title('Input image') + plt.imshow(img) + + b = fig.add_subplot(1, 2, 2) + b.set_title('Output mask') + plt.imshow(out) + + plt.show() + + if not args.no_save: + out_fn = out_files[i] + result = Image.fromarray((out * 255).astype(numpy.uint8)) + result.save(out_fn) + print("Mask saved to {}".format(out_fn)) diff --git a/submit.py b/submit.py index 91aeefe..c82e55d 100644 --- a/submit.py +++ b/submit.py @@ -3,7 +3,7 @@ import os from PIL import Image from predict import * from utils import encode -from unet_model import UNet +from unet import UNet def submit(net, gpu=False): dir = 'data/test/' diff --git a/train.py b/train.py index 59dd2c1..5b3f325 100644 --- a/train.py +++ b/train.py @@ -8,12 +8,13 @@ from data_vis import * from utils import split_train_val, batch from myloss import DiceLoss from eval import eval_net -from unet_model import UNet +from unet import UNet from torch.autograd import Variable from torch import optim from optparse import OptionParser import sys import os +import argparse def train_net(net, epochs=5, batch_size=2, lr=0.1, val_percent=0.05, diff --git a/unet/__init__.py b/unet/__init__.py new file mode 100644 index 0000000..2e9b63b --- /dev/null +++ b/unet/__init__.py @@ 
-0,0 +1 @@ +from .unet_model import UNet diff --git a/unet_model.py b/unet/unet_model.py similarity index 86% rename from unet_model.py rename to unet/unet_model.py index 5129e64..4afb8dd 100644 --- a/unet_model.py +++ b/unet/unet_model.py @@ -1,8 +1,12 @@ +#!/usr/bin/python +# full assembly of the sub-parts to form the complete net + import torch import torch.nn as nn import torch.nn.functional as F -from unet_parts import * +# python 3 confusing imports :( +from .unet_parts import * class UNet(nn.Module): diff --git a/unet_parts.py b/unet/unet_parts.py similarity index 80% rename from unet_parts.py rename to unet/unet_parts.py index 7efb750..66149d2 100644 --- a/unet_parts.py +++ b/unet/unet_parts.py @@ -1,3 +1,5 @@ +#!/usr/bin/python + # sub-parts of the U-Net model import torch @@ -6,6 +8,7 @@ import torch.nn.functional as F class double_conv(nn.Module): + '''(conv => BN => ReLU) * 2''' def __init__(self, in_ch, out_ch): super(double_conv, self).__init__() self.conv = nn.Sequential( @@ -46,10 +49,16 @@ class down(nn.Module): class up(nn.Module): - def __init__(self, in_ch, out_ch): + def __init__(self, in_ch, out_ch, bilinear=True): super(up, self).__init__() - self.up = nn.UpsamplingBilinear2d(scale_factor=2) - # self.up = nn.ConvTranspose2d(in_ch, out_ch, 2, stride=2) + + # would be a nice idea if the upsampling could be learned too, + # but my machine does not have enough memory to handle all those weights + if bilinear: + self.up = nn.UpsamplingBilinear2d(scale_factor=2) + else: + self.up = nn.ConvTranspose2d(in_ch, out_ch, 2, stride=2) + self.conv = double_conv(in_ch, out_ch) def forward(self, x1, x2): diff --git a/utils.py b/utils.py index a3b2461..6f4f07b 100644 --- a/utils.py +++ b/utils.py @@ -119,3 +119,7 @@ def rle_encode(mask_image): runs = np.where(pixels[1:] != pixels[:-1])[0] + 2 runs[1::2] = runs[1::2] - runs[:-1:2] return runs + +def full_process(filename): + im = PIL.Image.open(filename) + im = resize_and_crop(im)