Added CLI for predict, cleaned up code, updated README

Former-commit-id: 77555ccc0925a8fba796ce7e42843d95b6e9dce0
This commit is contained in:
milesial 2017-11-30 06:45:19 +01:00
parent e1bf150da3
commit 7ea54febec
10 changed files with 122 additions and 11 deletions

3
.gitignore vendored
View file

@ -3,4 +3,5 @@ data/
__pycache__/
checkpoints/
*.pth
*.jpg
SUBMISSION*

View file

@ -5,7 +5,21 @@ This model scored a [dice coefficient](https://en.wikipedia.org/wiki/S%C3%B8rens
The model used for the last submission is stored in the `MODEL.pth` file, if you wish to play with it. The data is available on the [Kaggle website](https://www.kaggle.com/c/carvana-image-masking-challenge/data).
## Usage
### Prediction
You can easily test the output masks on your images via the CLI.
To see all options:
`python predict.py -h`
To predict a single image and save it:
`python predict.py -i image.jpg -o output.jpg`
To predict multiple images and show them without saving them:
`python predict.py -i image1.jpg image2.jpg --viz --no-save`
You can use the cpu-only version with `--cpu`.
You can specify which model file to use with `--model MODEL.pth`.
## Note
The code and the overall project architecture are a big mess for now, as I left the project abandoned when the challenge finished. I will clean it up Soon<sup>TM</sup>.

View file

@ -1,5 +1,5 @@
#models
from unet_model import UNet
from unet import UNet
from myloss import *
import torch
from torch.autograd import Variable

View file

@ -1,12 +1,16 @@
import torch
from utils import *
import torch.nn.functional as F
from PIL import Image
from unet_model import UNet
from torch.autograd import Variable
import matplotlib.pyplot as plt
import numpy
from PIL import Image
import argparse
import os
from utils import *
from crf import dense_crf
from unet import UNet
def predict_img(net, full_img, gpu=False):
img = resize_and_crop(full_img)
@ -39,3 +43,76 @@ def predict_img(net, full_img, gpu=False):
yy = dense_crf(np.array(full_img).astype(np.uint8), y)
return yy > 0.5
def get_args(argv=None):
    """Parse the command-line options of the prediction script.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            the arguments are read from ``sys.argv`` by argparse.

    Returns:
        The ``argparse.Namespace`` with the attributes ``model``, ``input``,
        ``output``, ``cpu``, ``viz`` and ``no_save``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', '-m', default='MODEL.pth',
                        metavar='FILE',
                        help="Specify the file in which is stored the model"
                             " (default : 'MODEL.pth')")
    parser.add_argument('--input', '-i', metavar='INPUT', nargs='+',
                        help='filenames of input images', required=True)
    # The metavar used to be a copy-pasted 'INPUT', which made the generated
    # usage text misleading.
    parser.add_argument('--output', '-o', metavar='OUTPUT', nargs='+',
                        help='filenames of output images')
    parser.add_argument('--cpu', '-c', action='store_true',
                        help="Do not use the cuda version of the net")
    parser.add_argument('--viz', '-v', action='store_true',
                        help="Visualize the images as they are processed")
    # Must be 'store_true': with 'store_false' and a False default the flag
    # could never change the value, so the masks were always saved.
    parser.add_argument('--no-save', '-n', action='store_true',
                        help="Do not save the output masks")
    return parser.parse_args(argv)


def get_output_filenames(in_files, out_files=None):
    """Return the output filename matching each input filename.

    Args:
        in_files: List of input image filenames.
        out_files: Optional list of explicit output filenames. When empty or
            ``None``, each name is derived from its input by inserting
            ``_OUT`` before the extension.

    Returns:
        A list of output filenames with the same length as ``in_files``.

    Raises:
        SystemExit: If ``out_files`` is given but its length differs from
            ``in_files``. The message goes to stderr and the exit status is
            non-zero.
    """
    if not out_files:
        derived = []
        for name in in_files:
            root, ext = os.path.splitext(name)
            derived.append("{}_OUT{}".format(root, ext))
        return derived

    if len(in_files) != len(out_files):
        raise SystemExit(
            "Error : Input files and output files are not of the same length")

    return out_files


if __name__ == "__main__":
    args = get_args()
    in_files = args.input
    # Validate the file lists before building the net so that a bad command
    # line fails fast, without touching the GPU.
    out_files = get_output_filenames(in_files, args.output)

    print("Using model file : {}".format(args.model))
    net = UNet(3, 1)
    if not args.cpu:
        print("Using CUDA version of the net, prepare your GPU !")
        net.cuda()
    else:
        net.cpu()
        print("Using CPU version of the net, this may be very slow")

    print("Loading model ...")
    # When running on CPU, remap the stored tensors to CPU storage so that a
    # checkpoint saved from a GPU can be loaded on a machine without CUDA.
    map_location = (lambda storage, loc: storage) if args.cpu else None
    net.load_state_dict(torch.load(args.model, map_location=map_location))
    print("Model loaded !")

    for i, fn in enumerate(in_files):
        print("\nPredicting image {} ...".format(fn))
        img = Image.open(fn)
        out = predict_img(net, img, not args.cpu)

        if args.viz:
            print("Vizualising results for image {}, close to continue ..."
                  .format(fn))

            fig = plt.figure()
            a = fig.add_subplot(1, 2, 1)
            a.set_title('Input image')
            plt.imshow(img)

            b = fig.add_subplot(1, 2, 2)
            b.set_title('Output mask')
            plt.imshow(out)

            plt.show()

        if not args.no_save:
            out_fn = out_files[i]
            # `out` is a boolean mask; scale it to 0/255 for an 8-bit image.
            result = Image.fromarray((out * 255).astype(numpy.uint8))
            result.save(out_fn)
            print("Mask saved to {}".format(out_fn))

View file

@ -3,7 +3,7 @@ import os
from PIL import Image
from predict import *
from utils import encode
from unet_model import UNet
from unet import UNet
def submit(net, gpu=False):
dir = 'data/test/'

View file

@ -8,12 +8,13 @@ from data_vis import *
from utils import split_train_val, batch
from myloss import DiceLoss
from eval import eval_net
from unet_model import UNet
from unet import UNet
from torch.autograd import Variable
from torch import optim
from optparse import OptionParser
import sys
import os
import argparse
def train_net(net, epochs=5, batch_size=2, lr=0.1, val_percent=0.05,

1
unet/__init__.py Normal file
View file

@ -0,0 +1 @@
from .unet_model import UNet

View file

@ -1,8 +1,12 @@
#!/usr/bin/python
# full assembly of the sub-parts to form the complete net
import torch
import torch.nn as nn
import torch.nn.functional as F
from unet_parts import *
# python 3 confusing imports :(
from .unet_parts import *
class UNet(nn.Module):

View file

@ -1,3 +1,5 @@
#!/usr/bin/python
# sub-parts of the U-Net model
import torch
@ -6,6 +8,7 @@ import torch.nn.functional as F
class double_conv(nn.Module):
'''(conv => BN => ReLU) * 2'''
def __init__(self, in_ch, out_ch):
super(double_conv, self).__init__()
self.conv = nn.Sequential(
@ -46,10 +49,16 @@ class down(nn.Module):
class up(nn.Module):
def __init__(self, in_ch, out_ch):
def __init__(self, in_ch, out_ch, bilinear=True):
super(up, self).__init__()
# would be a nice idea if the upsampling could be learned too,
#  but my machine do not have enough memory to handle all those weights
if bilinear:
self.up = nn.UpsamplingBilinear2d(scale_factor=2)
# self.up = nn.ConvTranspose2d(in_ch, out_ch, 2, stride=2)
else:
self.up = nn.ConvTranspose2d(in_ch, out_ch, 2, stride=2)
self.conv = double_conv(in_ch, out_ch)
def forward(self, x1, x2):

View file

@ -119,3 +119,7 @@ def rle_encode(mask_image):
runs = np.where(pixels[1:] != pixels[:-1])[0] + 2
runs[1::2] = runs[1::2] - runs[:-1:2]
return runs
def full_process(filename):
im = PIL.Image.open(filename)
im = resize_and_crop(im)