diff --git a/part_segmentation/data.py b/part_segmentation/data.py
index 3e4af26..a8fb553 100644
--- a/part_segmentation/data.py
+++ b/part_segmentation/data.py
@@ -1,13 +1,3 @@
-"""
-@Author: Yue Wang
-@Contact: yuewangx@mit.edu
-@File: data.py
-@Time: 2018/10/13 6:21 PM
-Modified by
-@Author: Tiange Xiang
-@Contact: txia7609@uni.sydney.edu.au
-@Time: 2021/1/21 3:10 PM
-"""
 import os
diff --git a/part_segmentation/main.py b/part_segmentation/main.py
index d43f9d9..d504a45 100644
--- a/part_segmentation/main.py
+++ b/part_segmentation/main.py
@@ -1,367 +1,440 @@
-"""
-Usage:
-python main.py --model CurveNet --exp_name=demo1
-
-@Author: An Tao
-@Contact: ta19@mails.tsinghua.edu.cn
-@File: main_partseg.py
-@Time: 2019/12/31 11:17 AM
-Modified by
-@Author: Tiange Xiang
-@Contact: txia7609@uni.sydney.edu.au
-@Time: 2021/01/21 3:10 PM
-"""
-
-
 from __future__ import print_function
 import os
-import datetime
 import argparse
 import torch
-import torch.nn as nn
-import torch.nn.functional as F
 import torch.optim as optim
-from torch.optim.lr_scheduler import CosineAnnealingLR, StepLR, MultiStepLR
-from data import ShapeNetPart
-import models as models
+from torch.optim.lr_scheduler import CosineAnnealingLR, StepLR
+from util.data_util import PartNormalDataset
+import torch.nn.functional as F
+import torch.nn as nn
+import model as models
 import numpy as np
 from torch.utils.data import DataLoader
-from util import cal_loss, IOStream
-import sklearn.metrics as metrics
+from util.util import to_categorical, compute_overall_iou, IOStream
+from tqdm import tqdm
+from collections import defaultdict
+from torch.autograd import Variable
+import random
+
+
+classes_str = ['aero','bag','cap','car','chair','ear','guitar','knife','lamp','lapt','moto','mug','Pistol','rock','stake','table']
 
-seg_num = [4, 2, 2, 4, 4, 3, 3, 2, 4, 2, 6, 2, 3, 3, 3, 3]
-index_start = [0, 4, 6, 8, 12, 16, 19, 22, 24, 28, 30, 36, 38, 41, 44, 47]
 
 def _init_():
-    # fix random seed
-    if args.seed is not None:
-        torch.manual_seed(args.seed)
-        np.random.seed(args.seed)
-        torch.cuda.manual_seed_all(args.seed)
-        torch.cuda.manual_seed(args.seed)
-        torch.set_printoptions(10)
-        torch.backends.cudnn.benchmark = False
-        torch.backends.cudnn.deterministic = True
-        os.environ['PYTHONHASHSEED'] = str(args.seed)
-
-    # prepare file structures
     if not os.path.exists('checkpoints'):
         os.makedirs('checkpoints')
-    if not os.path.exists('checkpoints/'+args.exp_name):
-        os.makedirs('checkpoints/'+args.exp_name)
-    if not os.path.exists('checkpoints/'+args.exp_name+'/'+'models'):
-        os.makedirs('checkpoints/'+args.exp_name+'/'+'models')
+    if not os.path.exists('checkpoints/' + args.exp_name):
+        os.makedirs('checkpoints/' + args.exp_name)
 
 
-def calculate_shape_IoU(pred_np, seg_np, label, class_choice, eva=False):
-    label = label.squeeze()
-    shape_ious = []
-    category = {}
-    for shape_idx in range(seg_np.shape[0]):
-        if not class_choice:
-            start_index = index_start[label[shape_idx]]
-            num = seg_num[label[shape_idx]]
-            parts = range(start_index, start_index + num)
-        else:
-            parts = range(seg_num[label[0]])
-        part_ious = []
-        for part in parts:
-            I = np.sum(np.logical_and(pred_np[shape_idx] == part, seg_np[shape_idx] == part))
-            U = np.sum(np.logical_or(pred_np[shape_idx] == part, seg_np[shape_idx] == part))
-            if U == 0:
-                iou = 1  # If the union of groundtruth and prediction points is empty, then count part IoU as 1
-            else:
-                iou = I / float(U)
-            part_ious.append(iou)
-        shape_ious.append(np.mean(part_ious))
-        if label[shape_idx] not in category:
-            category[label[shape_idx]] = [shape_ious[-1]]
-        else:
-            category[label[shape_idx]].append(shape_ious[-1])
+def weight_init(m):
+    if isinstance(m, torch.nn.Linear):
+        torch.nn.init.xavier_normal_(m.weight)
+        if m.bias is not None:
+            torch.nn.init.constant_(m.bias, 0)
+    elif isinstance(m, torch.nn.Conv2d):
+        torch.nn.init.xavier_normal_(m.weight)
+        if m.bias is not None:
+            torch.nn.init.constant_(m.bias, 0)
+    elif isinstance(m, torch.nn.Conv1d):
+        torch.nn.init.xavier_normal_(m.weight)
+        if m.bias is not None:
+            torch.nn.init.constant_(m.bias, 0)
+    elif isinstance(m, torch.nn.BatchNorm2d):
+        torch.nn.init.constant_(m.weight, 1)
+        torch.nn.init.constant_(m.bias, 0)
+    elif isinstance(m, torch.nn.BatchNorm1d):
+        torch.nn.init.constant_(m.weight, 1)
+        torch.nn.init.constant_(m.bias, 0)
 
-    if eva:
-        return shape_ious, category
-    else:
-        return shape_ious
 
 def train(args, io):
-    train_dataset = ShapeNetPart(partition='trainval', num_points=args.num_points, class_choice=args.class_choice)
-    if (len(train_dataset) < 100):
-        drop_last = False
-    else:
-        drop_last = True
-    train_loader = DataLoader(train_dataset, num_workers=8, batch_size=args.batch_size, shuffle=True, drop_last=drop_last, pin_memory=True)
-    test_loader = DataLoader(ShapeNetPart(partition='test', num_points=args.num_points, class_choice=args.class_choice),
-                             num_workers=8, batch_size=args.test_batch_size, shuffle=False, drop_last=False, pin_memory=True)
+    # ============= Model ===================
+    num_part = 50
     device = torch.device("cuda" if args.cuda else "cpu")
-    io.cprint("Let's use " + str(torch.cuda.device_count()) + " GPUs!")
-    seg_num_all = train_loader.dataset.seg_num_all
-    seg_start_index = train_loader.dataset.seg_start_index
-
-    # create model
-    model = models.__dict__[args.model]().to(device)
+    model = models.__dict__[args.model](num_part).to(device)
     io.cprint(str(model))
-    model = nn.DataParallel(model)
+    model.apply(weight_init)
+    model = nn.DataParallel(model)
+    print("Let's use", torch.cuda.device_count(), "GPUs!")
+
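+    # Editor's note: nn.DataParallel prefixes every parameter name with 'module.',
+    # so a checkpoint saved from an unwrapped model must be re-keyed before
+    # load_state_dict() will accept it, e.g. {'conv1.weight': w} becomes
+    # {'module.conv1.weight': w} (key names here are illustrative). That re-keying
+    # is what the resume branch below does.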
+    '''Resume or not'''
+    if args.resume:
+        state_dict = torch.load("checkpoints/%s/best_insiou_model.pth" % args.exp_name,
+                                map_location=torch.device('cpu'))['model']
+        for k in state_dict.keys():
+            if 'module' not in k:
+                from collections import OrderedDict
+                new_state_dict = OrderedDict()
+                for k in state_dict:
+                    new_state_dict['module.' + k] = state_dict[k]
+                state_dict = new_state_dict
+                break
+        model.load_state_dict(state_dict)
+
+        print("Resume training model...")
+        print(torch.load("checkpoints/%s/best_insiou_model.pth" % args.exp_name).keys())
+    else:
+        print("Training from scratch...")
+
+    # =========== Dataloader =================
+    train_data = PartNormalDataset(npoints=2048, split='trainval', normalize=False)
+    print("The number of training data is: %d" % len(train_data))
+
+    test_data = PartNormalDataset(npoints=2048, split='test', normalize=False)
+    print("The number of test data is: %d" % len(test_data))
+
+    train_loader = DataLoader(train_data, batch_size=args.batch_size, shuffle=True, num_workers=args.workers,
+                              drop_last=True)
+
+    test_loader = DataLoader(test_data, batch_size=args.test_batch_size, shuffle=False, num_workers=args.workers,
+                             drop_last=False)
+
+    # ============= Optimizer ================
     if args.use_sgd:
         print("Use SGD")
-        opt = optim.SGD(model.parameters(), lr=args.lr*100, momentum=args.momentum, weight_decay=1e-4)
+        opt = optim.SGD(model.parameters(), lr=args.lr*100, momentum=args.momentum, weight_decay=0)
     else:
         print("Use Adam")
-        opt = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-4)
+        opt = optim.Adam(model.parameters(), lr=args.lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
 
     if args.scheduler == 'cos':
-        if args.use_sgd:
-            eta_min = args.lr/5.0
-        else:
-            eta_min = args.lr/100.0
-        scheduler = CosineAnnealingLR(opt, args.epochs, eta_min=eta_min)
-    elif args.scheduler == 'step':
-        scheduler = MultiStepLR(opt, [140, 180], gamma=0.1)
-    criterion = cal_loss
+        print("Use CosLR")
+        scheduler = CosineAnnealingLR(opt, args.epochs, eta_min=args.lr if args.use_sgd else args.lr / 100)
+    else:
+        print("Use StepLR")
+        scheduler = StepLR(opt, step_size=args.step, gamma=0.5)
+
+    # ============= Training =================
+    best_acc = 0
+    best_class_iou = 0
+    best_instance_iou = 0
+    num_part = 50
+    num_classes = 16
 
-    best_test_iou = 0
     for epoch in range(args.epochs):
-        ####################
-        # Train
-        ####################
-        train_time_cost = datetime.datetime.now()
-        train_loss = 0.0
-        count = 0.0
-        model.train()
-        train_true_cls = []
-        train_pred_cls = []
-        train_true_seg = []
-        train_pred_seg = []
-        train_label_seg = []
-        for data, label, seg in train_loader:
-            seg = seg - seg_start_index
-            label_one_hot = np.zeros((label.shape[0], 16))
-            for idx in range(label.shape[0]):
-                label_one_hot[idx, label[idx]] = 1
-            label_one_hot = torch.from_numpy(label_one_hot.astype(np.float32))
-            data, label_one_hot, seg = data.to(device), label_one_hot.to(device), seg.to(device)
-            data = data.permute(0, 2, 1)
-            batch_size = data.size()[0]
-            opt.zero_grad()
-            seg_pred = model(data, label_one_hot)
-            seg_pred = seg_pred.permute(0, 2, 1).contiguous()
-            loss = criterion(seg_pred.view(-1, seg_num_all), seg.view(-1,1).squeeze())
-            loss.backward()
-            torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
-            opt.step()
-            pred = seg_pred.max(dim=2)[1]  # (batch_size, num_points)
-            count += batch_size
-            train_loss += loss.item() * batch_size
-            seg_np = seg.cpu().numpy()  # (batch_size, num_points)
-            pred_np = pred.detach().cpu().numpy()  # (batch_size, num_points)
-            train_true_cls.append(seg_np.reshape(-1))  # (batch_size * num_points)
-            train_pred_cls.append(pred_np.reshape(-1))  # (batch_size * num_points)
-            train_true_seg.append(seg_np)
-            train_pred_seg.append(pred_np)
-            train_label_seg.append(label.reshape(-1))
-        if args.scheduler == 'cos':
-            scheduler.step()
-        elif args.scheduler == 'step':
-            if opt.param_groups[0]['lr'] > 1e-5:
-                scheduler.step()
-            if opt.param_groups[0]['lr'] < 1e-5:
-                for param_group in opt.param_groups:
-                    param_group['lr'] = 1e-5
-        train_true_cls = np.concatenate(train_true_cls)
-        train_pred_cls = np.concatenate(train_pred_cls)
-        train_acc = metrics.accuracy_score(train_true_cls, train_pred_cls)
-        avg_per_class_acc = metrics.balanced_accuracy_score(train_true_cls, train_pred_cls)
-        train_true_seg = np.concatenate(train_true_seg, axis=0)
-        train_pred_seg = np.concatenate(train_pred_seg, axis=0)
-        train_label_seg = np.concatenate(train_label_seg)
-        train_ious = calculate_shape_IoU(train_pred_seg, train_true_seg, train_label_seg, args.class_choice)
-        train_time_cost = int((datetime.datetime.now() - train_time_cost).total_seconds())
-        outstr = 'Train %d, loss: %.6f, train acc: %.6f, train avg acc: %.6f, train iou: %.6f' % (epoch,
-                                                                                                  train_loss*1.0/count,
-                                                                                                  train_acc,
-                                                                                                  avg_per_class_acc,
-                                                                                                  np.mean(train_ious))
-        io.cprint(outstr)
-        io.cprint(f"Training time: {train_time_cost} seconds.")
-        ####################
-        # Test
-        ####################
-        test_time_cost = datetime.datetime.now()
-        test_loss = 0.0
-        count = 0.0
-        model.eval()
-        test_true_cls = []
-        test_pred_cls = []
-        test_true_seg = []
-        test_pred_seg = []
-        test_label_seg = []
-        for data, label, seg in test_loader:
-            seg = seg - seg_start_index
-            label_one_hot = np.zeros((label.shape[0], 16))
-            for idx in range(label.shape[0]):
-                label_one_hot[idx, label[idx]] = 1
-            label_one_hot = torch.from_numpy(label_one_hot.astype(np.float32))
-            data, label_one_hot, seg = data.to(device), label_one_hot.to(device), seg.to(device)
-            data = data.permute(0, 2, 1)
-            batch_size = data.size()[0]
-            seg_pred = model(data, label_one_hot)
-            seg_pred = seg_pred.permute(0, 2, 1).contiguous()
-            loss = criterion(seg_pred.view(-1, seg_num_all), seg.view(-1,1).squeeze())
-            pred = seg_pred.max(dim=2)[1]
-            count += batch_size
-            test_loss += loss.item() * batch_size
-            seg_np = seg.cpu().numpy()
-            pred_np = pred.detach().cpu().numpy()
-            test_true_cls.append(seg_np.reshape(-1))
-            test_pred_cls.append(pred_np.reshape(-1))
-            test_true_seg.append(seg_np)
-            test_pred_seg.append(pred_np)
-            test_label_seg.append(label.reshape(-1))
-        test_true_cls = np.concatenate(test_true_cls)
-        test_pred_cls = np.concatenate(test_pred_cls)
-        test_acc = metrics.accuracy_score(test_true_cls, test_pred_cls)
-        avg_per_class_acc = metrics.balanced_accuracy_score(test_true_cls, test_pred_cls)
-        test_true_seg = np.concatenate(test_true_seg, axis=0)
-        test_pred_seg = np.concatenate(test_pred_seg, axis=0)
-        test_label_seg = np.concatenate(test_label_seg)
-        test_ious = calculate_shape_IoU(test_pred_seg, test_true_seg, test_label_seg, args.class_choice)
-        test_time_cost = int((datetime.datetime.now() - test_time_cost).total_seconds())
-        outstr = 'Test %d, loss: %.6f, test acc: %.6f, test avg acc: %.6f, test iou: %.6f, best iou %.6f' % (epoch,
-                                                                                                             test_loss*1.0/count,
-                                                                                                             test_acc,
-                                                                                                             avg_per_class_acc,
-                                                                                                             np.mean(test_ious), best_test_iou)
-        io.cprint(outstr)
-        io.cprint(f"Testing time: {test_time_cost} seconds.")
-        if np.mean(test_ious) >= best_test_iou:
-            best_test_iou = np.mean(test_ious)
-            torch.save(model.state_dict(), 'checkpoints/%s/models/model.t7' % args.exp_name)
+        train_epoch(train_loader, model, opt, scheduler, epoch, num_part, num_classes, io)
+
+        test_metrics, total_per_cat_iou = test_epoch(test_loader, model, epoch, num_part, num_classes, io)
+
+        # 1. when we get the best accuracy, save the model:
+        if test_metrics['accuracy'] > best_acc:
+            best_acc = test_metrics['accuracy']
+            io.cprint('Max Acc:%.5f' % best_acc)
+            state = {
+                'model': model.module.state_dict() if torch.cuda.device_count() > 1 else model.state_dict(),
+                'optimizer': opt.state_dict(), 'epoch': epoch, 'test_acc': best_acc}
+            torch.save(state, 'checkpoints/%s/best_acc_model.pth' % args.exp_name)
+
+        # 2. when we get the best instance iou, save the model:
+        if test_metrics['shape_avg_iou'] > best_instance_iou:
+            best_instance_iou = test_metrics['shape_avg_iou']
+            io.cprint('Max instance iou:%.5f' % best_instance_iou)
+            state = {
+                'model': model.module.state_dict() if torch.cuda.device_count() > 1 else model.state_dict(),
+                'optimizer': opt.state_dict(), 'epoch': epoch, 'test_instance_iou': best_instance_iou}
+            torch.save(state, 'checkpoints/%s/best_insiou_model.pth' % args.exp_name)
+
+        # 3. when we get the best class iou, save the model:
+        # first we need to calculate the average per-class iou
+        class_iou = 0
+        for cat_idx in range(16):
+            class_iou += total_per_cat_iou[cat_idx]
+        avg_class_iou = class_iou / 16
+        if avg_class_iou > best_class_iou:
+            best_class_iou = avg_class_iou
+            # print the iou of each class:
+            for cat_idx in range(16):
+                io.cprint(classes_str[cat_idx] + ' iou: ' + str(total_per_cat_iou[cat_idx]))
+            io.cprint('Max class iou:%.5f' % best_class_iou)
+            state = {
+                'model': model.module.state_dict() if torch.cuda.device_count() > 1 else model.state_dict(),
+                'optimizer': opt.state_dict(), 'epoch': epoch, 'test_class_iou': best_class_iou}
+            torch.save(state, 'checkpoints/%s/best_clsiou_model.pth' % args.exp_name)
+
+    # report best acc, ins_iou, cls_iou
+    io.cprint('Final Max Acc:%.5f' % best_acc)
+    io.cprint('Final Max instance iou:%.5f' % best_instance_iou)
+    io.cprint('Final Max class iou:%.5f' % best_class_iou)
+    # save last model
+    state = {
+        'model': model.module.state_dict() if torch.cuda.device_count() > 1 else model.state_dict(),
+        'optimizer': opt.state_dict(), 'epoch': args.epochs - 1, 'test_iou': best_instance_iou}
+    torch.save(state, 'checkpoints/%s/model_ep%d.pth' % (args.exp_name, args.epochs))
+
+
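+# Editor's note: compute_overall_iou is imported from util.util and is not part of
+# this diff. A minimal sketch of the behaviour that train_epoch/test_epoch below
+# rely on (per-shape IoU averaged over the parts present in the ground truth;
+# illustrative only, not the actual implementation):
+#
+#   def compute_overall_iou(pred, target, num_part):    # pred: (b, n, num_part) log-probs
+#       pred_np = pred.max(dim=2)[1].cpu().numpy()      # (b, n) hard part labels
+#       target_np = target.cpu().numpy()
+#       shape_ious = []
+#       for shape_idx in range(pred_np.shape[0]):
+#           part_ious = []
+#           for part in range(num_part):
+#               I = np.sum(np.logical_and(pred_np[shape_idx] == part, target_np[shape_idx] == part))
+#               U = np.sum(np.logical_or(pred_np[shape_idx] == part, target_np[shape_idx] == part))
+#               if np.sum(target_np[shape_idx] == part) != 0:  # only score parts that exist in the gt
+#                   part_ious.append(I / float(U))
+#           shape_ious.append(np.mean(part_ious))
+#       return shape_ious                               # python list of length b
+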
+def train_epoch(train_loader, model, opt, scheduler, epoch, num_part, num_classes, io):
+    train_loss = 0.0
+    count = 0.0
+    accuracy = []
+    shape_ious = 0.0
+    metrics = defaultdict(lambda: list())
+    model.train()
+
+    for batch_id, (points, label, target, norm_plt) in tqdm(enumerate(train_loader), total=len(train_loader), smoothing=0.9):
+        batch_size, num_point, _ = points.size()
+        points, label, target, norm_plt = Variable(points.float()), Variable(label.long()), Variable(target.long()), \
+                                          Variable(norm_plt.float())
+        points = points.transpose(2, 1)
+        norm_plt = norm_plt.transpose(2, 1)
+        points, label, target, norm_plt = points.cuda(non_blocking=True), label.squeeze(1).cuda(non_blocking=True), \
+                                          target.cuda(non_blocking=True), norm_plt.cuda(non_blocking=True)
+        # target: b,n
+        seg_pred = model(points, norm_plt, to_categorical(label, num_classes))  # seg_pred: b,n,50
+        loss = F.nll_loss(seg_pred.contiguous().view(-1, num_part), target.view(-1, 1)[:, 0])
+
+        # instance iou without considering the class average at each batch_size:
+        batch_shapeious = compute_overall_iou(seg_pred, target, num_part)  # list of ious of the current batch: [iou1, iou2, ..., iou_batch_size]
+        # total iou of current batch in each process:
+        batch_shapeious = seg_pred.new_tensor([np.sum(batch_shapeious)], dtype=torch.float64)  # keep on the same device as seg_pred
+
+        # Loss backward
+        loss = torch.mean(loss)
+        opt.zero_grad()
+        loss.backward()
+        opt.step()
+
+        # accuracy
+        seg_pred = seg_pred.contiguous().view(-1, num_part)  # b*n,50
+        target = target.view(-1, 1)[:, 0]  # b*n
+        pred_choice = seg_pred.contiguous().data.max(1)[1]  # b*n
+        correct = pred_choice.eq(target.contiguous().data).sum()  # torch.int64: total number of correctly predicted points
+
+        # sum
+        shape_ious += batch_shapeious.item()  # accumulate the sum of ious over iterations
+        count += batch_size  # accumulate the total number of samples over iterations
+        train_loss += loss.item() * batch_size
+        accuracy.append(correct.item() / (batch_size * num_point))  # append the accuracy of each iteration
+
+        # Note: we do not need to calculate per-class iou during training
+
+    if args.scheduler == 'cos':
+        scheduler.step()
+    elif args.scheduler == 'step':
+        if opt.param_groups[0]['lr'] > 0.9e-5:
+            scheduler.step()
+        if opt.param_groups[0]['lr'] < 0.9e-5:
+            for param_group in opt.param_groups:
+                param_group['lr'] = 0.9e-5
+    io.cprint('Learning rate: %f' % opt.param_groups[0]['lr'])
+
+    metrics['accuracy'] = np.mean(accuracy)
+    metrics['shape_avg_iou'] = shape_ious * 1.0 / count
+
+    outstr = 'Train %d, loss: %f, train acc: %f, train ins_iou: %f' % (epoch + 1, train_loss * 1.0 / count,
+                                                                       metrics['accuracy'], metrics['shape_avg_iou'])
+    io.cprint(outstr)
+
+
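+# Editor's note: to_categorical also comes from util.util and is not shown in this
+# diff. The model consumes each shape's object class as a one-hot vector; a minimal
+# sketch of the assumed helper (illustrative only, not the actual implementation):
+#
+#   def to_categorical(y, num_classes):
+#       new_y = torch.eye(num_classes)[y.cpu().data.numpy(), ]  # (b, num_classes) one-hot rows
+#       return new_y.cuda() if y.is_cuda else new_y
+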
+def test_epoch(test_loader, model, epoch, num_part, num_classes, io):
+    test_loss = 0.0
+    count = 0.0
+    accuracy = []
+    shape_ious = 0.0
+    final_total_per_cat_iou = np.zeros(16).astype(np.float32)
+    final_total_per_cat_seen = np.zeros(16).astype(np.int32)
+    metrics = defaultdict(lambda: list())
+    model.eval()
+
+    # label size: b, meaning each sample has one corresponding class
+    for batch_id, (points, label, target, norm_plt) in tqdm(enumerate(test_loader), total=len(test_loader), smoothing=0.9):
+        batch_size, num_point, _ = points.size()
+        points, label, target, norm_plt = Variable(points.float()), Variable(label.long()), Variable(target.long()), \
+                                          Variable(norm_plt.float())
+        points = points.transpose(2, 1)
+        norm_plt = norm_plt.transpose(2, 1)
+        points, label, target, norm_plt = points.cuda(non_blocking=True), label.squeeze(1).cuda(non_blocking=True), \
+                                          target.cuda(non_blocking=True), norm_plt.cuda(non_blocking=True)
+        seg_pred = model(points, norm_plt, to_categorical(label, num_classes))  # b,n,50
+
+        # instance iou without considering the class average at each batch_size:
+        batch_shapeious = compute_overall_iou(seg_pred, target, num_part)  # [b]
+
+        # per category iou at each batch_size:
+        for shape_idx in range(seg_pred.size(0)):  # sample_idx
+            cur_gt_label = label[shape_idx]  # label[sample_idx] denotes which cat the current sample belongs to
+            final_total_per_cat_iou[cur_gt_label] += batch_shapeious[shape_idx]  # add the iou belonging to this cat
+            final_total_per_cat_seen[cur_gt_label] += 1  # count how many samples of this cat have been seen
+
+        # total iou of current batch in each process:
+        batch_ious = seg_pred.new_tensor([np.sum(batch_shapeious)], dtype=torch.float64)  # keep on the same device as seg_pred
+
+        # prepare seg_pred and target for later calculating loss and acc:
+        seg_pred = seg_pred.contiguous().view(-1, num_part)
+        target = target.view(-1, 1)[:, 0]
+        # Loss
+        loss = F.nll_loss(seg_pred.contiguous(), target.contiguous())
+
+        # accuracy:
+        pred_choice = seg_pred.data.max(1)[1]  # b*n
+        correct = pred_choice.eq(target.data).sum()  # torch.int64: total number of correctly predicted points
+
+        loss = torch.mean(loss)
+        shape_ious += batch_ious.item()  # accumulate the sum of ious over iterations
+        count += batch_size  # accumulate the total number of samples over iterations
+        test_loss += loss.item() * batch_size
+        accuracy.append(correct.item() / (batch_size * num_point))  # append the accuracy of each iteration
+
+    for cat_idx in range(16):
+        if final_total_per_cat_seen[cat_idx] > 0:  # this cat appeared during the iou accumulation above
+            final_total_per_cat_iou[cat_idx] = final_total_per_cat_iou[cat_idx] / final_total_per_cat_seen[cat_idx]  # avg class iou across all samples
+
+    metrics['accuracy'] = np.mean(accuracy)
+    metrics['shape_avg_iou'] = shape_ious * 1.0 / count
+
+    outstr = 'Test %d, loss: %f, test acc: %f, test ins_iou: %f' % (epoch + 1, test_loss * 1.0 / count,
+                                                                    metrics['accuracy'], metrics['shape_avg_iou'])
+    io.cprint(outstr)
+
+    return metrics, final_total_per_cat_iou
 
 
 def test(args, io):
-    test_loader = DataLoader(ShapeNetPart(partition='test', num_points=args.num_points, class_choice=args.class_choice),
-                             batch_size=args.test_batch_size, shuffle=True, drop_last=False)
+    # Dataloader
+    test_data = PartNormalDataset(npoints=2048, split='test', normalize=False)
+    print("The number of test data is: %d" % len(test_data))
+    test_loader = DataLoader(test_data, batch_size=args.test_batch_size, shuffle=False, num_workers=args.workers,
+                             drop_last=False)
+
+    # Try to load models
+    num_part = 50
     device = torch.device("cuda" if args.cuda else "cpu")
-    # Try to load models
-    seg_start_index = test_loader.dataset.seg_start_index
-    model = models.__dict__[args.model]().to(device)
-    model = nn.DataParallel(model)
-    model.load_state_dict(torch.load(args.model_path))
+    model = models.__dict__[args.model](num_part).to(device)
+    io.cprint(str(model))
 
-    model = model.eval()
-    test_acc = 0.0
-    test_true_cls = []
-    test_pred_cls = []
-    test_true_seg = []
-    test_pred_seg = []
-    test_label_seg = []
-    category = {}
-    for data, label, seg in test_loader:
-        seg = seg - seg_start_index
-        label_one_hot = np.zeros((label.shape[0], 16))
-        for idx in range(label.shape[0]):
-            label_one_hot[idx, label[idx]] = 1
-        label_one_hot = torch.from_numpy(label_one_hot.astype(np.float32))
-        data, label_one_hot, seg = data.to(device), label_one_hot.to(device), seg.to(device)
-        data = data.permute(0, 2, 1)
-        seg_pred = model(data, label_one_hot)
-        seg_pred = seg_pred.permute(0, 2, 1).contiguous()
-        pred = seg_pred.max(dim=2)[1]
-        seg_np = seg.cpu().numpy()
-        pred_np = pred.detach().cpu().numpy()
-        test_true_cls.append(seg_np.reshape(-1))
-        test_pred_cls.append(pred_np.reshape(-1))
-        test_true_seg.append(seg_np)
-        test_pred_seg.append(pred_np)
-        test_label_seg.append(label.reshape(-1))
+    from collections import OrderedDict
+    state_dict = torch.load("checkpoints/%s/best_%s_model.pth" % (args.exp_name, args.model_type),
+                            map_location=torch.device('cpu'))['model']
 
-    test_true_cls = np.concatenate(test_true_cls)
-    test_pred_cls = np.concatenate(test_pred_cls)
-    test_acc = metrics.accuracy_score(test_true_cls, test_pred_cls)
-    avg_per_class_acc = metrics.balanced_accuracy_score(test_true_cls, test_pred_cls)
-    test_true_seg = np.concatenate(test_true_seg, axis=0)
-    test_pred_seg = np.concatenate(test_pred_seg, axis=0)
-    test_label_seg = np.concatenate(test_label_seg)
-    test_ious,category = calculate_shape_IoU(test_pred_seg, test_true_seg, test_label_seg, args.class_choice, eva=True)
-    outstr = 'Test :: test acc: %.6f, test avg acc: %.6f, test iou: %.6f' % (test_acc,
-                                                                             avg_per_class_acc,
-                                                                             np.mean(test_ious))
+    new_state_dict = OrderedDict()
+    for layer in state_dict:
+        new_state_dict[layer.replace('module.', '')] = state_dict[layer]
+    model.load_state_dict(new_state_dict)
+
+    model.eval()
+    num_part = 50
+    num_classes = 16
+    metrics = defaultdict(lambda: list())
+    hist_acc = []
+    shape_ious = []
+    total_per_cat_iou = np.zeros((16)).astype(np.float32)
+    total_per_cat_seen = np.zeros((16)).astype(np.int32)
+
+    for batch_id, (points, label, target, norm_plt) in tqdm(enumerate(test_loader), total=len(test_loader), smoothing=0.9):
+        batch_size, num_point, _ = points.size()
+        points, label, target, norm_plt = Variable(points.float()), Variable(label.long()), Variable(target.long()), Variable(norm_plt.float())
+        points = points.transpose(2, 1)
+        norm_plt = norm_plt.transpose(2, 1)
+        points, label, target, norm_plt = points.cuda(non_blocking=True), label.squeeze().cuda(
+            non_blocking=True), target.cuda(non_blocking=True), norm_plt.cuda(non_blocking=True)
+
+        with torch.no_grad():
+            seg_pred = model(points, norm_plt, to_categorical(label, num_classes))  # b,n,50
+
+        # instance iou without considering the class average at each batch_size:
+        batch_shapeious = compute_overall_iou(seg_pred, target, num_part)  # [b]
+        shape_ious += batch_shapeious  # list concatenation, i.e. extend with this batch's per-shape ious
+
+        # per category iou at each batch_size:
+        for shape_idx in range(seg_pred.size(0)):  # sample_idx
+            cur_gt_label = label[shape_idx]  # label[sample_idx]
+            total_per_cat_iou[cur_gt_label] += batch_shapeious[shape_idx]
+            total_per_cat_seen[cur_gt_label] += 1
+
+        # accuracy:
+        seg_pred = seg_pred.contiguous().view(-1, num_part)
+        target = target.view(-1, 1)[:, 0]
+        pred_choice = seg_pred.data.max(1)[1]
+        correct = pred_choice.eq(target.data).cpu().sum()
+        metrics['accuracy'].append(correct.item() / (batch_size * num_point))
+
+    hist_acc += metrics['accuracy']
+    metrics['accuracy'] = np.mean(hist_acc)
+    metrics['shape_avg_iou'] = np.mean(shape_ious)
+    for cat_idx in range(16):
+        if total_per_cat_seen[cat_idx] > 0:
+            total_per_cat_iou[cat_idx] = total_per_cat_iou[cat_idx] / total_per_cat_seen[cat_idx]
+
+    # First we need to calculate the iou of each class and the avg class iou:
+    class_iou = 0
+    for cat_idx in range(16):
+        class_iou += total_per_cat_iou[cat_idx]
+        io.cprint(classes_str[cat_idx] + ' iou: ' + str(total_per_cat_iou[cat_idx]))  # print the iou of each class
+    avg_class_iou = class_iou / 16
+    outstr = 'Test :: test acc: %f, test class mIoU: %f, test instance mIoU: %f' % (metrics['accuracy'], avg_class_iou, metrics['shape_avg_iou'])
     io.cprint(outstr)
-    results = []
-    for key in category.keys():
-        results.append((int(key), np.mean(category[key]), len(category[key])))
-    results.sort(key=lambda x:x[0])
-    for re in results:
-        io.cprint('idx: %d mIoU: %.3f num: %d' % (re[0], re[1], re[2]))
 
 
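+# Editor's note: example invocations, assuming the defaults registered below (the
+# exact model name exposed by the model package is an assumption):
+#   python main.py --model PointMLP1 --exp_name demo1                     # train
+#   python main.py --model PointMLP1 --exp_name demo1 --eval True \
+#                  --model_type insiou                                    # evaluate the best ins-IoU checkpoint
+# Since exp_name is prefixed with the model name below, logs and checkpoints land
+# in checkpoints/PointMLP1_demo1/.
+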
 if __name__ == "__main__":
     # Training settings
-    parser = argparse.ArgumentParser(description='Point Cloud Part Segmentation')
-    parser.add_argument('--model', type=str, default='CurveNet')
-    parser.add_argument('--exp_name', type=str, default='exp', metavar='N',
+    parser = argparse.ArgumentParser(description='3D Shape Part Segmentation')
+    parser.add_argument('--model', type=str, default='PointMLP1')
+    parser.add_argument('--exp_name', type=str, default='demo1', metavar='N',
                         help='Name of the experiment')
-    parser.add_argument('--dataset', type=str, default='shapenetpart', metavar='N',
-                        choices=['shapenetpart'])
-    parser.add_argument('--class_choice', type=str, default=None, metavar='N',
-                        choices=['airplane', 'bag', 'cap', 'car', 'chair',
-                                 'earphone', 'guitar', 'knife', 'lamp', 'laptop',
-                                 'motor', 'mug', 'pistol', 'rocket', 'skateboard', 'table'])
     parser.add_argument('--batch_size', type=int, default=32, metavar='batch_size',
                         help='Size of batch)')
-    parser.add_argument('--test_batch_size', type=int, default=16, metavar='batch_size',
+    parser.add_argument('--test_batch_size', type=int, default=32, metavar='batch_size',
                         help='Size of batch)')
-    parser.add_argument('--epochs', type=int, default=200, metavar='N',
-                        help='number of episode to train ')
-    parser.add_argument('--seed', type=int)
-    parser.add_argument('--use_sgd', type=bool, default=True,
+    parser.add_argument('--epochs', type=int, default=350, metavar='N',
+                        help='number of epochs to train')
+    parser.add_argument('--use_sgd', type=bool, default=False,
                         help='Use SGD')
-    parser.add_argument('--lr', type=float, default=0.0005, metavar='LR',
-                        help='learning rate (default: 0.001, 0.1 if using sgd)')
+    parser.add_argument('--scheduler', type=str, default='step',
+                        help='lr scheduler')
+    parser.add_argument('--step', type=int, default=40,
+                        help='lr decay step')
+    parser.add_argument('--lr', type=float, default=0.003, metavar='LR',
+                        help='learning rate')
     parser.add_argument('--momentum', type=float, default=0.9, metavar='M',
                         help='SGD momentum (default: 0.9)')
-    parser.add_argument('--scheduler', type=str, default='step', metavar='N',
-                        choices=['cos', 'step'],
-                        help='Scheduler to use, [cos, step]')
     parser.add_argument('--no_cuda', type=bool, default=False,
                         help='enables CUDA training')
+    parser.add_argument('--manual_seed', type=int, metavar='S',
+                        help='random seed')
     parser.add_argument('--eval', type=bool, default=False,
                         help='evaluate the model')
     parser.add_argument('--num_points', type=int, default=2048,
                         help='num of points to use')
-    parser.add_argument('--model_path', type=str, default='', metavar='N',
-                        help='Pretrained model path')
+    parser.add_argument('--workers', type=int, default=12)
+    parser.add_argument('--resume', type=bool, default=False,
+                        help='Resume training or not')
+    parser.add_argument('--model_type', type=str, default='insiou',
+                        help='choose to test the best insiou/clsiou/acc model (options: insiou, clsiou, acc)')
+
     args = parser.parse_args()
-    time_str = str(datetime.datetime.now().strftime('-%Y%m%d%H%M%S'))
-    if args.exp_name is None:
-        args.exp_name = time_str
     args.exp_name = args.model+"_"+args.exp_name
     _init_()
 
-    if args.eval:
-        io = IOStream('checkpoints/' + args.exp_name + '/eval.log')
+    if not args.eval:
+        io = IOStream('checkpoints/' + args.exp_name + '/%s_train.log' % (args.exp_name))
     else:
-        io = IOStream('checkpoints/' + args.exp_name + '/run.log')
+        io = IOStream('checkpoints/' + args.exp_name + '/%s_test.log' % (args.exp_name))
     io.cprint(str(args))
-    io.cprint('random seed is: ' + str(args.seed))
+
+    if args.manual_seed is not None:
+        random.seed(args.manual_seed)
+        np.random.seed(args.manual_seed)
+        torch.manual_seed(args.manual_seed)
 
     args.cuda = not args.no_cuda and torch.cuda.is_available()
 
     if args.cuda:
-        io.cprint(
-            'Using GPU : ' + str(torch.cuda.current_device()) + ' from ' + str(torch.cuda.device_count()) + ' devices')
+        io.cprint('Using GPU')
+        if args.manual_seed is not None:
+            torch.cuda.manual_seed(args.manual_seed)
+            torch.cuda.manual_seed_all(args.manual_seed)
     else:
         io.cprint('Using CPU')
 
     if not args.eval:
         train(args, io)
     else:
-        with torch.no_grad():
-            test(args, io)
+        test(args, io)
diff --git a/part_segmentation/util.py b/part_segmentation/util.py
index f31efb9..e648bb2 100644
--- a/part_segmentation/util.py
+++ b/part_segmentation/util.py
@@ -1,9 +1,3 @@
-"""
-@Author: Yue Wang
-@Contact: yuewangx@mit.edu
-@File: util
-@Time: 4/5/19 3:47 PM
-"""
 import numpy as np