diff --git a/examples/trials/kaggle-tgs-salt/README.md b/examples/trials/kaggle-tgs-salt/README.md
new file mode 100644
index 0000000000..f0ed660d39
--- /dev/null
+++ b/examples/trials/kaggle-tgs-salt/README.md
@@ -0,0 +1,56 @@
+## 33rd place solution code for Kaggle [TGS Salt Identification Challenge](https://www.kaggle.com/c/tgs-salt-identification-challenge)
+
+This example shows how to enable AutoML for competition code by running it on NNI without any code change.
+To run this code on NNI, first make sure it runs standalone, then configure config.yml and run:
+```
+nnictl create --config config.yml
+```
+
+The code can still run standalone and is provided for reference; reproducing the competition result takes at least one week of effort.
+
+[Solution summary](https://www.kaggle.com/c/tgs-salt-identification-challenge/discussion/69593)
+
+Preparation:
+
+Download the competition data, then run preprocess.py to prepare the training data.
+
+Stage 1:
+
+Train folds 0-3 for 100 epochs; for each fold, train 3 models (commands below show fold 0):
+```
+python3 train.py --ifolds 0 --epochs 100 --model_name UNetResNetV4
+python3 train.py --ifolds 0 --epochs 100 --model_name UNetResNetV5 --layers 50
+python3 train.py --ifolds 0 --epochs 100 --model_name UNetResNetV6
+```
+
+Stage 2:
+
+Fine-tune Stage 1 models for 300 epochs with a cosine annealing LR scheduler:
+
+```
+python3 train.py --ifolds 0 --epochs 300 --lrs cosine --lr 0.001 --min_lr 0.0001 --model_name UNetResNetV4
+```
+
+Stage 3:
+
+Fine-tune Stage 2 models with the depths channel:
+
+```
+python3 train.py --ifolds 0 --epochs 300 --lrs cosine --lr 0.001 --min_lr 0.0001 --model_name UNetResNetV4 --depths
+```
+
+Stage 4:
+
+Make predictions with each model, then ensemble the results to generate pseudo labels.
+
+Stage 5:
+
+Fine-tune Stage 3 models with pseudo labels:
+
+```
+python3 train.py --ifolds 0 --epochs 300 --lrs cosine --lr 0.001 --min_lr 0.0001 --model_name UNetResNetV4 --depths --pseudo
+```
+
+Stage 6:
+Ensemble all Stage 3 and Stage 5 models.
+
diff --git a/examples/trials/kaggle-tgs-salt/augmentation.py b/examples/trials/kaggle-tgs-salt/augmentation.py
new file mode 100644
index 0000000000..7e558ef130
--- /dev/null
+++ b/examples/trials/kaggle-tgs-salt/augmentation.py
@@ -0,0 +1,241 @@
+# Copyright (c) Microsoft Corporation
+# All rights reserved.
+#
+# MIT License
+#
+# Permission is hereby granted, free of charge,
+# to any person obtaining a copy of this software and associated
+# documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and
+# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
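+#
+# Joint image/mask augmentations: each *WithMask transform draws a single set of
+# random parameters and applies it to every input it receives (image and mask),
+# so the two stay aligned through flips, affine warps, rotations, padding and
+# resized crops. TTATransform applies flip/rotate test-time augmentation to input
+# images, and tta_back_mask_np undoes the corresponding transform on predicted
+# mask arrays.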
+ +import os +import cv2 +import numpy as np +import random +import torchvision.transforms.functional as F +from torchvision.transforms import RandomResizedCrop, ColorJitter, RandomAffine +import PIL +from PIL import Image +import collections + +import settings + + +class RandomHFlipWithMask(object): + def __init__(self, p=0.5): + self.p = p + def __call__(self, *imgs): + if random.random() < self.p: + return map(F.hflip, imgs) + else: + return imgs + +class RandomVFlipWithMask(object): + def __init__(self, p=0.5): + self.p = p + def __call__(self, *imgs): + if random.random() < self.p: + return map(F.vflip, imgs) + else: + return imgs + +class RandomResizedCropWithMask(RandomResizedCrop): + def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), interpolation=Image.BILINEAR): + super(RandomResizedCropWithMask, self).__init__(size, scale, ratio, interpolation) + def __call__(self, *imgs): + i, j, h, w = self.get_params(imgs[0], self.scale, self.ratio) + #print(i,j,h,w) + return map(lambda x: F.resized_crop(x, i, j, h, w, self.size, self.interpolation), imgs) + +class RandomAffineWithMask(RandomAffine): + def __init__(self, degrees, translate=None, scale=None, shear=None, resample='edge'): + super(RandomAffineWithMask, self).__init__(degrees, translate, scale, shear, resample) + def __call__(self, *imgs): + ret = self.get_params(self.degrees, self.translate, self.scale, self.shear, imgs[0].size) + w, h = imgs[0].size + imgs = map(lambda x: F.pad(x, w//2, 0, self.resample), imgs) + imgs = map(lambda x: F.affine(x, *ret, resample=0), imgs) + imgs = map(lambda x: F.center_crop(x, (w, h)), imgs) + return imgs + +class RandomRotateWithMask(object): + def __init__(self, degrees, pad_mode='reflect', expand=False, center=None): + self.pad_mode = pad_mode + self.expand = expand + self.center = center + self.degrees = degrees + + def __call__(self, *imgs): + angle = self.get_angle() + if angle == int(angle) and angle % 90 == 0: + if angle == 0: + return imgs + else: + #print(imgs) + return map(lambda x: F.rotate(x, angle, False, False, None), imgs) + else: + return map(lambda x: self._pad_rotate(x, angle), imgs) + + def get_angle(self): + if isinstance(self.degrees, collections.Sequence): + index = int(random.random() * len(self.degrees)) + return self.degrees[index] + else: + return random.uniform(-self.degrees, self.degrees) + + def _pad_rotate(self, img, angle): + w, h = img.size + img = F.pad(img, w//2, 0, self.pad_mode) + img = F.rotate(img, angle, False, self.expand, self.center) + img = F.center_crop(img, (w, h)) + return img + +class CropWithMask(object): + def __init__(self, i, j, h, w): + self.i = i + self.j = j + self.h = h + self.w = w + def __call__(self, *imgs): + return map(lambda x: F.crop(x, self.i, self.j, self.h, self.w), imgs) + +class PadWithMask(object): + def __init__(self, padding, padding_mode): + self.padding = padding + self.padding_mode = padding_mode + def __call__(self, *imgs): + return map(lambda x: F.pad(x, self.padding, padding_mode=self.padding_mode), imgs) + +class Compose(object): + def __init__(self, transforms): + self.transforms = transforms + + def __call__(self, *imgs): + for t in self.transforms: + imgs = t(*imgs) + return imgs + def __repr__(self): + format_string = self.__class__.__name__ + '(' + for t in self.transforms: + format_string += '\n' + format_string += ' {0}'.format(t) + format_string += '\n)' + return format_string + +def get_img_mask_augments(train_mode, pad_mode): + if pad_mode == 'resize': + img_mask_aug_train = Compose([ + 
RandomHFlipWithMask(), + RandomAffineWithMask(10, translate=(0.1, 0.1), scale=(0.9, 1.1), shear=None) + ]) + img_mask_aug_val = None + else: + img_mask_aug_train = Compose([ + PadWithMask((28, 28), padding_mode=pad_mode), + RandomHFlipWithMask(), + RandomAffineWithMask(10, translate=(0.1, 0.1), scale=(0.9, 1.1), shear=None), + RandomResizedCropWithMask(128, scale=(1., 1.), ratio=(1., 1.)) + ]) + img_mask_aug_val = PadWithMask((13, 14), padding_mode=pad_mode) + + return img_mask_aug_train, img_mask_aug_val + + +def test_transform(): + img_id = '0b73b427d1.png' + img = Image.open(os.path.join(settings.TRAIN_IMG_DIR, img_id)).convert('RGB') + mask = Image.open(os.path.join(settings.TRAIN_MASK_DIR, img_id)).convert('L').point(lambda x: 0 if x < 128 else 1, 'L') + + img_id = '0a1ea1af4.jpg' + img = Image.open(os.path.join(r'D:\data\ship\train_v2', img_id)).convert('RGB') + mask = Image.open(os.path.join(r'D:\data\ship\train_masks', img_id)).convert('L').point(lambda x: 0 if x < 128 else 1, 'L') + + trans = Compose([ + RandomHFlipWithMask(), + RandomVFlipWithMask(), + RandomRotateWithMask([0, 90, 180, 270]), + #RandomRotateWithMask(15), + RandomResizedCropWithMask(768, scale=(0.81, 1)) + ]) + + trans2 = RandomAffineWithMask(45, (0.2,0.2), (0.9, 1.1)) + trans3, trans4 = get_img_mask_augments(True, 'edge') + + img, mask = trans4(img, mask) + + img.show() + mask.point(lambda x: x*255).show() + +def test_color_trans(): + img_id = '00abc623a.jpg' + img = Image.open(os.path.join(settings.TRAIN_IMG_DIR, img_id)).convert('RGB') + trans = ColorJitter(0.1, 0.1, 0.1, 0.1) + + img2 = trans(img) + img.show() + img2.show() + + +class TTATransform(object): + def __init__(self, index): + self.index = index + def __call__(self, img): + trans = { + 0: lambda x: x, + 1: lambda x: F.hflip(x), + 2: lambda x: F.vflip(x), + 3: lambda x: F.vflip(F.hflip(x)), + 4: lambda x: F.rotate(x, 90, False, False), + 5: lambda x: F.hflip(F.rotate(x, 90, False, False)), + 6: lambda x: F.vflip(F.rotate(x, 90, False, False)), + 7: lambda x: F.vflip(F.hflip(F.rotate(x, 90, False, False))) + } + return trans[self.index](img) + +# i is tta index, 0: no change, 1: horizon flip, 2: vertical flip, 3: do both +def tta_back_mask_np(img, index): + print(img.shape) + trans = { + 0: lambda x: x, + 1: lambda x: np.flip(x, 2), + 2: lambda x: np.flip(x, 1), + 3: lambda x: np.flip(np.flip(x, 2), 1), + 4: lambda x: np.rot90(x, 3, axes=(1,2)), + 5: lambda x: np.rot90(np.flip(x, 2), 3, axes=(1,2)), + 6: lambda x: np.rot90(np.flip(x, 1), 3, axes=(1,2)), + 7: lambda x: np.rot90(np.flip(np.flip(x,2), 1), 3, axes=(1,2)) + } + + return trans[index](img) + +def test_tta(): + img_f = os.path.join(settings.TEST_IMG_DIR, '0c2637aa9.jpg') + img = Image.open(img_f) + img = img.convert('RGB') + + tta_index = 7 + trans1 = TTATransform(tta_index) + img = trans1(img) + #img.show() + + img_np = np.array(img) + img_np = np.expand_dims(img_np, 0) + print(img_np.shape) + img_np = tta_back_mask_np(img_np, tta_index) + img_np = np.reshape(img_np, (768, 768, 3)) + img_back = F.to_pil_image(img_np) + img_back.show() + +if __name__ == '__main__': + test_transform() diff --git a/examples/trials/kaggle-tgs-salt/config.yml b/examples/trials/kaggle-tgs-salt/config.yml new file mode 100644 index 0000000000..1a0db8a51f --- /dev/null +++ b/examples/trials/kaggle-tgs-salt/config.yml @@ -0,0 +1,20 @@ +authorName: default +experimentName: example_tgs +trialConcurrency: 2 +maxExecDuration: 10h +maxTrialNum: 10 +#choice: local, remote, pai +trainingServicePlatform: local +#choice: true, 
false +useAnnotation: true +tuner: + #choice: TPE, Random, Anneal, Evolution, BatchTuner + #SMAC (SMAC should be installed through nnictl) + builtinTunerName: TPE + classArgs: + #choice: maximize, minimize + optimize_mode: maximize +trial: + command: python3 train.py + codeDir: . + gpuNum: 1 diff --git a/examples/trials/kaggle-tgs-salt/focal_loss.py b/examples/trials/kaggle-tgs-salt/focal_loss.py new file mode 100644 index 0000000000..1ed8887a31 --- /dev/null +++ b/examples/trials/kaggle-tgs-salt/focal_loss.py @@ -0,0 +1,77 @@ +# Copyright (c) Microsoft Corporation +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, +# to any person obtaining a copy of this software and associated +# documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +import torch +import torch.nn as nn +import torch.nn.functional as F + +class FocalLoss2d(nn.Module): + + def __init__(self, gamma=2, size_average=True): + super(FocalLoss2d, self).__init__() + self.gamma = gamma + self.size_average = size_average + + + def forward(self, logit, target, class_weight=None, type='sigmoid'): + target = target.view(-1, 1).long() + + if type=='sigmoid': + if class_weight is None: + class_weight = [1]*2 #[0.5, 0.5] + + prob = torch.sigmoid(logit) + prob = prob.view(-1, 1) + prob = torch.cat((1-prob, prob), 1) + select = torch.FloatTensor(len(prob), 2).zero_().cuda() + select.scatter_(1, target, 1.) + + elif type=='softmax': + B,C,H,W = logit.size() + if class_weight is None: + class_weight =[1]*C #[1/C]*C + + logit = logit.permute(0, 2, 3, 1).contiguous().view(-1, C) + prob = F.softmax(logit,1) + select = torch.FloatTensor(len(prob), C).zero_().cuda() + select.scatter_(1, target, 1.) + + class_weight = torch.FloatTensor(class_weight).cuda().view(-1,1) + class_weight = torch.gather(class_weight, 0, target) + + prob = (prob*select).sum(1).view(-1,1) + prob = torch.clamp(prob,1e-8,1-1e-8) + batch_loss = - class_weight *(torch.pow((1-prob), self.gamma))*prob.log() + + if self.size_average: + loss = batch_loss.mean() + else: + loss = batch_loss + + return loss + + +if __name__ == '__main__': + L = FocalLoss2d() + out = torch.randn(2, 3, 3).cuda() + target = (torch.sigmoid(out) > 0.5).float() + loss = L(out, target) + print(loss) diff --git a/examples/trials/kaggle-tgs-salt/loader.py b/examples/trials/kaggle-tgs-salt/loader.py new file mode 100644 index 0000000000..089e48b903 --- /dev/null +++ b/examples/trials/kaggle-tgs-salt/loader.py @@ -0,0 +1,291 @@ +# Copyright (c) Microsoft Corporation +# All rights reserved. 
+# +# MIT License +# +# Permission is hereby granted, free of charge, +# to any person obtaining a copy of this software and associated +# documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +import os, cv2, glob +import numpy as np +from PIL import Image + +import torch +import torch.utils.data as data +from torchvision import datasets, models, transforms +from utils import read_masks, get_test_meta, get_nfold_split +import augmentation as aug +from settings import * + +class ImageDataset(data.Dataset): + def __init__(self, train_mode, meta, augment_with_target=None, + image_augment=None, image_transform=None, mask_transform=None): + self.augment_with_target = augment_with_target + self.image_augment = image_augment + self.image_transform = image_transform + self.mask_transform = mask_transform + + self.train_mode = train_mode + self.meta = meta + + self.img_ids = meta[ID_COLUMN].values + self.salt_exists = meta['salt_exists'].values + self.is_train = meta['is_train'].values + + if self.train_mode: + self.mask_filenames = meta[Y_COLUMN].values + + def __getitem__(self, index): + base_img_fn = '{}.png'.format(self.img_ids[index]) + if self.is_train[index]: #self.train_mode: + img_fn = os.path.join(TRAIN_IMG_DIR, base_img_fn) + else: + img_fn = os.path.join(TEST_IMG_DIR, base_img_fn) + img = self.load_image(img_fn) + + if self.train_mode: + base_mask_fn = '{}.png'.format(self.img_ids[index]) + if self.is_train[index]: + mask_fn = os.path.join(TRAIN_MASK_DIR, base_mask_fn) + else: + mask_fn = os.path.join(TEST_DIR, 'masks', base_mask_fn) + mask = self.load_image(mask_fn, True) + img, mask = self.aug_image(img, mask) + return img, mask, self.salt_exists[index] + else: + img = self.aug_image(img) + return [img] + + def aug_image(self, img, mask=None): + if mask is not None: + if self.augment_with_target is not None: + img, mask = self.augment_with_target(img, mask) + if self.image_augment is not None: + img = self.image_augment(img) + if self.mask_transform is not None: + mask = self.mask_transform(mask) + if self.image_transform is not None: + img = self.image_transform(img) + return img, mask + else: + if self.image_augment is not None: + img = self.image_augment(img) + if self.image_transform is not None: + img = self.image_transform(img) + return img + + def load_image(self, img_filepath, grayscale=False): + image = Image.open(img_filepath, 'r') + if not grayscale: + image = image.convert('RGB') + else: + image = image.convert('L').point(lambda x: 0 if x < 128 else 1, 'L') + return image + + def __len__(self): + return len(self.img_ids) + + def collate_fn(self, batch): + imgs = [x[0] for x in batch] + inputs 
= torch.stack(imgs) + + if self.train_mode: + masks = [x[1] for x in batch] + labels = torch.stack(masks) + + salt_target = [x[2] for x in batch] + return inputs, labels, torch.FloatTensor(salt_target) + else: + return inputs + +def mask_to_tensor(x): + x = np.array(x).astype(np.float32) + x = np.expand_dims(x, axis=0) + x = torch.from_numpy(x) + return x + +img_transforms = [ + transforms.Grayscale(num_output_channels=3), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ] + +def get_tta_transforms(index, pad_mode): + tta_transforms = { + 0: [], + 1: [transforms.RandomHorizontalFlip(p=2.)], + 2: [transforms.RandomVerticalFlip(p=2.)], + 3: [transforms.RandomHorizontalFlip(p=2.), transforms.RandomVerticalFlip(p=2.)] + } + if pad_mode == 'resize': + return transforms.Compose([transforms.Resize((H, W)), *(tta_transforms[index]), *img_transforms]) + else: + return transforms.Compose([*(tta_transforms[index]), *img_transforms]) + +def get_image_transform(pad_mode): + if pad_mode == 'resize': + return transforms.Compose([transforms.Resize((H, W)), *img_transforms]) + else: + return transforms.Compose(img_transforms) + +def get_mask_transform(pad_mode): + if pad_mode == 'resize': + return transforms.Compose( + [ + transforms.Resize((H, W)), + transforms.Lambda(mask_to_tensor), + ] + ) + else: + return transforms.Compose( + [ + transforms.Lambda(mask_to_tensor), + ] + ) + +def get_img_mask_augments(pad_mode, depths_channel=False): + if depths_channel: + affine_aug = aug.RandomAffineWithMask(5, translate=(0.1, 0.), scale=(0.9, 1.1), shear=None) + else: + affine_aug = aug.RandomAffineWithMask(15, translate=(0.1, 0.1), scale=(0.9, 1.1), shear=None) + + if pad_mode == 'resize': + img_mask_aug_train = aug.Compose([ + aug.RandomHFlipWithMask(), + affine_aug + ]) + img_mask_aug_val = None + else: + img_mask_aug_train = aug.Compose([ + aug.PadWithMask((28, 28), padding_mode=pad_mode), + aug.RandomHFlipWithMask(), + affine_aug, + aug.RandomResizedCropWithMask(H, scale=(1., 1.), ratio=(1., 1.)) + ]) + img_mask_aug_val = aug.PadWithMask((13, 13, 14, 14), padding_mode=pad_mode) + + return img_mask_aug_train, img_mask_aug_val + +def get_train_loaders(ifold, batch_size=8, dev_mode=False, pad_mode='edge', meta_version=1, pseudo_label=False, depths=False): + train_shuffle = True + train_meta, val_meta = get_nfold_split(ifold, nfold=10, meta_version=meta_version) + + if pseudo_label: + test_meta = get_test_meta() + train_meta = train_meta.append(test_meta, sort=True) + + if dev_mode: + train_shuffle = False + train_meta = train_meta.iloc[:10] + val_meta = val_meta.iloc[:10] + #print(val_meta[X_COLUMN].values[:5]) + #print(val_meta[Y_COLUMN].values[:5]) + print(train_meta.shape, val_meta.shape) + img_mask_aug_train, img_mask_aug_val = get_img_mask_augments(pad_mode, depths) + + train_set = ImageDataset(True, train_meta, + augment_with_target=img_mask_aug_train, + image_augment=transforms.ColorJitter(0.2, 0.2, 0.2, 0.2), + image_transform=get_image_transform(pad_mode), + mask_transform=get_mask_transform(pad_mode)) + + train_loader = data.DataLoader(train_set, batch_size=batch_size, shuffle=train_shuffle, num_workers=4, collate_fn=train_set.collate_fn, drop_last=True) + train_loader.num = len(train_set) + + val_set = ImageDataset(True, val_meta, + augment_with_target=img_mask_aug_val, + image_augment=None, + image_transform=get_image_transform(pad_mode), + mask_transform=get_mask_transform(pad_mode)) + val_loader = data.DataLoader(val_set, batch_size=batch_size, 
shuffle=False, num_workers=4, collate_fn=val_set.collate_fn) + val_loader.num = len(val_set) + val_loader.y_true = read_masks(val_meta[ID_COLUMN].values) + + return train_loader, val_loader + +def get_test_loader(batch_size=16, index=0, dev_mode=False, pad_mode='edge'): + test_meta = get_test_meta() + if dev_mode: + test_meta = test_meta.iloc[:10] + test_set = ImageDataset(False, test_meta, + image_augment=None if pad_mode == 'resize' else transforms.Pad((13,13,14,14), padding_mode=pad_mode), + image_transform=get_tta_transforms(index, pad_mode)) + test_loader = data.DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=4, collate_fn=test_set.collate_fn, drop_last=False) + test_loader.num = len(test_set) + test_loader.meta = test_set.meta + + return test_loader + +depth_channel_tensor = None + +def get_depth_tensor(pad_mode): + global depth_channel_tensor + + if depth_channel_tensor is not None: + return depth_channel_tensor + + depth_tensor = None + + if pad_mode == 'resize': + depth_tensor = np.zeros((H, W)) + for row, const in enumerate(np.linspace(0, 1, H)): + depth_tensor[row, :] = const + else: + depth_tensor = np.zeros((ORIG_H, ORIG_W)) + for row, const in enumerate(np.linspace(0, 1, ORIG_H)): + depth_tensor[row, :] = const + depth_tensor = np.pad(depth_tensor, (14,14), mode=pad_mode) # edge or reflect + depth_tensor = depth_tensor[:H, :W] + + depth_channel_tensor = torch.Tensor(depth_tensor) + return depth_channel_tensor + +def add_depth_channel(img_tensor, pad_mode): + ''' + img_tensor: N, C, H, W + ''' + img_tensor[:, 1] = get_depth_tensor(pad_mode) + img_tensor[:, 2] = img_tensor[:, 0] * get_depth_tensor(pad_mode) + + +def test_train_loader(): + train_loader, val_loader = get_train_loaders(1, batch_size=4, dev_mode=False, pad_mode='edge', meta_version=2, pseudo_label=True) + print(train_loader.num, val_loader.num) + for i, data in enumerate(train_loader): + imgs, masks, salt_exists = data + #pdb.set_trace() + print(imgs.size(), masks.size(), salt_exists.size()) + print(salt_exists) + add_depth_channel(imgs, 'resize') + print(masks) + break + #print(imgs) + #print(masks) + +def test_test_loader(): + test_loader = get_test_loader(4, pad_mode='resize') + print(test_loader.num) + for i, data in enumerate(test_loader): + print(data.size()) + if i > 5: + break + +if __name__ == '__main__': + test_test_loader() + #test_train_loader() + #small_dict, img_ids = load_small_train_ids() + #print(img_ids[:10]) + #print(get_tta_transforms(3, 'edge')) diff --git a/examples/trials/kaggle-tgs-salt/lovasz_losses.py b/examples/trials/kaggle-tgs-salt/lovasz_losses.py new file mode 100644 index 0000000000..7d86a19af9 --- /dev/null +++ b/examples/trials/kaggle-tgs-salt/lovasz_losses.py @@ -0,0 +1,252 @@ +# Copyright (c) Microsoft Corporation +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, +# to any person obtaining a copy of this software and associated +# documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +from __future__ import print_function, division + +import torch +from torch.autograd import Variable +import torch.nn.functional as F +import numpy as np + +try: + from itertools import ifilterfalse +except ImportError: # py3k + from itertools import filterfalse + + +def lovasz_grad(gt_sorted): + """ + Computes gradient of the Lovasz extension w.r.t sorted errors + See Alg. 1 in paper + """ + p = len(gt_sorted) + gts = gt_sorted.sum() + intersection = gts - gt_sorted.float().cumsum(0) + union = gts + (1 - gt_sorted).float().cumsum(0) + jaccard = 1. - intersection / union + if p > 1: # cover 1-pixel case + jaccard[1:p] = jaccard[1:p] - jaccard[0:-1] + return jaccard + + +def iou_binary(preds, labels, EMPTY=1., ignore=None, per_image=True): + """ + IoU for foreground class + binary: 1 foreground, 0 background + """ + if not per_image: + preds, labels = (preds,), (labels,) + ious = [] + for pred, label in zip(preds, labels): + intersection = ((label == 1) & (pred == 1)).sum() + union = ((label == 1) | ((pred == 1) & (label != ignore))).sum() + if not union: + iou = EMPTY + else: + iou = float(intersection) / union + ious.append(iou) + iou = mean(ious) # mean accross images if per_image + return 100 * iou + + +def iou(preds, labels, C, EMPTY=1., ignore=None, per_image=False): + """ + Array of IoU for each (non ignored) class + """ + if not per_image: + preds, labels = (preds,), (labels,) + ious = [] + for pred, label in zip(preds, labels): + iou = [] + for i in range(C): + if i != ignore: # The ignored label is sometimes among predicted classes (ENet - CityScapes) + intersection = ((label == i) & (pred == i)).sum() + union = ((label == i) | ((pred == i) & (label != ignore))).sum() + if not union: + iou.append(EMPTY) + else: + iou.append(float(intersection) / union) + ious.append(iou) + ious = map(mean, zip(*ious)) # mean accross images if per_image + return 100 * np.array(ious) + + +# --------------------------- BINARY LOSSES --------------------------- + + +def lovasz_hinge(logits, labels, per_image=True, ignore=None): + """ + Binary Lovasz hinge loss + logits: [B, H, W] Variable, logits at each pixel (between -\infty and +\infty) + labels: [B, H, W] Tensor, binary ground truth masks (0 or 1) + per_image: compute the loss per image instead of per batch + ignore: void class id + """ + if per_image: + loss = mean(lovasz_hinge_flat(*flatten_binary_scores(log.unsqueeze(0), lab.unsqueeze(0), ignore)) + for log, lab in zip(logits, labels)) + else: + loss = lovasz_hinge_flat(*flatten_binary_scores(logits, labels, ignore)) + return loss + + +def lovasz_hinge_flat(logits, labels): + """ + Binary Lovasz hinge loss + logits: [P] Variable, logits at each prediction (between -\infty and +\infty) + labels: [P] Tensor, binary ground truth labels (0 or 1) + ignore: label to ignore + """ + if len(labels) == 0: + # only void pixels, the gradients should be 0 + return logits.sum() * 0. + signs = 2. * labels.float() - 1. + errors = (1. 
- logits * Variable(signs)) + errors_sorted, perm = torch.sort(errors, dim=0, descending=True) + perm = perm.data + gt_sorted = labels[perm] + grad = lovasz_grad(gt_sorted) + loss = torch.dot(F.elu(errors_sorted)+1, Variable(grad)) + #loss = torch.dot(F.relu(errors_sorted), Variable(grad)) + + return loss + + +def flatten_binary_scores(scores, labels, ignore=None): + """ + Flattens predictions in the batch (binary case) + Remove labels equal to 'ignore' + """ + scores = scores.view(-1) + labels = labels.view(-1) + if ignore is None: + return scores, labels + valid = (labels != ignore) + vscores = scores[valid] + vlabels = labels[valid] + return vscores, vlabels + + +class StableBCELoss(torch.nn.modules.Module): + def __init__(self): + super(StableBCELoss, self).__init__() + def forward(self, input, target): + neg_abs = - input.abs() + loss = input.clamp(min=0) - input * target + (1 + neg_abs.exp()).log() + return loss.mean() + + +def binary_xloss(logits, labels, ignore=None): + """ + Binary Cross entropy loss + logits: [B, H, W] Variable, logits at each pixel (between -\infty and +\infty) + labels: [B, H, W] Tensor, binary ground truth masks (0 or 1) + ignore: void class id + """ + logits, labels = flatten_binary_scores(logits, labels, ignore) + loss = StableBCELoss()(logits, Variable(labels.float())) + return loss + + +# --------------------------- MULTICLASS LOSSES --------------------------- + + +def lovasz_softmax(probas, labels, only_present=False, per_image=False, ignore=None): + """ + Multi-class Lovasz-Softmax loss + probas: [B, C, H, W] Variable, class probabilities at each prediction (between 0 and 1) + labels: [B, H, W] Tensor, ground truth labels (between 0 and C - 1) + only_present: average only on classes present in ground truth + per_image: compute the loss per image instead of per batch + ignore: void class labels + """ + if per_image: + loss = mean(lovasz_softmax_flat(*flatten_probas(prob.unsqueeze(0), lab.unsqueeze(0), ignore), only_present=only_present) + for prob, lab in zip(probas, labels)) + else: + loss = lovasz_softmax_flat(*flatten_probas(probas, labels, ignore), only_present=only_present) + return loss + + +def lovasz_softmax_flat(probas, labels, only_present=False): + """ + Multi-class Lovasz-Softmax loss + probas: [P, C] Variable, class probabilities at each prediction (between 0 and 1) + labels: [P] Tensor, ground truth labels (between 0 and C - 1) + only_present: average only on classes present in ground truth + """ + C = probas.size(1) + losses = [] + for c in range(C): + fg = (labels == c).float() # foreground for class c + if only_present and fg.sum() == 0: + continue + errors = (Variable(fg) - probas[:, c]).abs() + errors_sorted, perm = torch.sort(errors, 0, descending=True) + perm = perm.data + fg_sorted = fg[perm] + losses.append(torch.dot(errors_sorted, Variable(lovasz_grad(fg_sorted)))) + return mean(losses) + + +def flatten_probas(probas, labels, ignore=None): + """ + Flattens predictions in the batch + """ + B, C, H, W = probas.size() + probas = probas.permute(0, 2, 3, 1).contiguous().view(-1, C) # B * H * W, C = P, C + labels = labels.view(-1) + if ignore is None: + return probas, labels + valid = (labels != ignore) + vprobas = probas[valid.nonzero().squeeze()] + vlabels = labels[valid] + return vprobas, vlabels + +def xloss(logits, labels, ignore=None): + """ + Cross entropy loss + """ + return F.cross_entropy(logits, Variable(labels), ignore_index=255) + + +# --------------------------- HELPER FUNCTIONS --------------------------- + +def mean(l, 
ignore_nan=False, empty=0): + """ + nanmean compatible with generators. + """ + l = iter(l) + if ignore_nan: + l = filterfalse(np.isnan, l) + try: + n = 1 + acc = next(l) + except StopIteration: + if empty == 'raise': + raise ValueError('Empty mean') + return empty + for n, v in enumerate(l, 2): + acc += v + if n == 1: + return acc + return acc / n diff --git a/examples/trials/kaggle-tgs-salt/metrics.py b/examples/trials/kaggle-tgs-salt/metrics.py new file mode 100644 index 0000000000..e253fec5cd --- /dev/null +++ b/examples/trials/kaggle-tgs-salt/metrics.py @@ -0,0 +1,85 @@ +# Copyright (c) Microsoft Corporation +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, +# to any person obtaining a copy of this software and associated +# documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +import numpy as np +from pycocotools import mask as cocomask +from utils import get_segmentations + + +def iou(gt, pred): + gt[gt > 0] = 1. + pred[pred > 0] = 1. + intersection = gt * pred + union = gt + pred + union[union > 0] = 1.
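+    # pixel counts of the binarized intersection and union; the epsilon below avoids division by zero when both masks are empty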
+ intersection = np.sum(intersection) + union = np.sum(union) + if union == 0: + union = 1e-09 + return intersection / union + + +def compute_ious(gt, predictions): + gt_ = get_segmentations(gt) + predictions_ = get_segmentations(predictions) + + if len(gt_) == 0 and len(predictions_) == 0: + return np.ones((1, 1)) + elif len(gt_) != 0 and len(predictions_) == 0: + return np.zeros((1, 1)) + else: + iscrowd = [0 for _ in predictions_] + ious = cocomask.iou(gt_, predictions_, iscrowd) + if not np.array(ious).size: + ious = np.zeros((1, 1)) + return ious + + +def compute_precision_at(ious, threshold): + mx1 = np.max(ious, axis=0) + mx2 = np.max(ious, axis=1) + tp = np.sum(mx2 >= threshold) + fp = np.sum(mx2 < threshold) + fn = np.sum(mx1 < threshold) + return float(tp) / (tp + fp + fn) + + +def compute_eval_metric(gt, predictions): + thresholds = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95] + ious = compute_ious(gt, predictions) + precisions = [compute_precision_at(ious, th) for th in thresholds] + return sum(precisions) / len(precisions) + + +def intersection_over_union(y_true, y_pred): + ious = [] + for y_t, y_p in list(zip(y_true, y_pred)): + iou = compute_ious(y_t, y_p) + iou_mean = 1.0 * np.sum(iou) / len(iou) + ious.append(iou_mean) + return np.mean(ious) + + +def intersection_over_union_thresholds(y_true, y_pred): + iouts = [] + for y_t, y_p in list(zip(y_true, y_pred)): + iouts.append(compute_eval_metric(y_t, y_p)) + return np.mean(iouts) diff --git a/examples/trials/kaggle-tgs-salt/models.py b/examples/trials/kaggle-tgs-salt/models.py new file mode 100644 index 0000000000..ef941df886 --- /dev/null +++ b/examples/trials/kaggle-tgs-salt/models.py @@ -0,0 +1,622 @@ +# Copyright (c) Microsoft Corporation +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, +# to any person obtaining a copy of this software and associated +# documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
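+#
+# Model definitions: U-Net style decoders on top of torchvision ResNet encoders
+# (UNetResNetV4/V5/V6, UNet7, UNet8), with scSE (concurrent spatial and channel
+# squeeze-and-excitation) attention gates in the encoder/decoder blocks.
+# Every forward() returns a (mask_logits, image_logit) pair; UNetResNetV4 and
+# UNetResNetV5 return None for image_logit, while the other models also output
+# an image-level logit used as an "empty mask" (salt present or not) classifier.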
+ +from torch import nn +from torch.nn import functional as F +import torch +from torchvision import models +from torchvision.models import resnet34, resnet101, resnet50, resnet152 +import torchvision + + +def conv3x3(in_, out): + return nn.Conv2d(in_, out, 3, padding=1) + + +class ConvRelu(nn.Module): + def __init__(self, in_, out): + super().__init__() + self.conv = conv3x3(in_, out) + self.activation = nn.ReLU(inplace=True) + + def forward(self, x): + x = self.conv(x) + x = self.activation(x) + return x + + +class ConvBn2d(nn.Module): + def __init__(self, in_channels, out_channels, kernel_size=(3,3), stride=(1,1), padding=(1,1)): + super(ConvBn2d, self).__init__() + self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding, bias=False) + self.bn = nn.BatchNorm2d(out_channels) + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + return x + +# Concurrent Spatial and Channel Squeeze & Excitation in Fully Convolutional Networks +# https://arxiv.org/abs/1803.02579 + +class ChannelAttentionGate(nn.Module): + def __init__(self, channel, reduction=16): + super(ChannelAttentionGate, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc = nn.Sequential( + nn.Linear(channel, channel // reduction), + nn.ReLU(inplace=True), + nn.Linear(channel // reduction, channel), + nn.Sigmoid() + ) + + def forward(self, x): + b, c, _, _ = x.size() + y = self.avg_pool(x).view(b, c) + y = self.fc(y).view(b, c, 1, 1) + return y + + +class SpatialAttentionGate(nn.Module): + def __init__(self, channel, reduction=16): + super(SpatialAttentionGate, self).__init__() + self.fc1 = nn.Conv2d(channel, reduction, kernel_size=1, padding=0) + self.fc2 = nn.Conv2d(reduction, 1, kernel_size=1, padding=0) + + def forward(self, x): + x = self.fc1(x) + x = F.relu(x, inplace=True) + x = self.fc2(x) + x = torch.sigmoid(x) + #print(x.size()) + return x + +class DecoderBlock(nn.Module): + def __init__(self, in_channels, middle_channels, out_channels): + super(DecoderBlock, self).__init__() + self.conv1 = ConvBn2d(in_channels, middle_channels) + self.conv2 = ConvBn2d(middle_channels, out_channels) + #self.deconv = nn.ConvTranspose2d(middle_channels, out_channels, kernel_size=4, stride=2, padding=1) + #self.bn = nn.BatchNorm2d(out_channels) + self.spatial_gate = SpatialAttentionGate(out_channels) + self.channel_gate = ChannelAttentionGate(out_channels) + + def forward(self, x, e=None): + x = F.upsample(x, scale_factor=2, mode='bilinear', align_corners=True) + if e is not None: + x = torch.cat([x,e], 1) + + x = F.relu(self.conv1(x), inplace=True) + x = F.relu(self.conv2(x), inplace=True) + + g1 = self.spatial_gate(x) + g2 = self.channel_gate(x) + x = x*g1 + x*g2 + + return x + +class EncoderBlock(nn.Module): + def __init__(self, block, out_channels): + super(EncoderBlock, self).__init__() + self.block = block + self.out_channels = out_channels + self.spatial_gate = SpatialAttentionGate(out_channels) + self.channel_gate = ChannelAttentionGate(out_channels) + + def forward(self, x): + x = self.block(x) + g1 = self.spatial_gate(x) + g2 = self.channel_gate(x) + + return x*g1 + x*g2 + + +def create_resnet(layers): + if layers == 34: + return resnet34(pretrained=True), 512 + elif layers == 50: + return resnet50(pretrained=True), 2048 + elif layers == 101: + return resnet101(pretrained=True), 2048 + elif layers == 152: + return resnet152(pretrained=True), 2048 + else: + raise NotImplementedError('only 34, 50, 101, 152 version of Resnet are implemented') + +class 
UNetResNetV4(nn.Module): + def __init__(self, encoder_depth, num_classes=1, num_filters=32, dropout_2d=0.4, + pretrained=True, is_deconv=True): + super(UNetResNetV4, self).__init__() + self.name = 'UNetResNetV4_'+str(encoder_depth) + self.num_classes = num_classes + self.dropout_2d = dropout_2d + + self.resnet, bottom_channel_nr = create_resnet(encoder_depth) + + self.encoder1 = EncoderBlock( + nn.Sequential(self.resnet.conv1, self.resnet.bn1, self.resnet.relu), + num_filters*2 + ) + self.encoder2 = EncoderBlock(self.resnet.layer1, bottom_channel_nr//8) + self.encoder3 = EncoderBlock(self.resnet.layer2, bottom_channel_nr//4) + self.encoder4 = EncoderBlock(self.resnet.layer3, bottom_channel_nr//2) + self.encoder5 = EncoderBlock(self.resnet.layer4, bottom_channel_nr) + + center_block = nn.Sequential( + ConvBn2d(bottom_channel_nr, bottom_channel_nr, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + ConvBn2d(bottom_channel_nr, bottom_channel_nr//2, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=2, stride=2) + ) + self.center = EncoderBlock(center_block, bottom_channel_nr//2) + + self.decoder5 = DecoderBlock(bottom_channel_nr + bottom_channel_nr // 2, num_filters * 16, 64) + self.decoder4 = DecoderBlock(64 + bottom_channel_nr // 2, num_filters * 8, 64) + self.decoder3 = DecoderBlock(64 + bottom_channel_nr // 4, num_filters * 4, 64) + self.decoder2 = DecoderBlock(64 + bottom_channel_nr // 8, num_filters * 2, 64) + self.decoder1 = DecoderBlock(64, num_filters, 64) + + self.logit = nn.Sequential( + nn.Conv2d(320, 64, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(64, 1, kernel_size=1, padding=0) + ) + + def forward(self, x): + x = self.encoder1(x) #; print('x:', x.size()) + e2 = self.encoder2(x) #; print('e2:', e2.size()) + e3 = self.encoder3(e2) #; print('e3:', e3.size()) + e4 = self.encoder4(e3) #; print('e4:', e4.size()) + e5 = self.encoder5(e4) #; print('e5:', e5.size()) + + center = self.center(e5) #; print('center:', center.size()) + + d5 = self.decoder5(center, e5) #; print('d5:', d5.size()) + d4 = self.decoder4(d5, e4) #; print('d4:', d4.size()) + d3 = self.decoder3(d4, e3) #; print('d3:', d3.size()) + d2 = self.decoder2(d3, e2) #; print('d2:', d2.size()) + d1 = self.decoder1(d2) #; print('d1:', d1.size()) + + f = torch.cat([ + d1, + F.upsample(d2, scale_factor=2, mode='bilinear', align_corners=False), + F.upsample(d3, scale_factor=4, mode='bilinear', align_corners=False), + F.upsample(d4, scale_factor=8, mode='bilinear', align_corners=False), + F.upsample(d5, scale_factor=16, mode='bilinear', align_corners=False), + ], 1) + + f = F.dropout2d(f, p=self.dropout_2d) + + return self.logit(f), None + + def freeze_bn(self): + '''Freeze BatchNorm layers.''' + for layer in self.modules(): + if isinstance(layer, nn.BatchNorm2d): + layer.eval() + + def get_params(self, base_lr): + group1 = [self.encoder1, self.encoder2, self.encoder3, self.encoder4, self.encoder5] + group2 = [self.decoder1, self.decoder2, self.decoder3, self.decoder4, self.decoder5, self.center, self.logit] + + params1 = [] + for x in group1: + for p in x.parameters(): + params1.append(p) + + param_group1 = {'params': params1, 'lr': base_lr / 5} + + params2 = [] + for x in group2: + for p in x.parameters(): + params2.append(p) + param_group2 = {'params': params2, 'lr': base_lr} + + return [param_group1, param_group2] + +class DecoderBlockV5(nn.Module): + def __init__(self, in_channels_x, in_channels_e, middle_channels, out_channels): + super(DecoderBlockV5, self).__init__() + 
self.in_channels = in_channels_x + in_channels_e + self.conv1 = ConvBn2d(self.in_channels, middle_channels) + self.conv2 = ConvBn2d(middle_channels, out_channels) + self.deconv = nn.ConvTranspose2d(in_channels_x, in_channels_x, kernel_size=4, stride=2, padding=1) + self.bn = nn.BatchNorm2d(self.in_channels) + self.spatial_gate = SpatialAttentionGate(out_channels) + self.channel_gate = ChannelAttentionGate(out_channels) + + def forward(self, x, e=None): + #x = F.upsample(x, scale_factor=2, mode='bilinear', align_corners=True) + x = self.deconv(x) + if e is not None: + x = torch.cat([x,e], 1) + x = self.bn(x) + + x = F.relu(self.conv1(x), inplace=True) + x = F.relu(self.conv2(x), inplace=True) + + g1 = self.spatial_gate(x) + g2 = self.channel_gate(x) + x = x*g1 + x*g2 + + return x + + + +class UNetResNetV5(nn.Module): + def __init__(self, encoder_depth, num_classes=1, num_filters=32, dropout_2d=0.5): + super(UNetResNetV5, self).__init__() + self.name = 'UNetResNetV5_'+str(encoder_depth) + self.num_classes = num_classes + self.dropout_2d = dropout_2d + + self.resnet, bottom_channel_nr = create_resnet(encoder_depth) + + self.encoder1 = EncoderBlock( + nn.Sequential(self.resnet.conv1, self.resnet.bn1, self.resnet.relu), + num_filters*2 + ) + self.encoder2 = EncoderBlock(self.resnet.layer1, bottom_channel_nr//8) + self.encoder3 = EncoderBlock(self.resnet.layer2, bottom_channel_nr//4) + self.encoder4 = EncoderBlock(self.resnet.layer3, bottom_channel_nr//2) + self.encoder5 = EncoderBlock(self.resnet.layer4, bottom_channel_nr) + + center_block = nn.Sequential( + ConvBn2d(bottom_channel_nr, bottom_channel_nr, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + ConvBn2d(bottom_channel_nr, bottom_channel_nr//2, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=2, stride=2) + ) + self.center = EncoderBlock(center_block, bottom_channel_nr//2) + + self.decoder5 = DecoderBlockV5(bottom_channel_nr // 2, bottom_channel_nr, num_filters * 16, 64) + self.decoder4 = DecoderBlockV5(64, bottom_channel_nr // 2, num_filters * 8, 64) + self.decoder3 = DecoderBlockV5(64, bottom_channel_nr // 4, num_filters * 4, 64) + self.decoder2 = DecoderBlockV5(64, bottom_channel_nr // 8, num_filters * 2, 64) + self.decoder1 = DecoderBlockV5(64, 0, num_filters, 64) + + self.logit = nn.Sequential( + nn.Conv2d(320, 64, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(64, 1, kernel_size=1, padding=0) + ) + + def forward(self, x): + x = self.encoder1(x) #; print('x:', x.size()) + e2 = self.encoder2(x) #; print('e2:', e2.size()) + e3 = self.encoder3(e2) #; print('e3:', e3.size()) + e4 = self.encoder4(e3) #; print('e4:', e4.size()) + e5 = self.encoder5(e4) #; print('e5:', e5.size()) + + center = self.center(e5) #; print('center:', center.size()) + + d5 = self.decoder5(center, e5) #; print('d5:', d5.size()) + d4 = self.decoder4(d5, e4) #; print('d4:', d4.size()) + d3 = self.decoder3(d4, e3) #; print('d3:', d3.size()) + d2 = self.decoder2(d3, e2) #; print('d2:', d2.size()) + d1 = self.decoder1(d2) #; print('d1:', d1.size()) + + f = torch.cat([ + d1, + F.interpolate(d2, scale_factor=2, mode='bilinear', align_corners=False), + F.interpolate(d3, scale_factor=4, mode='bilinear', align_corners=False), + F.interpolate(d4, scale_factor=8, mode='bilinear', align_corners=False), + F.interpolate(d5, scale_factor=16, mode='bilinear', align_corners=False), + ], 1) + + f = F.dropout2d(f, p=self.dropout_2d) + + return self.logit(f), None + +class UNetResNetV6(nn.Module): + ''' + 1. 
Remove first pool from UNetResNetV5, such that resolution is doubled + 2. Remove scSE from center block + 3. Increase default dropout + ''' + def __init__(self, encoder_depth, num_filters=32, dropout_2d=0.5): + super(UNetResNetV6, self).__init__() + assert encoder_depth == 34, 'UNetResNetV6: only 34 layers is supported!' + self.name = 'UNetResNetV6_'+str(encoder_depth) + self.dropout_2d = dropout_2d + + self.resnet, bottom_channel_nr = create_resnet(encoder_depth) + + self.encoder1 = EncoderBlock( + nn.Sequential(self.resnet.conv1, self.resnet.bn1, self.resnet.relu), + num_filters*2 + ) + + self.encoder2 = EncoderBlock(self.resnet.layer1, bottom_channel_nr//8) + self.encoder3 = EncoderBlock(self.resnet.layer2, bottom_channel_nr//4) + self.encoder4 = EncoderBlock(self.resnet.layer3, bottom_channel_nr//2) + self.encoder5 = EncoderBlock(self.resnet.layer4, bottom_channel_nr) + + self.center = nn.Sequential( + ConvBn2d(bottom_channel_nr, bottom_channel_nr, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + ConvBn2d(bottom_channel_nr, bottom_channel_nr//2, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=2, stride=2) + ) + #self.center = EncoderBlock(center_block, bottom_channel_nr//2) + + self.decoder5 = DecoderBlockV5(bottom_channel_nr // 2, bottom_channel_nr, num_filters * 16, 64) + self.decoder4 = DecoderBlockV5(64, bottom_channel_nr // 2, num_filters * 8, 64) + self.decoder3 = DecoderBlockV5(64, bottom_channel_nr // 4, num_filters * 4, 64) + self.decoder2 = DecoderBlockV5(64, bottom_channel_nr // 8, num_filters * 2, 64) + self.decoder1 = DecoderBlockV5(64, 0, num_filters, 64) + + self.logit = nn.Sequential( + nn.Conv2d(512, 64, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(64, 1, kernel_size=1, padding=0) + ) + + self.logit_image = nn.Sequential( + nn.Linear(512, 128), + nn.ReLU(inplace=True), + nn.Linear(128, 1) + ) + + def forward(self, x): + x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=False) + x = self.encoder1(x) #; print('x:', x.size()) + e2 = self.encoder2(x) #; print('e2:', e2.size()) + e3 = self.encoder3(e2) #; print('e3:', e3.size()) + e4 = self.encoder4(e3) #; print('e4:', e4.size()) + e5 = self.encoder5(e4) #; print('e5:', e5.size()) + + center = self.center(e5) #; print('center:', center.size()) + + d5 = self.decoder5(center, e5) #; print('d5:', d5.size()) + d4 = self.decoder4(d5, e4) #; print('d4:', d4.size()) + d3 = self.decoder3(d4, e3) #; print('d3:', d3.size()) + d2 = self.decoder2(d3, e2) #; print('d2:', d2.size()) + #d1 = self.decoder1(d2) ; print('d1:', d1.size()) + + f = torch.cat([ + d2, + F.interpolate(d3, scale_factor=2, mode='bilinear', align_corners=False), + F.interpolate(d4, scale_factor=4, mode='bilinear', align_corners=False), + F.interpolate(d5, scale_factor=8, mode='bilinear', align_corners=False), + F.interpolate(center, scale_factor=16, mode='bilinear', align_corners=False), + ], 1) + + f = F.dropout2d(f, p=self.dropout_2d, training=self.training) + + # empty mask classifier + img_f = F.adaptive_avg_pool2d(e5, 1).view(x.size(0), -1) + img_f = F.dropout(img_f, p=0.5, training=self.training) + img_logit = self.logit_image(img_f).view(-1) + + return self.logit(f), img_logit + + +class DecoderBlockV7(nn.Module): + def __init__(self, in_channels_x, in_channels_e, middle_channels, out_channels): + super(DecoderBlockV7, self).__init__() + self.in_channels = in_channels_x + in_channels_e + self.conv1 = ConvBn2d(self.in_channels, middle_channels) + self.conv2 = ConvBn2d(middle_channels, 
out_channels) + self.deconv = nn.ConvTranspose2d(in_channels_x, in_channels_x, kernel_size=4, stride=2, padding=1) + self.bn = nn.BatchNorm2d(self.in_channels) + self.spatial_gate = SpatialAttentionGate(out_channels) + self.channel_gate = ChannelAttentionGate(out_channels) + + def forward(self, x, e=None, upsample=True): + #x = F.upsample(x, scale_factor=2, mode='bilinear', align_corners=True) + if upsample: + x = self.deconv(x) + if e is not None: + x = torch.cat([x,e], 1) + x = self.bn(x) + + x = F.relu(self.conv1(x), inplace=True) + x = F.relu(self.conv2(x), inplace=True) + + g1 = self.spatial_gate(x) + g2 = self.channel_gate(x) + x = x*g1 + x*g2 + + return x + +class UNet7(nn.Module): + def __init__(self, encoder_depth, num_classes=1, num_filters=32, dropout_2d=0.5): + super(UNet7, self).__init__() + nf = num_filters + self.name = 'UNet7_'+str(encoder_depth)+'_nf'+str(nf) + self.num_classes = num_classes + self.dropout_2d = dropout_2d + + self.resnet, nbtm = create_resnet(encoder_depth) + + self.encoder1 = EncoderBlock( + nn.Sequential( + nn.Conv2d(3, 64, kernel_size=7, stride=1, padding=3, bias=False), + nn.BatchNorm2d(64), + nn.ReLU(inplace=True), + ), + 64 + ) + self.encoder2 = EncoderBlock( + nn.Sequential( + nn.MaxPool2d(kernel_size=2, stride=2), + self.resnet.layer1, + ), + nbtm//8 + ) + self.encoder3 = EncoderBlock(self.resnet.layer2, nbtm//4) + self.encoder4 = EncoderBlock(self.resnet.layer3, nbtm//2) + self.encoder5 = EncoderBlock(self.resnet.layer4, nbtm) + + center_block = nn.Sequential( + ConvBn2d(nbtm, nbtm, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + ConvBn2d(nbtm, nbtm//2, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + #nn.MaxPool2d(kernel_size=2, stride=2) # remove + ) + self.center = EncoderBlock(center_block, nbtm//2) + + self.decoder5 = DecoderBlockV7(nbtm // 2, nbtm, nf * 16, nf*2) + self.decoder4 = DecoderBlockV7(nf*2, nbtm // 2, nf * 8, nf*2) + self.decoder3 = DecoderBlockV7(nf*2, nbtm // 4, nf * 4, nf*2) + self.decoder2 = DecoderBlockV7(nf*2, nbtm // 8, nf * 2, nf*2) + self.decoder1 = DecoderBlockV7(nf*2, 64, nf*2, nf*2) + + self.logit = nn.Sequential( + nn.Conv2d(nf*10, 64, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(64, 1, kernel_size=1, padding=0) + ) + + self.logit_image = nn.Sequential( + nn.Linear(nbtm, 128), + nn.ReLU(inplace=True), + nn.Linear(128, 1), + ) + + def forward(self, x): + e1 = self.encoder1(x) #; print('e1:', e1.size()) + e2 = self.encoder2(e1) #; print('e2:', e2.size()) + e3 = self.encoder3(e2) #; print('e3:', e3.size()) + e4 = self.encoder4(e3) #; print('e4:', e4.size()) + e5 = self.encoder5(e4) #; print('e5:', e5.size()) + + center = self.center(e5) #; print('center:', center.size()) + + d5 = self.decoder5(center, e5, upsample=False) #; print('d5:', d5.size()) + d4 = self.decoder4(d5, e4) #; print('d4:', d4.size()) + d3 = self.decoder3(d4, e3) #; print('d3:', d3.size()) + d2 = self.decoder2(d3, e2) #; print('d2:', d2.size()) + d1 = self.decoder1(d2, e1) #; print('d1:', d1.size()) + + f = torch.cat([ + d1, + F.interpolate(d2, scale_factor=2, mode='bilinear', align_corners=False), + F.interpolate(d3, scale_factor=4, mode='bilinear', align_corners=False), + F.interpolate(d4, scale_factor=8, mode='bilinear', align_corners=False), + F.interpolate(d5, scale_factor=16, mode='bilinear', align_corners=False), + ], 1) + + f = F.dropout2d(f, p=self.dropout_2d) + + # empty mask classifier + img_f = F.adaptive_avg_pool2d(e5, 1).view(x.size(0), -1) + img_f = F.dropout(img_f, p=0.5, training=self.training) + img_logit 
= self.logit_image(img_f).view(-1) + + return self.logit(f), img_logit + + +class UNet8(nn.Module): + def __init__(self, encoder_depth, num_classes=1, num_filters=32, dropout_2d=0.5): + super(UNet8, self).__init__() + nf = num_filters + self.name = 'UNet8_'+str(encoder_depth)+'_nf'+str(nf) + self.num_classes = num_classes + self.dropout_2d = dropout_2d + + self.resnet, nbtm = create_resnet(encoder_depth) + + self.encoder1 = EncoderBlock( + nn.Sequential(self.resnet.conv1, self.resnet.bn1, self.resnet.relu), + 64 + ) + + self.encoder2 = EncoderBlock(self.resnet.layer1, nbtm//8) + self.encoder3 = EncoderBlock(self.resnet.layer2, nbtm//4) + self.encoder4 = EncoderBlock(self.resnet.layer3, nbtm//2) + self.encoder5 = EncoderBlock(self.resnet.layer4, nbtm) + + center_block = nn.Sequential( + ConvBn2d(nbtm, nbtm, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + ConvBn2d(nbtm, nbtm//2, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + #nn.MaxPool2d(kernel_size=2, stride=2) # remove + ) + self.center = EncoderBlock(center_block, nbtm//2) + + self.decoder5 = DecoderBlockV7(nbtm // 2, nbtm, nf * 16, nf*2) + self.decoder4 = DecoderBlockV7(nf*2, nbtm // 2, nf * 8, nf*2) + self.decoder3 = DecoderBlockV7(nf*2, nbtm // 4, nf * 4, nf*2) + self.decoder2 = DecoderBlockV7(nf*2, nbtm // 8, nf * 2, nf*2) + self.decoder1 = DecoderBlockV7(nf*2+64, 3, nf*2, nf*2) + + self.logit = nn.Sequential( + nn.Conv2d(nf*10, 64, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(64, 1, kernel_size=1, padding=0) + ) + + self.logit_image = nn.Sequential( + nn.Linear(nbtm, 128), + nn.ReLU(inplace=True), + nn.Linear(128, 1), + ) + + def forward(self, x): + e1 = self.encoder1(x) #; print('e1:', e1.size()) + e2 = self.encoder2(e1) #; print('e2:', e2.size()) + e3 = self.encoder3(e2) #; print('e3:', e3.size()) + e4 = self.encoder4(e3) #; print('e4:', e4.size()) + e5 = self.encoder5(e4) #; print('e5:', e5.size()) + + center = self.center(e5) #; print('center:', center.size()) + + d5 = self.decoder5(center, e5, upsample=False) #; print('d5:', d5.size()) + d4 = self.decoder4(d5, e4) #; print('d4:', d4.size()) + d3 = self.decoder3(d4, e3) #; print('d3:', d3.size()) + d2 = self.decoder2(d3, e2) #; print('d2:', d2.size()) + d1 = self.decoder1(torch.cat([d2, e1], 1), x) #; print('d1:', d1.size()) + + f = torch.cat([ + d1, + F.interpolate(d2, scale_factor=2, mode='bilinear', align_corners=False), + F.interpolate(d3, scale_factor=4, mode='bilinear', align_corners=False), + F.interpolate(d4, scale_factor=8, mode='bilinear', align_corners=False), + F.interpolate(d5, scale_factor=16, mode='bilinear', align_corners=False), + ], 1) + + f = F.dropout2d(f, p=self.dropout_2d) + + # empty mask classifier + img_f = F.adaptive_avg_pool2d(e5, 1).view(x.size(0), -1) + img_f = F.dropout(img_f, p=0.5, training=self.training) + img_logit = self.logit_image(img_f).view(-1) + + return self.logit(f), img_logit + + +def test(): + model = UNet8(50, num_filters=32).cuda() + inputs = torch.randn(2,3,128,128).cuda() + out, _ = model(inputs) + #print(model) + print(out.size(), _.size()) #, cls_taret.size()) + #print(out) + + +if __name__ == '__main__': + test() diff --git a/examples/trials/kaggle-tgs-salt/postprocessing.py b/examples/trials/kaggle-tgs-salt/postprocessing.py new file mode 100644 index 0000000000..9da2b8a7e7 --- /dev/null +++ b/examples/trials/kaggle-tgs-salt/postprocessing.py @@ -0,0 +1,63 @@ +# Copyright (c) Microsoft Corporation +# All rights reserved. 
+# +# MIT License +# +# Permission is hereby granted, free of charge, +# to any person obtaining a copy of this software and associated +# documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +import os +import numpy as np +import pandas as pd +from scipy import ndimage as ndi +import cv2 + +from utils import get_crop_pad_sequence, run_length_decoding +import settings + +def resize_image(image, target_size): + resized_image = cv2.resize(image, target_size) + return resized_image + +def crop_image(image, target_size): + top_crop, right_crop, bottom_crop, left_crop = get_crop_pad_sequence(image.shape[0] - target_size[0], + image.shape[1] - target_size[1]) + cropped_image = image[top_crop:image.shape[0] - bottom_crop, left_crop:image.shape[1] - right_crop] + return cropped_image + +def binarize(image, threshold): + image_binarized = (image > threshold).astype(np.uint8) + return image_binarized + +def save_pseudo_label_masks(submission_file): + df = pd.read_csv(submission_file, na_filter=False) + print(df.head()) + + img_dir = os.path.join(settings.TEST_DIR, 'masks') + + for i, row in enumerate(df.values): + decoded_mask = run_length_decoding(row[1], (101,101)) + filename = os.path.join(img_dir, '{}.png'.format(row[0])) + rgb_mask = cv2.cvtColor(decoded_mask,cv2.COLOR_GRAY2RGB) + print(filename) + cv2.imwrite(filename, decoded_mask) + if i % 100 == 0: + print(i) + + + +if __name__ == '__main__': + save_pseudo_label_masks('V456_ensemble_1011.csv') \ No newline at end of file diff --git a/examples/trials/kaggle-tgs-salt/predict.py b/examples/trials/kaggle-tgs-salt/predict.py new file mode 100644 index 0000000000..28a9d1f183 --- /dev/null +++ b/examples/trials/kaggle-tgs-salt/predict.py @@ -0,0 +1,200 @@ +# Copyright (c) Microsoft Corporation +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, +# to any person obtaining a copy of this software and associated +# documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +import os +import glob +import argparse +import numpy as np +import torch +import torch.optim as optim +import torch.nn.functional as F + +import settings +from loader import get_test_loader, add_depth_channel +from models import UNetResNetV4, UNetResNetV5, UNetResNetV6, UNet7, UNet8 +from postprocessing import crop_image, binarize, resize_image +from metrics import intersection_over_union, intersection_over_union_thresholds +from utils import create_submission + +def do_tta_predict(args, model, ckp_path, tta_num=4): + ''' + return 18000x128x128 np array + ''' + model.eval() + preds = [] + meta = None + + # i is tta index, 0: no change, 1: horizon flip, 2: vertical flip, 3: do both + for flip_index in range(tta_num): + print('flip_index:', flip_index) + test_loader = get_test_loader(args.batch_size, index=flip_index, dev_mode=False, pad_mode=args.pad_mode) + meta = test_loader.meta + outputs = None + with torch.no_grad(): + for i, img in enumerate(test_loader): + add_depth_channel(img, args.pad_mode) + img = img.cuda() + output, _ = model(img) + output = torch.sigmoid(output) + if outputs is None: + outputs = output.squeeze() + else: + outputs = torch.cat([outputs, output.squeeze()], 0) + + print('{} / {}'.format(args.batch_size*(i+1), test_loader.num), end='\r') + outputs = outputs.cpu().numpy() + # flip back masks + if flip_index == 1: + outputs = np.flip(outputs, 2) + elif flip_index == 2: + outputs = np.flip(outputs, 1) + elif flip_index == 3: + outputs = np.flip(outputs, 2) + outputs = np.flip(outputs, 1) + #print(outputs.shape) + preds.append(outputs) + + parent_dir = ckp_path+'_out' + if not os.path.exists(parent_dir): + os.makedirs(parent_dir) + np_file = os.path.join(parent_dir, 'pred.npy') + + model_pred_result = np.mean(preds, 0) + np.save(np_file, model_pred_result) + + return model_pred_result, meta + +def predict(args, model, checkpoint, out_file): + print('predicting {}...'.format(checkpoint)) + pred, meta = do_tta_predict(args, model, checkpoint, tta_num=2) + print(pred.shape) + y_pred_test = generate_preds(pred, (settings.ORIG_H, settings.ORIG_W), pad_mode=args.pad_mode) + + submission = create_submission(meta, y_pred_test) + submission.to_csv(out_file, index=None, encoding='utf-8') + + +def ensemble(args, model, checkpoints): + preds = [] + meta = None + for checkpoint in checkpoints: + model.load_state_dict(torch.load(checkpoint)) + model = model.cuda() + print('predicting...', checkpoint) + + pred, meta = do_tta_predict(args, model, checkpoint, tta_num=2) + preds.append(pred) + + y_pred_test = generate_preds(np.mean(preds, 0), (settings.ORIG_H, settings.ORIG_W), args.pad_mode) + + submission = create_submission(meta, y_pred_test) + submission.to_csv(args.sub_file, index=None, encoding='utf-8') + +def ensemble_np(args, np_files, save_np=None): + preds = [] + for np_file in np_files: + pred = np.load(np_file) + print(np_file, pred.shape) + preds.append(pred) + + y_pred_test = generate_preds(np.mean(preds, 0), (settings.ORIG_H, settings.ORIG_W), args.pad_mode) + + if save_np is not None: + np.save(save_np, np.mean(preds, 0)) + + meta = get_test_loader(args.batch_size, index=0, dev_mode=False, pad_mode=args.pad_mode).meta + + submission = create_submission(meta, y_pred_test) + submission.to_csv(args.sub_file, 
index=None, encoding='utf-8')
+
+def generate_preds(outputs, target_size, pad_mode, threshold=0.5):
+    preds = []
+
+    for output in outputs:
+        #print(output.shape)
+        if pad_mode == 'resize':
+            cropped = resize_image(output, target_size=target_size)
+        else:
+            cropped = crop_image(output, target_size=target_size)
+        pred = binarize(cropped, threshold)
+        preds.append(pred)
+
+    return preds
+
+
+def ensemble_predict(args):
+    model = eval(args.model_name)(args.layers, num_filters=args.nf)
+
+    checkpoints = [
+        r'D:\data\salt\models\pseudo\UNetResNetV4_34\edge\best_5.pth',
+        r'D:\data\salt\models\pseudo\UNetResNetV4_34\edge\best_6.pth',
+        r'D:\data\salt\models\pseudo\UNetResNetV4_34\edge\best_8.pth',
+        r'D:\data\salt\models\pseudo\UNetResNetV4_34\edge\best_9.pth'
+    ]
+    print(checkpoints)
+
+    ensemble(args, model, checkpoints)
+
+def ensemble_np_results(args):
+    np_files1 = glob.glob(r'D:\data\salt\models\depths\UNetResNetV5_50\edge\*pth_out\*.npy')
+    np_files2 = glob.glob(r'D:\data\salt\models\depths\UNetResNetV4_34\edge\*pth_out\*.npy')
+    np_files3 = glob.glob(r'D:\data\salt\models\depths\UNetResNetV6_34\edge\*pth_out\*.npy')
+    np_files6 = glob.glob(r'D:\data\salt\models\ensemble\*.npy')
+    np_files = np_files1 + np_files2 + np_files3 + np_files6
+    print(np_files)
+    ensemble_np(args, np_files)
+
+def predict_model(args):
+    model = eval(args.model_name)(args.layers, num_filters=args.nf)
+    model_subdir = args.pad_mode
+    if args.meta_version == 2:
+        model_subdir = args.pad_mode+'_meta2'
+    if args.exp_name is None:
+        model_file = os.path.join(settings.MODEL_DIR, model.name, model_subdir, 'best_{}.pth'.format(args.ifold))
+    else:
+        model_file = os.path.join(settings.MODEL_DIR, args.exp_name, model.name, model_subdir, 'best_{}.pth'.format(args.ifold))
+
+    if os.path.exists(model_file):
+        print('loading {}...'.format(model_file))
+        model.load_state_dict(torch.load(model_file))
+    else:
+        raise ValueError('model file not found: {}'.format(model_file))
+    model = model.cuda()
+    predict(args, model, model_file, args.sub_file)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Salt segmentation')
+    parser.add_argument('--model_name', required=True, type=str, help='model class name, e.g. UNetResNetV4')
+    parser.add_argument('--layers', default=34, type=int, help='model layers')
+    parser.add_argument('--nf', default=32, type=int, help='num_filters param for model')
+    parser.add_argument('--ifold', required=True, type=int, help='kfold index')
+    parser.add_argument('--batch_size', default=32, type=int, help='batch_size')
+    parser.add_argument('--pad_mode', required=True, choices=['reflect', 'edge', 'resize'], help='pad method')
+    parser.add_argument('--exp_name', default='depths', type=str, help='exp name')
+    parser.add_argument('--meta_version', default=2, type=int, help='meta version')
+    parser.add_argument('--sub_file', default='all_ensemble.csv', type=str, help='submission file')
+
+    args = parser.parse_args()
+
+    predict_model(args)
+    #ensemble_predict(args)
+    #ensemble_np_results(args)
diff --git a/examples/trials/kaggle-tgs-salt/preprocess.py b/examples/trials/kaggle-tgs-salt/preprocess.py
new file mode 100644
index 0000000000..f23cb419af
--- /dev/null
+++ b/examples/trials/kaggle-tgs-salt/preprocess.py
@@ -0,0 +1,93 @@
+# Copyright (c) Microsoft Corporation
+# All rights reserved.
+# +# MIT License +# +# Permission is hereby granted, free of charge, +# to any person obtaining a copy of this software and associated +# documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +import os +import pandas as pd +import numpy as np +import json +import torch +import torch.nn as nn +from keras.preprocessing.image import load_img +from sklearn.model_selection import StratifiedKFold +import settings +import utils + +DATA_DIR = settings.DATA_DIR + +def prepare_metadata(): + print('creating metadata') + meta = utils.generate_metadata(train_images_dir=settings.TRAIN_DIR, + test_images_dir=settings.TEST_DIR, + depths_filepath=settings.DEPTHS_FILE + ) + meta.to_csv(settings.META_FILE, index=None) + +def cov_to_class(val): + for i in range(0, 11): + if val * 10 <= i : + return i + +def generate_stratified_metadata(): + train_df = pd.read_csv(os.path.join(DATA_DIR, "train.csv"), index_col="id", usecols=[0]) + depths_df = pd.read_csv(os.path.join(DATA_DIR, "depths.csv"), index_col="id") + train_df = train_df.join(depths_df) + train_df["masks"] = [np.array(load_img(os.path.join(DATA_DIR, "train", "masks", "{}.png".format(idx)), grayscale=True)) / 255 for idx in train_df.index] + train_df["coverage"] = train_df.masks.map(np.sum) / pow(settings.ORIG_H, 2) + train_df["coverage_class"] = train_df.coverage.map(cov_to_class) + train_df["salt_exists"] = train_df.coverage_class.map(lambda x: 0 if x == 0 else 1) + train_df["is_train"] = 1 + train_df["file_path_image"] = train_df.index.map(lambda x: os.path.join(settings.TRAIN_IMG_DIR, '{}.png'.format(x))) + train_df["file_path_mask"] = train_df.index.map(lambda x: os.path.join(settings.TRAIN_MASK_DIR, '{}.png'.format(x))) + + train_df.to_csv(os.path.join(settings.DATA_DIR, 'train_meta2.csv'), + columns=['file_path_image','file_path_mask','is_train','z','salt_exists', 'coverage_class', 'coverage']) + train_splits = {} + + kf = StratifiedKFold(n_splits=10) + for i, (train_index, valid_index) in enumerate(kf.split(train_df.index.values.reshape(-1), train_df.coverage_class.values.reshape(-1))): + train_splits[str(i)] = { + 'train_index': train_index.tolist(), + 'val_index': valid_index.tolist() + } + with open(os.path.join(settings.DATA_DIR, 'train_split.json'), 'w') as f: + json.dump(train_splits, f, indent=4) + + print('done') + + +def test(): + meta = pd.read_csv(settings.META_FILE) + meta_train = meta[meta['is_train'] == 1] + print(type(meta_train)) + + cv = utils.KFoldBySortedValue() + for train_idx, valid_idx in cv.split(meta_train[settings.DEPTH_COLUMN].values.reshape(-1)): + print(len(train_idx), len(valid_idx)) + print(train_idx[:10]) + print(valid_idx[:10]) + 
#break + + meta_train_split, meta_valid_split = meta_train.iloc[train_idx], meta_train.iloc[valid_idx] + print(type(meta_train_split)) + print(meta_train_split[settings.X_COLUMN].values[:10]) + +if __name__ == '__main__': + generate_stratified_metadata() diff --git a/examples/trials/kaggle-tgs-salt/settings.py b/examples/trials/kaggle-tgs-salt/settings.py new file mode 100644 index 0000000000..a5d232bb8c --- /dev/null +++ b/examples/trials/kaggle-tgs-salt/settings.py @@ -0,0 +1,45 @@ +# Copyright (c) Microsoft Corporation +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, +# to any person obtaining a copy of this software and associated +# documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +import os + +DATA_DIR = r'/mnt/chicm/data/salt' + +TRAIN_DIR = os.path.join(DATA_DIR, 'train') +TEST_DIR = os.path.join(DATA_DIR, 'test') + +TRAIN_IMG_DIR = os.path.join(TRAIN_DIR, 'images') +TRAIN_MASK_DIR = os.path.join(TRAIN_DIR, 'masks') +TEST_IMG_DIR = os.path.join(TEST_DIR, 'images') + +LABEL_FILE = os.path.join(DATA_DIR, 'train.csv') +DEPTHS_FILE = os.path.join(DATA_DIR, 'depths.csv') +META_FILE = os.path.join(DATA_DIR, 'meta.csv') + +MODEL_DIR = os.path.join(DATA_DIR, 'models') + +ID_COLUMN = 'id' +DEPTH_COLUMN = 'z' +X_COLUMN = 'file_path_image' +Y_COLUMN = 'file_path_mask' + +H = W = 128 +ORIG_H = ORIG_W = 101 \ No newline at end of file diff --git a/examples/trials/kaggle-tgs-salt/train.py b/examples/trials/kaggle-tgs-salt/train.py new file mode 100644 index 0000000000..a627bef4c4 --- /dev/null +++ b/examples/trials/kaggle-tgs-salt/train.py @@ -0,0 +1,258 @@ +# Copyright (c) Microsoft Corporation +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, +# to any person obtaining a copy of this software and associated +# documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +import os +import argparse +import time + +import torch +import torch.nn as nn +import torch.optim as optim +import torch.nn.functional as F +from torch.optim.lr_scheduler import CosineAnnealingLR, ReduceLROnPlateau + +from loader import get_train_loaders, add_depth_channel +from models import UNetResNetV4, UNetResNetV5, UNetResNetV6 +from lovasz_losses import lovasz_hinge +from focal_loss import FocalLoss2d +from postprocessing import binarize, crop_image, resize_image +from metrics import intersection_over_union, intersection_over_union_thresholds +import settings + +MODEL_DIR = settings.MODEL_DIR +focal_loss2d = FocalLoss2d() + +def weighted_loss(args, output, target, epoch=0): + mask_output, salt_output = output + mask_target, salt_target = target + + lovasz_loss = lovasz_hinge(mask_output, mask_target) + focal_loss = focal_loss2d(mask_output, mask_target) + + focal_weight = 0.2 + + if salt_output is not None and args.train_cls: + salt_loss = F.binary_cross_entropy_with_logits(salt_output, salt_target) + return salt_loss, focal_loss.item(), lovasz_loss.item(), salt_loss.item(), lovasz_loss.item() + focal_loss.item()*focal_weight + + return lovasz_loss+focal_loss*focal_weight, focal_loss.item(), lovasz_loss.item(), 0., lovasz_loss.item() + focal_loss.item()*focal_weight + +def train(args): + print('start training...') + + """@nni.variable(nni.choice('UNetResNetV4', 'UNetResNetV5', 'UNetResNetV6'), name=model_name)""" + model_name = args.model_name + + model = eval(model_name)(args.layers, num_filters=args.nf) + model_subdir = args.pad_mode + if args.meta_version == 2: + model_subdir = args.pad_mode+'_meta2' + if args.exp_name is None: + model_file = os.path.join(MODEL_DIR, model.name,model_subdir, 'best_{}.pth'.format(args.ifold)) + else: + model_file = os.path.join(MODEL_DIR, args.exp_name, model.name, model_subdir, 'best_{}.pth'.format(args.ifold)) + + parent_dir = os.path.dirname(model_file) + if not os.path.exists(parent_dir): + os.makedirs(parent_dir) + + if args.init_ckp is not None: + CKP = args.init_ckp + else: + CKP = model_file + if os.path.exists(CKP): + print('loading {}...'.format(CKP)) + model.load_state_dict(torch.load(CKP)) + model = model.cuda() + + if args.optim == 'Adam': + optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=0.0001) + else: + optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=0.0001) + + train_loader, val_loader = get_train_loaders(args.ifold, batch_size=args.batch_size, dev_mode=args.dev_mode, \ + pad_mode=args.pad_mode, meta_version=args.meta_version, pseudo_label=args.pseudo, depths=args.depths) + + if args.lrs == 'plateau': + lr_scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=args.factor, patience=args.patience, min_lr=args.min_lr) + else: + lr_scheduler = CosineAnnealingLR(optimizer, args.t_max, eta_min=args.min_lr) + + print('epoch | lr | % | loss | avg | f loss | lovaz | iou | iout | best | time | save | salt |') + + best_iout, _iou, _f, _l, _salt, best_mix_score = validate(args, model, val_loader, args.start_epoch) + print('val | | | | | {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.4f} | | | {:.4f} |'.format( + _f, _l, _iou, best_iout, best_iout, _salt)) + if args.val: + return + + model.train() + + if args.lrs == 'plateau': + 
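+        # The scheduler is stepped once here, before the first training epoch, using
+        # the initial validation score computed above: ReduceLROnPlateau.step() expects
+        # the monitored metric (best_iout), while CosineAnnealingLR.step() in the else
+        # branch below takes no argument.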
lr_scheduler.step(best_iout) + else: + lr_scheduler.step() + + for epoch in range(args.start_epoch, args.epochs): + train_loss = 0 + + current_lr = get_lrs(optimizer) + bg = time.time() + for batch_idx, data in enumerate(train_loader): + img, target, salt_target = data + if args.depths: + add_depth_channel(img, args.pad_mode) + img, target, salt_target = img.cuda(), target.cuda(), salt_target.cuda() + optimizer.zero_grad() + output, salt_out = model(img) + + loss, *_ = weighted_loss(args, (output, salt_out), (target, salt_target), epoch=epoch) + loss.backward() + + if args.optim == 'Adam' and args.adamw: + wd = 0.0001 + for group in optimizer.param_groups: + for param in group['params']: + param.data = param.data.add(-wd * group['lr'], param.data) + + optimizer.step() + + train_loss += loss.item() + print('\r {:4d} | {:.5f} | {:4d}/{} | {:.4f} | {:.4f} |'.format( + epoch, float(current_lr[0]), args.batch_size*(batch_idx+1), train_loader.num, loss.item(), train_loss/(batch_idx+1)), end='') + + iout, iou, focal_loss, lovaz_loss, salt_loss, mix_score = validate(args, model, val_loader, epoch=epoch) + """@nni.report_intermediate_result(iout)""" + + _save_ckp = '' + if iout > best_iout: + best_iout = iout + torch.save(model.state_dict(), model_file) + _save_ckp = '*' + if args.store_loss_model and mix_score > best_mix_score: + best_mix_score = mix_score + torch.save(model.state_dict(), model_file+'_loss') + _save_ckp += '.' + print(' {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.2f} | {:4s} | {:.4f} |'.format( + focal_loss, lovaz_loss, iou, iout, best_iout, (time.time() - bg) / 60, _save_ckp, salt_loss)) + + model.train() + + if args.lrs == 'plateau': + lr_scheduler.step(best_iout) + else: + lr_scheduler.step() + + del model, train_loader, val_loader, optimizer, lr_scheduler + """@nni.report_final_result(best_iout)""" + +def get_lrs(optimizer): + lrs = [] + for pgs in optimizer.state_dict()['param_groups']: + lrs.append(pgs['lr']) + lrs = ['{:.6f}'.format(x) for x in lrs] + return lrs + +def validate(args, model, val_loader, epoch=0, threshold=0.5): + model.eval() + outputs = [] + focal_loss, lovaz_loss, salt_loss, w_loss = 0, 0, 0, 0 + with torch.no_grad(): + for img, target, salt_target in val_loader: + if args.depths: + add_depth_channel(img, args.pad_mode) + img, target, salt_target = img.cuda(), target.cuda(), salt_target.cuda() + output, salt_out = model(img) + + _, floss, lovaz, _salt_loss, _w_loss = weighted_loss(args, (output, salt_out), (target, salt_target), epoch=epoch) + focal_loss += floss + lovaz_loss += lovaz + salt_loss += _salt_loss + w_loss += _w_loss + output = torch.sigmoid(output) + + for o in output.cpu(): + outputs.append(o.squeeze().numpy()) + + n_batches = val_loader.num // args.batch_size if val_loader.num % args.batch_size == 0 else val_loader.num // args.batch_size + 1 + + # y_pred, list of np array, each np array's shape is 101,101 + y_pred = generate_preds(args, outputs, (settings.ORIG_H, settings.ORIG_W), threshold) + + iou_score = intersection_over_union(val_loader.y_true, y_pred) + iout_score = intersection_over_union_thresholds(val_loader.y_true, y_pred) + + return iout_score, iou_score, focal_loss / n_batches, lovaz_loss / n_batches, salt_loss / n_batches, iout_score*4 - w_loss + + +def generate_preds(args, outputs, target_size, threshold=0.5): + preds = [] + + for output in outputs: + if args.pad_mode == 'resize': + cropped = resize_image(output, target_size=target_size) + else: + cropped = crop_image(output, target_size=target_size) + pred = 
binarize(cropped, threshold)
+        preds.append(pred)
+
+    return preds
+
+if __name__ == '__main__':
+
+    parser = argparse.ArgumentParser(description='TGS Salt segmentation')
+    parser.add_argument('--layers', default=34, type=int, help='model layers')
+    parser.add_argument('--nf', default=32, type=int, help='num_filters param for model')
+    parser.add_argument('--lr', default=0.001, type=float, help='learning rate')
+    parser.add_argument('--min_lr', default=0.0001, type=float, help='min learning rate')
+    parser.add_argument('--ifolds', default='0', type=str, help='kfold indices')
+    parser.add_argument('--batch_size', default=32, type=int, help='batch_size')
+    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
+    parser.add_argument('--epochs', default=200, type=int, help='epoch')
+    parser.add_argument('--optim', default='SGD', choices=['SGD', 'Adam'], help='optimizer')
+    parser.add_argument('--lrs', default='cosine', choices=['cosine', 'plateau'], help='LR scheduler')
+    parser.add_argument('--patience', default=6, type=int, help='lr scheduler patience')
+    parser.add_argument('--factor', default=0.5, type=float, help='lr scheduler factor')
+    parser.add_argument('--t_max', default=15, type=int, help='T_max for cosine annealing lr scheduler')
+    parser.add_argument('--pad_mode', default='edge', choices=['reflect', 'edge', 'resize'], help='pad method')
+    parser.add_argument('--exp_name', default=None, type=str, help='exp name')
+    parser.add_argument('--model_name', default='UNetResNetV4', type=str, help='model class name, e.g. UNetResNetV4')
+    parser.add_argument('--init_ckp', default=None, type=str, help='resume from checkpoint path')
+    parser.add_argument('--val', action='store_true')
+    parser.add_argument('--store_loss_model', action='store_true')
+    parser.add_argument('--train_cls', action='store_true')
+    parser.add_argument('--meta_version', default=2, type=int, help='meta version')
+    parser.add_argument('--pseudo', action='store_true')
+    parser.add_argument('--depths', action='store_true')
+    parser.add_argument('--dev_mode', action='store_true')
+    parser.add_argument('--adamw', action='store_true')
+
+    args = parser.parse_args()
+
+    '''@nni.get_next_parameter()'''
+
+    print(args)
+    ifolds = [int(x) for x in args.ifolds.split(',')]
+    print(ifolds)
+
+    for i in ifolds:
+        args.ifold = i
+        train(args)
diff --git a/examples/trials/kaggle-tgs-salt/utils.py b/examples/trials/kaggle-tgs-salt/utils.py
new file mode 100644
index 0000000000..fa8c8bbba5
--- /dev/null
+++ b/examples/trials/kaggle-tgs-salt/utils.py
@@ -0,0 +1,179 @@
+# Copyright (c) Microsoft Corporation
+# All rights reserved.
+#
+# MIT License
+#
+# Permission is hereby granted, free of charge,
+# to any person obtaining a copy of this software and associated
+# documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and
+# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +import os +import json +import sys +import time +import numpy as np +import pandas as pd +from PIL import Image +from tqdm import tqdm +from pycocotools import mask as cocomask +from sklearn.model_selection import KFold + +import settings + +def create_submission(meta, predictions): + output = [] + for image_id, mask in zip(meta['id'].values, predictions): + rle_encoded = ' '.join(str(rle) for rle in run_length_encoding(mask)) + output.append([image_id, rle_encoded]) + + submission = pd.DataFrame(output, columns=['id', 'rle_mask']).astype(str) + return submission + + +def encode_rle(predictions): + return [run_length_encoding(mask) for mask in predictions] + + +def read_masks(img_ids): + masks = [] + for img_id in img_ids: + base_filename = '{}.png'.format(img_id) + mask = Image.open(os.path.join(settings.TRAIN_MASK_DIR, base_filename)) + mask = np.asarray(mask.convert('L').point(lambda x: 0 if x < 128 else 1)).astype(np.uint8) + masks.append(mask) + return masks + + +def run_length_encoding(x): + bs = np.where(x.T.flatten())[0] + + rle = [] + prev = -2 + for b in bs: + if (b > prev + 1): rle.extend((b + 1, 0)) + rle[-1] += 1 + prev = b + return rle + + +def run_length_decoding(mask_rle, shape): + s = mask_rle.split() + starts, lengths = [np.asarray(x, dtype=int) for x in (s[0:][::2], s[1:][::2])] + starts -= 1 + ends = starts + lengths + img = np.zeros(shape[1] * shape[0], dtype=np.uint8) + for lo, hi in zip(starts, ends): + img[lo:hi] = 255 + return img.reshape((shape[1], shape[0])).T + +def get_salt_existence(): + train_mask = pd.read_csv(settings.LABEL_FILE) + salt_exists_dict = {} + for row in train_mask.values: + salt_exists_dict[row[0]] = 0 if (row[1] is np.nan or len(row[1]) < 1) else 1 + return salt_exists_dict + +def generate_metadata(train_images_dir, test_images_dir, depths_filepath): + depths = pd.read_csv(depths_filepath) + salt_exists_dict = get_salt_existence() + + metadata = {} + for filename in tqdm(os.listdir(os.path.join(train_images_dir, 'images'))): + image_filepath = os.path.join(train_images_dir, 'images', filename) + mask_filepath = os.path.join(train_images_dir, 'masks', filename) + image_id = filename.split('.')[0] + depth = depths[depths['id'] == image_id]['z'].values[0] + + metadata.setdefault('file_path_image', []).append(image_filepath) + metadata.setdefault('file_path_mask', []).append(mask_filepath) + metadata.setdefault('is_train', []).append(1) + metadata.setdefault('id', []).append(image_id) + metadata.setdefault('z', []).append(depth) + metadata.setdefault('salt_exists', []).append(salt_exists_dict[image_id]) + + for filename in tqdm(os.listdir(os.path.join(test_images_dir, 'images'))): + image_filepath = os.path.join(test_images_dir, 'images', filename) + image_id = filename.split('.')[0] + depth = depths[depths['id'] == image_id]['z'].values[0] + + metadata.setdefault('file_path_image', []).append(image_filepath) + metadata.setdefault('file_path_mask', []).append(None) + metadata.setdefault('is_train', []).append(0) + metadata.setdefault('id', []).append(image_id) + metadata.setdefault('z', []).append(depth) + metadata.setdefault('salt_exists', []).append(0) + + return pd.DataFrame(metadata) + +def rle_from_binary(prediction): + prediction = np.asfortranarray(prediction) + return 
cocomask.encode(prediction) + + +def binary_from_rle(rle): + return cocomask.decode(rle) + + +def get_segmentations(labeled): + nr_true = labeled.max() + segmentations = [] + for i in range(1, nr_true + 1): + msk = labeled == i + segmentation = rle_from_binary(msk.astype('uint8')) + segmentation['counts'] = segmentation['counts'].decode("UTF-8") + segmentations.append(segmentation) + return segmentations + + +def get_crop_pad_sequence(vertical, horizontal): + top = int(vertical / 2) + bottom = vertical - top + right = int(horizontal / 2) + left = horizontal - right + return (top, right, bottom, left) + + +def get_nfold_split(ifold, nfold=10, meta_version=1): + if meta_version == 2: + return get_nfold_split2(ifold, nfold) + + meta = pd.read_csv(settings.META_FILE, na_filter=False) + meta_train = meta[meta['is_train'] == 1] + + kf = KFold(n_splits=nfold) + for i, (train_index, valid_index) in enumerate(kf.split(meta_train[settings.ID_COLUMN].values.reshape(-1))): + if i == ifold: + break + return meta_train.iloc[train_index], meta_train.iloc[valid_index] + +def get_nfold_split2(ifold, nfold=10): + meta_train = pd.read_csv(os.path.join(settings.DATA_DIR, 'train_meta2.csv')) + + with open(os.path.join(settings.DATA_DIR, 'train_split.json'), 'r') as f: + train_splits = json.load(f) + train_index = train_splits[str(ifold)]['train_index'] + valid_index = train_splits[str(ifold)]['val_index'] + + return meta_train.iloc[train_index], meta_train.iloc[valid_index] + + +def get_test_meta(): + meta = pd.read_csv(settings.META_FILE, na_filter=False) + test_meta = meta[meta['is_train'] == 0] + print(len(test_meta.values)) + return test_meta + +if __name__ == '__main__': + get_nfold_split(2)
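+
+    # A minimal, optional sanity check of the RLE helpers above (added for
+    # illustration only; not part of the original competition pipeline). It encodes
+    # a small synthetic binary mask and verifies that decoding recovers it.
+    demo_mask = np.zeros((101, 101), dtype=np.uint8)
+    demo_mask[10:20, 30:40] = 1
+    rle = run_length_encoding(demo_mask)
+    rle_str = ' '.join(str(v) for v in rle)
+    decoded = (run_length_decoding(rle_str, (101, 101)) > 0).astype(np.uint8)
+    print('RLE round-trip ok:', bool((decoded == demo_mask).all()))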