dataset.py
import os

import cv2
import numpy as np
import pandas as pd
import albumentations
import torch
from torch.utils.data import Dataset


class LandmarkDataset(Dataset):
    """Loads landmark images from disk, applying an optional albumentations transform."""

    def __init__(self, csv, split, mode, transform=None):
        self.csv = csv.reset_index()
        self.split = split
        self.mode = mode
        self.transform = transform

    def __len__(self):
        return self.csv.shape[0]

    def __getitem__(self, index):
        row = self.csv.iloc[index]

        # cv2 loads images as BGR; reverse the channel axis to get RGB.
        image = cv2.imread(row.filepath)[:, :, ::-1]

        if self.transform is not None:
            res = self.transform(image=image)
            image = res['image'].astype(np.float32)
        else:
            image = image.astype(np.float32)

        # HWC -> CHW, the channel layout PyTorch expects.
        image = image.transpose(2, 0, 1)

        if self.mode == 'test':
            return torch.tensor(image)
        else:
            return torch.tensor(image), torch.tensor(row.landmark_id)


def get_transforms(image_size):
    # Training augmentation: flip, mild JPEG compression, affine jitter,
    # resize, a single large cutout hole, then ImageNet normalization.
    transforms_train = albumentations.Compose([
        albumentations.HorizontalFlip(p=0.5),
        albumentations.ImageCompression(quality_lower=99, quality_upper=100),
        albumentations.ShiftScaleRotate(shift_limit=0.2, scale_limit=0.2, rotate_limit=10, border_mode=0, p=0.7),
        albumentations.Resize(image_size, image_size),
        albumentations.Cutout(max_h_size=int(image_size * 0.4), max_w_size=int(image_size * 0.4), num_holes=1, p=0.5),
        albumentations.Normalize(),
    ])

    # Validation: deterministic resize + normalization only.
    transforms_val = albumentations.Compose([
        albumentations.Resize(image_size, image_size),
        albumentations.Normalize(),
    ])

    return transforms_train, transforms_val


def get_df(kernel_type, data_dir, train_step):
    # train_0.csv holds the image-id/landmark-id pairs from the first
    # training step (expected in the working directory).
    df = pd.read_csv('train_0.csv')

    if train_step == 0:
        df_train = pd.read_csv(os.path.join(data_dir, 'train.csv')).drop(columns=['url'])
    else:
        # Later steps keep only the landmark classes present in train_0.csv.
        cls_81313 = df.landmark_id.unique()
        df_train = pd.read_csv(os.path.join(data_dir, 'train.csv')).drop(columns=['url']).set_index('landmark_id').loc[cls_81313].reset_index()

    # Images live at <data_dir>/train/<a>/<b>/<c>/<id>.jpg, where a, b, c are
    # the first three characters of the image id.
    df_train['filepath'] = df_train['id'].apply(lambda x: os.path.join(data_dir, 'train', x[0], x[1], x[2], f'{x}.jpg'))

    df = df_train.merge(df, on=['id', 'landmark_id'], how='left')

    # Remap raw landmark ids to contiguous class indices [0, out_dim).
    landmark_id2idx = {landmark_id: idx for idx, landmark_id in enumerate(sorted(df['landmark_id'].unique()))}
    idx2landmark_id = {idx: landmark_id for idx, landmark_id in enumerate(sorted(df['landmark_id'].unique()))}
    df['landmark_id'] = df['landmark_id'].map(landmark_id2idx)
    out_dim = df.landmark_id.nunique()

    return df, out_dim
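

# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original pipeline): wires
# get_df, get_transforms and LandmarkDataset into a DataLoader. The
# kernel_type string, data_dir path, image size and batch size below are
# placeholder assumptions; adjust them to your setup, and make sure
# train_0.csv is present in the working directory.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    from torch.utils.data import DataLoader

    df, out_dim = get_df(kernel_type='b5', data_dir='./data', train_step=0)  # placeholder args
    transforms_train, transforms_val = get_transforms(image_size=256)

    dataset_train = LandmarkDataset(df, split='train', mode='train', transform=transforms_train)
    train_loader = DataLoader(dataset_train, batch_size=32, shuffle=True, num_workers=4)

    # Each batch: float32 images of shape (B, 3, 256, 256) and integer class
    # indices in [0, out_dim).
    images, targets = next(iter(train_loader))
    print(images.shape, targets.shape, out_dim)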