-
Notifications
You must be signed in to change notification settings - Fork 11
/
data.py
195 lines (165 loc) · 5.61 KB
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
import torchvision
from typing import Any, Callable, Optional
from PIL import Image
from torchvision.datasets.folder import default_loader
from transforms import build_transform
from torch.utils import data
class CIFAR10(torchvision.datasets.CIFAR10):
    """CIFAR-10 dataset whose samples also carry an index.

    Identical to ``torchvision.datasets.CIFAR10`` except that
    ``__getitem__`` returns ``(image, target, index)`` instead of
    ``(image, target)``.
    """

    def __init__(
        self,
        root: str,
        train: bool = True,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ):
        """Forward every argument to the torchvision constructor.

        Args:
            root: Dataset root directory (handed to torchvision unchanged).
            train: Which split to load; also kept on ``self.train`` because
                ``__getitem__`` uses it to choose the index offset.
            transform: Optional callable applied to the PIL image.
            target_transform: Optional callable applied to the target.
            download: Forwarded to torchvision's ``download`` argument.
        """
        super().__init__(root, train, transform, target_transform, download)
        self.train = train

    def __getitem__(self, index: int):
        """Return the sample at ``index`` together with an index.

        Args:
            index (int): Position of the sample inside this split.

        Returns:
            tuple: ``(image, target, index)`` where ``target`` is the index
            of the target class. For the training split the returned index
            is ``index`` itself; for the other split it is
            ``index + 50000``.
        """
        raw_image = self.data[index]
        label = self.targets[index]

        # Convert to a PIL Image so the sample type is consistent with all
        # other datasets.
        image = Image.fromarray(raw_image)

        if self.transform is not None:
            image = self.transform(image)
        if self.target_transform is not None:
            label = self.target_transform(label)

        # NOTE(review): 50000 is presumably the size of the training split,
        # so that test indices continue after the train indices when both
        # splits are concatenated (see build_dataset) -- confirm.
        sample_index = index if self.train else index + 50000
        return image, label, sample_index
class CIFAR100(CIFAR10):
    """`CIFAR100 <https://www.cs.toronto.edu/~kriz/cifar.html>`_ Dataset.

    This is a subclass of the `CIFAR10` Dataset: it only overrides the
    class-level archive metadata below and inherits ``__init__`` and the
    index-returning ``__getitem__`` from `CIFAR10`.
    """

    # Archive location and checksum.
    # NOTE(review): presumably consumed by the inherited torchvision
    # download / integrity-check logic -- confirm against torchvision.
    base_folder = "cifar-100-python"
    url = "https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz"
    filename = "cifar-100-python.tar.gz"
    tgz_md5 = "eb9058c3a382ffc7106e4002c42a8d85"

    # One [file name, md5] entry per file of each split.
    train_list = [["train", "16019d7e3df5f24257cddd939b257f8d"]]
    test_list = [["test", "f0ef6b0ae62326f3e7ffdfab6717acfc"]]

    # Metadata file description; "key" names the entry holding the
    # fine-grained label names.
    meta = {
        "filename": "meta",
        "key": "fine_label_names",
        "md5": "7973b15100ade9c7d40fb424638fde48",
    }
# File extensions that ImageFolder passes to DatasetFolder as the allowed
# ones when the caller does not supply a custom ``is_valid_file`` check.
IMG_EXTENSIONS = (
    ".jpg", ".jpeg", ".png", ".ppm", ".bmp",
    ".pgm", ".tif", ".tiff", ".webp",
)
class ImageFolder(torchvision.datasets.DatasetFolder):
    """A generic data loader that also returns each sample's index.

    The images are expected to be arranged in this way: ::

        root/dog/xxx.png
        root/dog/xxy.png
        root/dog/xxz.png

        root/cat/123.png
        root/cat/nsdf3.png
        root/cat/asd932_.png

    Args:
        root (string): Root directory path.
        transform (callable, optional): A function/transform that takes in
            a PIL image and returns a transformed version.
            E.g, ``transforms.RandomCrop``
        target_transform (callable, optional): A function/transform that
            takes in the target and transforms it.
        loader (callable, optional): A function to load an image given its
            path.
        is_valid_file (callable, optional): A function that takes the path
            of an image file and checks whether it is a valid file (used to
            check for corrupt files).

    Attributes:
        classes (list): List of the class names sorted alphabetically.
        class_to_idx (dict): Dict with items (class_name, class_index).
        imgs (list): List of (image path, class_index) tuples; an alias of
            ``samples``.
    """

    def __init__(
        self,
        root: str,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        loader: Callable[[str], Any] = default_loader,
        is_valid_file: Optional[Callable[[str], bool]] = None,
    ):
        # The extension whitelist is only used when no custom validity
        # check is given.
        # NOTE(review): DatasetFolder presumably accepts only one of
        # ``extensions`` / ``is_valid_file`` at a time -- confirm.
        if is_valid_file is None:
            extensions = IMG_EXTENSIONS
        else:
            extensions = None

        super().__init__(
            root,
            loader,
            extensions,
            transform=transform,
            target_transform=target_transform,
            is_valid_file=is_valid_file,
        )
        self.imgs = self.samples

    def __getitem__(self, index: int):
        """Load, transform and return the sample stored at ``index``.

        Args:
            index (int): Index

        Returns:
            tuple: ``(sample, target, index)`` where ``target`` is the
            class_index of the target class and ``index`` is the argument
            passed in, unchanged.
        """
        file_path, label = self.samples[index]
        image = self.loader(file_path)

        if self.transform is not None:
            image = self.transform(image)
        if self.target_transform is not None:
            label = self.target_transform(label)

        return image, label, index
def build_dataset(type, args):
    """Build the dataset selected by ``args.dataset``.

    Args:
        type: Split name. Only its equality with ``"train"`` matters: the
            result is forwarded to ``build_transform`` as the ``is_train``
            flag. (The name shadows the builtin but is kept for
            backward compatibility with keyword callers.)
        args: Namespace-like object providing ``dataset`` (one of
            ``"CIFAR-10"``, ``"CIFAR-100"``, ``"ImageNet-10"``,
            ``"ImageNet"``) and ``data_path`` (directory containing the
            per-dataset folders). It is also passed to ``build_transform``.

    Returns:
        For the CIFAR datasets, a ``ConcatDataset`` of the train split
        followed by the test split (both built with ``download=True`` and
        the same transform, regardless of ``type``); for the ImageNet
        variants, an ``ImageFolder``.

    Raises:
        ValueError: If ``args.dataset`` is not one of the supported names.
    """
    import os  # local import so this function does not rely on file-level changes

    # Folder (relative to ``args.data_path``) that holds each dataset.
    dataset_dirs = {
        "CIFAR-10": "CIFAR-10",
        "CIFAR-100": "CIFAR-100",
        "ImageNet-10": "ImageNet-10",
        "ImageNet": "ImageNet/train",
    }
    # Fail early with a clear message instead of hitting an unbound
    # ``dataset`` variable further down.
    if args.dataset not in dataset_dirs:
        supported = ", ".join(sorted(dataset_dirs))
        raise ValueError(
            f"Unsupported dataset {args.dataset!r}; expected one of: {supported}"
        )

    is_train = type == "train"
    transform = build_transform(is_train, args)

    # os.path.join works whether or not ``data_path`` ends with a separator.
    root = os.path.join(args.data_path, dataset_dirs[args.dataset])

    if args.dataset in ("CIFAR-10", "CIFAR-100"):
        cifar_cls = CIFAR10 if args.dataset == "CIFAR-10" else CIFAR100
        # Train split first, then test split, merged into a single dataset.
        dataset = data.ConcatDataset(
            [
                cifar_cls(
                    root=root,
                    train=use_train_split,
                    download=True,
                    transform=transform,
                )
                for use_train_split in (True, False)
            ]
        )
    else:
        dataset = ImageFolder(root=root, transform=transform)

    print(dataset)
    return dataset