-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdataset.py
61 lines (44 loc) · 1.8 KB
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import numpy as np
import csv
# Number of CSV chunk files the train split is stored in; `read_train` reads the
# files `train-annotation.chunk<i>.csv` for every `i` in `range(_CHUNKS_OF_TRAIN)`.
_CHUNKS_OF_TRAIN = 3
def read_train() -> np.ndarray:
    """
    Loads every record of the train split into a single Numpy array.

    The train annotations are spread over `_CHUNKS_OF_TRAIN` CSV files named `train-annotation.chunk<i>.csv`, looked
    up relative to the current working directory. The files are read in ascending chunk order and their records are
    concatenated.

    When records are found, the resulting array has the shape `(N, 2)`, where `N` is the number of elements in the
    train split. The two columns are the unique ID of the image (string) and the name of the label (string).

    :return: an `ndarray` with the train data
    """
    records = []
    chunk_paths = [f"./train-annotation.chunk{index}.csv" for index in range(_CHUNKS_OF_TRAIN)]
    for chunk_path in chunk_paths:
        # The helper appends the records of this chunk to the shared list.
        _read_from_file(chunk_path, records)
    return np.asarray(records)
def read_validation() -> np.ndarray:
    """
    Loads every record of the validation split into a Numpy array.

    The records come from the file `validation-annotations.csv`, looked up relative to the current working directory.

    When records are found, the resulting array has the shape `(N, 2)`, where `N` is the number of elements in the
    validation split. The two columns are the unique ID of the image (string) and the name of the label (string).

    :return: an `ndarray` with the validation data
    """
    records = []
    # The helper fills `records` in place with one `[id, label]` pair per CSV row.
    _read_from_file("./validation-annotations.csv", records)
    validation_array = np.asarray(records)
    return validation_array
def read_test() -> np.ndarray:
    """
    Loads every record of the test split into a Numpy array.

    The records come from the file `test-annotations.csv`, looked up relative to the current working directory.

    When records are found, the resulting array has the shape `(N, 2)`, where `N` is the number of elements in the
    test split. The two columns are the unique ID of the image (string) and the name of the label (string).

    :return: an `ndarray` with the test data
    """
    records = []
    # The helper fills `records` in place with one `[id, label]` pair per CSV row.
    _read_from_file("./test-annotations.csv", records)
    test_array = np.asarray(records)
    return test_array
def _read_from_file(path, data):
    """
    Appends the records of one annotation CSV file to `data`.

    The first row of the file is treated as a header and skipped. For every following non-blank row, the first two
    columns are appended to `data` as a two-element list; any further columns are ignored.

    :param path: path of the CSV file to read
    :param data: list that is extended in place with one `[first_column, second_column]` entry per record
    :return: nothing; the result is delivered through `data`
    :raises IndexError: if a non-blank row has fewer than two columns
    """
    # `newline=""` is required by the csv module so that it handles line endings (and newlines embedded in
    # quoted fields) itself instead of receiving already-translated text.
    with open(path, newline="") as file:
        table = csv.reader(file)
        # Skip the header row; the default keeps an empty file from raising StopIteration.
        next(table, None)
        for row in table:
            # csv.reader yields an empty list for blank lines (e.g. a trailing empty line); those are not records.
            if not row:
                continue
            data.append([row[0], row[1]])