-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcommon.py
123 lines (97 loc) · 3.96 KB
/
common.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import ast
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras.applications.densenet import preprocess_input
from keras.metrics import (categorical_accuracy, top_k_categorical_accuracy)
from keras.models import Model, load_model
# Directory holding the shuffled training chunks ('train_k{}.csv.gz') that
# image_generator_xd reads.
DP_DIR = './input/shuffle-csvs/'
# Root of the competition data; list_all_categories lists its
# 'train_simplified' sub-folder.
INPUT_DIR = './input/quickdraw-doodle-recognition/'
# Side length (pixels) of the square canvas draw_cv2 draws strokes onto.
BASE_SIZE = 256
# NOTE(review): presumably the number of shuffled CSV chunks under DP_DIR;
# not referenced in the visible code -- confirm against callers.
NCSVS = 200
# Number of target classes; passed as num_classes when one-hot encoding labels.
NCATS = 340
# Seed the global NumPy and TensorFlow random generators for repeatable runs.
np.random.seed(seed=2018)
tf.random.set_seed(seed=2018)
def f2cat(filename: str) -> str:
    """Return the part of ``filename`` that precedes its first dot.

    A name without any dot is returned unchanged.
    """
    stem, _sep, _rest = filename.partition('.')
    return stem
def list_all_categories():
    """List the category names found in the ``train_simplified`` folder.

    Every entry of ``INPUT_DIR/train_simplified`` is reduced to its category
    name with ``f2cat``; the names are returned sorted case-insensitively.
    """
    train_dir = os.path.join(INPUT_DIR, 'train_simplified')
    categories = [f2cat(entry) for entry in os.listdir(train_dir)]
    categories.sort(key=str.lower)
    return categories
def apk(actual, predicted, k=3):
    """Compute the average precision at ``k`` for a single query.

    Only the first ``k`` predictions are considered. A prediction counts as a
    hit when it is in ``actual`` and has not already appeared earlier in the
    prediction list. Returns 0.0 when ``actual`` is empty.

    Source: https://github.com/benhamner/Metrics/blob/master/Python/ml_metrics/average_precision.py
    """
    top = predicted[:k] if len(predicted) > k else predicted

    hits = 0.0
    precision_sum = 0.0
    for rank, guess in enumerate(top, start=1):
        if guess not in actual:
            continue
        # A repeated guess must not be rewarded a second time.
        if guess in top[:rank - 1]:
            continue
        hits += 1.0
        precision_sum += hits / rank

    if not actual:
        return 0.0
    return precision_sum / min(len(actual), k)
def mapk(actual, predicted, k=3):
    """Compute the mean average precision at ``k`` over many queries.

    ``actual`` and ``predicted`` are iterated in lockstep; each pair is scored
    with ``apk`` and the scores are averaged with ``np.mean``.

    Source: https://github.com/benhamner/Metrics/blob/master/Python/ml_metrics/average_precision.py
    """
    per_query = []
    for truth, guesses in zip(actual, predicted):
        per_query.append(apk(truth, guesses, k))
    return np.mean(per_query)
def preds2catids(predictions):
    """Return the three highest-scoring column indices of every row.

    ``predictions`` is a 2-D score array (rows are samples). The result is a
    DataFrame with columns ``a``, ``b``, ``c`` holding the column indices of
    the best, second-best and third-best score of each row.
    """
    # Negate so that the ascending argsort ranks the largest score first.
    ranking = np.argsort(-predictions, axis=1)
    top_three = ranking[:, :3]
    return pd.DataFrame(top_three, columns=['a', 'b', 'c'])
def top_3_accuracy(y_true, y_pred):
    """Top-3 categorical accuracy: ``top_k_categorical_accuracy`` with k=3."""
    top_k = 3
    return top_k_categorical_accuracy(y_true, y_pred, k=top_k)
def draw_cv2(raw_strokes, size=256, lw=6, time_color=True):
    """Rasterise a list of strokes into an RGB image.

    Args:
        raw_strokes: Sequence of strokes; ``stroke[0]`` holds the x
            coordinates and ``stroke[1]`` the y coordinates of one stroke.
        size: Side length of the returned image; the drawing is made on a
            ``BASE_SIZE`` canvas and resized when ``size`` differs.
        lw: Line thickness passed to ``cv2.line``.
        time_color: When true, strokes get darker the later they come in the
            list (the shade stops changing after the 11th stroke); otherwise
            every stroke is drawn at 255.

    Returns:
        The image produced by ``cv2.cvtColor(..., cv2.COLOR_GRAY2RGB)``,
        resized to ``(size, size)`` if needed.
    """
    canvas = np.zeros((BASE_SIZE, BASE_SIZE), np.uint8)
    for order, stroke in enumerate(raw_strokes):
        # The shade depends only on the stroke's position in the list.
        if time_color:
            shade = 255 - min(order, 10) * 13
        else:
            shade = 255
        xs, ys = stroke[0], stroke[1]
        for j in range(1, len(xs)):
            cv2.line(canvas, (xs[j - 1], ys[j - 1]), (xs[j], ys[j]), shade, lw)

    rgb = cv2.cvtColor(canvas, cv2.COLOR_GRAY2RGB)
    if size == BASE_SIZE:
        return rgb
    return cv2.resize(rgb, (size, size))
def image_generator_xd(size, batchsize, ks, lw=6, time_color=True):
    """Endlessly yield ``(x, y)`` training batches from the shuffled CSVs.

    Each pass visits the files ``DP_DIR/train_k{k}.csv.gz`` in a fresh random
    order and reads them in chunks of ``batchsize`` rows.

    Args:
        size: Side length of the rendered images.
        batchsize: Number of CSV rows per yielded batch (the last chunk of a
            file may be smaller).
        ks: Chunk indices to draw from; anything accepted by
            ``np.random.permutation`` (an int ``n`` means ``range(n)``).
        lw: Line thickness forwarded to the stroke renderer.
        time_color: Forwarded to the stroke renderer.

    Yields:
        ``x``: float32 array of shape ``(rows, size, size, 3)`` after
        ``preprocess_input``; ``y``: one-hot labels with ``NCATS`` classes
        built from the ``y`` column.
    """
    while True:
        # Use the module-level np.random.permutation: calling the method on
        # the np.random.Generator *class* (no instance) raises TypeError, and
        # the legacy function also honours np.random.seed set at import time.
        for k in np.random.permutation(ks):
            filename = os.path.join(DP_DIR, 'train_k{}.csv.gz'.format(k))
            for df in pd.read_csv(filename, chunksize=batchsize):
                # Same parse/render/preprocess steps as inference, shared
                # through the existing helper instead of being duplicated.
                x = df_to_image_array_xd(df, size, lw=lw,
                                         time_color=time_color)
                y = keras.utils.to_categorical(df.y, num_classes=NCATS)
                yield x, y
def df_to_image_array_xd(df, size, lw=6, time_color=True):
    """Render every drawing in ``df`` and return the preprocessed image batch.

    The ``drawing`` column is parsed in place with ``ast.literal_eval`` (so
    ``df`` is modified), each parsed drawing is rendered with ``draw_cv2``,
    and the stacked batch is passed through ``preprocess_input``.

    Returns:
        float32 array of shape ``(len(df), size, size, 3)``.
    """
    df['drawing'] = df['drawing'].apply(ast.literal_eval)

    n_rows = len(df)
    batch = np.zeros((n_rows, size, size, 3))
    for row, strokes in enumerate(df.drawing.values):
        batch[row] = draw_cv2(strokes, size=size, lw=lw,
                              time_color=time_color)

    return preprocess_input(batch).astype(np.float32)
class TTA_ModelWrapper():
    """A simple test-time-augmentation (TTA) wrapper for keras vision models.

    Args:
        model (keras model): A fitted keras model with a predict method.
    """

    def __init__(self, model):
        self.model = model

    def predict(self, X):
        """Wraps the predict method of the provided model.

        Predicts on the data as given and on a horizontally mirrored copy,
        then averages the two results sample by sample.

        Args:
            X (numpy array of dim 4): The data to get predictions for,
                laid out as (samples, height, width, channels).

        Returns:
            numpy array with the averaged predictions, one row per sample.
        """
        p0 = self.model.predict(X, batch_size=128, verbose=1)
        # Mirror each image along its width axis. np.flipud would reverse
        # axis 0 -- the sample axis -- and pair every sample's prediction
        # with a *different* sample's prediction in the average below.
        mirrored = np.flip(X, axis=2)
        p1 = self.model.predict(mirrored, batch_size=128, verbose=1)
        p = (p0 + p1) / 2
        return np.array(p)