-
Notifications
You must be signed in to change notification settings - Fork 226
/
preprocessing.py
277 lines (241 loc) · 9.8 KB
/
preprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
import tensorflow as tf
_R_MEAN = 123.68
_G_MEAN = 116.78
_B_MEAN = 103.94
_RESIZE_SIDE_MIN = 256
_RESIZE_SIDE_MAX = 512
def _crop(image, offset_height, offset_width, crop_height, crop_width):
"""Crops the given image using the provided offsets and sizes.
Note that the method doesn't assume we know the input image size but it does
assume we know the input image rank.
Args:
image: an image of shape [height, width, channels].
offset_height: a scalar tensor indicating the height offset.
offset_width: a scalar tensor indicating the width offset.
crop_height: the height of the cropped image.
crop_width: the width of the cropped image.
Returns:
the cropped (and resized) image.
Raises:
InvalidArgumentError: if the rank is not 3 or if the image dimensions are
less than the crop size.
"""
original_shape = tf.shape(image)
rank_assertion = tf.Assert(
tf.equal(tf.rank(image), 3), ['Rank of image must be equal to 3.']
)
with tf.control_dependencies([rank_assertion]):
cropped_shape = tf.stack([crop_height, crop_width, original_shape[2]])
size_assertion = tf.Assert(
tf.logical_and(
tf.greater_equal(original_shape[0], crop_height),
tf.greater_equal(original_shape[1], crop_width)
), ['Crop size greater than the image size.']
)
offsets = tf.cast(tf.stack([offset_height, offset_width, 0]), tf.int32)
# Use tf.slice instead of crop_to_bounding box as it accepts tensors to
# define the crop size.
with tf.control_dependencies([size_assertion]):
image = tf.slice(image, offsets, cropped_shape)
return tf.reshape(image, cropped_shape)
def _central_crop(image_list, crop_height, crop_width):
"""Performs central crops of the given image list.
Args:
image_list: a list of image tensors of the same dimension but possibly
varying channel.
crop_height: the height of the image following the crop.
crop_width: the width of the image following the crop.
Returns:
the list of cropped images.
"""
outputs = []
for image in image_list:
image_height = tf.shape(image)[0]
image_width = tf.shape(image)[1]
offset_height = (image_height-crop_height) / 2
offset_width = (image_width-crop_width) / 2
outputs.append(
_crop(image, offset_height, offset_width, crop_height, crop_width)
)
return outputs
def _mean_image_subtraction(image, means):
"""Subtracts the given means from each image channel.
For example:
means = [123.68, 116.779, 103.939]
image = _mean_image_subtraction(image, means)
Note that the rank of `image` must be known.
Args:
image: a tensor of size [height, width, C].
means: a C-vector of values to subtract from each channel.
Returns:
the centered image.
Raises:
ValueError: If the rank of `image` is unknown, if `image` has a rank other
than three or if the number of channels in `image` doesn't match the
number of values in `means`.
"""
if image.get_shape().ndims != 3:
raise ValueError('Input must be of size [height, width, C>0]')
num_channels = image.get_shape().as_list()[-1]
if len(means) != num_channels:
raise ValueError('len(means) must match the number of channels')
channels = tf.split(axis=2, num_or_size_splits=num_channels, value=image)
for i in range(num_channels):
channels[i] -= means[i]
return tf.concat(axis=2, values=channels)
def _aspect_preserving_resize(image, smallest_side):
"""Resize images preserving the original aspect ratio.
Args:
image: A 3-D image `Tensor`.
smallest_side: A python integer or scalar `Tensor` indicating the size of
the smallest side after resize.
Returns:
resized_image: A 3-D tensor containing the resized image.
"""
smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32)
shape = tf.shape(image)
height = shape[0]
width = shape[1]
new_height, new_width = _smallest_size_at_least(
height, width, smallest_side
)
image = tf.expand_dims(image, 0)
resized_image = tf.image.resize(image, [new_height, new_width])
resized_image = tf.squeeze(resized_image)
resized_image.set_shape([None, None, 3])
return resized_image
def _smallest_size_at_least(height, width, smallest_side):
"""Computes new shape with the smallest side equal to `smallest_side`.
Computes new shape with the smallest side equal to `smallest_side` while
preserving the original aspect ratio.
Args:
height: an int32 scalar tensor indicating the current height.
width: an int32 scalar tensor indicating the current width.
smallest_side: A python integer or scalar `Tensor` indicating the size of
the smallest side after resize.
Returns:
new_height: an int32 scalar tensor indicating the new height.
new_width: and int32 scalar tensor indicating the new width.
"""
smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32)
height = tf.cast(height, tf.float32)
width = tf.cast(width, tf.float32)
smallest_side = tf.cast(smallest_side, tf.float32)
scale = tf.cond(
tf.greater(height, width), lambda: smallest_side / width,
lambda: smallest_side / height
)
new_height = tf.cast(tf.math.rint(height * scale), tf.int32)
new_width = tf.cast(tf.math.rint(width * scale), tf.int32)
return new_height, new_width
def inception_preprocess(
image,
height,
width,
central_fraction=0.875,
scope=None,
central_crop=False
):
"""Prepare one image for evaluation.
If height and width are specified it would output an image with that size by
applying resize_bilinear.
If central_fraction is specified it would crop the central fraction of the
input image.
Args:
image: 3-D Tensor of image. If dtype is tf.float32 then the range should be
[0, 1], otherwise it would converted to tf.float32 assuming that the range
is [0, MAX], where MAX is largest positive representable number for
int(8/16/32) data type (see `tf.image.convert_image_dtype` for details).
height: integer
width: integer
central_fraction: Optional Float, fraction of the image to crop.
scope: Optional scope for name_scope.
central_crop: Enable central cropping of images during preprocessing for
evaluation.
Returns:
3-D float Tensor of prepared image.
"""
if image.dtype != tf.float32:
image = tf.image.convert_image_dtype(image, dtype=tf.float32)
# Crop the central region of the image with an area containing 87.5% of
# the original image.
image = tf.image.central_crop(image, central_fraction=central_fraction)
if height and width:
# Resize the image to the specified height and width.
image = tf.expand_dims(image, 0)
image = tf.image.resize(image, [height, width])
image = tf.squeeze(image, [0])
image = tf.subtract(image, 0.5)
image = tf.multiply(image, 2.0)
return image
def vgg_preprocess(image, output_height, output_width):
image = _aspect_preserving_resize(image, _RESIZE_SIDE_MIN)
image = _central_crop([image], output_height, output_width)[0]
image.set_shape([output_height, output_width, 3])
image = tf.cast(image, tf.float32)
return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN])
def resnet50_v1_5_tf1_ngc_preprocess(
image,
height,
width,
central_fraction=0.875,
scope=None,
central_crop=False
):
"""Prepare one image for evaluation.
If height and width are specified it would output an image with that size by
applying resize_bilinear.
If central_fraction is specified it would crop the central fraction of the
input image.
Args:
image: 3-D Tensor of image. If dtype is tf.float32 then the range should be
[0, 1], otherwise it would converted to tf.float32 assuming that the range
is [0, MAX], where MAX is largest positive representable number for
int(8/16/32) data type (see `tf.image.convert_image_dtype` for details).
height: integer
width: integer
central_fraction: Optional Float, fraction of the image to crop.
scope: Optional scope for name_scope.
central_crop: Enable central cropping of images during preprocessing for
evaluation.
Returns:
3-D float Tensor of prepared image.
"""
if image.dtype != tf.float32:
image = tf.image.convert_image_dtype(image, dtype=tf.float32)
# Crop the central region of the image with an area containing 87.5% of
# the original image.
image = tf.image.central_crop(image, central_fraction=central_fraction)
if height and width:
# Resize the image to the specified height and width.
image = tf.expand_dims(image, 0)
image = tf.image.resize(image, [height, width])
image = tf.squeeze(image, [0])
image = image * 255
return image
def vision_transformer_preprocess(image, height, width):
image_resized = tf.image.resize(image, (height, width))
image_resized = tf.cast(image_resized, tf.float32)
image_resized = (image_resized-127.5) / 127.5
return image_resized
def swin_transformer_preprocess(image, height, width):
if height != width:
raise UnimplementedError(
"Preprocessed image must have the same height and width."
)
else:
size = height
crop_layer = tf.keras.layers.CenterCrop(size, size)
norm_layer = tf.keras.layers.Normalization(
mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
variance=[(0.229 * 255)**2, (0.224 * 255)**2, (0.225 * 255)**2],
)
if size == 224:
resize_size = int((256/224) * size)
image = tf.image.resize(
image, (resize_size, resize_size), method="bicubic"
)
image = crop_layer(image)
else:
image = tf.image.resize(image, (size, size), method="bicubic")
return norm_layer(image)