From 7397f4b0d0d4ffd0685a2a9c84d407b348199f5d Mon Sep 17 00:00:00 2001 From: githubnemo Date: Mon, 13 Jun 2016 06:16:21 +0200 Subject: [PATCH] Resolve #2960 (#2961) * Resolve #2960 Introduce `K.var` so that the standard deviation computation can be made numerically stable. Instead of K.std(x) the user is able to write K.sqrt(K.var(x) + self.epsilon) avoiding a division by zero in the gradient computation of `sqrt`. * Fix typos --- keras/backend/tensorflow_backend.py | 16 +++++++++++----- keras/backend/theano_backend.py | 4 ++++ keras/layers/normalization.py | 2 +- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/keras/backend/tensorflow_backend.py b/keras/backend/tensorflow_backend.py index f2a98a00f1f..13a78be15ba 100644 --- a/keras/backend/tensorflow_backend.py +++ b/keras/backend/tensorflow_backend.py @@ -314,17 +314,23 @@ def prod(x, axis=None, keepdims=False): return tf.reduce_prod(x, reduction_indices=axis, keep_dims=keepdims) -def std(x, axis=None, keepdims=False): - '''Standard deviation of a tensor, alongside the specificied axis. +def var(x, axis=None, keepdims=False): + '''Variance of a tensor, alongside the specified axis. ''' axis = _normalize_axis(axis, ndim(x)) if x.dtype.base_dtype == tf.bool: x = tf.cast(x, _FLOATX) m = tf.reduce_mean(x, reduction_indices=axis, keep_dims=True) devs_squared = tf.square(x - m) - return tf.sqrt(tf.reduce_mean(devs_squared, - reduction_indices=axis, - keep_dims=keepdims)) + return tf.reduce_mean(devs_squared, + reduction_indices=axis, + keep_dims=keepdims) + + +def std(x, axis=None, keepdims=False): + '''Standard deviation of a tensor, alongside the specified axis. + ''' + return tf.sqrt(var(x, axis=axis, keepdims=keepdims)) def mean(x, axis=None, keepdims=False): diff --git a/keras/backend/theano_backend.py b/keras/backend/theano_backend.py index 0e2d230b251..53bc5eaaa37 100644 --- a/keras/backend/theano_backend.py +++ b/keras/backend/theano_backend.py @@ -200,6 +200,10 @@ def std(x, axis=None, keepdims=False): return T.std(x, axis=axis, keepdims=keepdims) +def var(x, axis=None, keepdims=False): + return T.var(x, axis=axis, keepdims=keepdims) + + def any(x, axis=None, keepdims=False): '''Bitwise reduction (logical OR). ''' diff --git a/keras/layers/normalization.py b/keras/layers/normalization.py index 0c4d653d42b..93b424b280d 100644 --- a/keras/layers/normalization.py +++ b/keras/layers/normalization.py @@ -139,7 +139,7 @@ def call(self, x, mask=None): elif self.mode == 1: # sample-wise normalization m = K.mean(x, axis=-1, keepdims=True) - std = K.std(x, axis=-1, keepdims=True) + std = K.sqrt(K.var(x, axis=-1, keepdims=True) + self.epsilon) x_normed = (x - m) / (std + self.epsilon) out = self.gamma * x_normed + self.beta return out