-
Notifications
You must be signed in to change notification settings - Fork 132
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #190 from mottodora/mse-ignore-nan
mean_squared_error with `ignore_nan` option
- Loading branch information
Showing
4 changed files
with
277 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,3 @@ | ||
from chainer_chemistry.functions.matmul import matmul # NOQA | ||
from chainer_chemistry.functions.mean_squared_error import mean_squared_error # NOQA | ||
from chainer_chemistry.functions.mean_squared_error import MeanSquaredError # NOQA |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
import numpy | ||
|
||
from chainer import cuda | ||
from chainer import function_node | ||
import chainer.functions | ||
from chainer.utils import type_check | ||
|
||
|
||
class MeanSquaredError(function_node.FunctionNode):

    """Mean squared error (a.k.a. Euclidean loss) function.

    The element-wise difference of the two inputs is squared, summed and
    divided by the total number of elements. When ``ignore_nan`` is true,
    NaN entries of the difference are replaced by zero before the
    reduction; the divisor is still the total number of elements.
    """

    def __init__(self, ignore_nan=False):
        # TODO(mottodora): implement task weight calculation
        self.ignore_nan = ignore_nan

    def check_type_forward(self, in_types):
        # Exactly two float32 inputs with identical shapes are accepted.
        type_check.expect(in_types.size() == 2)
        lhs_type = in_types[0]
        rhs_type = in_types[1]
        type_check.expect(
            lhs_type.dtype == numpy.float32,
            rhs_type.dtype == numpy.float32,
            lhs_type.shape == rhs_type.shape
        )

    def forward_cpu(self, inputs):
        # Both inputs are needed again in backward().
        self.retain_inputs((0, 1))
        lhs, rhs = inputs[0], inputs[1]
        residual = (lhs - rhs).ravel()
        # TODO(mottodora): add reduce option
        if self.ignore_nan:
            # A NaN difference contributes nothing to the sum of squares.
            nan_positions = numpy.isnan(residual)
            residual[nan_positions] = 0.
        squared_sum = residual.dot(residual)
        mean = squared_sum / residual.size
        return numpy.array(mean, dtype=residual.dtype),

    def forward_gpu(self, inputs):
        cupy = cuda.cupy
        # Both inputs are needed again in backward().
        self.retain_inputs((0, 1))
        lhs, rhs = inputs[0], inputs[1]
        residual = (lhs - rhs).ravel()
        # TODO(mottodora): add reduce option
        if self.ignore_nan:
            # A NaN difference contributes nothing to the sum of squares.
            nan_positions = cupy.isnan(residual)
            residual[nan_positions] = 0.
        # Divide by the element count expressed in the input dtype.
        count = residual.dtype.type(residual.size)
        return residual.dot(residual) / count,

    def backward(self, indexes, gy):
        x0, x1 = self.get_retained_inputs()
        xp = cuda.get_array_module(x0)
        residual = x0 - x1
        if self.ignore_nan:
            # Zero the NaN entries so that they yield zero gradient.
            nan_positions = xp.isnan(residual.array)
            zeros = xp.zeros_like(residual.array)
            residual = chainer.functions.where(nan_positions, zeros,
                                               residual)
        upstream = chainer.functions.broadcast_to(gy[0], residual.shape)
        gx0 = upstream * residual * (2. / residual.size)

        # Only return gradients for the requested inputs; the gradient
        # with respect to x1 is the negation of the one for x0.
        grads = []
        if 0 in indexes:
            grads.append(gx0)
        if 1 in indexes:
            grads.append(-gx0)
        return grads
|
||
|
||
def mean_squared_error(x0, x1, ignore_nan=False):
    """Mean squared error function.

    This function computes the mean squared error between two variables.
    The squared differences are summed over all elements and divided by
    the total number of elements. Note that the error is not scaled by
    1/2.

    Args:
        x0 (:class:`~chainer.Variable` or :class:`numpy.ndarray` or
            :class:`cupy.ndarray`): Input variable.
        x1 (:class:`~chainer.Variable` or :class:`numpy.ndarray` or
            :class:`cupy.ndarray`): Input variable.
        ignore_nan (bool): If ``True``, elements whose difference is NaN
            contribute zero to the sum of squares. The sum is still
            divided by the total number of elements, including the
            ignored ones.

    Returns:
        ~chainer.Variable:
            A variable holding an array representing the mean squared
            error of two inputs.

    """
    loss_function = MeanSquaredError(ignore_nan)
    outputs = loss_function.apply((x0, x1))
    return outputs[0]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,196 @@ | ||
import numpy | ||
import pytest | ||
|
||
import chainer | ||
from chainer import cuda | ||
from chainer import gradient_check | ||
|
||
import chainer_chemistry | ||
|
||
|
||
@pytest.fixture
def inputs():
    """Return two random float32 (4, 3) arrays and one containing NaNs."""
    shape = (4, 3)
    x0 = numpy.random.uniform(-1, 1, shape).astype(numpy.float32)
    x1 = numpy.random.uniform(-1, 1, shape).astype(numpy.float32)
    nan = numpy.nan
    # Fixed target with one NaN in each of the first three rows.
    rows = [
        [0.3, nan, 0.2],
        [nan, 0.1, 0.5],
        [0.9, 0.7, nan],
        [0.2, -0.3, 0.4],
    ]
    x2 = numpy.array(rows, dtype=numpy.float32)
    return x0, x1, x2
|
||
|
||
@pytest.fixture
def grads():
    """Return a random scalar output gradient and two (4, 3) grad-grads."""

    def draw(shape):
        # Uniform samples in [-1, 1) cast to float32.
        return numpy.random.uniform(-1, 1, shape).astype(numpy.float32)

    gy = draw(())
    ggx0 = draw((4, 3))
    ggx1 = draw((4, 3))
    return gy, ggx0, ggx1
|
||
|
||
def check_forward(inputs):
    """Compare the forward result with an element-by-element reference."""
    lhs_data, rhs_data, _ = inputs
    loss = chainer_chemistry.functions.mean_squared_error(
        chainer.Variable(lhs_data), chainer.Variable(rhs_data))
    actual = cuda.to_cpu(loss.data)
    assert loss.dtype == numpy.float32
    assert actual.shape == ()

    # Reference value: accumulate the squared differences one by one in
    # float64, then divide by the element count.
    lhs_flat = cuda.to_cpu(lhs_data).ravel()
    rhs_flat = cuda.to_cpu(rhs_data).ravel()
    total = numpy.float64(0.)
    for lhs_value, rhs_value in zip(lhs_flat, rhs_flat):
        total += (lhs_value - rhs_value) ** 2
    expected = total / lhs_flat.size
    assert numpy.allclose(actual, expected)
|
||
|
||
def check_forward_ignore_nan(inputs):
    """Check the ``ignore_nan=True`` forward result on a NaN target."""
    pred_data, _, target_data = inputs
    loss = chainer_chemistry.functions.mean_squared_error(
        chainer.Variable(pred_data), chainer.Variable(target_data),
        ignore_nan=True)
    actual = cuda.to_cpu(loss.data)
    assert loss.dtype == numpy.float32
    assert actual.shape == ()

    pred_cpu = cuda.to_cpu(pred_data)
    target_cpu = cuda.to_cpu(target_data)
    # 1 where the target is a number, 0 where it is NaN.
    valid = (~numpy.isnan(target_cpu)).astype(target_cpu.dtype)
    # Replace NaNs so the masked-out terms are finite before weighting.
    target_filled = numpy.nan_to_num(target_cpu)

    total = numpy.float64(0.)
    terms = zip(pred_cpu.ravel(), target_filled.ravel(), valid.ravel())
    for pred_value, target_value, weight in terms:
        total += (pred_value - target_value) ** 2 * weight
    # The divisor is the full element count, NaN positions included.
    expected = total / pred_cpu.size
    assert numpy.allclose(actual, expected)
|
||
|
||
def check_forward_ignore_nan_with_nonnan_value(inputs):
    """Check the ``ignore_nan=True`` forward result on the x0/x1 pair."""
    pred_data, target_data, _ = inputs
    loss = chainer_chemistry.functions.mean_squared_error(
        chainer.Variable(pred_data), chainer.Variable(target_data),
        ignore_nan=True)
    actual = cuda.to_cpu(loss.data)
    assert loss.dtype == numpy.float32
    assert actual.shape == ()

    pred_cpu = cuda.to_cpu(pred_data)
    target_cpu = cuda.to_cpu(target_data)
    # 1 where the target is a number, 0 where it is NaN.
    valid = (~numpy.isnan(target_cpu)).astype(target_cpu.dtype)
    target_filled = numpy.nan_to_num(target_cpu)

    total = numpy.float64(0.)
    terms = zip(pred_cpu.ravel(), target_filled.ravel(), valid.ravel())
    for pred_value, target_value, weight in terms:
        total += (pred_value - target_value) ** 2 * weight
    # The divisor is the full element count.
    expected = total / pred_cpu.size
    assert numpy.allclose(actual, expected)
|
||
|
||
def test_forward_cpu(inputs):
    """Run every forward check on the CPU (numpy) inputs."""
    forward_checks = (
        check_forward,
        check_forward_ignore_nan,
        check_forward_ignore_nan_with_nonnan_value,
    )
    for check in forward_checks:
        check(inputs)
|
||
|
||
@pytest.mark.gpu
def test_forward_gpu(inputs):
    """Run every forward check on inputs transferred to the GPU.

    Each check receives a 3-tuple laid out like the ``inputs`` fixture;
    the slot a check does not read is filled with ``None``.
    """
    x0, x1, x2 = inputs
    check_forward((cuda.to_gpu(x0), cuda.to_gpu(x1), None))
    check_forward_ignore_nan((cuda.to_gpu(x0), None, cuda.to_gpu(x2)))
    # Also exercise ignore_nan=True on the x0/x1 pair, matching the CPU
    # forward test and the GPU backward / double-backward tests.
    check_forward_ignore_nan_with_nonnan_value(
        (cuda.to_gpu(x0), cuda.to_gpu(x1), None))
|
||
|
||
def check_backward(inputs):
    """Gradient-check ``mean_squared_error`` with default options.

    No explicit output gradient is supplied (``None``) and the finite
    difference step is ``eps=1e-2``.
    """
    lhs_data, rhs_data, _ = inputs
    loss_func = chainer_chemistry.functions.mean_squared_error
    gradient_check.check_backward(
        loss_func, (lhs_data, rhs_data), None, eps=1e-2)
|
||
|
||
def check_backward_ignore_nan(inputs):
    """Gradient-check the ``ignore_nan=True`` loss against the NaN target."""
    pred_data, _, target_data = inputs

    def loss_ignoring_nan(pred, target):
        # Fix ignore_nan=True so the checker sees a two-argument function.
        return chainer_chemistry.functions.mean_squared_error(
            pred, target, ignore_nan=True)

    gradient_check.check_backward(
        loss_ignoring_nan, (pred_data, target_data), None, eps=1e-2)
|
||
|
||
def check_backward_ignore_nan_with_nonnan_value(inputs):
    """Gradient-check the ``ignore_nan=True`` loss on the x0/x1 pair."""
    pred_data, target_data, _ = inputs

    def loss_ignoring_nan(pred, target):
        # Fix ignore_nan=True so the checker sees a two-argument function.
        return chainer_chemistry.functions.mean_squared_error(
            pred, target, ignore_nan=True)

    gradient_check.check_backward(
        loss_ignoring_nan, (pred_data, target_data), None, eps=1e-2)
|
||
|
||
def test_backward_cpu(inputs):
    """Run every backward check on the CPU (numpy) inputs."""
    backward_checks = (
        check_backward,
        check_backward_ignore_nan,
        check_backward_ignore_nan_with_nonnan_value,
    )
    for check in backward_checks:
        check(inputs)
|
||
|
||
@pytest.mark.gpu
def test_backward_gpu(inputs):
    """Run every backward check on inputs transferred to the GPU."""
    x0, x1, x2 = inputs
    to_gpu = cuda.to_gpu

    # Each check gets freshly transferred arrays; the slot a check does
    # not read is filled with None.
    plain_inputs = (to_gpu(x0), to_gpu(x1), None)
    check_backward(plain_inputs)

    nan_inputs = (to_gpu(x0), None, to_gpu(x2))
    check_backward_ignore_nan(nan_inputs)

    nonnan_inputs = (to_gpu(x0), to_gpu(x1), None)
    check_backward_ignore_nan_with_nonnan_value(nonnan_inputs)
|
||
|
||
def check_double_backward(inputs, grads):
    """Run chainer's double-backward check on ``mean_squared_error``."""
    lhs_data, rhs_data, _ = inputs
    gy, ggx0, ggx1 = grads
    loss_func = chainer_chemistry.functions.mean_squared_error
    gradient_check.check_double_backward(
        loss_func, (lhs_data, rhs_data), gy, (ggx0, ggx1))
|
||
|
||
def check_double_backward_ignore_nan(inputs, grads):
    """Double-backward check of the ``ignore_nan=True`` loss, NaN target."""
    pred_data, _, target_data = inputs
    gy, ggx0, ggx1 = grads

    def loss_ignoring_nan(pred, target):
        # Fix ignore_nan=True so the checker sees a two-argument function.
        return chainer_chemistry.functions.mean_squared_error(
            pred, target, ignore_nan=True)

    gradient_check.check_double_backward(
        loss_ignoring_nan, (pred_data, target_data), gy, (ggx0, ggx1))
|
||
|
||
def check_double_backward_ignore_nan_with_nonnan_value(inputs, grads):
    """Double-backward check of the ``ignore_nan=True`` loss on x0/x1."""
    pred_data, target_data, _ = inputs
    gy, ggx0, ggx1 = grads

    def loss_ignoring_nan(pred, target):
        # Fix ignore_nan=True so the checker sees a two-argument function.
        return chainer_chemistry.functions.mean_squared_error(
            pred, target, ignore_nan=True)

    gradient_check.check_double_backward(
        loss_ignoring_nan, (pred_data, target_data), gy, (ggx0, ggx1))
|
||
|
||
def test_double_backward_cpu(inputs, grads):
    """Run every double-backward check on the CPU (numpy) data."""
    double_backward_checks = (
        check_double_backward,
        check_double_backward_ignore_nan,
        check_double_backward_ignore_nan_with_nonnan_value,
    )
    for check in double_backward_checks:
        check(inputs, grads)
|
||
|
||
@pytest.mark.gpu
def test_double_backward_gpu(inputs, grads):
    """Run every double-backward check on data transferred to the GPU."""
    x0, x1, x2 = inputs
    gy, ggx0, ggx1 = grads
    to_gpu = cuda.to_gpu

    def gpu_grads():
        # Transfer the gradients anew for every check.
        return to_gpu(gy), to_gpu(ggx0), to_gpu(ggx1)

    # The input slot a check does not read is filled with None.
    check_double_backward(
        (to_gpu(x0), to_gpu(x1), None), gpu_grads())
    check_double_backward_ignore_nan(
        (to_gpu(x0), None, to_gpu(x2)), gpu_grads())
    check_double_backward_ignore_nan_with_nonnan_value(
        (to_gpu(x0), to_gpu(x1), None), gpu_grads())
|
||
|
||
if __name__ == '__main__':
    # Allow running this test module directly, with verbose output.
    pytest_args = [__file__, '-v']
    pytest.main(pytest_args)