Merge pull request #190 from mottodora/mse-ignore-nan

mean_squared_error with `ignore_nan` option
chainer · Jun 22, 2018 · ea10e10 · ea10e10
2 parents 72895a7 + 2c26c00
commit ea10e10
Show file tree

Hide file tree

Showing 4 changed files with 277 additions and 0 deletions.
diff --git a/chainer_chemistry/functions/__init__.py b/chainer_chemistry/functions/__init__.py
@@ -1 +1,3 @@
 from chainer_chemistry.functions.matmul import matmul  # NOQA
+from chainer_chemistry.functions.mean_squared_error import mean_squared_error  # NOQA
+from chainer_chemistry.functions.mean_squared_error import MeanSquaredError  # NOQA
diff --git a/chainer_chemistry/functions/mean_squared_error.py b/chainer_chemistry/functions/mean_squared_error.py
@@ -0,0 +1,78 @@
+import numpy
+
+from chainer import cuda
+from chainer import function_node
+import chainer.functions
+from chainer.utils import type_check
+
+
+class MeanSquaredError(function_node.FunctionNode):
+
+    """Mean squared error (a.k.a. Euclidean loss) function.
+
+    The forward pass sums the squared element-wise differences of two
+    ``float32`` inputs of equal shape and divides by the element count.
+
+    Args:
+        ignore_nan (bool): If ``True``, NaN differences are replaced by
+            zero in both the forward and the backward computation; the
+            divisor stays the total number of elements.
+    """
+
+    def __init__(self, ignore_nan=False):
+        # TODO(mottodora): implement task weight calculation
+        self.ignore_nan = ignore_nan
+
+    def check_type_forward(self, in_types):
+        # Validate the arity before looking at the individual inputs.
+        type_check.expect(in_types.size() == 2)
+        x0_type = in_types[0]
+        x1_type = in_types[1]
+        type_check.expect(
+            x0_type.dtype == numpy.float32,
+            x1_type.dtype == numpy.float32,
+            x0_type.shape == x1_type.shape
+        )
+
+    def forward_cpu(self, inputs):
+        lhs, rhs = inputs[0], inputs[1]
+        self.retain_inputs((0, 1))
+        # The subtraction allocates a new array, so zeroing entries below
+        # never touches the caller's data.
+        residual = (lhs - rhs).ravel()
+        # TODO(mottodora): add reduce option
+        if self.ignore_nan:
+            residual[numpy.isnan(residual)] = 0.
+        mse = residual.dot(residual) / residual.size
+        # Wrap the scalar in a 0-dim array of the input dtype.
+        return numpy.array(mse, dtype=residual.dtype),
+
+    def forward_gpu(self, inputs):
+        lhs, rhs = inputs[0], inputs[1]
+        self.retain_inputs((0, 1))
+        residual = (lhs - rhs).ravel()
+        # TODO(mottodora): add reduce option
+        if self.ignore_nan:
+            residual[cuda.cupy.isnan(residual)] = 0.
+        # Cast the element count to the array dtype before dividing.
+        count = residual.dtype.type(residual.size)
+        return residual.dot(residual) / count,
+
+    def backward(self, indexes, gy):
+        lhs, rhs = self.get_retained_inputs()
+        diff = lhs - rhs
+        if self.ignore_nan:
+            # Zero out NaN differences so they yield zero gradient.
+            xp = cuda.get_array_module(lhs)
+            raw = diff.array
+            diff = chainer.functions.where(
+                xp.isnan(raw), xp.zeros_like(raw), diff)
+        upstream = chainer.functions.broadcast_to(gy[0], diff.shape)
+        gx0 = upstream * diff * (2. / diff.size)
+        # Only the requested input gradients are returned, in index order.
+        grads = []
+        if 0 in indexes:
+            grads.append(gx0)
+        if 1 in indexes:
+            grads.append(-gx0)
+        return grads
+
+
+def mean_squared_error(x0, x1, ignore_nan=False):
+    """Compute the mean squared error between two variables.
+
+    The squared element-wise differences of the inputs are summed and
+    divided by the total number of elements. Note that the error is not
+    scaled by 1/2.
+
+    Args:
+        x0 (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
+        :class:`cupy.ndarray`): First input variable.
+        x1 (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
+        :class:`cupy.ndarray`): Second input variable.
+        ignore_nan (bool): If ``True``, element-wise differences that are
+            NaN contribute zero to the sum. The divisor is still the
+            number of all elements, NaN positions included.
+
+    Returns:
+        ~chainer.Variable:
+            A variable holding a scalar array with the mean squared
+            error of the two inputs.
+    """
+    loss, = MeanSquaredError(ignore_nan).apply((x0, x1))
+    return loss
diff --git a/docs/source/functions.rst b/docs/source/functions.rst
@@ -11,3 +11,4 @@ Function implementations
    :nosignatures:
 
    chainer_chemistry.functions.matmul
+   chainer_chemistry.functions.mean_squared_error
diff --git a/tests/functions_tests/test_mean_squared_error.py b/tests/functions_tests/test_mean_squared_error.py
@@ -0,0 +1,196 @@
+import numpy
+import pytest
+
+import chainer
+from chainer import cuda
+from chainer import gradient_check
+
+import chainer_chemistry
+
+
+@pytest.fixture
+def inputs():
+    """Provide two random float32 arrays and one fixed array with NaNs.
+
+    All three arrays have shape (4, 3); only the third contains NaN.
+    """
+    shape = (4, 3)
+    nan = numpy.nan
+    x0 = numpy.random.uniform(-1, 1, shape).astype(numpy.float32)
+    x1 = numpy.random.uniform(-1, 1, shape).astype(numpy.float32)
+    x2 = numpy.asarray([
+        [0.3, nan, 0.2],
+        [nan, 0.1, 0.5],
+        [0.9, 0.7, nan],
+        [0.2, -0.3, 0.4],
+    ]).astype(numpy.float32)
+    return x0, x1, x2
+
+
+@pytest.fixture
+def grads():
+    """Provide a random scalar output gradient and two (4, 3) input
+    second-order gradients, all float32."""
+    shape = (4, 3)
+    gy = numpy.random.uniform(-1, 1, ()).astype(numpy.float32)
+    ggx0 = numpy.random.uniform(-1, 1, shape).astype(numpy.float32)
+    ggx1 = numpy.random.uniform(-1, 1, shape).astype(numpy.float32)
+    return gy, ggx0, ggx1
+
+
+def check_forward(inputs):
+    """Compare the default forward result with a reference computation.
+
+    Uses the first two arrays of ``inputs``; the third is ignored.
+    """
+    x0_data, x1_data, _ = inputs
+    loss = chainer_chemistry.functions.mean_squared_error(
+        chainer.Variable(x0_data), chainer.Variable(x1_data))
+    loss_value = cuda.to_cpu(loss.data)
+    assert loss.dtype == numpy.float32
+    assert loss_value.shape == ()
+
+    # Reference value: square the float32 differences, accumulate them
+    # sequentially in double precision, then divide by the element count.
+    x0_cpu = cuda.to_cpu(x0_data)
+    x1_cpu = cuda.to_cpu(x1_data)
+    squared = (x0_cpu - x1_cpu) ** 2
+    loss_expect = sum(squared.ravel().tolist()) / x0_cpu.size
+    assert numpy.allclose(loss_value, loss_expect)
+
+
+def check_forward_ignore_nan(inputs):
+    """Compare the ``ignore_nan=True`` forward result on NaN-containing
+    targets with a reference computation.
+
+    Uses the first and third arrays of ``inputs``; the second is ignored.
+    """
+    x0_data, _, x2_data = inputs
+    loss = chainer_chemistry.functions.mean_squared_error(
+        chainer.Variable(x0_data), chainer.Variable(x2_data),
+        ignore_nan=True)
+    loss_value = cuda.to_cpu(loss.data)
+    assert loss.dtype == numpy.float32
+    assert loss_value.shape == ()
+
+    # Reference value: NaN targets are masked out of the sum, but the
+    # divisor remains the full element count.
+    x0_cpu = cuda.to_cpu(x0_data)
+    x2_cpu = cuda.to_cpu(x2_data)
+    valid = (~numpy.isnan(x2_cpu)).astype(x2_cpu.dtype)
+    squared = (x0_cpu - numpy.nan_to_num(x2_cpu)) ** 2 * valid
+    loss_expect = sum(squared.ravel().tolist()) / x0_cpu.size
+    assert numpy.allclose(loss_value, loss_expect)
+
+
+def check_forward_ignore_nan_with_nonnan_value(inputs):
+    """Compare the ``ignore_nan=True`` forward result on the first two
+    arrays of ``inputs`` with a reference computation.
+
+    The third array of ``inputs`` is ignored.
+    """
+    x0_data, x1_data, _ = inputs
+    loss = chainer_chemistry.functions.mean_squared_error(
+        chainer.Variable(x0_data), chainer.Variable(x1_data),
+        ignore_nan=True)
+    loss_value = cuda.to_cpu(loss.data)
+    assert loss.dtype == numpy.float32
+    assert loss_value.shape == ()
+
+    # Same masked reference as the NaN case; the mask is all ones when
+    # the targets contain no NaN.
+    x0_cpu = cuda.to_cpu(x0_data)
+    x1_cpu = cuda.to_cpu(x1_data)
+    valid = (~numpy.isnan(x1_cpu)).astype(x1_cpu.dtype)
+    squared = (x0_cpu - numpy.nan_to_num(x1_cpu)) ** 2 * valid
+    loss_expect = sum(squared.ravel().tolist()) / x0_cpu.size
+    assert numpy.allclose(loss_value, loss_expect)
+
+
+def test_forward_cpu(inputs):
+    """Run every forward check on the CPU fixture arrays."""
+    forward_checks = (
+        check_forward,
+        check_forward_ignore_nan,
+        check_forward_ignore_nan_with_nonnan_value,
+    )
+    for check in forward_checks:
+        check(inputs)
+
+
+@pytest.mark.gpu
+def test_forward_gpu(inputs):
+    """Run every forward check on GPU copies of the fixture arrays."""
+    x0, x1, x2 = inputs
+    check_forward((cuda.to_gpu(x0), cuda.to_gpu(x1), None))
+    check_forward_ignore_nan((cuda.to_gpu(x0), None, cuda.to_gpu(x2)))
+    # Mirror test_forward_cpu: also exercise ``ignore_nan=True`` on
+    # NaN-free data so the GPU path gets the same coverage as the CPU one.
+    check_forward_ignore_nan_with_nonnan_value((cuda.to_gpu(x0),
+                                                cuda.to_gpu(x1), None))
+
+
+def check_backward(inputs):
+    """Gradient-check the default loss on the first two input arrays."""
+    x0_data, x1_data, _ = inputs
+    loss_fn = chainer_chemistry.functions.mean_squared_error
+    x_data = (x0_data, x1_data)
+    gradient_check.check_backward(loss_fn, x_data, None, eps=1e-2)
+
+
+def check_backward_ignore_nan(inputs):
+    """Gradient-check the ``ignore_nan=True`` loss against the
+    NaN-containing third input array."""
+    x0_data, _, x2_data = inputs
+    gradient_check.check_backward(
+        lambda a, b: chainer_chemistry.functions.mean_squared_error(
+            a, b, ignore_nan=True),
+        (x0_data, x2_data), None, eps=1e-2)
+
+
+def check_backward_ignore_nan_with_nonnan_value(inputs):
+    """Gradient-check the ``ignore_nan=True`` loss on the first two input
+    arrays."""
+    x0_data, x1_data, _ = inputs
+    gradient_check.check_backward(
+        lambda a, b: chainer_chemistry.functions.mean_squared_error(
+            a, b, ignore_nan=True),
+        (x0_data, x1_data), None, eps=1e-2)
+
+
+def test_backward_cpu(inputs):
+    """Run every backward check on the CPU fixture arrays."""
+    backward_checks = (
+        check_backward,
+        check_backward_ignore_nan,
+        check_backward_ignore_nan_with_nonnan_value,
+    )
+    for check in backward_checks:
+        check(inputs)
+
+
+@pytest.mark.gpu
+def test_backward_gpu(inputs):
+    """Run every backward check on GPU copies of the fixture arrays."""
+    x0, x1, x2 = inputs
+    # Each check receives its own freshly transferred arrays.
+    gpu = cuda.to_gpu
+    check_backward((gpu(x0), gpu(x1), None))
+    check_backward_ignore_nan((gpu(x0), None, gpu(x2)))
+    check_backward_ignore_nan_with_nonnan_value((gpu(x0), gpu(x1), None))
+
+
+def check_double_backward(inputs, grads):
+    """Second-order gradient-check the default loss on the first two
+    input arrays."""
+    x0, x1, _ = inputs
+    gy, ggx0, ggx1 = grads
+    loss_fn = chainer_chemistry.functions.mean_squared_error
+    gradient_check.check_double_backward(
+        loss_fn, (x0, x1), gy, (ggx0, ggx1))
+
+
+def check_double_backward_ignore_nan(inputs, grads):
+    """Second-order gradient-check the ``ignore_nan=True`` loss against
+    the NaN-containing third input array."""
+    x0, _, x2 = inputs
+    gy, ggx0, ggx1 = grads
+    gradient_check.check_double_backward(
+        lambda a, b: chainer_chemistry.functions.mean_squared_error(
+            a, b, ignore_nan=True),
+        (x0, x2), gy, (ggx0, ggx1))
+
+
+def check_double_backward_ignore_nan_with_nonnan_value(inputs, grads):
+    """Second-order gradient-check the ``ignore_nan=True`` loss on the
+    first two input arrays."""
+    x0, x1, _ = inputs
+    gy, ggx0, ggx1 = grads
+    gradient_check.check_double_backward(
+        lambda a, b: chainer_chemistry.functions.mean_squared_error(
+            a, b, ignore_nan=True),
+        (x0, x1), gy, (ggx0, ggx1))
+
+
+def test_double_backward_cpu(inputs, grads):
+    """Run every double-backward check on the CPU fixture arrays."""
+    double_backward_checks = (
+        check_double_backward,
+        check_double_backward_ignore_nan,
+        check_double_backward_ignore_nan_with_nonnan_value,
+    )
+    for check in double_backward_checks:
+        check(inputs, grads)
+
+
+@pytest.mark.gpu
+def test_double_backward_gpu(inputs, grads):
+    """Run every double-backward check on GPU copies of the fixtures."""
+    x0, x1, x2 = inputs
+    gy, ggx0, ggx1 = grads
+    gpu = cuda.to_gpu
+
+    # Each check receives its own freshly transferred inputs and grads.
+    check_double_backward(
+        (gpu(x0), gpu(x1), None),
+        (gpu(gy), gpu(ggx0), gpu(ggx1)))
+    check_double_backward_ignore_nan(
+        (gpu(x0), None, gpu(x2)),
+        (gpu(gy), gpu(ggx0), gpu(ggx1)))
+    check_double_backward_ignore_nan_with_nonnan_value(
+        (gpu(x0), gpu(x1), None),
+        (gpu(gy), gpu(ggx0), gpu(ggx1)))
+
+
+if __name__ == '__main__':
+    # Allow running this test module directly as a script, in verbose mode.
+    pytest.main([__file__, '-v'])
Original file line number	Diff line number	Diff line change
Expand Up		@@ -11,3 +11,4 @@ Function implementations
		:nosignatures:

		chainer_chemistry.functions.matmul
		chainer_chemistry.functions.mean_squared_error