Merge pull request #190 from mottodora/mse-ignore-nan
mean_squared_error with `ignore_nan` option
delta2323 authored Jun 22, 2018
2 parents 72895a7 + 2c26c00 commit ea10e10
Showing 4 changed files with 277 additions and 0 deletions.
2 changes: 2 additions & 0 deletions chainer_chemistry/functions/__init__.py
@@ -1 +1,3 @@
from chainer_chemistry.functions.matmul import matmul # NOQA
from chainer_chemistry.functions.mean_squared_error import mean_squared_error # NOQA
from chainer_chemistry.functions.mean_squared_error import MeanSquaredError # NOQA
78 changes: 78 additions & 0 deletions chainer_chemistry/functions/mean_squared_error.py
@@ -0,0 +1,78 @@
import numpy

from chainer import cuda
from chainer import function_node
import chainer.functions
from chainer.utils import type_check


class MeanSquaredError(function_node.FunctionNode):

"""Mean squared error (a.k.a. Euclidean loss) function."""

def __init__(self, ignore_nan=False):
# TODO(mottodora): implement task weight calculation
self.ignore_nan = ignore_nan

def check_type_forward(self, in_types):
type_check.expect(in_types.size() == 2)
type_check.expect(
in_types[0].dtype == numpy.float32,
in_types[1].dtype == numpy.float32,
in_types[0].shape == in_types[1].shape
)

def forward_cpu(self, inputs):
self.retain_inputs((0, 1))
diff = (inputs[0] - inputs[1]).ravel()
# TODO(mottodora): add reduce option
if self.ignore_nan:
diff[numpy.isnan(diff)] = 0.
return numpy.array(diff.dot(diff) / diff.size, dtype=diff.dtype),

def forward_gpu(self, inputs):
cupy = cuda.cupy
self.retain_inputs((0, 1))
diff = (inputs[0] - inputs[1]).ravel()
# TODO(mottodora): add reduce option
if self.ignore_nan:
diff[cupy.isnan(diff)] = 0.
return diff.dot(diff) / diff.dtype.type(diff.size),

    def backward(self, indexes, gy):
        x0, x1 = self.get_retained_inputs()
        xp = cuda.get_array_module(x0)
        ret = []
        diff = x0 - x1
        if self.ignore_nan:
            # NaN positions are replaced by zero so that they receive no
            # gradient, mirroring forward_cpu/forward_gpu.
            diff = chainer.functions.where(
                xp.isnan(diff.array), xp.zeros_like(diff.array), diff)
        # Gradient of mean(diff ** 2) w.r.t. x0 is 2 * diff / N (and its
        # negation w.r.t. x1), scaled by the upstream gradient gy.
        gy0 = chainer.functions.broadcast_to(gy[0], diff.shape)
        gx0 = gy0 * diff * (2. / diff.size)
        if 0 in indexes:
            ret.append(gx0)
        if 1 in indexes:
            ret.append(-gx0)
        return ret


def mean_squared_error(x0, x1, ignore_nan=False):
    """Mean squared error function.

    This function computes the mean squared error between two variables. The
    mean is taken over the minibatch. Note that the error is not scaled by
    1/2.

    Args:
        x0 (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Input variable.
        x1 (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Input variable.
        ignore_nan (bool): If ``True``, this function computes the mean
            squared error while ignoring NaNs: the sum of the squared errors
            over the non-NaN elements is divided by the total number of
            elements.

    Returns:
        ~chainer.Variable:
            A variable holding an array representing the mean squared
            error of two inputs.
    """
    return MeanSquaredError(ignore_nan).apply((x0, x1))[0]
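
As a quick illustration of the `ignore_nan` behaviour documented above (a minimal sketch, not part of the commit; it assumes chainer-chemistry with this change is installed), a NaN in one input contributes zero squared error while the denominator stays at the full element count:

import numpy

import chainer_chemistry

x0 = numpy.asarray([[0.1, 0.4], [0.2, 0.5]], dtype=numpy.float32)
x1 = numpy.asarray([[0.3, numpy.nan], [0.0, 0.5]], dtype=numpy.float32)

# The NaN entry in x1 adds nothing to the squared error, but the mean is
# still taken over all four elements.
loss = chainer_chemistry.functions.mean_squared_error(
    x0, x1, ignore_nan=True)

expected = ((0.1 - 0.3) ** 2 + (0.2 - 0.0) ** 2 + (0.5 - 0.5) ** 2) / 4
print(float(loss.array), expected)  # both are approximately 0.02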
1 change: 1 addition & 0 deletions docs/source/functions.rst
@@ -11,3 +11,4 @@ Function implementations
:nosignatures:

chainer_chemistry.functions.matmul
chainer_chemistry.functions.mean_squared_error
196 changes: 196 additions & 0 deletions tests/functions_tests/test_mean_squared_error.py
@@ -0,0 +1,196 @@
import numpy
import pytest

import chainer
from chainer import cuda
from chainer import gradient_check

import chainer_chemistry


@pytest.fixture
def inputs():
    x0 = numpy.random.uniform(-1, 1, (4, 3)).astype(numpy.float32)
    x1 = numpy.random.uniform(-1, 1, (4, 3)).astype(numpy.float32)
    x2 = numpy.asarray([[0.3, numpy.nan, 0.2],
                        [numpy.nan, 0.1, 0.5],
                        [0.9, 0.7, numpy.nan],
                        [0.2, -0.3, 0.4]]).astype(numpy.float32)
    return x0, x1, x2


@pytest.fixture
def grads():
    gy = numpy.random.uniform(-1, 1, ()).astype(numpy.float32)
    ggx0 = numpy.random.uniform(-1, 1, (4, 3)).astype(numpy.float32)
    ggx1 = numpy.random.uniform(-1, 1, (4, 3)).astype(numpy.float32)
    return gy, ggx0, ggx1


def check_forward(inputs):
    x0_data, x1_data, _ = inputs
    x0 = chainer.Variable(x0_data)
    x1 = chainer.Variable(x1_data)
    loss = chainer_chemistry.functions.mean_squared_error(x0, x1)
    loss_value = cuda.to_cpu(loss.data)
    assert loss.dtype == numpy.float32
    assert loss_value.shape == ()

    loss_expect = numpy.zeros(())
    x0_data = cuda.to_cpu(x0_data)
    x1_data = cuda.to_cpu(x1_data)
    for i in numpy.ndindex(x0_data.shape):
        loss_expect += ((x0_data[i] - x1_data[i]) ** 2)
    loss_expect /= x0_data.size
    assert numpy.allclose(loss_value, loss_expect)


def check_forward_ignore_nan(inputs):
    x0_data, _, x2_data = inputs
    x0 = chainer.Variable(x0_data)
    x2 = chainer.Variable(x2_data)
    loss = chainer_chemistry.functions.mean_squared_error(
        x0, x2, ignore_nan=True)
    loss_value = cuda.to_cpu(loss.data)
    assert loss.dtype == numpy.float32
    assert loss_value.shape == ()

    loss_expect = numpy.zeros(())
    x0_data = cuda.to_cpu(x0_data)
    x2_data = cuda.to_cpu(x2_data)
    nan_mask = numpy.invert(numpy.isnan(x2_data)).astype(x2_data.dtype)
    for i in numpy.ndindex(x0_data.shape):
        loss_expect += ((x0_data[i] - numpy.nan_to_num(x2_data[i])) ** 2
                        * nan_mask[i])
    loss_expect /= x0_data.size
    assert numpy.allclose(loss_value, loss_expect)


def check_forward_ignore_nan_with_nonnan_value(inputs):
    x0_data, x1_data, _ = inputs
    x0 = chainer.Variable(x0_data)
    x1 = chainer.Variable(x1_data)
    loss = chainer_chemistry.functions.mean_squared_error(
        x0, x1, ignore_nan=True)
    loss_value = cuda.to_cpu(loss.data)
    assert loss.dtype == numpy.float32
    assert loss_value.shape == ()

    loss_expect = numpy.zeros(())
    x0_data = cuda.to_cpu(x0_data)
    x1_data = cuda.to_cpu(x1_data)
    nan_mask = numpy.invert(numpy.isnan(x1_data)).astype(x1_data.dtype)
    for i in numpy.ndindex(x0_data.shape):
        loss_expect += ((x0_data[i] - numpy.nan_to_num(x1_data[i])) ** 2
                        * nan_mask[i])
    loss_expect /= x0_data.size
    assert numpy.allclose(loss_value, loss_expect)


def test_forward_cpu(inputs):
    check_forward(inputs)
    check_forward_ignore_nan(inputs)
    check_forward_ignore_nan_with_nonnan_value(inputs)


@pytest.mark.gpu
def test_forward_gpu(inputs):
    x0, x1, x2 = inputs
    check_forward((cuda.to_gpu(x0), cuda.to_gpu(x1), None))
    check_forward_ignore_nan((cuda.to_gpu(x0), None, cuda.to_gpu(x2)))


def check_backward(inputs):
    x0_data, x1_data, _ = inputs
    gradient_check.check_backward(
        chainer_chemistry.functions.mean_squared_error,
        (x0_data, x1_data), None, eps=1e-2)


def check_backward_ignore_nan(inputs):
    x0_data, _, x2_data = inputs

    def func(x0, x1):
        return chainer_chemistry.functions.mean_squared_error(
            x0, x1, ignore_nan=True)
    gradient_check.check_backward(func, (x0_data, x2_data), None, eps=1e-2)


def check_backward_ignore_nan_with_nonnan_value(inputs):
    x0_data, x1_data, _ = inputs

    def func(x0, x1):
        return chainer_chemistry.functions.mean_squared_error(
            x0, x1, ignore_nan=True)
    gradient_check.check_backward(func, (x0_data, x1_data), None, eps=1e-2)


def test_backward_cpu(inputs):
    check_backward(inputs)
    check_backward_ignore_nan(inputs)
    check_backward_ignore_nan_with_nonnan_value(inputs)


@pytest.mark.gpu
def test_backward_gpu(inputs):
    x0, x1, x2 = inputs
    check_backward((cuda.to_gpu(x0), cuda.to_gpu(x1), None))
    check_backward_ignore_nan((cuda.to_gpu(x0), None, cuda.to_gpu(x2)))
    check_backward_ignore_nan_with_nonnan_value(
        (cuda.to_gpu(x0), cuda.to_gpu(x1), None))


def check_double_backward(inputs, grads):
    x0, x1, _ = inputs
    gy, ggx0, ggx1 = grads
    gradient_check.check_double_backward(
        chainer_chemistry.functions.mean_squared_error,
        (x0, x1), gy, (ggx0, ggx1))


def check_double_backward_ignore_nan(inputs, grads):
    x0, _, x2 = inputs
    gy, ggx0, ggx1 = grads

    def func(x0, x1):
        return chainer_chemistry.functions.mean_squared_error(
            x0, x1, ignore_nan=True)
    gradient_check.check_double_backward(func, (x0, x2), gy, (ggx0, ggx1))


def check_double_backward_ignore_nan_with_nonnan_value(inputs, grads):
    x0, x1, _ = inputs
    gy, ggx0, ggx1 = grads

    def func(x0, x1):
        return chainer_chemistry.functions.mean_squared_error(
            x0, x1, ignore_nan=True)
    gradient_check.check_double_backward(func, (x0, x1), gy, (ggx0, ggx1))


def test_double_backward_cpu(inputs, grads):
    check_double_backward(inputs, grads)
    check_double_backward_ignore_nan(inputs, grads)
    check_double_backward_ignore_nan_with_nonnan_value(inputs, grads)


@pytest.mark.gpu
def test_double_backward_gpu(inputs, grads):
    x0, x1, x2 = inputs
    gy, ggx0, ggx1 = grads
    check_double_backward(
        (cuda.to_gpu(x0), cuda.to_gpu(x1), None),
        (cuda.to_gpu(gy), cuda.to_gpu(ggx0), cuda.to_gpu(ggx1)))
    check_double_backward_ignore_nan(
        (cuda.to_gpu(x0), None, cuda.to_gpu(x2)),
        (cuda.to_gpu(gy), cuda.to_gpu(ggx0), cuda.to_gpu(ggx1)))
    check_double_backward_ignore_nan_with_nonnan_value(
        (cuda.to_gpu(x0), cuda.to_gpu(x1), None),
        (cuda.to_gpu(gy), cuda.to_gpu(ggx0), cuda.to_gpu(ggx1)))


if __name__ == '__main__':
    pytest.main([__file__, '-v'])
