mean_squared_error with ignore_nan option #190

Merged (12 commits, Jun 22, 2018)
2 changes: 2 additions & 0 deletions chainer_chemistry/functions/__init__.py
@@ -1 +1,3 @@
from chainer_chemistry.functions.matmul import matmul # NOQA
from chainer_chemistry.functions.mean_squared_error import mean_squared_error # NOQA
from chainer_chemistry.functions.mean_squared_error import MeanSquaredError # NOQA
78 changes: 78 additions & 0 deletions chainer_chemistry/functions/mean_squared_error.py
@@ -0,0 +1,78 @@
import numpy

from chainer import cuda
from chainer import function_node
import chainer.functions
from chainer.utils import type_check


class MeanSquaredError(function_node.FunctionNode):

"""Mean squared error (a.k.a. Euclidean loss) function."""

def __init__(self, ignore_nan=False):
Review comment (Member):

IMO, what "ignore" means can differ from person to person, so I would suggest a more descriptive option name (e.g. convert_nan_to_zero), or else describing in detail what this option does in the documentation.
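
For reference, a minimal NumPy sketch of the behavior this diff implements (the sample values are illustrative only): NaN entries of the difference are zeroed before the squared sum, while the denominator stays the full element count.

import numpy as np

x0 = np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32)
x1 = np.array([1.5, np.nan, 2.0, np.nan], dtype=np.float32)

diff = x0 - x1
diff[np.isnan(diff)] = 0.          # NaN pairs contribute nothing to the sum
loss = diff.dot(diff) / diff.size  # denominator is still the total number of elements
print(loss)                        # ((-0.5)**2 + 1.0**2) / 4 = 0.3125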

# TODO(mottodora): implement task weight calculation
self.ignore_nan = ignore_nan

def check_type_forward(self, in_types):
type_check.expect(in_types.size() == 2)
type_check.expect(
in_types[0].dtype == numpy.float32,
in_types[1].dtype == numpy.float32,
in_types[0].shape == in_types[1].shape
)

def forward_cpu(self, inputs):
self.retain_inputs((0, 1))
diff = (inputs[0] - inputs[1]).ravel()
# TODO(mottodora): add reduce option
if self.ignore_nan:
diff[numpy.isnan(diff)] = 0.
return numpy.array(diff.dot(diff) / diff.size, dtype=diff.dtype),

def forward_gpu(self, inputs):
cupy = cuda.cupy
self.retain_inputs((0, 1))
diff = (inputs[0] - inputs[1]).ravel()
# TODO(mottodora): add reduce option
if self.ignore_nan:
diff[cupy.isnan(diff)] = 0.
return diff.dot(diff) / diff.dtype.type(diff.size),

def backward(self, indexes, gy):
x0, x1 = self.get_retained_inputs()
xp = cuda.get_array_module(x0)
ret = []
diff = x0 - x1
if self.ignore_nan:
diff = chainer.functions.where(xp.isnan(diff.array),
xp.zeros_like(diff.array), diff)
Review comment (Member):

I'm not sure, but xp.zeros_like(diff.array) might be replaceable with just 0; in that case we could skip creating (allocating memory for) a possibly large zero array.

Reply (Member Author):

Chainer's where function only accepts ndarrays of the same shape (https://docs.chainer.org/en/stable/reference/generated/chainer.functions.where.html).

gy0 = chainer.functions.broadcast_to(gy[0], diff.shape)
gx0 = gy0 * diff * (2. / diff.size)
if 0 in indexes:
ret.append(gx0)
if 1 in indexes:
ret.append(-gx0)
return ret


def mean_squared_error(x0, x1, ignore_nan=False):
"""Mean squared error function.
This function computes mean squared error between two variables. The mean
is taken over the minibatch. Note that the error is not scaled by 1/2.

Args:
x0 (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
:class:`cupy.ndarray`): Input variable.
x1 (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
:class:`cupy.ndarray`): Input variable.
ignore_nan (bool): If `True`, this function computes the mean squared error
ignoring NaNs: squared differences at NaN positions are treated as zero,
and the sum is divided by the total number of elements, including the
ignored ones.

Returns:
~chainer.Variable:
A variable holding an array representing the mean squared
error of two inputs.
"""
return MeanSquaredError(ignore_nan).apply((x0, x1))[0]
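
A short usage sketch of the new function (the input values below are made up for illustration; the import path matches the tests in this PR):

import numpy
import chainer_chemistry

x = numpy.asarray([[0.3, 0.1], [0.9, 0.2]], dtype=numpy.float32)
t = numpy.asarray([[0.5, numpy.nan], [0.8, 0.1]], dtype=numpy.float32)

# Without ignore_nan the NaN target would make the loss NaN; with it,
# the NaN pair contributes zero while the denominator remains 4.
loss = chainer_chemistry.functions.mean_squared_error(x, t, ignore_nan=True)
print(loss.data)  # approximately (0.04 + 0.01 + 0.01) / 4 = 0.015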
1 change: 1 addition & 0 deletions docs/source/functions.rst
@@ -11,3 +11,4 @@ Function implementations
:nosignatures:

chainer_chemistry.functions.matmul
chainer_chemistry.functions.mean_squared_error
196 changes: 196 additions & 0 deletions tests/functions_tests/test_mean_squared_error.py
@@ -0,0 +1,196 @@
import numpy
import pytest

import chainer
from chainer import cuda
from chainer import gradient_check

import chainer_chemistry


@pytest.fixture
def inputs():
x0 = numpy.random.uniform(-1, 1, (4, 3)).astype(numpy.float32)
x1 = numpy.random.uniform(-1, 1, (4, 3)).astype(numpy.float32)
x2 = numpy.asarray([[0.3, numpy.nan, 0.2],
[numpy.nan, 0.1, 0.5],
[0.9, 0.7, numpy.nan],
[0.2, -0.3, 0.4]]).astype(numpy.float32)
return x0, x1, x2


@pytest.fixture
def grads():
gy = numpy.random.uniform(-1, 1, ()).astype(numpy.float32)
ggx0 = numpy.random.uniform(-1, 1, (4, 3)).astype(numpy.float32)
ggx1 = numpy.random.uniform(-1, 1, (4, 3)).astype(numpy.float32)
return gy, ggx0, ggx1


def check_forward(inputs):
x0_data, x1_data, _ = inputs
x0 = chainer.Variable(x0_data)
x1 = chainer.Variable(x1_data)
loss = chainer_chemistry.functions.mean_squared_error(x0, x1)
loss_value = cuda.to_cpu(loss.data)
assert loss.dtype == numpy.float32
assert loss_value.shape == ()

loss_expect = numpy.zeros(())
x0_data = cuda.to_cpu(x0_data)
x1_data = cuda.to_cpu(x1_data)
for i in numpy.ndindex(x0_data.shape):
loss_expect += ((x0_data[i] - x1_data[i]) ** 2)
loss_expect /= x0_data.size
assert numpy.allclose(loss_value, loss_expect)


def check_forward_ignore_nan(inputs):
x0_data, _, x2_data = inputs
x0 = chainer.Variable(x0_data)
x2 = chainer.Variable(x2_data)
loss = chainer_chemistry.functions.mean_squared_error(x0, x2,
ignore_nan=True)
loss_value = cuda.to_cpu(loss.data)
assert loss.dtype == numpy.float32
assert loss_value.shape == ()

loss_expect = numpy.zeros(())
x0_data = cuda.to_cpu(x0_data)
x2_data = cuda.to_cpu(x2_data)
nan_mask = numpy.invert(numpy.isnan(x2_data)).astype(x2_data.dtype)
for i in numpy.ndindex(x0_data.shape):
loss_expect += ((x0_data[i] - numpy.nan_to_num(x2_data[i])) ** 2
* nan_mask[i])
loss_expect /= x0_data.size
assert numpy.allclose(loss_value, loss_expect)
Review comment (Member):

Why did you use numpy.allclose in this function, while pytest.approx in check_forward?

Reply (Member Author):

I'll fix this. Thank you.
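
For reference, the pytest.approx idiom mentioned above would look roughly like this (a sketch, not part of this diff; its default tolerances differ slightly from numpy.allclose):

assert loss_value == pytest.approx(loss_expect)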



def check_forward_ignore_nan_with_nonnan_value(inputs):
x0_data, x1_data, _ = inputs
x0 = chainer.Variable(x0_data)
x1 = chainer.Variable(x1_data)
loss = chainer_chemistry.functions.mean_squared_error(x0, x1,
ignore_nan=True)
loss_value = cuda.to_cpu(loss.data)
assert loss.dtype == numpy.float32
assert loss_value.shape == ()

loss_expect = numpy.zeros(())
x0_data = cuda.to_cpu(x0_data)
x1_data = cuda.to_cpu(x1_data)
nan_mask = numpy.invert(numpy.isnan(x1_data)).astype(x1_data.dtype)
for i in numpy.ndindex(x0_data.shape):
loss_expect += ((x0_data[i] - numpy.nan_to_num(x1_data[i])) ** 2
* nan_mask[i])
loss_expect /= x0_data.size
assert numpy.allclose(loss_value, loss_expect)


def test_forward_cpu(inputs):
check_forward(inputs)
check_forward_ignore_nan(inputs)
check_forward_ignore_nan_with_nonnan_value(inputs)


@pytest.mark.gpu
def test_forward_gpu(inputs):
x0, x1, x2 = inputs
check_forward((cuda.to_gpu(x0), cuda.to_gpu(x1), None))
check_forward_ignore_nan((cuda.to_gpu(x0), None, cuda.to_gpu(x2)))


def check_backward(inputs):
x0_data, x1_data, _ = inputs
gradient_check.check_backward(
chainer_chemistry.functions.mean_squared_error,
(x0_data, x1_data), None, eps=1e-2)


def check_backward_ignore_nan(inputs):
x0_data, _, x2_data = inputs

def func(x0, x1):
return chainer_chemistry.functions.mean_squared_error(x0, x1,
ignore_nan=True)
gradient_check.check_backward(func, (x0_data, x2_data), None, eps=1e-2)

Review comment (Member):

Could you add tests for backward and double backward with ignore_nan=True using x0 and x1?

Reply (Member Author):

Fixed.


def check_backward_ignore_nan_with_nonnan_value(inputs):
x0_data, x1_data, _ = inputs

def func(x0, x1):
return chainer_chemistry.functions.mean_squared_error(x0, x1,
ignore_nan=True)
gradient_check.check_backward(func, (x0_data, x1_data), None, eps=1e-2)


def test_backward_cpu(inputs):
check_backward(inputs)
check_backward_ignore_nan(inputs)
check_backward_ignore_nan_with_nonnan_value(inputs)


@pytest.mark.gpu
def test_backward_gpu(inputs):
x0, x1, x2 = inputs
check_backward((cuda.to_gpu(x0), cuda.to_gpu(x1), None))
check_backward_ignore_nan((cuda.to_gpu(x0), None, cuda.to_gpu(x2)))
check_backward_ignore_nan_with_nonnan_value((cuda.to_gpu(x0),
cuda.to_gpu(x1), None))


def check_double_backward(inputs, grads):
x0, x1, _ = inputs
gy, ggx0, ggx1 = grads
gradient_check.check_double_backward(
chainer_chemistry.functions.mean_squared_error,
(x0, x1), gy, (ggx0, ggx1))


def check_double_backward_ignore_nan(inputs, grads):
x0, _, x2 = inputs
gy, ggx0, ggx1 = grads

def func(x0, x1):
return chainer_chemistry.functions.mean_squared_error(x0, x1,
ignore_nan=True)
gradient_check.check_double_backward(func, (x0, x2), gy, (ggx0, ggx1))


def check_double_backward_ignore_nan_with_nonnan_value(inputs, grads):
x0, x1, _ = inputs
gy, ggx0, ggx1 = grads

def func(x0, x1):
return chainer_chemistry.functions.mean_squared_error(x0, x1,
ignore_nan=True)
gradient_check.check_double_backward(func, (x0, x1), gy, (ggx0, ggx1))


def test_double_backward_cpu(inputs, grads):
check_double_backward(inputs, grads)
check_double_backward_ignore_nan(inputs, grads)
check_double_backward_ignore_nan_with_nonnan_value(inputs, grads)


@pytest.mark.gpu
def test_double_backward_gpu(inputs, grads):
x0, x1, x2 = inputs
gy, ggx0, ggx1 = grads
check_double_backward((cuda.to_gpu(x0), cuda.to_gpu(x1), None),
(cuda.to_gpu(gy), cuda.to_gpu(ggx0),
cuda.to_gpu(ggx1)))
check_double_backward_ignore_nan((cuda.to_gpu(x0), None, cuda.to_gpu(x2)),
(cuda.to_gpu(gy), cuda.to_gpu(ggx0),
cuda.to_gpu(ggx1)))
check_double_backward_ignore_nan_with_nonnan_value((cuda.to_gpu(x0),
cuda.to_gpu(x1),
None),
(cuda.to_gpu(gy),
cuda.to_gpu(ggx0),
cuda.to_gpu(ggx1)))


if __name__ == '__main__':
pytest.main([__file__, '-v'])