mean_squared_error with ignore_nan option #190
Changes from all commits: 65f3ffe, 8df5a30, a7a66ab, 843251d, 620a784, 72a6ab4, 3aab1f8, 734916b, c115fcb, 761d5f9, 273252f, 2c26c00
@@ -1 +1,3 @@
 from chainer_chemistry.functions.matmul import matmul  # NOQA
+from chainer_chemistry.functions.mean_squared_error import mean_squared_error  # NOQA
+from chainer_chemistry.functions.mean_squared_error import MeanSquaredError  # NOQA
@@ -0,0 +1,78 @@
import numpy

from chainer import cuda
from chainer import function_node
import chainer.functions
from chainer.utils import type_check


class MeanSquaredError(function_node.FunctionNode):

    """Mean squared error (a.k.a. Euclidean loss) function."""

    def __init__(self, ignore_nan=False):
        # TODO(mottodora): implement task weight calculation
        self.ignore_nan = ignore_nan

    def check_type_forward(self, in_types):
        type_check.expect(in_types.size() == 2)
        type_check.expect(
            in_types[0].dtype == numpy.float32,
            in_types[1].dtype == numpy.float32,
            in_types[0].shape == in_types[1].shape
        )

    def forward_cpu(self, inputs):
        self.retain_inputs((0, 1))
        diff = (inputs[0] - inputs[1]).ravel()
        # TODO(mottodora): add reduce option
        if self.ignore_nan:
            diff[numpy.isnan(diff)] = 0.
        return numpy.array(diff.dot(diff) / diff.size, dtype=diff.dtype),

    def forward_gpu(self, inputs):
        cupy = cuda.cupy
        self.retain_inputs((0, 1))
        diff = (inputs[0] - inputs[1]).ravel()
        # TODO(mottodora): add reduce option
        if self.ignore_nan:
            diff[cupy.isnan(diff)] = 0.
        return diff.dot(diff) / diff.dtype.type(diff.size),

    def backward(self, indexes, gy):
        x0, x1 = self.get_retained_inputs()
        xp = cuda.get_array_module(x0)
        ret = []
        diff = x0 - x1
        if self.ignore_nan:
            diff = chainer.functions.where(xp.isnan(diff.array),
                                           xp.zeros_like(diff.array), diff)
        gy0 = chainer.functions.broadcast_to(gy[0], diff.shape)
        gx0 = gy0 * diff * (2. / diff.size)
        if 0 in indexes:
            ret.append(gx0)
        if 1 in indexes:
            ret.append(-gx0)
        return ret


def mean_squared_error(x0, x1, ignore_nan=False):
    """Mean squared error function.

    This function computes the mean squared error between two variables. The
    mean is taken over the minibatch. Note that the error is not scaled by
    1/2.

    Args:
        x0 (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Input variable.
        x1 (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Input variable.
        ignore_nan (bool): If ``True``, this function computes the mean
            squared error while ignoring NaNs: NaN elements contribute zero
            to the sum of squared differences, but the mean is still taken
            over the total number of elements, not only the non-NaN ones.

    Returns:
        ~chainer.Variable:
            A variable holding an array representing the mean squared
            error of two inputs.
    """
    return MeanSquaredError(ignore_nan).apply((x0, x1))[0]
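A minimal usage sketch (an illustration, not part of this diff) of the new option on CPU inputs: with ignore_nan=True, NaN terms contribute zero to the numerator while the denominator remains the full element count.

import numpy
import chainer_chemistry

x0 = numpy.array([[0.1, 0.2], [0.3, 0.4]], dtype=numpy.float32)
x1 = numpy.array([[0.0, numpy.nan], [0.3, 0.5]], dtype=numpy.float32)

# ((0.1 - 0.0)**2 + 0 + (0.3 - 0.3)**2 + (0.4 - 0.5)**2) / 4 = 0.005
loss = chainer_chemistry.functions.mean_squared_error(x0, x1, ignore_nan=True)
print(loss.data)  # ~0.005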
@@ -0,0 +1,196 @@
import numpy
import pytest

import chainer
from chainer import cuda
from chainer import gradient_check

import chainer_chemistry


@pytest.fixture
def inputs():
    x0 = numpy.random.uniform(-1, 1, (4, 3)).astype(numpy.float32)
    x1 = numpy.random.uniform(-1, 1, (4, 3)).astype(numpy.float32)
    x2 = numpy.asarray([[0.3, numpy.nan, 0.2],
                        [numpy.nan, 0.1, 0.5],
                        [0.9, 0.7, numpy.nan],
                        [0.2, -0.3, 0.4]]).astype(numpy.float32)
    return x0, x1, x2


@pytest.fixture
def grads():
    gy = numpy.random.uniform(-1, 1, ()).astype(numpy.float32)
    ggx0 = numpy.random.uniform(-1, 1, (4, 3)).astype(numpy.float32)
    ggx1 = numpy.random.uniform(-1, 1, (4, 3)).astype(numpy.float32)
    return gy, ggx0, ggx1


def check_forward(inputs):
    x0_data, x1_data, _ = inputs
    x0 = chainer.Variable(x0_data)
    x1 = chainer.Variable(x1_data)
    loss = chainer_chemistry.functions.mean_squared_error(x0, x1)
    loss_value = cuda.to_cpu(loss.data)
    assert loss.dtype == numpy.float32
    assert loss_value.shape == ()

    loss_expect = numpy.zeros(())
    x0_data = cuda.to_cpu(x0_data)
    x1_data = cuda.to_cpu(x1_data)
    for i in numpy.ndindex(x0_data.shape):
        loss_expect += ((x0_data[i] - x1_data[i]) ** 2)
    loss_expect /= x0_data.size
    assert numpy.allclose(loss_value, loss_expect)


def check_forward_ignore_nan(inputs):
    x0_data, _, x2_data = inputs
    x0 = chainer.Variable(x0_data)
    x2 = chainer.Variable(x2_data)
    loss = chainer_chemistry.functions.mean_squared_error(x0, x2,
                                                          ignore_nan=True)
    loss_value = cuda.to_cpu(loss.data)
    assert loss.dtype == numpy.float32
    assert loss_value.shape == ()

    loss_expect = numpy.zeros(())
    x0_data = cuda.to_cpu(x0_data)
    x2_data = cuda.to_cpu(x2_data)
    nan_mask = numpy.invert(numpy.isnan(x2_data)).astype(x2_data.dtype)
    for i in numpy.ndindex(x0_data.shape):
        loss_expect += ((x0_data[i] - numpy.nan_to_num(x2_data[i])) ** 2
                        * nan_mask[i])
    loss_expect /= x0_data.size
    assert numpy.allclose(loss_value, loss_expect)
Review comment: Why did you use …
Review comment: Fix this. Thank you.

def check_forward_ignore_nan_with_nonnan_value(inputs):
    x0_data, x1_data, _ = inputs
    x0 = chainer.Variable(x0_data)
    x1 = chainer.Variable(x1_data)
    loss = chainer_chemistry.functions.mean_squared_error(x0, x1,
                                                          ignore_nan=True)
    loss_value = cuda.to_cpu(loss.data)
    assert loss.dtype == numpy.float32
    assert loss_value.shape == ()

    loss_expect = numpy.zeros(())
    x0_data = cuda.to_cpu(x0_data)
    x1_data = cuda.to_cpu(x1_data)
    nan_mask = numpy.invert(numpy.isnan(x1_data)).astype(x1_data.dtype)
    for i in numpy.ndindex(x0_data.shape):
        loss_expect += ((x0_data[i] - numpy.nan_to_num(x1_data[i])) ** 2
                        * nan_mask[i])
    loss_expect /= x0_data.size
    assert numpy.allclose(loss_value, loss_expect)


def test_forward_cpu(inputs):
    check_forward(inputs)
    check_forward_ignore_nan(inputs)
    check_forward_ignore_nan_with_nonnan_value(inputs)


@pytest.mark.gpu
def test_forward_gpu(inputs):
    x0, x1, x2 = inputs
    check_forward((cuda.to_gpu(x0), cuda.to_gpu(x1), None))
    check_forward_ignore_nan((cuda.to_gpu(x0), None, cuda.to_gpu(x2)))


def check_backward(inputs):
    x0_data, x1_data, _ = inputs
    gradient_check.check_backward(
        chainer_chemistry.functions.mean_squared_error,
        (x0_data, x1_data), None, eps=1e-2)


def check_backward_ignore_nan(inputs):
    x0_data, _, x2_data = inputs

    def func(x0, x1):
        return chainer_chemistry.functions.mean_squared_error(x0, x1,
                                                              ignore_nan=True)
    gradient_check.check_backward(func, (x0_data, x2_data), None, eps=1e-2)
Review comment: Could you add tests for backward and double backward with …
Review comment: fix

def check_backward_ignore_nan_with_nonnan_value(inputs):
    x0_data, x1_data, _ = inputs

    def func(x0, x1):
        return chainer_chemistry.functions.mean_squared_error(x0, x1,
                                                              ignore_nan=True)
    gradient_check.check_backward(func, (x0_data, x1_data), None, eps=1e-2)


def test_backward_cpu(inputs):
    check_backward(inputs)
    check_backward_ignore_nan(inputs)
    check_backward_ignore_nan_with_nonnan_value(inputs)


@pytest.mark.gpu
def test_backward_gpu(inputs):
    x0, x1, x2 = inputs
    check_backward((cuda.to_gpu(x0), cuda.to_gpu(x1), None))
    check_backward_ignore_nan((cuda.to_gpu(x0), None, cuda.to_gpu(x2)))
    check_backward_ignore_nan_with_nonnan_value((cuda.to_gpu(x0),
                                                 cuda.to_gpu(x1), None))


def check_double_backward(inputs, grads):
    x0, x1, _ = inputs
    gy, ggx0, ggx1 = grads
    gradient_check.check_double_backward(
        chainer_chemistry.functions.mean_squared_error,
        (x0, x1), gy, (ggx0, ggx1))


def check_double_backward_ignore_nan(inputs, grads):
    x0, _, x2 = inputs
    gy, ggx0, ggx1 = grads

    def func(x0, x1):
        return chainer_chemistry.functions.mean_squared_error(x0, x1,
                                                              ignore_nan=True)
    gradient_check.check_double_backward(func, (x0, x2), gy, (ggx0, ggx1))


def check_double_backward_ignore_nan_with_nonnan_value(inputs, grads):
    x0, x1, _ = inputs
    gy, ggx0, ggx1 = grads

    def func(x0, x1):
        return chainer_chemistry.functions.mean_squared_error(x0, x1,
                                                              ignore_nan=True)
    gradient_check.check_double_backward(func, (x0, x1), gy, (ggx0, ggx1))


def test_double_backward_cpu(inputs, grads):
    check_double_backward(inputs, grads)
    check_double_backward_ignore_nan(inputs, grads)
    check_double_backward_ignore_nan_with_nonnan_value(inputs, grads)


@pytest.mark.gpu
def test_double_backward_gpu(inputs, grads):
    x0, x1, x2 = inputs
    gy, ggx0, ggx1 = grads
    check_double_backward((cuda.to_gpu(x0), cuda.to_gpu(x1), None),
                          (cuda.to_gpu(gy), cuda.to_gpu(ggx0),
                           cuda.to_gpu(ggx1)))
    check_double_backward_ignore_nan((cuda.to_gpu(x0), None, cuda.to_gpu(x2)),
                                     (cuda.to_gpu(gy), cuda.to_gpu(ggx0),
                                      cuda.to_gpu(ggx1)))
    check_double_backward_ignore_nan_with_nonnan_value((cuda.to_gpu(x0),
                                                        cuda.to_gpu(x1),
                                                        None),
                                                       (cuda.to_gpu(gy),
                                                        cuda.to_gpu(ggx0),
                                                        cuda.to_gpu(ggx1)))


if __name__ == '__main__':
    pytest.main([__file__, '-v'])
Review comment: IMO, what "ignore" means can differ from person to person, so I would suggest a more descriptive option name (e.g. convert_nan_to_zero), or documenting in detail what this option does.
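To make the reviewer's point concrete, here is a small NumPy-only sketch (an illustration, not code from this PR) contrasting the reading implemented here, where NaN terms are zeroed but still counted in the denominator, with the other common reading of "ignore", where NaN elements are dropped from both the numerator and the denominator:

import numpy

x0 = numpy.array([1.0, 2.0, 3.0, 4.0], dtype=numpy.float32)
x1 = numpy.array([1.5, numpy.nan, 3.0, 3.0], dtype=numpy.float32)

diff = x0 - x1
mask = ~numpy.isnan(diff)
masked = numpy.where(mask, diff, 0.0)

# Behaviour of this PR ("convert NaN to zero"): divide by the full size.
mse_zeroed = masked.dot(masked) / diff.size            # (0.25 + 1.0) / 4 = 0.3125

# Alternative reading: drop NaN elements from numerator and denominator.
mse_dropped = diff[mask].dot(diff[mask]) / mask.sum()  # (0.25 + 1.0) / 3 ≈ 0.4167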