Skip to content

Commit

Permalink
Detailed report for testing.assert_equal and testing.assert_identical (#1507)
Browse files Browse the repository at this point in the history

* more detailed AssertionError message for assert_identical

* print differing dimensions/data/variables/attributes

* minor tweaks

* add what's new entry

* add tests for diff_array_repr and diff_dataset_repr

* pep8

* add differing dimensions in diff_array_repr

* fix tests (explicit numpy dtypes)

* fix tests (dtype shown / not shown in array repr)

* minor tweaks
  • Loading branch information
benbovy authored Jan 18, 2019
1 parent dc87dea commit 1d0a2bc
Show file tree
Hide file tree
Showing 4 changed files with 272 additions and 11 deletions.
5 changes: 5 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ Enhancements
- Upsampling an array via interpolation with resample is now dask-compatible,
as long as the array is not chunked along the resampling dimension.
By `Spencer Clark <https://github.com/spencerkclark>`_.
- :py:func:`xarray.testing.assert_equal` and
:py:func:`xarray.testing.assert_identical` now provide a more detailed
report showing what exactly differs between the two objects (dimensions /
coordinates / variables / attributes) (:issue:`1507`).
By `Benoit Bovy <https://github.com/benbovy>`_.

Bug fixes
~~~~~~~~~
Expand Down
146 changes: 140 additions & 6 deletions xarray/core/formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import numpy as np
import pandas as pd

from .duck_array_ops import array_equiv
from .options import OPTIONS
from .pycompat import (
PY2, bytes_type, dask_array_type, unicode_type, zip_longest)
Expand Down Expand Up @@ -411,6 +412,15 @@ def short_dask_repr(array, show_dtype=True):
return 'dask.array<shape=%s, chunksize=%s>' % (array.shape, chunksize)


def short_data_repr(array):
    """Return a short text summary of the data held by ``array``.

    Three cases are handled, in this order:

    * the underlying ``_data`` is a dask array: delegate to
      ``short_dask_repr``;
    * the data is in memory, or has fewer than 1e5 elements: delegate to
      ``short_array_repr`` on ``array.values``;
    * otherwise: report only the number of values and the dtype.
    """
    # objects exposing a ``variable`` attribute are inspected through it,
    # anything else is inspected directly
    holder = getattr(array, 'variable', array)
    if isinstance(holder._data, dask_array_type):
        return short_dask_repr(array)

    if not (array._in_memory or array.size < 1e5):
        return u'[%s values with dtype=%s]' % (array.size, array.dtype)

    return short_array_repr(array.values)


def array_repr(arr):
# used for DataArray, Variable and IndexVariable
if hasattr(arr, 'name') and arr.name is not None:
Expand All @@ -421,12 +431,7 @@ def array_repr(arr):
summary = [u'<xarray.%s %s(%s)>'
% (type(arr).__name__, name_str, dim_summary(arr))]

if isinstance(getattr(arr, 'variable', arr)._data, dask_array_type):
summary.append(short_dask_repr(arr))
elif arr._in_memory or arr.size < 1e5:
summary.append(short_array_repr(arr.values))
else:
summary.append(u'[%s values with dtype=%s]' % (arr.size, arr.dtype))
summary.append(short_data_repr(arr))

if hasattr(arr, 'coords'):
if arr.coords:
Expand Down Expand Up @@ -463,3 +468,132 @@ def dataset_repr(ds):
summary.append(attrs_repr(ds.attrs))

return u'\n'.join(summary)


def diff_dim_summary(a, b):
    """Describe how the dimensions of ``a`` and ``b`` differ.

    Returns an empty string when ``a.dims`` and ``b.dims`` compare equal,
    otherwise a two-line message showing both dimension summaries.
    """
    dims_differ = a.dims != b.dims
    if not dims_differ:
        return ""

    template = "Differing dimensions:\n ({}) != ({})"
    return template.format(dim_summary(a), dim_summary(b))


def _diff_mapping_repr(a_mapping, b_mapping, compat,
title, summarizer, col_width=None):

def extra_items_repr(extra_keys, mapping, ab_side):
extra_repr = [summarizer(k, mapping[k], col_width) for k in extra_keys]
if extra_repr:
header = "{} only on the {} object:".format(title, ab_side)
return [header] + extra_repr
else:
return []

a_keys = set(a_mapping)
b_keys = set(b_mapping)

summary = []

diff_items = []

for k in a_keys & b_keys:
try:
# compare xarray variable
compatible = getattr(a_mapping[k], compat)(b_mapping[k])
is_variable = True
except AttributeError:
# compare attribute value
compatible = a_mapping[k] == b_mapping[k]
is_variable = False

if not compatible:
temp = [summarizer(k, vars[k], col_width)
for vars in (a_mapping, b_mapping)]

if compat == 'identical' and is_variable:
attrs_summary = []

for m in (a_mapping, b_mapping):
attr_s = "\n".join([summarize_attr(ak, av)
for ak, av in m[k].attrs.items()])
attrs_summary.append(attr_s)

temp = ["\n".join([var_s, attr_s]) if attr_s else var_s
for var_s, attr_s in zip(temp, attrs_summary)]

diff_items += [ab_side + s[1:]
for ab_side, s in zip(('L', 'R'), temp)]

if diff_items:
summary += ["Differing {}:".format(title.lower())] + diff_items

summary += extra_items_repr(a_keys - b_keys, a_mapping, "left")
summary += extra_items_repr(b_keys - a_keys, b_mapping, "right")

return "\n".join(summary)


# Specializations of _diff_mapping_repr: each one pre-binds the section title
# and the per-item summarizer, so callers only pass the two mappings, the
# ``compat`` name and (optionally) ``col_width``.

# Report for coordinate mappings; items are rendered with summarize_coord.
diff_coords_repr = functools.partial(_diff_mapping_repr,
title="Coordinates",
summarizer=summarize_coord)


# Report for data-variable mappings; items are rendered with
# summarize_datavar.
diff_data_vars_repr = functools.partial(_diff_mapping_repr,
title="Data variables",
summarizer=summarize_datavar)


# Report for attribute mappings; items are rendered with summarize_attr.
# No col_width is bound here, so _diff_mapping_repr's default applies unless
# the caller passes one.
diff_attrs_repr = functools.partial(_diff_mapping_repr,
title="Attributes",
summarizer=summarize_attr)


def _compat_to_str(compat):
if compat == "equals":
return "equal"
else:
return compat


def diff_array_repr(a, b, compat):
    """Return a text report of what differs between two array objects.

    Parameters
    ----------
    a, b : DataArray, Variable or IndexVariable
        Left and right objects being compared.
    compat : str
        Comparison being reported: ``'equals'`` or ``'identical'``.
        Attributes are only compared for ``'identical'``.

    Returns
    -------
    str
        A header line followed by one section per kind of difference
        (dimensions, values, coordinates, attributes). Sections that hold
        no difference are left out.
    """
    # used for DataArray, Variable and IndexVariable
    summary = ["Left and right {} objects are not {}"
               .format(type(a).__name__, _compat_to_str(compat))]

    summary.append(diff_dim_summary(a, b))

    if not array_equiv(a.data, b.data):
        temp = [wrap_indent(short_array_repr(obj), start=' ')
                for obj in (a, b)]
        diff_data_repr = [ab_side + "\n" + ab_data_repr
                          for ab_side, ab_data_repr in zip(('L', 'R'), temp)]
        summary += ["Differing values:"] + diff_data_repr

    # only objects that carry coordinates get a coordinates section
    if hasattr(a, 'coords'):
        col_width = _calculate_col_width(set(a.coords) | set(b.coords))
        summary.append(diff_coords_repr(a.coords, b.coords, compat,
                                        col_width=col_width))

    if compat == 'identical':
        summary.append(diff_attrs_repr(a.attrs, b.attrs, compat))

    # the section helpers return "" when there is nothing to report; skip
    # those entries so the message has no blank or trailing empty lines
    return "\n".join(section for section in summary if section)


def diff_dataset_repr(a, b, compat):
    """Return a text report of what differs between two datasets.

    Parameters
    ----------
    a, b : Dataset
        Left and right objects being compared.
    compat : str
        Comparison being reported: ``'equals'`` or ``'identical'``.
        Dataset attributes are only compared for ``'identical'``.

    Returns
    -------
    str
        A header line followed by one section per kind of difference
        (dimensions, coordinates, data variables, attributes). Sections
        that hold no difference are left out.
    """
    summary = ["Left and right {} objects are not {}"
               .format(type(a).__name__, _compat_to_str(compat))]

    # one column width shared by both sides so the L / R lines stay aligned
    col_width = _calculate_col_width(
        set(_get_col_items(a.variables) + _get_col_items(b.variables)))

    summary.append(diff_dim_summary(a, b))
    summary.append(diff_coords_repr(a.coords, b.coords, compat,
                                    col_width=col_width))
    summary.append(diff_data_vars_repr(a.data_vars, b.data_vars, compat,
                                       col_width=col_width))

    if compat == 'identical':
        summary.append(diff_attrs_repr(a.attrs, b.attrs, compat))

    # the section helpers return "" when there is nothing to report; skip
    # those entries so the message has no blank or trailing empty lines
    return "\n".join(section for section in summary if section)
15 changes: 10 additions & 5 deletions xarray/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import numpy as np

from xarray.core import duck_array_ops
from xarray.core import formatting


def _decode_string_data(data):
Expand Down Expand Up @@ -49,8 +50,10 @@ def assert_equal(a, b):
import xarray as xr
__tracebackhide__ = True # noqa: F841
assert type(a) == type(b) # noqa
if isinstance(a, (xr.Variable, xr.DataArray, xr.Dataset)):
assert a.equals(b), '{}\n{}'.format(a, b)
if isinstance(a, (xr.Variable, xr.DataArray)):
assert a.equals(b), formatting.diff_array_repr(a, b, 'equals')
elif isinstance(a, xr.Dataset):
assert a.equals(b), formatting.diff_dataset_repr(a, b, 'equals')
else:
raise TypeError('{} not supported by assertion comparison'
.format(type(a)))
Expand All @@ -76,11 +79,13 @@ def assert_identical(a, b):
import xarray as xr
__tracebackhide__ = True # noqa: F841
assert type(a) == type(b) # noqa
if isinstance(a, xr.DataArray):
if isinstance(a, xr.Variable):
assert a.identical(b), formatting.diff_array_repr(a, b, 'identical')
elif isinstance(a, xr.DataArray):
assert a.name == b.name
assert_identical(a._to_temp_dataset(), b._to_temp_dataset())
assert a.identical(b), formatting.diff_array_repr(a, b, 'identical')
elif isinstance(a, (xr.Dataset, xr.Variable)):
assert a.identical(b), '{}\n{}'.format(a, b)
assert a.identical(b), formatting.diff_dataset_repr(a, b, 'identical')
else:
raise TypeError('{} not supported by assertion comparison'
.format(type(a)))
Expand Down
117 changes: 117 additions & 0 deletions xarray/tests/test_formatting.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function

from textwrap import dedent

import numpy as np
import pandas as pd

import xarray as xr
from xarray.core import formatting
from xarray.core.pycompat import PY3

Expand Down Expand Up @@ -190,6 +193,120 @@ def test_attribute_repr(self):
assert u'\n' not in newlines
assert u'\t' not in tabs

def test_diff_array_repr(self):
    """Check the exact text produced by ``formatting.diff_array_repr``.

    Two cases are covered: a pair of DataArray objects compared with
    ``'identical'`` and a pair of Variable objects compared with
    ``'equals'``.
    """
    # NOTE(review): leading/inner whitespace of the expected strings looks
    # collapsed in this view of the file -- confirm against the repository
    # before relying on the literals below.

    # Left array: 2-D, two index coordinates, two attributes.
    da_a = xr.DataArray(
        np.array([[1, 2, 3], [4, 5, 6]], dtype='int64'),
        dims=('x', 'y'),
        coords={'x': np.array(['a', 'b'], dtype='U1'),
                'y': np.array([1, 2, 3], dtype='int64')},
        attrs={'units': 'm', 'description': 'desc'})

    # Right array: 1-D, different 'x' labels, an extra 'label' coordinate
    # and a different 'units' attribute.
    da_b = xr.DataArray(
        np.array([1, 2], dtype='int64'),
        dims='x',
        coords={'x': np.array(['a', 'c'], dtype='U1'),
                'label': ('x', np.array([1, 2], dtype='int64'))},
        attrs={'units': 'kg'})

    expected = dedent("""\
Left and right DataArray objects are not identical
Differing dimensions:
(x: 2, y: 3) != (x: 2)
Differing values:
L
array([[1, 2, 3],
[4, 5, 6]], dtype=int64)
R
array([1, 2], dtype=int64)
Differing coordinates:
L * x (x) <U1 'a' 'b'
R * x (x) <U1 'a' 'c'
Coordinates only on the left object:
* y (y) int64 1 2 3
Coordinates only on the right object:
label (x) int64 1 2
Differing attributes:
L units: m
R units: kg
Attributes only on the left object:
description: desc""")

    actual = formatting.diff_array_repr(da_a, da_b, 'identical')
    try:
        assert actual == expected
    except AssertionError:
        # depending on platform, dtype may not be shown in numpy array repr
        assert actual == expected.replace(", dtype=int64", "")

    # Second case: Variable objects (no coordinates), compared with 'equals'
    # so the attribute on ``va`` is not part of the expected report.
    va = xr.Variable('x', np.array([1, 2, 3], dtype='int64'),
                     {'title': 'test Variable'})
    vb = xr.Variable(('x', 'y'),
                     np.array([[1, 2, 3], [4, 5, 6]], dtype='int64'))

    expected = dedent("""\
Left and right Variable objects are not equal
Differing dimensions:
(x: 3) != (x: 2, y: 3)
Differing values:
L
array([1, 2, 3], dtype=int64)
R
array([[1, 2, 3],
[4, 5, 6]], dtype=int64)""")

    actual = formatting.diff_array_repr(va, vb, 'equals')
    try:
        assert actual == expected
    except AssertionError:
        # same platform-dependent dtype fallback as in the first case
        assert actual == expected.replace(", dtype=int64", "")

def test_diff_dataset_repr(self):
    """Check the exact text produced by ``formatting.diff_dataset_repr``
    for two datasets compared with ``'identical'``.
    """
    # NOTE(review): leading/inner whitespace of the expected string looks
    # collapsed in this view of the file -- confirm against the repository
    # before relying on the literal below.

    # Left dataset: two data variables, two index coordinates, two
    # attributes.
    ds_a = xr.Dataset(
        data_vars={
            'var1': (('x', 'y'),
                     np.array([[1, 2, 3], [4, 5, 6]], dtype='int64')),
            'var2': ('x', np.array([3, 4], dtype='int64'))
        },
        coords={'x': np.array(['a', 'b'], dtype='U1'),
                'y': np.array([1, 2, 3], dtype='int64')},
        attrs={'units': 'm', 'description': 'desc'}
    )

    # Right dataset: 'var1' is 1-D, 'x' has different labels plus a
    # 'source' attribute, 'label' is an extra coordinate, and 'units'
    # differs.
    ds_b = xr.Dataset(
        data_vars={'var1': ('x', np.array([1, 2], dtype='int64'))},
        coords={
            'x': ('x', np.array(['a', 'c'], dtype='U1'), {'source': 0}),
            'label': ('x', np.array([1, 2], dtype='int64'))
        },
        attrs={'units': 'kg'}
    )

    expected = dedent("""\
Left and right Dataset objects are not identical
Differing dimensions:
(x: 2, y: 3) != (x: 2)
Differing coordinates:
L * x (x) <U1 'a' 'b'
R * x (x) <U1 'a' 'c'
source: 0
Coordinates only on the left object:
* y (y) int64 1 2 3
Coordinates only on the right object:
label (x) int64 1 2
Differing data variables:
L var1 (x, y) int64 1 2 3 4 5 6
R var1 (x) int64 1 2
Data variables only on the left object:
var2 (x) int64 3 4
Differing attributes:
L units: m
R units: kg
Attributes only on the left object:
description: desc""")

    actual = formatting.diff_dataset_repr(ds_a, ds_b, 'identical')
    assert actual == expected


def test_set_numpy_options():
original_options = np.get_printoptions()
Expand Down

0 comments on commit 1d0a2bc

Please sign in to comment.