Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change an == to an is. Fix tests so that this won't happen again. #2648

Merged
merged 2 commits into from
Jan 5, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions xarray/backends/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@

from .. import Dataset, backends, conventions
from ..core import indexing
from ..core.combine import _auto_combine, _infer_concat_order_from_positions
from ..core.combine import (
_CONCAT_DIM_DEFAULT, _auto_combine, _infer_concat_order_from_positions)
from ..core.pycompat import basestring, path_type
from ..core.utils import close_on_error, is_grib_path, is_remote_uri
from .common import ArrayWriter
Expand Down Expand Up @@ -483,9 +484,6 @@ def close(self):
f.close()


_CONCAT_DIM_DEFAULT = '__infer_concat_dim__'


def open_mfdataset(paths, chunks=None, concat_dim=_CONCAT_DIM_DEFAULT,
compat='no_conflicts', preprocess=None, engine=None,
lock=None, data_vars='all', coords='different',
Expand Down Expand Up @@ -606,7 +604,7 @@ def open_mfdataset(paths, chunks=None, concat_dim=_CONCAT_DIM_DEFAULT,
# Coerce 1D input into ND to maintain backwards-compatible API until API
# for N-D combine decided
# (see https://github.com/pydata/xarray/pull/2553/#issuecomment-445892746)
if concat_dim is None or concat_dim == _CONCAT_DIM_DEFAULT:
if concat_dim is None or concat_dim is _CONCAT_DIM_DEFAULT:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we sure we want this to be an is test? Are we sure that the keyword argument will default to that string object?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, we use that as the default value here.

Possibly a better choice would be to use something like ReprObject('<inferred>') for the sentinel object, e.g., as done here:

NA = utils.ReprObject('<NA>')

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess what I am getting at is a matter of user-friendliness. A user looking up the call signature of this method will see the default as the string (and not an object), but, they'll never be able to explicitly force concat dimension inference, because passing the string themselves will not work (it'll be a different object).

concat_dims = concat_dim
elif not isinstance(concat_dim, list):
concat_dims = [concat_dim]
Expand Down
2 changes: 1 addition & 1 deletion xarray/core/combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ def _auto_concat(datasets, dim=None, data_vars='all', coords='different'):
return concat(datasets, dim=dim, data_vars=data_vars, coords=coords)


_CONCAT_DIM_DEFAULT = '__infer_concat_dim__'
_CONCAT_DIM_DEFAULT = utils.ReprObject('<inferred>')


def _infer_concat_order_from_positions(datasets, concat_dims):
Expand Down
23 changes: 23 additions & 0 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -2367,6 +2367,29 @@ def test_open_single_dataset(self):
with open_mfdataset([tmp], concat_dim=dim) as actual:
assert_identical(expected, actual)

def test_open_multi_dataset(self):
    """Regression test for GH #1988 and GH #2647.

    Verifies that a ``concat_dim`` passed explicitly to
    ``open_mfdataset()`` is actually used for concatenation. Two files
    are opened (not one) because numpy's implicit casting of length-1
    arrays to booleans let the #2647 regression slip past
    ``test_open_single_dataset()``; that single-file test is kept as-is
    since the original bug specifically mishandled length-1 arrays
    during concatenation.
    """
    values = np.random.randn(10)
    original = Dataset({'foo': ('x', values)})
    dim = DataArray([100, 150], name='baz', dims='baz')
    # Opening the same dataset twice should stack two identical copies
    # of the data along the new 'baz' dimension.
    stacked = np.tile(values[np.newaxis, :], (2, 1))
    expected = Dataset({'foo': (('baz', 'x'), stacked)},
                       {'baz': [100, 150]})
    with create_tmp_file() as tmp1, \
            create_tmp_file() as tmp2:
        original.to_netcdf(tmp1)
        original.to_netcdf(tmp2)
        with open_mfdataset([tmp1, tmp2], concat_dim=dim) as actual:
            assert_identical(expected, actual)

def test_dask_roundtrip(self):
with create_tmp_file() as tmp:
data = create_test_data()
Expand Down