Skip to content

Commit

Permalink
concat now handles non-dim coordinates only present in one dataset (#3769)
Browse files Browse the repository at this point in the history

* concat can now deal with non-dim coordinates only present in one dataset.

* fix test

* minor fixes.
  • Loading branch information
dcherian authored Feb 23, 2020
1 parent 24cfdd2 commit 3ef75ae
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 4 deletions.
4 changes: 4 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ Bug fixes
checking. (:issue:`3779`, :pull:`3787`)
By `Justus Magin <https://github.com/keewis>`_.

- :py:func:`concat` can now handle coordinate variables only present in one of
the objects to be concatenated when ``coords="different"``.
By `Deepak Cherian <https://github.com/dcherian>`_.

Documentation
~~~~~~~~~~~~~

Expand Down
18 changes: 17 additions & 1 deletion xarray/core/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,23 @@ def process_subset_opt(opt, subset):
for k in getattr(datasets[0], subset):
if k not in concat_over:
equals[k] = None
variables = [ds.variables[k] for ds in datasets]

variables = []
for ds in datasets:
if k in ds.variables:
variables.append(ds.variables[k])

if len(variables) == 1:
# coords="different" doesn't make sense when only one object
# contains a particular variable.
break
elif len(variables) != len(datasets) and opt == "different":
raise ValueError(
f"{k!r} not present in all datasets and coords='different'. "
f"Either add {k!r} to datasets where it is missing or "
"specify coords='minimal'."
)

# first check without comparing values i.e. no computes
for var in variables[1:]:
equals[k] = getattr(variables[0], compat)(
Expand Down
7 changes: 4 additions & 3 deletions xarray/tests/test_combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,9 +365,10 @@ def test_nested_concat(self):
expected = Dataset({"x": ("a", [0, 1]), "y": ("a", [0, 1])})
assert_identical(expected, actual)

objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [0]})]
with pytest.raises(KeyError):
combine_nested(objs, concat_dim="x")
objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1]})]
actual = combine_nested(objs, concat_dim="x")
expected = Dataset({"x": [0, 1], "y": [0]})
assert_identical(expected, actual)

@pytest.mark.parametrize(
"join, expected",
Expand Down
21 changes: 21 additions & 0 deletions xarray/tests/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,3 +475,24 @@ def test_concat_attrs_first_variable(attr1, attr2):

concat_attrs = concat(arrs, "y").attrs
assert concat_attrs == attr1


def test_concat_merge_single_non_dim_coord():
    """Exercise ``concat`` when a scalar non-dim coordinate is missing from some inputs.

    Two scenarios are covered:

    * ``y`` is present on exactly one of two inputs: ``coords="different"``
      and ``coords="minimal"`` both carry ``y`` over to the result, while
      ``coords="all"`` raises ``ValueError``.
    * ``y`` is present on two of three inputs: both ``coords="different"``
      and ``coords="all"`` raise ``ValueError``.
    """
    da1 = DataArray([1, 2, 3], dims="x", coords={"x": [1, 2, 3], "y": 1})
    da2 = DataArray([4, 5, 6], dims="x", coords={"x": [4, 5, 6]})

    expected = DataArray(range(1, 7), dims="x", coords={"x": range(1, 7), "y": 1})

    # Only da1 carries "y", so there is nothing to compare it against and it
    # is simply kept on the concatenated result.
    for coords in ["different", "minimal"]:
        actual = concat([da1, da2], "x", coords=coords)
        assert_identical(actual, expected)

    with raises_regex(ValueError, "'y' is not present in all datasets."):
        concat([da1, da2], dim="x", coords="all")

    da1 = DataArray([1, 2, 3], dims="x", coords={"x": [1, 2, 3], "y": 1})
    da2 = DataArray([4, 5, 6], dims="x", coords={"x": [4, 5, 6]})
    da3 = DataArray([7, 8, 9], dims="x", coords={"x": [7, 8, 9], "y": 1})
    # The loop variable must actually be forwarded; otherwise both iterations
    # silently test the default ``coords`` value. The two options word their
    # errors slightly differently ("'y' not present ..." for "different",
    # "'y' is not present ..." for "all"), so the pattern accepts either.
    for coords in ["different", "all"]:
        with raises_regex(ValueError, "'y' (is )?not present in all datasets"):
            concat([da1, da2, da3], dim="x", coords=coords)

0 comments on commit 3ef75ae

Please sign in to comment.