From 151eb4207fff6123cad7691a5d13980954f517e3 Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 12 Jul 2019 08:46:35 -0600 Subject: [PATCH 01/10] mfdataset, concat now support the 'join' kwarg. Closes #1354 --- xarray/backends/api.py | 12 +++++++---- xarray/core/combine.py | 46 ++++++++++++++++++++++++++---------------- xarray/core/concat.py | 14 +++++++------ 3 files changed, 45 insertions(+), 27 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index e0f269eb51f..529a2b8e2a2 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -609,7 +609,7 @@ def open_mfdataset(paths, chunks=None, concat_dim='_not_supplied', compat='no_conflicts', preprocess=None, engine=None, lock=None, data_vars='all', coords='different', combine='_old_auto', autoclose=None, parallel=False, - **kwargs): + join='outer', **kwargs): """Open multiple files as a single dataset. If combine='by_coords' then the function ``combine_by_coords`` is used to @@ -704,6 +704,8 @@ def open_mfdataset(paths, chunks=None, concat_dim='_not_supplied', parallel : bool, optional If True, the open and preprocess steps of this function will be performed in parallel using ``dask.delayed``. Default is False. + join : {'outer', 'inner', 'left', 'right', 'exact'}, optional + Passed on to align. **kwargs : optional Additional arguments passed on to :py:func:`xarray.open_dataset`. 
@@ -798,18 +800,20 @@ def open_mfdataset(paths, chunks=None, concat_dim='_not_supplied', combined = auto_combine(datasets, concat_dim=concat_dim, compat=compat, data_vars=data_vars, - coords=coords, from_openmfds=True) + coords=coords, join=join, + from_openmfds=True) elif combine == 'nested': # Combined nested list by successive concat and merge operations # along each dimension, using structure given by "ids" combined = _nested_combine(datasets, concat_dims=concat_dim, compat=compat, data_vars=data_vars, - coords=coords, ids=ids) + coords=coords, ids=ids, join=join) elif combine == 'by_coords': # Redo ordering from coordinates, ignoring how they were ordered # previously combined = combine_by_coords(datasets, compat=compat, - data_vars=data_vars, coords=coords) + data_vars=data_vars, coords=coords, + join=join) else: raise ValueError("{} is an invalid option for the keyword argument" " ``combine``".format(combine)) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 37ae903b6c3..61a0dbe2da7 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -136,7 +136,7 @@ def _check_shape_tile_ids(combined_tile_ids): def _combine_nd(combined_ids, concat_dims, data_vars='all', coords='different', compat='no_conflicts', - fill_value=dtypes.NA): + fill_value=dtypes.NA, join='outer'): """ Combines an N-dimensional structure of datasets into one by applying a series of either concat and merge operations along each dimension. 
@@ -177,13 +177,14 @@ def _combine_nd(combined_ids, concat_dims, data_vars='all', data_vars=data_vars, coords=coords, compat=compat, - fill_value=fill_value) + fill_value=fill_value, + join=join) (combined_ds,) = combined_ids.values() return combined_ds def _combine_all_along_first_dim(combined_ids, dim, data_vars, coords, compat, - fill_value=dtypes.NA): + fill_value=dtypes.NA, join='outer'): # Group into lines of datasets which must be combined along dim # need to sort by _new_tile_id first for groupby to work @@ -197,12 +198,13 @@ def _combine_all_along_first_dim(combined_ids, dim, data_vars, coords, compat, combined_ids = OrderedDict(sorted(group)) datasets = combined_ids.values() new_combined_ids[new_id] = _combine_1d(datasets, dim, compat, - data_vars, coords, fill_value) + data_vars, coords, fill_value, + join) return new_combined_ids def _combine_1d(datasets, concat_dim, compat='no_conflicts', data_vars='all', - coords='different', fill_value=dtypes.NA): + coords='different', fill_value=dtypes.NA, join='outer'): """ Applies either concat or merge to 1D list of datasets depending on value of concat_dim @@ -222,7 +224,8 @@ def _combine_1d(datasets, concat_dim, compat='no_conflicts', data_vars='all', else: raise else: - combined = merge(datasets, compat=compat, fill_value=fill_value) + combined = merge(datasets, compat=compat, fill_value=fill_value, + join=join) return combined @@ -233,7 +236,7 @@ def _new_tile_id(single_id_ds_pair): def _nested_combine(datasets, concat_dims, compat, data_vars, coords, ids, - fill_value=dtypes.NA): + fill_value=dtypes.NA, join='outer'): if len(datasets) == 0: return Dataset() @@ -254,12 +257,13 @@ def _nested_combine(datasets, concat_dims, compat, data_vars, coords, ids, # Apply series of concatenate or merge operations along each dimension combined = _combine_nd(combined_ids, concat_dims, compat=compat, data_vars=data_vars, coords=coords, - fill_value=fill_value) + fill_value=fill_value, join=join) return combined def 
combine_nested(datasets, concat_dim, compat='no_conflicts', - data_vars='all', coords='different', fill_value=dtypes.NA): + data_vars='all', coords='different', fill_value=dtypes.NA, + join='outer'): """ Explicitly combine an N-dimensional grid of datasets into one by using a succession of concat and merge operations along each dimension of the grid. @@ -312,6 +316,8 @@ def combine_nested(datasets, concat_dim, compat='no_conflicts', Details are in the documentation of concat fill_value : scalar, optional Value to use for newly missing values + join : {'outer', 'inner', 'left', 'right', 'exact'}, optional + How to combine objects with different indexes. Returns ------- @@ -383,7 +389,7 @@ def combine_nested(datasets, concat_dim, compat='no_conflicts', # The IDs argument tells _manual_combine that datasets aren't yet sorted return _nested_combine(datasets, concat_dims=concat_dim, compat=compat, data_vars=data_vars, coords=coords, ids=False, - fill_value=fill_value) + fill_value=fill_value, join=join) def vars_as_keys(ds): @@ -391,7 +397,7 @@ def vars_as_keys(ds): def combine_by_coords(datasets, compat='no_conflicts', data_vars='all', - coords='different', fill_value=dtypes.NA): + coords='different', fill_value=dtypes.NA, join='outer'): """ Attempt to auto-magically combine the given datasets into one by using dimension coordinates. @@ -439,6 +445,8 @@ def combine_by_coords(datasets, compat='no_conflicts', data_vars='all', Details are in the documentation of concat fill_value : scalar, optional Value to use for newly missing values + join : {'outer', 'inner', 'left', 'right', 'exact'}, optional + How to combine objects with different indexes. 
Returns ------- @@ -524,7 +532,7 @@ def combine_by_coords(datasets, compat='no_conflicts', data_vars='all', def auto_combine(datasets, concat_dim='_not_supplied', compat='no_conflicts', data_vars='all', coords='different', fill_value=dtypes.NA, - from_openmfds=False): + join='outer', from_openmfds=False): """ Attempt to auto-magically combine the given datasets into one. @@ -572,6 +580,8 @@ def auto_combine(datasets, concat_dim='_not_supplied', compat='no_conflicts', Details are in the documentation of concat fill_value : scalar, optional Value to use for newly missing values + join : {'outer', 'inner', 'left', 'right', 'exact'}, optional + How to combine objects with different indexes. Returns ------- @@ -630,7 +640,8 @@ def auto_combine(datasets, concat_dim='_not_supplied', compat='no_conflicts', return _old_auto_combine(datasets, concat_dim=concat_dim, compat=compat, data_vars=data_vars, - coords=coords, fill_value=fill_value) + coords=coords, fill_value=fill_value, + join=join) def _dimension_coords_exist(datasets): @@ -671,7 +682,7 @@ def _requires_concat_and_merge(datasets): def _old_auto_combine(datasets, concat_dim=_CONCAT_DIM_DEFAULT, compat='no_conflicts', data_vars='all', coords='different', - fill_value=dtypes.NA): + fill_value=dtypes.NA, join='outer'): if concat_dim is not None: dim = None if concat_dim is _CONCAT_DIM_DEFAULT else concat_dim @@ -680,16 +691,17 @@ def _old_auto_combine(datasets, concat_dim=_CONCAT_DIM_DEFAULT, concatenated = [_auto_concat(list(datasets), dim=dim, data_vars=data_vars, coords=coords, - fill_value=fill_value) + fill_value=fill_value, join=join) for vars, datasets in grouped] else: concatenated = datasets - merged = merge(concatenated, compat=compat, fill_value=fill_value) + merged = merge(concatenated, compat=compat, fill_value=fill_value, + join=join) return merged def _auto_concat(datasets, dim=None, data_vars='all', coords='different', - fill_value=dtypes.NA): + fill_value=dtypes.NA, join='outer'): if len(datasets) == 1 
and dim is None: # There is nothing more to combine, so kick out early. return datasets[0] diff --git a/xarray/core/concat.py b/xarray/core/concat.py index cd59d87870e..fecb2ba863c 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -11,7 +11,7 @@ def concat(objs, dim=None, data_vars='all', coords='different', compat='equals', positions=None, indexers=None, mode=None, - concat_over=None, fill_value=dtypes.NA): + concat_over=None, fill_value=dtypes.NA, join='outer'): """Concatenate xarray objects along a new or existing dimension. Parameters @@ -65,6 +65,8 @@ def concat(objs, dim=None, data_vars='all', coords='different', supplied, objects are concatenated in the provided order. fill_value : scalar, optional Value to use for newly missing values + join : {'outer', 'inner', 'left', 'right', 'exact'}, optional + How to combine objects with different indexes. indexers, mode, concat_over : deprecated Returns @@ -116,7 +118,7 @@ def concat(objs, dim=None, data_vars='all', coords='different', else: raise TypeError('can only concatenate xarray Dataset and DataArray ' 'objects, got %s' % type(first_obj)) - return f(objs, dim, data_vars, coords, compat, positions, fill_value) + return f(objs, dim, data_vars, coords, compat, positions, fill_value, join) def _calc_concat_dim_coord(dim): @@ -212,7 +214,7 @@ def process_subset_opt(opt, subset): def _dataset_concat(datasets, dim, data_vars, coords, compat, positions, - fill_value=dtypes.NA): + fill_value=dtypes.NA, join='outer'): """ Concatenate a sequence of datasets along a new or existing dimension """ @@ -225,7 +227,7 @@ def _dataset_concat(datasets, dim, data_vars, coords, compat, positions, dim, coord = _calc_concat_dim_coord(dim) # Make sure we're working on a copy (we'll be loading variables) datasets = [ds.copy() for ds in datasets] - datasets = align(*datasets, join='outer', copy=False, exclude=[dim], + datasets = align(*datasets, join=join, copy=False, exclude=[dim], fill_value=fill_value) concat_over, 
equals = _calc_concat_over(datasets, dim, data_vars, coords) @@ -318,7 +320,7 @@ def ensure_common_dims(vars): def _dataarray_concat(arrays, dim, data_vars, coords, compat, - positions, fill_value=dtypes.NA): + positions, fill_value=dtypes.NA, join='outer'): arrays = list(arrays) if data_vars != 'all': @@ -337,5 +339,5 @@ def _dataarray_concat(arrays, dim, data_vars, coords, compat, datasets.append(arr._to_temp_dataset()) ds = _dataset_concat(datasets, dim, data_vars, coords, compat, - positions, fill_value=fill_value) + positions, fill_value=fill_value, join=join) return arrays[0]._from_temp_dataset(ds, name) From 49f87bf34a8d1e8e9414437b615feac0b11457b7 Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 12 Jul 2019 08:51:47 -0600 Subject: [PATCH 02/10] Add whats-new.rst --- doc/whats-new.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 03898ae1d2a..a83ff5f0af5 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -30,6 +30,9 @@ New functions/methods Enhancements ~~~~~~~~~~~~ +- :py:func:`~xarray.concat` and :py:func:`~xarray.open_mfdataset` now support the ``join`` kwarg. + It is passed down to :py:func:`~xarray.align`. By `Deepak Cherian `_. 
+ Bug fixes ~~~~~~~~~ From 5b7f0ed5857fa0ffde9f9a8feb74a8ca297ffe9e Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 15 Jul 2019 10:06:24 -0600 Subject: [PATCH 03/10] Add concat tests --- xarray/tests/test_concat.py | 52 +++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 6218f752bb7..deed6748761 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -164,6 +164,32 @@ def test_concat_errors(self): with raises_regex(ValueError, 'no longer a valid'): concat([data, data], 'new_dim', concat_over='different') + def test_concat_join_kwarg(self): + ds1 = Dataset({'a': (('x', 'y'), [[0]])}, + coords={'x': [0], 'y': [0]}) + ds2 = Dataset({'a': (('x', 'y'), [[0]])}, + coords={'x': [1], 'y': [0.0001]}) + + expected = dict() + expected['outer'] = Dataset({'a': (('x', 'y'), + [[0, np.nan], [np.nan, 0]])}, + {'x': [0, 1], 'y': [0, 0.0001]}) + expected['inner'] = Dataset({'a': (('x', 'y'), [[], []])}, + {'x': [0, 1], 'y': []}) + expected['left'] = Dataset({'a': (('x', 'y'), + np.array([0, np.nan], ndmin=2).T)}, + coords={'x': [0, 1], 'y': [0]}) + expected['right'] = Dataset({'a': (('x', 'y'), + np.array([np.nan, 0], ndmin=2).T)}, + coords={'x': [0, 1], 'y': [0.0001]}) + + with raises_regex(ValueError, "indexes along dimension 'y'"): + actual = concat([ds1, ds2], join='exact', dim='x') + + for join in expected: + actual = concat([ds1, ds2], join=join, dim='x') + assert_equal(actual, expected[join]) + def test_concat_promote_shape(self): # mixed dims within variables objs = [Dataset({}, {'x': 0}), Dataset({'x': [1]})] @@ -318,3 +344,29 @@ def test_concat_fill_value(self, fill_value): dims=['y', 'x'], coords={'x': [1, 2, 3]}) actual = concat((foo, bar), dim='y', fill_value=fill_value) assert_identical(actual, expected) + + def test_concat_join_kwarg(self): + ds1 = Dataset({'a': (('x', 'y'), [[0]])}, + coords={'x': [0], 'y': [0]}).to_array() + ds2 = Dataset({'a': 
(('x', 'y'), [[0]])}, + coords={'x': [1], 'y': [0.0001]}).to_array() + + expected = dict() + expected['outer'] = Dataset({'a': (('x', 'y'), + [[0, np.nan], [np.nan, 0]])}, + {'x': [0, 1], 'y': [0, 0.0001]}) + expected['inner'] = Dataset({'a': (('x', 'y'), [[], []])}, + {'x': [0, 1], 'y': []}) + expected['left'] = Dataset({'a': (('x', 'y'), + np.array([0, np.nan], ndmin=2).T)}, + coords={'x': [0, 1], 'y': [0]}) + expected['right'] = Dataset({'a': (('x', 'y'), + np.array([np.nan, 0], ndmin=2).T)}, + coords={'x': [0, 1], 'y': [0.0001]}) + + with raises_regex(ValueError, "indexes along dimension 'y'"): + actual = concat([ds1, ds2], join='exact', dim='x') + + for join in expected: + actual = concat([ds1, ds2], join=join, dim='x') + assert_equal(actual, expected[join].to_array()) From cdb8ee4a468245e140ef73386aabe75d0d1b0901 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 15 Jul 2019 10:12:45 -0600 Subject: [PATCH 04/10] doc improvements. --- xarray/backends/api.py | 2 +- xarray/core/concat.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 529a2b8e2a2..f415dfd6756 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -705,7 +705,7 @@ def open_mfdataset(paths, chunks=None, concat_dim='_not_supplied', If True, the open and preprocess steps of this function will be performed in parallel using ``dask.delayed``. Default is False. join : {'outer', 'inner', 'left', 'right', 'exact'}, optional - Passed on to align. + How to combine objects with different indexes. **kwargs : optional Additional arguments passed on to :py:func:`xarray.open_dataset`. 
diff --git a/xarray/core/concat.py b/xarray/core/concat.py index fecb2ba863c..9f8c8493b03 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -66,7 +66,8 @@ def concat(objs, dim=None, data_vars='all', coords='different', fill_value : scalar, optional Value to use for newly missing values join : {'outer', 'inner', 'left', 'right', 'exact'}, optional - How to combine objects with different indexes. + How to combine objects with different indexes + (excluding index along 'dim'). indexers, mode, concat_over : deprecated Returns From 85c1341b04f1db2acabb5d8ac779a4e61f398fa9 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 15 Jul 2019 10:20:47 -0600 Subject: [PATCH 05/10] update todo. --- xarray/core/concat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 9f8c8493b03..194076e4f2a 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -79,7 +79,7 @@ def concat(objs, dim=None, data_vars='all', coords='different', merge auto_combine """ - # TODO: add join and ignore_index arguments copied from pandas.concat + # TODO: add ignore_index arguments copied from pandas.concat # TODO: support concatenating scalar coordinates even if the concatenated # dimension already exists from .dataset import Dataset From fb65e0ff90d3586cb86e9d1d9cf69310bd7025db Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 17 Jul 2019 09:40:02 -0600 Subject: [PATCH 06/10] mfdataset tests. 
--- xarray/core/combine.py | 6 +++--- xarray/tests/test_backends.py | 35 ++++++++++++++++++++++++----------- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 61a0dbe2da7..b0c61880508 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -213,7 +213,7 @@ def _combine_1d(datasets, concat_dim, compat='no_conflicts', data_vars='all', if concat_dim is not None: try: combined = concat(datasets, dim=concat_dim, data_vars=data_vars, - coords=coords, fill_value=fill_value) + coords=coords, fill_value=fill_value, join=join) except ValueError as err: if "encountered unexpected variable" in str(err): raise ValueError("These objects cannot be combined using only " @@ -506,7 +506,7 @@ def combine_by_coords(datasets, compat='no_conflicts', data_vars='all', # Concatenate along all of concat_dims one by one to create single ds concatenated = _combine_nd(combined_ids, concat_dims=concat_dims, data_vars=data_vars, coords=coords, - fill_value=fill_value) + fill_value=fill_value, join=join) # Check the overall coordinates are monotonically increasing for dim in concatenated.dims: @@ -520,7 +520,7 @@ def combine_by_coords(datasets, compat='no_conflicts', data_vars='all', concatenated_grouped_by_data_vars.append(concatenated) return merge(concatenated_grouped_by_data_vars, compat=compat, - fill_value=fill_value) + fill_value=fill_value, join=join) # Everything beyond here is only needed until the deprecation cycle in #2616 diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index f5c27f2fb92..ccd233fc8b4 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2358,8 +2358,12 @@ class TestOpenMFDatasetWithDataVarsAndCoordsKw: var_name = 'v1' @contextlib.contextmanager - def setup_files_and_datasets(self): + def setup_files_and_datasets(self, fuzz=0): ds1, ds2 = self.gen_datasets_with_common_coord_and_time() + + # to test join='exact' + ds1['x'] = ds1.x + 
fuzz + with create_tmp_file() as tmpfile1: with create_tmp_file() as tmpfile2: @@ -2396,20 +2400,29 @@ def gen_datasets_with_common_coord_and_time(self): return ds1, ds2 + @pytest.mark.parametrize('combine', ['nested', 'by_coords']) @pytest.mark.parametrize('opt', ['all', 'minimal', 'different']) - def test_open_mfdataset_does_same_as_concat(self, opt): + @pytest.mark.parametrize('join', ['outer', 'inner', 'left', 'right']) + def test_open_mfdataset_does_same_as_concat(self, combine, opt, join): with self.setup_files_and_datasets() as (files, [ds1, ds2]): - with open_mfdataset(files, data_vars=opt, - combine='nested', concat_dim='t') as ds: - kwargs = dict(data_vars=opt, dim='t') - ds_expect = xr.concat([ds1, ds2], **kwargs) - assert_identical(ds, ds_expect) - with open_mfdataset(files, coords=opt, - combine='nested', concat_dim='t') as ds: - kwargs = dict(coords=opt, dim='t') - ds_expect = xr.concat([ds1, ds2], **kwargs) + if combine == 'by_coords': + files.reverse() + with open_mfdataset(files, data_vars=opt, combine=combine, + concat_dim='t', join=join) as ds: + ds_expect = xr.concat([ds1, ds2], data_vars=opt, dim='t', + join=join) assert_identical(ds, ds_expect) + @pytest.mark.parametrize('combine', ['nested', 'by_coords']) + @pytest.mark.parametrize('opt', ['all', 'minimal', 'different']) + def test_open_mfdataset_exact_join_raises_error(self, combine, opt): + with self.setup_files_and_datasets(fuzz=0.1) as (files, [ds1, ds2]): + if combine == 'by_coords': + files.reverse() + with raises_regex(ValueError, 'indexes along dimension'): + open_mfdataset(files, data_vars=opt, combine=combine, + concat_dim='t', join='exact') + def test_common_coord_when_datavars_all(self): opt = 'all' From ed01d7c2d4c63451661440d5d151617a8611c47c Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 6 Aug 2019 08:02:36 -0600 Subject: [PATCH 07/10] =?UTF-8?q?manual=5Fcombine=20=E2=86=92=20combine=5F?= =?UTF-8?q?nested?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- xarray/tests/test_combine.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 2a71a3a3ed4..9468a0879d8 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -306,8 +306,8 @@ def test_check_lengths(self): _check_shape_tile_ids(combined_tile_ids) -class TestManualCombine: - def test_manual_concat(self): +class TestNestedCombine: + def test_nested_concat(self): objs = [Dataset({'x': [0]}), Dataset({'x': [1]})] expected = Dataset({'x': [0, 1]}) actual = combine_nested(objs, concat_dim='x') @@ -326,7 +326,7 @@ def test_manual_concat(self): expected = Dataset({'x': [0, 1, 2]}) assert_identical(expected, actual) - # ensure manual_combine handles non-sorted variables + # ensure combine_nested handles non-sorted variables objs = [Dataset(OrderedDict([('x', ('a', [0])), ('y', ('a', [0]))])), Dataset(OrderedDict([('y', ('a', [1])), ('x', ('a', [1]))]))] actual = combine_nested(objs, concat_dim='a') @@ -342,12 +342,12 @@ def test_empty_input(self): # Fails because of concat's weird treatment of dimension coords, see #2975 @pytest.mark.xfail - def test_manual_concat_too_many_dims_at_once(self): + def test_nested_concat_too_many_dims_at_once(self): objs = [Dataset({'x': [0], 'y': [1]}), Dataset({'y': [0], 'x': [1]})] with pytest.raises(ValueError, match="not equal across datasets"): combine_nested(objs, concat_dim='x', coords='minimal') - def test_manual_concat_along_new_dim(self): + def test_nested_concat_along_new_dim(self): objs = [Dataset({'a': ('x', [10]), 'x': [0]}), Dataset({'a': ('x', [20]), 'x': [0]})] expected = Dataset({'a': (('t', 'x'), [[10], [20]]), 'x': [0]}) @@ -361,7 +361,7 @@ def test_manual_concat_along_new_dim(self): actual = combine_nested(objs, concat_dim=dim) assert_identical(expected, actual) - def test_manual_merge(self): + def test_nested_merge(self): data = Dataset({'x': 0}) actual = 
combine_nested([data, data, data], concat_dim=None) assert_identical(data, actual) @@ -450,7 +450,7 @@ def test_auto_combine_2d(self): result = combine_nested(datasets, concat_dim=['dim1', 'dim2']) assert_equal(result, expected) - def test_manual_combine_missing_data_new_dim(self): + def test_combine_nested_missing_data_new_dim(self): # Your data includes "time" and "station" dimensions, and each year's # data has a different set of stations. datasets = [Dataset({'a': ('x', [2, 3]), 'x': [1, 2]}), @@ -513,7 +513,7 @@ def test_combine_concat_over_redundant_nesting(self): expected = Dataset({'x': [0]}) assert_identical(expected, actual) - def test_manual_combine_but_need_auto_combine(self): + def test_combine_nested_but_need_auto_combine(self): objs = [Dataset({'x': [0, 1]}), Dataset({'x': [2], 'wall': [0]})] with raises_regex(ValueError, 'cannot be combined'): combine_nested(objs, concat_dim='x') From e4252e9e85ee71ff7b0364f0ed2fb173898d7f55 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 6 Aug 2019 08:03:04 -0600 Subject: [PATCH 08/10] Add tests for combine_nested & combine_coords --- xarray/tests/test_combine.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 9468a0879d8..c56daa8208d 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -337,6 +337,24 @@ def test_nested_concat(self): with pytest.raises(KeyError): combine_nested(objs, concat_dim='x') + @pytest.mark.parametrize("join, expected", + [('outer', Dataset({'x': [0, 1], 'y': [0, 1]})), + ('inner', Dataset({'x': [0, 1], 'y': []})), + ('left', Dataset({'x': [0, 1], 'y': [0]})), + ('right', Dataset({'x': [0, 1], 'y': [1]})), + ]) + def test_combine_nested_join(self, join, expected): + objs = [Dataset({'x': [0], 'y': [0]}), + Dataset({'x': [1], 'y': [1]})] + actual = combine_nested(objs, concat_dim='x', join=join) + assert_identical(expected, actual) + + def 
test_combine_nested_join_exact(self): + objs = [Dataset({'x': [0], 'y': [0]}), + Dataset({'x': [1], 'y': [1]})] + with raises_regex(ValueError, 'indexes along dimension'): + actual = combine_nested(objs, concat_dim='x', join='exact') + def test_empty_input(self): assert_identical(Dataset(), combine_nested([], concat_dim='x')) @@ -574,6 +592,24 @@ def test_combine_by_coords(self): def test_empty_input(self): assert_identical(Dataset(), combine_by_coords([])) + @pytest.mark.parametrize("join, expected", + [('outer', Dataset({'x': [0, 1], 'y': [0, 1]})), + ('inner', Dataset({'x': [0, 1], 'y': []})), + ('left', Dataset({'x': [0, 1], 'y': [0]})), + ('right', Dataset({'x': [0, 1], 'y': [1]})), + ]) + def test_combine_coords_join(self, join, expected): + objs = [Dataset({'x': [0], 'y': [0]}), + Dataset({'x': [1], 'y': [1]})] + actual = combine_nested(objs, concat_dim='x', join=join) + assert_identical(expected, actual) + + def test_combine_coords_join_exact(self): + objs = [Dataset({'x': [0], 'y': [0]}), + Dataset({'x': [1], 'y': [1]})] + with raises_regex(ValueError, 'indexes along dimension'): + actual = combine_nested(objs, concat_dim='x', join='exact') + def test_infer_order_from_coords(self): data = create_test_data() objs = [data.isel(dim2=slice(4, 9)), data.isel(dim2=slice(4))] From 23875bbac8d81da3d104f8794715050398b97bf2 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 6 Aug 2019 08:23:32 -0600 Subject: [PATCH 09/10] Update docstring. 
--- xarray/backends/api.py | 10 +++++++++- xarray/core/combine.py | 30 +++++++++++++++++++++++++++--- xarray/core/concat.py | 14 +++++++++++--- xarray/core/merge.py | 9 ++++++++- 4 files changed, 55 insertions(+), 8 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index c6d4befaceb..2535c7118a5 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -705,7 +705,15 @@ def open_mfdataset(paths, chunks=None, concat_dim='_not_supplied', If True, the open and preprocess steps of this function will be performed in parallel using ``dask.delayed``. Default is False. join : {'outer', 'inner', 'left', 'right', 'exact'}, optional - How to combine objects with different indexes. + String indicating how to combine differing indexes + (excluding concat_dim) in objects + + - 'outer': use the union of object indexes + - 'inner': use the intersection of object indexes + - 'left': use indexes from the first object with each dimension + - 'right': use indexes from the last object with each dimension + - 'exact': instead of aligning, raise `ValueError` when indexes to be + aligned are not equal **kwargs : optional Additional arguments passed on to :py:func:`xarray.open_dataset`. diff --git a/xarray/core/combine.py b/xarray/core/combine.py index a66566c6dd5..71da4e4e094 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -317,7 +317,15 @@ def combine_nested(datasets, concat_dim, compat='no_conflicts', fill_value : scalar, optional Value to use for newly missing values join : {'outer', 'inner', 'left', 'right', 'exact'}, optional - How to combine objects with different indexes. 
+ String indicating how to combine differing indexes + (excluding concat_dim) in objects + + - 'outer': use the union of object indexes + - 'inner': use the intersection of object indexes + - 'left': use indexes from the first object with each dimension + - 'right': use indexes from the last object with each dimension + - 'exact': instead of aligning, raise `ValueError` when indexes to be + aligned are not equal Returns ------- @@ -446,7 +454,15 @@ def combine_by_coords(datasets, compat='no_conflicts', data_vars='all', fill_value : scalar, optional Value to use for newly missing values join : {'outer', 'inner', 'left', 'right', 'exact'}, optional - How to combine objects with different indexes. + String indicating how to combine differing indexes + (excluding concat_dim) in objects + + - 'outer': use the union of object indexes + - 'inner': use the intersection of object indexes + - 'left': use indexes from the first object with each dimension + - 'right': use indexes from the last object with each dimension + - 'exact': instead of aligning, raise `ValueError` when indexes to be + aligned are not equal Returns ------- @@ -580,7 +596,15 @@ def auto_combine(datasets, concat_dim='_not_supplied', compat='no_conflicts', fill_value : scalar, optional Value to use for newly missing values join : {'outer', 'inner', 'left', 'right', 'exact'}, optional - How to combine objects with different indexes. 
+ String indicating how to combine differing indexes + (excluding concat_dim) in objects + + - 'outer': use the union of object indexes + - 'inner': use the intersection of object indexes + - 'left': use indexes from the first object with each dimension + - 'right': use indexes from the last object with each dimension + - 'exact': instead of aligning, raise `ValueError` when indexes to be + aligned are not equal Returns ------- diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 194076e4f2a..a6570525cc5 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -52,7 +52,7 @@ def concat(objs, dim=None, data_vars='all', coords='different', * 'all': All coordinate variables will be concatenated, except those corresponding to other dimensions. * list of str: The listed coordinate variables will be concatenated, - in addition the 'minimal' coordinates. + in addition to the 'minimal' coordinates. compat : {'equals', 'identical'}, optional String indicating how to compare non-concatenated variables and dataset global attributes for potential conflicts. 'equals' means @@ -66,8 +66,16 @@ def concat(objs, dim=None, data_vars='all', coords='different', fill_value : scalar, optional Value to use for newly missing values join : {'outer', 'inner', 'left', 'right', 'exact'}, optional - How to combine objects with different indexes - (excluding index along 'dim'). 
+ String indicating how to combine differing indexes + (excluding dim) in objects + + - 'outer': use the union of object indexes + - 'inner': use the intersection of object indexes + - 'left': use indexes from the first object with each dimension + - 'right': use indexes from the last object with each dimension + - 'exact': instead of aligning, raise `ValueError` when indexes to be + aligned are not equal + indexers, mode, concat_over : deprecated Returns diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 9c909aa197c..289b70ed518 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -530,7 +530,14 @@ def merge(objects, compat='no_conflicts', join='outer', fill_value=dtypes.NA): must be equal. The returned dataset then contains the combination of all non-null values. join : {'outer', 'inner', 'left', 'right', 'exact'}, optional - How to combine objects with different indexes. + String indicating how to combine differing indexes in objects. + + - 'outer': use the union of object indexes + - 'inner': use the intersection of object indexes + - 'left': use indexes from the first object with each dimension + - 'right': use indexes from the last object with each dimension + - 'exact': instead of aligning, raise `ValueError` when indexes to be + aligned are not equal fill_value : scalar, optional Value to use for newly missing values From a4750ce41ab9e0bfeb85cf307af02898dbbe3f04 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 6 Aug 2019 08:33:08 -0600 Subject: [PATCH 10/10] lint. 
--- xarray/tests/test_combine.py | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index c56daa8208d..8c9308466a4 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -337,12 +337,14 @@ def test_nested_concat(self): with pytest.raises(KeyError): combine_nested(objs, concat_dim='x') - @pytest.mark.parametrize("join, expected", - [('outer', Dataset({'x': [0, 1], 'y': [0, 1]})), - ('inner', Dataset({'x': [0, 1], 'y': []})), - ('left', Dataset({'x': [0, 1], 'y': [0]})), - ('right', Dataset({'x': [0, 1], 'y': [1]})), - ]) + @pytest.mark.parametrize( + "join, expected", + [ + ('outer', Dataset({'x': [0, 1], 'y': [0, 1]})), + ('inner', Dataset({'x': [0, 1], 'y': []})), + ('left', Dataset({'x': [0, 1], 'y': [0]})), + ('right', Dataset({'x': [0, 1], 'y': [1]})), + ]) def test_combine_nested_join(self, join, expected): objs = [Dataset({'x': [0], 'y': [0]}), Dataset({'x': [1], 'y': [1]})] @@ -353,7 +355,7 @@ def test_combine_nested_join_exact(self): objs = [Dataset({'x': [0], 'y': [0]}), Dataset({'x': [1], 'y': [1]})] with raises_regex(ValueError, 'indexes along dimension'): - actual = combine_nested(objs, concat_dim='x', join='exact') + combine_nested(objs, concat_dim='x', join='exact') def test_empty_input(self): assert_identical(Dataset(), combine_nested([], concat_dim='x')) @@ -592,12 +594,14 @@ def test_combine_by_coords(self): def test_empty_input(self): assert_identical(Dataset(), combine_by_coords([])) - @pytest.mark.parametrize("join, expected", - [('outer', Dataset({'x': [0, 1], 'y': [0, 1]})), - ('inner', Dataset({'x': [0, 1], 'y': []})), - ('left', Dataset({'x': [0, 1], 'y': [0]})), - ('right', Dataset({'x': [0, 1], 'y': [1]})), - ]) + @pytest.mark.parametrize( + "join, expected", + [ + ('outer', Dataset({'x': [0, 1], 'y': [0, 1]})), + ('inner', Dataset({'x': [0, 1], 'y': []})), + ('left', Dataset({'x': [0, 1], 'y': [0]})), + 
('right', Dataset({'x': [0, 1], 'y': [1]})), + ]) def test_combine_coords_join(self, join, expected): objs = [Dataset({'x': [0], 'y': [0]}), Dataset({'x': [1], 'y': [1]})] @@ -608,7 +612,7 @@ def test_combine_coords_join_exact(self): objs = [Dataset({'x': [0], 'y': [0]}), Dataset({'x': [1], 'y': [1]})] with raises_regex(ValueError, 'indexes along dimension'): - actual = combine_nested(objs, concat_dim='x', join='exact') + combine_nested(objs, concat_dim='x', join='exact') def test_infer_order_from_coords(self): data = create_test_data()