Skip to content

Commit

Permalink
open_mfdataset, concat now support the 'join' kwarg.
Browse files Browse the repository at this point in the history
Closes #1354
  • Loading branch information
dcherian committed Jul 12, 2019
1 parent 8f0d9e5 commit ec7fced
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 27 deletions.
11 changes: 7 additions & 4 deletions xarray/backends/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -608,7 +608,7 @@ def open_mfdataset(paths, chunks=None, concat_dim='_not_supplied',
compat='no_conflicts', preprocess=None, engine=None,
lock=None, data_vars='all', coords='different',
combine='_old_auto', autoclose=None, parallel=False,
**kwargs):
join='outer', **kwargs):
"""Open multiple files as a single dataset.
If combine='by_coords' then the function ``combine_by_coords`` is used to
Expand Down Expand Up @@ -703,6 +703,8 @@ def open_mfdataset(paths, chunks=None, concat_dim='_not_supplied',
parallel : bool, optional
If True, the open and preprocess steps of this function will be
performed in parallel using ``dask.delayed``. Default is False.
join : {'outer', 'inner', 'left', 'right', 'exact'}, optional
Passed on to align.
**kwargs : optional
Additional arguments passed on to :py:func:`xarray.open_dataset`.
Expand Down Expand Up @@ -788,18 +790,19 @@ def open_mfdataset(paths, chunks=None, concat_dim='_not_supplied',
# Remove this after deprecation cycle from #2616 is complete
combined = auto_combine(datasets, concat_dim=concat_dim,
compat=compat, data_vars=data_vars,
coords=coords)
coords=coords, join=join)
elif combine == 'nested':
# Combined nested list by successive concat and merge operations
# along each dimension, using structure given by "ids"
combined = _nested_combine(datasets, concat_dims=concat_dim,
compat=compat, data_vars=data_vars,
coords=coords, ids=ids)
coords=coords, ids=ids, join=join)
elif combine == 'by_coords':
# Redo ordering from coordinates, ignoring how they were ordered
# previously
combined = combine_by_coords(datasets, compat=compat,
data_vars=data_vars, coords=coords)
data_vars=data_vars, coords=coords,
join=join)
else:
raise ValueError("{} is an invalid option for the keyword argument"
" ``combine``".format(combine))
Expand Down
47 changes: 30 additions & 17 deletions xarray/core/combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def _check_shape_tile_ids(combined_tile_ids):

def _combine_nd(combined_ids, concat_dims, data_vars='all',
coords='different', compat='no_conflicts',
fill_value=dtypes.NA):
fill_value=dtypes.NA, join='outer'):
"""
Combines an N-dimensional structure of datasets into one by applying a
series of either concat and merge operations along each dimension.
Expand Down Expand Up @@ -177,13 +177,14 @@ def _combine_nd(combined_ids, concat_dims, data_vars='all',
data_vars=data_vars,
coords=coords,
compat=compat,
fill_value=fill_value)
fill_value=fill_value,
join=join)
(combined_ds,) = combined_ids.values()
return combined_ds


def _combine_all_along_first_dim(combined_ids, dim, data_vars, coords, compat,
fill_value=dtypes.NA):
fill_value=dtypes.NA, join='outer'):

# Group into lines of datasets which must be combined along dim
# need to sort by _new_tile_id first for groupby to work
Expand All @@ -197,12 +198,13 @@ def _combine_all_along_first_dim(combined_ids, dim, data_vars, coords, compat,
combined_ids = OrderedDict(sorted(group))
datasets = combined_ids.values()
new_combined_ids[new_id] = _combine_1d(datasets, dim, compat,
data_vars, coords, fill_value)
data_vars, coords, fill_value,
join)
return new_combined_ids


def _combine_1d(datasets, concat_dim, compat='no_conflicts', data_vars='all',
coords='different', fill_value=dtypes.NA):
coords='different', fill_value=dtypes.NA, join='outer'):
"""
Applies either concat or merge to 1D list of datasets depending on value
of concat_dim
Expand All @@ -222,7 +224,8 @@ def _combine_1d(datasets, concat_dim, compat='no_conflicts', data_vars='all',
else:
raise
else:
combined = merge(datasets, compat=compat, fill_value=fill_value)
combined = merge(datasets, compat=compat, fill_value=fill_value,
join=join)

return combined

Expand All @@ -233,7 +236,7 @@ def _new_tile_id(single_id_ds_pair):


def _nested_combine(datasets, concat_dims, compat, data_vars, coords, ids,
fill_value=dtypes.NA):
fill_value=dtypes.NA, join='outer'):

if len(datasets) == 0:
return Dataset()
Expand All @@ -254,12 +257,13 @@ def _nested_combine(datasets, concat_dims, compat, data_vars, coords, ids,
# Apply series of concatenate or merge operations along each dimension
combined = _combine_nd(combined_ids, concat_dims, compat=compat,
data_vars=data_vars, coords=coords,
fill_value=fill_value)
fill_value=fill_value, join=join)
return combined


def combine_nested(datasets, concat_dim, compat='no_conflicts',
data_vars='all', coords='different', fill_value=dtypes.NA):
data_vars='all', coords='different', fill_value=dtypes.NA,
join='outer'):
"""
Explicitly combine an N-dimensional grid of datasets into one by using a
succession of concat and merge operations along each dimension of the grid.
Expand Down Expand Up @@ -312,6 +316,8 @@ def combine_nested(datasets, concat_dim, compat='no_conflicts',
Details are in the documentation of concat
fill_value : scalar, optional
Value to use for newly missing values
join : {'outer', 'inner', 'left', 'right', 'exact'}, optional
How to combine objects with different indexes.
Returns
-------
Expand Down Expand Up @@ -383,15 +389,15 @@ def combine_nested(datasets, concat_dim, compat='no_conflicts',
# The IDs argument tells _manual_combine that datasets aren't yet sorted
return _nested_combine(datasets, concat_dims=concat_dim, compat=compat,
data_vars=data_vars, coords=coords, ids=False,
fill_value=fill_value)
fill_value=fill_value, join=join)


def vars_as_keys(ds):
return tuple(sorted(ds))


def combine_by_coords(datasets, compat='no_conflicts', data_vars='all',
coords='different', fill_value=dtypes.NA):
coords='different', fill_value=dtypes.NA, join='outer'):
"""
Attempt to auto-magically combine the given datasets into one by using
dimension coordinates.
Expand Down Expand Up @@ -439,6 +445,8 @@ def combine_by_coords(datasets, compat='no_conflicts', data_vars='all',
Details are in the documentation of concat
fill_value : scalar, optional
Value to use for newly missing values
join : {'outer', 'inner', 'left', 'right', 'exact'}, optional
How to combine objects with different indexes.
Returns
-------
Expand Down Expand Up @@ -523,7 +531,8 @@ def combine_by_coords(datasets, compat='no_conflicts', data_vars='all',


def auto_combine(datasets, concat_dim='_not_supplied', compat='no_conflicts',
data_vars='all', coords='different', fill_value=dtypes.NA):
data_vars='all', coords='different', fill_value=dtypes.NA,
join='outer'):
"""
Attempt to auto-magically combine the given datasets into one.
Expand Down Expand Up @@ -571,6 +580,8 @@ def auto_combine(datasets, concat_dim='_not_supplied', compat='no_conflicts',
Details are in the documentation of concat
fill_value : scalar, optional
Value to use for newly missing values
join : {'outer', 'inner', 'left', 'right', 'exact'}, optional
How to combine objects with different indexes.
Returns
-------
Expand Down Expand Up @@ -626,7 +637,8 @@ def auto_combine(datasets, concat_dim='_not_supplied', compat='no_conflicts',

return _old_auto_combine(datasets, concat_dim=concat_dim,
compat=compat, data_vars=data_vars,
coords=coords, fill_value=fill_value)
coords=coords, fill_value=fill_value,
join=join)


def _dimension_coords_exist(datasets):
Expand Down Expand Up @@ -667,7 +679,7 @@ def _requires_concat_and_merge(datasets):
def _old_auto_combine(datasets, concat_dim=_CONCAT_DIM_DEFAULT,
compat='no_conflicts',
data_vars='all', coords='different',
fill_value=dtypes.NA):
fill_value=dtypes.NA, join='outer'):
if concat_dim is not None:
dim = None if concat_dim is _CONCAT_DIM_DEFAULT else concat_dim

Expand All @@ -676,16 +688,17 @@ def _old_auto_combine(datasets, concat_dim=_CONCAT_DIM_DEFAULT,

concatenated = [_auto_concat(list(datasets), dim=dim,
data_vars=data_vars, coords=coords,
fill_value=fill_value)
fill_value=fill_value, join=join)
for vars, datasets in grouped]
else:
concatenated = datasets
merged = merge(concatenated, compat=compat, fill_value=fill_value)
merged = merge(concatenated, compat=compat, fill_value=fill_value,
join=join)
return merged


def _auto_concat(datasets, dim=None, data_vars='all', coords='different',
fill_value=dtypes.NA):
fill_value=dtypes.NA, join='outer'):
if len(datasets) == 1 and dim is None:
# There is nothing more to combine, so kick out early.
return datasets[0]
Expand Down
14 changes: 8 additions & 6 deletions xarray/core/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

def concat(objs, dim=None, data_vars='all', coords='different',
compat='equals', positions=None, indexers=None, mode=None,
concat_over=None, fill_value=dtypes.NA):
concat_over=None, fill_value=dtypes.NA, join='outer'):
"""Concatenate xarray objects along a new or existing dimension.
Parameters
Expand Down Expand Up @@ -65,6 +65,8 @@ def concat(objs, dim=None, data_vars='all', coords='different',
supplied, objects are concatenated in the provided order.
fill_value : scalar, optional
Value to use for newly missing values
join : {'outer', 'inner', 'left', 'right', 'exact'}, optional
How to combine objects with different indexes.
indexers, mode, concat_over : deprecated
Returns
Expand Down Expand Up @@ -116,7 +118,7 @@ def concat(objs, dim=None, data_vars='all', coords='different',
else:
raise TypeError('can only concatenate xarray Dataset and DataArray '
'objects, got %s' % type(first_obj))
return f(objs, dim, data_vars, coords, compat, positions, fill_value)
return f(objs, dim, data_vars, coords, compat, positions, fill_value, join)


def _calc_concat_dim_coord(dim):
Expand Down Expand Up @@ -212,7 +214,7 @@ def process_subset_opt(opt, subset):


def _dataset_concat(datasets, dim, data_vars, coords, compat, positions,
fill_value=dtypes.NA):
fill_value=dtypes.NA, join='outer'):
"""
Concatenate a sequence of datasets along a new or existing dimension
"""
Expand All @@ -225,7 +227,7 @@ def _dataset_concat(datasets, dim, data_vars, coords, compat, positions,
dim, coord = _calc_concat_dim_coord(dim)
# Make sure we're working on a copy (we'll be loading variables)
datasets = [ds.copy() for ds in datasets]
datasets = align(*datasets, join='outer', copy=False, exclude=[dim],
datasets = align(*datasets, join=join, copy=False, exclude=[dim],
fill_value=fill_value)

concat_over, equals = _calc_concat_over(datasets, dim, data_vars, coords)
Expand Down Expand Up @@ -318,7 +320,7 @@ def ensure_common_dims(vars):


def _dataarray_concat(arrays, dim, data_vars, coords, compat,
positions, fill_value=dtypes.NA):
positions, fill_value=dtypes.NA, join='outer'):
arrays = list(arrays)

if data_vars != 'all':
Expand All @@ -337,5 +339,5 @@ def _dataarray_concat(arrays, dim, data_vars, coords, compat,
datasets.append(arr._to_temp_dataset())

ds = _dataset_concat(datasets, dim, data_vars, coords, compat,
positions, fill_value=fill_value)
positions, fill_value=fill_value, join=join)
return arrays[0]._from_temp_dataset(ds, name)

0 comments on commit ec7fced

Please sign in to comment.