Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add transpose_coords option to DataArray.transpose #2556

Merged
merged 20 commits into from
May 21, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,13 @@ Enhancements
- Character arrays' character dimension name decoding and encoding handled by
``var.encoding['char_dim_name']`` (:issue:`2895`)
By `James McCreight <https://github.com/jmccreight>`_.
- :py:meth:`DataArray.transpose` now accepts a keyword argument
``transpose_coords`` which enables transposition of coordinates in the
same way as :py:meth:`Dataset.transpose`. :py:meth:`DataArray.groupby`,
:py:meth:`DataArray.groupby_bins`, and :py:meth:`DataArray.resample` now
accept a keyword argument ``restore_coord_dims`` which keeps the order
of the dimensions of multi-dimensional coordinates intact (:issue:`1856`).
By `Peter Hausamann <http://github.com/phausamann>`_.
- Clean up Python 2 compatibility in code (:issue:`2950`)
By `Guido Imperiale <https://github.com/crusaderky>`_.
- Implement ``load_dataset()`` and ``load_dataarray()`` as alternatives to
Expand Down
27 changes: 21 additions & 6 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,7 +441,8 @@ def pipe(self, func: Union[Callable[..., T], Tuple[Callable[..., T], str]],
else:
return func(self, *args, **kwargs)

def groupby(self, group, squeeze: bool = True):
def groupby(self, group, squeeze: bool = True,
restore_coord_dims: Optional[bool] = None):
"""Returns a GroupBy object for performing grouped operations.

Parameters
Expand All @@ -453,6 +454,9 @@ def groupby(self, group, squeeze: bool = True):
If "group" is a dimension of any arrays in this dataset, `squeeze`
controls whether the subarrays have a dimension of length 1 along
that dimension or if the dimension is squeezed out.
restore_coord_dims : bool, optional
If True, also restore the dimension order of multi-dimensional
coordinates.

Returns
-------
Expand Down Expand Up @@ -485,11 +489,13 @@ def groupby(self, group, squeeze: bool = True):
core.groupby.DataArrayGroupBy
core.groupby.DatasetGroupBy
""" # noqa
return self._groupby_cls(self, group, squeeze=squeeze)
return self._groupby_cls(self, group, squeeze=squeeze,
restore_coord_dims=restore_coord_dims)

def groupby_bins(self, group, bins, right: bool = True, labels=None,
precision: int = 3, include_lowest: bool = False,
squeeze: bool = True):
squeeze: bool = True,
restore_coord_dims: Optional[bool] = None):
"""Returns a GroupBy object for performing grouped operations.

Rather than using all unique values of `group`, the values are discretized
Expand Down Expand Up @@ -522,6 +528,9 @@ def groupby_bins(self, group, bins, right: bool = True, labels=None,
If "group" is a dimension of any arrays in this dataset, `squeeze`
controls whether the subarrays have a dimension of length 1 along
that dimension or if the dimension is squeezed out.
restore_coord_dims : bool, optional
If True, also restore the dimension order of multi-dimensional
coordinates.

Returns
-------
Expand All @@ -536,9 +545,11 @@ def groupby_bins(self, group, bins, right: bool = True, labels=None,
.. [1] http://pandas.pydata.org/pandas-docs/stable/generated/pandas.cut.html
""" # noqa
return self._groupby_cls(self, group, squeeze=squeeze, bins=bins,
restore_coord_dims=restore_coord_dims,
cut_kwargs={'right': right, 'labels': labels,
'precision': precision,
'include_lowest': include_lowest})
'include_lowest':
include_lowest})

def rolling(self, dim: Optional[Mapping[Hashable, int]] = None,
min_periods: Optional[int] = None, center: bool = False,
Expand Down Expand Up @@ -669,7 +680,7 @@ def resample(self, indexer: Optional[Mapping[Hashable, str]] = None,
skipna=None, closed: Optional[str] = None,
label: Optional[str] = None,
base: int = 0, keep_attrs: Optional[bool] = None,
loffset=None,
loffset=None, restore_coord_dims: Optional[bool] = None,
**indexer_kwargs: str):
"""Returns a Resample object for performing resampling operations.

Expand Down Expand Up @@ -697,6 +708,9 @@ def resample(self, indexer: Optional[Mapping[Hashable, str]] = None,
If True, the object's attributes (`attrs`) will be copied from
the original object to the new one. If False (default), the new
object will be returned without attributes.
restore_coord_dims : bool, optional
If True, also restore the dimension order of multi-dimensional
coordinates.
**indexer_kwargs : {dim: freq}
The keyword arguments form of ``indexer``.
One of indexer or indexer_kwargs must be provided.
Expand Down Expand Up @@ -786,7 +800,8 @@ def resample(self, indexer: Optional[Mapping[Hashable, str]] = None,
dims=dim_coord.dims, name=RESAMPLE_DIM)
resampler = self._resample_cls(self, group=group, dim=dim_name,
grouper=grouper,
resample_dim=RESAMPLE_DIM)
resample_dim=RESAMPLE_DIM,
restore_coord_dims=restore_coord_dims)

return resampler

Expand Down
26 changes: 24 additions & 2 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1405,14 +1405,16 @@ def unstack(self, dim=None):
ds = self._to_temp_dataset().unstack(dim)
return self._from_temp_dataset(ds)

def transpose(self, *dims) -> 'DataArray':
def transpose(self, *dims, transpose_coords=None) -> 'DataArray':
"""Return a new DataArray object with transposed dimensions.

Parameters
----------
*dims : str, optional
By default, reverse the dimensions. Otherwise, reorder the
dimensions to this order.
transpose_coords : boolean, optional
If True, also transpose the coordinates of this DataArray.

Returns
-------
Expand All @@ -1430,8 +1432,28 @@ def transpose(self, *dims) -> 'DataArray':
numpy.transpose
Dataset.transpose
"""
if dims:
if set(dims) ^ set(self.dims):
raise ValueError('arguments to transpose (%s) must be '
'permuted array dimensions (%s)'
% (dims, tuple(self.dims)))

variable = self.variable.transpose(*dims)
return self._replace(variable)
if transpose_coords:
coords = {}
for name, coord in self.coords.items():
coord_dims = tuple(dim for dim in dims if dim in coord.dims)
coords[name] = coord.variable.transpose(*coord_dims)
return self._replace(variable, coords)
else:
if transpose_coords is None \
and any(self[c].ndim > 1 for c in self.coords):
warnings.warn('This DataArray contains multi-dimensional '
'coordinates. In the future, these coordinates '
'will be transposed as well unless you specify '
'transpose_coords=False.',
FutureWarning, stacklevel=2)
return self._replace(variable)

@property
def T(self) -> 'DataArray':
Expand Down
25 changes: 20 additions & 5 deletions xarray/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ class GroupBy(SupportsArithmetic):
"""

def __init__(self, obj, group, squeeze=False, grouper=None, bins=None,
cut_kwargs={}):
restore_coord_dims=None, cut_kwargs={}):
"""Create a GroupBy object

Parameters
Expand All @@ -215,6 +215,9 @@ def __init__(self, obj, group, squeeze=False, grouper=None, bins=None,
bins : array-like, optional
If `bins` is specified, the groups will be discretized into the
specified bins by `pandas.cut`.
restore_coord_dims : bool, optional
If True, also restore the dimension order of multi-dimensional
coordinates.
cut_kwargs : dict, optional
Extra keyword arguments to pass to `pandas.cut`

Expand Down Expand Up @@ -279,6 +282,16 @@ def __init__(self, obj, group, squeeze=False, grouper=None, bins=None,
safe_cast_to_index(group), sort=(bins is None))
unique_coord = IndexVariable(group.name, unique_values)

if isinstance(obj, DataArray) \
and restore_coord_dims is None \
and any(obj[c].ndim > 1 for c in obj.coords):
warnings.warn('This DataArray contains multi-dimensional '
'coordinates. In the future, the dimension order '
'of these coordinates will be restored as well '
'unless you specify restore_coord_dims=False.',
FutureWarning, stacklevel=2)
restore_coord_dims = False

# specification for the groupby operation
self._obj = obj
self._group = group
Expand All @@ -288,6 +301,7 @@ def __init__(self, obj, group, squeeze=False, grouper=None, bins=None,
self._stacked_dim = stacked_dim
self._inserted_dims = inserted_dims
self._full_index = full_index
self._restore_coord_dims = restore_coord_dims

# cached attributes
self._groups = None
Expand Down Expand Up @@ -508,7 +522,8 @@ def lookup_order(dimension):
return axis

new_order = sorted(stacked.dims, key=lookup_order)
return stacked.transpose(*new_order)
return stacked.transpose(
*new_order, transpose_coords=self._restore_coord_dims)

def apply(self, func, shortcut=False, args=(), **kwargs):
"""Apply a function over each array in the group and concatenate them
Expand Down Expand Up @@ -558,7 +573,7 @@ def apply(self, func, shortcut=False, args=(), **kwargs):
for arr in grouped)
return self._combine(applied, shortcut=shortcut)

def _combine(self, applied, shortcut=False):
def _combine(self, applied, restore_coord_dims=False, shortcut=False):
"""Recombine the applied objects like the original."""
applied_example, applied = peek_at(applied)
coord, dim, positions = self._infer_concat_args(applied_example)
Expand All @@ -580,8 +595,8 @@ def _combine(self, applied, shortcut=False):
combined = self._maybe_unstack(combined)
return combined

def reduce(self, func, dim=None, axis=None,
keep_attrs=None, shortcut=True, **kwargs):
def reduce(self, func, dim=None, axis=None, keep_attrs=None,
shortcut=True, **kwargs):
"""Reduce the items in this group by applying `func` along some
dimension(s).

Expand Down
14 changes: 9 additions & 5 deletions xarray/plot/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,10 @@ def _infer_line_data(darray, x, y, hue):
if huename in darray.dims:
otherindex = 1 if darray.dims.index(huename) == 0 else 0
otherdim = darray.dims[otherindex]
yplt = darray.transpose(otherdim, huename)
xplt = xplt.transpose(otherdim, huename)
yplt = darray.transpose(
otherdim, huename, transpose_coords=False)
xplt = xplt.transpose(
otherdim, huename, transpose_coords=False)
else:
raise ValueError('For 2D inputs, hue must be a dimension'
+ ' i.e. one of ' + repr(darray.dims))
Expand All @@ -79,7 +81,9 @@ def _infer_line_data(darray, x, y, hue):
if yplt.ndim > 1:
if huename in darray.dims:
otherindex = 1 if darray.dims.index(huename) == 0 else 0
xplt = darray.transpose(otherdim, huename)
otherdim = darray.dims[otherindex]
xplt = darray.transpose(
otherdim, huename, transpose_coords=False)
else:
raise ValueError('For 2D inputs, hue must be a dimension'
+ ' i.e. one of ' + repr(darray.dims))
Expand Down Expand Up @@ -614,9 +618,9 @@ def newplotfunc(darray, x=None, y=None, figsize=None, size=None,
yx_dims = (ylab, xlab)
dims = yx_dims + tuple(d for d in darray.dims if d not in yx_dims)
if dims != darray.dims:
darray = darray.transpose(*dims)
darray = darray.transpose(*dims, transpose_coords=True)
elif darray[xlab].dims[-1] == darray.dims[0]:
darray = darray.transpose()
darray = darray.transpose(transpose_coords=True)

# Pass the data as a masked ndarray too
zval = darray.to_masked_array(copy=False)
Expand Down
57 changes: 48 additions & 9 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1681,14 +1681,14 @@ def test_math_with_coords(self):
assert_identical(expected, actual)

actual = orig[0, :] + orig[:, 0]
assert_identical(expected.T, actual)
assert_identical(expected.transpose(transpose_coords=True), actual)

actual = orig - orig.T
actual = orig - orig.transpose(transpose_coords=True)
expected = DataArray(np.zeros((2, 3)), orig.coords)
assert_identical(expected, actual)

actual = orig.T - orig
assert_identical(expected.T, actual)
actual = orig.transpose(transpose_coords=True) - orig
assert_identical(expected.transpose(transpose_coords=True), actual)

alt = DataArray([1, 1], {'x': [-1, -2], 'c': 'foo', 'd': 555}, 'x')
actual = orig + alt
Expand Down Expand Up @@ -1801,8 +1801,27 @@ def test_stack_nonunique_consistency(self):
assert_identical(expected, actual)

def test_transpose(self):
assert_equal(self.dv.variable.transpose(),
self.dv.transpose().variable)
da = DataArray(np.random.randn(3, 4, 5), dims=('x', 'y', 'z'),
coords={'x': range(3), 'y': range(4), 'z': range(5),
'xy': (('x', 'y'), np.random.randn(3, 4))})

actual = da.transpose(transpose_coords=False)
expected = DataArray(da.values.T, dims=('z', 'y', 'x'),
coords=da.coords)
assert_equal(expected, actual)

actual = da.transpose('z', 'y', 'x', transpose_coords=True)
expected = DataArray(da.values.T, dims=('z', 'y', 'x'),
coords={'x': da.x.values, 'y': da.y.values,
'z': da.z.values,
'xy': (('y', 'x'), da.xy.values.T)})
assert_equal(expected, actual)

with pytest.raises(ValueError):
da.transpose('x', 'y')

with pytest.warns(FutureWarning):
da.transpose()

def test_squeeze(self):
assert_equal(self.dv.variable.squeeze(), self.dv.squeeze().variable)
Expand Down Expand Up @@ -2258,6 +2277,23 @@ def test_groupby_restore_dim_order(self):
result = array.groupby(by).apply(lambda x: x.squeeze())
assert result.dims == expected_dims

def test_groupby_restore_coord_dims(self):
array = DataArray(np.random.randn(5, 3),
coords={'a': ('x', range(5)), 'b': ('y', range(3)),
'c': (('x', 'y'), np.random.randn(5, 3))},
dims=['x', 'y'])

for by, expected_dims in [('x', ('x', 'y')),
('y', ('x', 'y')),
('a', ('a', 'y')),
('b', ('x', 'b'))]:
result = array.groupby(by, restore_coord_dims=True).apply(
lambda x: x.squeeze())['c']
assert result.dims == expected_dims

with pytest.warns(FutureWarning):
array.groupby('x').apply(lambda x: x.squeeze())

def test_groupby_first_and_last(self):
array = DataArray([1, 2, 3, 4, 5], dims='x')
by = DataArray(['a'] * 2 + ['b'] * 3, dims='x', name='ab')
Expand Down Expand Up @@ -2445,15 +2481,18 @@ def test_resample_drop_nondim_coords(self):
array = ds['data']

# Re-sample
actual = array.resample(time="12H").mean('time')
actual = array.resample(
time="12H", restore_coord_dims=True).mean('time')
assert 'tc' not in actual.coords

# Up-sample - filling
actual = array.resample(time="1H").ffill()
actual = array.resample(
time="1H", restore_coord_dims=True).ffill()
assert 'tc' not in actual.coords

# Up-sample - interpolation
actual = array.resample(time="1H").interpolate('linear')
actual = array.resample(
time="1H", restore_coord_dims=True).interpolate('linear')
assert 'tc' not in actual.coords

def test_resample_keep_attrs(self):
Expand Down
12 changes: 9 additions & 3 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4062,14 +4062,20 @@ def test_dataset_math_errors(self):

def test_dataset_transpose(self):
ds = Dataset({'a': (('x', 'y'), np.random.randn(3, 4)),
'b': (('y', 'x'), np.random.randn(4, 3))})
'b': (('y', 'x'), np.random.randn(4, 3))},
coords={'x': range(3), 'y': range(4),
'xy': (('x', 'y'), np.random.randn(3, 4))})

actual = ds.transpose()
expected = ds.apply(lambda x: x.transpose())
expected = Dataset({'a': (('y', 'x'), ds.a.values.T),
'b': (('x', 'y'), ds.b.values.T)},
coords={'x': ds.x.values, 'y': ds.y.values,
'xy': (('y', 'x'), ds.xy.values.T)})
assert_identical(expected, actual)

actual = ds.transpose('x', 'y')
expected = ds.apply(lambda x: x.transpose('x', 'y'))
expected = ds.apply(
lambda x: x.transpose('x', 'y', transpose_coords=True))
assert_identical(expected, actual)

ds = create_test_data()
Expand Down
6 changes: 4 additions & 2 deletions xarray/tests/test_interp.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,8 @@ def func(obj, dim, new_x):
'y': da['y'],
'x': ('z', xdest.values),
'x2': ('z', func(da['x2'], 'x', xdest))})
assert_allclose(actual, expected.transpose('z', 'y'))
assert_allclose(actual,
expected.transpose('z', 'y', transpose_coords=True))

# xdest is 2d
xdest = xr.DataArray(np.linspace(0.1, 0.9, 30).reshape(6, 5),
Expand All @@ -160,7 +161,8 @@ def func(obj, dim, new_x):
coords={'z': xdest['z'], 'w': xdest['w'], 'z2': xdest['z2'],
'y': da['y'], 'x': (('z', 'w'), xdest),
'x2': (('z', 'w'), func(da['x2'], 'x', xdest))})
assert_allclose(actual, expected.transpose('z', 'w', 'y'))
assert_allclose(actual,
expected.transpose('z', 'w', 'y', transpose_coords=True))


@pytest.mark.parametrize('case', [3, 4])
Expand Down
Loading