Skip to content

Commit

Permalink
Add rename_vars and rename_dims (#3045)
Browse files Browse the repository at this point in the history
* Added rename_coords and rename_dims

* Removed white space from blank lines

* Changed rename_coords to rename_vars

* Changed rename_coords to rename_vars in "See Also" or rename fx

* Fixed renaming dimension indexing

* Added testing for rename_vars and rename_dims

* Testing and fx for renaming vars and dims

* Met pep8 standards

* Undid autopep8 for lines w noqa

* Update xarray/tests/test_dataset.py

Co-Authored-By: Maximilian Roos <[email protected]>

* Cleaned up helper fxs and added actual_2 test

* Update xarray/core/dataset.py

Co-Authored-By: Maximilian Roos <[email protected]>

* Update xarray/core/dataset.py

Co-Authored-By: Maximilian Roos <[email protected]>

* deleted misc file

* Update xarray/core/dataset.py

Co-Authored-By: Maximilian Roos <[email protected]>

* _rename_var_dims_helper undefined test

* Use separate rename_dims and rename_vars dictionaries

* Fixed documentation and added inplace back

* removing changes from rename

* removed test set up to fail (will add back)

* fixed coord vs variable in test rename_

* Moved rename_var to under new fx/methods

* Update whats-new.rst

* use pytest.raises to test for ValueError

* did not assign failed

* pep8 compliance
  • Loading branch information
jukent authored and shoyer committed Jul 2, 2019
1 parent 378c330 commit af37ddd
Show file tree
Hide file tree
Showing 4 changed files with 150 additions and 37 deletions.
2 changes: 2 additions & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ Dataset contents
Dataset.pipe
Dataset.merge
Dataset.rename
Dataset.rename_vars
Dataset.rename_dims
Dataset.swap_dims
Dataset.expand_dims
Dataset.drop
Expand Down
8 changes: 8 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@ v0.12.3 (unreleased)
Enhancements
~~~~~~~~~~~~

- Renaming variables and dimensions independently:
Datasets with coordinate dimensions can now have only their dimension
(using rename_dims) or only their coordinate (using rename_vars) renamed
instead of the rename function applying to both. (:issue:`3026`)
By `Julia Kent <https://github.com/jukent>`_.

Bug fixes
~~~~~~~~~

Expand Down Expand Up @@ -102,6 +108,8 @@ Enhancements to existing functionality
accept a keyword argument ``restore_coord_dims`` which keeps the order
of the dimensions of multi-dimensional coordinates intact (:issue:`1856`).
By `Peter Hausamann <http://github.com/phausamann>`_.
- Clean up Python 2 compatibility in code (:issue:`2950`)
By `Guido Imperiale <https://github.com/crusaderky>`_.
- Better warning message when supplying invalid objects to ``xr.merge``
(:issue:`2948`). By `Mathias Hauser <https://github.com/mathause>`_.
- Add ``errors`` keyword argument to :py:meth:`Dataset.drop` and :py:meth:`Dataset.drop_dims`
Expand Down
143 changes: 106 additions & 37 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,8 @@ def merge_indexes(

for n in var_names:
var = variables[n]
if (current_index_variable is not None and
var.dims != current_index_variable.dims):
if (current_index_variable is not None
and var.dims != current_index_variable.dims):
raise ValueError(
"dimension mismatch between %r %s and %r %s"
% (dim, current_index_variable.dims, n, var.dims))
Expand Down Expand Up @@ -209,8 +209,8 @@ def split_indexes(
Not public API. Used in Dataset and DataArray reset_index
methods.
"""
if (isinstance(dims_or_levels, str)
or not isinstance(dims_or_levels, Sequence)):
if (isinstance(dims_or_levels, str) or
not isinstance(dims_or_levels, Sequence)):
dims_or_levels = [dims_or_levels]

dim_levels \
Expand Down Expand Up @@ -287,8 +287,8 @@ def __len__(self) -> int:
return len(self._dataset._variables) - len(self._dataset._coord_names)

def __contains__(self, key) -> bool:
return (key in self._dataset._variables and
key not in self._dataset._coord_names)
return (key in self._dataset._variables
and key not in self._dataset._coord_names)

def __getitem__(self, key) -> 'DataArray':
if key not in self._dataset._coord_names:
Expand Down Expand Up @@ -1188,8 +1188,8 @@ def identical(self, other):
Dataset.equals
"""
try:
return (utils.dict_equiv(self.attrs, other.attrs) and
self._all_compat(other, 'identical'))
return (utils.dict_equiv(self.attrs, other.attrs)
and self._all_compat(other, 'identical'))
except (TypeError, AttributeError):
return False

Expand Down Expand Up @@ -2151,8 +2151,8 @@ def _validate_interp_indexer(x, new_x):
# In the case of datetimes, the restrictions placed on indexers
# used with interp are stronger than those which are placed on
# isel, so we need an additional check after _validate_indexers.
if (_contains_datetime_like_objects(x) and
not _contains_datetime_like_objects(new_x)):
if (_contains_datetime_like_objects(x)
and not _contains_datetime_like_objects(new_x)):
raise TypeError('When interpolating over a datetime-like '
'coordinate, the coordinates to '
'interpolate to must be either datetime '
Expand Down Expand Up @@ -2264,19 +2264,18 @@ def _rename_vars(self, name_dict, dims_dict):
variables = OrderedDict()
coord_names = set()
for k, v in self.variables.items():
name = name_dict.get(k, k)
dims = tuple(dims_dict.get(dim, dim) for dim in v.dims)
var = v.copy(deep=False)
var.dims = dims
var.dims = tuple(dims_dict.get(dim, dim) for dim in v.dims)
name = name_dict.get(k, k)
if name in variables:
raise ValueError('the new name %r conflicts' % (name,))
variables[name] = var
if k in self._coord_names:
coord_names.add(name)
return variables, coord_names

def _rename_dims(self, dims_dict):
return {dims_dict.get(k, k): v for k, v in self.dims.items()}
def _rename_dims(self, name_dict):
return {name_dict.get(k, k): v for k, v in self.dims.items()}

def _rename_indexes(self, name_dict):
if self._indexes is None:
Expand All @@ -2293,9 +2292,9 @@ def _rename_indexes(self, name_dict):
indexes[new_name] = index
return indexes

def _rename_all(self, name_dict, dim_dict):
variables, coord_names = self._rename_vars(name_dict, dim_dict)
dims = self._rename_dims(dim_dict)
def _rename_all(self, name_dict, dims_dict):
variables, coord_names = self._rename_vars(name_dict, dims_dict)
dims = self._rename_dims(dims_dict)
indexes = self._rename_indexes(name_dict)
return variables, coord_names, dims, indexes

Expand All @@ -2322,21 +2321,91 @@ def rename(self, name_dict=None, inplace=None, **names):
See Also
--------
Dataset.swap_dims
Dataset.rename_vars
Dataset.rename_dims
DataArray.rename
"""
# TODO: add separate rename_vars and rename_dims methods.
inplace = _check_inplace(inplace)
name_dict = either_dict_or_kwargs(name_dict, names, 'rename')
for k, v in name_dict.items():
for k in name_dict.keys():
if k not in self and k not in self.dims:
raise ValueError("cannot rename %r because it is not a "
"variable or dimension in this dataset" % k)

variables, coord_names, dims, indexes = self._rename_all(
name_dict=name_dict, dim_dict=name_dict)
name_dict=name_dict, dims_dict=name_dict)
return self._replace(variables, coord_names, dims=dims,
indexes=indexes, inplace=inplace)

def rename_dims(self, dims_dict=None, **dims):
    """Returns a new object with renamed dimensions only.

    Variables (including coordinates) keep their names; only the
    dimension names they are defined along are changed.

    Parameters
    ----------
    dims_dict : dict-like, optional
        Dictionary whose keys are current dimension names and
        whose values are the desired names.
    **dims, optional
        Keyword form of ``dims_dict``.
        One of dims_dict or dims must be provided.

    Returns
    -------
    renamed : Dataset
        Dataset with renamed dimensions.

    Raises
    ------
    ValueError
        If a key is not a dimension of this dataset, or if a new name is
        already taken by a different dimension or by a variable.

    See Also
    --------
    Dataset.swap_dims
    Dataset.rename
    Dataset.rename_vars
    DataArray.rename
    """
    dims_dict = either_dict_or_kwargs(dims_dict, dims, 'rename_dims')
    for k, v in dims_dict.items():
        if k not in self.dims:
            # Wrap the name in a 1-tuple so that tuple-valued names are
            # formatted as a single %r argument instead of being unpacked.
            raise ValueError("cannot rename %r because it is not a "
                             "dimension in this dataset" % (k,))
        # Renaming onto an existing dimension/variable would silently
        # merge entries in the dims mapping; an identity rename is a no-op.
        if v != k and (v in self.dims or v in self):
            raise ValueError("cannot rename %r to %r because %r already "
                             "exists. Try using swap_dims instead."
                             % (k, v, v))

    # An empty name_dict leaves every variable name untouched.
    variables, coord_names, dims, indexes = self._rename_all(
        name_dict={}, dims_dict=dims_dict)
    return self._replace(variables, coord_names, dims=dims,
                         indexes=indexes)

def rename_vars(self, name_dict=None, **names):
    """Returns a new object with renamed variables, including coordinates.

    Dimension names are left untouched; only the names of variables and
    coordinates are changed.

    Parameters
    ----------
    name_dict : dict-like, optional
        Dictionary whose keys are current variable or coordinate names and
        whose values are the desired names.
    **names, optional
        Keyword form of ``name_dict``.
        One of name_dict or names must be provided.

    Returns
    -------
    renamed : Dataset
        Dataset with renamed variables including coordinates.

    Raises
    ------
    ValueError
        If a key is not a variable or coordinate in this dataset.

    See Also
    --------
    Dataset.swap_dims
    Dataset.rename
    Dataset.rename_dims
    DataArray.rename
    """
    name_dict = either_dict_or_kwargs(name_dict, names, 'rename_vars')
    for k in name_dict:
        if k not in self:
            # Wrap the name in a 1-tuple so that tuple-valued names are
            # formatted as a single %r argument instead of being unpacked.
            raise ValueError("cannot rename %r because it is not a "
                             "variable or coordinate in this dataset"
                             % (k,))

    # An empty dims_dict leaves every dimension name untouched.
    variables, coord_names, dims, indexes = self._rename_all(
        name_dict=name_dict, dims_dict={})
    return self._replace(variables, coord_names, dims=dims,
                         indexes=indexes)

def swap_dims(self, dims_dict, inplace=None):
"""Returns a new object with swapped dimensions.
Expand Down Expand Up @@ -2464,8 +2533,8 @@ def expand_dims(self, dim=None, axis=None, **dim_kwargs):
if d in self.dims:
raise ValueError(
'Dimension {dim} already exists.'.format(dim=d))
if (d in self._variables and
not utils.is_scalar(self._variables[d])):
if (d in self._variables
and not utils.is_scalar(self._variables[d])):
raise ValueError(
'{dim} already exists as coordinate or'
' variable name.'.format(dim=d))
Expand Down Expand Up @@ -3256,9 +3325,9 @@ def reduce(self, func, dim=None, keep_attrs=None, keepdims=False,
if not reduce_dims:
variables[name] = var
else:
if (not numeric_only or
np.issubdtype(var.dtype, np.number) or
(var.dtype == np.bool_)):
if (not numeric_only
or np.issubdtype(var.dtype, np.number)
or (var.dtype == np.bool_)):
if len(reduce_dims) == 1:
# unpack dimensions for the benefit of functions
# like np.argmin which can't handle tuple arguments
Expand Down Expand Up @@ -3791,8 +3860,8 @@ def diff(self, dim, n=1, label='upper'):
for name, var in self.variables.items():
if dim in var.dims:
if name in self.data_vars:
variables[name] = (var.isel(**kwargs_end) -
var.isel(**kwargs_start))
variables[name] = (var.isel(**kwargs_end)
- var.isel(**kwargs_start))
else:
variables[name] = var.isel(**kwargs_new)
else:
Expand Down Expand Up @@ -3976,8 +4045,8 @@ def sortby(self, variables, ascending=True):
for data_array in aligned_other_vars:
if data_array.ndim != 1:
raise ValueError("Input DataArray is not 1-D.")
if (data_array.dtype == object and
LooseVersion(np.__version__) < LooseVersion('1.11.0')):
if (data_array.dtype == object
and LooseVersion(np.__version__) < LooseVersion('1.11.0')):
raise NotImplementedError(
'sortby uses np.lexsort under the hood, which requires '
'numpy 1.11.0 or later to support object data-type.')
Expand Down Expand Up @@ -4053,9 +4122,9 @@ def quantile(self, q, dim=None, interpolation='linear',
reduce_dims = [d for d in var.dims if d in dims]
if reduce_dims or not var.dims:
if name not in self.coords:
if (not numeric_only or
np.issubdtype(var.dtype, np.number) or
var.dtype == np.bool_):
if (not numeric_only
or np.issubdtype(var.dtype, np.number)
or var.dtype == np.bool_):
if len(reduce_dims) == var.ndim:
# prefer to aggregate over axis=None rather than
# axis=(0, 1) if they will be equivalent, because
Expand Down Expand Up @@ -4171,8 +4240,8 @@ def differentiate(self, coord, edge_order=1, datetime_unit=None):

variables = OrderedDict()
for k, v in self.variables.items():
if (k in self.data_vars and dim in v.dims and
k not in self.coords):
if (k in self.data_vars and dim in v.dims
and k not in self.coords):
if _contains_datetime_like_objects(v):
v = v._to_numeric(datetime_unit=datetime_unit)
grad = duck_array_ops.gradient(
Expand Down Expand Up @@ -4348,8 +4417,8 @@ def filter_by_attrs(self, **kwargs):
has_value_flag = False
for attr_name, pattern in kwargs.items():
attr_value = variable.attrs.get(attr_name)
if ((callable(pattern) and pattern(attr_value)) or
attr_value == pattern):
if ((callable(pattern) and pattern(attr_value))
or attr_value == pattern):
has_value_flag = True
else:
has_value_flag = False
Expand Down
34 changes: 34 additions & 0 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2162,6 +2162,40 @@ def test_rename_inplace(self):
# check virtual variables
assert_array_equal(data['t.dayofyear'], [1, 2, 3])

def test_rename_dims(self):
    """rename_dims changes dimension names but keeps variable names."""

    def build(dim):
        # Same variables every time; only the dimension they live on varies.
        return Dataset({'x': (dim, [0, 1, 2]),
                        'y': (dim, [10, 11, 12]),
                        'z': 42})

    source = build('x')
    # 'x' no longer matches a dimension name after the rename, so it has
    # to be flagged as a coordinate explicitly in the expected dataset.
    wanted = build('x_new').set_coords('x')
    mapping = {'x': 'x_new'}

    # Dict form and keyword form must give the same result.
    assert_identical(wanted, source.rename_dims(mapping))
    assert_identical(wanted, source.rename_dims(**mapping))

    # A key that is not a dimension of the dataset is rejected.
    with pytest.raises(ValueError):
        source.rename_dims({'x_bad': 'x_new'})

def test_rename_vars(self):
    """rename_vars changes variable names but keeps dimension names."""

    def build(coord_name):
        # Every variable stays on dimension 'x'; only the name of the
        # first variable varies.
        return Dataset({coord_name: ('x', [0, 1, 2]),
                        'y': ('x', [10, 11, 12]),
                        'z': 42})

    source = build('x')
    # The renamed variable no longer matches a dimension name, so it has
    # to be flagged as a coordinate explicitly in the expected dataset.
    wanted = build('x_new').set_coords('x_new')
    mapping = {'x': 'x_new'}

    # Dict form and keyword form must give the same result.
    assert_identical(wanted, source.rename_vars(mapping))
    assert_identical(wanted, source.rename_vars(**mapping))

    # A key that is not a variable or coordinate is rejected.
    with pytest.raises(ValueError):
        source.rename_vars({'x_bad': 'x_new'})

def test_swap_dims(self):
original = Dataset({'x': [1, 2, 3], 'y': ('x', list('abc')), 'z': 42})
expected = Dataset({'z': 42},
Expand Down

0 comments on commit af37ddd

Please sign in to comment.