From b080142e28a971b121dfebf90934ceb58ac1fcf0 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Sun, 19 Jul 2015 22:29:29 -0700 Subject: [PATCH 1/7] Add pointwise indexing via isel_points method This provides behavior equivalent to numpy slicing with multiple lists. Example ------- >>> da = xray.DataArray(np.arange(56).reshape((7, 8)), dims=['x', 'y']) >>> da array([[ 0, 1, 2, 3, 4, 5, 6, 7], [ 8, 9, 10, 11, 12, 13, 14, 15], [16, 17, 18, 19, 20, 21, 22, 23], [24, 25, 26, 27, 28, 29, 30, 31], [32, 33, 34, 35, 36, 37, 38, 39], [40, 41, 42, 43, 44, 45, 46, 47], [48, 49, 50, 51, 52, 53, 54, 55]]) Coordinates: * x (x) int64 0 1 2 3 4 5 6 * y (y) int64 0 1 2 3 4 5 6 7 >>> da.isel_points(x=[0, 1, 6], y=[0, 1, 0]) array([ 0, 9, 48]) Coordinates: y (points) int64 0 1 0 x (points) int64 0 1 6 * points (points) int64 0 1 2 related: #475 --- doc/api.rst | 2 ++ doc/indexing.rst | 13 ++++++++- doc/whats-new.rst | 1 + xray/core/dataarray.py | 12 ++++++++ xray/core/dataset.py | 55 ++++++++++++++++++++++++++++++++++- xray/test/test_dataarray.py | 58 +++++++++++++++++++++++++++++++++++++ xray/test/test_dataset.py | 41 ++++++++++++++++++++++++++ 7 files changed, 180 insertions(+), 2 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index e8b8a06ef57..37973153582 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -93,6 +93,7 @@ Indexing Dataset.loc Dataset.isel Dataset.sel + Dataset.isel_points Dataset.squeeze Dataset.reindex Dataset.reindex_like @@ -202,6 +203,7 @@ Indexing DataArray.loc DataArray.isel DataArray.sel + DataArray.isel_points DataArray.squeeze DataArray.reindex DataArray.reindex_like diff --git a/doc/indexing.rst b/doc/indexing.rst index 1ecaf6ddfd4..452b23fe659 100644 --- a/doc/indexing.rst +++ b/doc/indexing.rst @@ -36,6 +36,8 @@ below and summarized in this table: | By name | By label | ``arr.sel(space='IA')`` or |br| | ``ds.sel(space='IA')`` or |br| | | | | ``arr.loc[dict(space='IA')]`` | ``ds.loc[dict(space='IA')]`` | 
+------------------+--------------+---------------------------------+--------------------------------+ +| By name | By integers | ``arr.isel_points(x=[0, 1])`` | ``ds.isel_points(x=[0, 1])`` | ++------------------+--------------+---------------------------------+--------------------------------+ Positional indexing ------------------- @@ -57,6 +59,7 @@ DataArray: Positional indexing deviates from the NumPy when indexing with multiple arrays like ``arr[[0, 1], [0, 1]]``, as described in :ref:`indexing details`. + Use :py:meth:`~xray.Dataset.isel_points` to achieve this functionality. xray also supports label-based indexing, just like pandas. Because we use a :py:class:`pandas.Index` under the hood, label based indexing is very @@ -108,6 +111,13 @@ use them explicitly to slice data. There are two ways to do this: # index by dimension coordinate labels arr.sel(time=slice('2000-01-01', '2000-01-02')) +3. Use the :py:meth:`~xray.DataArray.isel_points` method: + + .. ipython:: python + + # index by integer array indices + arr.isel_points(space=[0, 1], dim='points') + The arguments to these methods can be any objects that could index the array along the dimension given by the keyword, e.g., labels for an individual value, Python :py:func:`slice` objects or 1-dimensional arrays. @@ -122,7 +132,7 @@ __ http://legacy.python.org/dev/peps/pep-0472/ .. 
warning:: - Do not try to assign values when using ``isel`` or ``sel``:: + Do not try to assign values when using ``isel``, ``isel_points`` or ``sel``:: # DO NOT do this arr.isel(space=0) = 0 @@ -145,6 +155,7 @@ simultaneously, returning a new dataset: ds = arr.to_dataset() ds.isel(space=[0], time=[0]) ds.sel(time='2000-01-01') + ds.isel_points(space=[0, 1], dim='points') Positional indexing on a dataset is not supported because the ordering of dimensions in a dataset is somewhat ambiguous (it can vary between different diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 89f48bc452a..646e27d54d4 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -14,6 +14,7 @@ v0.5.3 (unreleased) - Dataset variables are now written to netCDF files in order of appearance when using the netcdf4 backend (:issue:`479`). +- Added :py:meth:`~xray.Dataset.isel_points` and :py:meth:`~xray.DataArray.isel_points` to support pointwise indexing of Datasets and DataArrays (:issue:`475`). v0.5.2 (16 July 2015) --------------------- diff --git a/xray/core/dataarray.py b/xray/core/dataarray.py index 509d5fadb57..475d6782cd7 100644 --- a/xray/core/dataarray.py +++ b/xray/core/dataarray.py @@ -551,6 +551,18 @@ def sel(self, method=None, **indexers): return self.isel(**indexing.remap_label_indexers(self, indexers, method=method)) + def isel_points(self, dim='points', **indexers): + """Return a new DataArray whose dataset is given by pointwise integer + indexing along the specified dimension(s). + + See Also + -------- + Dataset.isel_points + DataArray.sel_points + """ + ds = self._dataset.isel_points(dim=dim, **indexers) + return self._with_replaced_dataset(ds) + def reindex_like(self, other, method=None, copy=True): """Conform this object onto the indexes of another object, filling in missing values with NaN. 
diff --git a/xray/core/dataset.py b/xray/core/dataset.py index 35ff0cfd3ee..0659d0353f0 100644 --- a/xray/core/dataset.py +++ b/xray/core/dataset.py @@ -1,6 +1,6 @@ import functools import warnings -from collections import Mapping +from collections import Mapping, Sequence from numbers import Number import numpy as np @@ -21,6 +21,7 @@ from .variable import as_variable, Variable, Coordinate, broadcast_variables from .pycompat import (iteritems, itervalues, basestring, OrderedDict, dask_array_type) +from .combine import concat # list of attributes of pd.DatetimeIndex that are ndarrays of time info @@ -1028,6 +1029,58 @@ def sel(self, method=None, **indexers): return self.isel(**indexing.remap_label_indexers(self, indexers, method=method)) + def isel_points(self, dim='points', **indexers): + """Returns a new dataset with each array indexed pointwise along the + specified dimension(s). + + This method selects pointwise values from each array and is akin to + the NumPy indexing behavior of `arr[[0, 1], [0, 1]]`, except this + method does not require knowing the order of each array's dimensions. + + Parameters + ---------- + dim : str, optional + Dimension name for which the points will be added to. + **indexers : {dim: indexer, ...} + Keyword arguments with names matching dimensions and values given + by integers, slice objects or arrays. All indexers must be the same + length. + + Returns + ------- + obj : Dataset + A new Dataset with the same contents as this dataset, except each + array and dimension is indexed by the appropriate indexers. In + general, each array's data will be a view of the array's data + in this dataset, unless numpy fancy indexing was triggered by using + an array indexer, in which case the data will be a copy. 
+ + See Also + -------- + Dataset.sel + DataArray.isel + DataArray.sel + DataArray.isel_points + """ + invalid = [k for k in indexers if k not in self.dims] + if invalid: + raise ValueError("dimensions %r do not exist" % invalid) + + # all the indexers should be iterables + keys = indexers.keys() + indexers = [(k, ([v] if not isinstance(v, Sequence) else v)) + for k, v in iteritems(indexers)] + + # all the indexers should have the same length + lengths = set([len(v) for k, v in indexers]) + if len(lengths) > 1: + raise ValueError('All indexers must be the same length') + + return concat([self.isel(**d) for d in + [dict(zip(keys, inds)) for inds in + zip(*[v for k, v in indexers])]], + dim=dim) + def reindex_like(self, other, method=None, copy=True): """Conform this object onto the indexes of another object, filling in missing values with NaN. diff --git a/xray/test/test_dataarray.py b/xray/test/test_dataarray.py index 20d8e9d73ef..d71347c1885 100644 --- a/xray/test/test_dataarray.py +++ b/xray/test/test_dataarray.py @@ -382,6 +382,64 @@ def test_sel_method(self): actual = data.sel(x=[0.9, 1.9], method='backfill') self.assertDataArrayIdentical(expected, actual) + def test_isel_points_method(self): + shape = (10, 5, 6) + np_array = np.random.random(shape) + da = DataArray(np_array, dims=['time', 'y', 'x']) + y = [1, 3] + x = [3, 0] + + expected = da.values[:, y, x] + + actual = da.isel_points(y=y, x=x, dim='test_coord') + assert 'test_coord' in actual.coords + assert actual.coords['test_coord'].shape == (len(y), ) + + actual = da.isel_points(y=y, x=x) + assert 'points' in actual.coords + # not sure why actual needs to be transposed + np.testing.assert_equal(actual.T, expected) + + # test scalars (should match isel but will have points dim) + y = 1 + x = 3 + expected = da.values[:, y, x] + + actual = da.isel_points(y=y, x=x) + # squeeze to drop "points" dim + assert 'points' in actual.coords + np.testing.assert_allclose(actual.squeeze().values, expected) + 
self.assertDataArrayIdentical(actual.squeeze().drop(['points']), + da.isel(y=y, x=x)) + + # a few corner cases + da.isel_points(time=[1, 2], x=[2, 2], y=[3, 4]) + np.testing.assert_allclose( + da.isel_points(time=1, x=2, y=4).values.squeeze(), + np_array[1, 4, 2].squeeze()) + + da.isel_points(time=1) + da.isel_points(time=[1, 2]) + + # test that leaving out a dim is the same as slice(None) + self.assertDataArrayIdentical( + da.isel_points(time=slice(None), y=y, x=x), + da.isel_points(time=np.arange(len(da['time'])), y=y, x=x)) + self.assertDataArrayIdentical( + da.isel_points(time=slice(None), y=y, x=x), + da.isel_points(y=y, x=x)) + + # test that the order of the indexers doesn't matter + self.assertDataArrayIdentical( + da.isel_points(y=y, x=x), + da.isel_points(x=x, y=y)) + + # make sure we're raising errors in the right places + with self.assertRaises(ValueError): + da.isel_points(y=[1, 2], x=[1, 2, 3]) + with self.assertRaises(ValueError): + da.isel_points(bad_key=[1, 2]) + def test_loc(self): self.ds['x'] = ('x', np.array(list('abcdefghij'))) da = self.ds['foo'] diff --git a/xray/test/test_dataset.py b/xray/test/test_dataset.py index b4a8ffcede7..3728a67fa03 100644 --- a/xray/test/test_dataset.py +++ b/xray/test/test_dataset.py @@ -661,6 +661,47 @@ def test_sel(self): self.assertDatasetEqual(data.isel(td=slice(1, 3)), data.sel(td=slice('1 days', '2 days'))) + def test_isel_points(self): + data = create_test_data() + + pdim1 = [1, 2, 3] + pdim2 = [4, 5, 1] + pdim3 = [1, 2, 3] + + actual = data.isel_points(dim1=pdim1, dim2=pdim2, dim3=pdim3, + dim='test_coord') + assert 'test_coord' in actual.coords + assert actual.coords['test_coord'].shape == (len(pdim1), ) + + actual = data.isel_points(dim1=pdim1, dim2=pdim2) + assert 'points' in actual.coords + np.testing.assert_array_equal(pdim1, actual['dim1']) + + # # test scalars (should match isel but will have points dim) + pdim1 = 1 + pdim2 = 3 + + actual = data.isel_points(dim1=pdim1, dim2=pdim2) + # squeeze to drop 
"points" dim + assert 'points' in actual.coords + self.assertDatasetEqual(actual.squeeze().drop(['points']), + data.isel(dim1=pdim1, dim2=pdim2)) + + # test that leaving out a dim is the same as slice(None) + self.assertDatasetIdentical( + data.isel_points(time=slice(None), dim1=pdim1, dim2=pdim2), + data.isel_points(dim1=pdim1, dim2=pdim2)) + + # test that the order of the indexers doesn't matter + self.assertDatasetIdentical(data.isel_points(dim1=pdim1, dim2=pdim2), + data.isel_points(dim2=pdim2, dim1=pdim1)) + + # make sure we're raising errors in the right places + with self.assertRaises(ValueError): + data.isel_points(dim1=[1, 2], dim2=[1, 2, 3]) + with self.assertRaises(ValueError): + data.isel_points(bad_key=[1, 2]) + def test_sel_method(self): data = create_test_data() From ae1f3a84a289a900563fd28b4e2922abc7369c20 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Wed, 22 Jul 2015 22:09:13 -0700 Subject: [PATCH 2/7] update pointwise indexing docs --- doc/indexing.rst | 28 ++++++++++++++++++---------- doc/whats-new.rst | 29 +++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 10 deletions(-) diff --git a/doc/indexing.rst b/doc/indexing.rst index 452b23fe659..d9db38fff6b 100644 --- a/doc/indexing.rst +++ b/doc/indexing.rst @@ -36,8 +36,6 @@ below and summarized in this table: | By name | By label | ``arr.sel(space='IA')`` or |br| | ``ds.sel(space='IA')`` or |br| | | | | ``arr.loc[dict(space='IA')]`` | ``ds.loc[dict(space='IA')]`` | +------------------+--------------+---------------------------------+--------------------------------+ -| By name | By integers | ``arr.isel_points(x=[0, 1])`` | ``ds.isel_points(x=[0, 1])`` | -+------------------+--------------+---------------------------------+--------------------------------+ Positional indexing ------------------- @@ -111,13 +109,6 @@ use them explicitly to slice data. There are two ways to do this: # index by dimension coordinate labels arr.sel(time=slice('2000-01-01', '2000-01-02')) -3. 
Use the :py:meth:`~xray.DataArray.isel_points` method: - - .. ipython:: python - - # index by integer array indices - arr.isel_points(space=[0, 1], dim='points') - The arguments to these methods can be any objects that could index the array along the dimension given by the keyword, e.g., labels for an individual value, Python :py:func:`slice` objects or 1-dimensional arrays. @@ -144,6 +135,21 @@ __ http://legacy.python.org/dev/peps/pep-0472/ # this is safe arr[dict(space=0)] = 0 +Pointwise indexing +-------------------------------- + +xray pointwise indexing supports the indexing along multiple labeled dimensions +using list-like objects. While :py:meth:`~xray.DataArray.isel` performs +orthogonal indexing, the :py:meth:`~xray.DataArray.isel_points` method +provides similar numpy indexing behavior as if you were using multiple lists to index an array (e.g. `arr[[0, 1], [0, 1]]` ): + +.. ipython:: python + + # index by integer array indices + da = xray.DataArray(np.arange(56).reshape((7, 8)), dims=['x', 'y']) + da + da.isel_points(x=[0, 1, 6], y=[0, 1, 0]) + Dataset indexing ---------------- @@ -155,7 +161,8 @@ simultaneously, returning a new dataset: ds = arr.to_dataset() ds.isel(space=[0], time=[0]) ds.sel(time='2000-01-01') - ds.isel_points(space=[0, 1], dim='points') + ds2 = da.to_dataset() + ds2.isel_points(x=[0, 1, 6], y=[0, 1, 0], dim='points') Positional indexing on a dataset is not supported because the ordering of dimensions in a dataset is somewhat ambiguous (it can vary between different @@ -163,6 +170,7 @@ arrays). However, you can do normal indexing with labeled dimensions: .. ipython:: python + ds[dict(space=[0], time=[0])] ds.loc[dict(time='2000-01-01')] diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 646e27d54d4..083b13c61c2 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -16,6 +16,35 @@ v0.5.3 (unreleased) when using the netcdf4 backend (:issue:`479`). 
- Added :py:meth:`~xray.Dataset.isel_points` and :py:meth:`~xray.DataArray.isel_points` to support pointwise indexing of Datasets and DataArrays (:issue:`475`). + .. ipython:: + :verbatim: + + In [1]: da = xray.DataArray(np.arange(56).reshape((7, 8)), + dims=['x', 'y']) + + In [2]: da + Out[2]: + + array([[ 0, 1, 2, 3, 4, 5, 6, 7], + [ 8, 9, 10, 11, 12, 13, 14, 15], + [16, 17, 18, 19, 20, 21, 22, 23], + [24, 25, 26, 27, 28, 29, 30, 31], + [32, 33, 34, 35, 36, 37, 38, 39], + [40, 41, 42, 43, 44, 45, 46, 47], + [48, 49, 50, 51, 52, 53, 54, 55]]) + Coordinates: + * x (x) int64 0 1 2 3 4 5 6 + * y (y) int64 0 1 2 3 4 5 6 7 + + In [3]: da.isel_points(x=[0, 1, 6], y=[0, 1, 0], dim='points') + Out[3]: + + array([ 0, 9, 48]) + Coordinates: + x (points) int64 0 1 6 + y (points) int64 0 1 0 + * points (points) int64 0 1 2 + v0.5.2 (16 July 2015) --------------------- From c3cb3a5c652f0b2b116f57f8a9fd5bd38b200efa Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Wed, 22 Jul 2015 23:09:31 -0700 Subject: [PATCH 3/7] update isel_method api, does not support slice indexers --- xray/core/dataset.py | 48 +++++++++++++++++++++++++------------ xray/test/test_dataarray.py | 41 ++++++++++++------------------- xray/test/test_dataset.py | 32 ++++++++++++------------- 3 files changed, 63 insertions(+), 58 deletions(-) diff --git a/xray/core/dataset.py b/xray/core/dataset.py index 0659d0353f0..bd2860ae6bf 100644 --- a/xray/core/dataset.py +++ b/xray/core/dataset.py @@ -1039,21 +1039,25 @@ def isel_points(self, dim='points', **indexers): Parameters ---------- - dim : str, optional - Dimension name for which the points will be added to. + dim : str or DataArray or pandas.Index, optinal + Name of the dimension to concatenate along. This can either be a + new dimension name, in which case it is added along axis=0, or an + existing dimension name, in which case the location of the + dimension is unchanged. 
If dimension is provided as a DataArray or + Index, its name is used as the dimension to concatenate along and + the values are added as a coordinate. **indexers : {dim: indexer, ...} Keyword arguments with names matching dimensions and values given - by integers, slice objects or arrays. All indexers must be the same - length. + by array-like objects. All indexers must be the same length and + 1 dimensional. Returns ------- obj : Dataset A new Dataset with the same contents as this dataset, except each - array and dimension is indexed by the appropriate indexers. In - general, each array's data will be a view of the array's data - in this dataset, unless numpy fancy indexing was triggered by using - an array indexer, in which case the data will be a copy. + array and dimension is indexed by the appropriate indexers. With + pointwise indexing, the new Dataset will always be a copy of the + original. See Also -------- @@ -1062,24 +1066,38 @@ def isel_points(self, dim='points', **indexers): DataArray.sel DataArray.isel_points """ - invalid = [k for k in indexers if k not in self.dims] - if invalid: - raise ValueError("dimensions %r do not exist" % invalid) + indexer_dims = set(indexers) + + def relevant_keys(mapping): + return [k for k, v in mapping.items() + if any(d in indexer_dims for d in v.dims)] + + data_vars = relevant_keys(self.data_vars) + coords = relevant_keys(self.coords) # all the indexers should be iterables keys = indexers.keys() - indexers = [(k, ([v] if not isinstance(v, Sequence) else v)) - for k, v in iteritems(indexers)] + indexers = [(k, np.asarray(v)) for k, v in iteritems(indexers)] + # Check that indexers are valid dims, integers, and 1D + for k, v in indexers: + if k not in self.dims: + raise ValueError("dimension %s does not exist" % k) + if v.dtype.kind != 'i': + raise TypeError('Indexers must be integers') + if v.ndim != 1: + raise ValueError('Indexers must be 1 dimensional') # all the indexers should have the same length - lengths = 
set([len(v) for k, v in indexers]) + lengths = set(len(v) for k, v in indexers) if len(lengths) > 1: raise ValueError('All indexers must be the same length') + # TODO: This would be sped up with vectorized indexing. This will + # require dask to support pointwise indexing as well. return concat([self.isel(**d) for d in [dict(zip(keys, inds)) for inds in zip(*[v for k, v in indexers])]], - dim=dim) + dim=dim, coords=coords, data_vars=data_vars) def reindex_like(self, other, method=None, copy=True): """Conform this object onto the indexes of another object, filling diff --git a/xray/test/test_dataarray.py b/xray/test/test_dataarray.py index d71347c1885..dca0238808d 100644 --- a/xray/test/test_dataarray.py +++ b/xray/test/test_dataarray.py @@ -397,48 +397,37 @@ def test_isel_points_method(self): actual = da.isel_points(y=y, x=x) assert 'points' in actual.coords - # not sure why actual needs to be transposed + # Note that because xray always concatenates along the first dimension, + # We must transpose the result to match the numpy style of + # concatenation. 
np.testing.assert_equal(actual.T, expected) - # test scalars (should match isel but will have points dim) - y = 1 - x = 3 - expected = da.values[:, y, x] - - actual = da.isel_points(y=y, x=x) - # squeeze to drop "points" dim - assert 'points' in actual.coords - np.testing.assert_allclose(actual.squeeze().values, expected) - self.assertDataArrayIdentical(actual.squeeze().drop(['points']), - da.isel(y=y, x=x)) - # a few corner cases da.isel_points(time=[1, 2], x=[2, 2], y=[3, 4]) np.testing.assert_allclose( - da.isel_points(time=1, x=2, y=4).values.squeeze(), + da.isel_points(time=[1], x=[2], y=[4]).values.squeeze(), np_array[1, 4, 2].squeeze()) - - da.isel_points(time=1) da.isel_points(time=[1, 2]) - # test that leaving out a dim is the same as slice(None) - self.assertDataArrayIdentical( - da.isel_points(time=slice(None), y=y, x=x), - da.isel_points(time=np.arange(len(da['time'])), y=y, x=x)) - self.assertDataArrayIdentical( - da.isel_points(time=slice(None), y=y, x=x), - da.isel_points(y=y, x=x)) - # test that the order of the indexers doesn't matter self.assertDataArrayIdentical( da.isel_points(y=y, x=x), da.isel_points(x=x, y=y)) # make sure we're raising errors in the right places - with self.assertRaises(ValueError): + with self.assertRaisesRegexp(ValueError, + 'All indexers must be the same length'): da.isel_points(y=[1, 2], x=[1, 2, 3]) - with self.assertRaises(ValueError): + with self.assertRaisesRegexp(ValueError, + 'dimension bad_key does not exist'): da.isel_points(bad_key=[1, 2]) + with self.assertRaisesRegexp(TypeError, 'Indexers must be integers'): + da.isel_points(y=[1.5, 2.2]) + with self.assertRaisesRegexp(TypeError, 'Indexers must be integers'): + da.isel_points(x=[1, 2, 3], y=slice(3)) + with self.assertRaisesRegexp(ValueError, + 'Indexers must be 1 dimensional'): + da.isel_points(y=1, x=2) def test_loc(self): self.ds['x'] = ('x', np.array(list('abcdefghij'))) diff --git a/xray/test/test_dataset.py b/xray/test/test_dataset.py index 
3728a67fa03..624c3ed376a 100644 --- a/xray/test/test_dataset.py +++ b/xray/test/test_dataset.py @@ -677,30 +677,28 @@ def test_isel_points(self): assert 'points' in actual.coords np.testing.assert_array_equal(pdim1, actual['dim1']) - # # test scalars (should match isel but will have points dim) - pdim1 = 1 - pdim2 = 3 - - actual = data.isel_points(dim1=pdim1, dim2=pdim2) - # squeeze to drop "points" dim - assert 'points' in actual.coords - self.assertDatasetEqual(actual.squeeze().drop(['points']), - data.isel(dim1=pdim1, dim2=pdim2)) - - # test that leaving out a dim is the same as slice(None) - self.assertDatasetIdentical( - data.isel_points(time=slice(None), dim1=pdim1, dim2=pdim2), - data.isel_points(dim1=pdim1, dim2=pdim2)) - # test that the order of the indexers doesn't matter self.assertDatasetIdentical(data.isel_points(dim1=pdim1, dim2=pdim2), data.isel_points(dim2=pdim2, dim1=pdim1)) # make sure we're raising errors in the right places - with self.assertRaises(ValueError): + with self.assertRaisesRegexp(ValueError, + 'All indexers must be the same length'): data.isel_points(dim1=[1, 2], dim2=[1, 2, 3]) - with self.assertRaises(ValueError): + with self.assertRaisesRegexp(ValueError, + 'dimension bad_key does not exist'): data.isel_points(bad_key=[1, 2]) + with self.assertRaisesRegexp(TypeError, 'Indexers must be integers'): + data.isel_points(dim1=[1.5, 2.2]) + with self.assertRaisesRegexp(TypeError, 'Indexers must be integers'): + data.isel_points(dim1=[1, 2, 3], dim2=slice(3)) + with self.assertRaisesRegexp(ValueError, + 'Indexers must be 1 dimensional'): + data.isel_points(dim1=1, dim2=2) + # test to be sure we keep around variables that were not indexed + ds = Dataset({'x': [1, 2, 3, 4], 'y': 0}) + actual = ds.isel_points(x=[0, 1, 2]) + self.assertDataArrayIdentical(ds['y'], actual['y']) def test_sel_method(self): data = create_test_data() From bf0915b11555dbe68cfacf68c0f18d43e75a4887 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Thu, 23 Jul 2015 
07:29:47 -0700 Subject: [PATCH 4/7] add test cases for isel_points: negative indices, existing dim names, and a few others --- xray/core/dataset.py | 8 +++++++- xray/test/test_dataarray.py | 11 ++++++++++- xray/test/test_dataset.py | 3 +++ 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/xray/core/dataset.py b/xray/core/dataset.py index bd2860ae6bf..d77685df9cf 100644 --- a/xray/core/dataset.py +++ b/xray/core/dataset.py @@ -1045,7 +1045,8 @@ def isel_points(self, dim='points', **indexers): existing dimension name, in which case the location of the dimension is unchanged. If dimension is provided as a DataArray or Index, its name is used as the dimension to concatenate along and - the values are added as a coordinate. + the values are added as a coordinate. Existing dimension names are + not valid choices. **indexers : {dim: indexer, ...} Keyword arguments with names matching dimensions and values given by array-like objects. All indexers must be the same length and @@ -1092,6 +1093,11 @@ def relevant_keys(mapping): if len(lengths) > 1: raise ValueError('All indexers must be the same length') + # Existing dimensions are not valid choices for the dim argument + if dim in self.dims: + raise ValueError('Existing dimensions are not valid choices for ' + 'the dim argument in sel_points') + # TODO: This would be sped up with vectorized indexing. This will # require dask to support pointwise indexing as well. 
return concat([self.isel(**d) for d in diff --git a/xray/test/test_dataarray.py b/xray/test/test_dataarray.py index dca0238808d..bf00f7674f9 100644 --- a/xray/test/test_dataarray.py +++ b/xray/test/test_dataarray.py @@ -394,7 +394,8 @@ def test_isel_points_method(self): actual = da.isel_points(y=y, x=x, dim='test_coord') assert 'test_coord' in actual.coords assert actual.coords['test_coord'].shape == (len(y), ) - + assert all(x in actual for x in ['time', 'x', 'y', 'test_coord']) + assert actual.dims == ('test_coord', 'time') actual = da.isel_points(y=y, x=x) assert 'points' in actual.coords # Note that because xray always concatenates along the first dimension, @@ -408,6 +409,11 @@ def test_isel_points_method(self): da.isel_points(time=[1], x=[2], y=[4]).values.squeeze(), np_array[1, 4, 2].squeeze()) da.isel_points(time=[1, 2]) + y = [-1, 0] + x = [-2, 2] + expected = da.values[:, y, x] + actual = da.isel_points(x=x, y=y).values + np.testing.assert_equal(actual.T, expected) # test that the order of the indexers doesn't matter self.assertDataArrayIdentical( @@ -428,6 +434,9 @@ def test_isel_points_method(self): with self.assertRaisesRegexp(ValueError, 'Indexers must be 1 dimensional'): da.isel_points(y=1, x=2) + with self.assertRaisesRegexp(ValueError, + 'Existing dimensions are not valid'): + da.isel_points(y=[1, 2], x=[1, 2], dim='x') def test_loc(self): self.ds['x'] = ('x', np.array(list('abcdefghij'))) diff --git a/xray/test/test_dataset.py b/xray/test/test_dataset.py index 624c3ed376a..2fe8bcfc9dc 100644 --- a/xray/test/test_dataset.py +++ b/xray/test/test_dataset.py @@ -695,6 +695,9 @@ def test_isel_points(self): with self.assertRaisesRegexp(ValueError, 'Indexers must be 1 dimensional'): data.isel_points(dim1=1, dim2=2) + with self.assertRaisesRegexp(ValueError, + 'Existing dimensions are not valid'): + data.isel_points(dim1=[1, 2], dim2=[1, 2], dim='dim2') # test to be sure we keep around variables that were not indexed ds = Dataset({'x': [1, 2, 3, 4], 'y': 
0}) actual = ds.isel_points(x=[0, 1, 2]) From e4851ba7517aff7214779c89b62924b002f43d11 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Sun, 26 Jul 2015 09:53:36 -0700 Subject: [PATCH 5/7] add isel_points support for non string dim arguments such as dataarrays or lists --- xray/core/dataset.py | 33 ++++++++++++++++++++++----------- xray/test/test_dataarray.py | 6 +++++- xray/test/test_dataset.py | 28 +++++++++++++++++++++++++++- 3 files changed, 54 insertions(+), 13 deletions(-) diff --git a/xray/core/dataset.py b/xray/core/dataset.py index d77685df9cf..481ec1dbbb4 100644 --- a/xray/core/dataset.py +++ b/xray/core/dataset.py @@ -1039,14 +1039,13 @@ def isel_points(self, dim='points', **indexers): Parameters ---------- - dim : str or DataArray or pandas.Index, optinal - Name of the dimension to concatenate along. This can either be a - new dimension name, in which case it is added along axis=0, or an - existing dimension name, in which case the location of the - dimension is unchanged. If dimension is provided as a DataArray or - Index, its name is used as the dimension to concatenate along and - the values are added as a coordinate. Existing dimension names are - not valid choices. + dim : str or DataArray or pandas.Index or other list-like object, optinal + Name of the dimension to concatenate along. If dim is provided as a + string, it must be a new dimension name, in which case it is added + along axis=0. If dim is provided as a DataArray or Index or + list-like object, its name, which must not be present in the + dataset, is used as the dimension to concatenate along and the + values are added as a coordinate. **indexers : {dim: indexer, ...} Keyword arguments with names matching dimensions and values given by array-like objects. 
All indexers must be the same length and @@ -1094,9 +1093,21 @@ def relevant_keys(mapping): raise ValueError('All indexers must be the same length') # Existing dimensions are not valid choices for the dim argument - if dim in self.dims: - raise ValueError('Existing dimensions are not valid choices for ' - 'the dim argument in sel_points') + if isinstance(dim, basestring): + if dim in self.dims: + # dim is an invalid string + raise ValueError('Existing dimension names are not valid ' + 'choices for the dim argument in sel_points') + elif hasattr(dim, 'dims'): + # dim is a DataArray or Coordinate + if dim.name in self.dims: + # dim already exists + raise ValueError('Existing dimensions are not valid choices ' + 'for the dim argument in sel_points') + else: + # try to cast dim to DataArray with name = points + from .dataarray import DataArray + dim = DataArray(dim, dims='points', name='points') # TODO: This would be sped up with vectorized indexing. This will # require dask to support pointwise indexing as well. 
diff --git a/xray/test/test_dataarray.py b/xray/test/test_dataarray.py index bf00f7674f9..1cd86bb76a6 100644 --- a/xray/test/test_dataarray.py +++ b/xray/test/test_dataarray.py @@ -435,9 +435,13 @@ def test_isel_points_method(self): 'Indexers must be 1 dimensional'): da.isel_points(y=1, x=2) with self.assertRaisesRegexp(ValueError, - 'Existing dimensions are not valid'): + 'Existing dimension names are not'): da.isel_points(y=[1, 2], x=[1, 2], dim='x') + # using non string dims + acutal = da.isel_points(y=[1, 2], x=[1, 2], dim=['A', 'B']) + assert 'points' in actual.coords + def test_loc(self): self.ds['x'] = ('x', np.array(list('abcdefghij'))) da = self.ds['foo'] diff --git a/xray/test/test_dataset.py b/xray/test/test_dataset.py index 2fe8bcfc9dc..71fe83714f1 100644 --- a/xray/test/test_dataset.py +++ b/xray/test/test_dataset.py @@ -696,13 +696,39 @@ def test_isel_points(self): 'Indexers must be 1 dimensional'): data.isel_points(dim1=1, dim2=2) with self.assertRaisesRegexp(ValueError, - 'Existing dimensions are not valid'): + 'Existing dimension names are not valid'): data.isel_points(dim1=[1, 2], dim2=[1, 2], dim='dim2') + # test to be sure we keep around variables that were not indexed ds = Dataset({'x': [1, 2, 3, 4], 'y': 0}) actual = ds.isel_points(x=[0, 1, 2]) self.assertDataArrayIdentical(ds['y'], actual['y']) + # tests using index or DataArray as a dim + stations = Dataset() + stations['station'] = ('station', ['A', 'B', 'C']) + stations['dim1s'] = ('station', [1, 2, 3]) + stations['dim2s'] = ('station', [4, 5, 1]) + + actual = data.isel_points(dim1=stations['dim1s'], + dim2=stations['dim2s'], + dim=stations['station']) + assert 'station' in actual.coords + assert 'station' in actual.dims + self.assertDataArrayIdentical(actual['station'].drop(['dim1', 'dim2']), + stations['station']) + + # make sure we get the default points coordinate when a list is passed + actual = data.isel_points(dim1=stations['dim1s'], + dim2=stations['dim2s'], + dim=['A', 'B', 'C']) 
+ assert 'points' in actual.coords + + # can pass a numpy array + data.isel_points(dim1=stations['dim1s'], + dim2=stations['dim2s'], + dim=np.array([4, 5, 6])) + def test_sel_method(self): data = create_test_data() From 10b67e328f66636a7193cbd69e0c5692a89df746 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Sun, 26 Jul 2015 11:03:47 -0700 Subject: [PATCH 6/7] fix typo in test_dataarray.py --- xray/test/test_dataarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xray/test/test_dataarray.py b/xray/test/test_dataarray.py index 1cd86bb76a6..3faa9372c81 100644 --- a/xray/test/test_dataarray.py +++ b/xray/test/test_dataarray.py @@ -439,7 +439,7 @@ def test_isel_points_method(self): da.isel_points(y=[1, 2], x=[1, 2], dim='x') # using non string dims - acutal = da.isel_points(y=[1, 2], x=[1, 2], dim=['A', 'B']) + actual = da.isel_points(y=[1, 2], x=[1, 2], dim=['A', 'B']) assert 'points' in actual.coords def test_loc(self): From 5ab9d4b2f65584aac74e0e54050a84a1a0697773 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Sun, 26 Jul 2015 21:44:25 -0700 Subject: [PATCH 7/7] doc updates for isel_points --- doc/indexing.rst | 6 ++++-- xray/core/dataarray.py | 1 - xray/core/dataset.py | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/doc/indexing.rst b/doc/indexing.rst index d9db38fff6b..c331bddeedf 100644 --- a/doc/indexing.rst +++ b/doc/indexing.rst @@ -57,7 +57,7 @@ DataArray: Positional indexing deviates from the NumPy when indexing with multiple arrays like ``arr[[0, 1], [0, 1]]``, as described in :ref:`indexing details`. - Use :py:meth:`~xray.Dataset.isel_points` to achieve this functionality. + See :ref:`pointwise indexing` and :py:meth:`~xray.Dataset.isel_points` for more on this functionality. xray also supports label-based indexing, just like pandas. 
Because we use a :py:class:`pandas.Index` under the hood, label based indexing is very @@ -135,8 +135,10 @@ __ http://legacy.python.org/dev/peps/pep-0472/ # this is safe arr[dict(space=0)] = 0 +.. _pointwise indexing: + Pointwise indexing --------------------------------- +------------------ xray pointwise indexing supports the indexing along multiple labeled dimensions using list-like objects. While :py:meth:`~xray.DataArray.isel` performs diff --git a/xray/core/dataarray.py b/xray/core/dataarray.py index 475d6782cd7..030eb62b056 100644 --- a/xray/core/dataarray.py +++ b/xray/core/dataarray.py @@ -558,7 +558,6 @@ def isel_points(self, dim='points', **indexers): See Also -------- Dataset.isel_points - DataArray.sel_points """ ds = self._dataset.isel_points(dim=dim, **indexers) return self._with_replaced_dataset(ds) diff --git a/xray/core/dataset.py b/xray/core/dataset.py index 481ec1dbbb4..ec11e727cc6 100644 --- a/xray/core/dataset.py +++ b/xray/core/dataset.py @@ -1039,7 +1039,7 @@ def isel_points(self, dim='points', **indexers): Parameters ---------- - dim : str or DataArray or pandas.Index or other list-like object, optinal + dim : str or DataArray or pandas.Index or other list-like object, optional Name of the dimension to concatenate along. If dim is provided as a string, it must be a new dimension name, in which case it is added along axis=0. If dim is provided as a DataArray or Index or