From 25cdf3d698bc16241d964ba5344640d64d135870 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Fri, 31 Jul 2015 18:51:13 -0700 Subject: [PATCH 1/3] Add sel_points for point-wise indexing by label xref GH475 Example usage: In [1]: da = xray.DataArray(np.arange(56).reshape((7, 8)), ...: coords={'x': list('abcdefg'), ...: 'y': 10 * np.arange(8)}, ...: dims=['x', 'y']) ...: In [2]: da Out[2]: array([[ 0, 1, 2, 3, 4, 5, 6, 7], [ 8, 9, 10, 11, 12, 13, 14, 15], [16, 17, 18, 19, 20, 21, 22, 23], [24, 25, 26, 27, 28, 29, 30, 31], [32, 33, 34, 35, 36, 37, 38, 39], [40, 41, 42, 43, 44, 45, 46, 47], [48, 49, 50, 51, 52, 53, 54, 55]]) Coordinates: * y (y) int64 0 10 20 30 40 50 60 70 * x (x) |S1 'a' 'b' 'c' 'd' 'e' 'f' 'g' # we can index by position along each dimension In [3]: da.isel_points(x=[0, 1, 6], y=[0, 1, 0], dim='points') Out[3]: array([ 0, 9, 48]) Coordinates: y (points) int64 0 10 0 x (points) |S1 'a' 'b' 'g' * points (points) int64 0 1 2 # or equivalently by label In [4]: da.sel_points(x=['a', 'b', 'g'], y=[0, 10, 0], dim='points') Out[4]: array([ 0, 9, 48]) Coordinates: y (points) int64 0 10 0 x (points) |S1 'a' 'b' 'g' * points (points) int64 0 1 2 Bug fixes cc jhamman --- doc/api.rst | 2 ++ doc/indexing.rst | 16 ++++++++-- doc/whats-new.rst | 27 +++++++++++----- xray/core/dataarray.py | 11 +++++++ xray/core/dataset.py | 62 ++++++++++++++++++++++++++++++++++--- xray/test/test_dataarray.py | 12 ++++++- xray/test/test_dataset.py | 20 ++++++++++++ 7 files changed, 134 insertions(+), 16 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index ccbd61c2ae9..9ca97dd36bb 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -94,6 +94,7 @@ Indexing Dataset.isel Dataset.sel Dataset.isel_points + Dataset.sel_points Dataset.squeeze Dataset.reindex Dataset.reindex_like @@ -206,6 +207,7 @@ Indexing DataArray.isel DataArray.sel DataArray.isel_points + DataArray.sel_points DataArray.squeeze DataArray.reindex DataArray.reindex_like diff --git a/doc/indexing.rst b/doc/indexing.rst 
index f3f7609974f..0411ece4b73 100644 --- a/doc/indexing.rst +++ b/doc/indexing.rst @@ -57,7 +57,7 @@ DataArray: Positional indexing deviates from the NumPy when indexing with multiple arrays like ``arr[[0, 1], [0, 1]]``, as described in :ref:`indexing details`. - See :ref:`pointwise indexing` and :py:meth:`~xray.Dataset.isel_points` for more on this functionality. + See :ref:`pointwise indexing` for how to achieve this functionality in xray. xray also supports label-based indexing, just like pandas. Because we use a :py:class:`pandas.Index` under the hood, label based indexing is very @@ -123,7 +123,8 @@ __ http://legacy.python.org/dev/peps/pep-0472/ .. warning:: - Do not try to assign values when using ``isel``, ``isel_points`` or ``sel``:: + Do not try to assign values when using any of the indexing methods ``isel``, + ``isel_points``, ``sel`` or ``sel_points``:: # DO NOT do this arr.isel(space=0) = 0 @@ -143,7 +144,8 @@ Pointwise indexing xray pointwise indexing supports the indexing along multiple labeled dimensions using list-like objects. While :py:meth:`~xray.DataArray.isel` performs orthogonal indexing, the :py:meth:`~xray.DataArray.isel_points` method -provides similar numpy indexing behavior as if you were using multiple lists to index an array (e.g. `arr[[0, 1], [0, 1]]` ): +provides similar numpy indexing behavior as if you were using multiple +lists to index an array (e.g. ``arr[[0, 1], [0, 1]]`` ): .. ipython:: python @@ -152,6 +154,14 @@ provides similar numpy indexing behavior as if you were using multiple lists to da da.isel_points(x=[0, 1, 6], y=[0, 1, 0]) +There is also :py:meth:`~xray.DataArray.sel_points`, which analogously +allows you to do point-wise indexing by label: + +.. 
ipython:: python + + times = pd.to_datetime(['2000-01-03', '2000-01-02', '2000-01-01']) + arr.sel_points(space=['IA', 'IL', 'IN'], time=times) + Dataset indexing ---------------- diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 681aa7a89d6..b6900ad3fc6 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -17,13 +17,16 @@ v0.5.3 (unreleased) - Dataset variables are now written to netCDF files in order of appearance when using the netcdf4 backend (:issue:`479`). -- Added :py:meth:`~xray.Dataset.isel_points` and :py:meth:`~xray.DataArray.isel_points` to support pointwise indexing of Datasets and DataArrays (:issue:`475`). +- Added :py:meth:`~xray.Dataset.isel_points` and :py:meth:`~xray.Dataset.sel_points` + to support pointwise indexing of Datasets and DataArrays (:issue:`475`). .. ipython:: :verbatim: In [1]: da = xray.DataArray(np.arange(56).reshape((7, 8)), - dims=['x', 'y']) + ...: coords={'x': list('abcdefg'), + ...: 'y': 10 * np.arange(8)}, + ...: dims=['x', 'y']) In [2]: da Out[2]: @@ -36,16 +39,27 @@ v0.5.3 (unreleased) [40, 41, 42, 43, 44, 45, 46, 47], [48, 49, 50, 51, 52, 53, 54, 55]]) Coordinates: - * x (x) int64 0 1 2 3 4 5 6 - * y (y) int64 0 1 2 3 4 5 6 7 + * y (y) int64 0 10 20 30 40 50 60 70 + * x (x) |S1 'a' 'b' 'c' 'd' 'e' 'f' 'g' + # we can index by position along each dimension In [3]: da.isel_points(x=[0, 1, 6], y=[0, 1, 0], dim='points') Out[3]: array([ 0, 9, 48]) Coordinates: - x (points) int64 0 1 6 - y (points) int64 0 1 0 + y (points) int64 0 10 0 + x (points) |S1 'a' 'b' 'g' + * points (points) int64 0 1 2 + + # or equivalently by label + In [4]: da.sel_points(x=['a', 'b', 'g'], y=[0, 10, 0], dim='points') + Out[4]: + + array([ 0, 9, 48]) + Coordinates: + y (points) int64 0 10 0 + x (points) |S1 'a' 'b' 'g' * points (points) int64 0 1 2 - New :py:meth:`~xray.Dataset.where` method for masking xray objects according @@ -59,7 +73,6 @@ v0.5.3 (unreleased) @savefig where_example.png width=4in height=4in ds.distance.where(ds.distance 
< 100).plot() - Bug fixes ~~~~~~~~~ diff --git a/xray/core/dataarray.py b/xray/core/dataarray.py index 817586ad720..b4876846c62 100644 --- a/xray/core/dataarray.py +++ b/xray/core/dataarray.py @@ -562,6 +562,17 @@ def isel_points(self, dim='points', **indexers): ds = self._dataset.isel_points(dim=dim, **indexers) return self._with_replaced_dataset(ds) + def sel_points(self, dim='points', method=None, **indexers): + """Return a new DataArray whose dataset is given by pointwise selection + of index labels along the specified dimension(s). + + See Also + -------- + Dataset.sel_points + """ + ds = self._dataset.sel_points(dim=dim, method=method, **indexers) + return self._with_replaced_dataset(ds) + def reindex_like(self, other, method=None, copy=True): """Conform this object onto the indexes of another object, filling in missing values with NaN. diff --git a/xray/core/dataset.py b/xray/core/dataset.py index 8f38895fed6..2b014f21a23 100644 --- a/xray/core/dataset.py +++ b/xray/core/dataset.py @@ -962,8 +962,9 @@ def isel(self, **indexers): See Also -------- Dataset.sel + Dataset.sel_points + Dataset.isel_points DataArray.isel - DataArray.sel """ invalid = [k for k in indexers if not k in self.dims] if invalid: @@ -988,7 +989,7 @@ def sel(self, method=None, **indexers): In contrast to `Dataset.isel`, indexers for this method should use labels instead of integers. - Under the hood, this method is powered by using Panda's powerful Index + Under the hood, this method is powered by using pandas's powerful Index objects. This makes label based indexing essentially just as fast as using integer indexing. 
@@ -1023,7 +1024,8 @@ def sel(self, method=None, **indexers): See Also -------- Dataset.isel - DataArray.isel + Dataset.sel_points + Dataset.isel_points DataArray.sel """ return self.isel(**indexing.remap_label_indexers(self, indexers, @@ -1062,8 +1064,8 @@ def isel_points(self, dim='points', **indexers): See Also -------- Dataset.sel - DataArray.isel - DataArray.sel + Dataset.isel + Dataset.sel_points DataArray.isel_points """ indexer_dims = set(indexers) @@ -1116,6 +1118,56 @@ def relevant_keys(mapping): zip(*[v for k, v in indexers])]], dim=dim, coords=coords, data_vars=data_vars) + def sel_points(self, dim='points', method=None, **indexers): + """Returns a new dataset with each array indexed pointwise by tick + labels along the specified dimension(s). + + In contrast to `Dataset.isel_points`, indexers for this method should + use labels instead of integers. + + In contrast to `Dataset.sel`, this method selects points along the + diagonal of multi-dimensional arrays, not the intersection. + + Parameters + ---------- + dim : str or DataArray or pandas.Index or other list-like object, optional + Name of the dimension to concatenate along. If dim is provided as a + string, it must be a new dimension name, in which case it is added + along axis=0. If dim is provided as a DataArray or Index or + list-like object, its name, which must not be present in the + dataset, is used as the dimension to concatenate along and the + values are added as a coordinate. + method : {None, 'nearest', 'pad'/'ffill', 'backfill'/'bfill'}, optional + Method to use for inexact matches (requires pandas>=0.16): + + * default: only exact matches + * pad / ffill: propgate last valid index value forward + * backfill / bfill: propagate next valid index value backward + * nearest: use nearest valid index value + **indexers : {dim: indexer, ...} + Keyword arguments with names matching dimensions and values given + by array-like objects. All indexers must be the same length and + 1 dimensional. 
+ + Returns + ------- + obj : Dataset + A new Dataset with the same contents as this dataset, except each + array and dimension is indexed by the appropriate indexers. With + pointwise indexing, the new Dataset will always be a copy of the + original. + + See Also + -------- + Dataset.sel + Dataset.isel + Dataset.isel_points + DataArray.sel_points + """ + pos_indexers = indexing.remap_label_indexers(self, indexers, + method=method) + return self.isel_points(dim=dim, **pos_indexers) + def reindex_like(self, other, method=None, copy=True): """Conform this object onto the indexes of another object, filling in missing values with NaN. diff --git a/xray/test/test_dataarray.py b/xray/test/test_dataarray.py index 3faa9372c81..a7222a9110d 100644 --- a/xray/test/test_dataarray.py +++ b/xray/test/test_dataarray.py @@ -382,7 +382,7 @@ def test_sel_method(self): actual = data.sel(x=[0.9, 1.9], method='backfill') self.assertDataArrayIdentical(expected, actual) - def test_isel_points_method(self): + def test_isel_points(self): shape = (10, 5, 6) np_array = np.random.random(shape) da = DataArray(np_array, dims=['time', 'y', 'x']) @@ -442,6 +442,16 @@ def test_isel_points_method(self): actual = da.isel_points(y=[1, 2], x=[1, 2], dim=['A', 'B']) assert 'points' in actual.coords + def test_sel_points(self): + shape = (10, 5, 6) + np_array = np.random.random(shape) + da = DataArray(np_array, dims=['time', 'y', 'x']) + y = [1, 3] + x = [3, 0] + expected = da.isel_points(x=x, y=y) + actual = da.sel_points(x=x, y=y) + self.assertDataArrayIdentical(expected, actual) + def test_loc(self): self.ds['x'] = ('x', np.array(list('abcdefghij'))) da = self.ds['foo'] diff --git a/xray/test/test_dataset.py b/xray/test/test_dataset.py index bf63b263a8a..55e1b9273d2 100644 --- a/xray/test/test_dataset.py +++ b/xray/test/test_dataset.py @@ -729,6 +729,26 @@ def test_isel_points(self): dim2=stations['dim2s'], dim=np.array([4, 5, 6])) + def test_sel_points(self): + data = create_test_data() + + pdim1 = 
[1, 2, 3] + pdim2 = [4, 5, 1] + pdim3 = [1, 2, 3] + expected = data.isel_points(dim1=pdim1, dim2=pdim2, dim3=pdim3, + dim='test_coord') + actual = data.sel_points(dim1=data.dim1[pdim1], dim2=data.dim2[pdim2], + dim3=data.dim3[pdim3], dim='test_coord') + self.assertDatasetIdentical(expected, actual) + + data = Dataset({'foo': (('x', 'y'), np.arange(9).reshape(3, 3))}) + expected = Dataset({'foo': ('points', [0, 4, 8])}, + {'x': ('points', range(3)), + 'y': ('points', range(3))}) + actual = data.sel_points(x=[0.1, 1.1, 2.5], y=[0, 1.2, 2.0], + method='pad') + self.assertDatasetIdentical(expected, actual) + def test_sel_method(self): data = create_test_data() From 769f7742d7737ad171709819db319a6eafb50e34 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Fri, 31 Jul 2015 19:17:44 -0700 Subject: [PATCH 2/3] Note DataFrame.lookup --- doc/indexing.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/indexing.rst b/doc/indexing.rst index 0411ece4b73..c25b63da88f 100644 --- a/doc/indexing.rst +++ b/doc/indexing.rst @@ -162,6 +162,9 @@ allows you to do point-wise indexing by label: times = pd.to_datetime(['2000-01-03', '2000-01-02', '2000-01-01']) arr.sel_points(space=['IA', 'IL', 'IN'], time=times) +The equivalent pandas method to ``sel_points`` is +:py:meth:`~pandas.DataFrame.lookup`. 
+ Dataset indexing ---------------- From 3ffa8edb52218d97b9048678905641c7e8ff1901 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Sat, 1 Aug 2015 18:36:21 -0700 Subject: [PATCH 3/3] change docstring description of default method --- xray/core/alignment.py | 2 +- xray/core/dataarray.py | 4 ++-- xray/core/dataset.py | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/xray/core/alignment.py b/xray/core/alignment.py index a113a43ffc0..588f11fcb69 100644 --- a/xray/core/alignment.py +++ b/xray/core/alignment.py @@ -119,7 +119,7 @@ def reindex_variables(variables, indexes, indexers, method=None, copy=True): method : {None, 'nearest', 'pad'/'ffill', 'backfill'/'bfill'}, optional Method to use for filling index values in ``indexers`` not found in this dataset: - * default: don't fill gaps + * None (default): don't fill gaps * pad / ffill: propgate last valid index value forward * backfill / bfill: propagate next valid index value backward * nearest: use nearest valid index value diff --git a/xray/core/dataarray.py b/xray/core/dataarray.py index b4876846c62..a192cdfc4c4 100644 --- a/xray/core/dataarray.py +++ b/xray/core/dataarray.py @@ -590,7 +590,7 @@ def reindex_like(self, other, method=None, copy=True): Method to use for filling index values from other not found on this data array: - * default: don't fill gaps + * None (default): don't fill gaps * pad / ffill: propgate last valid index value forward * backfill / bfill: propagate next valid index value backward * nearest: use nearest valid index value (requires pandas>=0.16) @@ -626,7 +626,7 @@ def reindex(self, method=None, copy=True, **indexers): Method to use for filling index values in ``indexers`` not found on this data array: - * default: don't fill gaps + * None (default): don't fill gaps * pad / ffill: propgate last valid index value forward * backfill / bfill: propagate next valid index value backward * nearest: use nearest valid index value (requires pandas>=0.16) diff --git 
a/xray/core/dataset.py b/xray/core/dataset.py index 2b014f21a23..4185bfe6b37 100644 --- a/xray/core/dataset.py +++ b/xray/core/dataset.py @@ -1004,7 +1004,7 @@ def sel(self, method=None, **indexers): method : {None, 'nearest', 'pad'/'ffill', 'backfill'/'bfill'}, optional Method to use for inexact matches (requires pandas>=0.16): - * default: only exact matches + * None (default): only exact matches * pad / ffill: propgate last valid index value forward * backfill / bfill: propagate next valid index value backward * nearest: use nearest valid index value @@ -1140,7 +1140,7 @@ def sel_points(self, dim='points', method=None, **indexers): method : {None, 'nearest', 'pad'/'ffill', 'backfill'/'bfill'}, optional Method to use for inexact matches (requires pandas>=0.16): - * default: only exact matches + * None (default): only exact matches * pad / ffill: propgate last valid index value forward * backfill / bfill: propagate next valid index value backward * nearest: use nearest valid index value @@ -1185,7 +1185,7 @@ def reindex_like(self, other, method=None, copy=True): Method to use for filling index values from other not found in this dataset: - * default: don't fill gaps + * None (default): don't fill gaps * pad / ffill: propgate last valid index value forward * backfill / bfill: propagate next valid index value backward * nearest: use nearest valid index value (requires pandas>=0.16) @@ -1222,7 +1222,7 @@ def reindex(self, indexers=None, method=None, copy=True, **kw_indexers): Method to use for filling index values in ``indexers`` not found in this dataset: - * default: don't fill gaps + * None (default): don't fill gaps * pad / ffill: propgate last valid index value forward * backfill / bfill: propagate next valid index value backward * nearest: use nearest valid index value (requires pandas>=0.16)