-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add pointwise indexing via isel_points method #481
Changes from all commits
b080142
ae1f3a8
c3cb3a5
bf0915b
e4851ba
10b67e3
5ab9d4b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
import functools | ||
import warnings | ||
from collections import Mapping | ||
from collections import Mapping, Sequence | ||
from numbers import Number | ||
|
||
import numpy as np | ||
|
@@ -21,6 +21,7 @@ | |
from .variable import as_variable, Variable, Coordinate, broadcast_variables | ||
from .pycompat import (iteritems, itervalues, basestring, OrderedDict, | ||
dask_array_type) | ||
from .combine import concat | ||
|
||
|
||
# list of attributes of pd.DatetimeIndex that are ndarrays of time info | ||
|
@@ -1028,6 +1029,93 @@ def sel(self, method=None, **indexers): | |
return self.isel(**indexing.remap_label_indexers(self, indexers, | ||
method=method)) | ||
|
||
def isel_points(self, dim='points', **indexers):
    """Returns a new dataset with each array indexed pointwise along the
    specified dimension(s).

    This method selects pointwise values from each array and is akin to
    the NumPy indexing behavior of `arr[[0, 1], [0, 1]]`, except this
    method does not require knowing the order of each array's dimensions.

    For example, ``ds.isel_points(x=[0, 1], y=[0, 1])`` selects the two
    points ``(x=0, y=0)`` and ``(x=1, y=1)`` and stacks them along a new
    ``'points'`` dimension.

    Parameters
    ----------
    dim : str or DataArray or pandas.Index or other list-like object, optional
        Dimension to concatenate the selected points along. If dim is a
        string, it must be a new dimension name. If dim has a ``dims``
        attribute (a DataArray or Coordinate), its name must not be an
        existing dimension of this dataset. Any other object is wrapped
        in a DataArray whose dimension and name are both ``'points'``.
    **indexers : {dim: indexer, ...}
        Keyword arguments with names matching dimensions and values given
        by array-like objects. All indexers must be integer valued,
        1 dimensional and the same length.

    Returns
    -------
    obj : Dataset
        A new Dataset with the same contents as this dataset, except each
        array and dimension is indexed by the appropriate indexers. With
        pointwise indexing, the new Dataset will always be a copy of the
        original.

    Raises
    ------
    ValueError
        If an indexer name is not a dimension of this dataset, if an
        indexer is not 1 dimensional, if the indexers differ in length,
        or if dim names an existing dimension.
    TypeError
        If an indexer is not of a signed integer dtype.

    See Also
    --------
    Dataset.sel
    DataArray.isel
    DataArray.sel
    DataArray.isel_points
    """
    indexer_dims = set(indexers)

    def relevant_keys(mapping):
        # names of the variables that use at least one indexed dimension
        return [k for k, v in mapping.items()
                if any(d in indexer_dims for d in v.dims)]

    data_vars = relevant_keys(self.data_vars)
    coords = relevant_keys(self.coords)

    # Coerce every indexer to an array once, so that the checks below and
    # the pointwise loop at the end see the same names in the same order.
    indexers = [(k, np.asarray(v)) for k, v in indexers.items()]

    # Check that indexers are valid dims, integers, and 1D
    for k, v in indexers:
        if k not in self.dims:
            raise ValueError("dimension %s does not exist" % k)
        if v.dtype.kind != 'i':
            raise TypeError('Indexers must be integers')
        if v.ndim != 1:
            raise ValueError('Indexers must be 1 dimensional')

    # all the indexers should have the same length
    lengths = set(len(v) for k, v in indexers)
    if len(lengths) > 1:
        raise ValueError('All indexers must be the same length')

    # Existing dimensions are not valid choices for the dim argument
    if isinstance(dim, basestring):
        if dim in self.dims:
            # dim is an invalid string
            raise ValueError('Existing dimension names are not valid '
                             'choices for the dim argument in isel_points')
    elif hasattr(dim, 'dims'):
        # dim is a DataArray or Coordinate
        if dim.name in self.dims:
            # dim already exists
            raise ValueError('Existing dimensions are not valid choices '
                             'for the dim argument in isel_points')
    else:
        # try to cast dim to DataArray with name = points
        # NOTE(review): unlike the two branches above, 'points' is not
        # checked against self.dims here -- confirm this is intended.
        from .dataarray import DataArray
        dim = DataArray(dim, dims='points', name='points')

    names = [k for k, v in indexers]
    columns = [v for k, v in indexers]

    # TODO: This would be sped up with vectorized indexing. This will
    # require dask to support pointwise indexing as well.
    return concat([self.isel(**dict(zip(names, point)))
                   for point in zip(*columns)],
                  dim=dim, coords=coords, data_vars=data_vars)
|
||
def reindex_like(self, other, method=None, copy=True): | ||
"""Conform this object onto the indexes of another object, filling | ||
in missing values with NaN. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -382,6 +382,66 @@ def test_sel_method(self): | |
actual = data.sel(x=[0.9, 1.9], method='backfill') | ||
self.assertDataArrayIdentical(expected, actual) | ||
|
||
def test_isel_points_method(self):
    """Check DataArray.isel_points results, dimension naming and errors."""
    shape = (10, 5, 6)
    np_array = np.random.random(shape)
    da = DataArray(np_array, dims=['time', 'y', 'x'])
    y = [1, 3]
    x = [3, 0]

    # reference result from NumPy pointwise (fancy) indexing
    expected = da.values[:, y, x]

    # an explicit name for the new dimension
    actual = da.isel_points(y=y, x=x, dim='test_coord')
    assert 'test_coord' in actual.coords
    assert actual.coords['test_coord'].shape == (len(y), )
    for name in ['time', 'x', 'y', 'test_coord']:
        assert name in actual
    assert actual.dims == ('test_coord', 'time')

    # the default name for the new dimension
    actual = da.isel_points(y=y, x=x)
    assert 'points' in actual.coords
    # Note that because xray always concatenates along the first dimension,
    # we must transpose the result to match the numpy style of
    # concatenation.
    np.testing.assert_equal(actual.T, expected)

    # a few corner cases
    da.isel_points(time=[1, 2], x=[2, 2], y=[3, 4])
    single_point = da.isel_points(time=[1], x=[2], y=[4])
    np.testing.assert_allclose(single_point.values.squeeze(),
                               np_array[1, 4, 2].squeeze())
    da.isel_points(time=[1, 2])

    # negative indexers
    y = [-1, 0]
    x = [-2, 2]
    expected = da.values[:, y, x]
    actual = da.isel_points(x=x, y=y).values
    np.testing.assert_equal(actual.T, expected)

    # test that the order of the indexers doesn't matter
    self.assertDataArrayIdentical(
        da.isel_points(y=y, x=x),
        da.isel_points(x=x, y=y))

    # make sure we're raising errors in the right places
    failing_calls = [
        (ValueError, 'All indexers must be the same length',
         dict(y=[1, 2], x=[1, 2, 3])),
        (ValueError, 'dimension bad_key does not exist',
         dict(bad_key=[1, 2])),
        (TypeError, 'Indexers must be integers',
         dict(y=[1.5, 2.2])),
        (TypeError, 'Indexers must be integers',
         dict(x=[1, 2, 3], y=slice(3))),
        (ValueError, 'Indexers must be 1 dimensional',
         dict(y=1, x=2)),
        (ValueError, 'Existing dimension names are not',
         dict(y=[1, 2], x=[1, 2], dim='x')),
    ]
    for error_type, pattern, kwargs in failing_calls:
        with self.assertRaisesRegexp(error_type, pattern):
            da.isel_points(**kwargs)

    # using non string dims
    actual = da.isel_points(y=[1, 2], x=[1, 2], dim=['A', 'B'])
    assert 'points' in actual.coords
|
||
def test_loc(self): | ||
self.ds['x'] = ('x', np.array(list('abcdefghij'))) | ||
da = self.ds['foo'] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A brief example would be nice to add here. You could also link to the new documentation section.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done.