Skip to content

Commit

Permalink
Remove np.asarray in formatting.py (#8100)
Browse files Browse the repository at this point in the history
  • Loading branch information
dcherian authored Oct 18, 2023
1 parent ae41d82 commit e611c97
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 30 deletions.
42 changes: 29 additions & 13 deletions xarray/core/formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@
import pandas as pd
from pandas.errors import OutOfBoundsDatetime

from xarray.core.duck_array_ops import array_equiv
from xarray.core.indexing import ExplicitlyIndexed, MemoryCachedArray
from xarray.core.duck_array_ops import array_equiv, astype
from xarray.core.indexing import MemoryCachedArray
from xarray.core.options import OPTIONS, _get_boolean_with_default
from xarray.core.pycompat import array_type
from xarray.core.pycompat import array_type, to_duck_array, to_numpy
from xarray.core.utils import is_duck_array

if TYPE_CHECKING:
Expand Down Expand Up @@ -68,6 +68,8 @@ def first_n_items(array, n_desired):
# might not be a numpy.ndarray. Moreover, access to elements of the array
# could be very expensive (e.g. if it's only available over DAP), so go out
# of our way to get them in a single call to __getitem__ using only slices.
from xarray.core.variable import Variable

if n_desired < 1:
raise ValueError("must request at least one item")

Expand All @@ -78,7 +80,14 @@ def first_n_items(array, n_desired):
if n_desired < array.size:
indexer = _get_indexer_at_least_n_items(array.shape, n_desired, from_end=False)
array = array[indexer]
return np.asarray(array).flat[:n_desired]

# Callers pass Variable objects so that the indexing with
# ``indexer`` above works. That indexing does not work with our
# lazy indexing classes at the moment, so callers cannot
# pass Variable._data directly; unwrap it only now.
if isinstance(array, Variable):
array = array._data
return np.ravel(to_duck_array(array))[:n_desired]


def last_n_items(array, n_desired):
Expand All @@ -87,13 +96,22 @@ def last_n_items(array, n_desired):
# might not be a numpy.ndarray. Moreover, access to elements of the array
# could be very expensive (e.g. if it's only available over DAP), so go out
# of our way to get them in a single call to __getitem__ using only slices.
from xarray.core.variable import Variable

if (n_desired == 0) or (array.size == 0):
return []

if n_desired < array.size:
indexer = _get_indexer_at_least_n_items(array.shape, n_desired, from_end=True)
array = array[indexer]
return np.asarray(array).flat[-n_desired:]

# Callers pass Variable objects so that the indexing with
# ``indexer`` above works. That indexing does not work with our
# lazy indexing classes at the moment, so callers cannot
# pass Variable._data directly; unwrap it only now.
if isinstance(array, Variable):
array = array._data
return np.ravel(to_duck_array(array))[-n_desired:]


def last_item(array):
Expand All @@ -103,7 +121,8 @@ def last_item(array):
return []

indexer = (slice(-1, None),) * array.ndim
return np.ravel(np.asarray(array[indexer])).tolist()
# to_numpy since dask doesn't support tolist
return np.ravel(to_numpy(array[indexer])).tolist()


def calc_max_rows_first(max_rows: int) -> int:
Expand Down Expand Up @@ -171,10 +190,10 @@ def format_item(x, timedelta_format=None, quote_strings=True):

def format_items(x):
"""Returns a succinct summaries of all items in a sequence as strings"""
x = np.asarray(x)
x = to_duck_array(x)
timedelta_format = "datetime"
if np.issubdtype(x.dtype, np.timedelta64):
x = np.asarray(x, dtype="timedelta64[ns]")
x = astype(x, dtype="timedelta64[ns]")
day_part = x[~pd.isnull(x)].astype("timedelta64[D]").astype("timedelta64[ns]")
time_needed = x[~pd.isnull(x)] != day_part
day_needed = day_part != np.timedelta64(0, "ns")
Expand Down Expand Up @@ -584,12 +603,9 @@ def limit_lines(string: str, *, limit: int):
def short_array_repr(array):
from xarray.core.common import AbstractArray

if isinstance(array, ExplicitlyIndexed):
array = array.get_duck_array()
elif isinstance(array, AbstractArray):
if isinstance(array, AbstractArray):
array = array.data
if not is_duck_array(array):
array = np.asarray(array)
array = to_duck_array(array)

# default to lower precision so a full (abbreviated) line can fit on
# one line with the default display_width
Expand Down
34 changes: 34 additions & 0 deletions xarray/core/pycompat.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,37 @@ def is_chunked_array(x) -> bool:

def is_0d_dask_array(x):
    """Report whether ``x`` passes both ``is_duck_dask_array`` and ``is_scalar``.

    ``is_scalar`` is only consulted when ``is_duck_dask_array(x)`` is truthy.
    """
    dask_check = is_duck_dask_array(x)
    # ``and`` short-circuits: a falsy dask check is returned as-is.
    return dask_check and is_scalar(x)


def to_numpy(data) -> np.ndarray:
    """Coerce ``data`` to a ``numpy.ndarray``.

    Unwrapping happens in a fixed order, each step feeding the next:
    ``ExplicitlyIndexed`` objects are replaced by their ``get_duck_array()``
    result, objects exposing a ``chunks`` attribute are computed through
    their chunk manager, cupy array types are converted with ``.get()``,
    pint array types are reduced to ``.magnitude``, sparse array types are
    converted with ``.todense()``, and the result is finally passed through
    ``np.asarray``.
    """
    from xarray.core.indexing import ExplicitlyIndexed
    from xarray.core.parallelcompat import get_chunked_array_type

    unwrapped = data.get_duck_array() if isinstance(data, ExplicitlyIndexed) else data

    # TODO first attempt to call .to_numpy() once some libraries implement it
    if hasattr(unwrapped, "chunks"):
        # compute() is unpacked here; only its first element is kept.
        unwrapped, *_ = get_chunked_array_type(unwrapped).compute(unwrapped)
    if isinstance(unwrapped, array_type("cupy")):
        unwrapped = unwrapped.get()
    # pint has to be imported dynamically as pint imports xarray
    if isinstance(unwrapped, array_type("pint")):
        unwrapped = unwrapped.magnitude
    if isinstance(unwrapped, array_type("sparse")):
        unwrapped = unwrapped.todense()

    return np.asarray(unwrapped)


def to_duck_array(data):
    """Return ``data`` as a duck array, converting only when necessary.

    ``ExplicitlyIndexed`` objects yield their ``get_duck_array()`` result,
    objects accepted by ``is_duck_array`` are returned untouched, and
    anything else is converted with ``np.asarray``.
    """
    from xarray.core.indexing import ExplicitlyIndexed

    # Guard clauses, checked in the same order as the original if/elif/else.
    if isinstance(data, ExplicitlyIndexed):
        return data.get_duck_array()
    if not is_duck_array(data):
        return np.asarray(data)
    return data
19 changes: 2 additions & 17 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@
from xarray.core.options import OPTIONS, _get_keep_attrs
from xarray.core.parallelcompat import get_chunked_array_type, guess_chunkmanager
from xarray.core.pycompat import (
array_type,
integer_types,
is_0d_dask_array,
is_chunked_array,
is_duck_dask_array,
to_numpy,
)
from xarray.core.utils import (
OrderedSet,
Expand Down Expand Up @@ -1093,22 +1093,7 @@ def chunk(
def to_numpy(self) -> np.ndarray:
"""Coerces wrapped data to numpy and returns a numpy.ndarray"""
# TODO an entrypoint so array libraries can choose coercion method?
data = self.data

# TODO first attempt to call .to_numpy() once some libraries implement it
if hasattr(data, "chunks"):
chunkmanager = get_chunked_array_type(data)
data, *_ = chunkmanager.compute(data)
if isinstance(data, array_type("cupy")):
data = data.get()
# pint has to be imported dynamically as pint imports xarray
if isinstance(data, array_type("pint")):
data = data.magnitude
if isinstance(data, array_type("sparse")):
data = data.todense()
data = np.asarray(data)

return data
return to_numpy(self._data)

def as_numpy(self) -> Self:
"""Coerces wrapped data into a numpy array, returning a Variable."""
Expand Down

0 comments on commit e611c97

Please sign in to comment.