Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLN: remove _ndarray_values #32768

Merged
merged 3 commits into from
Mar 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 2 additions & 8 deletions doc/source/development/internals.rst
Original file line number Diff line number Diff line change
Expand Up @@ -89,16 +89,10 @@ pandas extends NumPy's type system with custom types, like ``Categorical`` or
datetimes with a timezone, so we have multiple notions of "values". For 1-D
containers (``Index`` classes and ``Series``) we have the following convention:

* ``cls._ndarray_values`` is *always* a NumPy ``ndarray``. Ideally,
``_ndarray_values`` is cheap to compute. For example, for a ``Categorical``,
this returns the codes, not the array of objects.
* ``cls._values`` is the "best possible" array. This could be an
``ndarray``, ``ExtensionArray``, or in ``Index`` subclass (note: we're in the
process of removing the index subclasses here so that it's always an
``ndarray`` or ``ExtensionArray``).
``ndarray`` or ``ExtensionArray``.

So, for example, ``Series[category]._values`` is a ``Categorical``, while
``Series[category]._ndarray_values`` is the underlying codes.
So, for example, ``Series[category]._values`` is a ``Categorical``.

.. _ref-subclassing-pandas:

Expand Down
1 change: 0 additions & 1 deletion doc/source/reference/extensions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ objects.
api.extensions.ExtensionArray._from_factorized
api.extensions.ExtensionArray._from_sequence
api.extensions.ExtensionArray._from_sequence_of_strings
api.extensions.ExtensionArray._ndarray_values
api.extensions.ExtensionArray._reduce
api.extensions.ExtensionArray._values_for_argsort
api.extensions.ExtensionArray._values_for_factorize
Expand Down
17 changes: 0 additions & 17 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,6 @@ class ExtensionArray:
_from_factorized
_from_sequence
_from_sequence_of_strings
_ndarray_values
_reduce
_values_for_argsort
_values_for_factorize
Expand Down Expand Up @@ -1044,22 +1043,6 @@ def _concat_same_type(
# of objects
_can_hold_na = True

@property
def _ndarray_values(self) -> np.ndarray:
"""
Internal pandas method for lossy conversion to a NumPy ndarray.

This method is not part of the pandas interface.

The expectation is that this is cheap to compute, and is primarily
used for interacting with our indexers.

Returns
-------
array : ndarray
"""
return np.array(self)

def _reduce(self, name, skipna=True, **kwargs):
"""
Return a scalar result of performing the reduction operation.
Expand Down
13 changes: 2 additions & 11 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,10 +451,6 @@ def dtype(self) -> CategoricalDtype:
"""
return self._dtype

@property
def _ndarray_values(self) -> np.ndarray:
return self.codes

@property
def _constructor(self) -> Type["Categorical"]:
return Categorical
Expand Down Expand Up @@ -2567,12 +2563,7 @@ def _get_codes_for_values(values, categories):
"""
dtype_equal = is_dtype_equal(values.dtype, categories.dtype)

if dtype_equal:
# To prevent erroneous dtype coercion in _get_data_algo, retrieve
# the underlying numpy array. gh-22702
values = getattr(values, "_ndarray_values", values)
categories = getattr(categories, "_ndarray_values", categories)
elif is_extension_array_dtype(categories.dtype) and is_object_dtype(values):
if is_extension_array_dtype(categories.dtype) and is_object_dtype(values):
# Support inferring the correct extension dtype from an array of
# scalar objects. e.g.
# Categorical(array[Period, Period], categories=PeriodIndex(...))
Expand All @@ -2582,7 +2573,7 @@ def _get_codes_for_values(values, categories):
# exception raised in _from_sequence
values = ensure_object(values)
categories = ensure_object(categories)
else:
elif not dtype_equal:
values = ensure_object(values)
categories = ensure_object(categories)

Expand Down
4 changes: 0 additions & 4 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,10 +455,6 @@ def asi8(self) -> np.ndarray:
# do not cache or you'll create a memory leak
return self._data.view("i8")

@property
def _ndarray_values(self):
return self._data

# ----------------------------------------------------------------
# Rendering Methods

Expand Down
12 changes: 0 additions & 12 deletions pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,18 +478,6 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike:
data = self.to_numpy(dtype=dtype, **kwargs)
return astype_nansafe(data, dtype, copy=False)

@property
def _ndarray_values(self) -> np.ndarray:
"""
Internal pandas method for lossy conversion to a NumPy ndarray.

This method is not part of the pandas interface.

The expectation is that this is cheap to compute, and is primarily
used for interacting with our indexers.
"""
return self._data

def _values_for_factorize(self) -> Tuple[np.ndarray, float]:
# TODO: https://github.com/pandas-dev/pandas/issues/30037
# use masked algorithms, rather than object-dtype / np.nan.
Expand Down
17 changes: 0 additions & 17 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -855,23 +855,6 @@ def to_numpy(self, dtype=None, copy=False, na_value=lib.no_default, **kwargs):
result[self.isna()] = na_value
return result

@property
def _ndarray_values(self) -> np.ndarray:
"""
The data as an ndarray, possibly losing information.

The expectation is that this is cheap to compute, and is primarily
used for interacting with our indexers.

- categorical -> codes
"""
if is_extension_array_dtype(self):
return self.array._ndarray_values
# As a mixin, we depend on the mixing class having values.
# Special mixin syntax may be developed in the future:
# https://github.com/python/typing/issues/246
return self.values # type: ignore

@property
def empty(self):
return not self.size
Expand Down
31 changes: 13 additions & 18 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,8 +464,7 @@ def _simple_new(cls, values, name: Label = None):
# _index_data is a (temporary?) fix to ensure that the direct data
# manipulation we do in `_libs/reduction.pyx` continues to work.
# We need access to the actual ndarray, since we're messing with
# data buffers and strides. We don't re-use `_ndarray_values`, since
# we actually set this value too.
# data buffers and strides.
result._index_data = values
result._name = name
result._cache = {}
Expand Down Expand Up @@ -625,7 +624,8 @@ def ravel(self, order="C"):
--------
numpy.ndarray.ravel
"""
return self._ndarray_values.ravel(order=order)
values = self._get_engine_target()
return values.ravel(order=order)

def view(self, cls=None):

Expand Down Expand Up @@ -3846,29 +3846,24 @@ def _values(self) -> Union[ExtensionArray, np.ndarray]:
"""
The best array representation.

This is an ndarray or ExtensionArray. This differs from
``_ndarray_values``, which always returns an ndarray.
This is an ndarray or ExtensionArray.

Both ``_values`` and ``_ndarray_values`` are consistent between
``Series`` and ``Index`` (except for datetime64[ns], which returns
a DatetimeArray for _values on the Index, but ndarray[M8ns] on the
Series).
``_values`` are consistent between ``Series`` and ``Index``.

It may differ from the public '.values' method.

index | values | _values | _ndarray_values |
----------------- | --------------- | ------------- | --------------- |
Index | ndarray | ndarray | ndarray |
CategoricalIndex | Categorical | Categorical | ndarray[int] |
DatetimeIndex | ndarray[M8ns] | DatetimeArray | ndarray[M8ns] |
DatetimeIndex[tz] | ndarray[M8ns] | DatetimeArray | ndarray[M8ns] |
PeriodIndex | ndarray[object] | PeriodArray | ndarray[int] |
IntervalIndex | IntervalArray | IntervalArray | ndarray[object] |
index | values | _values |
----------------- | --------------- | ------------- |
Index | ndarray | ndarray |
CategoricalIndex | Categorical | Categorical |
DatetimeIndex | ndarray[M8ns] | DatetimeArray |
DatetimeIndex[tz] | ndarray[M8ns] | DatetimeArray |
PeriodIndex | ndarray[object] | PeriodArray |
IntervalIndex | IntervalArray | IntervalArray |

See Also
--------
values
_ndarray_values
"""
return self._data

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ def sort_values(self, return_indexer=False, ascending=True):
sorted_index = self.take(_as)
return sorted_index, _as
else:
# NB: using asi8 instead of _ndarray_values matters in numpy 1.18
# NB: using asi8 instead of _data matters in numpy 1.18
# because the treatment of NaT has been changed to put NaT last
# instead of first.
sorted_values = np.sort(self.asi8)
Expand Down
4 changes: 0 additions & 4 deletions pandas/core/indexes/extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,10 +228,6 @@ def __iter__(self):
def __array__(self, dtype=None) -> np.ndarray:
return np.asarray(self._data, dtype=dtype)

@property
def _ndarray_values(self) -> np.ndarray:
return self._data._ndarray_values

def _get_engine_target(self) -> np.ndarray:
return self._data._values_for_argsort()

Expand Down
22 changes: 9 additions & 13 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -550,21 +550,17 @@ def _values(self):
timedelta64 dtypes), while ``.array`` always returns an
ExtensionArray.

Differs from ``._ndarray_values``, as that ensures to always return a
numpy array (it will call ``_ndarray_values`` on the ExtensionArray, if
the Series was backed by an ExtensionArray).

Overview:

dtype | values | _values | array | _ndarray_values |
----------- | ------------- | ------------- | ------------- | --------------- |
Numeric | ndarray | ndarray | PandasArray | ndarray |
Category | Categorical | Categorical | Categorical | ndarray[int] |
dt64[ns] | ndarray[M8ns] | DatetimeArray | DatetimeArray | ndarray[M8ns] |
dt64[ns tz] | ndarray[M8ns] | DatetimeArray | DatetimeArray | ndarray[M8ns] |
td64[ns] | ndarray[m8ns] | TimedeltaArray| ndarray[m8ns] | ndarray[m8ns] |
Period | ndarray[obj] | PeriodArray | PeriodArray | ndarray[int] |
Nullable | EA | EA | EA | ndarray |
dtype | values | _values | array |
----------- | ------------- | ------------- | ------------- |
Numeric | ndarray | ndarray | PandasArray |
Category | Categorical | Categorical | Categorical |
dt64[ns] | ndarray[M8ns] | DatetimeArray | DatetimeArray |
dt64[ns tz] | ndarray[M8ns] | DatetimeArray | DatetimeArray |
td64[ns] | ndarray[m8ns] | TimedeltaArray| TimedeltaArray|
Period | ndarray[obj] | PeriodArray | PeriodArray |
Nullable | EA | EA | EA |

"""
return self._data.internal_values()
Expand Down
28 changes: 0 additions & 28 deletions pandas/tests/base/test_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,34 +220,6 @@ def test_values_consistent(array, expected_type, dtype):
tm.assert_equal(l_values, r_values)


@pytest.mark.parametrize(
"array, expected",
[
(np.array([0, 1], dtype=np.int64), np.array([0, 1], dtype=np.int64)),
(np.array(["0", "1"]), np.array(["0", "1"], dtype=object)),
(pd.Categorical(["a", "a"]), np.array([0, 0], dtype="int8")),
(
pd.DatetimeIndex(["2017-01-01T00:00:00"]),
np.array(["2017-01-01T00:00:00"], dtype="M8[ns]"),
),
(
pd.DatetimeIndex(["2017-01-01T00:00:00"], tz="US/Eastern"),
np.array(["2017-01-01T05:00:00"], dtype="M8[ns]"),
),
(pd.TimedeltaIndex([10 ** 10]), np.array([10 ** 10], dtype="m8[ns]")),
(
pd.PeriodIndex(["2017", "2018"], freq="D"),
np.array([17167, 17532], dtype=np.int64),
),
],
)
def test_ndarray_values(array, expected):
l_values = pd.Series(array)._ndarray_values
r_values = pd.Index(array)._ndarray_values
tm.assert_numpy_array_equal(l_values, r_values)
tm.assert_numpy_array_equal(l_values, expected)


@pytest.mark.parametrize("arr", [np.array([1, 2, 3])])
def test_numpy_array(arr):
ser = pd.Series(arr)
Expand Down
10 changes: 1 addition & 9 deletions pandas/tests/indexes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,16 +313,11 @@ def test_ensure_copied_data(self, indices):
result = result.tz_localize("UTC").tz_convert(indices.tz)

tm.assert_index_equal(indices, result)
tm.assert_numpy_array_equal(
indices._ndarray_values, result._ndarray_values, check_same="copy"
)

if isinstance(indices, PeriodIndex):
# .values an object array of Period, thus copied
result = index_type(ordinal=indices.asi8, copy=False, **init_kwargs)
tm.assert_numpy_array_equal(
indices._ndarray_values, result._ndarray_values, check_same="same"
)
tm.assert_numpy_array_equal(indices.asi8, result.asi8, check_same="same")
elif isinstance(indices, IntervalIndex):
# checked in test_interval.py
pass
Expand All @@ -331,9 +326,6 @@ def test_ensure_copied_data(self, indices):
tm.assert_numpy_array_equal(
indices.values, result.values, check_same="same"
)
tm.assert_numpy_array_equal(
indices._ndarray_values, result._ndarray_values, check_same="same"
)

def test_memory_usage(self, indices):
indices._engine.clear_mapping()
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/indexes/interval/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def test_constructor_nan(self, constructor, breaks, closed):

assert result.closed == closed
assert result.dtype.subtype == expected_subtype
tm.assert_numpy_array_equal(result._ndarray_values, expected_values)
tm.assert_numpy_array_equal(np.array(result), expected_values)

@pytest.mark.parametrize(
"breaks",
Expand All @@ -114,7 +114,7 @@ def test_constructor_empty(self, constructor, breaks, closed):
assert result.empty
assert result.closed == closed
assert result.dtype.subtype == expected_subtype
tm.assert_numpy_array_equal(result._ndarray_values, expected_values)
tm.assert_numpy_array_equal(np.array(result), expected_values)

@pytest.mark.parametrize(
"breaks",
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/interval/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def test_ensure_copied_data(self, closed):
)

# by-definition make a copy
result = IntervalIndex(index._ndarray_values, copy=False)
result = IntervalIndex(np.array(index), copy=False)
tm.assert_numpy_array_equal(
index.left.values, result.left.values, check_same="copy"
)
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/indexes/period/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,9 +147,9 @@ def test_constructor_fromarraylike(self):

msg = "freq not specified and cannot be inferred"
with pytest.raises(ValueError, match=msg):
PeriodIndex(idx._ndarray_values)
PeriodIndex(idx.asi8)
with pytest.raises(ValueError, match=msg):
PeriodIndex(list(idx._ndarray_values))
PeriodIndex(list(idx.asi8))

msg = "'Period' object is not iterable"
with pytest.raises(TypeError, match=msg):
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/indexes/period/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,23 +161,23 @@ def test_values(self):
tm.assert_numpy_array_equal(idx.to_numpy(), exp)

exp = np.array([], dtype=np.int64)
tm.assert_numpy_array_equal(idx._ndarray_values, exp)
tm.assert_numpy_array_equal(idx.asi8, exp)

idx = PeriodIndex(["2011-01", NaT], freq="M")

exp = np.array([Period("2011-01", freq="M"), NaT], dtype=object)
tm.assert_numpy_array_equal(idx.values, exp)
tm.assert_numpy_array_equal(idx.to_numpy(), exp)
exp = np.array([492, -9223372036854775808], dtype=np.int64)
tm.assert_numpy_array_equal(idx._ndarray_values, exp)
tm.assert_numpy_array_equal(idx.asi8, exp)

idx = PeriodIndex(["2011-01-01", NaT], freq="D")

exp = np.array([Period("2011-01-01", freq="D"), NaT], dtype=object)
tm.assert_numpy_array_equal(idx.values, exp)
tm.assert_numpy_array_equal(idx.to_numpy(), exp)
exp = np.array([14975, -9223372036854775808], dtype=np.int64)
tm.assert_numpy_array_equal(idx._ndarray_values, exp)
tm.assert_numpy_array_equal(idx.asi8, exp)

def test_period_index_length(self):
pi = period_range(freq="A", start="1/1/2001", end="12/1/2009")
Expand Down
4 changes: 1 addition & 3 deletions pandas/tests/reductions/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,7 @@ def test_ops(self, opname, obj):
if not isinstance(obj, PeriodIndex):
expected = getattr(obj.values, opname)()
else:
expected = pd.Period(
ordinal=getattr(obj._ndarray_values, opname)(), freq=obj.freq
)
expected = pd.Period(ordinal=getattr(obj.asi8, opname)(), freq=obj.freq)
try:
assert result == expected
except TypeError:
Expand Down