Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

POC: 2D EAs via composition #27015

Closed
wants to merge 19 commits into from
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,9 @@ def _ensure_data(values, dtype=None):
else:
# Datetime
from pandas import DatetimeIndex
from pandas.core.arrays import unwrap_reshapeable
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this for something like factorize(EA)? Shouldn't the EA (or your wrapper) do this in _values_for_factorize?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_values_for_factorize might do it; I'll check. But at this point we don't necessarily have an EA, so we need a conditional un-wrapper.

Making DTA validate that inputs are 1D can be done separately from the rest of this, which should resolve this particular part of the diff.

values = unwrap_reshapeable(values)
assert values.ndim == 1, (type(values), values.shape)
values = DatetimeIndex(values)
dtype = values.dtype

Expand Down Expand Up @@ -1525,7 +1528,7 @@ def take(arr, indices, axis=0, allow_fill=False, fill_value=None):

if allow_fill:
# Pandas style, -1 means NA
validate_indices(indices, len(arr))
validate_indices(indices, arr.shape[axis])
jbrockmendel marked this conversation as resolved.
Show resolved Hide resolved
result = take_1d(arr, indices, axis=axis, allow_fill=True,
fill_value=fill_value)
else:
Expand Down Expand Up @@ -1575,7 +1578,11 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None,
# TODO(EA): Remove these if / elifs as datetimeTZ, interval, become EAs
# dispatch to internal type takes
if is_extension_array_dtype(arr):
return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
try:
return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill, axis=axis)
except TypeError:
jbrockmendel marked this conversation as resolved.
Show resolved Hide resolved
# `axis` kwarg not yet available
return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
elif is_datetime64tz_dtype(arr):
return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
elif is_interval_dtype(arr):
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/arrays/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,5 @@
IntegerArray, integer_array)
from .sparse import SparseArray # noqa
from .numpy_ import PandasArray, PandasDtype # noqa
from .reshaping import ( # noqa
ReshapeableArray, ReshapeMixin, unwrap_reshapeable)
17 changes: 14 additions & 3 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,10 +111,12 @@ class ExtensionArray:
# '_typ' is for pandas.core.dtypes.generic.ABCExtensionArray.
# Don't override this.
_typ = 'extension'
_allows_2d = False

# ------------------------------------------------------------------------
# Constructors
# ------------------------------------------------------------------------

@classmethod
def _from_sequence(cls, scalars, dtype=None, copy=False):
"""
Expand Down Expand Up @@ -286,6 +288,7 @@ def __iter__(self):
# ------------------------------------------------------------------------
# Required attributes
# ------------------------------------------------------------------------

@property
def dtype(self) -> ExtensionDtype:
"""
Expand All @@ -305,7 +308,14 @@ def ndim(self) -> int:
"""
Extension Arrays are only allowed to be 1-dimensional.
"""
return 1
return len(self.shape)

@property
def size(self) -> int:
"""
The number of elements in this array.
"""
return np.prod(self.shape)

@property
def nbytes(self) -> int:
Expand All @@ -319,6 +329,7 @@ def nbytes(self) -> int:
# ------------------------------------------------------------------------
# Additional Methods
# ------------------------------------------------------------------------

def astype(self, dtype, copy=True):
"""
Cast to a NumPy array with 'dtype'.
Expand Down Expand Up @@ -479,8 +490,7 @@ def dropna(self):
def shift(
self,
periods: int = 1,
fill_value: object = None,
) -> ABCExtensionArray:
fill_value: object = None) -> ABCExtensionArray:
"""
Shift values by desired number.

Expand Down Expand Up @@ -836,6 +846,7 @@ def copy(self, deep: bool = False) -> ABCExtensionArray:
# ------------------------------------------------------------------------
# Printing
# ------------------------------------------------------------------------

def __repr__(self):
from pandas.io.formats.printing import format_object_summary

Expand Down
10 changes: 8 additions & 2 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@
from pandas.io.formats import console

from .base import ExtensionArray, _extension_array_shared_docs
from .reshaping import unwrap_reshapeable


_take_msg = textwrap.dedent("""\
Interpreting negative values in 'indexer' as missing values.
Expand Down Expand Up @@ -349,6 +351,7 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
values = [values[idx] for idx in np.where(~null_mask)[0]]
values = sanitize_array(values, None, dtype=sanitize_dtype)

values = unwrap_reshapeable(values)
if dtype.categories is None:
try:
codes, categories = factorize(values, sort=True)
Expand Down Expand Up @@ -457,11 +460,14 @@ def _formatter(self, boxed=False):
# Defer to CategoricalFormatter's formatter.
return None

def copy(self):
def copy(self, deep: bool = False):
"""
Copy constructor.
"""
return self._constructor(values=self._codes.copy(),
values = self._codes
if deep:
jbrockmendel marked this conversation as resolved.
Show resolved Hide resolved
values = values.copy()
return self._constructor(values=values,
dtype=self.dtype,
fastpath=True)

Expand Down
7 changes: 1 addition & 6 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
from .base import ExtensionArray, ExtensionOpsMixin


class AttributesMixin:
class AttributesMixin: # TODO: how much of this do we still need?
_data = None # type: np.ndarray

@property
Expand Down Expand Up @@ -401,11 +401,6 @@ def __array__(self, dtype=None):
return np.array(list(self), dtype=object)
return self._data

@property
def size(self) -> int:
"""The number of elements in this array."""
return np.prod(self.shape)

def __len__(self):
return len(self._data)

Expand Down
5 changes: 4 additions & 1 deletion pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,9 @@ def __init__(self, values, dtype=_NS_DTYPE, freq=None, copy=False):
)
raise ValueError(msg.format(values.dtype))

if values.ndim != 1:
raise ValueError("Only 1-dimensional inputs are valid.")

dtype = _validate_dt64_dtype(dtype)

if freq == "infer":
Expand Down Expand Up @@ -353,7 +356,7 @@ def __init__(self, values, dtype=_NS_DTYPE, freq=None, copy=False):

@classmethod
def _simple_new(cls, values, freq=None, dtype=_NS_DTYPE):
assert isinstance(values, np.ndarray)
assert isinstance(values, np.ndarray), type(values)
if values.dtype == 'i8':
values = values.view(_NS_DTYPE)

Expand Down
Loading