-
-
Notifications
You must be signed in to change notification settings - Fork 18.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
ca2d261
commit 2ef5216
Showing
9 changed files
with
566 additions
and
80 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
from .base import ExtensionArray # noqa | ||
from .categorical import Categorical # noqa |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,201 @@ | ||
"""An interface for extending pandas with custom arrays.""" | ||
import abc | ||
|
||
import numpy as np | ||
|
||
from pandas.compat import add_metaclass | ||
|
||
|
||
# Template for the error raised by optional methods that a subclass did not
# provide; formatted with (array type, method name).
_not_implemented_message = "{} does not implement {}."


@add_metaclass(abc.ABCMeta)
class ExtensionArray(object):
    """Abstract base class for custom array types

    pandas will recognize instances of this class as proper arrays
    with a custom type and will not attempt to coerce them to objects.

    Subclasses are expected to implement the abstract members declared
    below: ``__getitem__``, ``__iter__``, ``__len__``, ``dtype``,
    ``nbytes``, ``isna``, ``take``, ``copy``, ``_formatting_values``,
    ``_concat_same_type`` and ``get_values``. Every other member has a
    default implementation that may be overridden.
    """
    # ------------------------------------------------------------------------
    # Must be a Sequence
    # ------------------------------------------------------------------------
    @abc.abstractmethod
    def __getitem__(self, item):
        # type: (Any) -> Any
        """Select a subset of self

        Notes
        -----
        As a sequence, __getitem__ should expect integer or slice ``item``.

        For slice ``item``, you should return an instance of yourself, even
        if the slice is length 0 or 1.

        For scalar ``item``, you may return a scalar suitable for your type.
        The scalar need not be an instance or subclass of your array type.
        """

    def __setitem__(self, key, value):
        # type: (Any, Any) -> None
        """Set ``value`` at ``key``.

        Raises
        ------
        NotImplementedError
            Always, in this default implementation; subclasses that support
            item assignment override it.
        """
        raise NotImplementedError(_not_implemented_message.format(
            type(self), '__setitem__')
        )

    @abc.abstractmethod
    def __iter__(self):
        # type: () -> Iterator
        """Iterate over the elements of the array."""
        pass

    @abc.abstractmethod
    def __len__(self):
        # type: () -> int
        """Return the number of elements in the array."""
        pass

    # ------------------------------------------------------------------------
    # Required attributes
    # ------------------------------------------------------------------------
    @property
    def base(self):
        """The base array I am a view of. None by default."""
        return None

    @property
    @abc.abstractmethod
    def dtype(self):
        # type: () -> ExtensionDtype
        """An instance of 'ExtensionDtype'."""
        pass

    @property
    def shape(self):
        # type: () -> Tuple[int, ...]
        """One-element tuple holding ``len(self)``."""
        return (len(self),)

    @property
    def ndim(self):
        # type: () -> int
        """Extension Arrays are only allowed to be 1-dimensional."""
        return 1

    @property
    @abc.abstractmethod
    def nbytes(self):
        # type: () -> int
        """The number of bytes needed to store this object in memory."""
        pass

    # ------------------------------------------------------------------------
    # Additional Methods
    # ------------------------------------------------------------------------
    @abc.abstractmethod
    def isna(self):
        # type: () -> np.ndarray
        """Boolean NumPy array indicating if each value is missing."""
        pass

    # ------------------------------------------------------------------------
    # Indexing methods
    # ------------------------------------------------------------------------
    @abc.abstractmethod
    def take(self, indexer, allow_fill=True, fill_value=None):
        # type: (Sequence, bool, Optional[Any]) -> ExtensionArray
        """For slicing"""

    def take_nd(self, indexer, allow_fill=True, fill_value=None):
        """For slicing

        Forwards all arguments unchanged to :meth:`take`.
        """
        # TODO: this isn't really necessary for 1-D
        return self.take(indexer, allow_fill=allow_fill,
                         fill_value=fill_value)

    @abc.abstractmethod
    def copy(self, deep=False):
        # type: (bool) -> ExtensionArray
        """Return a copy of the array."""

    # ------------------------------------------------------------------------
    # Block-related methods
    # ------------------------------------------------------------------------
    @property
    def _fill_value(self):
        # type: () -> Any
        """The missing value for this type, e.g. np.nan. None by default."""
        return None

    @abc.abstractmethod
    def _formatting_values(self):
        # type: () -> np.ndarray
        # At the moment, this has to be an array since we use result.dtype
        """An array of values to be printed in, e.g. the Series repr"""

    @classmethod
    @abc.abstractmethod
    def _concat_same_type(cls, to_concat):
        # type: (Sequence[ExtensionArray]) -> ExtensionArray
        """Concatenate multiple array

        Parameters
        ----------
        to_concat : sequence of this type

        Returns
        -------
        ExtensionArray
        """

    @abc.abstractmethod
    def get_values(self):
        # type: () -> np.ndarray
        """Get the underlying values backing your data
        """
        pass

    def _can_hold_na(self):
        # type: () -> bool
        """Whether your array can hold missing values. True by default.

        Notes
        -----
        Setting this to false will optimize some operations like fillna.
        """
        return True

    @property
    def is_sparse(self):
        # type: () -> bool
        """Whether your array is sparse. False by default."""
        return False

    def _slice(self, slicer):
        # type: (Union[tuple, Sequence, int]) -> 'ExtensionArray'
        """Return a new array sliced by `slicer`.

        Parameters
        ----------
        slicer : slice or np.ndarray
            If an array, it should just be a boolean mask

        Returns
        -------
        array : ExtensionArray
            Should return an ExtensionArray, even if ``self[slicer]``
            would return a scalar.

        Notes
        -----
        The default re-wraps ``self[slicer]`` by calling ``type(self)`` on
        it, so the subclass constructor must accept that value.
        """
        return type(self)(self[slicer])

    def value_counts(self, dropna=True):
        """Optional method for computing the histogram of the counts.

        Parameters
        ----------
        dropna : bool, default True
            whether to exclude missing values from the computation

        Returns
        -------
        counts : Series
        """
        # Imported lazily, as in the original implementation.
        from pandas.core.algorithms import value_counts

        if dropna:
            # XXX: this imposes boolean indexing on subclasses
            values = self[~np.asarray(self.isna())]
        else:
            # Keep the missing entries so that ``dropna=False`` can actually
            # count them; masking unconditionally made the flag a no-op.
            values = self
        return value_counts(np.asarray(values), dropna=dropna)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
"""Extend pandas with custom array types""" | ||
import abc | ||
|
||
from pandas.compat import add_metaclass | ||
|
||
|
||
@add_metaclass(abc.ABCMeta)
class ExtensionDtype(object):
    """A custom data type for your array.

    Subclasses must provide ``name``; every other member has a default.
    """
    @property
    def type(self):
        """A class object named after ``self.name``.

        The default builds a new, empty class on every access via
        ``type(self.name, (), {})``, so two accesses do not return the
        same object.
        """
        return type(self.name, (), {})

    @property
    def kind(self):
        """A character code (one of 'biufcmMOSUV'), default 'O'

        See Also
        --------
        numpy.dtype.kind
        """
        return 'O'

    @property
    @abc.abstractmethod
    def name(self):
        """A string identifying the data type.

        Will be used in, e.g. ``Series.dtype``
        """

    @property
    def names(self):
        """Ordered list of field names, or None if there are no fields"""
        return None

    @classmethod
    def construct_from_string(cls, string):
        """Attempt to construct this type from a string.

        Parameters
        ----------
        string : str

        Returns
        -------
        self : instance of 'cls'

        Raises
        ------
        TypeError
            If ``string`` does not equal ``cls.name``.

        Notes
        -----
        The default implementation checks if 'string' matches your
        type's name. If so, it calls your class with no arguments.

        The comparison uses the class-level attribute ``cls.name``. If a
        subclass defines ``name`` as a property, ``cls.name`` is the
        property object rather than a string and the default will never
        match; such subclasses should override this method.
        """
        if string == cls.name:
            return cls()
        else:
            raise TypeError("Cannot construct a '{}' from "
                            "'{}'".format(cls, string))

    @classmethod
    def is_dtype(cls, dtype):
        """Check if we match 'dtype'

        Parameters
        ----------
        dtype : str or dtype

        Returns
        -------
        is_dtype : bool

        Notes
        -----
        The default implementation is True if

        1. 'dtype' is a string that returns true for
           ``cls.construct_from_string``
        2. 'dtype' is ``cls`` or a subclass of ``cls``.
        3. 'dtype' is an instance of ``cls``.

        Any other object yields False instead of raising.
        """
        if isinstance(dtype, str):
            try:
                return isinstance(cls.construct_from_string(dtype), cls)
            except TypeError:
                return False
        if isinstance(dtype, cls):
            return True
        try:
            return issubclass(dtype, cls)
        except TypeError:
            # ``issubclass`` rejects non-class arguments (for example a
            # ``numpy.dtype`` instance); such objects simply do not match.
            return False
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.