From 2ef52169767c50682e2e9ee7a5fda2163b80754e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 15 Jan 2018 10:06:34 -0600 Subject: [PATCH] REF: Define extension base classes --- pandas/core/arrays/__init__.py | 1 + pandas/core/arrays/base.py | 201 ++++++++++++++ pandas/core/arrays/categorical.py | 18 +- pandas/core/dtypes/base.py | 92 +++++++ pandas/core/dtypes/common.py | 32 +++ pandas/core/dtypes/dtypes.py | 14 +- pandas/core/internals.py | 248 +++++++++++++----- pandas/tests/dtypes/test_dtypes.py | 36 ++- pandas/tests/internals/test_external_block.py | 4 +- 9 files changed, 566 insertions(+), 80 deletions(-) create mode 100644 pandas/core/arrays/base.py create mode 100644 pandas/core/dtypes/base.py diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py index ee32b12f0e712..f8adcf520c15b 100644 --- a/pandas/core/arrays/__init__.py +++ b/pandas/core/arrays/__init__.py @@ -1 +1,2 @@ +from .base import ExtensionArray # noqa from .categorical import Categorical # noqa diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py new file mode 100644 index 0000000000000..ad29edde34ce6 --- /dev/null +++ b/pandas/core/arrays/base.py @@ -0,0 +1,201 @@ +"""An interface for extending pandas with custom arrays.""" +import abc + +import numpy as np + +from pandas.compat import add_metaclass + + +_not_implemented_message = "{} does not implement {}." + + +@add_metaclass(abc.ABCMeta) +class ExtensionArray(object): + """Abstract base class for custom array types + + pandas will recognize instances of this class as proper arrays + with a custom type and will not attempt to coerce them to objects. + + Subclasses are expected to implement the following methods. + """ + # ------------------------------------------------------------------------ + # Must be a Sequence + # ------------------------------------------------------------------------ + @abc.abstractmethod + def __getitem__(self, item): + """Select a subset of self + + Notes + ----- + As a sequence, __getitem__ should expect integer or slice ``key``. + + For slice ``key``, you should return an instance of yourself, even + if the slice is length 0 or 1. + + For scalar ``key``, you may return a scalar suitable for your type. + The scalar need not be an instance or subclass of your array type. + """ + # type (Any) -> Any + + def __setitem__(self, key, value): + # type: (Any, Any) -> None + raise NotImplementedError(_not_implemented_message.format( + type(self), '__setitem__') + ) + + @abc.abstractmethod + def __iter__(self): + # type: () -> Iterator + pass + + @abc.abstractmethod + def __len__(self): + # type: () -> int + pass + + # ------------------------------------------------------------------------ + # Required attributes + # ------------------------------------------------------------------------ + @property + def base(self): + """The base array I am a view of. None by default.""" + + @property + @abc.abstractmethod + def dtype(self): + """An instance of 'ExtensionDtype'.""" + # type: () -> ExtensionDtype + pass + + @property + def shape(self): + # type: () -> Tuple[int, ...] 
+        return (len(self),)
+
+    @property
+    def ndim(self):
+        # type: () -> int
+        """Extension Arrays are only allowed to be 1-dimensional."""
+        return 1
+
+    @property
+    @abc.abstractmethod
+    def nbytes(self):
+        """The number of bytes needed to store this object in memory."""
+        # type: () -> int
+        pass
+
+    # ------------------------------------------------------------------------
+    # Additional Methods
+    # ------------------------------------------------------------------------
+    @abc.abstractmethod
+    def isna(self):
+        """Boolean NumPy array indicating if each value is missing."""
+        # type: () -> np.ndarray
+        pass
+
+    # ------------------------------------------------------------------------
+    # Indexing methods
+    # ------------------------------------------------------------------------
+    @abc.abstractmethod
+    def take(self, indexer, allow_fill=True, fill_value=None):
+        # type: (Sequence, bool, Optional[Any]) -> ExtensionArray
+        """For slicing"""
+
+    def take_nd(self, indexer, allow_fill=True, fill_value=None):
+        """For slicing"""
+        # TODO: this isn't really necessary for 1-D
+        return self.take(indexer, allow_fill=allow_fill,
+                         fill_value=fill_value)
+
+    @abc.abstractmethod
+    def copy(self, deep=False):
+        # type: (bool) -> ExtensionArray
+        """Return a copy of the array."""
+
+    # ------------------------------------------------------------------------
+    # Block-related methods
+    # ------------------------------------------------------------------------
+    @property
+    def _fill_value(self):
+        """The missing value for this type, e.g. np.nan"""
+        # type: () -> Any
+        return None
+
+    @abc.abstractmethod
+    def _formatting_values(self):
+        # type: () -> np.ndarray
+        # At the moment, this has to be an array since we use result.dtype
+        """An array of values to be printed in, e.g. the Series repr"""
+
+    @classmethod
+    @abc.abstractmethod
+    def _concat_same_type(cls, to_concat):
+        # type: (Sequence[ExtensionArray]) -> ExtensionArray
+        """Concatenate multiple arrays
+
+        Parameters
+        ----------
+        to_concat : sequence of this type
+
+        Returns
+        -------
+        ExtensionArray
+        """
+
+    @abc.abstractmethod
+    def get_values(self):
+        # type: () -> np.ndarray
+        """Get the underlying values backing your data
+        """
+        pass
+
+    def _can_hold_na(self):
+        """Whether your array can hold missing values. True by default.
+
+        Notes
+        -----
+        Setting this to false will optimize some operations like fillna.
+        """
+        # type: () -> bool
+        return True
+
+    @property
+    def is_sparse(self):
+        """Whether your array is sparse. False by default."""
+        # type: () -> bool
+        return False
+
+    def _slice(self, slicer):
+        # type: (Union[tuple, Sequence, int]) -> 'ExtensionArray'
+        """Return a new array sliced by `slicer`.
+
+        Parameters
+        ----------
+        slicer : slice or np.ndarray
+            If an array, it should just be a boolean mask
+
+        Returns
+        -------
+        array : ExtensionArray
+            Should return an ExtensionArray, even if ``self[slicer]``
+            would return a scalar.
+        """
+        return type(self)(self[slicer])
+
+    def value_counts(self, dropna=True):
+        """Optional method for computing the counts of each unique value.
+ + Parameters + ---------- + dropna : bool, default True + whether to exclude missing values from the computation + + Returns + ------- + counts : Series + """ + from pandas.core.algorithms import value_counts + mask = ~np.asarray(self.isna()) + values = self[mask] # XXX: this imposes boolean indexing + return value_counts(np.asarray(values), dropna=dropna) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 708f903cd73cb..f0ec046e00e65 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -44,6 +44,8 @@ from pandas.util._validators import validate_bool_kwarg from pandas.core.config import get_option +from .base import ExtensionArray + def _cat_compare_op(op): def f(self, other): @@ -149,7 +151,7 @@ def _maybe_to_categorical(array): """ -class Categorical(PandasObject): +class Categorical(ExtensionArray, PandasObject): """ Represents a categorical variable in classic R / S-plus fashion @@ -2131,6 +2133,20 @@ def repeat(self, repeats, *args, **kwargs): return self._constructor(values=codes, categories=self.categories, ordered=self.ordered, fastpath=True) + # Interface things + # can_hold_na, concat_same_type, formatting_values + @property + def _can_hold_na(self): + return True + + @classmethod + def _concat_same_type(self, to_concat): + from pandas.types.concat import union_categoricals + return union_categoricals(to_concat) + + def _formatting_values(self): + return self + # The Series.cat accessor diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py new file mode 100644 index 0000000000000..348b4f077673a --- /dev/null +++ b/pandas/core/dtypes/base.py @@ -0,0 +1,92 @@ +"""Extend pandas with custom array types""" +import abc + +from pandas.compat import add_metaclass + + +@add_metaclass(abc.ABCMeta) +class ExtensionDtype(object): + """A custom data type for your array. + """ + @property + def type(self): + """Typically a metaclass inheriting from 'type' with no methods.""" + return type(self.name, (), {}) + + @property + def kind(self): + """A character code (one of 'biufcmMOSUV'), default 'O' + + See Also + -------- + numpy.dtype.kind + """ + return 'O' + + @property + @abc.abstractmethod + def name(self): + """An string identifying the data type. + + Will be used in, e.g. ``Series.dtype`` + """ + + @property + def names(self): + """Ordered list of field names, or None if there are no fields""" + return None + + @classmethod + def construct_from_string(cls, string): + """Attempt to construct this type from a string. + + Parameters + ---------- + string : str + + Returns + ------- + self : instance of 'cls' + + Raises + ------ + TypeError + + Notes + ----- + The default implementation checks if 'string' matches your + type's name. If so, it calls your class with no arguments. + """ + if string == cls.name: + return cls() + else: + raise TypeError("Cannot construct a '{}' from " + "'{}'".format(cls, string)) + + @classmethod + def is_dtype(cls, dtype): + """Check if we match 'dtype' + + Parameters + ---------- + dtype : str or dtype + + Returns + ------- + is_dtype : bool + + Notes + ----- + The default implementation is True if + + 1. 'dtype' is a string that returns true for + ``cls.construct_from_string`` + 2. 'dtype' is ``cls`` or a subclass of ``cls``. 
+ """ + if isinstance(dtype, str): + try: + return isinstance(cls.construct_from_string(dtype), cls) + except TypeError: + return False + else: + return issubclass(dtype, cls) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index dca9a5fde0d74..2e4d0d884bf95 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1685,6 +1685,38 @@ def is_extension_type(arr): return False +def is_extension_array_dtype(arr_or_dtype): + """Check if an object is a pandas extension array type + + Parameters + ---------- + arr_or_dtype : object + + Returns + ------- + bool + + Notes + ----- + This checks whether an object implements the pandas extension + array interface. In pandas, this includes: + + * Categorical + * PeriodArray + * IntervalArray + * SparseArray + + Third-party libraries may implement arrays or types satisfying + this interface as well. + """ + from pandas.core.arrays import ExtensionArray + + # we want to unpack series, anything else? + if isinstance(arr_or_dtype, ABCSeries): + arr_or_dtype = arr_or_dtype.values + return isinstance(arr_or_dtype, (ExtensionDtype, ExtensionArray)) + + def is_complex_dtype(arr_or_dtype): """ Check whether the provided array or dtype is of a complex dtype. diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 1eb87aa99fd1e..df7b0dc9ea60e 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -5,15 +5,15 @@ from pandas import compat from pandas.core.dtypes.generic import ABCIndexClass, ABCCategoricalIndex +from .base import ExtensionDtype -class ExtensionDtype(object): + +class PandasExtensionDtype(ExtensionDtype): """ A np.dtype duck-typed class, suitable for holding a custom dtype. THIS IS NOT A REAL NUMPY DTYPE """ - name = None - names = None type = None subdtype = None kind = None @@ -108,7 +108,7 @@ class CategoricalDtypeType(type): pass -class CategoricalDtype(ExtensionDtype): +class CategoricalDtype(PandasExtensionDtype): """ Type for categorical data with the categories and orderedness @@ -387,7 +387,7 @@ class DatetimeTZDtypeType(type): pass -class DatetimeTZDtype(ExtensionDtype): +class DatetimeTZDtype(PandasExtensionDtype): """ A np.dtype duck-typed class, suitable for holding a custom datetime with tz @@ -501,7 +501,7 @@ class PeriodDtypeType(type): pass -class PeriodDtype(ExtensionDtype): +class PeriodDtype(PandasExtensionDtype): __metaclass__ = PeriodDtypeType """ A Period duck-typed class, suitable for holding a period with freq dtype. 
@@ -619,7 +619,7 @@ class IntervalDtypeType(type): pass -class IntervalDtype(ExtensionDtype): +class IntervalDtype(PandasExtensionDtype): __metaclass__ = IntervalDtypeType """ A Interval duck-typed class, suitable for holding an interval diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 764e06c19e76c..fb52a60c4cdd5 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -31,6 +31,7 @@ is_datetimelike_v_numeric, is_float_dtype, is_numeric_dtype, is_numeric_v_string_like, is_extension_type, + is_extension_array_dtype, is_list_like, is_re, is_re_compilable, @@ -59,7 +60,7 @@ from pandas.core.index import Index, MultiIndex, _ensure_index from pandas.core.indexing import maybe_convert_indices, length_of_indexer -from pandas.core.arrays.categorical import Categorical, _maybe_to_categorical +from pandas.core.arrays import Categorical from pandas.core.indexes.datetimes import DatetimeIndex from pandas.io.formats.printing import pprint_thing @@ -95,6 +96,7 @@ class Block(PandasObject): is_object = False is_categorical = False is_sparse = False + is_extension = False _box_to_block_values = True _can_hold_na = False _can_consolidate = True @@ -107,14 +109,15 @@ class Block(PandasObject): def __init__(self, values, placement, ndim=None, fastpath=False): if ndim is None: ndim = values.ndim - elif values.ndim != ndim: + elif self._validate_ndim and values.ndim != ndim: raise ValueError('Wrong number of dimensions') self.ndim = ndim self.mgr_locs = placement self.values = values - if ndim and len(self.mgr_locs) != len(self.values): + if (self._validate_ndim and ndim and + len(self.mgr_locs) != len(self.values)): raise ValueError( 'Wrong number of items passed {val}, placement implies ' '{mgr}'.format(val=len(self.values), mgr=len(self.mgr_locs))) @@ -273,7 +276,6 @@ def reshape_nd(self, labels, shape, ref_items, mgr=None): return a new block that is transformed to a nd block """ - return _block2d_to_blocknd(values=self.get_values().T, placement=self.mgr_locs, shape=shape, labels=labels, ref_items=ref_items) @@ -548,15 +550,20 @@ def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs): def _astype(self, dtype, copy=False, errors='raise', values=None, klass=None, mgr=None, **kwargs): - """ - Coerce to the new type + """Coerce to the new type + Parameters + ---------- dtype : str, dtype convertible copy : boolean, default False copy if indicated errors : str, {'raise', 'ignore'}, default 'ignore' - ``raise`` : allow exceptions to be raised - ``ignore`` : suppress exceptions. On error return original object + + Returns + ------- + IntervalArray """ errors_legal_values = ('raise', 'ignore') @@ -1695,24 +1702,20 @@ class NonConsolidatableMixIn(object): _holder = None def __init__(self, values, placement, ndim=None, fastpath=False, **kwargs): + # Placement must be converted to BlockPlacement so that we can check + # its length + if not isinstance(placement, BlockPlacement): + placement = BlockPlacement(placement) - # Placement must be converted to BlockPlacement via property setter - # before ndim logic, because placement may be a slice which doesn't - # have a length. 
-        self.mgr_locs = placement
-
-        # kludgetastic
+        # Maybe infer ndim from placement
         if ndim is None:
-            if len(self.mgr_locs) != 1:
+            if len(placement) != 1:
                 ndim = 1
             else:
                 ndim = 2
-        self.ndim = ndim
-
-        if not isinstance(values, self._holder):
-            raise TypeError("values must be {0}".format(self._holder.__name__))
-
-        self.values = values
+        super(NonConsolidatableMixIn, self).__init__(values, placement,
+                                                     ndim=ndim,
+                                                     fastpath=fastpath)

     @property
     def shape(self):
@@ -1763,7 +1766,7 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0,

         Returns
         -------
-        a new block(s), the result of the putmask
+        a new block, the result of the putmask
         """
         inplace = validate_bool_kwarg(inplace, 'inplace')

@@ -1821,6 +1824,130 @@ def _unstack(self, unstacker_func, new_columns):
         return blocks, mask


+class ExtensionBlock(NonConsolidatableMixIn, Block):
+    """Block for holding extension types.
+
+    Notes
+    -----
+    This holds all 3rd-party extension types. It's also the immediate
+    parent class for our internal extension types' blocks, CategoricalBlock.
+
+    All extension arrays *must* be 1-D, which simplifies things a bit.
+    """
+    # Some questions / notes as comments, will be removed.
+    #
+    # Currently inherited from NCB. We'll keep it around until SparseBlock
+    # and DatetimeTZBlock are refactored.
+    # - set
+    # - iget
+    # - should_store
+    # - putmask
+    # - _slice
+    # - _try_cast_result
+    # - unstack
+
+    # Think about overriding these methods from Block
+    # - _maybe_downcast: (never downcast)
+
+    # Methods we can (probably) ignore and just use Block's:
+
+    # * replace / replace_single
+    #   Categorical got Object, but was hopefully unnecessary.
+    #   DatetimeTZ, Sparse got Block
+    # * is_view
+    #   Categorical overrides to say that it is not.
+    #   DatetimeTZ, Sparse inherits Base anyway
+
+    is_extension = True
+
+    # XXX
+    # is_bool is a change for CategoricalBlock. Used to inherit
+    # from Object to infer from values. If this matters, we should
+    # override it directly in CategoricalBlock so that we infer from
+    # the categories, not the codes.
+    is_bool = False
+
+    def __init__(self, values, placement, ndim=None, fastpath=False):
+        self._holder = type(values)
+        super(ExtensionBlock, self).__init__(values, placement, ndim=ndim,
+                                             fastpath=fastpath)
+
+    def get_values(self, dtype=None):
+        # ExtensionArrays must be iterable, so this works.
+        values = np.asarray(self.values)
+        if values.ndim == self.ndim - 1:
+            values = values.reshape((1,) + values.shape)
+        return values
+
+    def to_dense(self):
+        return self.values.to_dense().view()
+
+    def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None):
+        """
+        Take values according to indexer and return them as a block.
+        """
+        if fill_tuple is None:
+            fill_value = None
+        else:
+            fill_value = fill_tuple[0]
+
+        # axis doesn't matter; we are really a single-dim object
+        # but are passed the axis depending on the calling routine
+        # if it's REALLY axis 0, then this will be a reindex and not a take
+        new_values = self.values.take_nd(indexer, fill_value=fill_value)
+
+        # if we are a 1-dim object, then always place at 0
+        if self.ndim == 1:
+            new_mgr_locs = [0]
+        else:
+            if new_mgr_locs is None:
+                new_mgr_locs = self.mgr_locs
+
+        return self.make_block_same_class(new_values, new_mgr_locs)
+
+    def _can_hold_element(self, element):
+        # XXX:
+        # Not defined on NCM.
+        # Categorical got True from ObjectBlock
+        # DatetimeTZ gets DatetimeBlock
+        # Sparse gets Block
+        # Let's just assume yes for now, but we can maybe push
+        # this onto the array.
+ return True + + def convert(self, copy=True, **kwargs): + # We're dedicated to a type, we don't convert. + # Taken from CategoricalBlock / Block. + return self.copy() if copy else self + + def _slice(self, slicer): + """ return a slice of my values """ + + # slice the category + # return same dims as we currently have + + if isinstance(slicer, tuple) and len(slicer) == 2: + if not is_null_slice(slicer[0]): + raise AssertionError("invalid slicing for a 1-ndim " + "categorical") + slicer = slicer[1] + + return self.values._slice(slicer) + + def formatting_values(self): + return self.values._formatting_values() + + def concat_same_type(self, to_concat, placement=None): + """ + Concatenate list of single blocks of the same type. + """ + values = self._holder._concat_same_type( + [blk.values for blk in to_concat]) + placement = placement or slice(0, len(values), 1) + return self.make_block_same_class(values, ndim=self.ndim, + placement=placement) + + class NumericBlock(Block): __slots__ = () is_numeric = True @@ -2334,7 +2461,7 @@ def re_replacer(s): return block -class CategoricalBlock(NonConsolidatableMixIn, ObjectBlock): +class CategoricalBlock(ExtensionBlock): __slots__ = () is_categorical = True _verify_integrity = True @@ -2343,6 +2470,7 @@ class CategoricalBlock(NonConsolidatableMixIn, ObjectBlock): _concatenator = staticmethod(_concat._concat_categorical) def __init__(self, values, placement, fastpath=False, **kwargs): + from pandas.core.arrays.categorical import _maybe_to_categorical # coerce to categorical if we can super(CategoricalBlock, self).__init__(_maybe_to_categorical(values), @@ -2354,12 +2482,6 @@ def is_view(self): """ I am never a view """ return False - def to_dense(self): - return self.values.to_dense().view() - - def convert(self, copy=True, **kwargs): - return self.copy() if copy else self - @property def array_dtype(self): """ the dtype to return if I want to construct this block as an @@ -2367,13 +2489,6 @@ def array_dtype(self): """ return np.object_ - def _slice(self, slicer): - """ return a slice of my values """ - - # slice the category - # return same dims as we currently have - return self.values._slice(slicer) - def _try_coerce_result(self, result): """ reverse of try_coerce_args """ @@ -2410,29 +2525,6 @@ def shift(self, periods, axis=0, mgr=None): return self.make_block_same_class(values=self.values.shift(periods), placement=self.mgr_locs) - def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None): - """ - Take values according to indexer and return them as a block.bb - """ - if fill_tuple is None: - fill_value = None - else: - fill_value = fill_tuple[0] - - # axis doesn't matter; we are really a single-dim object - # but are passed the axis depending on the calling routing - # if its REALLY axis 0, then this will be a reindex and not a take - new_values = self.values.take_nd(indexer, fill_value=fill_value) - - # if we are a 1-dim object, then always place at 0 - if self.ndim == 1: - new_mgr_locs = [0] - else: - if new_mgr_locs is None: - new_mgr_locs = self.mgr_locs - - return self.make_block_same_class(new_values, new_mgr_locs) - def to_native_types(self, slicer=None, na_rep='', quoting=None, **kwargs): """ convert to our native types format, slicing if desired """ @@ -2447,17 +2539,6 @@ def to_native_types(self, slicer=None, na_rep='', quoting=None, **kwargs): # we are expected to return a 2-d ndarray return values.reshape(1, len(values)) - def concat_same_type(self, to_concat, placement=None): - """ - Concatenate list of single blocks of 
the same type. - """ - values = self._concatenator([blk.values for blk in to_concat], - axis=self.ndim - 1) - # not using self.make_block_same_class as values can be object dtype - return make_block( - values, placement=placement or slice(0, len(values), 1), - ndim=self.ndim) - class DatetimeBlock(DatetimeLikeBlockMixin, Block): __slots__ = () @@ -2465,7 +2546,8 @@ class DatetimeBlock(DatetimeLikeBlockMixin, Block): _can_hold_na = True def __init__(self, values, placement, fastpath=False, **kwargs): - if values.dtype != _NS_DTYPE: + if values.dtype != _NS_DTYPE and values.dtype.base != _NS_DTYPE: + # not datetime64 or datetime64tz values = conversion.ensure_datetime64ns(values) super(DatetimeBlock, self).__init__(values, fastpath=True, @@ -2954,6 +3036,8 @@ def get_block_type(values, dtype=None): cls = BoolBlock elif is_categorical(values): cls = CategoricalBlock + elif is_extension_array_dtype(values): + cls = ExtensionBlock else: cls = ObjectBlock return cls @@ -4681,6 +4765,7 @@ def form_blocks(arrays, names, axes): # generalize? items_dict = defaultdict(list) extra_locs = [] + external_items = [] names_idx = Index(names) if names_idx.equals(axes[0]): @@ -4748,6 +4833,31 @@ def form_blocks(arrays, names, axes): for i, _, array in items_dict['CategoricalBlock']] blocks.extend(cat_blocks) + if len(items_dict['ExtensionBlock']): + + external_blocks = [] + for i, _, array in items_dict['ExtensionBlock']: + if isinstance(array, ABCSeries): + array = array.values + # Allow our internal arrays to chose their block type. + block_type = getattr(array, '_block_type', ExtensionBlock) + external_blocks.append( + make_block(array, klass=block_type, + fastpath=True, placement=[i])) + blocks.extend(external_blocks) + + if len(external_items): + external_blocks = [] + for i, _, array in external_items: + if isinstance(array, ABCSeries): + array = array.values + # Allow our internal arrays to chose their block type. 
+ block_type = getattr(array, '_block_type', ExtensionBlock) + external_blocks.append( + make_block(array, klass=block_type, + fastpath=True, placement=[i])) + blocks.extend(external_blocks) + if len(extra_locs): shape = (len(extra_locs),) + tuple(len(x) for x in axes[1:]) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index d800a7b92b559..3423e22a4c64e 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -10,12 +10,14 @@ Series, Categorical, CategoricalIndex, IntervalIndex, date_range) from pandas.compat import string_types +from pandas.core.arrays import ExtensionArray from pandas.core.dtypes.dtypes import ( DatetimeTZDtype, PeriodDtype, - IntervalDtype, CategoricalDtype) + IntervalDtype, CategoricalDtype, ExtensionDtype) from pandas.core.dtypes.common import ( is_categorical_dtype, is_categorical, is_datetime64tz_dtype, is_datetimetz, + is_extension_array_dtype, is_period_dtype, is_period, is_dtype_equal, is_datetime64_ns_dtype, is_datetime64_dtype, is_interval_dtype, @@ -742,3 +744,35 @@ def test_categorical_categories(self): tm.assert_index_equal(c1.categories, pd.Index(['a', 'b'])) c1 = CategoricalDtype(CategoricalIndex(['a', 'b'])) tm.assert_index_equal(c1.categories, pd.Index(['a', 'b'])) + + +class DummyArray(object): + pass + + +class DummyDtype(object): + pass + + +ExtensionArray.register(DummyArray) +ExtensionDtype.register(DummyDtype) + + +class TestExtensionArrayDtype(object): + + @pytest.mark.parametrize('values', [ + pd.Categorical([]), + pd.Categorical([]).dtype, + pd.Series(pd.Categorical([])), + DummyDtype(), + DummyArray(), + ]) + def test_is_extension_array_dtype(self, values): + assert is_extension_array_dtype(values) + + @pytest.mark.parametrize('values', [ + np.array([]), + pd.Series(np.array([])), + ]) + def test_is_not_extension_array_dtype(self, values): + assert not is_extension_array_dtype(values) diff --git a/pandas/tests/internals/test_external_block.py b/pandas/tests/internals/test_external_block.py index 729ee0093b6dc..2487363df8f99 100644 --- a/pandas/tests/internals/test_external_block.py +++ b/pandas/tests/internals/test_external_block.py @@ -5,12 +5,12 @@ import pandas as pd from pandas.core.internals import ( - Block, BlockManager, SingleBlockManager, NonConsolidatableMixIn) + BlockManager, SingleBlockManager, ExtensionBlock) import pytest -class CustomBlock(NonConsolidatableMixIn, Block): +class CustomBlock(ExtensionBlock): _holder = np.ndarray
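
A minimal sketch (not part of the patch) of what a third-party extension type could look like against the interface introduced above. The ``DecimalDtype`` / ``DecimalArray`` names are purely illustrative, and the sketch only targets the abstract members declared in pandas/core/dtypes/base.py and pandas/core/arrays/base.py at this commit; it skips ``allow_fill``/``fill_value`` handling in ``take`` and anything the ABCs do not require.

import decimal

import numpy as np

from pandas.core.arrays import ExtensionArray
from pandas.core.dtypes.base import ExtensionDtype


class DecimalDtype(ExtensionDtype):
    # ``name`` is the only abstract member of ExtensionDtype in this patch.
    name = 'decimal'
    type = decimal.Decimal


class DecimalArray(ExtensionArray):
    """Hypothetical 1-D array of decimal.Decimal values."""

    dtype = DecimalDtype()

    def __init__(self, values):
        # store the scalars in a 1-D object ndarray
        self._data = np.asarray(values, dtype=object)

    # Sequence interface -------------------------------------------------
    def __getitem__(self, item):
        if isinstance(item, (int, np.integer)):
            return self._data[item]            # scalar for integer keys
        return type(self)(self._data[item])    # array for slices/masks

    def __iter__(self):
        return iter(self._data)

    def __len__(self):
        return len(self._data)

    # Required attributes --------------------------------------------------
    @property
    def nbytes(self):
        return self._data.nbytes

    # Additional / indexing methods ------------------------------------------
    def isna(self):
        return np.array([v.is_nan() for v in self._data], dtype=bool)

    def take(self, indexer, allow_fill=True, fill_value=None):
        # NOTE: a real implementation would honor allow_fill / fill_value
        return type(self)(self._data.take(indexer))

    def copy(self, deep=False):
        return type(self)(self._data.copy())

    # Block-related methods ---------------------------------------------------
    def _formatting_values(self):
        return self._data

    @classmethod
    def _concat_same_type(cls, to_concat):
        return cls(np.concatenate([arr._data for arr in to_concat]))

    def get_values(self):
        return self._data


arr = DecimalArray([decimal.Decimal('1.5'), decimal.Decimal('NaN')])
arr.isna()     # array([False,  True])
len(arr[:1])   # 1 -- slices return a DecimalArray

With the ABCs satisfied, ``is_extension_array_dtype(arr)`` returns True per the check added in pandas/core/dtypes/common.py, and ExtensionBlock is the container meant to eventually hold such an array inside a DataFrame; full Series/DataFrame support presumably needs more of the interface than this sketch implements.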