From 2ef52169767c50682e2e9ee7a5fda2163b80754e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 15 Jan 2018 10:06:34 -0600 Subject: [PATCH] REF: Define extension base classes --- pandas/core/arrays/__init__.py | 1 + pandas/core/arrays/base.py | 201 ++++++++++++++ pandas/core/arrays/categorical.py | 18 +- pandas/core/dtypes/base.py | 92 +++++++ pandas/core/dtypes/common.py | 32 +++ pandas/core/dtypes/dtypes.py | 14 +- pandas/core/internals.py | 248 +++++++++++++----- pandas/tests/dtypes/test_dtypes.py | 36 ++- pandas/tests/internals/test_external_block.py | 4 +- 9 files changed, 566 insertions(+), 80 deletions(-) create mode 100644 pandas/core/arrays/base.py create mode 100644 pandas/core/dtypes/base.py diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py index ee32b12f0e712..f8adcf520c15b 100644 --- a/pandas/core/arrays/__init__.py +++ b/pandas/core/arrays/__init__.py @@ -1 +1,2 @@ +from .base import ExtensionArray # noqa from .categorical import Categorical # noqa diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py new file mode 100644 index 0000000000000..ad29edde34ce6 --- /dev/null +++ b/pandas/core/arrays/base.py @@ -0,0 +1,201 @@ +"""An interface for extending pandas with custom arrays.""" +import abc + +import numpy as np + +from pandas.compat import add_metaclass + + +_not_implemented_message = "{} does not implement {}." + + +@add_metaclass(abc.ABCMeta) +class ExtensionArray(object): + """Abstract base class for custom array types + + pandas will recognize instances of this class as proper arrays + with a custom type and will not attempt to coerce them to objects. + + Subclasses are expected to implement the following methods. + """ + # ------------------------------------------------------------------------ + # Must be a Sequence + # ------------------------------------------------------------------------ + @abc.abstractmethod + def __getitem__(self, item): + """Select a subset of self + + Notes + ----- + As a sequence, __getitem__ should expect integer or slice ``key``. + + For slice ``key``, you should return an instance of yourself, even + if the slice is length 0 or 1. + + For scalar ``key``, you may return a scalar suitable for your type. + The scalar need not be an instance or subclass of your array type. + """ + # type (Any) -> Any + + def __setitem__(self, key, value): + # type: (Any, Any) -> None + raise NotImplementedError(_not_implemented_message.format( + type(self), '__setitem__') + ) + + @abc.abstractmethod + def __iter__(self): + # type: () -> Iterator + pass + + @abc.abstractmethod + def __len__(self): + # type: () -> int + pass + + # ------------------------------------------------------------------------ + # Required attributes + # ------------------------------------------------------------------------ + @property + def base(self): + """The base array I am a view of. None by default.""" + + @property + @abc.abstractmethod + def dtype(self): + """An instance of 'ExtensionDtype'.""" + # type: () -> ExtensionDtype + pass + + @property + def shape(self): + # type: () -> Tuple[int, ...] 
+        return (len(self),)
+
+    @property
+    def ndim(self):
+        # type: () -> int
+        """Extension Arrays are only allowed to be 1-dimensional."""
+        return 1
+
+    @property
+    @abc.abstractmethod
+    def nbytes(self):
+        """The number of bytes needed to store this object in memory."""
+        # type: () -> int
+        pass
+
+    # ------------------------------------------------------------------------
+    # Additional Methods
+    # ------------------------------------------------------------------------
+    @abc.abstractmethod
+    def isna(self):
+        """Boolean NumPy array indicating if each value is missing."""
+        # type: () -> np.ndarray
+        pass
+
+    # ------------------------------------------------------------------------
+    # Indexing methods
+    # ------------------------------------------------------------------------
+    @abc.abstractmethod
+    def take(self, indexer, allow_fill=True, fill_value=None):
+        # type: (Sequence, bool, Optional[Any]) -> ExtensionArray
+        """For slicing"""
+
+    def take_nd(self, indexer, allow_fill=True, fill_value=None):
+        """For slicing"""
+        # TODO: this isn't really necessary for 1-D
+        return self.take(indexer, allow_fill=allow_fill,
+                         fill_value=fill_value)
+
+    @abc.abstractmethod
+    def copy(self, deep=False):
+        # type: (bool) -> ExtensionArray
+        """Return a copy of the array."""
+
+    # ------------------------------------------------------------------------
+    # Block-related methods
+    # ------------------------------------------------------------------------
+    @property
+    def _fill_value(self):
+        """The missing value for this type, e.g. np.nan"""
+        # type: () -> Any
+        return None
+
+    @abc.abstractmethod
+    def _formatting_values(self):
+        # type: () -> np.ndarray
+        # At the moment, this has to be an array since we use result.dtype
+        """An array of values to be printed in, e.g. the Series repr"""
+
+    @classmethod
+    @abc.abstractmethod
+    def _concat_same_type(cls, to_concat):
+        # type: (Sequence[ExtensionArray]) -> ExtensionArray
+        """Concatenate multiple arrays
+
+        Parameters
+        ----------
+        to_concat : sequence of this type
+
+        Returns
+        -------
+        ExtensionArray
+        """
+
+    @abc.abstractmethod
+    def get_values(self):
+        # type: () -> np.ndarray
+        """Get the underlying values backing your data
+        """
+        pass
+
+    def _can_hold_na(self):
+        """Whether your array can hold missing values. True by default.
+
+        Notes
+        -----
+        Setting this to false will optimize some operations like fillna.
+        """
+        # type: () -> bool
+        return True
+
+    @property
+    def is_sparse(self):
+        """Whether your array is sparse. False by default."""
+        # type: () -> bool
+        return False
+
+    def _slice(self, slicer):
+        # type: (Union[tuple, Sequence, int]) -> 'ExtensionArray'
+        """Return a new array sliced by `slicer`.
+
+        Parameters
+        ----------
+        slicer : slice or np.ndarray
+            If an array, it should just be a boolean mask
+
+        Returns
+        -------
+        array : ExtensionArray
+            Should return an ExtensionArray, even if ``self[slicer]``
+            would return a scalar.
+        """
+        return type(self)(self[slicer])
+
+    def value_counts(self, dropna=True):
+        """Optional method for computing the counts of each unique value.
+ + Parameters + ---------- + dropna : bool, default True + whether to exclude missing values from the computation + + Returns + ------- + counts : Series + """ + from pandas.core.algorithms import value_counts + mask = ~np.asarray(self.isna()) + values = self[mask] # XXX: this imposes boolean indexing + return value_counts(np.asarray(values), dropna=dropna) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 708f903cd73cb..f0ec046e00e65 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -44,6 +44,8 @@ from pandas.util._validators import validate_bool_kwarg from pandas.core.config import get_option +from .base import ExtensionArray + def _cat_compare_op(op): def f(self, other): @@ -149,7 +151,7 @@ def _maybe_to_categorical(array): """ -class Categorical(PandasObject): +class Categorical(ExtensionArray, PandasObject): """ Represents a categorical variable in classic R / S-plus fashion @@ -2131,6 +2133,20 @@ def repeat(self, repeats, *args, **kwargs): return self._constructor(values=codes, categories=self.categories, ordered=self.ordered, fastpath=True) + # Interface things + # can_hold_na, concat_same_type, formatting_values + @property + def _can_hold_na(self): + return True + + @classmethod + def _concat_same_type(self, to_concat): + from pandas.types.concat import union_categoricals + return union_categoricals(to_concat) + + def _formatting_values(self): + return self + # The Series.cat accessor diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py new file mode 100644 index 0000000000000..348b4f077673a --- /dev/null +++ b/pandas/core/dtypes/base.py @@ -0,0 +1,92 @@ +"""Extend pandas with custom array types""" +import abc + +from pandas.compat import add_metaclass + + +@add_metaclass(abc.ABCMeta) +class ExtensionDtype(object): + """A custom data type for your array. + """ + @property + def type(self): + """Typically a metaclass inheriting from 'type' with no methods.""" + return type(self.name, (), {}) + + @property + def kind(self): + """A character code (one of 'biufcmMOSUV'), default 'O' + + See Also + -------- + numpy.dtype.kind + """ + return 'O' + + @property + @abc.abstractmethod + def name(self): + """An string identifying the data type. + + Will be used in, e.g. ``Series.dtype`` + """ + + @property + def names(self): + """Ordered list of field names, or None if there are no fields""" + return None + + @classmethod + def construct_from_string(cls, string): + """Attempt to construct this type from a string. + + Parameters + ---------- + string : str + + Returns + ------- + self : instance of 'cls' + + Raises + ------ + TypeError + + Notes + ----- + The default implementation checks if 'string' matches your + type's name. If so, it calls your class with no arguments. + """ + if string == cls.name: + return cls() + else: + raise TypeError("Cannot construct a '{}' from " + "'{}'".format(cls, string)) + + @classmethod + def is_dtype(cls, dtype): + """Check if we match 'dtype' + + Parameters + ---------- + dtype : str or dtype + + Returns + ------- + is_dtype : bool + + Notes + ----- + The default implementation is True if + + 1. 'dtype' is a string that returns true for + ``cls.construct_from_string`` + 2. 'dtype' is ``cls`` or a subclass of ``cls``. 
+ """ + if isinstance(dtype, str): + try: + return isinstance(cls.construct_from_string(dtype), cls) + except TypeError: + return False + else: + return issubclass(dtype, cls) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index dca9a5fde0d74..2e4d0d884bf95 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1685,6 +1685,38 @@ def is_extension_type(arr): return False +def is_extension_array_dtype(arr_or_dtype): + """Check if an object is a pandas extension array type + + Parameters + ---------- + arr_or_dtype : object + + Returns + ------- + bool + + Notes + ----- + This checks whether an object implements the pandas extension + array interface. In pandas, this includes: + + * Categorical + * PeriodArray + * IntervalArray + * SparseArray + + Third-party libraries may implement arrays or types satisfying + this interface as well. + """ + from pandas.core.arrays import ExtensionArray + + # we want to unpack series, anything else? + if isinstance(arr_or_dtype, ABCSeries): + arr_or_dtype = arr_or_dtype.values + return isinstance(arr_or_dtype, (ExtensionDtype, ExtensionArray)) + + def is_complex_dtype(arr_or_dtype): """ Check whether the provided array or dtype is of a complex dtype. diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 1eb87aa99fd1e..df7b0dc9ea60e 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -5,15 +5,15 @@ from pandas import compat from pandas.core.dtypes.generic import ABCIndexClass, ABCCategoricalIndex +from .base import ExtensionDtype -class ExtensionDtype(object): + +class PandasExtensionDtype(ExtensionDtype): """ A np.dtype duck-typed class, suitable for holding a custom dtype. THIS IS NOT A REAL NUMPY DTYPE """ - name = None - names = None type = None subdtype = None kind = None @@ -108,7 +108,7 @@ class CategoricalDtypeType(type): pass -class CategoricalDtype(ExtensionDtype): +class CategoricalDtype(PandasExtensionDtype): """ Type for categorical data with the categories and orderedness @@ -387,7 +387,7 @@ class DatetimeTZDtypeType(type): pass -class DatetimeTZDtype(ExtensionDtype): +class DatetimeTZDtype(PandasExtensionDtype): """ A np.dtype duck-typed class, suitable for holding a custom datetime with tz @@ -501,7 +501,7 @@ class PeriodDtypeType(type): pass -class PeriodDtype(ExtensionDtype): +class PeriodDtype(PandasExtensionDtype): __metaclass__ = PeriodDtypeType """ A Period duck-typed class, suitable for holding a period with freq dtype. 
@@ -619,7 +619,7 @@ class IntervalDtypeType(type): pass -class IntervalDtype(ExtensionDtype): +class IntervalDtype(PandasExtensionDtype): __metaclass__ = IntervalDtypeType """ A Interval duck-typed class, suitable for holding an interval diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 764e06c19e76c..fb52a60c4cdd5 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -31,6 +31,7 @@ is_datetimelike_v_numeric, is_float_dtype, is_numeric_dtype, is_numeric_v_string_like, is_extension_type, + is_extension_array_dtype, is_list_like, is_re, is_re_compilable, @@ -59,7 +60,7 @@ from pandas.core.index import Index, MultiIndex, _ensure_index from pandas.core.indexing import maybe_convert_indices, length_of_indexer -from pandas.core.arrays.categorical import Categorical, _maybe_to_categorical +from pandas.core.arrays import Categorical from pandas.core.indexes.datetimes import DatetimeIndex from pandas.io.formats.printing import pprint_thing @@ -95,6 +96,7 @@ class Block(PandasObject): is_object = False is_categorical = False is_sparse = False + is_extension = False _box_to_block_values = True _can_hold_na = False _can_consolidate = True @@ -107,14 +109,15 @@ class Block(PandasObject): def __init__(self, values, placement, ndim=None, fastpath=False): if ndim is None: ndim = values.ndim - elif values.ndim != ndim: + elif self._validate_ndim and values.ndim != ndim: raise ValueError('Wrong number of dimensions') self.ndim = ndim self.mgr_locs = placement self.values = values - if ndim and len(self.mgr_locs) != len(self.values): + if (self._validate_ndim and ndim and + len(self.mgr_locs) != len(self.values)): raise ValueError( 'Wrong number of items passed {val}, placement implies ' '{mgr}'.format(val=len(self.values), mgr=len(self.mgr_locs))) @@ -273,7 +276,6 @@ def reshape_nd(self, labels, shape, ref_items, mgr=None): return a new block that is transformed to a nd block """ - return _block2d_to_blocknd(values=self.get_values().T, placement=self.mgr_locs, shape=shape, labels=labels, ref_items=ref_items) @@ -548,15 +550,20 @@ def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs): def _astype(self, dtype, copy=False, errors='raise', values=None, klass=None, mgr=None, **kwargs): - """ - Coerce to the new type + """Coerce to the new type + Parameters + ---------- dtype : str, dtype convertible copy : boolean, default False copy if indicated errors : str, {'raise', 'ignore'}, default 'ignore' - ``raise`` : allow exceptions to be raised - ``ignore`` : suppress exceptions. On error return original object + + Returns + ------- + IntervalArray """ errors_legal_values = ('raise', 'ignore') @@ -1695,24 +1702,20 @@ class NonConsolidatableMixIn(object): _holder = None def __init__(self, values, placement, ndim=None, fastpath=False, **kwargs): + # Placement must be converted to BlockPlacement so that we can check + # its length + if not isinstance(placement, BlockPlacement): + placement = BlockPlacement(placement) - # Placement must be converted to BlockPlacement via property setter - # before ndim logic, because placement may be a slice which doesn't - # have a length. 
-        self.mgr_locs = placement
-
-        # kludgetastic
+        # Maybe infer ndim from placement
         if ndim is None:
-            if len(self.mgr_locs) != 1:
+            if len(placement) != 1:
                 ndim = 1
             else:
                 ndim = 2
-        self.ndim = ndim
-
-        if not isinstance(values, self._holder):
-            raise TypeError("values must be {0}".format(self._holder.__name__))
-
-        self.values = values
+        super(NonConsolidatableMixIn, self).__init__(values, placement,
+                                                     ndim=ndim,
+                                                     fastpath=fastpath)

     @property
     def shape(self):
@@ -1763,7 +1766,7 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0,

         Returns
         -------
-        a new block(s), the result of the putmask
+        a new block, the result of the putmask
         """
         inplace = validate_bool_kwarg(inplace, 'inplace')

@@ -1821,6 +1824,130 @@ def _unstack(self, unstacker_func, new_columns):
         return blocks, mask


+class ExtensionBlock(NonConsolidatableMixIn, Block):
+    """Block for holding extension types.
+
+    Notes
+    -----
+    This holds all 3rd-party extension types. It's also the immediate
+    parent class for our internal extension types' blocks, CategoricalBlock.
+
+    All extension arrays *must* be 1-D, which simplifies things a bit.
+    """
+    # Some questions / notes as comments, will be removed.
+    #
+    # Currently inherited from NCB. We'll keep it around until SparseBlock
+    # and DatetimeTZBlock are refactored.
+    # - set
+    # - iget
+    # - should_store
+    # - putmask
+    # - _slice
+    # - _try_cast_result
+    # - unstack
+
+    # Think about overriding these methods from Block
+    # - _maybe_downcast: (never downcast)
+
+    # Methods we can (probably) ignore and just use Block's:
+
+    # * replace / replace_single
+    #   Categorical got Object, but was hopefully unnecessary.
+    #   DatetimeTZ, Sparse got Block
+    # * is_view
+    #   Categorical overrides to say that it is not.
+    #   DatetimeTZ, Sparse inherits Base anyway
+
+    is_extension = True
+
+    # XXX
+    # is_bool is a change for CategoricalBlock. Used to inherit
+    # from Object to infer from values. If this matters, we should
+    # override it directly in CategoricalBlock so that we infer from
+    # the categories, not the codes.
+    is_bool = False
+
+    def __init__(self, values, placement, ndim=None, fastpath=False):
+        self._holder = type(values)
+        super(ExtensionBlock, self).__init__(values, placement, ndim=ndim,
+                                             fastpath=fastpath)
+
+    def get_values(self, dtype=None):
+        # ExtensionArrays must be iterable, so this works.
+        values = np.asarray(self.values)
+        if values.ndim == self.ndim - 1:
+            values = values.reshape((1,) + values.shape)
+        return values
+
+    def to_dense(self):
+        return self.values.to_dense().view()
+
+    def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None):
+        """
+        Take values according to indexer and return them as a block.
+        """
+        if fill_tuple is None:
+            fill_value = None
+        else:
+            fill_value = fill_tuple[0]
+
+        # axis doesn't matter; we are really a single-dim object
+        # but are passed the axis depending on the calling routine
+        # if it's REALLY axis 0, then this will be a reindex and not a take
+        new_values = self.values.take_nd(indexer, fill_value=fill_value)
+
+        # if we are a 1-dim object, then always place at 0
+        if self.ndim == 1:
+            new_mgr_locs = [0]
+        else:
+            if new_mgr_locs is None:
+                new_mgr_locs = self.mgr_locs
+
+        return self.make_block_same_class(new_values, new_mgr_locs)
+
+    def _can_hold_element(self, element):
+        # XXX:
+        # Not defined on NCM.
+        # Categorical got True from ObjectBlock
+        # DatetimeTZ gets DatetimeBlock
+        # Sparse gets Block
+        # Let's just assume yes for now, but we can maybe push
+        # this onto the array.
+ return True + + def convert(self, copy=True, **kwargs): + # We're dedicated to a type, we don't convert. + # Taken from CategoricalBlock / Block. + return self.copy() if copy else self + + def _slice(self, slicer): + """ return a slice of my values """ + + # slice the category + # return same dims as we currently have + + if isinstance(slicer, tuple) and len(slicer) == 2: + if not is_null_slice(slicer[0]): + raise AssertionError("invalid slicing for a 1-ndim " + "categorical") + slicer = slicer[1] + + return self.values._slice(slicer) + + def formatting_values(self): + return self.values._formatting_values() + + def concat_same_type(self, to_concat, placement=None): + """ + Concatenate list of single blocks of the same type. + """ + values = self._holder._concat_same_type( + [blk.values for blk in to_concat]) + placement = placement or slice(0, len(values), 1) + return self.make_block_same_class(values, ndim=self.ndim, + placement=placement) + + class NumericBlock(Block): __slots__ = () is_numeric = True @@ -2334,7 +2461,7 @@ def re_replacer(s): return block -class CategoricalBlock(NonConsolidatableMixIn, ObjectBlock): +class CategoricalBlock(ExtensionBlock): __slots__ = () is_categorical = True _verify_integrity = True @@ -2343,6 +2470,7 @@ class CategoricalBlock(NonConsolidatableMixIn, ObjectBlock): _concatenator = staticmethod(_concat._concat_categorical) def __init__(self, values, placement, fastpath=False, **kwargs): + from pandas.core.arrays.categorical import _maybe_to_categorical # coerce to categorical if we can super(CategoricalBlock, self).__init__(_maybe_to_categorical(values), @@ -2354,12 +2482,6 @@ def is_view(self): """ I am never a view """ return False - def to_dense(self): - return self.values.to_dense().view() - - def convert(self, copy=True, **kwargs): - return self.copy() if copy else self - @property def array_dtype(self): """ the dtype to return if I want to construct this block as an @@ -2367,13 +2489,6 @@ def array_dtype(self): """ return np.object_ - def _slice(self, slicer): - """ return a slice of my values """ - - # slice the category - # return same dims as we currently have - return self.values._slice(slicer) - def _try_coerce_result(self, result): """ reverse of try_coerce_args """ @@ -2410,29 +2525,6 @@ def shift(self, periods, axis=0, mgr=None): return self.make_block_same_class(values=self.values.shift(periods), placement=self.mgr_locs) - def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None): - """ - Take values according to indexer and return them as a block.bb - """ - if fill_tuple is None: - fill_value = None - else: - fill_value = fill_tuple[0] - - # axis doesn't matter; we are really a single-dim object - # but are passed the axis depending on the calling routing - # if its REALLY axis 0, then this will be a reindex and not a take - new_values = self.values.take_nd(indexer, fill_value=fill_value) - - # if we are a 1-dim object, then always place at 0 - if self.ndim == 1: - new_mgr_locs = [0] - else: - if new_mgr_locs is None: - new_mgr_locs = self.mgr_locs - - return self.make_block_same_class(new_values, new_mgr_locs) - def to_native_types(self, slicer=None, na_rep='', quoting=None, **kwargs): """ convert to our native types format, slicing if desired """ @@ -2447,17 +2539,6 @@ def to_native_types(self, slicer=None, na_rep='', quoting=None, **kwargs): # we are expected to return a 2-d ndarray return values.reshape(1, len(values)) - def concat_same_type(self, to_concat, placement=None): - """ - Concatenate list of single blocks of 
the same type. - """ - values = self._concatenator([blk.values for blk in to_concat], - axis=self.ndim - 1) - # not using self.make_block_same_class as values can be object dtype - return make_block( - values, placement=placement or slice(0, len(values), 1), - ndim=self.ndim) - class DatetimeBlock(DatetimeLikeBlockMixin, Block): __slots__ = () @@ -2465,7 +2546,8 @@ class DatetimeBlock(DatetimeLikeBlockMixin, Block): _can_hold_na = True def __init__(self, values, placement, fastpath=False, **kwargs): - if values.dtype != _NS_DTYPE: + if values.dtype != _NS_DTYPE and values.dtype.base != _NS_DTYPE: + # not datetime64 or datetime64tz values = conversion.ensure_datetime64ns(values) super(DatetimeBlock, self).__init__(values, fastpath=True, @@ -2954,6 +3036,8 @@ def get_block_type(values, dtype=None): cls = BoolBlock elif is_categorical(values): cls = CategoricalBlock + elif is_extension_array_dtype(values): + cls = ExtensionBlock else: cls = ObjectBlock return cls @@ -4681,6 +4765,7 @@ def form_blocks(arrays, names, axes): # generalize? items_dict = defaultdict(list) extra_locs = [] + external_items = [] names_idx = Index(names) if names_idx.equals(axes[0]): @@ -4748,6 +4833,31 @@ def form_blocks(arrays, names, axes): for i, _, array in items_dict['CategoricalBlock']] blocks.extend(cat_blocks) + if len(items_dict['ExtensionBlock']): + + external_blocks = [] + for i, _, array in items_dict['ExtensionBlock']: + if isinstance(array, ABCSeries): + array = array.values + # Allow our internal arrays to chose their block type. + block_type = getattr(array, '_block_type', ExtensionBlock) + external_blocks.append( + make_block(array, klass=block_type, + fastpath=True, placement=[i])) + blocks.extend(external_blocks) + + if len(external_items): + external_blocks = [] + for i, _, array in external_items: + if isinstance(array, ABCSeries): + array = array.values + # Allow our internal arrays to chose their block type. 
+ block_type = getattr(array, '_block_type', ExtensionBlock) + external_blocks.append( + make_block(array, klass=block_type, + fastpath=True, placement=[i])) + blocks.extend(external_blocks) + if len(extra_locs): shape = (len(extra_locs),) + tuple(len(x) for x in axes[1:]) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index d800a7b92b559..3423e22a4c64e 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -10,12 +10,14 @@ Series, Categorical, CategoricalIndex, IntervalIndex, date_range) from pandas.compat import string_types +from pandas.core.arrays import ExtensionArray from pandas.core.dtypes.dtypes import ( DatetimeTZDtype, PeriodDtype, - IntervalDtype, CategoricalDtype) + IntervalDtype, CategoricalDtype, ExtensionDtype) from pandas.core.dtypes.common import ( is_categorical_dtype, is_categorical, is_datetime64tz_dtype, is_datetimetz, + is_extension_array_dtype, is_period_dtype, is_period, is_dtype_equal, is_datetime64_ns_dtype, is_datetime64_dtype, is_interval_dtype, @@ -742,3 +744,35 @@ def test_categorical_categories(self): tm.assert_index_equal(c1.categories, pd.Index(['a', 'b'])) c1 = CategoricalDtype(CategoricalIndex(['a', 'b'])) tm.assert_index_equal(c1.categories, pd.Index(['a', 'b'])) + + +class DummyArray(object): + pass + + +class DummyDtype(object): + pass + + +ExtensionArray.register(DummyArray) +ExtensionDtype.register(DummyDtype) + + +class TestExtensionArrayDtype(object): + + @pytest.mark.parametrize('values', [ + pd.Categorical([]), + pd.Categorical([]).dtype, + pd.Series(pd.Categorical([])), + DummyDtype(), + DummyArray(), + ]) + def test_is_extension_array_dtype(self, values): + assert is_extension_array_dtype(values) + + @pytest.mark.parametrize('values', [ + np.array([]), + pd.Series(np.array([])), + ]) + def test_is_not_extension_array_dtype(self, values): + assert not is_extension_array_dtype(values) diff --git a/pandas/tests/internals/test_external_block.py b/pandas/tests/internals/test_external_block.py index 729ee0093b6dc..2487363df8f99 100644 --- a/pandas/tests/internals/test_external_block.py +++ b/pandas/tests/internals/test_external_block.py @@ -5,12 +5,12 @@ import pandas as pd from pandas.core.internals import ( - Block, BlockManager, SingleBlockManager, NonConsolidatableMixIn) + BlockManager, SingleBlockManager, ExtensionBlock) import pytest -class CustomBlock(NonConsolidatableMixIn, Block): +class CustomBlock(ExtensionBlock): _holder = np.ndarray
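
A minimal sketch (not part of the patch) of what a third-party extension type could look like against the interface introduced above. The ``DecimalDtype`` / ``DecimalArray`` names are purely illustrative, and the sketch only targets the abstract members declared in pandas/core/dtypes/base.py and pandas/core/arrays/base.py at this commit; it skips ``allow_fill``/``fill_value`` handling in ``take`` and anything the ABCs do not require.

import decimal

import numpy as np

from pandas.core.arrays import ExtensionArray
from pandas.core.dtypes.base import ExtensionDtype


class DecimalDtype(ExtensionDtype):
    # ``name`` is the only abstract member of ExtensionDtype in this patch.
    name = 'decimal'
    type = decimal.Decimal


class DecimalArray(ExtensionArray):
    """Hypothetical 1-D array of decimal.Decimal values."""

    dtype = DecimalDtype()

    def __init__(self, values):
        # store the scalars in a 1-D object ndarray
        self._data = np.asarray(values, dtype=object)

    # Sequence interface -------------------------------------------------
    def __getitem__(self, item):
        if isinstance(item, (int, np.integer)):
            return self._data[item]            # scalar for integer keys
        return type(self)(self._data[item])    # array for slices/masks

    def __iter__(self):
        return iter(self._data)

    def __len__(self):
        return len(self._data)

    # Required attributes --------------------------------------------------
    @property
    def nbytes(self):
        return self._data.nbytes

    # Additional / indexing methods ------------------------------------------
    def isna(self):
        return np.array([v.is_nan() for v in self._data], dtype=bool)

    def take(self, indexer, allow_fill=True, fill_value=None):
        # NOTE: a real implementation would honor allow_fill / fill_value
        return type(self)(self._data.take(indexer))

    def copy(self, deep=False):
        return type(self)(self._data.copy())

    # Block-related methods ---------------------------------------------------
    def _formatting_values(self):
        return self._data

    @classmethod
    def _concat_same_type(cls, to_concat):
        return cls(np.concatenate([arr._data for arr in to_concat]))

    def get_values(self):
        return self._data


arr = DecimalArray([decimal.Decimal('1.5'), decimal.Decimal('NaN')])
arr.isna()     # array([False,  True])
len(arr[:1])   # 1 -- slices return a DecimalArray

With the ABCs satisfied, ``is_extension_array_dtype(arr)`` returns True per the check added in pandas/core/dtypes/common.py, and ExtensionBlock is the container meant to eventually hold such an array inside a DataFrame; full Series/DataFrame support presumably needs more of the interface than this sketch implements.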