pandas-dev · jreback · Jan 2, 2019 · Nov 30, 2018 · Nov 30, 2018 · Nov 30, 2018
diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
@@ -430,6 +430,7 @@ Backwards incompatible API changes
 - ``max_rows`` and ``max_cols`` parameters removed from :class:`HTMLFormatter` since truncation is handled by :class:`DataFrameFormatter` (:issue:`23818`)
 - :func:`read_csv` will now raise a ``ValueError`` if a column with missing values is declared as having dtype ``bool`` (:issue:`20591`)
 - The column order of the resultant :class:`DataFrame` from :meth:`MultiIndex.to_frame` is now guaranteed to match the :attr:`MultiIndex.names` order. (:issue:`22420`)
+- Incorrectly passing a :class:`DatetimeIndex` to :meth:`MultiIndex.from_tuples`, rather than a sequence of tuples, now raises a ``TypeError`` rather than a ``ValueError`` (:issue:`24024`)
 - :func:`pd.offsets.generate_range` argument ``time_rule`` has been removed; use ``offset`` instead (:issue:`24157`)
 
 Percentage change on groupby
@@ -1368,6 +1369,7 @@ Datetimelike
 - Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype subtracting ``np.datetime64`` object with non-nanosecond unit failing to convert to nanoseconds (:issue:`18874`, :issue:`22163`)
 - Bug in :class:`DataFrame` comparisons against ``Timestamp``-like objects failing to raise ``TypeError`` for inequality checks with mismatched types (:issue:`8932`, :issue:`22163`)
 - Bug in :class:`DataFrame` with mixed dtypes including ``datetime64[ns]`` incorrectly raising ``TypeError`` on equality comparisons (:issue:`13128`, :issue:`22163`)
+- Bug in :attr:`DataFrame.values` returning a :class:`DatetimeIndex` for a single-column ``DataFrame`` with tz-aware datetime values. Now a 2-D :class:`numpy.ndarray` of :class:`Timestamp` objects is returned (:issue:`24024`)
 - Bug in :meth:`DataFrame.eq` comparison against ``NaT`` incorrectly returning ``True`` or ``NaN`` (:issue:`15697`, :issue:`22163`)
 - Bug in :class:`DatetimeIndex` subtraction that incorrectly failed to raise ``OverflowError`` (:issue:`22492`, :issue:`22508`)
 - Bug in :class:`DatetimeIndex` incorrectly allowing indexing with ``Timedelta`` object (:issue:`20464`)
@@ -1384,6 +1386,7 @@ Datetimelike
 - Bug in :func:`period_range` ignoring the frequency of ``start`` and ``end`` when those are provided as :class:`Period` objects (:issue:`20535`).
 - Bug in :class:`PeriodIndex` with attribute ``freq.n`` greater than 1 where adding a :class:`DateOffset` object would return incorrect results (:issue:`23215`)
 - Bug in :class:`Series` that interpreted string indices as lists of characters when setting datetimelike values (:issue:`23451`)
+- Bug in :class:`DataFrame` where creating a new column from an ndarray of :class:`Timestamp` objects with timezones produced an object-dtype column, rather than a datetime-with-timezone column (:issue:`23932`)
 - Bug in :class:`Timestamp` constructor which would drop the frequency of an input :class:`Timestamp` (:issue:`22311`)
 - Bug in :class:`DatetimeIndex` where calling ``np.array(dtindex, dtype=object)`` would incorrectly return an array of ``long`` objects (:issue:`23524`)
 - Bug in :class:`Index` where passing a timezone-aware :class:`DatetimeIndex` and `dtype=object` would incorrectly raise a ``ValueError`` (:issue:`23524`)

diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c
@@ -228,6 +228,11 @@ static PyObject *get_values(PyObject *obj) {
     PRINTMARK();
 
     if (values && !PyArray_CheckExact(values)) {
+
+        if (PyObject_HasAttrString(values, "to_numpy")) {
+            values = PyObject_CallMethod(values, "to_numpy", NULL);
+        }
+
         if (PyObject_HasAttrString(values, "values")) {
             PyObject *subvals = get_values(values);
             PyErr_Clear();
@@ -279,8 +284,8 @@ static PyObject *get_values(PyObject *obj) {
             repr = PyString_FromString("<unknown dtype>");
         }
 
-        PyErr_Format(PyExc_ValueError, "%s or %s are not JSON serializable yet",
-                     PyString_AS_STRING(repr), PyString_AS_STRING(typeRepr));
+        PyErr_Format(PyExc_ValueError, "%R or %R are not JSON serializable yet",
+                     repr, typeRepr);
         Py_DECREF(repr);
         Py_DECREF(typeRepr);
 

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -47,7 +47,7 @@ def cmp_method(self, other):
         if isinstance(other, ABCDataFrame):
             return NotImplemented
 
-        if isinstance(other, (np.ndarray, ABCIndexClass, ABCSeries)):
+        if isinstance(other, (np.ndarray, ABCIndexClass, ABCSeries, cls)):
             if other.ndim > 0 and len(self) != len(other):
                 raise ValueError('Lengths must match to compare')
 
@@ -1162,9 +1162,10 @@ def _addsub_offset_array(self, other, op):
         left = lib.values_from_object(self.astype('O'))
 
         res_values = op(left, np.array(other))
+        kwargs = {}
         if not is_period_dtype(self):
-            return type(self)(res_values, freq='infer')
-        return self._from_sequence(res_values)
+            kwargs['freq'] = 'infer'
+        return self._from_sequence(res_values, **kwargs)
 
     def _time_shift(self, periods, freq=None):
         """

diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
@@ -97,6 +97,9 @@ def _dt_array_cmp(cls, op):
 
     def wrapper(self, other):
         meth = getattr(dtl.DatetimeLikeArrayMixin, opname)
+        # TODO: return NotImplemented for Series / Index and let pandas unbox
+        # Right now, returning NotImplemented for Index fails because we
+        # go into the index implementation, which may be a bug?
 
         other = lib.item_from_zerodim(other)
 
@@ -145,9 +148,16 @@ def wrapper(self, other):
                 return ops.invalid_comparison(self, other, op)
             else:
                 self._assert_tzawareness_compat(other)
-                if not hasattr(other, 'asi8'):
-                    # ndarray, Series
-                    other = type(self)(other)
+                if isinstance(other, (ABCIndexClass, ABCSeries)):
+                    other = other.array
+
+                if (is_datetime64_dtype(other) and
+                        not is_datetime64_ns_dtype(other) or
+                        not hasattr(other, 'asi8')):
+                    # e.g. other.dtype == 'datetime64[s]'
+                    # or an object-dtype ndarray
+                    other = type(self)._from_sequence(other)
+
                 result = meth(self, other)
                 o_mask = other._isnan
 
@@ -171,10 +181,24 @@ class DatetimeArrayMixin(dtl.DatetimeLikeArrayMixin,
                          dtl.TimelikeOps,
                          dtl.DatelikeOps):
     """
-    Assumes that subclass __new__/__init__ defines:
-        tz
-        _freq
-        _data
+    Pandas ExtensionArray for tz-naive or tz-aware datetime data.
+
+    .. versionadded:: 0.24.0
+
+    Parameters
+    ----------
+    values : Series, Index, DatetimeArray, ndarray
+        The datetime data.
+
+        For DatetimeArray `values` (or a Series or Index boxing one),
+        `freq` and the timezone are taken from `values` unless passed
+        explicitly; mismatched timezones raise a ``TypeError``.
+
+    dtype : numpy.dtype or DatetimeTZDtype
+        Note that the only NumPy dtype allowed is 'datetime64[ns]'.
+    freq : str or Offset, optional
+    copy : bool, default False
+        Whether to copy the underlying array of values.
     """
     _typ = "datetimearray"
     _scalar_type = Timestamp
@@ -213,38 +237,84 @@ class DatetimeArrayMixin(dtl.DatetimeLikeArrayMixin,
     _dtype = None  # type: Union[np.dtype, DatetimeTZDtype]
     _freq = None
 
-    @classmethod
-    def _simple_new(cls, values, freq=None, tz=None):
-        """
-        we require the we have a dtype compat for the values
-        if we are passed a non-dtype compat, then coerce using the constructor
-        """
-        assert isinstance(values, np.ndarray), type(values)
+    def __init__(self, values, dtype=_NS_DTYPE, freq=None, copy=False):
+        if isinstance(values, (ABCSeries, ABCIndexClass)):
+            values = values._values
+
+        if isinstance(values, type(self)):
+            # validation
+            dtz = getattr(dtype, 'tz', None)
+            if dtz and values.tz is None:
+                dtype = DatetimeTZDtype(tz=dtype.tz)
+            elif dtz and values.tz:
+                if not timezones.tz_compare(dtz, values.tz):
+                    msg = (
+                        "Timezone of the array and 'dtype' do not match. "
+                        "'{}' != '{}'"
+                    )
+                    raise TypeError(msg.format(dtz, values.tz))
+            elif values.tz:
+                dtype = values.dtype
+            # freq = validate_values_freq(values, freq)
+            if freq is None:
+                freq = values.freq
+            values = values._data
+
+        if not isinstance(values, np.ndarray):
+            msg = (
+                "Unexpected type '{}'. 'values' must be a DatetimeArray "
+                "ndarray, or Series or Index containing one of those."
+            )
+            raise ValueError(msg.format(type(values).__name__))
+
         if values.dtype == 'i8':
             # for compat with datetime/timedelta/period shared methods,
             #  we can sometimes get here with int64 values.  These represent
             #  nanosecond UTC (or tz-naive) unix timestamps
             values = values.view(_NS_DTYPE)
 
-        assert values.dtype == 'M8[ns]', values.dtype
+        if values.dtype != _NS_DTYPE:
+            msg = (
+                "The dtype of 'values' is incorrect. Must be 'datetime64[ns]'."
+                " Got {} instead."
+            )
+            raise ValueError(msg.format(values.dtype))
 
-        result = object.__new__(cls)
-        result._data = values
-        result._freq = freq
-        if tz is None:
-            dtype = _NS_DTYPE
-        else:
-            tz = timezones.maybe_get_tz(tz)
-            tz = timezones.tz_standardize(tz)
-            dtype = DatetimeTZDtype('ns', tz)
-        result._dtype = dtype
-        return result
+        dtype = pandas_dtype(dtype)
+        _validate_dt64_dtype(dtype)
 
-    def __new__(cls, values, freq=None, tz=None, dtype=None, copy=False,
-                dayfirst=False, yearfirst=False, ambiguous='raise'):
-        return cls._from_sequence(
-            values, freq=freq, tz=tz, dtype=dtype, copy=copy,
-            dayfirst=dayfirst, yearfirst=yearfirst, ambiguous=ambiguous)
+        if freq == "infer":
+            msg = (
+                "Frequency inference not allowed in DatetimeArray.__init__. "
+                "Use 'pd.array()' instead."
+            )
+            raise ValueError(msg)
+
+        if copy:
+            values = values.copy()
+        if freq:
+            freq = to_offset(freq)
+        if getattr(dtype, 'tz', None):
+            # https://github.com/pandas-dev/pandas/issues/18595
+            # Ensure that we have a standard timezone for pytz objects.
+            # Without this, things like adding an array of timedeltas and
+            # a tz-aware Timestamp (with a tz specific to its datetime) will
+            # be incorrect(ish?) for the array as a whole
+            dtype = DatetimeTZDtype(tz=timezones.tz_standardize(dtype.tz))
+
+        self._data = values
+        self._dtype = dtype
+        self._freq = freq
+
+    @classmethod
+    def _simple_new(cls, values, freq=None, tz=None):
+        """
+        we require that we have a dtype compat for the values
+        if we are passed a non-dtype compat, then coerce using the constructor
+        """
+        dtype = DatetimeTZDtype(tz=tz) if tz else _NS_DTYPE
+
+        return cls(values, freq=freq, dtype=dtype)
 
     @classmethod
     def _from_sequence(cls, data, dtype=None, copy=False,
@@ -459,8 +529,7 @@ def __array__(self, dtype=None):
         elif is_int64_dtype(dtype):
             return self.asi8
 
-        # TODO: warn that conversion may be lossy?
-        return self._data.view(np.ndarray)  # follow Index.__array__
+        return self._data
 
     def __iter__(self):
         """
@@ -519,7 +588,7 @@ def astype(self, dtype, copy=True):
 
     @Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__)
     def _validate_fill_value(self, fill_value):
-        if isna(fill_value):
+        if isna(fill_value) or fill_value == iNaT:
             fill_value = iNaT
         elif isinstance(fill_value, (datetime, np.datetime64)):
             self._assert_tzawareness_compat(fill_value)
@@ -1574,6 +1643,9 @@ def sequence_to_dt64ns(data, dtype=None, copy=False,
     # if dtype has an embedded tz, capture it
     tz = validate_tz_from_dtype(dtype, tz)
 
+    if isinstance(data, ABCIndexClass):
+        data = data._data
+
     # By this point we are assured to have either a numpy array or Index
     data, copy = maybe_convert_dtype(data, copy)
 
@@ -1590,12 +1662,15 @@ def sequence_to_dt64ns(data, dtype=None, copy=False,
                 data, dayfirst=dayfirst, yearfirst=yearfirst)
             tz = maybe_infer_tz(tz, inferred_tz)
 
+    # `data` may have originally been a Categorical[datetime64[ns, tz]],
+    # so we need to handle these types.
     if is_datetime64tz_dtype(data):
+        # DatetimeArray -> ndarray
         tz = maybe_infer_tz(tz, data.tz)
         result = data._data
 
     elif is_datetime64_dtype(data):
-        # tz-naive DatetimeArray/Index or ndarray[datetime64]
+        # tz-naive DatetimeArray or ndarray[datetime64]
         data = getattr(data, "_data", data)
         if data.dtype != _NS_DTYPE:
             data = conversion.ensure_datetime64ns(data)
@@ -1750,7 +1825,7 @@ def maybe_convert_dtype(data, copy):
         # GH#18664 preserve tz in going DTI->Categorical->DTI
         # TODO: cases where we need to do another pass through this func,
         #  e.g. the categories are timedelta64s
-        data = data.categories.take(data.codes, fill_value=NaT)
+        data = data.categories.take(data.codes, fill_value=NaT)._values
         copy = False
 
     elif is_extension_type(data) and not is_datetime64tz_dtype(data):

diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
@@ -179,8 +179,7 @@ def __init__(self, values, freq=None, dtype=None, copy=False):
 
     @classmethod
     def _simple_new(cls, values, freq=None, **kwargs):
-        # TODO(DatetimeArray): remove once all constructors are aligned.
-        # alias from PeriodArray.__init__
+        # alias for PeriodArray.__init__
         return cls(values, freq=freq, **kwargs)
 
     @classmethod