From 04778805dd595e37e33f6ef0e02d9337a1154e09 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Sat, 13 Jan 2018 13:18:41 -0500
Subject: [PATCH] BUG/TST: assure conversions of datetimelikes for object,
 numeric dtypes (#19224)

closes #19223
closes #12425
---
 doc/source/whatsnew/v0.23.0.txt          |  5 ++
 pandas/_libs/tslibs/conversion.pyx       | 26 +++++++++-
 pandas/core/dtypes/cast.py               | 42 +++++++--------
 pandas/core/dtypes/common.py             |  8 ++-
 pandas/core/internals.py                 |  9 +++-
 pandas/tests/frame/test_dtypes.py        | 65 ++++++++++++++++++++++++
 pandas/tests/reshape/merge/test_merge.py | 24 ++++-----
 pandas/tests/series/test_constructors.py | 18 +++++--
 pandas/tests/series/test_operators.py    | 32 +++++-------
 9 files changed, 164 insertions(+), 65 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index 3a3c2bf0c5ae4..a0205a8d64cb7 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -385,6 +385,11 @@ Conversion
 - Bug in localization of a naive, datetime string in a ``Series`` constructor with a ``datetime64[ns, tz]`` dtype (:issue:`174151`)
 - :func:`Timestamp.replace` will now handle Daylight Savings transitions gracefully (:issue:`18319`)
 
+
+
+- Bug in ``.astype()`` to non-ns timedelta units would hold the incorrect dtype (:issue:`19176`, :issue:`19223`, :issue:`12425`)
+
+
 Indexing
 ^^^^^^^^
 
diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx
index 11e1787cd77da..53abdd013ec37 100644
--- a/pandas/_libs/tslibs/conversion.pyx
+++ b/pandas/_libs/tslibs/conversion.pyx
@@ -29,7 +29,7 @@ from np_datetime cimport (check_dts_bounds,
 
 from util cimport (is_string_object,
                    is_datetime64_object,
-                   is_integer_object, is_float_object)
+                   is_integer_object, is_float_object, is_array)
 
 from timedeltas cimport cast_from_unit
 from timezones cimport (is_utc, is_tzlocal, is_fixed_offset,
@@ -45,6 +45,8 @@ from nattype cimport NPY_NAT, checknull_with_nat
 # Constants
 
 cdef int64_t DAY_NS = 86400000000000LL
+NS_DTYPE = np.dtype('M8[ns]')
+TD_DTYPE = np.dtype('m8[ns]')
 
 UTC = pytz.UTC
 
@@ -73,13 +75,14 @@ cdef inline int64_t get_datetime64_nanos(object val) except? -1:
     return ival
 
 
-def ensure_datetime64ns(ndarray arr):
+def ensure_datetime64ns(ndarray arr, copy=True):
     """
     Ensure a np.datetime64 array has dtype specifically 'datetime64[ns]'
 
     Parameters
     ----------
     arr : ndarray
+    copy : boolean, default True
 
     Returns
     -------
@@ -104,6 +107,8 @@ def ensure_datetime64ns(ndarray arr):
 
     unit = get_datetime64_unit(arr.flat[0])
     if unit == PANDAS_FR_ns:
+        if copy:
+            arr = arr.copy()
         result = arr
     else:
         for i in range(n):
@@ -117,6 +122,23 @@ def ensure_datetime64ns(ndarray arr):
     return result
 
 
+def ensure_timedelta64ns(ndarray arr, copy=True):
+    """
+    Ensure a np.timedelta64 array has dtype specifically 'timedelta64[ns]'
+
+    Parameters
+    ----------
+    arr : ndarray
+    copy : boolean, default True
+
+    Returns
+    -------
+    result : ndarray with dtype timedelta64[ns]
+
+    """
+    return arr.astype(TD_DTYPE, copy=copy)
+
+
 def datetime_to_datetime64(ndarray[object] values):
     """
     Convert ndarray of datetime-like objects to int64 array representing
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index b3ae8aae53b35..672e60c9dcde5 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -656,11 +656,15 @@ def astype_nansafe(arr, dtype, copy=True):
             return tslib.ints_to_pydatetime(arr.view(np.int64))
         elif dtype == np.int64:
             return arr.view(dtype)
-        elif dtype != _NS_DTYPE:
-            raise TypeError("cannot astype a datetimelike from [{from_dtype}] "
-                            "to [{to_dtype}]".format(from_dtype=arr.dtype,
-                                                     to_dtype=dtype))
-        return arr.astype(_NS_DTYPE)
+
+        # allow frequency conversions
+        if dtype.kind == 'M':
+            return arr.astype(dtype)
+
+        raise TypeError("cannot astype a datetimelike from [{from_dtype}] "
+                        "to [{to_dtype}]".format(from_dtype=arr.dtype,
+                                                 to_dtype=dtype))
+
     elif is_timedelta64_dtype(arr):
         if dtype == np.int64:
             return arr.view(dtype)
@@ -668,21 +672,23 @@ def astype_nansafe(arr, dtype, copy=True):
             return tslib.ints_to_pytimedelta(arr.view(np.int64))
 
         # in py3, timedelta64[ns] are int64
-        elif ((PY3 and dtype not in [_INT64_DTYPE, _TD_DTYPE]) or
-              (not PY3 and dtype != _TD_DTYPE)):
+        if ((PY3 and dtype not in [_INT64_DTYPE, _TD_DTYPE]) or
+                (not PY3 and dtype != _TD_DTYPE)):
 
             # allow frequency conversions
+            # we return a float here!
             if dtype.kind == 'm':
                 mask = isna(arr)
                 result = arr.astype(dtype).astype(np.float64)
                 result[mask] = np.nan
                 return result
+        elif dtype == _TD_DTYPE:
+            return arr.astype(_TD_DTYPE, copy=copy)
 
-            raise TypeError("cannot astype a timedelta from [{from_dtype}] "
-                            "to [{to_dtype}]".format(from_dtype=arr.dtype,
-                                                     to_dtype=dtype))
+        raise TypeError("cannot astype a timedelta from [{from_dtype}] "
+                        "to [{to_dtype}]".format(from_dtype=arr.dtype,
+                                                 to_dtype=dtype))
 
-        return arr.astype(_TD_DTYPE)
     elif (np.issubdtype(arr.dtype, np.floating) and
           np.issubdtype(dtype, np.integer)):
 
@@ -704,19 +710,7 @@ def astype_nansafe(arr, dtype, copy=True):
 
     if copy:
 
-        if arr.dtype == dtype:
-            return arr.copy()
-
-        # we handle datetimelikes with pandas machinery
-        # to be robust to the input type
-        elif is_datetime64_dtype(dtype):
-            from pandas import to_datetime
-            return to_datetime(arr).values
-        elif is_timedelta64_dtype(dtype):
-            from pandas import to_timedelta
-            return to_timedelta(arr).values
-
-        return arr.astype(dtype)
+        return arr.astype(dtype, copy=True)
     return arr.view(dtype)
 
 
diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
index 5d6fc7487eeb5..dca9a5fde0d74 100644
--- a/pandas/core/dtypes/common.py
+++ b/pandas/core/dtypes/common.py
@@ -4,6 +4,7 @@
 from pandas.compat import (string_types, text_type, binary_type,
                            PY3, PY36)
 from pandas._libs import algos, lib
+from pandas._libs.tslibs import conversion
 from .dtypes import (CategoricalDtype, CategoricalDtypeType,
                      DatetimeTZDtype, DatetimeTZDtypeType,
                      PeriodDtype, PeriodDtypeType,
@@ -21,8 +22,8 @@
                              for t in ['O', 'int8', 'uint8', 'int16', 'uint16',
                                        'int32', 'uint32', 'int64', 'uint64']])
 
-_NS_DTYPE = np.dtype('M8[ns]')
-_TD_DTYPE = np.dtype('m8[ns]')
+_NS_DTYPE = conversion.NS_DTYPE
+_TD_DTYPE = conversion.TD_DTYPE
 _INT64_DTYPE = np.dtype(np.int64)
 
 # oh the troubles to reduce import time
@@ -31,6 +32,9 @@
 _ensure_float64 = algos.ensure_float64
 _ensure_float32 = algos.ensure_float32
 
+_ensure_datetime64ns = conversion.ensure_datetime64ns
+_ensure_timedelta64ns = conversion.ensure_timedelta64ns
+
 
 def _ensure_float(arr):
     """
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 5a4778ae4e629..3c923133477df 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -631,7 +631,7 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
                 values = astype_nansafe(values.ravel(), dtype, copy=True)
                 values = values.reshape(self.shape)
 
-            newb = make_block(values, placement=self.mgr_locs, dtype=dtype,
+            newb = make_block(values, placement=self.mgr_locs,
                               klass=klass)
         except:
             if errors == 'raise':
@@ -1954,6 +1954,13 @@ class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock):
     _can_hold_na = True
     is_numeric = False
 
+    def __init__(self, values, placement, fastpath=False, **kwargs):
+        if values.dtype != _TD_DTYPE:
+            values = conversion.ensure_timedelta64ns(values)
+
+        super(TimeDeltaBlock, self).__init__(values, fastpath=True,
+                                             placement=placement, **kwargs)
+
     @property
     def _box_func(self):
         return lambda x: tslib.Timedelta(x, unit='ns')
diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py
index 21c028e634bc0..70eee5984b438 100644
--- a/pandas/tests/frame/test_dtypes.py
+++ b/pandas/tests/frame/test_dtypes.py
@@ -640,6 +640,71 @@ def test_astype_categoricaldtype_class_raises(self, cls):
         with tm.assert_raises_regex(TypeError, xpr):
             df['A'].astype(cls)
 
+    @pytest.mark.parametrize("arr_dtype", [np.int64, np.float64])
+    @pytest.mark.parametrize("dtype", ["M8", "m8"])
+    @pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D'])
+    def test_astype_to_datetimelike_unit(self, arr_dtype, dtype, unit):
+        # tests all units from numeric origination
+        # gh-19223 / gh-12425
+        dtype = "{}[{}]".format(dtype, unit)
+        arr = np.array([[1, 2, 3]], dtype=arr_dtype)
+        df = DataFrame(arr)
+        result = df.astype(dtype)
+        expected = DataFrame(arr.astype(dtype))
+
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D'])
+    def test_astype_to_datetime_unit(self, unit):
+        # tests all units from datetime origination
+        # gh-19223
+        dtype = "M8[{}]".format(unit)
+        arr = np.array([[1, 2, 3]], dtype=dtype)
+        df = DataFrame(arr)
+        result = df.astype(dtype)
+        expected = DataFrame(arr.astype(dtype))
+
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize("unit", ['ns'])
+    def test_astype_to_timedelta_unit_ns(self, unit):
+        # preserver the timedelta conversion
+        # gh-19223
+        dtype = "m8[{}]".format(unit)
+        arr = np.array([[1, 2, 3]], dtype=dtype)
+        df = DataFrame(arr)
+        result = df.astype(dtype)
+        expected = DataFrame(arr.astype(dtype))
+
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize("unit", ['us', 'ms', 's', 'h', 'm', 'D'])
+    def test_astype_to_timedelta_unit(self, unit):
+        # coerce to float
+        # gh-19223
+        dtype = "m8[{}]".format(unit)
+        arr = np.array([[1, 2, 3]], dtype=dtype)
+        df = DataFrame(arr)
+        result = df.astype(dtype)
+        expected = DataFrame(df.values.astype(dtype).astype(float))
+
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D'])
+    def test_astype_to_incorrect_datetimelike(self, unit):
+        # trying to astype a m to a M, or vice-versa
+        # gh-19224
+        dtype = "M8[{}]".format(unit)
+        other = "m8[{}]".format(unit)
+
+        df = DataFrame(np.array([[1, 2, 3]], dtype=dtype))
+        with pytest.raises(TypeError):
+            df.astype(other)
+
+        df = DataFrame(np.array([[1, 2, 3]], dtype=other))
+        with pytest.raises(TypeError):
+            df.astype(dtype)
+
     def test_timedeltas(self):
         df = DataFrame(dict(A=Series(date_range('2012-1-1', periods=3,
                                                 freq='D')),
diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index b9a667499b7a0..a8319339c6435 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -523,25 +523,23 @@ def test_other_datetime_unit(self):
                                columns=['entity_id', 'days'])
             tm.assert_frame_equal(result, exp)
 
-    def test_other_timedelta_unit(self):
+    @pytest.mark.parametrize("unit", ['D', 'h', 'm', 's', 'ms', 'us', 'ns'])
+    def test_other_timedelta_unit(self, unit):
         # GH 13389
         df1 = pd.DataFrame({'entity_id': [101, 102]})
         s = pd.Series([None, None], index=[101, 102], name='days')
 
-        for dtype in ['timedelta64[D]', 'timedelta64[h]', 'timedelta64[m]',
-                      'timedelta64[s]', 'timedelta64[ms]', 'timedelta64[us]',
-                      'timedelta64[ns]']:
+        dtype = "m8[{}]".format(unit)
+        df2 = s.astype(dtype).to_frame('days')
+        assert df2['days'].dtype == 'm8[ns]'
 
-            df2 = s.astype(dtype).to_frame('days')
-            assert df2['days'].dtype == dtype
-
-            result = df1.merge(df2, left_on='entity_id', right_index=True)
+        result = df1.merge(df2, left_on='entity_id', right_index=True)
 
-            exp = pd.DataFrame({'entity_id': [101, 102],
-                                'days': np.array(['nat', 'nat'],
-                                                 dtype=dtype)},
-                               columns=['entity_id', 'days'])
-            tm.assert_frame_equal(result, exp)
+        exp = pd.DataFrame({'entity_id': [101, 102],
+                            'days': np.array(['nat', 'nat'],
+                                             dtype=dtype)},
+                           columns=['entity_id', 'days'])
+        tm.assert_frame_equal(result, exp)
 
     def test_overlapping_columns_error_message(self):
         df = DataFrame({'key': [1, 2, 3],
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index a3e40f65e922f..33737387edffa 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -552,10 +552,6 @@ def test_constructor_dtype_datetime64(self):
         s.iloc[0] = np.nan
         assert s.dtype == 'M8[ns]'
 
-        # invalid astypes
-        for t in ['s', 'D', 'us', 'ms']:
-            pytest.raises(TypeError, s.astype, 'M8[%s]' % t)
-
         # GH3414 related
         pytest.raises(TypeError, lambda x: Series(
             Series(dates).astype('int') / 1000000, dtype='M8[ms]'))
@@ -707,6 +703,20 @@ def test_constructor_with_datetime_tz(self):
         expected = Series(pd.DatetimeIndex(['NaT', 'NaT'], tz='US/Eastern'))
         assert_series_equal(s, expected)
 
+    @pytest.mark.parametrize("arr_dtype", [np.int64, np.float64])
+    @pytest.mark.parametrize("dtype", ["M8", "m8"])
+    @pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D'])
+    def test_construction_to_datetimelike_unit(self, arr_dtype, dtype, unit):
+        # tests all units
+        # gh-19223
+        dtype = "{}[{}]".format(dtype, unit)
+        arr = np.array([1, 2, 3], dtype=arr_dtype)
+        s = Series(arr)
+        result = s.astype(dtype)
+        expected = Series(arr.astype(dtype))
+
+        tm.assert_series_equal(result, expected)
+
     @pytest.mark.parametrize('arg',
                              ['2013-01-01 00:00:00', pd.NaT, np.nan, None])
     def test_constructor_with_naive_string_and_datetimetz_dtype(self, arg):
diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py
index 783fcddac1280..ed9307d50521f 100644
--- a/pandas/tests/series/test_operators.py
+++ b/pandas/tests/series/test_operators.py
@@ -1649,32 +1649,26 @@ def test_invalid_ops(self):
         pytest.raises(Exception, self.objSeries.__sub__,
                       np.array(1, dtype=np.int64))
 
-    def test_timedelta64_conversions(self):
+    @pytest.mark.parametrize("m", [1, 3, 10])
+    @pytest.mark.parametrize("unit", ['D', 'h', 'm', 's', 'ms', 'us', 'ns'])
+    def test_timedelta64_conversions(self, m, unit):
+
         startdate = Series(date_range('2013-01-01', '2013-01-03'))
         enddate = Series(date_range('2013-03-01', '2013-03-03'))
 
         s1 = enddate - startdate
         s1[2] = np.nan
 
-        for m in [1, 3, 10]:
-            for unit in ['D', 'h', 'm', 's', 'ms', 'us', 'ns']:
-
-                # op
-                expected = s1.apply(lambda x: x / np.timedelta64(m, unit))
-                result = s1 / np.timedelta64(m, unit)
-                assert_series_equal(result, expected)
-
-                if m == 1 and unit != 'ns':
-
-                    # astype
-                    result = s1.astype("timedelta64[{0}]".format(unit))
-                    assert_series_equal(result, expected)
+        # op
+        expected = s1.apply(lambda x: x / np.timedelta64(m, unit))
+        result = s1 / np.timedelta64(m, unit)
+        assert_series_equal(result, expected)
 
-                # reverse op
-                expected = s1.apply(
-                    lambda x: Timedelta(np.timedelta64(m, unit)) / x)
-                result = np.timedelta64(m, unit) / s1
-                assert_series_equal(result, expected)
+        # reverse op
+        expected = s1.apply(
+            lambda x: Timedelta(np.timedelta64(m, unit)) / x)
+        result = np.timedelta64(m, unit) / s1
+        assert_series_equal(result, expected)
 
         # astype
         s = Series(date_range('20130101', periods=3))