Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

implement astype portion of #24024 #24405

Merged
merged 20 commits into from
Dec 28, 2018
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
6a5c216
implement astype portion of #24024
jbrockmendel Dec 24, 2018
1a9f30b
fixup unused import
jbrockmendel Dec 24, 2018
1b109b8
isort fixup
jbrockmendel Dec 24, 2018
f271005
Merge branch 'master' of https://github.com/pandas-dev/pandas into le…
jbrockmendel Dec 24, 2018
5615b9f
pass copy kwarg
jbrockmendel Dec 24, 2018
d5cca5a
Merge branch 'master' of https://github.com/pandas-dev/pandas into le…
jbrockmendel Dec 25, 2018
184f59f
revert change that broke the world
jbrockmendel Dec 25, 2018
df39bd7
Merge branch 'master' of https://github.com/pandas-dev/pandas into le…
jbrockmendel Dec 25, 2018
e41068a
comments, typo
jbrockmendel Dec 25, 2018
6f108dd
avoid double-copy
jbrockmendel Dec 25, 2018
b123d08
Merge branch 'master' of https://github.com/pandas-dev/pandas into le…
jbrockmendel Dec 28, 2018
207ffb9
Merge branch 'master' of https://github.com/pandas-dev/pandas into le…
jbrockmendel Dec 28, 2018
04efd45
sidestep int sign/size astype issues
jbrockmendel Dec 28, 2018
3fca810
Implement UInt64 handling, tests, and docs
TomAugspurger Dec 28, 2018
5fa32e9
Handle uint in astype tests
TomAugspurger Dec 28, 2018
5d718e6
Fixed TimedeltaArray._format_native_types
TomAugspurger Dec 28, 2018
33b5434
Linting
TomAugspurger Dec 28, 2018
e29d898
Change default to str
TomAugspurger Dec 28, 2018
a3c42f0
revert for period
TomAugspurger Dec 28, 2018
eac662b
Merge remote-tracking branch 'upstream/master' into jbrockmendel-less…
TomAugspurger Dec 28, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1324,6 +1324,7 @@ Datetimelike
- Bug in :func:`to_datetime` where ``box`` and ``utc`` arguments were ignored when passing a :class:`DataFrame` or ``dict`` of unit mappings (:issue:`23760`)
- Bug in :attr:`Series.dt` where the cache would not update properly after an in-place operation (:issue:`24408`)
- Bug in :class:`PeriodIndex` where comparisons against an array-like object with length 1 failed to raise ``ValueError`` (:issue:`23078`)
- Bug in :meth:`DatetimeIndex.astype`, :meth:`PeriodIndex.astype` and :meth:`TimedeltaIndex.astype` ignoring the sign of the ``dtype`` for unsigned integer dtypes (:issue:`24405`).

Timedelta
^^^^^^^^^
Expand Down
57 changes: 52 additions & 5 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@
from pandas.util._decorators import Appender, Substitution, deprecate_kwarg

from pandas.core.dtypes.common import (
is_bool_dtype, is_datetime64_any_dtype, is_datetime64_dtype,
is_datetime64tz_dtype, is_extension_array_dtype, is_float_dtype,
is_integer_dtype, is_list_like, is_object_dtype, is_offsetlike,
is_period_dtype, is_timedelta64_dtype, needs_i8_conversion)
is_bool_dtype, is_categorical_dtype, is_datetime64_any_dtype,
is_datetime64_dtype, is_datetime64tz_dtype, is_datetime_or_timedelta_dtype,
is_dtype_equal, is_extension_array_dtype, is_float_dtype, is_integer_dtype,
is_list_like, is_object_dtype, is_offsetlike, is_period_dtype,
is_string_dtype, is_timedelta64_dtype, is_unsigned_integer_dtype,
needs_i8_conversion, pandas_dtype)
from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
from pandas.core.dtypes.missing import isna

Expand Down Expand Up @@ -403,9 +405,54 @@ def __getitem__(self, key):
return self._simple_new(result, **attribs)

def astype(self, dtype, copy=True):
# Cast this datetime-like array to ``dtype``, dispatching on the kind of
# dtype requested: object, string, integer, categorical, or anything else.
# ``dtype`` is normalized with ``pandas_dtype`` before any check is made.
# Raises TypeError for float targets, and for datetime/timedelta targets
# that are not equal to ``self.dtype``.
#
# Some notes on cases we don't have to handle here in the base class:
# 1. PeriodArray.astype handles period -> period
# 2. DatetimeArray.astype handles conversion between tz.
# 3. DatetimeArray.astype handles datetime -> period
# Imported inside the function rather than at module level
# (presumably to avoid a circular import -- TODO confirm).
from pandas import Categorical
dtype = pandas_dtype(dtype)

if is_object_dtype(dtype):
# object target: box the i8 values via ``self._box_values``.
return self._box_values(self.asi8)
# NOTE(review): the ``super(...)`` line below directly follows a ``return``
# and precedes an ``elif``; it looks like the pre-change line retained by
# the diff rendering rather than part of the new method -- confirm.
return super(DatetimeLikeArrayMixin, self).astype(dtype, copy)
elif is_string_dtype(dtype) and not is_categorical_dtype(dtype):
# string (but not categorical) target: use the array's own formatter.
return self._format_native_types()
elif is_integer_dtype(dtype):
# we deliberately ignore int32 vs. int64 here.
# See https://github.com/pandas-dev/pandas/issues/24381 for more.
values = self.asi8

if is_unsigned_integer_dtype(dtype):
# Again, we ignore int32 vs. int64
# ``.view`` is used here (not ``.astype``), i.e. the i8 data is
# reinterpreted as uint64 rather than converted.
values = values.view("uint64")

# This is the only branch below that consults ``copy`` directly.
if copy:
values = values.copy()
return values
elif (is_datetime_or_timedelta_dtype(dtype) and
not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype):
# disallow conversion between datetime/timedelta,
# and conversions for any datetimelike to float
msg = 'Cannot cast {name} to dtype {dtype}'
raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))
elif is_categorical_dtype(dtype):
# categorical target: build a Categorical from this array.
return Categorical(self, dtype=dtype)
jreback marked this conversation as resolved.
Show resolved Hide resolved
else:
# Fallback: let numpy perform the conversion.
return np.asarray(self, dtype=dtype)
jbrockmendel marked this conversation as resolved.
Show resolved Hide resolved

def view(self, dtype=None):
"""
New view on this array with the same data.

This simply forwards to ``self._data.view``.

Parameters
----------
dtype : numpy dtype, optional
Passed through unchanged as the ``dtype`` keyword of
``self._data.view``.

Returns
-------
ndarray
With the specified `dtype`.
"""
return self._data.view(dtype=dtype)

# ------------------------------------------------------------------
# ExtensionArray Interface
Expand Down
34 changes: 32 additions & 2 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@

from pandas.core.dtypes.common import (
_INT64_DTYPE, _NS_DTYPE, is_categorical_dtype, is_datetime64_dtype,
is_datetime64tz_dtype, is_extension_type, is_float_dtype, is_int64_dtype,
is_object_dtype, is_period_dtype, is_string_dtype, is_timedelta64_dtype)
is_datetime64_ns_dtype, is_datetime64tz_dtype, is_dtype_equal,
is_extension_type, is_float_dtype, is_int64_dtype, is_object_dtype,
is_period_dtype, is_string_dtype, is_timedelta64_dtype, pandas_dtype)
from pandas.core.dtypes.dtypes import DatetimeTZDtype
from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
from pandas.core.dtypes.missing import isna
Expand Down Expand Up @@ -473,6 +474,35 @@ def __iter__(self):
for v in converted:
yield v

def astype(self, dtype, copy=True):
# Cast this datetime array to ``dtype``.
#
# We handle
# --> datetime
# --> period
# DatetimeLikeArrayMixin Super handles the rest.
dtype = pandas_dtype(dtype)

if (is_datetime64_ns_dtype(dtype) and
not is_dtype_equal(dtype, self.dtype)):
# GH#18951: datetime64_ns dtype but not equal means different tz
# ``new_tz`` is None when the target dtype carries no ``tz`` attribute.
new_tz = getattr(dtype, 'tz', None)
if getattr(self.dtype, 'tz', None) is None:
# self has no tz: attach the target tz by localizing.
return self.tz_localize(new_tz)
# self has a tz: convert to the target tz.
result = self.tz_convert(new_tz)
if new_tz is None:
# Do we want .astype('datetime64[ns]') to be an ndarray?
# The astype in Block._astype expects this to return an
# ndarray, but we could maybe work around it there.
# For now the underlying ``_data`` is returned in this case.
result = result._data
jreback marked this conversation as resolved.
Show resolved Hide resolved
return result
elif is_datetime64tz_dtype(self.dtype) and is_dtype_equal(self.dtype,
dtype):
# Same tz-aware dtype requested: only ``copy`` decides what is returned.
if copy:
return self.copy()
return self
elif is_period_dtype(dtype):
# datetime -> period, using the frequency carried by the target dtype.
return self.to_period(freq=dtype.freq)
# Everything else is delegated to the mixin implementation, called
# explicitly on the class rather than through ``super``.
return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just noticed... it'd be nice to leave a bunch of TODO: Use super for places like this.

Actually... I think Python2 will force us to make these changes when we switch inheritance to composition, since we won't be able to call the unbound method with a DatetimeIndex anymore (I think).


# ----------------------------------------------------------------
# ExtensionArray Interface

Expand Down
43 changes: 6 additions & 37 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,8 @@
from pandas.util._validators import validate_fillna_kwargs

from pandas.core.dtypes.common import (
_TD_DTYPE, ensure_object, is_array_like, is_categorical_dtype,
is_datetime64_dtype, is_datetime_or_timedelta_dtype, is_dtype_equal,
is_float_dtype, is_integer_dtype, is_list_like, is_object_dtype,
is_period_dtype, is_string_dtype, pandas_dtype)
_TD_DTYPE, ensure_object, is_array_like, is_datetime64_dtype,
is_float_dtype, is_list_like, is_period_dtype, pandas_dtype)
from pandas.core.dtypes.dtypes import PeriodDtype
from pandas.core.dtypes.generic import ABCIndexClass, ABCPeriodIndex, ABCSeries
from pandas.core.dtypes.missing import isna, notna
Expand Down Expand Up @@ -599,42 +597,13 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs):
# ------------------------------------------------------------------

def astype(self, dtype, copy=True):
# Cast this period array to ``dtype``.
#
# NOTE(review): this span is a rendered diff, so pre-change and
# post-change lines appear together without +/- markers. The post-change
# logic appears to be: normalize ``dtype``, handle period -> period via
# ``asfreq``, and delegate everything else to ``super`` -- confirm
# against the merged file before relying on any single line below.
#
# TODO: Figure out something better here...
# We have DatetimeLikeArrayMixin ->
# super(...), which ends up being... DatetimeIndexOpsMixin?
# this is complicated.
# need a pandas_astype(arr, dtype).
from pandas import Categorical

# We handle Period[T] -> Period[U]
# Our parent handles everything else.
dtype = pandas_dtype(dtype)

if is_object_dtype(dtype):
return np.asarray(self, dtype=object)
elif is_string_dtype(dtype) and not is_categorical_dtype(dtype):
return self._format_native_types()
elif is_integer_dtype(dtype):
values = self._data

if values.dtype != dtype:
# int32 vs. int64
values = values.astype(dtype)

elif copy:
# Only reached when the dtypes already match, so a matching dtype
# with ``copy`` false returns ``self._data`` itself.
values = values.copy()

return values
elif (is_datetime_or_timedelta_dtype(dtype) and
not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype):
# disallow conversion between datetime/timedelta,
# and conversions for any datetimelike to float
msg = 'Cannot cast {name} to dtype {dtype}'
raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))
elif is_categorical_dtype(dtype):
return Categorical(self, dtype=dtype)
elif is_period_dtype(dtype):
if is_period_dtype(dtype):
# period -> period: re-express at the target dtype's frequency.
return self.asfreq(dtype.freq)
else:
return np.asarray(self, dtype=dtype)
# Remaining dtypes go to the parent class's astype.
return super(PeriodArray, self).astype(dtype, copy=copy)

@property
def flags(self):
Expand Down
29 changes: 28 additions & 1 deletion pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
from pandas.core.dtypes.common import (
_NS_DTYPE, _TD_DTYPE, ensure_int64, is_datetime64_dtype, is_float_dtype,
is_integer_dtype, is_list_like, is_object_dtype, is_scalar,
is_string_dtype, is_timedelta64_dtype)
is_string_dtype, is_timedelta64_dtype, is_timedelta64_ns_dtype,
pandas_dtype)
from pandas.core.dtypes.dtypes import DatetimeTZDtype
from pandas.core.dtypes.generic import (
ABCDataFrame, ABCIndexClass, ABCSeries, ABCTimedeltaIndex)
Expand Down Expand Up @@ -234,6 +235,32 @@ def _validate_fill_value(self, fill_value):
"Got '{got}'.".format(got=fill_value))
return fill_value

def astype(self, dtype, copy=True):
# Cast this timedelta array to ``dtype``.
#
# We handle
# --> timedelta64[ns]
# --> timedelta64
# DatetimeLikeArrayMixin super call handles other cases
dtype = pandas_dtype(dtype)

if is_timedelta64_dtype(dtype) and not is_timedelta64_ns_dtype(dtype):
# by pandas convention, converting to non-nano timedelta64
# returns an int64-dtyped array with ints representing multiples
# of the desired timedelta unit. This is essentially division
if self._hasnans:
# avoid double-copying
# ``copy=False`` is passed here regardless of the ``copy`` argument;
# the masking call below is asked to convert to 'float64', so this
# path does not return an int64 array.
result = self._data.astype(dtype, copy=False)
jreback marked this conversation as resolved.
Show resolved Hide resolved
values = self._maybe_mask_results(result,
fill_value=None,
convert='float64')
return values
# No missing values: convert the unit, then express the result as i8.
result = self._data.astype(dtype, copy=copy)
return result.astype('i8')
TomAugspurger marked this conversation as resolved.
Show resolved Hide resolved
elif is_timedelta64_ns_dtype(dtype):
# Already the native resolution: only ``copy`` decides what is returned.
if copy:
return self.copy()
return self
# Everything else is delegated to the mixin implementation, called
# explicitly on the class rather than through ``super``.
return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy=copy)

# ----------------------------------------------------------------
# Rendering Methods

Expand Down
6 changes: 4 additions & 2 deletions pandas/core/dtypes/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
is_period_dtype, is_scalar, is_string_dtype, is_string_like_dtype,
is_timedelta64_dtype, needs_i8_conversion, pandas_dtype)
from .generic import (
ABCExtensionArray, ABCGeneric, ABCIndexClass, ABCMultiIndex, ABCSeries)
ABCDatetimeArray, ABCExtensionArray, ABCGeneric, ABCIndexClass,
ABCMultiIndex, ABCSeries, ABCTimedeltaArray)
from .inference import is_list_like

isposinf_scalar = libmissing.isposinf_scalar
Expand Down Expand Up @@ -108,7 +109,8 @@ def _isna_new(obj):
elif isinstance(obj, ABCMultiIndex):
raise NotImplementedError("isna is not defined for MultiIndex")
elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass,
ABCExtensionArray)):
ABCExtensionArray,
ABCDatetimeArray, ABCTimedeltaArray)):
return _isna_ndarraylike(obj)
elif isinstance(obj, ABCGeneric):
return obj._constructor(obj._data.isna(func=isna))
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -739,8 +739,9 @@ def view(self, cls=None):
Parameters
----------
dtype : numpy dtype or pandas type
Note that any integer `dtype` is treated as ``'int64'``,
regardless of the sign and size.
Note that any signed integer `dtype` is treated as ``'int64'``,
and any unsigned integer `dtype` is treated as ``'uint64'``,
regardless of the size.
copy : bool, default True
By default, astype always returns a newly allocated object.
If copy is set to False and internal requirements on dtype are
Expand Down
35 changes: 14 additions & 21 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,8 @@
from pandas.util._decorators import Appender, cache_readonly

from pandas.core.dtypes.common import (
ensure_int64, is_bool_dtype, is_categorical_dtype,
is_datetime_or_timedelta_dtype, is_dtype_equal, is_float, is_float_dtype,
is_integer, is_integer_dtype, is_list_like, is_object_dtype,
is_period_dtype, is_scalar, is_string_dtype)
ensure_int64, is_bool_dtype, is_dtype_equal, is_float, is_integer,
is_integer_dtype, is_list_like, is_period_dtype, is_scalar, pandas_dtype)
from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries

from pandas.core import algorithms, ops
Expand All @@ -39,6 +37,7 @@ class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin):

# override DatetimeLikeArrayMixin method
copy = Index.copy
view = Index.view

# DatetimeLikeArrayMixin assumes subclasses are mutable, so these are
# properties there. They can be made into cache_readonly for Index
Expand Down Expand Up @@ -550,24 +549,18 @@ def _maybe_box_as_values(self, values, **attribs):
# - sort_values
return values

@Appender(_index_shared_docs['astype'])
def astype(self, dtype, copy=True):
# Cast this datetime-like index to ``dtype`` and return the result as an Index.
#
# NOTE(review): this span is a rendered diff, so pre-change and
# post-change lines appear together without +/- markers. The dtype
# dispatch chain immediately below looks like the removed implementation,
# and the logic starting at ``if is_dtype_equal(...)`` looks like its
# replacement -- confirm against the merged file.
if is_object_dtype(dtype):
return self._box_values_as_index()
elif is_string_dtype(dtype) and not is_categorical_dtype(dtype):
return Index(self.format(), name=self.name, dtype=object)
elif is_integer_dtype(dtype):
# TODO(DatetimeArray): use self._values here.
# Can't use ._values currently, because that returns a
# DatetimeIndex, which throws us in an infinite loop.
return Index(self.values.astype('i8', copy=copy), name=self.name,
dtype='i8')
elif (is_datetime_or_timedelta_dtype(dtype) and
not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype):
# disallow conversion between datetime/timedelta,
# and conversions for any datetimelike to float
msg = 'Cannot cast {name} to dtype {dtype}'
raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))
return super(DatetimeIndexOpsMixin, self).astype(dtype, copy=copy)
# ``copy is False`` is an identity test, so only the literal ``False``
# takes this shortcut; other falsy values fall through to the cast below.
if is_dtype_equal(self.dtype, dtype) and copy is False:
# Ensure that self.astype(self.dtype) is self
return self

# The actual conversion is done by the array held in ``self._eadata``.
new_values = self._eadata.astype(dtype, copy=copy)

# pass copy=False because any copying will be done in the
# _eadata.astype call above
# The Index dtype is taken from the converted values, not from ``dtype``.
return Index(new_values,
dtype=new_values.dtype, name=self.name, copy=False)

@Appender(DatetimeLikeArrayMixin._time_shift.__doc__)
def _time_shift(self, periods, freq=None):
Expand Down
22 changes: 4 additions & 18 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,16 @@
from pandas.util._decorators import Appender, Substitution, cache_readonly

from pandas.core.dtypes.common import (
_NS_DTYPE, ensure_int64, is_datetime64_ns_dtype, is_dtype_equal, is_float,
is_integer, is_list_like, is_period_dtype, is_scalar, is_string_like,
pandas_dtype)
_NS_DTYPE, ensure_int64, is_float, is_integer, is_list_like, is_scalar,
is_string_like)
import pandas.core.dtypes.concat as _concat
from pandas.core.dtypes.missing import isna

from pandas.core.arrays.datetimes import (
DatetimeArrayMixin as DatetimeArray, _to_m8)
from pandas.core.base import _shared_docs
import pandas.core.common as com
from pandas.core.indexes.base import Index, _index_shared_docs
from pandas.core.indexes.base import Index
from pandas.core.indexes.datetimelike import (
DatetimeIndexOpsMixin, wrap_array_method, wrap_field_accessor)
from pandas.core.indexes.numeric import Int64Index
Expand Down Expand Up @@ -603,20 +602,6 @@ def intersection(self, other):

# --------------------------------------------------------------------

@Appender(_index_shared_docs['astype'])
def astype(self, dtype, copy=True):
# Cast this DatetimeIndex to ``dtype``: handles a change of timezone and
# datetime -> period here, and defers every other dtype to ``super``.
#
# NOTE(review): the surrounding hunk lists far more deletions than
# additions for this file, so this method looks like it is being removed
# by the change -- confirm against the merged file.
dtype = pandas_dtype(dtype)
if (is_datetime64_ns_dtype(dtype) and
not is_dtype_equal(dtype, self.dtype)):
# GH 18951: datetime64_ns dtype but not equal means different tz
# ``new_tz`` is None when the target dtype carries no ``tz`` attribute.
new_tz = getattr(dtype, 'tz', None)
if getattr(self.dtype, 'tz', None) is None:
# self has no tz: attach the target tz by localizing.
return self.tz_localize(new_tz)
return self.tz_convert(new_tz)
elif is_period_dtype(dtype):
# datetime -> period, using the frequency carried by the target dtype.
return self.to_period(freq=dtype.freq)
return super(DatetimeIndex, self).astype(dtype, copy=copy)

def _get_time_micros(self):
values = self.asi8
if self.tz is not None and not timezones.is_utc(self.tz):
Expand Down Expand Up @@ -1089,6 +1074,7 @@ def _eadata(self):
_is_monotonic_increasing = Index.is_monotonic_increasing
_is_monotonic_decreasing = Index.is_monotonic_decreasing
_is_unique = Index.is_unique
astype = DatetimeIndexOpsMixin.astype

_timezone = cache_readonly(DatetimeArray._timezone.fget)
is_normalized = cache_readonly(DatetimeArray.is_normalized.fget)
Expand Down
9 changes: 3 additions & 6 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -543,16 +543,13 @@ def asof_locs(self, where, mask):
def astype(self, dtype, copy=True, how='start'):
# Cast this PeriodIndex to ``dtype``. ``how`` is forwarded to
# ``to_timestamp`` and is used only when the target is a datetime64 dtype.
#
# NOTE(review): this span is a rendered diff, so pre-change and
# post-change lines appear together without +/- markers (for example the
# two nearly identical "'how' is index-..." comments, and two consecutive
# ``return`` statements at the end) -- confirm against the merged file.
dtype = pandas_dtype(dtype)

# We have a few special-cases for `dtype`.
# Failing those, we fall back to astyping the values

if is_datetime64_any_dtype(dtype):
# 'how' is index-speicifc, isn't part of the EA interface.
# 'how' is index-specific, isn't part of the EA interface.
# ``copy`` is not consulted on this path.
tz = getattr(dtype, 'tz', None)
return self.to_timestamp(how=how).tz_localize(tz)

jbrockmendel marked this conversation as resolved.
Show resolved Hide resolved
result = self._data.astype(dtype, copy=copy)
return Index(result, name=self.name, dtype=dtype, copy=False)
# TODO: should probably raise on `how` here, so we don't ignore it.
return super(PeriodIndex, self).astype(dtype, copy=copy)

@Substitution(klass='PeriodIndex')
@Appender(_shared_docs['searchsorted'])
Expand Down
Loading