Skip to content

Commit

Permalink
REF: Pieces broken off of pandas-dev#24024 (pandas-dev#24364)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and Pingviinituutti committed Feb 28, 2019
1 parent cd3c7c5 commit 33ae55e
Show file tree
Hide file tree
Showing 13 changed files with 112 additions and 52 deletions.
23 changes: 18 additions & 5 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,9 +296,22 @@ def __iter__(self):

@property
def asi8(self):
    # type: () -> ndarray
    """
    View the underlying data as 64-bit integers.

    Returns
    -------
    ndarray
        An ndarray with int64 dtype.
    """
    # Build the view anew on every access: do not cache it or you'll
    # create a memory leak.
    i8_view = self._data.view('i8')
    return i8_view

@property
def _ndarray_values(self):
return self._data

# ----------------------------------------------------------------
# Rendering Methods

Expand Down Expand Up @@ -469,7 +482,7 @@ def _isnan(self):
return (self.asi8 == iNaT)

@property # NB: override with cache_readonly in immutable subclasses
def hasnans(self):
def _hasnans(self):
"""
return if I have any nans; enables various perf speedups
"""
Expand All @@ -493,7 +506,7 @@ def _maybe_mask_results(self, result, fill_value=iNaT, convert=None):
This is an internal routine
"""

if self.hasnans:
if self._hasnans:
if convert:
result = result.astype(convert)
if fill_value is None:
Expand Down Expand Up @@ -696,7 +709,7 @@ def _add_delta_tdi(self, other):
new_values = checked_add_with_arr(self_i8, other_i8,
arr_mask=self._isnan,
b_mask=other._isnan)
if self.hasnans or other.hasnans:
if self._hasnans or other._hasnans:
mask = (self._isnan) | (other._isnan)
new_values[mask] = iNaT
return new_values.view('i8')
Expand Down Expand Up @@ -764,7 +777,7 @@ def _sub_period_array(self, other):
b_mask=other._isnan)

new_values = np.array([self.freq.base * x for x in new_values])
if self.hasnans or other.hasnans:
if self._hasnans or other._hasnans:
mask = (self._isnan) | (other._isnan)
new_values[mask] = NaT
return new_values
Expand Down Expand Up @@ -1085,7 +1098,7 @@ def _evaluate_compare(self, other, op):
elif lib.is_scalar(lib.item_from_zerodim(other)):
# ndarray scalar
other = [other.item()]
other = type(self)(other)
other = type(self)._from_sequence(other)

# compare
result = op(self.asi8, other.asi8)
Expand Down
26 changes: 22 additions & 4 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def wrapper(self, other):
else:
if isinstance(other, list):
try:
other = type(self)(other)
other = type(self)._from_sequence(other)
except ValueError:
other = np.array(other, dtype=np.object_)
elif not isinstance(other, (np.ndarray, ABCIndexClass, ABCSeries,
Expand Down Expand Up @@ -147,7 +147,7 @@ def wrapper(self, other):
if o_mask.any():
result[o_mask] = nat_result

if self.hasnans:
if self._hasnans:
result[self._isnan] = nat_result

return result
Expand Down Expand Up @@ -349,14 +349,32 @@ def _box_func(self):

@property
def dtype(self):
    # type: () -> Union[np.dtype, DatetimeTZDtype]
    """
    The dtype for the DatetimeArray.

    Returns
    -------
    numpy.dtype or DatetimeTZDtype
        For tz-naive values (``self.tz`` is None) the module-level
        ``_NS_DTYPE`` is returned, i.e. ``np.dtype('datetime64[ns]')``.
        For tz-aware values a nanosecond-unit ``DatetimeTZDtype``
        carrying ``self.tz`` is returned.
    """
    # tz-aware case first; the tz-naive dtype is the fall-through.
    if self.tz is not None:
        return DatetimeTZDtype('ns', self.tz)
    return _NS_DTYPE

@property
def tz(self):
    """
    Timezone attached to the values, if there is one.

    Returns
    -------
    datetime.tzinfo, pytz.tzinfo.BaseTZInfo, dateutil.tz.tz.tzfile, or None
        None is returned when the array is tz-naive.
    """
    # GH 18595
    zone = self._tz
    return zone
Expand Down Expand Up @@ -534,7 +552,7 @@ def _sub_datetime_arraylike(self, other):
other_i8 = other.asi8
new_values = checked_add_with_arr(self_i8, -other_i8,
arr_mask=self._isnan)
if self.hasnans or other.hasnans:
if self._hasnans or other._hasnans:
mask = (self._isnan) | (other._isnan)
new_values[mask] = iNaT
return new_values.view('timedelta64[ns]')
Expand Down
12 changes: 6 additions & 6 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def wrapper(self, other):
other = Period(other, freq=self.freq)
result = op(other.ordinal)

if self.hasnans:
if self._hasnans:
result[self._isnan] = nat_result

return result
Expand Down Expand Up @@ -499,7 +499,7 @@ def _time_shift(self, n, freq=None):
"{cls}._time_shift"
.format(cls=type(self).__name__))
values = self.asi8 + n * self.freq.n
if self.hasnans:
if self._hasnans:
values[self._isnan] = iNaT
return type(self)(values, freq=self.freq)

Expand Down Expand Up @@ -561,7 +561,7 @@ def asfreq(self, freq=None, how='E'):

new_data = period_asfreq_arr(ordinal, base1, base2, end)

if self.hasnans:
if self._hasnans:
new_data[self._isnan] = iNaT

return type(self)(new_data, freq=freq)
Expand All @@ -581,7 +581,7 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs):
else:
formatter = lambda dt: u'%s' % dt

if self.hasnans:
if self._hasnans:
mask = self._isnan
values[mask] = na_rep
imask = ~mask
Expand Down Expand Up @@ -668,7 +668,7 @@ def _sub_period(self, other):
new_data = asi8 - other.ordinal
new_data = np.array([self.freq * x for x in new_data])

if self.hasnans:
if self._hasnans:
new_data[self._isnan] = NaT

return new_data
Expand Down Expand Up @@ -983,7 +983,7 @@ def dt64arr_to_periodarr(data, freq, tz=None):
"""
if data.dtype != np.dtype('M8[ns]'):
raise ValueError('Wrong dtype: %s' % data.dtype)
raise ValueError('Wrong dtype: {dtype}'.format(dtype=data.dtype))

if freq is None:
if isinstance(data, ABCIndexClass):
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def _field_accessor(name, alias, docstring=None):
def f(self):
values = self.asi8
result = get_timedelta_field(values, alias)
if self.hasnans:
if self._hasnans:
result = self._maybe_mask_results(result, fill_value=None,
convert='float64')

Expand Down Expand Up @@ -102,7 +102,7 @@ def wrapper(self, other):
if o_mask.any():
result[o_mask] = nat_result

if self.hasnans:
if self._hasnans:
result[self._isnan] = nat_result

return result
Expand Down Expand Up @@ -714,7 +714,7 @@ def components(self):

columns = ['days', 'hours', 'minutes', 'seconds',
'milliseconds', 'microseconds', 'nanoseconds']
hasnans = self.hasnans
hasnans = self._hasnans
if hasnans:
def f(x):
if isna(x):
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -682,7 +682,7 @@ def __array__(self, dtype=None):
"""
The array interface, return my values.
"""
return self._data.view(np.ndarray)
return np.asarray(self._data, dtype=dtype)

def __array_wrap__(self, result, context=None):
"""
Expand Down Expand Up @@ -739,6 +739,8 @@ def view(self, cls=None):
Parameters
----------
dtype : numpy dtype or pandas type
Note that any integer `dtype` is treated as ``'int64'``,
regardless of the sign and size.
copy : bool, default True
By default, astype always returns a newly allocated object.
If copy is set to False and internal requirements on dtype are
Expand Down
9 changes: 7 additions & 2 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,17 +40,22 @@ class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin):
# override DatetimeLikeArrayMixin method
copy = Index.copy
unique = Index.unique
take = Index.take

# DatetimeLikeArrayMixin assumes subclasses are mutable, so these are
# properties there. They can be made into cache_readonly for Index
# subclasses because they are immutable
inferred_freq = cache_readonly(DatetimeLikeArrayMixin.inferred_freq.fget)
_isnan = cache_readonly(DatetimeLikeArrayMixin._isnan.fget)
hasnans = cache_readonly(DatetimeLikeArrayMixin.hasnans.fget)
hasnans = cache_readonly(DatetimeLikeArrayMixin._hasnans.fget)
_hasnans = hasnans # for index / array -agnostic code
_resolution = cache_readonly(DatetimeLikeArrayMixin._resolution.fget)
resolution = cache_readonly(DatetimeLikeArrayMixin.resolution.fget)

# A few methods that are shared
_maybe_mask_results = DatetimeLikeArrayMixin._maybe_mask_results

# ------------------------------------------------------------------------

def equals(self, other):
"""
Determines if two Index objects contain the same elements.
Expand Down
14 changes: 7 additions & 7 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,11 +227,11 @@ def __new__(cls, data=None,
"endpoints is deprecated. Use "
"`pandas.date_range` instead.",
FutureWarning, stacklevel=2)
result = cls._generate_range(start, end, periods,
freq=freq, tz=tz, normalize=normalize,
closed=closed, ambiguous=ambiguous)
result.name = name
return result
dtarr = DatetimeArray._generate_range(
start, end, periods,
freq=freq, tz=tz, normalize=normalize,
closed=closed, ambiguous=ambiguous)
return cls(dtarr, name=name)

if is_scalar(data):
raise TypeError("{cls}() must be called with a "
Expand Down Expand Up @@ -1473,12 +1473,12 @@ def date_range(start=None, end=None, periods=None, freq=None, tz=None,
if freq is None and com._any_none(periods, start, end):
freq = 'D'

result = DatetimeIndex._generate_range(
dtarr = DatetimeArray._generate_range(
start=start, end=end, periods=periods,
freq=freq, tz=tz, normalize=normalize,
closed=closed, **kwargs)

result.name = name
result = DatetimeIndex(dtarr, name=name)
return result


Expand Down
31 changes: 14 additions & 17 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,15 +125,6 @@ def _join_i8_wrapper(joinf, **kwargs):
_left_indexer_unique = _join_i8_wrapper(
libjoin.left_join_indexer_unique_int64, with_indexers=False)

# define my properties & methods for delegation
_other_ops = []
_bool_ops = []
_object_ops = ['freq']
_field_ops = ['days', 'seconds', 'microseconds', 'nanoseconds']
_datetimelike_ops = _field_ops + _object_ops + _bool_ops
_datetimelike_methods = ["to_pytimedelta", "total_seconds",
"round", "floor", "ceil"]

_engine_type = libindex.TimedeltaEngine

_comparables = ['name', 'freq']
Expand All @@ -143,6 +134,14 @@ def _join_i8_wrapper(joinf, **kwargs):

_freq = None

_box_func = TimedeltaArray._box_func
_bool_ops = TimedeltaArray._bool_ops
_object_ops = TimedeltaArray._object_ops
_field_ops = TimedeltaArray._field_ops
_datetimelike_ops = TimedeltaArray._datetimelike_ops
_datetimelike_methods = TimedeltaArray._datetimelike_methods
_other_ops = TimedeltaArray._other_ops

# -------------------------------------------------------------------
# Constructors

Expand All @@ -163,10 +162,9 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
"endpoints is deprecated. Use "
"`pandas.timedelta_range` instead.",
FutureWarning, stacklevel=2)
result = cls._generate_range(start, end, periods, freq,
closed=closed)
result.name = name
return result
tdarr = TimedeltaArray._generate_range(start, end, periods, freq,
closed=closed)
return cls(tdarr, name=name)

if is_scalar(data):
raise TypeError('{cls}() must be called with a '
Expand Down Expand Up @@ -766,7 +764,6 @@ def timedelta_range(start=None, end=None, periods=None, freq=None,
freq = 'D'

freq, freq_infer = dtl.maybe_infer_freq(freq)
result = TimedeltaIndex._generate_range(start, end, periods, freq,
closed=closed)
result.name = name
return result
tdarr = TimedeltaArray._generate_range(start, end, periods, freq,
closed=closed)
return TimedeltaIndex(tdarr, name=name)
4 changes: 2 additions & 2 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1592,8 +1592,8 @@ def _right_outer_join(x, y, max_groups):
def _factorize_keys(lk, rk, sort=True):
# Some pre-processing for non-ndarray lk / rk
if is_datetime64tz_dtype(lk) and is_datetime64tz_dtype(rk):
lk = lk.values
rk = rk.values
lk = lk._data
rk = rk._data

elif (is_categorical_dtype(lk) and
is_categorical_dtype(rk) and
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
- ndarray of Timestamps if box=False
"""
from pandas import DatetimeIndex
from pandas.core.arrays import DatetimeArrayMixin as DatetimeArray
from pandas.core.arrays.datetimes import (
maybe_convert_dtype, objects_to_datetime64ns)

Expand All @@ -179,14 +180,14 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,

# these are shortcutable
if is_datetime64tz_dtype(arg):
if not isinstance(arg, DatetimeIndex):
if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
return DatetimeIndex(arg, tz=tz, name=name)
if tz == 'utc':
arg = arg.tz_convert(None).tz_localize(tz)
return arg

elif is_datetime64_ns_dtype(arg):
if box and not isinstance(arg, DatetimeIndex):
if box and not isinstance(arg, (DatetimeArray, DatetimeIndex)):
try:
return DatetimeIndex(arg, tz=tz, name=name)
except ValueError:
Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/arrays/test_timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,17 @@
import pandas.util.testing as tm


class TestTimedeltaArrayConstructor(object):
    """Constructor behaviour of TimedeltaArray."""

    def test_copy(self):
        """
        Check that ``copy=False`` keeps the passed ndarray as ``_data``,
        while ``copy=True`` stores an array that is neither the input
        nor a view whose ``base`` is the input.
        """
        raw = np.array([1, 2, 3], dtype='m8[ns]')

        # No copy requested: the very same ndarray object is stored.
        shared = TimedeltaArray(raw, copy=False)
        assert shared._data is raw

        # Copy requested: the stored array must be independent of the input.
        copied = TimedeltaArray(raw, copy=True)
        assert copied._data is not raw
        assert copied._data.base is not raw


class TestTimedeltaArray(object):
def test_from_sequence_dtype(self):
msg = r"Only timedelta64\[ns\] dtype is valid"
Expand Down
Loading

0 comments on commit 33ae55e

Please sign in to comment.