implement independent parts of pandas-dev#24024 (pandas-dev#24276)
* implement independent parts of pandas-dev#24024

* move monotonic checks up
jbrockmendel authored and Pingviinituutti committed Feb 28, 2019
1 parent d09b08f commit 4205e45
Showing 15 changed files with 136 additions and 72 deletions.
26 changes: 21 additions & 5 deletions pandas/core/arrays/datetimelike.py
@@ -5,7 +5,7 @@

import numpy as np

from pandas._libs import NaT, iNaT, lib
from pandas._libs import NaT, algos, iNaT, lib
from pandas._libs.tslibs.period import (
DIFFERENT_FREQ_INDEX, IncompatibleFrequency, Period)
from pandas._libs.tslibs.timedeltas import Timedelta, delta_to_nanoseconds
@@ -155,6 +155,7 @@ class TimelikeOps(object):
times
.. versionadded:: 0.24.0
nonexistent : 'shift', 'NaT', default 'raise'
A nonexistent time does not exist in a particular timezone
where clocks moved forward due to DST.
@@ -246,7 +247,7 @@ def _round(self, freq, mode, ambiguous, nonexistent):
if 'tz' in attribs:
attribs['tz'] = None
return self._ensure_localized(
self._shallow_copy(result, **attribs), ambiguous, nonexistent
self._simple_new(result, **attribs), ambiguous, nonexistent
)

@Appender((_round_doc + _round_example).format(op="round"))
@@ -310,6 +311,8 @@ def shape(self):

@property
def size(self):
# type: () -> int
"""The number of elements in this array."""
return np.prod(self.shape)

def __len__(self):
@@ -554,6 +557,21 @@ def _validate_frequency(cls, index, freq, **kwargs):
'does not conform to passed frequency {passed}'
.format(infer=inferred, passed=freq.freqstr))

# monotonicity/uniqueness properties are called via frequencies.infer_freq,
# see GH#23789

@property
def _is_monotonic_increasing(self):
return algos.is_monotonic(self.asi8, timelike=True)[0]

@property
def _is_monotonic_decreasing(self):
return algos.is_monotonic(self.asi8, timelike=True)[1]

@property
def _is_unique(self):
return len(unique1d(self.asi8)) == len(self)

# ------------------------------------------------------------------
# Arithmetic Methods

@@ -661,9 +679,7 @@ def _add_nat(self):
# and datetime dtypes
result = np.zeros(len(self), dtype=np.int64)
result.fill(iNaT)
if is_timedelta64_dtype(self):
return type(self)(result, freq=None)
return type(self)(result, tz=self.tz, freq=None)
return type(self)(result, dtype=self.dtype, freq=None)

def _sub_nat(self):
"""
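The monotonicity/uniqueness helpers added above were previously defined on TimedeltaArrayMixin (see their removal in pandas/core/arrays/timedeltas.py further down) and are hoisted here to the shared datetime-like base class, so frequency inference can call them on any datetime-like array via its int64 view. A minimal sketch of what they compute, using the private `algos.is_monotonic` and `unique1d` helpers referenced in this diff (internal APIs, shown only for illustration):

```python
# Sketch of the hoisted monotonicity/uniqueness checks; pandas._libs.algos and
# pandas.core.algorithms are private APIs, used here only because the diff does.
import pandas as pd
from pandas._libs import algos
from pandas.core.algorithms import unique1d

dti = pd.date_range("2019-01-01", periods=3, freq="D")
i8 = dti.asi8  # int64 view shared by all datetime-like arrays

increasing = algos.is_monotonic(i8, timelike=True)[0]
decreasing = algos.is_monotonic(i8, timelike=True)[1]
is_unique = len(unique1d(i8)) == len(i8)

print(increasing, decreasing, is_unique)  # expected: True False True
```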
24 changes: 19 additions & 5 deletions pandas/core/arrays/datetimes.py
@@ -165,10 +165,23 @@ class DatetimeArrayMixin(dtl.DatetimeLikeArrayMixin,
_data
"""
_typ = "datetimearray"

# define my properties & methods for delegation
_bool_ops = ['is_month_start', 'is_month_end',
'is_quarter_start', 'is_quarter_end', 'is_year_start',
'is_year_end', 'is_leap_year']
_object_ops = ['weekday_name', 'freq', 'tz']
_field_ops = ['year', 'month', 'day', 'hour', 'minute', 'second',
'weekofyear', 'week', 'weekday', 'dayofweek',
'dayofyear', 'quarter', 'days_in_month',
'daysinmonth', 'microsecond',
'nanosecond']
_other_ops = ['date', 'time', 'timetz']
_datetimelike_ops = _field_ops + _object_ops + _bool_ops + _other_ops
_datetimelike_methods = ['to_period', 'tz_localize',
'tz_convert',
'normalize', 'strftime', 'round', 'floor',
'ceil', 'month_name', 'day_name']

# dummy attribute so that datetime.__eq__(DatetimeArray) defers
# by returning NotImplemented
@@ -527,7 +540,7 @@ def _add_offset(self, offset):
"or DatetimeIndex", PerformanceWarning)
result = self.astype('O') + offset

return type(self)(result, freq='infer')
return type(self)._from_sequence(result, freq='infer')

def _sub_datetimelike_scalar(self, other):
# subtract a datetime from myself, yielding a ndarray[timedelta64[ns]]
@@ -562,8 +575,8 @@ def _add_delta(self, delta):
-------
result : DatetimeArray
"""
new_values = dtl.DatetimeLikeArrayMixin._add_delta(self, delta)
return type(self)(new_values, tz=self.tz, freq='infer')
new_values = super(DatetimeArrayMixin, self)._add_delta(delta)
return type(self)._from_sequence(new_values, tz=self.tz, freq='infer')

# -----------------------------------------------------------------
# Timezone Conversion and Localization Methods
@@ -866,14 +879,15 @@ def normalize(self):
dtype='datetime64[ns, Asia/Calcutta]', freq=None)
"""
if self.tz is None or timezones.is_utc(self.tz):
not_null = self.notna()
not_null = ~self.isna()
DAY_NS = ccalendar.DAY_SECONDS * 1000000000
new_values = self.asi8.copy()
adjustment = (new_values[not_null] % DAY_NS)
new_values[not_null] = new_values[not_null] - adjustment
else:
new_values = conversion.normalize_i8_timestamps(self.asi8, self.tz)
return type(self)(new_values, freq='infer').tz_localize(self.tz)
return type(self)._from_sequence(new_values,
freq='infer').tz_localize(self.tz)

def to_period(self, freq=None):
"""
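The datetimes.py changes move the delegation name lists onto the array class, route result construction through `_from_sequence(..., freq='infer')` instead of the bare constructor, and replace `self.notna()` with `~self.isna()` inside `normalize`. The public behavior of `normalize` is expected to be unchanged; a quick check against the docstring example visible in the hunk above:

```python
# Public-API check of DatetimeIndex.normalize(), whose internals the hunk
# above rewrites; the output matches the docstring example in the diff.
import pandas as pd

idx = pd.date_range("2014-08-01 10:00", periods=3, freq="H", tz="Asia/Calcutta")
print(idx.normalize())
# DatetimeIndex(['2014-08-01', '2014-08-01', '2014-08-01'],
#               dtype='datetime64[ns, Asia/Calcutta]', freq=None)
```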
2 changes: 1 addition & 1 deletion pandas/core/arrays/period.py
@@ -336,7 +336,7 @@ def to_timestamp(self, freq=None, how='start'):
new_data = self.asfreq(freq, how=how)

new_data = libperiod.periodarr_to_dt64arr(new_data.asi8, base)
return DatetimeArrayMixin(new_data, freq='infer')
return DatetimeArrayMixin._from_sequence(new_data, freq='infer')

# --------------------------------------------------------------------
# Array-like / EA-Interface Methods
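`PeriodArray.to_timestamp` now builds its result through `DatetimeArrayMixin._from_sequence` rather than the raw constructor; the conversion itself is unchanged. For reference, the public round trip it supports:

```python
# Public round trip exercised by the to_timestamp change above.
import pandas as pd

pi = pd.period_range("2000-01", periods=3, freq="M")
print(pi.to_timestamp(how="start"))
# month-start timestamps: 2000-01-01, 2000-02-01, 2000-03-01
```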
39 changes: 17 additions & 22 deletions pandas/core/arrays/timedeltas.py
@@ -6,7 +6,7 @@

import numpy as np

from pandas._libs import algos, lib, tslibs
from pandas._libs import lib, tslibs
from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT
from pandas._libs.tslibs.fields import get_timedelta_field
from pandas._libs.tslibs.timedeltas import (
@@ -15,15 +15,16 @@
from pandas.util._decorators import Appender

from pandas.core.dtypes.common import (
_TD_DTYPE, ensure_int64, is_datetime64_dtype, is_float_dtype,
_NS_DTYPE, _TD_DTYPE, ensure_int64, is_datetime64_dtype, is_float_dtype,
is_integer_dtype, is_list_like, is_object_dtype, is_scalar,
is_string_dtype, is_timedelta64_dtype)
from pandas.core.dtypes.dtypes import DatetimeTZDtype
from pandas.core.dtypes.generic import (
ABCDataFrame, ABCIndexClass, ABCSeries, ABCTimedeltaIndex)
from pandas.core.dtypes.missing import isna

from pandas.core import ops
from pandas.core.algorithms import checked_add_with_arr, unique1d
from pandas.core.algorithms import checked_add_with_arr
import pandas.core.common as com

from pandas.tseries.frequencies import to_offset
@@ -90,7 +91,7 @@ def wrapper(self, other):

else:
try:
other = type(self)(other)._data
other = type(self)._from_sequence(other)._data
except (ValueError, TypeError):
return ops.invalid_comparison(self, other, op)

@@ -112,6 +113,14 @@ def wrapper(self, other):
class TimedeltaArrayMixin(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps):
_typ = "timedeltaarray"
__array_priority__ = 1000
# define my properties & methods for delegation
_other_ops = []
_bool_ops = []
_object_ops = ['freq']
_field_ops = ['days', 'seconds', 'microseconds', 'nanoseconds']
_datetimelike_ops = _field_ops + _object_ops + _bool_ops
_datetimelike_methods = ["to_pytimedelta", "total_seconds",
"round", "floor", "ceil"]

# Needed so that NaT.__richcmp__(DateTimeArray) operates pointwise
ndim = 1
@@ -222,21 +231,6 @@ def _validate_fill_value(self, fill_value):
"Got '{got}'.".format(got=fill_value))
return fill_value

# monotonicity/uniqueness properties are called via frequencies.infer_freq,
# see GH#23789

@property
def _is_monotonic_increasing(self):
return algos.is_monotonic(self.asi8, timelike=True)[0]

@property
def _is_monotonic_decreasing(self):
return algos.is_monotonic(self.asi8, timelike=True)[1]

@property
def _is_unique(self):
return len(unique1d(self.asi8)) == len(self)

# ----------------------------------------------------------------
# Arithmetic Methods

Expand All @@ -262,8 +256,8 @@ def _add_delta(self, delta):
-------
result : TimedeltaArray
"""
new_values = dtl.DatetimeLikeArrayMixin._add_delta(self, delta)
return type(self)(new_values, freq='infer')
new_values = super(TimedeltaArrayMixin, self)._add_delta(delta)
return type(self)._from_sequence(new_values, freq='infer')

def _add_datetime_arraylike(self, other):
"""
@@ -293,7 +287,8 @@ def _add_datetimelike_scalar(self, other):
result = checked_add_with_arr(i8, other.value,
arr_mask=self._isnan)
result = self._maybe_mask_results(result)
return DatetimeArrayMixin(result, tz=other.tz, freq=self.freq)
dtype = DatetimeTZDtype(tz=other.tz) if other.tz else _NS_DTYPE
return DatetimeArrayMixin(result, dtype=dtype, freq=self.freq)

def _addsub_offset_array(self, other, op):
# Add or subtract Array-like of DateOffset objects
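In timedeltas.py the comparison wrapper and `_add_delta` now go through `_from_sequence`, the monotonicity helpers move to the shared base class (see datetimelike.py above), and `_add_datetimelike_scalar` passes an explicit dtype (`DatetimeTZDtype(tz=...)` for tz-aware scalars, otherwise `_NS_DTYPE`) instead of a `tz=` keyword. The user-visible result of adding a tz-aware Timestamp to a TimedeltaIndex should be the same tz-aware DatetimeIndex as before:

```python
# Public-API check of the _add_datetimelike_scalar change above.
import pandas as pd

tdi = pd.timedelta_range("1 day", periods=2)
result = tdi + pd.Timestamp("2000-01-01", tz="US/Central")
print(result.dtype)  # datetime64[ns, US/Central]
print(result)
# DatetimeIndex(['2000-01-02 00:00:00-06:00', '2000-01-03 00:00:00-06:00'], ...)
```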
25 changes: 7 additions & 18 deletions pandas/core/indexes/datetimes.py
@@ -191,32 +191,21 @@ def _join_i8_wrapper(joinf, **kwargs):
_tz = None
_freq = None
_comparables = ['name', 'freqstr', 'tz']
_attributes = ['name', 'freq', 'tz']
_attributes = ['name', 'tz', 'freq']

# dummy attribute so that datetime.__eq__(DatetimeArray) defers
# by returning NotImplemented
timetuple = None

# define my properties & methods for delegation
_bool_ops = ['is_month_start', 'is_month_end',
'is_quarter_start', 'is_quarter_end', 'is_year_start',
'is_year_end', 'is_leap_year']
_object_ops = ['weekday_name', 'freq', 'tz']
_field_ops = ['year', 'month', 'day', 'hour', 'minute', 'second',
'weekofyear', 'week', 'weekday', 'dayofweek',
'dayofyear', 'quarter', 'days_in_month',
'daysinmonth', 'microsecond',
'nanosecond']
_other_ops = ['date', 'time', 'timetz']
_datetimelike_ops = _field_ops + _object_ops + _bool_ops + _other_ops
_datetimelike_methods = ['to_period', 'tz_localize',
'tz_convert',
'normalize', 'strftime', 'round', 'floor',
'ceil', 'month_name', 'day_name']

_is_numeric_dtype = False
_infer_as_myclass = True

# some things like freq inference make use of these attributes.
_bool_ops = DatetimeArray._bool_ops
_object_ops = DatetimeArray._object_ops
_field_ops = DatetimeArray._field_ops
_datetimelike_ops = DatetimeArray._datetimelike_ops

# --------------------------------------------------------------------
# Constructors

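DatetimeIndex no longer redefines the delegation lists; it now points at the ones defined on the array class (aliased as `DatetimeArray` in that module), so the index and the array cannot drift apart. A hedged consistency check, assuming the internal module path `pandas.core.arrays.datetimes` shown in this diff remains importable:

```python
# Hedged consistency check; DatetimeArrayMixin lives in a private module and
# its location may change between releases.
import pandas as pd
from pandas.core.arrays.datetimes import DatetimeArrayMixin

assert pd.DatetimeIndex._bool_ops == DatetimeArrayMixin._bool_ops
assert pd.DatetimeIndex._field_ops == DatetimeArrayMixin._field_ops
assert "is_leap_year" in pd.DatetimeIndex._bool_ops
```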
4 changes: 2 additions & 2 deletions pandas/io/formats/format.py
@@ -873,6 +873,8 @@ def format_array(values, formatter, float_format=None, na_rep='NaN',

if is_datetime64_dtype(values.dtype):
fmt_klass = Datetime64Formatter
elif is_datetime64tz_dtype(values):
fmt_klass = Datetime64TZFormatter
elif is_timedelta64_dtype(values.dtype):
fmt_klass = Timedelta64Formatter
elif is_extension_array_dtype(values.dtype):
@@ -881,8 +883,6 @@ def format_array(values, formatter, float_format=None, na_rep='NaN',
fmt_klass = FloatArrayFormatter
elif is_integer_dtype(values.dtype):
fmt_klass = IntArrayFormatter
elif is_datetime64tz_dtype(values):
fmt_klass = Datetime64TZFormatter
else:
fmt_klass = GenericArrayFormatter

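The `format_array` dispatch moves the `is_datetime64tz_dtype` check ahead of the generic `is_extension_array_dtype` branch: tz-aware datetime data also registers as an extension dtype, so checking it later would route it to the generic extension formatter instead of `Datetime64TZFormatter`. The premise can be checked directly (assuming pandas 0.24+ dtype registration):

```python
# Both predicates are True for tz-aware datetime data, so the more specific
# tz check must run before the generic extension-array branch.
import pandas as pd
from pandas.api.types import is_datetime64tz_dtype, is_extension_array_dtype

values = pd.Series(pd.date_range("2013-01-01", periods=2, tz="US/Eastern"))
print(is_datetime64tz_dtype(values))     # True
print(is_extension_array_dtype(values))  # True
```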
11 changes: 11 additions & 0 deletions pandas/tests/indexes/datetimes/test_astype.py
@@ -299,3 +299,14 @@ def test_to_period_nofreq(self):
idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03'])
assert idx.freqstr is None
tm.assert_index_equal(idx.to_period(), expected)

@pytest.mark.parametrize('tz', [None, 'US/Central'])
def test_astype_array_fallback(self, tz):
obj = pd.date_range("2000", periods=2, tz=tz)
result = obj.astype(bool)
expected = pd.Index(np.array([True, True]))
tm.assert_index_equal(result, expected)

result = obj._data.astype(bool)
expected = np.array([True, True])
tm.assert_numpy_array_equal(result, expected)
21 changes: 21 additions & 0 deletions pandas/tests/indexes/period/test_astype.py
@@ -97,3 +97,24 @@ def test_astype_object2(self):
for i in [0, 1, 3]:
assert result_list[i] == expected_list[i]
assert result_list[2] is pd.NaT

def test_astype_category(self):
obj = pd.period_range("2000", periods=2)
result = obj.astype('category')
expected = pd.CategoricalIndex([pd.Period('2000-01-01', freq="D"),
pd.Period('2000-01-02', freq="D")])
tm.assert_index_equal(result, expected)

result = obj._data.astype('category')
expected = expected.values
tm.assert_categorical_equal(result, expected)

def test_astype_array_fallback(self):
obj = pd.period_range("2000", periods=2)
result = obj.astype(bool)
expected = pd.Index(np.array([True, True]))
tm.assert_index_equal(result, expected)

result = obj._data.astype(bool)
expected = np.array([True, True])
tm.assert_numpy_array_equal(result, expected)
11 changes: 11 additions & 0 deletions pandas/tests/indexes/timedeltas/test_astype.py
@@ -4,6 +4,7 @@
import pytest

import pandas.util.testing as tm
import pandas as pd
from pandas import (
Float64Index, Index, Int64Index, NaT, Timedelta, TimedeltaIndex,
timedelta_range
@@ -77,3 +78,13 @@ def test_astype_raises(self, dtype):
msg = 'Cannot cast TimedeltaIndex to dtype'
with pytest.raises(TypeError, match=msg):
idx.astype(dtype)

def test_astype_array_fallback(self):
obj = pd.timedelta_range("1H", periods=2)
result = obj.astype(bool)
expected = pd.Index(np.array([True, True]))
tm.assert_index_equal(result, expected)

result = obj._data.astype(bool)
expected = np.array([True, True])
tm.assert_numpy_array_equal(result, expected)
3 changes: 2 additions & 1 deletion pandas/tests/io/json/test_pandas.py
@@ -1026,7 +1026,8 @@ def test_tz_range_is_utc(self):
dti = pd.DatetimeIndex(tz_range)
assert dumps(dti, iso_dates=True) == exp
df = DataFrame({'DT': dti})
assert dumps(df, iso_dates=True) == dfexp
result = dumps(df, iso_dates=True)
assert result == dfexp

tz_range = pd.date_range('2013-01-01 00:00:00', periods=2,
tz='US/Eastern')
10 changes: 10 additions & 0 deletions pandas/tests/series/test_datetime_values.py
@@ -555,3 +555,13 @@ def test_setitem_with_string_index(self):
x['Date'] = date.today()
assert x.Date == date.today()
assert x['Date'] == date.today()

def test_setitem_with_different_tz(self):
# GH#24024
ser = pd.Series(pd.date_range('2000', periods=2, tz="US/Central"))
ser[0] = pd.Timestamp("2000", tz='US/Eastern')
expected = pd.Series([
pd.Timestamp("2000-01-01 00:00:00-05:00", tz="US/Eastern"),
pd.Timestamp("2000-01-02 00:00:00-06:00", tz="US/Central"),
], dtype=object)
tm.assert_series_equal(ser, expected)
10 changes: 10 additions & 0 deletions pandas/tests/series/test_timeseries.py
@@ -1023,3 +1023,13 @@ def test_get_level_values_box(self):
index = MultiIndex(levels=levels, codes=codes)

assert isinstance(index.get_level_values(0)[0], Timestamp)

def test_view_tz(self):
# GH#24024
ser = pd.Series(pd.date_range('2000', periods=4, tz='US/Central'))
result = ser.view("i8")
expected = pd.Series([946706400000000000,
946792800000000000,
946879200000000000,
946965600000000000])
tm.assert_series_equal(result, expected)