-
-
Notifications
You must be signed in to change notification settings - Fork 18.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
REF: DatetimeLikeArray #24024
REF: DatetimeLikeArray #24024
Changes from all commits
e9c8418
80d9576
acd5b6d
f364f77
56d9af6
538db1f
6620b0f
4842e53
a7c1d42
07586d9
7ebb9ee
e843984
67a9cf9
09837ac
f3f9142
7c76b3e
eae133d
165f3fd
7ec7351
e7538e6
4f1ee37
a117de4
1f463a1
d5fdc21
98182b1
d4c9521
c22a30a
abd019a
4e9608e
4988630
3eb8432
2a562b8
f4cbf36
b366968
fef6847
9c19b8c
3970f62
9a50f92
ebb4009
57b401e
0a61ba8
323bfeb
2c1719f
62bf6c6
0a8ccfd
d5f2ac2
831d91a
e69ba08
c31b80e
b4a0dc6
8c0641b
5d7dfda
580f7ba
c41ec57
558adf8
6586bcd
5777ed0
d20291f
0f231e7
5f473df
c3b7dea
074eed9
02145d9
bf57186
af34a0d
d557976
87f18e3
afc4c4a
3702801
119575f
0cd6958
4371ed0
629e8e5
a9a2101
4c460c6
ac734b3
b485e5a
f7d9cdb
a86e4cb
462a4f7
b901c3d
4bf1862
7dd3ba5
87101bf
8c6f2db
6bfd919
4cb6c50
4c1609a
e7505cd
8060edd
55f6c26
ef11a07
17a3bbb
48f85f0
4ec0284
aa82a0b
d7dcd79
34c2bd1
d0a266e
94fd88b
75df1c9
82c998a
9252c75
512af69
80b3455
87c125b
b9f2d4e
a695eb8
bbc5f1b
a22f22c
0aff5fa
90a937f
1566c1e
cc80a8e
58f3421
99bc78e
ee48dc0
6e487c6
f4aa1f8
d81c204
d821927
617a172
32ef700
0a0df77
169eae6
be4335d
203a5a6
fab4c33
89b5b51
b2afd4e
a874f5f
0eb28e8
42dfd30
f62544a
2e30a56
af815f8
b046791
4522dfe
eb594e7
0b570b1
72fe4fc
7a711f9
6530500
445e46a
9ae6706
60ddcb5
5988477
20c23b7
7a5fd94
e644d8c
4d3b55e
68cde94
421435a
bbf7fa4
324d452
cdec2a8
e66c18b
09c2c91
6373948
e91bc09
4bf00d8
8cb7d9e
23fd9bb
695010c
ec2c7af
a499ed8
a32e020
6d2fc99
9342b59
c566ce8
f783770
9502f90
3e1ee5e
92d8089
cbb90f7
be1c968
c655592
8d2108a
fafa1ea
01f185b
efa1c2c
a65efb0
01115c4
9d37675
38817a5
8fad32e
5dbc63a
7de78f1
7544bcf
39a2a67
6e90823
9e61b5b
0be63a6
90700fb
c77d49c
1499344
38a6eb6
b7253d7
f11f07f
ca11d27
4c2a620
adddef2
9498554
4f1c212
40cdca8
756a4b6
aea0e05
dfa7fea
cc8b1ca
6a2e1a1
b84bef1
69ed3d4
4110b4c
ce5f3b9
4c76ae1
ef36be1
ad4ea4d
14a13b0
5c8d3c6
2436214
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -228,6 +228,11 @@ static PyObject *get_values(PyObject *obj) { | |
PRINTMARK(); | ||
|
||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if (values && !PyArray_CheckExact(values)) { | ||
|
||
if (PyObject_HasAttrString(values, "to_numpy")) { | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
values = PyObject_CallMethod(values, "to_numpy", NULL); | ||
} | ||
|
||
if (PyObject_HasAttrString(values, "values")) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we remove this branch for .values? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it's possible to get Block here, which doesn't currently have a to_numpy. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. interesting, ok can you add a TODO / maybe an issue (we need one about serialization anyhow) |
||
PyObject *subvals = get_values(values); | ||
PyErr_Clear(); | ||
|
@@ -279,8 +284,8 @@ static PyObject *get_values(PyObject *obj) { | |
repr = PyString_FromString("<unknown dtype>"); | ||
} | ||
|
||
PyErr_Format(PyExc_ValueError, "%s or %s are not JSON serializable yet", | ||
PyString_AS_STRING(repr), PyString_AS_STRING(typeRepr)); | ||
PyErr_Format(PyExc_ValueError, "%R or %R are not JSON serializable yet", | ||
repr, typeRepr); | ||
Py_DECREF(repr); | ||
Py_DECREF(typeRepr); | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -47,7 +47,7 @@ def cmp_method(self, other): | |
if isinstance(other, ABCDataFrame): | ||
return NotImplemented | ||
|
||
if isinstance(other, (np.ndarray, ABCIndexClass, ABCSeries)): | ||
if isinstance(other, (np.ndarray, ABCIndexClass, ABCSeries, cls)): | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if other.ndim > 0 and len(self) != len(other): | ||
raise ValueError('Lengths must match to compare') | ||
|
||
|
@@ -1162,9 +1162,10 @@ def _addsub_offset_array(self, other, op): | |
left = lib.values_from_object(self.astype('O')) | ||
|
||
res_values = op(left, np.array(other)) | ||
kwargs = {} | ||
if not is_period_dtype(self): | ||
return type(self)(res_values, freq='infer') | ||
return self._from_sequence(res_values) | ||
kwargs['freq'] = 'infer' | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. wouldn't it be better to have this the default in PeriodArray._from_sequence? (e.g. freq='infer') There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is strange, since There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. so how can this be working now? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Whoops, I missed the For PeriodArray, freq is embedded in the dtype. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you use the original format, seems way more clear here There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can't, since frequency inference isn't (currently) allowed in |
||
return self._from_sequence(res_values, **kwargs) | ||
|
||
def _time_shift(self, periods, freq=None): | ||
""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -97,6 +97,9 @@ def _dt_array_cmp(cls, op): | |
|
||
def wrapper(self, other): | ||
meth = getattr(dtl.DatetimeLikeArrayMixin, opname) | ||
# TODO: return NotImplemented for Series / Index and let pandas unbox | ||
# Right now, returning NotImplemented for Index fails because we | ||
# go into the index implementation, which may be a bug? | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
other = lib.item_from_zerodim(other) | ||
|
||
|
@@ -145,9 +148,16 @@ def wrapper(self, other): | |
return ops.invalid_comparison(self, other, op) | ||
else: | ||
self._assert_tzawareness_compat(other) | ||
if not hasattr(other, 'asi8'): | ||
# ndarray, Series | ||
other = type(self)(other) | ||
if isinstance(other, (ABCIndexClass, ABCSeries)): | ||
other = other.array | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
if (is_datetime64_dtype(other) and | ||
not is_datetime64_ns_dtype(other) or | ||
not hasattr(other, 'asi8')): | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add a layer of parens to disambiguate the and/or |
||
# e.g. other.dtype == 'datetime64[s]' | ||
# or an object-dtype ndarray | ||
other = type(self)._from_sequence(other) | ||
|
||
result = meth(self, other) | ||
o_mask = other._isnan | ||
|
||
|
@@ -171,10 +181,24 @@ class DatetimeArrayMixin(dtl.DatetimeLikeArrayMixin, | |
dtl.TimelikeOps, | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
dtl.DatelikeOps): | ||
""" | ||
Assumes that subclass __new__/__init__ defines: | ||
tz | ||
_freq | ||
_data | ||
Pandas ExtensionArray for tz-naive or tz-aware datetime data. | ||
|
||
.. versionadded:: 0.24.0 | ||
|
||
Parameters | ||
---------- | ||
values : Series, Index, DatetimeArray, ndarray | ||
The datetime data. | ||
|
||
For DatetimeArray `values` (or a Series or Index boxing one), | ||
`dtype` and `freq` will be extracted from `values`, with | ||
precedence given to | ||
|
||
dtype : numpy.dtype or DatetimeTZDtype | ||
Note that the only NumPy dtype allowed is 'datetime64[ns]'. | ||
freq : str or Offset, optional | ||
copy : bool, default False | ||
Whether to copy the underlying array of values. | ||
""" | ||
_typ = "datetimearray" | ||
_scalar_type = Timestamp | ||
|
@@ -213,38 +237,84 @@ class DatetimeArrayMixin(dtl.DatetimeLikeArrayMixin, | |
_dtype = None # type: Union[np.dtype, DatetimeTZDtype] | ||
_freq = None | ||
|
||
@classmethod | ||
def _simple_new(cls, values, freq=None, tz=None): | ||
""" | ||
we require the we have a dtype compat for the values | ||
if we are passed a non-dtype compat, then coerce using the constructor | ||
""" | ||
assert isinstance(values, np.ndarray), type(values) | ||
def __init__(self, values, dtype=_NS_DTYPE, freq=None, copy=False): | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if isinstance(values, (ABCSeries, ABCIndexClass)): | ||
values = values._values | ||
|
||
if isinstance(values, type(self)): | ||
# validation | ||
dtz = getattr(dtype, 'tz', None) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can none of the helper functions at the bottom of the module be used to de-duplicate some of this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Which ones do you have in mind? I think the tz / dtype validation is different. Those (e.g. |
||
if dtz and values.tz is None: | ||
dtype = DatetimeTZDtype(tz=dtype.tz) | ||
elif dtz and values.tz: | ||
if not timezones.tz_compare(dtz, values.tz): | ||
msg = ( | ||
"Timezone of the array and 'dtype' do not match. " | ||
"'{}' != '{}'" | ||
) | ||
raise TypeError(msg.format(dtz, values.tz)) | ||
elif values.tz: | ||
dtype = values.dtype | ||
# freq = validate_values_freq(values, freq) | ||
if freq is None: | ||
freq = values.freq | ||
values = values._data | ||
|
||
if not isinstance(values, np.ndarray): | ||
msg = ( | ||
"Unexpected type '{}'. 'values' must be a DatetimeArray " | ||
"ndarray, or Series or Index containing one of those." | ||
) | ||
raise ValueError(msg.format(type(values).__name__)) | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
if values.dtype == 'i8': | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# for compat with datetime/timedelta/period shared methods, | ||
# we can sometimes get here with int64 values. These represent | ||
# nanosecond UTC (or tz-naive) unix timestamps | ||
values = values.view(_NS_DTYPE) | ||
|
||
assert values.dtype == 'M8[ns]', values.dtype | ||
if values.dtype != _NS_DTYPE: | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
msg = ( | ||
"The dtype of 'values' is incorrect. Must be 'datetime64[ns]'." | ||
" Got {} instead." | ||
) | ||
raise ValueError(msg.format(values.dtype)) | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
result = object.__new__(cls) | ||
result._data = values | ||
result._freq = freq | ||
if tz is None: | ||
dtype = _NS_DTYPE | ||
else: | ||
tz = timezones.maybe_get_tz(tz) | ||
tz = timezones.tz_standardize(tz) | ||
dtype = DatetimeTZDtype('ns', tz) | ||
result._dtype = dtype | ||
return result | ||
dtype = pandas_dtype(dtype) | ||
_validate_dt64_dtype(dtype) | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
def __new__(cls, values, freq=None, tz=None, dtype=None, copy=False, | ||
dayfirst=False, yearfirst=False, ambiguous='raise'): | ||
return cls._from_sequence( | ||
values, freq=freq, tz=tz, dtype=dtype, copy=copy, | ||
dayfirst=dayfirst, yearfirst=yearfirst, ambiguous=ambiguous) | ||
if freq == "infer": | ||
msg = ( | ||
"Frequency inference not allowed in DatetimeArray.__init__. " | ||
"Use 'pd.array()' instead." | ||
) | ||
raise ValueError(msg) | ||
|
||
if copy: | ||
values = values.copy() | ||
if freq: | ||
freq = to_offset(freq) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ditto re not doing freq validation There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Did you have a longer comment on this somewhere? I seem to have lost it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. https://github.com/pandas-dev/pandas/pull/24024/files#r243105181 There is a request-for-fleshing-out there that I'll get to now. |
||
if getattr(dtype, 'tz', None): | ||
# https://github.com/pandas-dev/pandas/issues/18595 | ||
# Ensure that we have a standard timezone for pytz objects. | ||
# Without this, things like adding an array of timedeltas and | ||
# a tz-aware Timestamp (with a tz specific to its datetime) will | ||
# be incorrect(ish?) for the array as a whole | ||
dtype = DatetimeTZDtype(tz=timezones.tz_standardize(dtype.tz)) | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
self._data = values | ||
self._dtype = dtype | ||
self._freq = freq | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
@classmethod | ||
def _simple_new(cls, values, freq=None, tz=None): | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
""" | ||
we require the we have a dtype compat for the values | ||
if we are passed a non-dtype compat, then coerce using the constructor | ||
""" | ||
dtype = DatetimeTZDtype(tz=tz) if tz else _NS_DTYPE | ||
|
||
return cls(values, freq=freq, dtype=dtype) | ||
|
||
@classmethod | ||
def _from_sequence(cls, data, dtype=None, copy=False, | ||
|
@@ -459,8 +529,7 @@ def __array__(self, dtype=None): | |
elif is_int64_dtype(dtype): | ||
return self.asi8 | ||
|
||
# TODO: warn that conversion may be lossy? | ||
return self._data.view(np.ndarray) # follow Index.__array__ | ||
return self._data | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
def __iter__(self): | ||
""" | ||
|
@@ -519,7 +588,7 @@ def astype(self, dtype, copy=True): | |
|
||
@Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__) | ||
def _validate_fill_value(self, fill_value): | ||
if isna(fill_value): | ||
if isna(fill_value) or fill_value == iNaT: | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
fill_value = iNaT | ||
elif isinstance(fill_value, (datetime, np.datetime64)): | ||
self._assert_tzawareness_compat(fill_value) | ||
|
@@ -1574,6 +1643,9 @@ def sequence_to_dt64ns(data, dtype=None, copy=False, | |
# if dtype has an embedded tz, capture it | ||
tz = validate_tz_from_dtype(dtype, tz) | ||
|
||
if isinstance(data, ABCIndexClass): | ||
data = data._data | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
# By this point we are assured to have either a numpy array or Index | ||
data, copy = maybe_convert_dtype(data, copy) | ||
|
||
|
@@ -1590,12 +1662,15 @@ def sequence_to_dt64ns(data, dtype=None, copy=False, | |
data, dayfirst=dayfirst, yearfirst=yearfirst) | ||
tz = maybe_infer_tz(tz, inferred_tz) | ||
|
||
# `data` may have originally been a Categorical[datetime64[ns, tz]], | ||
# so we need to handle these types. | ||
if is_datetime64tz_dtype(data): | ||
# DatetimeArray -> ndarray | ||
tz = maybe_infer_tz(tz, data.tz) | ||
result = data._data | ||
|
||
elif is_datetime64_dtype(data): | ||
# tz-naive DatetimeArray/Index or ndarray[datetime64] | ||
# tz-naive DatetimeArray or ndarray[datetime64] | ||
data = getattr(data, "_data", data) | ||
if data.dtype != _NS_DTYPE: | ||
data = conversion.ensure_datetime64ns(data) | ||
|
@@ -1750,7 +1825,7 @@ def maybe_convert_dtype(data, copy): | |
# GH#18664 preserve tz in going DTI->Categorical->DTI | ||
# TODO: cases where we need to do another pass through this func, | ||
# e.g. the categories are timedelta64s | ||
data = data.categories.take(data.codes, fill_value=NaT) | ||
data = data.categories.take(data.codes, fill_value=NaT)._values | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
copy = False | ||
|
||
elif is_extension_type(data) and not is_datetime64tz_dtype(data): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
make to .to_numpy()