-
-
Notifications
You must be signed in to change notification settings - Fork 18.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
BUG: Indexing with UTC offset string no longer ignored #25263
Changes from 22 commits
d2716fd
f444b92
6994e77
f02e669
807a1f8
71b093e
d48c0db
3bda5fa
c10bcff
6363f05
9678ebb
b24b773
77e1e4d
44ed2a4
c16e0cd
0ef291c
0a272a9
200cdbf
ea339b6
ed96516
6ac4598
a99d9c4
6d66371
138ac7c
e0e3e25
bb4814a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,7 +28,37 @@ Other Enhancements | |
Backwards incompatible API changes | ||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
|
||
- :meth:`Timestamp.strptime` will now raise a NotImplementedError (:issue:`21257`) | ||
.. _whatsnew_0250.api_breaking.utc_offset_indexing: | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
Indexing with date strings with UTC offsets | ||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
|
||
Indexing a :class:`DataFrame` or :class:`Series` that has a :class:`DatetimeIndex` using a | ||
date string with a UTC offset previously ignored the UTC offset. Now, the UTC offset | ||
is respected in indexing. (:issue:`24076`, :issue:`16785`) | ||
|
||
*Previous Behavior*: | ||
|
||
.. code-block:: ipython | ||
|
||
In [1]: df = pd.DataFrame([0], index=pd.DatetimeIndex(['2019-01-01'], tz='US/Pacific')) | ||
|
||
In [2]: df | ||
Out[2]: | ||
0 | ||
2019-01-01 00:00:00-08:00 0 | ||
|
||
In [3]: df['2019-01-01 00:00:00+04:00':'2019-01-01 01:00:00+04:00'] | ||
Out[3]: | ||
0 | ||
2019-01-01 00:00:00-08:00 0 | ||
|
||
*New Behavior*: | ||
|
||
.. ipython:: python | ||
|
||
df = pd.DataFrame([0], index=pd.DatetimeIndex(['2019-01-01'], tz='US/Pacific')) | ||
df['2019-01-01 12:00:00+04:00':'2019-01-01 13:00:00+04:00'] | ||
|
||
.. _whatsnew_0250.api.other: | ||
|
||
|
@@ -37,7 +67,7 @@ Other API Changes | |
|
||
- :class:`DatetimeTZDtype` will now standardize pytz timezones to a common timezone instance (:issue:`24713`) | ||
- ``Timestamp`` and ``Timedelta`` scalars now implement the :meth:`to_numpy` method as aliases to :meth:`Timestamp.to_datetime64` and :meth:`Timedelta.to_timedelta64`, respectively. (:issue:`24653`) | ||
- | ||
- :meth:`Timestamp.strptime` will now raise a NotImplementedError (:issue:`21257`) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. leftover? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I moved this entry to |
||
- | ||
|
||
.. _whatsnew_0250.deprecations: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,9 +6,10 @@ | |
import numpy as np | ||
|
||
from pandas._libs import ( | ||
Timedelta, algos as libalgos, index as libindex, join as libjoin, lib, | ||
tslibs) | ||
algos as libalgos, index as libindex, join as libjoin, lib) | ||
from pandas._libs.lib import is_datetime_array | ||
from pandas._libs.tslibs import OutOfBoundsDatetime, Timedelta, Timestamp | ||
from pandas._libs.tslibs.timezones import tz_compare | ||
import pandas.compat as compat | ||
from pandas.compat import range, set_function_name, u | ||
from pandas.compat.numpy import function as nv | ||
|
@@ -447,7 +448,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, | |
try: | ||
return DatetimeIndex(subarr, copy=copy, | ||
name=name, **kwargs) | ||
except tslibs.OutOfBoundsDatetime: | ||
except OutOfBoundsDatetime: | ||
pass | ||
|
||
elif inferred.startswith('timedelta'): | ||
|
@@ -4866,6 +4867,20 @@ def slice_locs(self, start=None, end=None, step=None, kind=None): | |
# If it's a reverse slice, temporarily swap bounds. | ||
start, end = end, start | ||
|
||
# GH 16785: If start and end happen to be date strings with UTC offsets | ||
# attempt to parse and check that the offsets are the same | ||
if (isinstance(start, (compat.string_types, datetime)) | ||
and isinstance(end, (compat.string_types, datetime))): | ||
try: | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
ts_start = Timestamp(start) | ||
ts_end = Timestamp(end) | ||
except (ValueError, TypeError): | ||
pass | ||
else: | ||
if not tz_compare(ts_start.tzinfo, ts_end.tzinfo): | ||
raise ValueError("Both dates must have the " | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is this? |
||
"same UTC offset") | ||
|
||
start_slice = None | ||
if start is not None: | ||
start_slice = self.get_slice_bound(start, 'left', kind) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,9 +32,8 @@ | |
from pandas.core.ops import get_op_result_name | ||
import pandas.core.tools.datetimes as tools | ||
|
||
from pandas.tseries import offsets | ||
from pandas.tseries.frequencies import Resolution, to_offset | ||
from pandas.tseries.offsets import CDay, prefix_mapping | ||
from pandas.tseries.offsets import CDay, Nano, prefix_mapping | ||
|
||
|
||
def _new_DatetimeIndex(cls, d): | ||
|
@@ -826,54 +825,57 @@ def _parsed_string_to_bounds(self, reso, parsed): | |
lower, upper: pd.Timestamp | ||
|
||
""" | ||
valid_resos = {'year', 'month', 'quarter', 'day', 'hour', 'minute', | ||
'second', 'minute', 'second', 'microsecond'} | ||
if reso not in valid_resos: | ||
raise KeyError | ||
if reso == 'year': | ||
return (Timestamp(datetime(parsed.year, 1, 1), tz=self.tz), | ||
Timestamp(datetime(parsed.year, 12, 31, 23, | ||
59, 59, 999999), tz=self.tz)) | ||
start = Timestamp(parsed.year, 1, 1) | ||
end = Timestamp(parsed.year, 12, 31, 23, 59, 59, 999999) | ||
elif reso == 'month': | ||
d = ccalendar.get_days_in_month(parsed.year, parsed.month) | ||
return (Timestamp(datetime(parsed.year, parsed.month, 1), | ||
tz=self.tz), | ||
Timestamp(datetime(parsed.year, parsed.month, d, 23, | ||
59, 59, 999999), tz=self.tz)) | ||
start = Timestamp(parsed.year, parsed.month, 1) | ||
end = Timestamp(parsed.year, parsed.month, d, 23, 59, 59, 999999) | ||
elif reso == 'quarter': | ||
qe = (((parsed.month - 1) + 2) % 12) + 1 # two months ahead | ||
d = ccalendar.get_days_in_month(parsed.year, qe) # at end of month | ||
return (Timestamp(datetime(parsed.year, parsed.month, 1), | ||
tz=self.tz), | ||
Timestamp(datetime(parsed.year, qe, d, 23, 59, | ||
59, 999999), tz=self.tz)) | ||
start = Timestamp(parsed.year, parsed.month, 1) | ||
end = Timestamp(parsed.year, qe, d, 23, 59, 59, 999999) | ||
elif reso == 'day': | ||
st = datetime(parsed.year, parsed.month, parsed.day) | ||
return (Timestamp(st, tz=self.tz), | ||
Timestamp(Timestamp(st + offsets.Day(), | ||
tz=self.tz).value - 1)) | ||
start = Timestamp(parsed.year, parsed.month, parsed.day) | ||
end = start + timedelta(days=1) - Nano(1) | ||
elif reso == 'hour': | ||
st = datetime(parsed.year, parsed.month, parsed.day, | ||
hour=parsed.hour) | ||
return (Timestamp(st, tz=self.tz), | ||
Timestamp(Timestamp(st + offsets.Hour(), | ||
tz=self.tz).value - 1)) | ||
start = Timestamp(parsed.year, parsed.month, parsed.day, | ||
parsed.hour) | ||
end = start + timedelta(hours=1) - Nano(1) | ||
elif reso == 'minute': | ||
st = datetime(parsed.year, parsed.month, parsed.day, | ||
hour=parsed.hour, minute=parsed.minute) | ||
return (Timestamp(st, tz=self.tz), | ||
Timestamp(Timestamp(st + offsets.Minute(), | ||
tz=self.tz).value - 1)) | ||
start = Timestamp(parsed.year, parsed.month, parsed.day, | ||
parsed.hour, parsed.minute) | ||
end = start + timedelta(minutes=1) - Nano(1) | ||
elif reso == 'second': | ||
st = datetime(parsed.year, parsed.month, parsed.day, | ||
hour=parsed.hour, minute=parsed.minute, | ||
second=parsed.second) | ||
return (Timestamp(st, tz=self.tz), | ||
Timestamp(Timestamp(st + offsets.Second(), | ||
tz=self.tz).value - 1)) | ||
start = Timestamp(parsed.year, parsed.month, parsed.day, | ||
parsed.hour, parsed.minute, parsed.second) | ||
end = start + timedelta(seconds=1) - Nano(1) | ||
elif reso == 'microsecond': | ||
st = datetime(parsed.year, parsed.month, parsed.day, | ||
parsed.hour, parsed.minute, parsed.second, | ||
parsed.microsecond) | ||
return (Timestamp(st, tz=self.tz), Timestamp(st, tz=self.tz)) | ||
else: | ||
raise KeyError | ||
start = Timestamp(parsed.year, parsed.month, parsed.day, | ||
parsed.hour, parsed.minute, parsed.second, | ||
parsed.microsecond) | ||
end = start + timedelta(microseconds=1) - Nano(1) | ||
# GH 24076 | ||
# If an incoming date string contained a UTC offset, need to localize | ||
# the parsed date to this offset first before aligning with the index's | ||
# timezone | ||
if parsed.tzinfo is not None: | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if self.tz is None: | ||
raise ValueError("The index must be timezone aware " | ||
"when indexing with a date string with a " | ||
"UTC offset") | ||
start = start.tz_localize(parsed.tzinfo).tz_convert(self.tz) | ||
end = end.tz_localize(parsed.tzinfo).tz_convert(self.tz) | ||
elif self.tz is not None: | ||
start = start.tz_localize(self.tz) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is this case tested? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is tested by |
||
end = end.tz_localize(self.tz) | ||
return start, end | ||
|
||
def _partial_date_slice(self, reso, parsed, use_lhs=True, use_rhs=True): | ||
is_monotonic = self.is_monotonic | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can you add a versionadded tag