Skip to content

Commit

Permalink
BUG: to_datetime not localizing Series when utc=True (#6415)
Browse files Browse the repository at this point in the history
Modify test case

Comment about test edit, move conversion logic to convert_listlike

Add new section in whatsnew and update test

Alter SQL tests
  • Loading branch information
mroeschke committed Aug 7, 2017
1 parent 929c66f commit e85263d
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 18 deletions.
37 changes: 36 additions & 1 deletion doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,42 @@ named ``.isna()`` and ``.notna()``, these are included for classes ``Categorical

The configuration option ``pd.options.mode.use_inf_as_null`` is deprecated, and ``pd.options.mode.use_inf_as_na`` is added as a replacement.

.. _whatsnew_0210.api.utc_localization_with_series:

UTC Localization with Series
^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Previously, :func:`to_datetime` did not localize datetime ``Series`` data when ``utc=True`` was passed. Now, :func:`to_datetime`
will correctly localize ``Series`` with a ``datetime64[ns, UTC]`` data type. (:issue:`6415`)

Old Behavior

.. ipython:: python

s = Series(['20130101 00:00:00'] * 10)

.. code-block:: python

In [12]: pd.to_datetime(s, utc=True)
Out[12]:
0 2013-01-01
1 2013-01-01
2 2013-01-01
3 2013-01-01
4 2013-01-01
5 2013-01-01
6 2013-01-01
7 2013-01-01
8 2013-01-01
9 2013-01-01
dtype: datetime64[ns]

New Behavior

.. ipython:: python

pd.to_datetime(s, utc=True)

.. _whatsnew_0210.api:

Other API Changes
Expand Down Expand Up @@ -266,7 +302,6 @@ Conversion
- Bug in assignment against datetime-like data with ``int`` may incorrectly convert to datetime-like (:issue:`14145`)
- Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`)


Indexing
^^^^^^^^

Expand Down
29 changes: 18 additions & 11 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,9 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
return DatetimeIndex(arg, tz=tz, name=name)
except ValueError:
pass

from pandas import Series
if isinstance(arg, Series) and utc:
arg = arg.dt.tz_localize('utc')
return arg

elif unit is not None:
Expand All @@ -379,11 +381,12 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
raise TypeError('arg must be a string, datetime, list, tuple, '
'1-d array, or Series')

arg = _ensure_object(arg)
obj_arg = _ensure_object(arg)
require_iso8601 = False

if infer_datetime_format and format is None:
format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)
format = _guess_datetime_format_for_array(obj_arg,
dayfirst=dayfirst)

if format is not None:
# There is a special fast-path for iso8601 formatted
Expand All @@ -402,46 +405,50 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
# shortcut formatting here
if format == '%Y%m%d':
try:
result = _attempt_YYYYMMDD(arg, errors=errors)
result = _attempt_YYYYMMDD(obj_arg, errors=errors)
except:
raise ValueError("cannot convert the input to "
"'%Y%m%d' date format")

# fallback
if result is None:
try:
result = tslib.array_strptime(arg, format, exact=exact,
result = tslib.array_strptime(obj_arg, format,
exact=exact,
errors=errors)
except tslib.OutOfBoundsDatetime:
if errors == 'raise':
raise
result = arg
result = obj_arg
except ValueError:
# if format was inferred, try falling back
# to array_to_datetime - terminate here
# for specified formats
if not infer_datetime_format:
if errors == 'raise':
raise
result = arg
result = obj_arg

if result is None and (format is None or infer_datetime_format):
result = tslib.array_to_datetime(
arg,
obj_arg,
errors=errors,
utc=utc,
dayfirst=dayfirst,
yearfirst=yearfirst,
require_iso8601=require_iso8601
)

from pandas import Series
if is_datetime64_dtype(result) and box:
result = DatetimeIndex(result, tz=tz, name=name)
# GH 6415
elif isinstance(arg, Series) and utc:
result = Series(result, name=name).dt.tz_localize('utc')
return result

except ValueError as e:
try:
values, tz = tslib.datetime_to_datetime64(arg)
values, tz = tslib.datetime_to_datetime64(obj_arg)
return DatetimeIndex._simple_new(values, name=name, tz=tz)
except (ValueError, TypeError):
raise e
Expand Down Expand Up @@ -506,7 +513,7 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
result = arg
elif isinstance(arg, ABCSeries):
from pandas import Series
values = _convert_listlike(arg._values, False, format)
values = _convert_listlike(arg, False, format, name=arg.name)
result = Series(values, index=arg.index, name=arg.name)
elif isinstance(arg, (ABCDataFrame, MutableMapping)):
result = _assemble_from_unit_mappings(arg, errors=errors)
Expand Down
32 changes: 32 additions & 0 deletions pandas/tests/indexes/datetimes/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,38 @@ def test_to_datetime_utc_is_true(self):
expected = pd.DatetimeIndex(data=date_range)
tm.assert_index_equal(result, expected)

def test_to_datetime_utc_true_with_series(self):
# GH 6415: UTC=True with Series
# Scenario 1: string data parsed with an explicit format. The Series
# result is expected to hold UTC-aware timestamps, matching what the
# same data produces when passed as an Index.
data = ['20100102 121314', '20100102 121315']
expected_data = [pd.Timestamp('2010-01-02 12:13:14', tz='utc'),
pd.Timestamp('2010-01-02 12:13:15', tz='utc')]
result = pd.to_datetime(pd.Series(data),
format='%Y%m%d %H%M%S',
utc=True)
expected = pd.Series(expected_data)
tm.assert_series_equal(result, expected)
# Same input as an Index: the expected result is a DatetimeIndex built
# from the same UTC-aware timestamps.
result = pd.to_datetime(pd.Index(data),
format='%Y%m%d %H%M%S',
utc=True)
expected = pd.DatetimeIndex(expected_data)
tm.assert_index_equal(result, expected)

# GH 15760 UTC=True with Series
# Scenario 2: a numeric (float) value in a Series, with no format given;
# the expected element is the UTC-aware Timestamp built from that value.
ts = 1.5e18
result = pd.to_datetime(pd.Series([ts]), utc=True)
expected = pd.Series([pd.Timestamp(ts, tz='utc')])
tm.assert_series_equal(result, expected)

# Scenario 3: strings carrying a -01:00 offset. The expected values are
# the equivalent UTC instants (01:00 UTC), not the local wall time.
test_dates = ['2013-01-01 00:00:00-01:00'] * 10
expected_data = [pd.Timestamp('20130101 01:00:00', tz='utc')] * 10
expected = pd.Series(expected_data)
ser = Series(test_dates)
result = pd.to_datetime(ser, utc=True)
tm.assert_series_equal(result, expected)
# Scenario 4: the same strings first coerced to a datetime64[ns] Series;
# passing that Series with utc=True must give the same expected result.
ser_naive = Series(test_dates, dtype='datetime64[ns]')
result = pd.to_datetime(ser_naive, utc=True)
tm.assert_series_equal(result, expected)

def test_to_datetime_tz_psycopg2(self):

# xref 8260
Expand Down
12 changes: 7 additions & 5 deletions pandas/tests/io/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -606,14 +606,15 @@ def test_date_parsing(self):
# No Parsing
df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn)
assert not issubclass(df.DateCol.dtype.type, np.datetime64)

# Now that GH 6415 is fixed, dates are automatically parsed to UTC
utc_dtype = pd.core.dtypes.dtypes.DatetimeTZDtypeType
df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
parse_dates=['DateCol'])
assert issubclass(df.DateCol.dtype.type, np.datetime64)
assert issubclass(df.DateCol.dtype.type, utc_dtype)

df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
parse_dates={'DateCol': '%Y-%m-%d %H:%M:%S'})
assert issubclass(df.DateCol.dtype.type, np.datetime64)
assert issubclass(df.DateCol.dtype.type, utc_dtype)

df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
parse_dates=['IntDateCol'])
Expand All @@ -631,8 +632,9 @@ def test_date_and_index(self):
df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
index_col='DateCol',
parse_dates=['DateCol', 'IntDateCol'])

assert issubclass(df.index.dtype.type, np.datetime64)
# Now that GH 6415 is fixed, dates are automatically parsed to UTC
utc_dtype = pd.core.dtypes.dtypes.DatetimeTZDtypeType
assert issubclass(df.index.dtype.type, utc_dtype)
assert issubclass(df.IntDateCol.dtype.type, np.datetime64)

def test_timedelta(self):
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/test_multilevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -2130,7 +2130,8 @@ def test_set_index_datetime(self):
'2011-07-19 08:00:00', '2011-07-19 09:00:00'],
'value': range(6)})
df.index = pd.to_datetime(df.pop('datetime'), utc=True)
df.index = df.index.tz_localize('UTC').tz_convert('US/Pacific')
# Removed the tz_localize('UTC') call below now that GH 6415 is fixed
df.index = df.index.tz_convert('US/Pacific')

expected = pd.DatetimeIndex(['2011-07-19 07:00:00',
'2011-07-19 08:00:00',
Expand Down

0 comments on commit e85263d

Please sign in to comment.