diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 2f61b71d060198..13a0c5fa6a222e 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -203,6 +203,42 @@ named ``.isna()`` and ``.notna()``, these are included for classes ``Categorical The configuration option ``pd.options.mode.use_inf_as_null`` is deprecated, and ``pd.options.mode.use_inf_as_na`` is added as a replacement. +.. _whatsnew_0210.api.utc_localization_with_series: + +UTC Localization with Series +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, :func:`to_datetime` did not localize datetime ``Series`` data when ``utc=True`` was passed. Now, :func:`to_datetime` +will correctly localize ``Series`` with a ``datetime64[ns, UTC]`` data type. (:issue:`6415`) + + Old Behavior + + .. ipython:: python + + s = Series(['20130101 00:00:00'] * 10) + + .. code-block:: python + + In [12]: pd.to_datetime(s, utc=True) + Out[12]: + 0 2013-01-01 + 1 2013-01-01 + 2 2013-01-01 + 3 2013-01-01 + 4 2013-01-01 + 5 2013-01-01 + 6 2013-01-01 + 7 2013-01-01 + 8 2013-01-01 + 9 2013-01-01 + dtype: datetime64[ns] + + New Behavior + + .. ipython:: python + + pd.to_datetime(s, utc=True) + .. 
_whatsnew_0210.api: Other API Changes @@ -266,7 +302,6 @@ Conversion - Bug in assignment against datetime-like data with ``int`` may incorrectly converte to datetime-like (:issue:`14145`) - Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`) - Indexing ^^^^^^^^ diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index a1f323aff7c1ac..9784b7d7f3a3f8 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -359,7 +359,9 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): return DatetimeIndex(arg, tz=tz, name=name) except ValueError: pass - + from pandas import Series + if isinstance(arg, Series) and utc: + arg = arg.dt.tz_localize('utc') return arg elif unit is not None: @@ -379,11 +381,12 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): raise TypeError('arg must be a string, datetime, list, tuple, ' '1-d array, or Series') - arg = _ensure_object(arg) + obj_arg = _ensure_object(arg) require_iso8601 = False if infer_datetime_format and format is None: - format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst) + format = _guess_datetime_format_for_array(obj_arg, + dayfirst=dayfirst) if format is not None: # There is a special fast-path for iso8601 formatted @@ -402,7 +405,7 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): # shortcut formatting here if format == '%Y%m%d': try: - result = _attempt_YYYYMMDD(arg, errors=errors) + result = _attempt_YYYYMMDD(obj_arg, errors=errors) except: raise ValueError("cannot convert the input to " "'%Y%m%d' date format") @@ -410,12 +413,13 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): # fallback if result is None: try: - result = tslib.array_strptime(arg, format, exact=exact, + result = tslib.array_strptime(obj_arg, format, + exact=exact, errors=errors) except tslib.OutOfBoundsDatetime: if errors == 'raise': raise - result = arg + result = obj_arg 
except ValueError: # if format was inferred, try falling back # to array_to_datetime - terminate here @@ -423,25 +427,28 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): if not infer_datetime_format: if errors == 'raise': raise - result = arg + result = obj_arg if result is None and (format is None or infer_datetime_format): result = tslib.array_to_datetime( - arg, + obj_arg, errors=errors, utc=utc, dayfirst=dayfirst, yearfirst=yearfirst, require_iso8601=require_iso8601 ) - + from pandas import Series if is_datetime64_dtype(result) and box: result = DatetimeIndex(result, tz=tz, name=name) + # GH 6415 + elif isinstance(arg, Series) and utc: + result = Series(result, name=name).dt.tz_localize('utc') return result except ValueError as e: try: - values, tz = tslib.datetime_to_datetime64(arg) + values, tz = tslib.datetime_to_datetime64(obj_arg) return DatetimeIndex._simple_new(values, name=name, tz=tz) except (ValueError, TypeError): raise e @@ -506,7 +513,7 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): result = arg elif isinstance(arg, ABCSeries): from pandas import Series - values = _convert_listlike(arg._values, False, format) + values = _convert_listlike(arg, False, format, name=arg.name) result = Series(values, index=arg.index, name=arg.name) elif isinstance(arg, (ABCDataFrame, MutableMapping)): result = _assemble_from_unit_mappings(arg, errors=errors) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 7ff9c2b23cbfb7..414bc014e32f03 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -270,6 +270,38 @@ def test_to_datetime_utc_is_true(self): expected = pd.DatetimeIndex(data=date_range) tm.assert_index_equal(result, expected) + def test_to_datetime_utc_true_with_series(self): + # GH 6415: UTC=True with Series + data = ['20100102 121314', '20100102 121315'] + expected_data = [pd.Timestamp('2010-01-02 12:13:14', tz='utc'), 
+ pd.Timestamp('2010-01-02 12:13:15', tz='utc')] + result = pd.to_datetime(pd.Series(data), + format='%Y%m%d %H%M%S', + utc=True) + expected = pd.Series(expected_data) + tm.assert_series_equal(result, expected) + result = pd.to_datetime(pd.Index(data), + format='%Y%m%d %H%M%S', + utc=True) + expected = pd.DatetimeIndex(expected_data) + tm.assert_index_equal(result, expected) + + # GH 15760 UTC=True with Series + ts = 1.5e18 + result = pd.to_datetime(pd.Series([ts]), utc=True) + expected = pd.Series([pd.Timestamp(ts, tz='utc')]) + tm.assert_series_equal(result, expected) + + test_dates = ['2013-01-01 00:00:00-01:00'] * 10 + expected_data = [pd.Timestamp('20130101 01:00:00', tz='utc')] * 10 + expected = pd.Series(expected_data) + ser = Series(test_dates) + result = pd.to_datetime(ser, utc=True) + tm.assert_series_equal(result, expected) + ser_naive = Series(test_dates, dtype='datetime64[ns]') + result = pd.to_datetime(ser_naive, utc=True) + tm.assert_series_equal(result, expected) + def test_to_datetime_tz_psycopg2(self): # xref 8260 diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index a7c42391effe66..358e450ac0e928 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -606,14 +606,15 @@ def test_date_parsing(self): # No Parsing df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn) assert not issubclass(df.DateCol.dtype.type, np.datetime64) - + # Now that GH 6415 is fixed, dates are automatically parsed to UTC + utc_dtype = pd.core.dtypes.dtypes.DatetimeTZDtypeType df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn, parse_dates=['DateCol']) - assert issubclass(df.DateCol.dtype.type, np.datetime64) + assert issubclass(df.DateCol.dtype.type, utc_dtype) df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn, parse_dates={'DateCol': '%Y-%m-%d %H:%M:%S'}) - assert issubclass(df.DateCol.dtype.type, np.datetime64) + assert issubclass(df.DateCol.dtype.type, utc_dtype) df = 
sql.read_sql_query("SELECT * FROM types_test_data", self.conn, parse_dates=['IntDateCol']) @@ -631,8 +632,9 @@ def test_date_and_index(self): df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn, index_col='DateCol', parse_dates=['DateCol', 'IntDateCol']) - - assert issubclass(df.index.dtype.type, np.datetime64) + # Now that GH 6415 is fixed, dates are automatically parsed to UTC + utc_dtype = pd.core.dtypes.dtypes.DatetimeTZDtypeType + assert issubclass(df.index.dtype.type, utc_dtype) assert issubclass(df.IntDateCol.dtype.type, np.datetime64) def test_timedelta(self): diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 0b2dc9ba70f034..414cf6bc3125f1 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2130,7 +2130,8 @@ def test_set_index_datetime(self): '2011-07-19 08:00:00', '2011-07-19 09:00:00'], 'value': range(6)}) df.index = pd.to_datetime(df.pop('datetime'), utc=True) - df.index = df.index.tz_localize('UTC').tz_convert('US/Pacific') + # Removed tz_localize('UTC') below after GH 6415 was fixed + df.index = df.index.tz_convert('US/Pacific') expected = pd.DatetimeIndex(['2011-07-19 07:00:00', '2011-07-19 08:00:00',