From 50528421ae79b27a26b32ba715b17271c8dfda7e Mon Sep 17 00:00:00 2001 From: cbertinato Date: Wed, 7 Feb 2018 10:25:38 -0500 Subject: [PATCH] BUG: Fixes rounding error in Timestamp.floor() (#19240) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/_libs/tslibs/timestamps.pyx | 60 +++++++++++++------ pandas/core/indexes/datetimelike.py | 17 +----- .../indexes/datetimes/test_scalar_compat.py | 21 +++++++ .../tests/scalar/timestamp/test_unary_ops.py | 25 +++++++- 5 files changed, 90 insertions(+), 34 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 1c6b698605521..a7300f7d1ceb0 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -620,6 +620,7 @@ Datetimelike - Bug in :func:`~DataFrame.pct_change` using ``periods`` and ``freq`` returned different length outputs (:issue:`7292`) - Bug in comparison of :class:`DatetimeIndex` against ``None`` or ``datetime.date`` objects raising ``TypeError`` for ``==`` and ``!=`` comparisons instead of all-``False`` and all-``True``, respectively (:issue:`19301`) - Bug in :class:`Timestamp` and :func:`to_datetime` where a string representing a barely out-of-bounds timestamp would be incorrectly rounded down instead of raising ``OutOfBoundsDatetime`` (:issue:`19382`) +- Bug in :func:`Timestamp.floor` :func:`DatetimeIndex.floor` where time stamps far in the future and past were not rounded correctly (:issue:`19206`) - Timezones diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 47179a4e1d761..ed77916a1d887 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -58,6 +58,46 @@ cdef inline object create_timestamp_from_ts(int64_t value, return ts_base +def round_ns(values, rounder, freq): + """ + Applies rounding function at given frequency + + Parameters + ---------- + values : int, :obj:`ndarray` + rounder : function + freq : str, obj + + Returns + ------- + int or :obj:`ndarray` + """ + from pandas.tseries.frequencies import to_offset + unit = to_offset(freq).nanos + if unit < 1000: + # for nano rounding, work with the last 6 digits separately + # due to float precision + buff = 1000000 + r = (buff * (values // buff) + unit * + (rounder((values % buff) * (1 / float(unit)))).astype('i8')) + else: + if unit % 1000 != 0: + msg = 'Precision will be lost using frequency: {}' + warnings.warn(msg.format(freq)) + + # GH19206 + # to deal with round-off when unit is large + if unit >= 1e9: + divisor = 10 ** int(np.log10(unit / 1e7)) + else: + divisor = 10 + + r = (unit * rounder((values * (divisor / float(unit))) / divisor) + .astype('i8')) + + return r + + # This is PITA. Because we inherit from datetime, which has very specific # construction requirements, we need to do object instantiation in python # (see Timestamp class above). This will serve as a C extension type that @@ -581,28 +621,12 @@ class Timestamp(_Timestamp): return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq) def _round(self, freq, rounder): - - cdef: - int64_t unit, r, value, buff = 1000000 - object result - - from pandas.tseries.frequencies import to_offset - unit = to_offset(freq).nanos if self.tz is not None: value = self.tz_localize(None).value else: value = self.value - if unit < 1000 and unit % 1000 != 0: - # for nano rounding, work with the last 6 digits separately - # due to float precision - r = (buff * (value // buff) + unit * - (rounder((value % buff) / float(unit))).astype('i8')) - elif unit >= 1000 and unit % 1000 != 0: - msg = 'Precision will be lost using frequency: {}' - warnings.warn(msg.format(freq)) - r = (unit * rounder(value / float(unit)).astype('i8')) - else: - r = (unit * rounder(value / float(unit)).astype('i8')) + + r = round_ns(value, rounder, freq) result = Timestamp(r, unit='ns') if self.tz is not None: result = result.tz_localize(self.tz) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 8e77c7a7fa48c..4a526955d9bf4 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -36,6 +36,7 @@ from pandas._libs import lib, iNaT, NaT from pandas._libs.tslibs.period import Period from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds +from pandas._libs.tslibs.timestamps import round_ns from pandas.core.indexes.base import Index, _index_shared_docs from pandas.util._decorators import Appender, cache_readonly @@ -90,23 +91,9 @@ class TimelikeOps(object): """) def _round(self, freq, rounder): - - from pandas.tseries.frequencies import to_offset - unit = to_offset(freq).nanos # round the local times values = _ensure_datetimelike_to_i8(self) - if unit < 1000 and unit % 1000 != 0: - # for nano rounding, work with the last 6 digits separately - # due to float precision - buff = 1000000 - result = (buff * (values // buff) + unit * - (rounder((values % buff) / float(unit))).astype('i8')) - elif unit >= 1000 and unit % 1000 != 0: - msg = 'Precision will be lost using frequency: {}' - warnings.warn(msg.format(freq)) - result = (unit * rounder(values / float(unit)).astype('i8')) - else: - result = (unit * rounder(values / float(unit)).astype('i8')) + result = round_ns(values, rounder, freq) result = self._maybe_mask_results(result, fill_value=NaT) attribs = self._get_attributes_dict() diff --git a/pandas/tests/indexes/datetimes/test_scalar_compat.py b/pandas/tests/indexes/datetimes/test_scalar_compat.py index 111f68ba14775..83e7a0cd68d63 100644 --- a/pandas/tests/indexes/datetimes/test_scalar_compat.py +++ b/pandas/tests/indexes/datetimes/test_scalar_compat.py @@ -126,6 +126,27 @@ def test_round(self, tz): ts = '2016-10-17 12:00:00.001501031' DatetimeIndex([ts]).round('1010ns') + @pytest.mark.parametrize('test_input, rounder, freq, expected', [ + (['2117-01-01 00:00:45'], 'floor', '15s', ['2117-01-01 00:00:45']), + (['2117-01-01 00:00:45'], 'ceil', '15s', ['2117-01-01 00:00:45']), + (['2117-01-01 00:00:45.000000012'], 'floor', '10ns', + ['2117-01-01 00:00:45.000000010']), + (['1823-01-01 00:00:01.000000012'], 'ceil', '10ns', + ['1823-01-01 00:00:01.000000020']), + (['1823-01-01 00:00:01'], 'floor', '1s', ['1823-01-01 00:00:01']), + (['1823-01-01 00:00:01'], 'ceil', '1s', ['1823-01-01 00:00:01']), + (('NaT', '1823-01-01 00:00:01'), 'floor', '1s', + ('NaT', '1823-01-01 00:00:01')), + (('NaT', '1823-01-01 00:00:01'), 'ceil', '1s', + ('NaT', '1823-01-01 00:00:01')) + ]) + def test_ceil_floor_edge(self, tz, test_input, rounder, freq, expected): + dt = DatetimeIndex(list(test_input)) + func = getattr(dt, rounder) + result = func(freq) + expected = DatetimeIndex(list(expected)) + assert expected.equals(result) + # ---------------------------------------------------------------- # DatetimeIndex.normalize diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index 70c7308dd3991..8a6989c909cb2 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -10,7 +10,7 @@ from pandas.compat import PY3 from pandas._libs.tslibs.frequencies import _INVALID_FREQ_ERROR -from pandas import Timestamp +from pandas import Timestamp, NaT class TestTimestampUnaryOps(object): @@ -93,6 +93,29 @@ def test_round_frequencies(self, freq, expected): result = stamp.round(freq=freq) assert result == expected + @pytest.mark.parametrize('test_input, rounder, freq, expected', [ + ('2117-01-01 00:00:45', 'floor', '15s', '2117-01-01 00:00:45'), + ('2117-01-01 00:00:45', 'ceil', '15s', '2117-01-01 00:00:45'), + ('2117-01-01 00:00:45.000000012', 'floor', '10ns', + '2117-01-01 00:00:45.000000010'), + ('1823-01-01 00:00:01.000000012', 'ceil', '10ns', + '1823-01-01 00:00:01.000000020'), + ('1823-01-01 00:00:01', 'floor', '1s', '1823-01-01 00:00:01'), + ('1823-01-01 00:00:01', 'ceil', '1s', '1823-01-01 00:00:01'), + ('NaT', 'floor', '1s', 'NaT'), + ('NaT', 'ceil', '1s', 'NaT') + ]) + def test_ceil_floor_edge(self, test_input, rounder, freq, expected): + dt = Timestamp(test_input) + func = getattr(dt, rounder) + result = func(freq) + + if dt is NaT: + assert result is NaT + else: + expected = Timestamp(expected) + assert result == expected + def test_ceil(self): dt = Timestamp('20130101 09:10:11') result = dt.ceil('D')