Skip to content

Commit

Permalink
BUG: Fixes rounding error in Timestamp.floor() (#19240)
Browse files Browse the repository at this point in the history
  • Loading branch information
cbertinato authored and jreback committed Feb 7, 2018
1 parent 6b0c7e7 commit 5052842
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 34 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -620,6 +620,7 @@ Datetimelike
- Bug in :func:`~DataFrame.pct_change` using ``periods`` and ``freq`` returned different length outputs (:issue:`7292`)
- Bug in comparison of :class:`DatetimeIndex` against ``None`` or ``datetime.date`` objects raising ``TypeError`` for ``==`` and ``!=`` comparisons instead of all-``False`` and all-``True``, respectively (:issue:`19301`)
- Bug in :class:`Timestamp` and :func:`to_datetime` where a string representing a barely out-of-bounds timestamp would be incorrectly rounded down instead of raising ``OutOfBoundsDatetime`` (:issue:`19382`)
- Bug in :func:`Timestamp.floor` and :func:`DatetimeIndex.floor` where timestamps far in the future or past were not rounded correctly (:issue:`19206`)
-

Timezones
Expand Down
60 changes: 42 additions & 18 deletions pandas/_libs/tslibs/timestamps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,46 @@ cdef inline object create_timestamp_from_ts(int64_t value,
return ts_base


def round_ns(values, rounder, freq):
    """
    Apply a rounding function to nanosecond value(s) at a given frequency.

    Parameters
    ----------
    values : int or :obj:`ndarray`
        Integer value(s) to round, expressed in the same unit as
        ``to_offset(freq).nanos`` (nanoseconds).
    rounder : function
        Rounding function applied to the scaled (float) values.  Its result
        must support ``.astype('i8')`` (e.g. ``np.floor`` or ``np.ceil``).
    freq : str, obj
        Frequency to round to; converted with ``to_offset``.

    Returns
    -------
    int or :obj:`ndarray`
        ``values`` rounded to a multiple of the frequency's nanoseconds.

    Warns
    -----
    A warning is emitted when the frequency is at least 1000 ns but not a
    multiple of 1000 ns, because precision will be lost.
    """
    from pandas.tseries.frequencies import to_offset
    unit = to_offset(freq).nanos
    if unit < 1000:
        # for nano rounding, work with the last 6 digits separately
        # due to float precision
        buff = 1000000
        # Divide by the unit directly instead of multiplying by its
        # reciprocal: ``n * (1 / float(n))`` is not always exactly 1.0
        # (e.g. n == 49 yields 0.9999999999999999), which made floor()
        # drop a whole unit for values that were already exact multiples.
        r = (buff * (values // buff) + unit *
             (rounder((values % buff) / float(unit))).astype('i8'))
    else:
        if unit % 1000 != 0:
            msg = 'Precision will be lost using frequency: {}'
            warnings.warn(msg.format(freq))

        # GH19206
        # to deal with round-off when unit is large
        if unit >= 1e9:
            divisor = 10 ** int(np.log10(unit / 1e7))
        else:
            divisor = 10

        r = (unit * rounder((values * (divisor / float(unit))) / divisor)
             .astype('i8'))

    return r


# This is PITA. Because we inherit from datetime, which has very specific
# construction requirements, we need to do object instantiation in python
# (see Timestamp class above). This will serve as a C extension type that
Expand Down Expand Up @@ -581,28 +621,12 @@ class Timestamp(_Timestamp):
return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq)

def _round(self, freq, rounder):

cdef:
int64_t unit, r, value, buff = 1000000
object result

from pandas.tseries.frequencies import to_offset
unit = to_offset(freq).nanos
if self.tz is not None:
value = self.tz_localize(None).value
else:
value = self.value
if unit < 1000 and unit % 1000 != 0:
# for nano rounding, work with the last 6 digits separately
# due to float precision
r = (buff * (value // buff) + unit *
(rounder((value % buff) / float(unit))).astype('i8'))
elif unit >= 1000 and unit % 1000 != 0:
msg = 'Precision will be lost using frequency: {}'
warnings.warn(msg.format(freq))
r = (unit * rounder(value / float(unit)).astype('i8'))
else:
r = (unit * rounder(value / float(unit)).astype('i8'))

r = round_ns(value, rounder, freq)
result = Timestamp(r, unit='ns')
if self.tz is not None:
result = result.tz_localize(self.tz)
Expand Down
17 changes: 2 additions & 15 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from pandas._libs import lib, iNaT, NaT
from pandas._libs.tslibs.period import Period
from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds
from pandas._libs.tslibs.timestamps import round_ns

from pandas.core.indexes.base import Index, _index_shared_docs
from pandas.util._decorators import Appender, cache_readonly
Expand Down Expand Up @@ -90,23 +91,9 @@ class TimelikeOps(object):
""")

def _round(self, freq, rounder):

from pandas.tseries.frequencies import to_offset
unit = to_offset(freq).nanos
# round the local times
values = _ensure_datetimelike_to_i8(self)
if unit < 1000 and unit % 1000 != 0:
# for nano rounding, work with the last 6 digits separately
# due to float precision
buff = 1000000
result = (buff * (values // buff) + unit *
(rounder((values % buff) / float(unit))).astype('i8'))
elif unit >= 1000 and unit % 1000 != 0:
msg = 'Precision will be lost using frequency: {}'
warnings.warn(msg.format(freq))
result = (unit * rounder(values / float(unit)).astype('i8'))
else:
result = (unit * rounder(values / float(unit)).astype('i8'))
result = round_ns(values, rounder, freq)
result = self._maybe_mask_results(result, fill_value=NaT)

attribs = self._get_attributes_dict()
Expand Down
21 changes: 21 additions & 0 deletions pandas/tests/indexes/datetimes/test_scalar_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,27 @@ def test_round(self, tz):
ts = '2016-10-17 12:00:00.001501031'
DatetimeIndex([ts]).round('1010ns')

@pytest.mark.parametrize('test_input, rounder, freq, expected', [
    (['2117-01-01 00:00:45'], 'floor', '15s', ['2117-01-01 00:00:45']),
    (['2117-01-01 00:00:45'], 'ceil', '15s', ['2117-01-01 00:00:45']),
    (['2117-01-01 00:00:45.000000012'], 'floor', '10ns',
     ['2117-01-01 00:00:45.000000010']),
    (['1823-01-01 00:00:01.000000012'], 'ceil', '10ns',
     ['1823-01-01 00:00:01.000000020']),
    (['1823-01-01 00:00:01'], 'floor', '1s', ['1823-01-01 00:00:01']),
    (['1823-01-01 00:00:01'], 'ceil', '1s', ['1823-01-01 00:00:01']),
    (('NaT', '1823-01-01 00:00:01'), 'floor', '1s',
     ('NaT', '1823-01-01 00:00:01')),
    (('NaT', '1823-01-01 00:00:01'), 'ceil', '1s',
     ('NaT', '1823-01-01 00:00:01'))
])
def test_ceil_floor_edge(self, tz, test_input, rounder, freq, expected):
    """Check floor/ceil on a DatetimeIndex against the expected index.

    Cases cover dates far from the epoch, nanosecond frequencies and
    indexes containing NaT.
    """
    index = DatetimeIndex(list(test_input))
    # ``rounder`` names the method to call: 'floor' or 'ceil'.
    rounded = getattr(index, rounder)(freq)
    wanted = DatetimeIndex(list(expected))
    assert wanted.equals(rounded)

# ----------------------------------------------------------------
# DatetimeIndex.normalize

Expand Down
25 changes: 24 additions & 1 deletion pandas/tests/scalar/timestamp/test_unary_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from pandas.compat import PY3
from pandas._libs.tslibs.frequencies import _INVALID_FREQ_ERROR
from pandas import Timestamp
from pandas import Timestamp, NaT


class TestTimestampUnaryOps(object):
Expand Down Expand Up @@ -93,6 +93,29 @@ def test_round_frequencies(self, freq, expected):
result = stamp.round(freq=freq)
assert result == expected

@pytest.mark.parametrize('test_input, rounder, freq, expected', [
    ('2117-01-01 00:00:45', 'floor', '15s', '2117-01-01 00:00:45'),
    ('2117-01-01 00:00:45', 'ceil', '15s', '2117-01-01 00:00:45'),
    ('2117-01-01 00:00:45.000000012', 'floor', '10ns',
     '2117-01-01 00:00:45.000000010'),
    ('1823-01-01 00:00:01.000000012', 'ceil', '10ns',
     '1823-01-01 00:00:01.000000020'),
    ('1823-01-01 00:00:01', 'floor', '1s', '1823-01-01 00:00:01'),
    ('1823-01-01 00:00:01', 'ceil', '1s', '1823-01-01 00:00:01'),
    ('NaT', 'floor', '1s', 'NaT'),
    ('NaT', 'ceil', '1s', 'NaT')
])
def test_ceil_floor_edge(self, test_input, rounder, freq, expected):
    """Check floor/ceil on a scalar Timestamp against the expected value.

    Cases cover dates far from the epoch, nanosecond frequencies and NaT.
    """
    stamp = Timestamp(test_input)
    # ``rounder`` names the method to call: 'floor' or 'ceil'.
    rounded = getattr(stamp, rounder)(freq)

    if stamp is not NaT:
        assert rounded == Timestamp(expected)
    else:
        # NaT is compared by identity rather than with ``==``.
        assert rounded is NaT

def test_ceil(self):
dt = Timestamp('20130101 09:10:11')
result = dt.ceil('D')
Expand Down

0 comments on commit 5052842

Please sign in to comment.