Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Fix Period and PeriodIndex support of combined offsets aliases #13874

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v0.19.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -783,6 +783,8 @@ Deprecations
- ``pd.tseries.util.pivot_annual`` is deprecated. Use ``pivot_table`` as alternative, an example is :ref:`here <cookbook.pivot>` (:issue:`736`)
- ``pd.tseries.util.isleapyear`` has been deprecated and will be removed in a subsequent release. Datetime-likes now have a ``.is_leap_year`` property. (:issue:`13727`)
- ``Panel4D`` and ``PanelND`` constructors are deprecated and will be removed in a future version. The recommended way to represent these types of n-dimensional data is with the `xarray package <http://xarray.pydata.org/en/stable/>`__. Pandas provides a :meth:`~Panel4D.to_xarray` method to automate this conversion. (:issue:`13564`)
- ``pandas.tseries.frequencies.get_standard_freq`` is deprecated. Use ``pandas.tseries.frequencies.to_offset(freq).rule_code`` instead. (:issue:`13874`)
- ``pandas.tseries.frequencies.to_offset``'s ``freqstr`` keyword is deprecated in favor of ``freq``. (:issue:`13874`)

.. _whatsnew_0190.prior_deprecations:

Expand Down Expand Up @@ -968,3 +970,4 @@ Bug Fixes
- Bug in ``pd.read_csv`` in Python 2.x with non-UTF8 encoded, multi-character separated data (:issue:`3404`)

- Bug in ``Index`` where the ``KeyError`` message displays an incorrect column when the column is not in the df and the columns contain duplicate values (:issue:`13822`)
- Bug in ``Period`` and ``PeriodIndex`` creating wrong dates when frequency has combined offset aliases (:issue:`13874`)
8 changes: 7 additions & 1 deletion pandas/src/period.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -739,7 +739,7 @@ cdef class _Period(object):
msg = 'Input cannot be converted to Period(freq={0})'
raise IncompatibleFrequency(msg.format(self.freqstr))
elif isinstance(other, offsets.DateOffset):
freqstr = frequencies.get_standard_freq(other)
freqstr = other.rule_code
base = frequencies.get_base_alias(freqstr)
if base == self.freq.rule_code:
ordinal = self.ordinal + other.n
Expand Down Expand Up @@ -806,6 +806,7 @@ cdef class _Period(object):
-------
resampled : Period
"""
freq = self._maybe_convert_freq(freq)
how = _validate_end_alias(how)
base1, mult1 = frequencies.get_freq_code(self.freq)
base2, mult2 = frequencies.get_freq_code(freq)
Expand Down Expand Up @@ -849,6 +850,8 @@ cdef class _Period(object):
-------
Timestamp
"""
if freq is not None:
freq = self._maybe_convert_freq(freq)
how = _validate_end_alias(how)

if freq is None:
Expand Down Expand Up @@ -1122,6 +1125,9 @@ class Period(_Period):

cdef _Period self

if freq is not None:
freq = cls._maybe_convert_freq(freq)

if ordinal is not None and value is not None:
raise ValueError(("Only value or ordinal but not both should be "
"given but not both"))
Expand Down
85 changes: 58 additions & 27 deletions pandas/tseries/frequencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import pandas.core.algorithms as algos
from pandas.core.algorithms import unique
from pandas.tseries.offsets import DateOffset
from pandas.util.decorators import cache_readonly
from pandas.util.decorators import cache_readonly, deprecate_kwarg
import pandas.tseries.offsets as offsets
import pandas.lib as lib
import pandas.tslib as tslib
Expand Down Expand Up @@ -386,51 +386,85 @@ def get_period_alias(offset_str):
_INVALID_FREQ_ERROR = "Invalid frequency: {0}"


def to_offset(freqstr):
@deprecate_kwarg(old_arg_name='freqstr', new_arg_name='freq')
def to_offset(freq):
"""
Return DateOffset object from string representation or
Timedelta object
Return DateOffset object from string or tuple representation
or datetime.timedelta object

Parameters
----------
freq : str, tuple, datetime.timedelta, DateOffset or None

Returns
-------
delta : DateOffset
None if freq is None

Raises
------
ValueError
If freq is an invalid frequency

See Also
--------
pandas.DateOffset

Examples
--------
>>> to_offset('5Min')
Minute(5)
>>> to_offset('5min')
<5 * Minutes>

>>> to_offset('1D1H')
<25 * Hours>

>>> to_offset(('W', 2))
<2 * Weeks: weekday=6>

>>> to_offset((2, 'B'))
<2 * BusinessDays>

>>> to_offset(datetime.timedelta(days=1))
<Day>

>>> to_offset(Hour())
<Hour>
"""
if freqstr is None:
if freq is None:
return None

if isinstance(freqstr, DateOffset):
return freqstr
if isinstance(freq, DateOffset):
return freq

if isinstance(freqstr, tuple):
name = freqstr[0]
stride = freqstr[1]
if isinstance(freq, tuple):
name = freq[0]
stride = freq[1]
if isinstance(stride, compat.string_types):
name, stride = stride, name
name, _ = _base_and_stride(name)
delta = get_offset(name) * stride

elif isinstance(freqstr, timedelta):
elif isinstance(freq, timedelta):
delta = None
freqstr = Timedelta(freqstr)
freq = Timedelta(freq)
try:
for name in freqstr.components._fields:
for name in freq.components._fields:
offset = _name_to_offset_map[name]
stride = getattr(freqstr.components, name)
stride = getattr(freq.components, name)
if stride != 0:
offset = stride * offset
if delta is None:
delta = offset
else:
delta = delta + offset
except Exception:
raise ValueError(_INVALID_FREQ_ERROR.format(freqstr))
raise ValueError(_INVALID_FREQ_ERROR.format(freq))

else:
delta = None
stride_sign = None
try:
for stride, name, _ in opattern.findall(freqstr):
for stride, name, _ in opattern.findall(freq):
offset = get_offset(name)
if stride_sign is None:
stride_sign = -1 if stride.startswith('-') else 1
Expand All @@ -443,10 +477,10 @@ def to_offset(freqstr):
else:
delta = delta + offset
except Exception:
raise ValueError(_INVALID_FREQ_ERROR.format(freqstr))
raise ValueError(_INVALID_FREQ_ERROR.format(freq))

if delta is None:
raise ValueError(_INVALID_FREQ_ERROR.format(freqstr))
raise ValueError(_INVALID_FREQ_ERROR.format(freq))

return delta

Expand Down Expand Up @@ -542,14 +576,11 @@ def get_standard_freq(freq):
"""
Return the standardized frequency string
"""
if freq is None:
return None

if isinstance(freq, DateOffset):
return freq.rule_code

code, stride = get_freq_code(freq)
return _get_freq_str(code, stride)
msg = ("get_standard_freq is deprecated. Use to_offset(freq).rule_code "
"instead.")
warnings.warn(msg, FutureWarning, stacklevel=2)
return to_offset(freq).rule_code

# ---------------------------------------------------------------------
# Period codes
Expand Down
13 changes: 11 additions & 2 deletions pandas/tseries/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def dt64arr_to_periodarr(data, freq, tz):
if data.dtype != np.dtype('M8[ns]'):
raise ValueError('Wrong dtype: %s' % data.dtype)

freq = Period._maybe_convert_freq(freq)
base, mult = _gfc(freq)
return period.dt64arr_to_periodarr(data.view('i8'), base, tz)

Expand Down Expand Up @@ -206,6 +207,9 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None,

@classmethod
def _generate_range(cls, start, end, periods, freq, fields):
if freq is not None:
freq = Period._maybe_convert_freq(freq)

field_count = len(fields)
if com._count_not_none(start, end) > 0:
if field_count > 0:
Expand All @@ -222,6 +226,9 @@ def _generate_range(cls, start, end, periods, freq, fields):

@classmethod
def _from_arraylike(cls, data, freq, tz):
if freq is not None:
freq = Period._maybe_convert_freq(freq)

if not isinstance(data, (np.ndarray, PeriodIndex,
DatetimeIndex, Int64Index)):
if is_scalar(data) or isinstance(data, Period):
Expand Down Expand Up @@ -478,7 +485,7 @@ def asfreq(self, freq=None, how='E'):
"""
how = _validate_end_alias(how)

freq = frequencies.get_standard_freq(freq)
freq = Period._maybe_convert_freq(freq)

base1, mult1 = _gfc(self.freq)
base2, mult2 = _gfc(freq)
Expand Down Expand Up @@ -579,6 +586,8 @@ def to_timestamp(self, freq=None, how='start'):
if freq is None:
base, mult = _gfc(self.freq)
freq = frequencies.get_to_timestamp_base(base)
else:
freq = Period._maybe_convert_freq(freq)

base, mult = _gfc(freq)
new_data = self.asfreq(freq, how)
Expand All @@ -596,7 +605,7 @@ def _maybe_convert_timedelta(self, other):
if nanos % offset_nanos == 0:
return nanos // offset_nanos
elif isinstance(other, offsets.DateOffset):
freqstr = frequencies.get_standard_freq(other)
freqstr = other.rule_code
base = frequencies.get_base_alias(freqstr)
if base == self.freq.rule_code:
return other.n
Expand Down
27 changes: 18 additions & 9 deletions pandas/tseries/tests/test_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -4591,21 +4591,30 @@ def test_parse_time_quarter_w_dash(self):


def test_get_standard_freq():
fstr = get_standard_freq('W')
assert fstr == get_standard_freq('w')
assert fstr == get_standard_freq('1w')
assert fstr == get_standard_freq(('W', 1))
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
fstr = get_standard_freq('W')
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
assert fstr == get_standard_freq('w')
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
assert fstr == get_standard_freq('1w')
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
assert fstr == get_standard_freq(('W', 1))

with tm.assertRaisesRegexp(ValueError, _INVALID_FREQ_ERROR):
get_standard_freq('WeEk')
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
get_standard_freq('WeEk')

fstr = get_standard_freq('5Q')
assert fstr == get_standard_freq('5q')
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
fstr = get_standard_freq('5Q')
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
assert fstr == get_standard_freq('5q')

with tm.assertRaisesRegexp(ValueError, _INVALID_FREQ_ERROR):
get_standard_freq('5QuarTer')
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
get_standard_freq('5QuarTer')

assert fstr == get_standard_freq(('q', 5))
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
assert fstr == get_standard_freq(('q', 5))


def test_quarterly_dont_normalize():
Expand Down
Loading