Skip to content

Commit

Permalink
BUG: Fix Period and PeriodIndex support of combined alias offsets
Browse files Browse the repository at this point in the history
Closes GH13730.
  • Loading branch information
agraboso committed Aug 7, 2016
1 parent 7e15923 commit 49a3783
Show file tree
Hide file tree
Showing 7 changed files with 268 additions and 40 deletions.
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v0.19.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -783,6 +783,8 @@ Deprecations
- ``pd.tseries.util.pivot_annual`` is deprecated. Use ``pivot_table`` as an alternative; an example is :ref:`here <cookbook.pivot>` (:issue:`736`)
- ``pd.tseries.util.isleapyear`` has been deprecated and will be removed in a subsequent release. Datetime-likes now have a ``.is_leap_year`` property. (:issue:`13727`)
- ``Panel4D`` and ``PanelND`` constructors are deprecated and will be removed in a future version. The recommended way to represent these types of n-dimensional data is with the `xarray package <http://xarray.pydata.org/en/stable/>`__. Pandas provides a :meth:`~Panel4D.to_xarray` method to automate this conversion. (:issue:`13564`)
- ``pandas.tseries.frequencies.get_standard_freq`` is deprecated. Use ``pandas.tseries.frequencies.to_offset(freq).rule_code`` instead. (:issue:`13874`)
- ``pandas.tseries.frequencies.to_offset``'s ``freqstr`` keyword is deprecated in favor of ``freq``. (:issue:`13874`)

.. _whatsnew_0190.prior_deprecations:

Expand Down Expand Up @@ -968,3 +970,4 @@ Bug Fixes
- Bug in ``pd.read_csv`` in Python 2.x with non-UTF8 encoded, multi-character separated data (:issue:`3404`)

- Bug in ``Index`` where the raised ``KeyError`` displays an incorrect column when the column is not in the df and the columns contain duplicate values (:issue:`13822`)
- Bug in ``Period`` and ``PeriodIndex`` creating wrong dates when frequency has combined offset aliases (:issue:`13874`)
8 changes: 7 additions & 1 deletion pandas/src/period.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -739,7 +739,7 @@ cdef class _Period(object):
msg = 'Input cannot be converted to Period(freq={0})'
raise IncompatibleFrequency(msg.format(self.freqstr))
elif isinstance(other, offsets.DateOffset):
freqstr = frequencies.get_standard_freq(other)
freqstr = other.rule_code
base = frequencies.get_base_alias(freqstr)
if base == self.freq.rule_code:
ordinal = self.ordinal + other.n
Expand Down Expand Up @@ -806,6 +806,7 @@ cdef class _Period(object):
-------
resampled : Period
"""
freq = self._maybe_convert_freq(freq)
how = _validate_end_alias(how)
base1, mult1 = frequencies.get_freq_code(self.freq)
base2, mult2 = frequencies.get_freq_code(freq)
Expand Down Expand Up @@ -849,6 +850,8 @@ cdef class _Period(object):
-------
Timestamp
"""
if freq is not None:
freq = self._maybe_convert_freq(freq)
how = _validate_end_alias(how)

if freq is None:
Expand Down Expand Up @@ -1122,6 +1125,9 @@ class Period(_Period):

cdef _Period self

if freq is not None:
freq = cls._maybe_convert_freq(freq)

if ordinal is not None and value is not None:
raise ValueError(("Only value or ordinal but not both should be "
"given but not both"))
Expand Down
85 changes: 58 additions & 27 deletions pandas/tseries/frequencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import pandas.core.algorithms as algos
from pandas.core.algorithms import unique
from pandas.tseries.offsets import DateOffset
from pandas.util.decorators import cache_readonly
from pandas.util.decorators import cache_readonly, deprecate_kwarg
import pandas.tseries.offsets as offsets
import pandas.lib as lib
import pandas.tslib as tslib
Expand Down Expand Up @@ -386,51 +386,85 @@ def get_period_alias(offset_str):
_INVALID_FREQ_ERROR = "Invalid frequency: {0}"


def to_offset(freqstr):
@deprecate_kwarg(old_arg_name='freqstr', new_arg_name='freq')
def to_offset(freq):
"""
Return DateOffset object from string representation or
Timedelta object
Return DateOffset object from string or tuple representation
or datetime.timedelta object
Parameters
----------
freq : str, tuple, datetime.timedelta, DateOffset or None
Returns
-------
delta : DateOffset
None if freq is None
Raises
------
ValueError
If freq is an invalid frequency
See Also
--------
pandas.DateOffset
Examples
--------
>>> to_offset('5Min')
Minute(5)
>>> to_offset('5min')
<5 * Minutes>
>>> to_offset('1D1H')
<25 * Hours>
>>> to_offset(('W', 2))
<2 * Weeks: weekday=6>
>>> to_offset((2, 'B'))
<2 * BusinessDays>
>>> to_offset(datetime.timedelta(days=1))
<Day>
>>> to_offset(Hour())
<Hour>
"""
if freqstr is None:
if freq is None:
return None

if isinstance(freqstr, DateOffset):
return freqstr
if isinstance(freq, DateOffset):
return freq

if isinstance(freqstr, tuple):
name = freqstr[0]
stride = freqstr[1]
if isinstance(freq, tuple):
name = freq[0]
stride = freq[1]
if isinstance(stride, compat.string_types):
name, stride = stride, name
name, _ = _base_and_stride(name)
delta = get_offset(name) * stride

elif isinstance(freqstr, timedelta):
elif isinstance(freq, timedelta):
delta = None
freqstr = Timedelta(freqstr)
freq = Timedelta(freq)
try:
for name in freqstr.components._fields:
for name in freq.components._fields:
offset = _name_to_offset_map[name]
stride = getattr(freqstr.components, name)
stride = getattr(freq.components, name)
if stride != 0:
offset = stride * offset
if delta is None:
delta = offset
else:
delta = delta + offset
except Exception:
raise ValueError(_INVALID_FREQ_ERROR.format(freqstr))
raise ValueError(_INVALID_FREQ_ERROR.format(freq))

else:
delta = None
stride_sign = None
try:
for stride, name, _ in opattern.findall(freqstr):
for stride, name, _ in opattern.findall(freq):
offset = get_offset(name)
if stride_sign is None:
stride_sign = -1 if stride.startswith('-') else 1
Expand All @@ -443,10 +477,10 @@ def to_offset(freqstr):
else:
delta = delta + offset
except Exception:
raise ValueError(_INVALID_FREQ_ERROR.format(freqstr))
raise ValueError(_INVALID_FREQ_ERROR.format(freq))

if delta is None:
raise ValueError(_INVALID_FREQ_ERROR.format(freqstr))
raise ValueError(_INVALID_FREQ_ERROR.format(freq))

return delta

Expand Down Expand Up @@ -542,14 +576,11 @@ def get_standard_freq(freq):
"""
Return the standardized frequency string
"""
if freq is None:
return None

if isinstance(freq, DateOffset):
return freq.rule_code

code, stride = get_freq_code(freq)
return _get_freq_str(code, stride)
msg = ("get_standard_freq is deprecated. Use to_offset(freq).rule_code "
"instead.")
warnings.warn(msg, FutureWarning, stacklevel=2)
return to_offset(freq).rule_code

# ---------------------------------------------------------------------
# Period codes
Expand Down
13 changes: 11 additions & 2 deletions pandas/tseries/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def dt64arr_to_periodarr(data, freq, tz):
if data.dtype != np.dtype('M8[ns]'):
raise ValueError('Wrong dtype: %s' % data.dtype)

freq = Period._maybe_convert_freq(freq)
base, mult = _gfc(freq)
return period.dt64arr_to_periodarr(data.view('i8'), base, tz)

Expand Down Expand Up @@ -206,6 +207,9 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None,

@classmethod
def _generate_range(cls, start, end, periods, freq, fields):
if freq is not None:
freq = Period._maybe_convert_freq(freq)

field_count = len(fields)
if com._count_not_none(start, end) > 0:
if field_count > 0:
Expand All @@ -222,6 +226,9 @@ def _generate_range(cls, start, end, periods, freq, fields):

@classmethod
def _from_arraylike(cls, data, freq, tz):
if freq is not None:
freq = Period._maybe_convert_freq(freq)

if not isinstance(data, (np.ndarray, PeriodIndex,
DatetimeIndex, Int64Index)):
if is_scalar(data) or isinstance(data, Period):
Expand Down Expand Up @@ -478,7 +485,7 @@ def asfreq(self, freq=None, how='E'):
"""
how = _validate_end_alias(how)

freq = frequencies.get_standard_freq(freq)
freq = Period._maybe_convert_freq(freq)

base1, mult1 = _gfc(self.freq)
base2, mult2 = _gfc(freq)
Expand Down Expand Up @@ -579,6 +586,8 @@ def to_timestamp(self, freq=None, how='start'):
if freq is None:
base, mult = _gfc(self.freq)
freq = frequencies.get_to_timestamp_base(base)
else:
freq = Period._maybe_convert_freq(freq)

base, mult = _gfc(freq)
new_data = self.asfreq(freq, how)
Expand All @@ -596,7 +605,7 @@ def _maybe_convert_timedelta(self, other):
if nanos % offset_nanos == 0:
return nanos // offset_nanos
elif isinstance(other, offsets.DateOffset):
freqstr = frequencies.get_standard_freq(other)
freqstr = other.rule_code
base = frequencies.get_base_alias(freqstr)
if base == self.freq.rule_code:
return other.n
Expand Down
27 changes: 18 additions & 9 deletions pandas/tseries/tests/test_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -4591,21 +4591,30 @@ def test_parse_time_quarter_w_dash(self):


def test_get_standard_freq():
fstr = get_standard_freq('W')
assert fstr == get_standard_freq('w')
assert fstr == get_standard_freq('1w')
assert fstr == get_standard_freq(('W', 1))
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
fstr = get_standard_freq('W')
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
assert fstr == get_standard_freq('w')
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
assert fstr == get_standard_freq('1w')
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
assert fstr == get_standard_freq(('W', 1))

with tm.assertRaisesRegexp(ValueError, _INVALID_FREQ_ERROR):
get_standard_freq('WeEk')
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
get_standard_freq('WeEk')

fstr = get_standard_freq('5Q')
assert fstr == get_standard_freq('5q')
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
fstr = get_standard_freq('5Q')
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
assert fstr == get_standard_freq('5q')

with tm.assertRaisesRegexp(ValueError, _INVALID_FREQ_ERROR):
get_standard_freq('5QuarTer')
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
get_standard_freq('5QuarTer')

assert fstr == get_standard_freq(('q', 5))
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
assert fstr == get_standard_freq(('q', 5))


def test_quarterly_dont_normalize():
Expand Down
Loading

0 comments on commit 49a3783

Please sign in to comment.