diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index f93e8f4240787..f1957836645e7 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -783,6 +783,8 @@ Deprecations - ``pd.tseries.util.pivot_annual`` is deprecated. Use ``pivot_table`` as alternative, an example is :ref:`here ` (:issue:`736`) - ``pd.tseries.util.isleapyear`` has been deprecated and will be removed in a subsequent release. Datetime-likes now have a ``.is_leap_year`` property. (:issue:`13727`) - ``Panel4D`` and ``PanelND`` constructors are deprecated and will be removed in a future version. The recommended way to represent these types of n-dimensional data are with the `xarray package `__. Pandas provides a :meth:`~Panel4D.to_xarray` method to automate this conversion. (:issue:`13564`) +- ``pandas.tseries.frequencies.get_standard_freq`` is deprecated. Use ``pandas.tseries.frequencies.to_offset(freq).rule_code`` instead. (:issue:`13874`) +- ``pandas.tseries.frequencies.to_offset``'s ``freqstr`` keyword is deprecated in favor of ``freq``. (:issue:`13874`) .. _whatsnew_0190.prior_deprecations: @@ -968,3 +970,4 @@ Bug Fixes - Bug in ``pd.read_csv`` in Python 2.x with non-UTF8 encoded, multi-character separated data (:issue:`3404`) - Bug in ``Index`` raises ``KeyError`` displaying incorrect column when column is not in the df and columns contains duplicate values (:issue:`13822`) +- Bug in ``Period`` and ``PeriodIndex`` creating wrong dates when frequency has combined offset aliases (:issue:`13874`) diff --git a/pandas/src/period.pyx b/pandas/src/period.pyx index 0435b01920504..bb0108fcb141c 100644 --- a/pandas/src/period.pyx +++ b/pandas/src/period.pyx @@ -739,7 +739,7 @@ cdef class _Period(object): msg = 'Input cannot be converted to Period(freq={0})' raise IncompatibleFrequency(msg.format(self.freqstr)) elif isinstance(other, offsets.DateOffset): - freqstr = frequencies.get_standard_freq(other) + freqstr = other.rule_code base = frequencies.get_base_alias(freqstr) if base == self.freq.rule_code: ordinal = self.ordinal + other.n @@ -806,6 +806,7 @@ cdef class _Period(object): ------- resampled : Period """ + freq = self._maybe_convert_freq(freq) how = _validate_end_alias(how) base1, mult1 = frequencies.get_freq_code(self.freq) base2, mult2 = frequencies.get_freq_code(freq) @@ -849,6 +850,8 @@ cdef class _Period(object): ------- Timestamp """ + if freq is not None: + freq = self._maybe_convert_freq(freq) how = _validate_end_alias(how) if freq is None: @@ -1122,6 +1125,9 @@ class Period(_Period): cdef _Period self + if freq is not None: + freq = cls._maybe_convert_freq(freq) + if ordinal is not None and value is not None: raise ValueError(("Only value or ordinal but not both should be " "given but not both")) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index eaf826230e772..3011e8dc0ae3d 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -15,7 +15,7 @@ import pandas.core.algorithms as algos from pandas.core.algorithms import unique from pandas.tseries.offsets import DateOffset -from pandas.util.decorators import cache_readonly +from pandas.util.decorators import cache_readonly, deprecate_kwarg import pandas.tseries.offsets as offsets import pandas.lib as lib import pandas.tslib as tslib @@ -386,37 +386,71 @@ def get_period_alias(offset_str): _INVALID_FREQ_ERROR = "Invalid frequency: {0}" -def to_offset(freqstr): +@deprecate_kwarg(old_arg_name='freqstr', new_arg_name='freq') +def to_offset(freq): """ - Return DateOffset object from string representation or - Timedelta object + Return DateOffset object from string or tuple representation + or datetime.timedelta object + + Parameters + ---------- + freq : str, tuple, datetime.timedelta, DateOffset or None + + Returns + ------- + delta : DateOffset + None if freq is None + + Raises + ------ + ValueError + If freq is an invalid frequency + + See Also + -------- + pandas.DateOffset Examples -------- - >>> to_offset('5Min') - Minute(5) + >>> to_offset('5min') + <5 * Minutes> + + >>> to_offset('1D1H') + <25 * Hours> + + >>> to_offset(('W', 2)) + <2 * Weeks: weekday=6> + + >>> to_offset((2, 'B')) + <2 * BusinessDays> + + >>> to_offset(datetime.timedelta(days=1)) + + + >>> to_offset(Hour()) + """ - if freqstr is None: + if freq is None: return None - if isinstance(freqstr, DateOffset): - return freqstr + if isinstance(freq, DateOffset): + return freq - if isinstance(freqstr, tuple): - name = freqstr[0] - stride = freqstr[1] + if isinstance(freq, tuple): + name = freq[0] + stride = freq[1] if isinstance(stride, compat.string_types): name, stride = stride, name name, _ = _base_and_stride(name) delta = get_offset(name) * stride - elif isinstance(freqstr, timedelta): + elif isinstance(freq, timedelta): delta = None - freqstr = Timedelta(freqstr) + freq = Timedelta(freq) try: - for name in freqstr.components._fields: + for name in freq.components._fields: offset = _name_to_offset_map[name] - stride = getattr(freqstr.components, name) + stride = getattr(freq.components, name) if stride != 0: offset = stride * offset if delta is None: @@ -424,13 +458,13 @@ def to_offset(freqstr): else: delta = delta + offset except Exception: - raise ValueError(_INVALID_FREQ_ERROR.format(freqstr)) + raise ValueError(_INVALID_FREQ_ERROR.format(freq)) else: delta = None stride_sign = None try: - for stride, name, _ in opattern.findall(freqstr): + for stride, name, _ in opattern.findall(freq): offset = get_offset(name) if stride_sign is None: stride_sign = -1 if stride.startswith('-') else 1 @@ -443,10 +477,10 @@ def to_offset(freqstr): else: delta = delta + offset except Exception: - raise ValueError(_INVALID_FREQ_ERROR.format(freqstr)) + raise ValueError(_INVALID_FREQ_ERROR.format(freq)) if delta is None: - raise ValueError(_INVALID_FREQ_ERROR.format(freqstr)) + raise ValueError(_INVALID_FREQ_ERROR.format(freq)) return delta @@ -542,14 +576,11 @@ def get_standard_freq(freq): """ Return the standardized frequency string """ - if freq is None: - return None - if isinstance(freq, DateOffset): - return freq.rule_code - - code, stride = get_freq_code(freq) - return _get_freq_str(code, stride) + msg = ("get_standard_freq is deprecated. Use to_offset(freq).rule_code " + "instead.") + warnings.warn(msg, FutureWarning, stacklevel=2) + return to_offset(freq).rule_code # --------------------------------------------------------------------- # Period codes diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 810c89b3f969b..da8868bb2bd84 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -57,6 +57,7 @@ def dt64arr_to_periodarr(data, freq, tz): if data.dtype != np.dtype('M8[ns]'): raise ValueError('Wrong dtype: %s' % data.dtype) + freq = Period._maybe_convert_freq(freq) base, mult = _gfc(freq) return period.dt64arr_to_periodarr(data.view('i8'), base, tz) @@ -206,6 +207,9 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, @classmethod def _generate_range(cls, start, end, periods, freq, fields): + if freq is not None: + freq = Period._maybe_convert_freq(freq) + field_count = len(fields) if com._count_not_none(start, end) > 0: if field_count > 0: @@ -222,6 +226,9 @@ def _generate_range(cls, start, end, periods, freq, fields): @classmethod def _from_arraylike(cls, data, freq, tz): + if freq is not None: + freq = Period._maybe_convert_freq(freq) + if not isinstance(data, (np.ndarray, PeriodIndex, DatetimeIndex, Int64Index)): if is_scalar(data) or isinstance(data, Period): @@ -478,7 +485,7 @@ def asfreq(self, freq=None, how='E'): """ how = _validate_end_alias(how) - freq = frequencies.get_standard_freq(freq) + freq = Period._maybe_convert_freq(freq) base1, mult1 = _gfc(self.freq) base2, mult2 = _gfc(freq) @@ -579,6 +586,8 @@ def to_timestamp(self, freq=None, how='start'): if freq is None: base, mult = _gfc(self.freq) freq = frequencies.get_to_timestamp_base(base) + else: + freq = Period._maybe_convert_freq(freq) base, mult = _gfc(freq) new_data = self.asfreq(freq, how) @@ -596,7 +605,7 @@ def _maybe_convert_timedelta(self, other): if nanos % offset_nanos == 0: return nanos // offset_nanos elif isinstance(other, offsets.DateOffset): - freqstr = frequencies.get_standard_freq(other) + freqstr = other.rule_code base = frequencies.get_base_alias(freqstr) if base == self.freq.rule_code: return other.n diff --git a/pandas/tseries/tests/test_offsets.py b/pandas/tseries/tests/test_offsets.py index b31e4d54c551f..3ec07c27ef854 100644 --- a/pandas/tseries/tests/test_offsets.py +++ b/pandas/tseries/tests/test_offsets.py @@ -4591,21 +4591,30 @@ def test_parse_time_quarter_w_dash(self): def test_get_standard_freq(): - fstr = get_standard_freq('W') - assert fstr == get_standard_freq('w') - assert fstr == get_standard_freq('1w') - assert fstr == get_standard_freq(('W', 1)) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + fstr = get_standard_freq('W') + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + assert fstr == get_standard_freq('w') + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + assert fstr == get_standard_freq('1w') + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + assert fstr == get_standard_freq(('W', 1)) with tm.assertRaisesRegexp(ValueError, _INVALID_FREQ_ERROR): - get_standard_freq('WeEk') + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + get_standard_freq('WeEk') - fstr = get_standard_freq('5Q') - assert fstr == get_standard_freq('5q') + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + fstr = get_standard_freq('5Q') + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + assert fstr == get_standard_freq('5q') with tm.assertRaisesRegexp(ValueError, _INVALID_FREQ_ERROR): - get_standard_freq('5QuarTer') + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + get_standard_freq('5QuarTer') - assert fstr == get_standard_freq(('q', 5)) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + assert fstr == get_standard_freq(('q', 5)) def test_quarterly_dont_normalize(): diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index 290c11bd8d79c..17e6e36d52acd 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -102,6 +102,9 @@ def test_period_cons_nat(self): p = Period(tslib.iNaT, freq='3D') self.assertIs(p, pd.NaT) + p = Period(tslib.iNaT, freq='1D1H') + self.assertIs(p, pd.NaT) + p = Period('NaT') self.assertIs(p, pd.NaT) @@ -152,6 +155,73 @@ def test_period_cons_mult(self): with tm.assertRaisesRegexp(ValueError, msg): Period('2011-01', freq='0M') + def test_period_cons_combined(self): + p = [(Period('2011-01', freq='1D1H'), + Period('2011-01', freq='1H1D'), + Period('2011-01', freq='H')), + (Period(ordinal=1, freq='1D1H'), + Period(ordinal=1, freq='1H1D'), + Period(ordinal=1, freq='H'))] + + for p1, p2, p3 in p: + self.assertEqual(p1.ordinal, p3.ordinal) + self.assertEqual(p2.ordinal, p3.ordinal) + + self.assertEqual(p1.freq, offsets.Hour(25)) + self.assertEqual(p1.freqstr, '25H') + + self.assertEqual(p2.freq, offsets.Hour(25)) + self.assertEqual(p2.freqstr, '25H') + + self.assertEqual(p3.freq, offsets.Hour()) + self.assertEqual(p3.freqstr, 'H') + + result = p1 + 1 + self.assertEqual(result.ordinal, (p3 + 25).ordinal) + self.assertEqual(result.freq, p1.freq) + self.assertEqual(result.freqstr, '25H') + + result = p2 + 1 + self.assertEqual(result.ordinal, (p3 + 25).ordinal) + self.assertEqual(result.freq, p2.freq) + self.assertEqual(result.freqstr, '25H') + + result = p1 - 1 + self.assertEqual(result.ordinal, (p3 - 25).ordinal) + self.assertEqual(result.freq, p1.freq) + self.assertEqual(result.freqstr, '25H') + + result = p2 - 1 + self.assertEqual(result.ordinal, (p3 - 25).ordinal) + self.assertEqual(result.freq, p2.freq) + self.assertEqual(result.freqstr, '25H') + + msg = ('Frequency must be positive, because it' + ' represents span: -25H') + with tm.assertRaisesRegexp(ValueError, msg): + Period('2011-01', freq='-1D1H') + with tm.assertRaisesRegexp(ValueError, msg): + Period('2011-01', freq='-1H1D') + with tm.assertRaisesRegexp(ValueError, msg): + Period(ordinal=1, freq='-1D1H') + with tm.assertRaisesRegexp(ValueError, msg): + Period(ordinal=1, freq='-1H1D') + + msg = ('Frequency must be positive, because it' + ' represents span: 0D') + with tm.assertRaisesRegexp(ValueError, msg): + Period('2011-01', freq='0D0H') + with tm.assertRaisesRegexp(ValueError, msg): + Period(ordinal=1, freq='0D0H') + + # You can only combine together day and intraday offsets + msg = ('Invalid frequency: 1W1D') + with tm.assertRaisesRegexp(ValueError, msg): + Period('2011-01', freq='1W1D') + msg = ('Invalid frequency: 1D1W') + with tm.assertRaisesRegexp(ValueError, msg): + Period('2011-01', freq='1D1W') + def test_timestamp_tz_arg(self): tm._skip_if_no_pytz() import pytz @@ -624,6 +694,14 @@ def _ex(*args): xp = _ex(2012, 1, 16) self.assertEqual(xp, p.end_time) + p = Period('2012', freq='1D1H') + xp = _ex(2012, 1, 2, 1) + self.assertEqual(xp, p.end_time) + + p = Period('2012', freq='1H1D') + xp = _ex(2012, 1, 2, 1) + self.assertEqual(xp, p.end_time) + def test_anchor_week_end_time(self): def _ex(*args): return Timestamp(Timestamp(datetime(*args)).value - 1) @@ -1518,6 +1596,44 @@ def test_asfreq_mult(self): self.assertEqual(result.ordinal, expected.ordinal) self.assertEqual(result.freq, expected.freq) + def test_asfreq_combined(self): + # normal freq to combined freq + p = Period('2007', freq='H') + + # ordinal will not change + expected = Period('2007', freq='25H') + for freq, how in zip(['1D1H', '1H1D'], ['E', 'S']): + result = p.asfreq(freq, how=how) + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + self.assertEqual(result.freq, expected.freq) + + # combined freq to normal freq + p1 = Period(freq='1D1H', year=2007) + p2 = Period(freq='1H1D', year=2007) + + # ordinal will change because how=E is the default + result1 = p1.asfreq('H') + result2 = p2.asfreq('H') + expected = Period('2007-01-02', freq='H') + self.assertEqual(result1, expected) + self.assertEqual(result1.ordinal, expected.ordinal) + self.assertEqual(result1.freq, expected.freq) + self.assertEqual(result2, expected) + self.assertEqual(result2.ordinal, expected.ordinal) + self.assertEqual(result2.freq, expected.freq) + + # ordinal will not change + result1 = p1.asfreq('H', how='S') + result2 = p2.asfreq('H', how='S') + expected = Period('2007-01-01', freq='H') + self.assertEqual(result1, expected) + self.assertEqual(result1.ordinal, expected.ordinal) + self.assertEqual(result1.freq, expected.freq) + self.assertEqual(result2, expected) + self.assertEqual(result2.ordinal, expected.ordinal) + self.assertEqual(result2.freq, expected.freq) + def test_is_leap_year(self): # GH 13727 for freq in ['A', 'M', 'D', 'H']: @@ -1861,6 +1977,17 @@ def test_constructor_freq_mult_dti_compat(self): periods=10).to_period(freqstr) tm.assert_index_equal(pidx, expected) + def test_constructor_freq_combined(self): + for freq in ['1D1H', '1H1D']: + pidx = PeriodIndex(['2016-01-01', '2016-01-02'], freq=freq) + expected = PeriodIndex(['2016-01-01 00:00', '2016-01-02 00:00'], + freq='25H') + for freq, func in zip(['1D1H', '1H1D'], [PeriodIndex, period_range]): + pidx = func(start='2016-01-01', periods=2, freq=freq) + expected = PeriodIndex(['2016-01-01 00:00', '2016-01-02 01:00'], + freq='25H') + tm.assert_index_equal(pidx, expected) + def test_is_(self): create_index = lambda: PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') @@ -2130,6 +2257,21 @@ def test_to_timestamp_pi_mult(self): ['2011-02-28', 'NaT', '2011-03-31'], name='idx') self.assert_index_equal(result, expected) + def test_to_timestamp_pi_combined(self): + idx = PeriodIndex(start='2011', periods=2, freq='1D1H', name='idx') + result = idx.to_timestamp() + expected = DatetimeIndex( + ['2011-01-01 00:00', '2011-01-02 01:00'], name='idx') + self.assert_index_equal(result, expected) + result = idx.to_timestamp(how='E') + expected = DatetimeIndex( + ['2011-01-02 00:59:59', '2011-01-03 01:59:59'], name='idx') + self.assert_index_equal(result, expected) + result = idx.to_timestamp(how='E', freq='H') + expected = DatetimeIndex( + ['2011-01-02 00:00', '2011-01-03 01:00'], name='idx') + self.assert_index_equal(result, expected) + def test_start_time(self): index = PeriodIndex(freq='M', start='2016-01-01', end='2016-05-31') expected_index = date_range('2016-01-01', end='2016-05-31', freq='MS') @@ -2541,6 +2683,33 @@ def test_asfreq_mult_pi(self): self.assert_index_equal(result, exp) self.assertEqual(result.freq, exp.freq) + def test_asfreq_combined_pi(self): + pi = pd.PeriodIndex(['2001-01-01 00:00', '2001-01-02 02:00', 'NaT'], + freq='H') + exp = PeriodIndex(['2001-01-01 00:00', '2001-01-02 02:00', 'NaT'], + freq='25H') + for freq, how in zip(['1D1H', '1H1D'], ['S', 'E']): + result = pi.asfreq(freq, how=how) + self.assert_index_equal(result, exp) + self.assertEqual(result.freq, exp.freq) + + for freq in ['1D1H', '1H1D']: + pi = pd.PeriodIndex(['2001-01-01 00:00', '2001-01-02 02:00', + 'NaT'], freq=freq) + result = pi.asfreq('H') + exp = PeriodIndex(['2001-01-02 00:00', '2001-01-03 02:00', 'NaT'], + freq='H') + self.assert_index_equal(result, exp) + self.assertEqual(result.freq, exp.freq) + + pi = pd.PeriodIndex(['2001-01-01 00:00', '2001-01-02 02:00', + 'NaT'], freq=freq) + result = pi.asfreq('H', how='S') + exp = PeriodIndex(['2001-01-01 00:00', '2001-01-02 02:00', 'NaT'], + freq='H') + self.assert_index_equal(result, exp) + self.assertEqual(result.freq, exp.freq) + def test_period_index_length(self): pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') self.assertEqual(len(pi), 9) diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 09fb4beb74f28..a3abfd0321677 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -4700,7 +4700,8 @@ def test_frequency_misc(self): self.assertRaises(ValueError, frequencies.to_offset, ('', '')) - result = frequencies.get_standard_freq(offsets.Hour()) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = frequencies.get_standard_freq(offsets.Hour()) self.assertEqual(result, 'H') def test_hash_equivalent(self):