From 28dbae9f306ade549eb1edd5484b3e1da758bcdb Mon Sep 17 00:00:00 2001 From: Onno Eberhard Date: Thu, 3 May 2018 12:42:04 +0200 Subject: [PATCH] ENH: linearly spaced date_range (GH 20808) (#20846) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/indexes/datetimes.py | 69 ++++++++++++------- .../indexes/datetimes/test_date_range.py | 25 +++++-- 3 files changed, 67 insertions(+), 28 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 9cdda2d14669c..4ad40fe0f7f2b 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -523,6 +523,7 @@ Other Enhancements library. (:issue:`20564`) - Added new writer for exporting Stata dta files in version 117, ``StataWriter117``. This format supports exporting strings with lengths up to 2,000,000 characters (:issue:`16450`) - :func:`to_hdf` and :func:`read_hdf` now accept an ``errors`` keyword argument to control encoding error handling (:issue:`20835`) +- :func:`date_range` now returns a linearly spaced ``DatetimeIndex`` if ``start``, ``end``, and ``periods`` are specified, but ``freq`` is not. (:issue:`20808`) .. 
_whatsnew_0230.api_breaking: diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 720718e78d50e..e9ab443a978f8 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -358,7 +358,8 @@ def __new__(cls, data=None, msg = 'periods must be a number, got {periods}' raise TypeError(msg.format(periods=periods)) - if data is None and freq is None: + if data is None and freq is None \ + and com._any_none(periods, start, end): raise ValueError("Must provide freq argument if no data is " "supplied") @@ -466,9 +467,9 @@ def __new__(cls, data=None, @classmethod def _generate(cls, start, end, periods, name, freq, tz=None, normalize=False, ambiguous='raise', closed=None): - if com._count_not_none(start, end, periods) != 2: - raise ValueError('Of the three parameters: start, end, and ' - 'periods, exactly two must be specified') + if com._count_not_none(start, end, periods, freq) != 3: + raise ValueError('Of the four parameters: start, end, periods, ' + 'and freq, exactly three must be specified') _normalized = True @@ -566,23 +567,30 @@ def _generate(cls, start, end, periods, name, freq, if end.tz is None and start.tz is not None: start = start.replace(tzinfo=None) - if _use_cached_range(freq, _normalized, start, end): - index = cls._cached_range(start, end, periods=periods, - freq=freq, name=name) + if freq is not None: + if _use_cached_range(freq, _normalized, start, end): + index = cls._cached_range(start, end, periods=periods, + freq=freq, name=name) + else: + index = _generate_regular_range(start, end, periods, freq) + + if tz is not None and getattr(index, 'tz', None) is None: + index = conversion.tz_localize_to_utc(_ensure_int64(index), + tz, + ambiguous=ambiguous) + index = index.view(_NS_DTYPE) + + # index is localized datetime64 array -> have to convert + # start/end as well to compare + if start is not None: + start = start.tz_localize(tz).asm8 + if end is not None: + end = end.tz_localize(tz).asm8 else: 
- index = _generate_regular_range(start, end, periods, freq) - - if tz is not None and getattr(index, 'tz', None) is None: - index = conversion.tz_localize_to_utc(_ensure_int64(index), tz, - ambiguous=ambiguous) - index = index.view(_NS_DTYPE) - - # index is localized datetime64 array -> have to convert - # start/end as well to compare - if start is not None: - start = start.tz_localize(tz).asm8 - if end is not None: - end = end.tz_localize(tz).asm8 + index = tools.to_datetime(np.linspace(start.value, + end.value, periods)) + if tz is not None: + index = index.tz_localize('UTC').tz_convert(tz) if not left_closed and len(index) and index[0] == start: index = index[1:] @@ -2565,13 +2573,15 @@ def _generate_regular_range(start, end, periods, freq): return data -def date_range(start=None, end=None, periods=None, freq='D', tz=None, +def date_range(start=None, end=None, periods=None, freq=None, tz=None, normalize=False, name=None, closed=None, **kwargs): """ Return a fixed frequency DatetimeIndex. - Exactly two of the three parameters `start`, `end` and `periods` - must be specified. + Of the four parameters `start`, `end`, `periods`, and `freq` exactly + three must be specified. If `freq` is omitted, the resulting DatetimeIndex + will have `periods` linearly spaced elements between `start` and `end` + (closed on both sides). Parameters ---------- @@ -2613,7 +2623,7 @@ def date_range(start=None, end=None, periods=None, freq='D', tz=None, -------- **Specifying the values** - The next three examples generate the same `DatetimeIndex`, but vary + The next four examples generate the same `DatetimeIndex`, but vary the combination of `start`, `end` and `periods`. Specify `start` and `end`, with the default daily frequency. 
@@ -2637,6 +2647,13 @@ def date_range(start=None, end=None, periods=None, freq='D', tz=None, '2017-12-29', '2017-12-30', '2017-12-31', '2018-01-01'], dtype='datetime64[ns]', freq='D') + Specify `start`, `end`, and `periods`; the frequency is generated + automatically (linearly spaced). + + >>> pd.date_range(start='2018-04-24', end='2018-04-27', periods=3) + DatetimeIndex(['2018-04-24 00:00:00', '2018-04-25 12:00:00', + '2018-04-27 00:00:00'], freq=None) + **Other Parameters** Changed the `freq` (frequency) to ``'M'`` (month end frequency). @@ -2687,6 +2704,10 @@ def date_range(start=None, end=None, periods=None, freq='D', tz=None, DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'], dtype='datetime64[ns]', freq='D') """ + + if freq is None and com._any_none(periods, start, end): + freq = 'D' + return DatetimeIndex(start=start, end=end, periods=periods, freq=freq, tz=tz, normalize=normalize, name=name, closed=closed, **kwargs) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index e5291ed52a86c..bbe9cb65eb1a9 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -157,11 +157,28 @@ def test_date_range_ambiguous_arguments(self): start = datetime(2011, 1, 1, 5, 3, 40) end = datetime(2011, 1, 1, 8, 9, 40) - msg = ('Of the three parameters: start, end, and periods, ' - 'exactly two must be specified') + msg = ('Of the four parameters: start, end, periods, and ' + 'freq, exactly three must be specified') with tm.assert_raises_regex(ValueError, msg): date_range(start, end, periods=10, freq='s') + def test_date_range_convenience_periods(self): + # GH 20808 + rng = date_range('2018-04-24', '2018-04-27', periods=3) + exp = DatetimeIndex(['2018-04-24 00:00:00', '2018-04-25 12:00:00', + '2018-04-27 00:00:00'], freq=None) + + tm.assert_index_equal(rng, exp) + + # Test if spacing remains linear if tz changes to dst in range + rng = 
date_range('2018-04-01 01:00:00', '2018-04-01 04:00:00', + tz='Australia/Sydney', periods=3) + exp = DatetimeIndex(['2018-04-01 01:00:00+11:00', + '2018-04-01 02:00:00+11:00', + '2018-04-01 02:00:00+10:00', + '2018-04-01 03:00:00+10:00', + '2018-04-01 04:00:00+10:00'], freq=None) + def test_date_range_businesshour(self): idx = DatetimeIndex(['2014-07-04 09:00', '2014-07-04 10:00', '2014-07-04 11:00', @@ -198,8 +215,8 @@ def test_date_range_businesshour(self): def test_range_misspecified(self): # GH #1095 - msg = ('Of the three parameters: start, end, and periods, ' - 'exactly two must be specified') + msg = ('Of the four parameters: start, end, periods, and ' + 'freq, exactly three must be specified') with tm.assert_raises_regex(ValueError, msg): date_range(start='1/1/2000')