Skip to content

Commit

Permalink
ENH: linearly spaced date_range (GH 20808) (pandas-dev#20846)
Browse files Browse the repository at this point in the history
  • Loading branch information
onnoeberhard authored and jreback committed May 3, 2018
1 parent cb5c869 commit 28dbae9
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 28 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -523,6 +523,7 @@ Other Enhancements
library. (:issue:`20564`)
- Added new writer for exporting Stata dta files in version 117, ``StataWriter117``. This format supports exporting strings with lengths up to 2,000,000 characters (:issue:`16450`)
- :func:`to_hdf` and :func:`read_hdf` now accept an ``errors`` keyword argument to control encoding error handling (:issue:`20835`)
- :func:`date_range` now returns a linearly spaced ``DatetimeIndex`` if ``start``, ``end``, and ``periods`` are specified, but ``freq`` is not. (:issue:`20808`)

.. _whatsnew_0230.api_breaking:

Expand Down
69 changes: 45 additions & 24 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,8 @@ def __new__(cls, data=None,
msg = 'periods must be a number, got {periods}'
raise TypeError(msg.format(periods=periods))

if data is None and freq is None:
if data is None and freq is None \
and com._any_none(periods, start, end):
raise ValueError("Must provide freq argument if no data is "
"supplied")

Expand Down Expand Up @@ -466,9 +467,9 @@ def __new__(cls, data=None,
@classmethod
def _generate(cls, start, end, periods, name, freq,
tz=None, normalize=False, ambiguous='raise', closed=None):
if com._count_not_none(start, end, periods) != 2:
raise ValueError('Of the three parameters: start, end, and '
'periods, exactly two must be specified')
if com._count_not_none(start, end, periods, freq) != 3:
raise ValueError('Of the four parameters: start, end, periods, '
'and freq, exactly three must be specified')

_normalized = True

Expand Down Expand Up @@ -566,23 +567,30 @@ def _generate(cls, start, end, periods, name, freq,
if end.tz is None and start.tz is not None:
start = start.replace(tzinfo=None)

if _use_cached_range(freq, _normalized, start, end):
index = cls._cached_range(start, end, periods=periods,
freq=freq, name=name)
if freq is not None:
if _use_cached_range(freq, _normalized, start, end):
index = cls._cached_range(start, end, periods=periods,
freq=freq, name=name)
else:
index = _generate_regular_range(start, end, periods, freq)

if tz is not None and getattr(index, 'tz', None) is None:
index = conversion.tz_localize_to_utc(_ensure_int64(index),
tz,
ambiguous=ambiguous)
index = index.view(_NS_DTYPE)

# index is localized datetime64 array -> have to convert
# start/end as well to compare
if start is not None:
start = start.tz_localize(tz).asm8
if end is not None:
end = end.tz_localize(tz).asm8
else:
index = _generate_regular_range(start, end, periods, freq)

if tz is not None and getattr(index, 'tz', None) is None:
index = conversion.tz_localize_to_utc(_ensure_int64(index), tz,
ambiguous=ambiguous)
index = index.view(_NS_DTYPE)

# index is localized datetime64 array -> have to convert
# start/end as well to compare
if start is not None:
start = start.tz_localize(tz).asm8
if end is not None:
end = end.tz_localize(tz).asm8
index = tools.to_datetime(np.linspace(start.value,
end.value, periods))
if tz is not None:
index = index.tz_localize('UTC').tz_convert(tz)

if not left_closed and len(index) and index[0] == start:
index = index[1:]
Expand Down Expand Up @@ -2565,13 +2573,15 @@ def _generate_regular_range(start, end, periods, freq):
return data


def date_range(start=None, end=None, periods=None, freq='D', tz=None,
def date_range(start=None, end=None, periods=None, freq=None, tz=None,
normalize=False, name=None, closed=None, **kwargs):
"""
Return a fixed frequency DatetimeIndex.
Exactly two of the three parameters `start`, `end` and `periods`
must be specified.
Of the four parameters `start`, `end`, `periods`, and `freq` exactly
three must be specified. If `freq` is omitted, the resulting DatetimeIndex
will have `periods` linearly spaced elements between `start` and `end`
(closed on both sides).
Parameters
----------
Expand Down Expand Up @@ -2613,7 +2623,7 @@ def date_range(start=None, end=None, periods=None, freq='D', tz=None,
--------
**Specifying the values**
The next three examples generate the same `DatetimeIndex`, but vary
The next four examples generate the same `DatetimeIndex`, but vary
the combination of `start`, `end` and `periods`.
Specify `start` and `end`, with the default daily frequency.
Expand All @@ -2637,6 +2647,13 @@ def date_range(start=None, end=None, periods=None, freq='D', tz=None,
'2017-12-29', '2017-12-30', '2017-12-31', '2018-01-01'],
dtype='datetime64[ns]', freq='D')
Specify `start`, `end`, and `periods`; the frequency is generated
automatically (linearly spaced).
>>> pd.date_range(start='2018-04-24', end='2018-04-27', periods=3)
DatetimeIndex(['2018-04-24 00:00:00', '2018-04-25 12:00:00',
'2018-04-27 00:00:00'], freq=None)
**Other Parameters**
Changed the `freq` (frequency) to ``'M'`` (month end frequency).
Expand Down Expand Up @@ -2687,6 +2704,10 @@ def date_range(start=None, end=None, periods=None, freq='D', tz=None,
DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'],
dtype='datetime64[ns]', freq='D')
"""

if freq is None and com._any_none(periods, start, end):
freq = 'D'

return DatetimeIndex(start=start, end=end, periods=periods,
freq=freq, tz=tz, normalize=normalize, name=name,
closed=closed, **kwargs)
Expand Down
25 changes: 21 additions & 4 deletions pandas/tests/indexes/datetimes/test_date_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,11 +157,28 @@ def test_date_range_ambiguous_arguments(self):
start = datetime(2011, 1, 1, 5, 3, 40)
end = datetime(2011, 1, 1, 8, 9, 40)

msg = ('Of the three parameters: start, end, and periods, '
'exactly two must be specified')
msg = ('Of the four parameters: start, end, periods, and '
'freq, exactly three must be specified')
with tm.assert_raises_regex(ValueError, msg):
date_range(start, end, periods=10, freq='s')

def test_date_range_convenience_periods(self):
    """Check linearly spaced ``date_range`` output when ``freq`` is omitted.

    Covers GH 20808: with ``start``, ``end`` and ``periods`` given but no
    ``freq``, the result must contain ``periods`` points evenly spaced in
    absolute time, including across a DST transition.
    """
    # GH 20808
    rng = date_range('2018-04-24', '2018-04-27', periods=3)
    exp = DatetimeIndex(['2018-04-24 00:00:00', '2018-04-25 12:00:00',
                         '2018-04-27 00:00:00'], freq=None)

    tm.assert_index_equal(rng, exp)

    # Test if spacing remains linear if tz changes to dst in range
    rng = date_range('2018-04-01 01:00:00', '2018-04-01 04:00:00',
                     tz='Australia/Sydney', periods=3)
    # Sydney leaves DST at 03:00+11:00 on this date, so the local range
    # 01:00+11:00 .. 04:00+10:00 spans four real hours.  Three linearly
    # spaced points are therefore two real hours apart:
    #   01:00+11:00, 02:00+10:00, 04:00+10:00.
    # The expected index is built from the UTC instants and then converted,
    # so it does not depend on how mixed-offset strings are parsed.
    exp = DatetimeIndex(['2018-03-31 14:00:00',
                         '2018-03-31 16:00:00',
                         '2018-03-31 18:00:00'],
                        tz='UTC').tz_convert('Australia/Sydney')

    # The original built an expected index here but never compared it.
    tm.assert_index_equal(rng, exp)

def test_date_range_businesshour(self):
idx = DatetimeIndex(['2014-07-04 09:00', '2014-07-04 10:00',
'2014-07-04 11:00',
Expand Down Expand Up @@ -198,8 +215,8 @@ def test_date_range_businesshour(self):

def test_range_misspecified(self):
# GH #1095
msg = ('Of the three parameters: start, end, and periods, '
'exactly two must be specified')
msg = ('Of the four parameters: start, end, periods, and '
'freq, exactly three must be specified')

with tm.assert_raises_regex(ValueError, msg):
date_range(start='1/1/2000')
Expand Down

0 comments on commit 28dbae9

Please sign in to comment.