From 0e9ce370301de4bd66b537c9804b8c4506d5f2c8 Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Tue, 17 Nov 2015 06:32:00 -0500
Subject: [PATCH] BUG: date_range creation with an ambiguous endpoint, #11619

TST: some tests for datetime tz aware serialized to/from csv/hdf
---
 doc/source/whatsnew/v0.17.1.txt        |  3 ++-
 pandas/io/tests/test_pytables.py       | 17 ++++++++++++++++-
 pandas/tests/test_frame.py             | 20 ++++++++++++++++++++
 pandas/tseries/index.py                | 18 +++++++++---------
 pandas/tseries/tests/test_timezones.py | 16 ++++++++++++++++
 5 files changed, 63 insertions(+), 11 deletions(-)

diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.txt
index 046791d4287c9..02830e63ae81b 100755
--- a/doc/source/whatsnew/v0.17.1.txt
+++ b/doc/source/whatsnew/v0.17.1.txt
@@ -120,7 +120,8 @@ Bug Fixes
 - Bug in ``HDFStore.append`` with strings whose encoded length exceded the max unencoded length (:issue:`11234`)
 - Bug in merging ``datetime64[ns, tz]`` dtypes (:issue:`11405`)
 - Bug in ``HDFStore.select`` when comparing with a numpy scalar in a where clause (:issue:`11283`)
-- Bug in using ``DataFrame.ix`` with a multi-index indexer(:issue:`11372`)
+- Bug in using ``DataFrame.ix`` with a multi-index indexer (:issue:`11372`)
+- Bug in ``date_range`` with ambiguous endpoints (:issue:`11626`)
 - Prevent adding new attributes to the accessors ``.str``, ``.dt`` and ``.cat``. Retrieving such a value was not possible, so error out on setting it. (:issue:`10673`)
 - Bug in tz-conversions with an ambiguous time and ``.dt`` accessors (:issue:`11295`)
diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
index 6c78f9cf3937c..9de7732d3b289 100644
--- a/pandas/io/tests/test_pytables.py
+++ b/pandas/io/tests/test_pytables.py
@@ -3049,7 +3049,7 @@ def test_select_dtypes(self):
             result = store.select(
                 'df4', where='values>2.0')
             tm.assert_frame_equal(expected, result)
-
+
         # test selection with comparison against numpy scalar
         # GH 11283
         with ensure_clean_store(self.path) as store:
@@ -4988,6 +4988,21 @@ def test_legacy_datetimetz_object(self):
             result = store['df']
             assert_frame_equal(result, expected)
 
+    def test_dst_transitions(self):
+        # make sure we are not failing on transitions
+        with ensure_clean_store(self.path) as store:
+            times = pd.date_range("2013-10-26 23:00", "2013-10-27 01:00",
+                                  tz="Europe/London",
+                                  freq="H",
+                                  ambiguous='infer')
+
+            for i in [times, times + pd.Timedelta('10min')]:
+                _maybe_remove(store, 'df')
+                df = DataFrame({'A': range(len(i)), 'B': i}, index=i)
+                store.append('df', df)
+                result = store.select('df')
+                assert_frame_equal(result, df)
+
 def _test_sort(obj):
     if isinstance(obj, DataFrame):
         return obj.reindex(sorted(obj.index))
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index a743ce4ffef61..e4f3032cabf88 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -15439,6 +15439,26 @@ def test_to_csv_date_format(self):
 
                 assert_frame_equal(test, nat_frame)
 
+    def test_to_csv_with_dst_transitions(self):
+        pname = '__tmp_to_csv_date_format_with_dst__'
+        with ensure_clean(pname) as path:
+            # make sure we are not failing on transitions
+            times = pd.date_range("2013-10-26 23:00", "2013-10-27 01:00",
+                                  tz="Europe/London",
+                                  freq="H",
+                                  ambiguous='infer')
+
+            for i in [times, times + pd.Timedelta('10s')]:
+                df = DataFrame({'A': range(len(i))}, index=i)
+                df.to_csv(path, index=True)
+
+                # we have to reconvert the index as we
+                # don't parse the tz's
+                result = read_csv(path, index_col=0)
+                result.index = pd.to_datetime(result.index).tz_localize('UTC').tz_convert('Europe/London')
+                assert_frame_equal(result, df)
+
+
     def test_concat_empty_dataframe_dtypes(self):
         df = DataFrame(columns=list("abc"))
         df['a'] = df['a'].astype(np.bool_)
diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py
index fd26e9834bd5f..4fd61e28233a6 100644
--- a/pandas/tseries/index.py
+++ b/pandas/tseries/index.py
@@ -355,7 +355,7 @@ def __new__(cls, data=None,
         if freq is not None and not freq_infer:
             inferred = subarr.inferred_freq
             if inferred != freq.freqstr:
-                on_freq = cls._generate(subarr[0], None, len(subarr), None, freq, tz=tz)
+                on_freq = cls._generate(subarr[0], None, len(subarr), None, freq, tz=tz, ambiguous=ambiguous)
                 if not np.array_equal(subarr.asi8, on_freq.asi8):
                     raise ValueError('Inferred frequency {0} from passed dates does not '
                                      'conform to passed frequency {1}'.format(inferred, freq.freqstr))
@@ -440,17 +440,17 @@ def _generate(cls, start, end, periods, name, offset,
         if inferred_tz is None and tz is not None:
             # naive dates
             if start is not None and start.tz is None:
-                start = start.tz_localize(tz)
+                start = start.tz_localize(tz, ambiguous=False)
 
             if end is not None and end.tz is None:
-                end = end.tz_localize(tz)
+                end = end.tz_localize(tz, ambiguous=False)
 
         if start and end:
             if start.tz is None and end.tz is not None:
-                start = start.tz_localize(end.tz)
+                start = start.tz_localize(end.tz, ambiguous=False)
 
             if end.tz is None and start.tz is not None:
-                end = end.tz_localize(start.tz)
+                end = end.tz_localize(start.tz, ambiguous=False)
 
         if _use_cached_range(offset, _normalized, start, end):
             index = cls._cached_range(start, end, periods=periods,
@@ -1884,7 +1884,7 @@ def _generate_regular_range(start, end, periods, offset):
 
 
 def date_range(start=None, end=None, periods=None, freq='D', tz=None,
-               normalize=False, name=None, closed=None):
+               normalize=False, name=None, closed=None, **kwargs):
     """
     Return a fixed frequency datetime index, with day (calendar) as the default
     frequency
@@ -1920,11 +1920,11 @@ def date_range(start=None, end=None, periods=None, freq='D', tz=None,
     """
     return DatetimeIndex(start=start, end=end, periods=periods,
                          freq=freq, tz=tz, normalize=normalize, name=name,
-                         closed=closed)
+                         closed=closed, **kwargs)
 
 
 def bdate_range(start=None, end=None, periods=None, freq='B', tz=None,
-                normalize=True, name=None, closed=None):
+                normalize=True, name=None, closed=None, **kwargs):
     """
     Return a fixed frequency datetime index, with business day as the default
     frequency
@@ -1961,7 +1961,7 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None,
 
     return DatetimeIndex(start=start, end=end, periods=periods,
                          freq=freq, tz=tz, normalize=normalize, name=name,
-                         closed=closed)
+                         closed=closed, **kwargs)
 
 
 def cdate_range(start=None, end=None, periods=None, freq='C', tz=None,
diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py
index a6e5812158474..37c40dd48cf6a 100644
--- a/pandas/tseries/tests/test_timezones.py
+++ b/pandas/tseries/tests/test_timezones.py
@@ -502,6 +502,22 @@ def test_ambiguous_flags(self):
             localized_is_dst = dr.tz_localize(tz, ambiguous=is_dst)
             self.assert_numpy_array_equal(localized, localized_is_dst)
 
+        # construction with an ambiguous end-point
+        # GH 11626
+        tz = self.tzstr("Europe/London")
+
+        def f():
+            date_range("2013-10-26 23:00", "2013-10-27 01:00",
+                       tz="Europe/London",
+                       freq="H")
+        self.assertRaises(pytz.AmbiguousTimeError, f)
+        times = date_range("2013-10-26 23:00", "2013-10-27 01:00",
+                           freq="H",
+                           tz=tz,
+                           ambiguous='infer')
+        self.assertEqual(times[0], Timestamp('2013-10-26 23:00', tz=tz))
+        self.assertEqual(times[-1], Timestamp('2013-10-27 01:00', tz=tz))
+
     def test_ambiguous_nat(self):
         tz = self.tz('US/Eastern')
         times = ['11/06/2011 00:00', '11/06/2011 01:00',