From 0e9ce370301de4bd66b537c9804b8c4506d5f2c8 Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Tue, 17 Nov 2015 06:32:00 -0500
Subject: [PATCH] BUG: date_range creation with an ambiguous endpoint, #11619

TST: some tests for datetime tz aware serialized to/from csv/hdf
---
 doc/source/whatsnew/v0.17.1.txt        |  3 ++-
 pandas/io/tests/test_pytables.py       | 17 ++++++++++++++++-
 pandas/tests/test_frame.py             | 20 ++++++++++++++++++++
 pandas/tseries/index.py                | 18 +++++++++---------
 pandas/tseries/tests/test_timezones.py | 16 ++++++++++++++++
 5 files changed, 63 insertions(+), 11 deletions(-)

diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.txt
index 046791d4287c9..02830e63ae81b 100755
--- a/doc/source/whatsnew/v0.17.1.txt
+++ b/doc/source/whatsnew/v0.17.1.txt
@@ -120,7 +120,8 @@ Bug Fixes
 - Bug in ``HDFStore.append`` with strings whose encoded length exceded the max unencoded length (:issue:`11234`)
 - Bug in merging ``datetime64[ns, tz]`` dtypes (:issue:`11405`)
 - Bug in ``HDFStore.select`` when comparing with a numpy scalar in a where clause (:issue:`11283`)
-- Bug in using ``DataFrame.ix`` with a multi-index indexer(:issue:`11372`)
+- Bug in using ``DataFrame.ix`` with a multi-index indexer (:issue:`11372`)
+- Bug in ``date_range`` with ambiguous endpoints (:issue:`11626`)
 - Prevent adding new attributes to the accessors ``.str``, ``.dt`` and ``.cat``. Retrieving such a value was not possible, so error out on setting it. (:issue:`10673`)
 - Bug in tz-conversions with an ambiguous time and ``.dt`` accessors (:issue:`11295`)
diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
index 6c78f9cf3937c..9de7732d3b289 100644
--- a/pandas/io/tests/test_pytables.py
+++ b/pandas/io/tests/test_pytables.py
@@ -3049,7 +3049,7 @@ def test_select_dtypes(self):
             result = store.select(
                 'df4', where='values>2.0')
             tm.assert_frame_equal(expected, result)
-
+
         # test selection with comparison against numpy scalar
         # GH 11283
         with ensure_clean_store(self.path) as store:
@@ -4988,6 +4988,21 @@ def test_legacy_datetimetz_object(self):
             result = store['df']
             assert_frame_equal(result, expected)
 
+    def test_dst_transitions(self):
+        # make sure we are not failing on transitions
+        with ensure_clean_store(self.path) as store:
+            times = pd.date_range("2013-10-26 23:00", "2013-10-27 01:00",
+                                  tz="Europe/London",
+                                  freq="H",
+                                  ambiguous='infer')
+
+            for i in [times, times + pd.Timedelta('10min')]:
+                _maybe_remove(store, 'df')
+                df = DataFrame({'A': range(len(i)), 'B': i}, index=i)
+                store.append('df', df)
+                result = store.select('df')
+                assert_frame_equal(result, df)
+
 def _test_sort(obj):
     if isinstance(obj, DataFrame):
         return obj.reindex(sorted(obj.index))
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index a743ce4ffef61..e4f3032cabf88 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -15439,6 +15439,26 @@ def test_to_csv_date_format(self):
 
                 assert_frame_equal(test, nat_frame)
 
+    def test_to_csv_with_dst_transitions(self):
+        pname = '__tmp_to_csv_date_format_with_dst__'
+        with ensure_clean(pname) as path:
+            # make sure we are not failing on transitions
+            times = pd.date_range("2013-10-26 23:00", "2013-10-27 01:00",
+                                  tz="Europe/London",
+                                  freq="H",
+                                  ambiguous='infer')
+
+            for i in [times, times + pd.Timedelta('10s')]:
+                df = DataFrame({'A': range(len(i))}, index=i)
+                df.to_csv(path, index=True)
+
+                # we have to reconvert the index as we
+                # don't parse the tz's
+                result = read_csv(path, index_col=0)
+                result.index = pd.to_datetime(result.index).tz_localize('UTC').tz_convert('Europe/London')
+                assert_frame_equal(result, df)
+
+
     def test_concat_empty_dataframe_dtypes(self):
         df = DataFrame(columns=list("abc"))
         df['a'] = df['a'].astype(np.bool_)
diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py
index fd26e9834bd5f..4fd61e28233a6 100644
--- a/pandas/tseries/index.py
+++ b/pandas/tseries/index.py
@@ -355,7 +355,7 @@ def __new__(cls, data=None,
         if freq is not None and not freq_infer:
             inferred = subarr.inferred_freq
             if inferred != freq.freqstr:
-                on_freq = cls._generate(subarr[0], None, len(subarr), None, freq, tz=tz)
+                on_freq = cls._generate(subarr[0], None, len(subarr), None, freq, tz=tz, ambiguous=ambiguous)
                 if not np.array_equal(subarr.asi8, on_freq.asi8):
                     raise ValueError('Inferred frequency {0} from passed dates does not '
                                      'conform to passed frequency {1}'.format(inferred, freq.freqstr))
@@ -440,17 +440,17 @@ def _generate(cls, start, end, periods, name, offset,
         if inferred_tz is None and tz is not None:
             # naive dates
             if start is not None and start.tz is None:
-                start = start.tz_localize(tz)
+                start = start.tz_localize(tz, ambiguous=False)
 
             if end is not None and end.tz is None:
-                end = end.tz_localize(tz)
+                end = end.tz_localize(tz, ambiguous=False)
 
         if start and end:
             if start.tz is None and end.tz is not None:
-                start = start.tz_localize(end.tz)
+                start = start.tz_localize(end.tz, ambiguous=False)
 
             if end.tz is None and start.tz is not None:
-                end = end.tz_localize(start.tz)
+                end = end.tz_localize(start.tz, ambiguous=False)
 
         if _use_cached_range(offset, _normalized, start, end):
             index = cls._cached_range(start, end, periods=periods,
@@ -1884,7 +1884,7 @@ def _generate_regular_range(start, end, periods, offset):
 
 
 def date_range(start=None, end=None, periods=None, freq='D', tz=None,
-               normalize=False, name=None, closed=None):
+               normalize=False, name=None, closed=None, **kwargs):
     """
     Return a fixed frequency datetime index, with day (calendar) as the default
     frequency
@@ -1920,11 +1920,11 @@ def date_range(start=None, end=None, periods=None, freq='D', tz=None,
     """
     return DatetimeIndex(start=start, end=end, periods=periods,
                          freq=freq, tz=tz, normalize=normalize, name=name,
-                         closed=closed)
+                         closed=closed, **kwargs)
 
 
 def bdate_range(start=None, end=None, periods=None, freq='B', tz=None,
-                normalize=True, name=None, closed=None):
+                normalize=True, name=None, closed=None, **kwargs):
     """
     Return a fixed frequency datetime index, with business day as the default
     frequency
@@ -1961,7 +1961,7 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None,
 
     return DatetimeIndex(start=start, end=end, periods=periods,
                          freq=freq, tz=tz, normalize=normalize, name=name,
-                         closed=closed)
+                         closed=closed, **kwargs)
 
 
 def cdate_range(start=None, end=None, periods=None, freq='C', tz=None,
diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py
index a6e5812158474..37c40dd48cf6a 100644
--- a/pandas/tseries/tests/test_timezones.py
+++ b/pandas/tseries/tests/test_timezones.py
@@ -502,6 +502,22 @@ def test_ambiguous_flags(self):
             localized_is_dst = dr.tz_localize(tz, ambiguous=is_dst)
             self.assert_numpy_array_equal(localized, localized_is_dst)
 
+        # construction with an ambiguous end-point
+        # GH 11626
+        tz = self.tzstr("Europe/London")
+
+        def f():
+            date_range("2013-10-26 23:00", "2013-10-27 01:00",
+                       tz="Europe/London",
+                       freq="H")
+        self.assertRaises(pytz.AmbiguousTimeError, f)
+        times = date_range("2013-10-26 23:00", "2013-10-27 01:00",
+                           freq="H",
+                           tz=tz,
+                           ambiguous='infer')
+        self.assertEqual(times[0], Timestamp('2013-10-26 23:00', tz=tz))
+        self.assertEqual(times[-1], Timestamp('2013-10-27 01:00', tz=tz))
+
     def test_ambiguous_nat(self):
         tz = self.tz('US/Eastern')
         times = ['11/06/2011 00:00', '11/06/2011 01:00',