Merge branch 'master' into bug_issue16770

rs2 · Jul 8, 2017 · eab3192 · eab3192
2 parents 7acc09f + 9c44f9b
commit eab3192
Show file tree

Hide file tree

Showing 17 changed files with 134 additions and 92 deletions.
diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py
@@ -53,7 +53,11 @@ def setup(self):
         self.rng6 = date_range(start='1/1/1', periods=self.N, freq='B')
 
         self.rng7 = date_range(start='1/1/1700', freq='D', periods=100000)
-        self.a = self.rng7[:50000].append(self.rng7[50002:])
+        self.no_freq = self.rng7[:50000].append(self.rng7[50002:])
+        self.d_freq = self.rng7[:50000].append(self.rng7[50000:])
+
+        self.rng8 = date_range(start='1/1/1700', freq='B', periods=100000)
+        self.b_freq = self.rng8[:50000].append(self.rng8[50000:])
 
     def time_add_timedelta(self):
         (self.rng + dt.timedelta(minutes=2))
@@ -94,8 +98,14 @@ def time_infer_dst(self):
     def time_timeseries_is_month_start(self):
         self.rng6.is_month_start
 
-    def time_infer_freq(self):
-        infer_freq(self.a)
+    def time_infer_freq_none(self):
+        infer_freq(self.no_freq)
+
+    def time_infer_freq_daily(self):
+        infer_freq(self.d_freq)
+
+    def time_infer_freq_business(self):
+        infer_freq(self.b_freq)
 
 
 class TimeDatetimeConverter(object):

diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt
@@ -1,101 +1,61 @@
 .. _whatsnew_0203:
 
-v0.20.3 (June ??, 2017)
+v0.20.3 (July 7, 2017)
 -----------------------
 
-This is a minor bug-fix release in the 0.20.x series and includes some small regression fixes,
-bug fixes and performance improvements.
-We recommend that all users upgrade to this version.
+This is a minor bug-fix release in the 0.20.x series and includes some small regression fixes
+and bug fixes. We recommend that all users upgrade to this version.
 
 .. contents:: What's new in v0.20.3
     :local:
     :backlinks: none
 
-
-.. _whatsnew_0203.enhancements:
-
-Enhancements
-~~~~~~~~~~~~
-
-
-
-
-
-
-.. _whatsnew_0203.performance:
-
-Performance Improvements
-~~~~~~~~~~~~~~~~~~~~~~~~
-
-
-
-
-
-
 .. _whatsnew_0203.bug_fixes:
 
 Bug Fixes
 ~~~~~~~~~
-- Fixed issue with dataframe scatter plot for categorical data that reports incorrect column key not found when categorical data is used for plotting (:issue:`16199`)
-- Fixed issue with :meth:`DataFrame.style` where element id's were not unique (:issue:`16780`)
-- Fixed a pytest marker failing downstream packages' tests suites (:issue:`16680`)
-- Fixed compat with loading a ``DataFrame`` with a ``PeriodIndex``, from a ``format='fixed'`` HDFStore, in Python 3, that was written in Python 2 (:issue:`16781`)
-- Fixed a bug in failing to compute rolling computations of a column-MultiIndexed ``DataFrame`` (:issue:`16789`, :issue:`16825`)
-- Bug in a DataFrame/Series with a ``TimedeltaIndex`` when slice indexing (:issue:`16637`)
 
+- Fixed a bug where rolling computations on a column-MultiIndexed ``DataFrame`` failed (:issue:`16789`, :issue:`16825`)
+- Fixed a pytest marker that caused downstream packages' test suites to fail (:issue:`16680`)
 
 Conversion
 ^^^^^^^^^^
 
 - Bug in pickle compat prior to the v0.20.x series, when ``UTC`` is a timezone in a Series/DataFrame/Index (:issue:`16608`)
 - Bug in ``Series`` construction when passing a ``Series`` with ``dtype='category'`` (:issue:`16524`).
-- Bug in ``DataFrame.astype()`` when passing a ``Series`` as the ``dtype`` kwarg. (:issue:`16717`).
+- Bug in :meth:`DataFrame.astype` when passing a ``Series`` as the ``dtype`` kwarg (:issue:`16717`).
 
 Indexing
 ^^^^^^^^
 
 - Bug in ``Float64Index`` causing an empty array instead of ``None`` to be returned from ``.get(np.nan)`` on a Series whose index did not contain any ``NaN`` s (:issue:`8569`)
 - Bug in ``MultiIndex.isin`` causing an error when passing an empty iterable (:issue:`16777`)
+- Fixed a bug in slicing a DataFrame/Series that has a ``TimedeltaIndex`` (:issue:`16637`)
 
 I/O
 ^^^
 
 - Bug in :func:`read_csv` in which files weren't opened as binary files by the C engine on Windows, causing EOF characters mid-field, which would fail (:issue:`16039`, :issue:`16559`, :issue:`16675`)
 - Bug in :func:`read_hdf` in which reading a ``Series`` saved to an HDF file in 'fixed' format fails when an explicit ``mode='r'`` argument is supplied (:issue:`16583`)
-- Bug in :func:`DataFrame.to_latex` where ``bold_rows`` was wrongly specified to be ``True`` by default, whereas in reality row labels remained non-bold whatever parameter provided. (:issue:`16707`)
+- Bug in :meth:`DataFrame.to_latex` where ``bold_rows`` was wrongly specified to be ``True`` by default, whereas in reality row labels remained non-bold regardless of the value provided. (:issue:`16707`)
+- Fixed an issue with :meth:`DataFrame.style` where generated element ids were not unique (:issue:`16780`)
+- Fixed loading a ``DataFrame`` with a ``PeriodIndex``, from a ``format='fixed'`` HDFStore, in Python 3, that was written in Python 2 (:issue:`16781`)
 
 Plotting
 ^^^^^^^^
-- Fix regression in series plotting that prevented RGB and RGBA tuples from being used as color arguments (:issue:`16233`)
-
-
-
-Groupby/Resample/Rolling
-^^^^^^^^^^^^^^^^^^^^^^^^
-
-
-
-Sparse
-^^^^^^
-
-
 
+- Fixed regression that prevented RGB and RGBA tuples from being used as color arguments (:issue:`16233`)
+- Fixed an issue with :meth:`DataFrame.plot.scatter` that incorrectly raised a ``KeyError`` when categorical data is used for plotting (:issue:`16199`)
 
 Reshaping
 ^^^^^^^^^
 
+- ``PeriodIndex.join`` and ``TimedeltaIndex.join`` were missing the ``sort=`` kwarg (:issue:`16541`)
 - Bug in joining on a ``MultiIndex`` with a ``category`` dtype for a level (:issue:`16627`).
 - Bug in :func:`merge` when merging/joining with multiple categorical columns (:issue:`16767`)
 
-
-Numeric
-^^^^^^^
-
-
 Categorical
 ^^^^^^^^^^^
 
-- Bug in ``DataFrame.sort_values`` not respecting the ``kind`` with categorical data (:issue:`16793`)
 
-Other
-^^^^^
+- Bug in ``DataFrame.sort_values`` not respecting the ``kind`` with categorical data (:issue:`16793`)
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
@@ -170,7 +170,7 @@ Groupby/Resample/Rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
 - Bug in ``DataFrame.resample().size()`` where an empty ``DataFrame`` did not return a ``Series`` (:issue:`14962`)
 
-
+- Bug in ``infer_freq`` causing indices with 2-day gaps during the working week to be wrongly inferred as business daily (:issue:`16624`)
 
 Sparse
 ^^^^^^

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -2704,7 +2704,7 @@ def get_indexer_non_unique(self, target):
             tgt_values = target._values
 
         indexer, missing = self._engine.get_indexer_non_unique(tgt_values)
-        return indexer, missing
+        return _ensure_platform_int(indexer), missing
 
     def get_indexer_for(self, target, **kwargs):
         """
@@ -3126,7 +3126,7 @@ def _join_non_unique(self, other, how='left', return_indexers=False):
         left_idx = _ensure_platform_int(left_idx)
         right_idx = _ensure_platform_int(right_idx)
 
-        join_index = self.values.take(left_idx)
+        join_index = np.asarray(self.values.take(left_idx))
         mask = left_idx == -1
         np.putmask(join_index, mask, other._values.take(right_idx))
 

diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
@@ -500,7 +500,6 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
                 codes = self.categories.get_indexer(target)
 
         indexer, _ = self._engine.get_indexer_non_unique(codes)
-
         return _ensure_platform_int(indexer)
 
     @Appender(_index_shared_docs['get_indexer_non_unique'] % _index_doc_kwargs)
@@ -511,7 +510,8 @@ def get_indexer_non_unique(self, target):
             target = target.categories
 
         codes = self.categories.get_indexer(target)
-        return self._engine.get_indexer_non_unique(codes)
+        indexer, missing = self._engine.get_indexer_non_unique(codes)
+        return _ensure_platform_int(indexer), missing
 
     @Appender(_index_shared_docs['_convert_scalar_indexer'])
     def _convert_scalar_indexer(self, key, kind=None):

diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py
@@ -912,14 +912,16 @@ def insert(self, loc, item):
                               self[loc:].asi8))
         return self._shallow_copy(idx)
 
-    def join(self, other, how='left', level=None, return_indexers=False):
+    def join(self, other, how='left', level=None, return_indexers=False,
+             sort=False):
         """
         See Index.join
         """
         self._assert_can_do_setop(other)
 
         result = Int64Index.join(self, other, how=how, level=level,
-                                 return_indexers=return_indexers)
+                                 return_indexers=return_indexers,
+                                 sort=sort)
 
         if return_indexers:
             result, lidx, ridx = result

diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py
@@ -516,7 +516,8 @@ def union(self, other):
                     result.freq = to_offset(result.inferred_freq)
             return result
 
-    def join(self, other, how='left', level=None, return_indexers=False):
+    def join(self, other, how='left', level=None, return_indexers=False,
+             sort=False):
         """
         See Index.join
         """
@@ -527,7 +528,8 @@ def join(self, other, how='left', level=None, return_indexers=False):
                 pass
 
         return Index.join(self, other, how=how, level=level,
-                          return_indexers=return_indexers)
+                          return_indexers=return_indexers,
+                          sort=sort)
 
     def _wrap_joined_index(self, joined, other):
         name = self.name if self.name == other.name else None

diff --git a/pandas/tests/frame/test_join.py b/pandas/tests/frame/test_join.py
@@ -3,11 +3,19 @@
 import pytest
 import numpy as np
 
-from pandas import DataFrame, Index
+from pandas import DataFrame, Index, PeriodIndex
 from pandas.tests.frame.common import TestData
 import pandas.util.testing as tm
 
 
+@pytest.fixture
+def frame_with_period_index():
+    return DataFrame(
+        data=np.arange(20).reshape(4, 5),
+        columns=list('abcde'),
+        index=PeriodIndex(start='2000', freq='A', periods=4))
+
+
 @pytest.fixture
 def frame():
     return TestData().frame
@@ -139,3 +147,21 @@ def test_join_overlap(frame):
 
     # column order not necessarily sorted
     tm.assert_frame_equal(joined, expected.loc[:, joined.columns])
+
+
+def test_join_period_index(frame_with_period_index):
+    other = frame_with_period_index.rename(
+        columns=lambda x: '{key}{key}'.format(key=x))
+
+    joined_values = np.concatenate(
+        [frame_with_period_index.values] * 2, axis=1)
+
+    joined_cols = frame_with_period_index.columns.append(other.columns)
+
+    joined = frame_with_period_index.join(other)
+    expected = DataFrame(
+        data=joined_values,
+        columns=joined_cols,
+        index=frame_with_period_index.index)
+
+    tm.assert_frame_equal(joined, expected)
diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py
@@ -132,6 +132,20 @@ def test_reindex_base(self):
         with tm.assert_raises_regex(ValueError, 'Invalid fill method'):
             idx.get_indexer(idx, method='invalid')
 
+    def test_get_indexer_consistency(self):
+        # See GH 16819
+        for name, index in self.indices.items():
+            if isinstance(index, IntervalIndex):
+                continue
+
+            indexer = index.get_indexer(index[0:2])
+            assert isinstance(indexer, np.ndarray)
+            assert indexer.dtype == np.intp
+
+            indexer, _ = index.get_indexer_non_unique(index[0:2])
+            assert isinstance(indexer, np.ndarray)
+            assert indexer.dtype == np.intp
+
     def test_ndarray_compat_properties(self):
         idx = self.create_index()
         assert idx.T.equals(idx)
@@ -905,7 +919,7 @@ def test_fillna(self):
 
     def test_nulls(self):
         # this is really a smoke test for the methods
-        # as these are adequantely tested for function elsewhere
+        # as these are adequately tested for function elsewhere
 
         for name, index in self.indices.items():
             if len(index) == 0:
@@ -933,3 +947,10 @@ def test_empty(self):
         index = self.create_index()
         assert not index.empty
         assert index[:0].empty
+
+    @pytest.mark.parametrize('how', ['outer', 'inner', 'left', 'right'])
+    def test_join_self_unique(self, how):
+        index = self.create_index()
+        if index.is_unique:
+            joined = index.join(index, how=how)
+            assert (index == joined).all()
diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py
@@ -773,3 +773,9 @@ def test_map(self):
         result = index.map(lambda x: x.ordinal)
         exp = Index([x.ordinal for x in index])
         tm.assert_index_equal(result, exp)
+
+    @pytest.mark.parametrize('how', ['outer', 'inner', 'left', 'right'])
+    def test_join_self(self, how):
+        index = period_range('1/1/2000', periods=10)
+        joined = index.join(index, how=how)
+        assert index is joined
diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
@@ -1131,17 +1131,6 @@ def test_get_indexer_strings(self):
         with pytest.raises(TypeError):
             idx.get_indexer(['a', 'b', 'c', 'd'], method='pad', tolerance=2)
 
-    def test_get_indexer_consistency(self):
-        # See GH 16819
-        for name, index in self.indices.items():
-            indexer = index.get_indexer(index[0:2])
-            assert isinstance(indexer, np.ndarray)
-            assert indexer.dtype == np.intp
-
-            indexer, _ = index.get_indexer_non_unique(index[0:2])
-            assert isinstance(indexer, np.ndarray)
-            assert indexer.dtype == np.intp
-
     def test_get_loc(self):
         idx = pd.Index([0, 1, 2])
         all_methods = [None, 'pad', 'backfill', 'nearest']

diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py
@@ -401,23 +401,23 @@ def test_reindex_dtype(self):
         exp = CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c'])
         tm.assert_index_equal(res, exp, exact=True)
         tm.assert_numpy_array_equal(indexer,
-                                    np.array([0, 3, 2], dtype=np.int64))
+                                    np.array([0, 3, 2], dtype=np.intp))
 
         c = CategoricalIndex(['a', 'b', 'c', 'a'],
                              categories=['a', 'b', 'c', 'd'])
         res, indexer = c.reindex(['a', 'c'])
         exp = Index(['a', 'a', 'c'], dtype='object')
         tm.assert_index_equal(res, exp, exact=True)
         tm.assert_numpy_array_equal(indexer,
-                                    np.array([0, 3, 2], dtype=np.int64))
+                                    np.array([0, 3, 2], dtype=np.intp))
 
         c = CategoricalIndex(['a', 'b', 'c', 'a'],
                              categories=['a', 'b', 'c', 'd'])
         res, indexer = c.reindex(Categorical(['a', 'c']))
         exp = CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c'])
         tm.assert_index_equal(res, exp, exact=True)
         tm.assert_numpy_array_equal(indexer,
-                                    np.array([0, 3, 2], dtype=np.int64))
+                                    np.array([0, 3, 2], dtype=np.intp))
 
     def test_reindex_empty_index(self):
         # See GH16770

diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py
@@ -564,15 +564,23 @@ def test_freq_conversion(self):
 
 
 class TestSlicing(object):
+    @pytest.mark.parametrize('freq', ['B', 'D'])
+    def test_timedelta(self, freq):
+        index = date_range('1/1/2000', periods=50, freq=freq)
 
-    def test_timedelta(self):
-        # this is valid too
-        index = date_range('1/1/2000', periods=50, freq='B')
         shifted = index + timedelta(1)
         back = shifted + timedelta(-1)
-        assert tm.equalContents(index, back)
-        assert shifted.freq == index.freq
-        assert shifted.freq == back.freq
+        tm.assert_index_equal(index, back)
+
+        if freq == 'D':
+            expected = pd.tseries.offsets.Day(1)
+            assert index.freq == expected
+            assert shifted.freq == expected
+            assert back.freq == expected
+        else:  # freq == 'B'
+            assert index.freq == pd.tseries.offsets.BusinessDay(1)
+            assert shifted.freq is None
+            assert back.freq == pd.tseries.offsets.BusinessDay(1)
 
         result = index - timedelta(1)
         expected = index + timedelta(-1)

diff --git a/...eriodindex_0.20.1_x86_64_darwin_2.7.13.h5 → ...eriodindex_0.20.1_x86_64_darwin_2.7.13.h5 b/...eriodindex_0.20.1_x86_64_darwin_2.7.13.h5 → ...eriodindex_0.20.1_x86_64_darwin_2.7.13.h5
-Original file line number
+Diff line change
@@ Expand Up / @@ -170,7 +170,7 @@ Groupby/Resample/Rolling @@
     ^^^^^^^^^^^^^^^^^^^^^^^^
     - Bug in ``DataFrame.resample().size()`` where an empty ``DataFrame`` did not return a ``Series`` (:issue:`14962`)
+    - Bug in ``infer_freq`` causing indices with 2-day gaps during the working week to be wrongly inferred as business daily (:issue:`16624`)
     Sparse
     ^^^^^^
@@ Expand Down @@