From 3ca3222e95560b5f17c7ca2e3e1f8f9fbf63f899 Mon Sep 17 00:00:00 2001 From: jreback Date: Thu, 13 Jun 2013 11:19:32 -0400 Subject: [PATCH] BUG: (GH3880) index names are now propagated with loc/ix --- RELEASE.rst | 6 ++++-- doc/source/v0.11.1.txt | 9 +++++++-- pandas/core/frame.py | 4 ++-- pandas/core/index.py | 10 ++++++++-- pandas/core/internals.py | 4 ++-- pandas/tests/test_frame.py | 1 + pandas/tests/test_indexing.py | 13 +++++++++++++ 7 files changed, 37 insertions(+), 10 deletions(-) diff --git a/RELEASE.rst b/RELEASE.rst index 03cfc4f6bcafc..839c472da1610 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -177,6 +177,8 @@ pandas 0.11.1 - Non-unique indexing with a slice via ``loc`` and friends fixed (GH3659_) - Allow insert/delete to non-unique columns (GH3679_) - Extend ``reindex`` to correctly deal with non-unique indices (GH3679_) + - ``DataFrame.itertuples()`` now works with frames with duplicate column + names (GH3873_) - Fixed bug in groupby with empty series referencing a variable before assignment. (GH3510_) - Fixed bug in mixed-frame assignment with aligned series (GH3492_) - Fixed bug in selecting month/quarter/year from a series would not select the time element @@ -228,8 +230,7 @@ pandas 0.11.1 - PandasObjects raise TypeError when trying to hash (GH3882_) - Fix incorrect arguments passed to concat that are not list-like (e.g. concat(df1,df2)) (GH3481_) - Correctly parse when passed the ``dtype=str`` (or other variable-len string dtypes) in ``read_csv`` (GH3795_) - - ``DataFrame.itertuples()`` now works with frames with duplicate column - names (GH3873_) + - Fix index name not propagating when using ``loc/ix`` (GH3880_) .. _GH3164: https://github.com/pydata/pandas/issues/3164 .. _GH2786: https://github.com/pydata/pandas/issues/2786 @@ -323,6 +324,7 @@ pandas 0.11.1 .. _GH3834: https://github.com/pydata/pandas/issues/3834 .. _GH3873: https://github.com/pydata/pandas/issues/3873 .. _GH3877: https://github.com/pydata/pandas/issues/3877 +.. 
_GH3880: https://github.com/pydata/pandas/issues/3880 pandas 0.11.0 diff --git a/doc/source/v0.11.1.txt b/doc/source/v0.11.1.txt index 1a43e9e6a49e0..dfc36258a680f 100644 --- a/doc/source/v0.11.1.txt +++ b/doc/source/v0.11.1.txt @@ -348,11 +348,14 @@ Bug Fixes - Duplicate indexes with and empty DataFrame.from_records will return a correct frame (GH3562_) - Concat to produce a non-unique columns when duplicates are across dtypes is fixed (GH3602_) - Allow insert/delete to non-unique columns (GH3679_) + - Non-unique indexing with a slice via ``loc`` and friends fixed (GH3659_) + - Allow insert/delete to non-unique columns (GH3679_) + - Extend ``reindex`` to correctly deal with non-unique indices (GH3679_) + - ``DataFrame.itertuples()`` now works with frames with duplicate column + names (GH3873_) - ``DataFrame.from_records`` did not accept empty recarrays (GH3682_) - ``read_html`` now correctly skips tests (GH3741_) - - ``DataFrame.itertuples()`` now works with frames with duplicate column - names (GH3873_) See the `full release notes `__ or issue tracker @@ -405,3 +408,5 @@ on GitHub for a complete list. .. _GH3834: https://github.com/pydata/pandas/issues/3834 .. _GH3873: https://github.com/pydata/pandas/issues/3873 .. _GH3877: https://github.com/pydata/pandas/issues/3877 +.. _GH3659: https://github.com/pydata/pandas/issues/3659 +.. 
_GH3679: https://github.com/pydata/pandas/issues/3679 diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b6e29204fc0d8..f9f8a424f8d96 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2712,14 +2712,14 @@ def _reindex_multi(self, new_index, new_columns, copy, fill_value): def _reindex_index(self, new_index, method, copy, level, fill_value=NA, limit=None): new_index, indexer = self.index.reindex(new_index, method, level, - limit=limit) + limit=limit, copy_if_needed=True) return self._reindex_with_indexers(new_index, indexer, None, None, copy, fill_value) def _reindex_columns(self, new_columns, copy, level, fill_value=NA, limit=None): new_columns, indexer = self.columns.reindex(new_columns, level=level, - limit=limit) + limit=limit, copy_if_needed=True) return self._reindex_with_indexers(None, None, new_columns, indexer, copy, fill_value) diff --git a/pandas/core/index.py b/pandas/core/index.py index 51ebd58c33343..a5880b9f18670 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -920,7 +920,7 @@ def _get_method(self, method): } return aliases.get(method, method) - def reindex(self, target, method=None, level=None, limit=None): + def reindex(self, target, method=None, level=None, limit=None, copy_if_needed=False): """ For Index, simply returns the new index and the results of get_indexer. 
Provided here to enable an interface that is amenable for @@ -939,6 +939,12 @@ def reindex(self, target, method=None, level=None, limit=None): else: if self.equals(target): indexer = None + + # to avoid aliasing an existing index + if copy_if_needed and target.name != self.name and self.name is not None: + if target.name is None: + target = self.copy() + else: if self.is_unique: indexer = self.get_indexer(target, method=method, @@ -2196,7 +2202,7 @@ def get_indexer(self, target, method=None, limit=None): return com._ensure_platform_int(indexer) - def reindex(self, target, method=None, level=None, limit=None): + def reindex(self, target, method=None, level=None, limit=None, copy_if_needed=False): """ Performs any necessary conversion on the input index and calls get_indexer. This method is here so MultiIndex and an Index of diff --git a/pandas/core/internals.py b/pandas/core/internals.py index af1543dad0314..49d92afc46848 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1948,7 +1948,7 @@ def reindex_axis(self, new_axis, method=None, axis=0, copy=True): 'axis == 0') return self.reindex_items(new_axis) - new_axis, indexer = cur_axis.reindex(new_axis, method) + new_axis, indexer = cur_axis.reindex(new_axis, method, copy_if_needed=True) return self.reindex_indexer(new_axis, indexer, axis=axis) def reindex_indexer(self, new_axis, indexer, axis=1, fill_value=np.nan): @@ -2014,7 +2014,7 @@ def reindex_items(self, new_items, copy=True, fill_value=np.nan): return data.reindex_items(new_items) # TODO: this part could be faster (!) 
- new_items, indexer = self.items.reindex(new_items) + new_items, indexer = self.items.reindex(new_items, copy_if_needed=True) new_axes = [new_items] + self.axes[1:] # could have so me pathological (MultiIndex) issues here diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 2b2d59306da6e..5b4d582e5e42e 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -7207,6 +7207,7 @@ def test_reindex_name_remains(self): s = Series(random.rand(10)) df = DataFrame(s, index=np.arange(len(s))) i = Series(np.arange(10), name='iname') + df = df.reindex(i) self.assert_(df.index.name == 'iname') diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index 295eaede443b1..0719d9c9a87db 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -1024,6 +1024,19 @@ def test_non_unique_loc(self): expected = DataFrame({'A' : [2,4,5], 'B' : [4,6,7]}, index = [1,1,2]) assert_frame_equal(result,expected) + def test_loc_name(self): + # GH 3880 + df = DataFrame([[1, 1], [1, 1]]) + df.index.name = 'index_name' + result = df.iloc[[0, 1]].index.name + self.assert_(result == 'index_name') + + result = df.ix[[0, 1]].index.name + self.assert_(result == 'index_name') + + result = df.loc[[0, 1]].index.name + self.assert_(result == 'index_name') + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],