Skip to content

Commit

Permalink
Bugfix for multilevel columns with empty strings in Python 2 (#17099)
Browse files (browse the repository at this point in the history)
  • Loading branch information
chrisjbillington authored and jreback committed Aug 10, 2017
1 parent b822535 commit a09db4b
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 9 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,7 @@ Indexing
- Bug in reindexing on an empty ``CategoricalIndex`` (:issue:`16770`)
- Fixes ``DataFrame.loc`` for setting with alignment and tz-aware ``DatetimeIndex`` (:issue:`16889`)
- Avoids ``IndexError`` when passing an Index or Series to ``.iloc`` with older numpy (:issue:`17193`)
- Allow unicode empty strings as placeholders in multilevel columns in Python 2 (:issue:`17099`)

I/O
^^^
Expand Down
12 changes: 10 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2134,10 +2134,18 @@ def _getitem_multilevel(self, key):
result = self._constructor(new_values, index=self.index,
columns=result_columns)
result = result.__finalize__(self)

# If there is only one column being returned, and its name is
# either an empty string, or a tuple with an empty string as its
# first element, then treat the empty string as a placeholder
# and return the column as if the user had provided that empty
# string in the key. If the result is a Series, exclude the
# implied empty string from its name.
if len(result.columns) == 1:
top = result.columns[0]
if ((type(top) == str and top == '') or
(type(top) == tuple and top[0] == '')):
if isinstance(top, tuple):
top = top[0]
if top == '':
result = result['']
if isinstance(result, Series):
result = self._constructor_sliced(result,
Expand Down
21 changes: 14 additions & 7 deletions pandas/tests/test_multilevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1675,24 +1675,31 @@ def test_int_series_slicing(self):
expected = self.ymd.reindex(s.index[5:])
tm.assert_frame_equal(result, expected)

def test_mixed_depth_get(self):
@pytest.mark.parametrize('unicode_strings', [True, False])
def test_mixed_depth_get(self, unicode_strings):
# If unicode_strings is True, the column labels in dataframe
# construction will use unicode strings in Python 2 (pull request
# #17099).

arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'],
['', 'OD', 'OD', 'result1', 'result2', 'result1'],
['', 'wx', 'wy', '', '', '']]

if unicode_strings:
arrays = [[u(s) for s in arr] for arr in arrays]

tuples = sorted(zip(*arrays))
index = MultiIndex.from_tuples(tuples)
df = DataFrame(randn(4, 6), columns=index)
df = DataFrame(np.random.randn(4, 6), columns=index)

result = df['a']
expected = df['a', '', '']
tm.assert_series_equal(result, expected, check_names=False)
assert result.name == 'a'
expected = df['a', '', ''].rename('a')
tm.assert_series_equal(result, expected)

result = df['routine1', 'result1']
expected = df['routine1', 'result1', '']
tm.assert_series_equal(result, expected, check_names=False)
assert result.name == ('routine1', 'result1')
expected = expected.rename(('routine1', 'result1'))
tm.assert_series_equal(result, expected)

def test_mixed_depth_insert(self):
arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'],
Expand Down

0 comments on commit a09db4b

Please sign in to comment.