Skip to content

Commit

Permalink
API: Have MultiIndex constructors always return a MI (pandas-dev#17236)
Browse files Browse the repository at this point in the history
* API: Have MultiIndex constructors return MI

This removes the special case for MultiIndex constructors returning
an Index if all the levels are length-1. Now this will return a
MultiIndex with a single level.

This is a backwards incompatible change, with no clear method for
deprecation, so we're making a clean break.

Closes pandas-dev#17178

* fixup! API: Have MultiIndex constructors return MI

* Update for comments
  • Loading branch information
TomAugspurger authored and jowens committed Sep 20, 2017
1 parent c33af56 commit 0f8205c
Show file tree
Hide file tree
Showing 12 changed files with 170 additions and 45 deletions.
24 changes: 24 additions & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,30 @@ named ``.isna()`` and ``.notna()``, these are included for classes ``Categorical

The configuration option ``pd.options.mode.use_inf_as_null`` is deprecated, and ``pd.options.mode.use_inf_as_na`` is added as a replacement.

.. _whatsnew_210.api.multiindex_single:

MultiIndex Constructor with a Single Level
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

The ``MultiIndex`` constructors no longer squeeze a MultiIndex with all
length-one levels down to a regular ``Index``. This affects all the
``MultiIndex`` constructors. (:issue:`17178`)

Previous behavior:

.. code-block:: ipython

In [2]: pd.MultiIndex.from_tuples([('a',), ('b',)])
Out[2]: Index(['a', 'b'], dtype='object')

Length 1 levels are no longer special-cased. They behave exactly as if you had
length 2+ levels, so a :class:`MultiIndex` is always returned from all of the
``MultiIndex`` constructors:

.. ipython:: python

pd.MultiIndex.from_tuples([('a',), ('b',)])

.. _whatsnew_0210.api:

Other API Changes
Expand Down
11 changes: 6 additions & 5 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@
_dict_compat,
standardize_mapping)
from pandas.core.generic import NDFrame, _shared_docs
from pandas.core.index import Index, MultiIndex, _ensure_index
from pandas.core.index import (Index, MultiIndex, _ensure_index,
_ensure_index_from_sequences)
from pandas.core.indexing import (maybe_droplevels, convert_to_index_sliceable,
check_bool_indexer)
from pandas.core.internals import (BlockManager,
Expand Down Expand Up @@ -1155,9 +1156,9 @@ def from_records(cls, data, index=None, exclude=None, columns=None,
else:
try:
to_remove = [arr_columns.get_loc(field) for field in index]

result_index = MultiIndex.from_arrays(
[arrays[i] for i in to_remove], names=index)
index_data = [arrays[i] for i in to_remove]
result_index = _ensure_index_from_sequences(index_data,
names=index)

exclude.update(index)
except Exception:
Expand Down Expand Up @@ -3000,7 +3001,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
to_remove.append(col)
arrays.append(level)

index = MultiIndex.from_arrays(arrays, names=names)
index = _ensure_index_from_sequences(arrays, names)

if verify_integrity and not index.is_unique:
duplicates = index.get_duplicates()
Expand Down
12 changes: 8 additions & 4 deletions pandas/core/indexes/api.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from pandas.core.indexes.base import (Index, _new_Index, # noqa
_ensure_index, _get_na_value,
InvalidIndexError)
from pandas.core.indexes.base import (Index,
_new_Index,
_ensure_index,
_ensure_index_from_sequences,
_get_na_value,
InvalidIndexError) # noqa
from pandas.core.indexes.category import CategoricalIndex # noqa
from pandas.core.indexes.multi import MultiIndex # noqa
from pandas.core.indexes.interval import IntervalIndex # noqa
Expand All @@ -22,7 +25,8 @@
'InvalidIndexError', 'TimedeltaIndex',
'PeriodIndex', 'DatetimeIndex',
'_new_Index', 'NaT',
'_ensure_index', '_get_na_value', '_get_combined_index',
'_ensure_index', '_ensure_index_from_sequences', '_get_na_value',
'_get_combined_index',
'_get_objs_combined_axis', '_union_indexes',
'_get_consensus_names',
'_all_indexes_same']
Expand Down
69 changes: 69 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4012,7 +4012,76 @@ def invalid_op(self, other=None):
Index._add_comparison_methods()


def _ensure_index_from_sequences(sequences, names=None):
    """
    Build an index out of one or more sequences of data.

    Exactly one sequence produces a plain Index; any other number of
    sequences is passed on to ``MultiIndex.from_arrays``.

    Parameters
    ----------
    sequences : sequence of sequences
        The data for each level of the resulting index.
    names : sequence of str, optional
        Names for the levels. When a single sequence is given, only the
        first entry is used, as the name of the resulting Index.

    Returns
    -------
    index : Index or MultiIndex

    Examples
    --------
    >>> _ensure_index_from_sequences([[1, 2, 3]], names=['name'])
    Int64Index([1, 2, 3], dtype='int64', name='name')

    >>> _ensure_index_from_sequences([['a', 'a'], ['a', 'b']],
    ...                              names=['L1', 'L2'])
    MultiIndex(levels=[['a'], ['a', 'b']],
               labels=[[0, 0], [0, 1]],
               names=['L1', 'L2'])

    See Also
    --------
    _ensure_index
    """
    from .multi import MultiIndex

    if len(sequences) != 1:
        return MultiIndex.from_arrays(sequences, names=names)

    # A lone sequence becomes a flat Index named after the first name given.
    name = names[0] if names is not None else None
    return Index(sequences[0], name=name)


def _ensure_index(index_like, copy=False):
"""
Ensure that we have an index from some index-like object
Parameters
----------
index : sequence
An Index or other sequence
copy : bool
Returns
-------
index : Index or MultiIndex
Examples
--------
>>> _ensure_index(['a', 'b'])
Index(['a', 'b'], dtype='object')
>>> _ensure_index([('a', 'a'), ('b', 'c')])
Index([('a', 'a'), ('b', 'c')], dtype='object')
>>> _ensure_index([['a', 'a'], ['b', 'c']])
MultiIndex(levels=[['a'], ['b', 'c']],
labels=[[0, 0], [0, 1]])
See Also
--------
_ensure_index_from_sequences
"""
if isinstance(index_like, Index):
if copy:
index_like = index_like.copy()
Expand Down
10 changes: 0 additions & 10 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,12 +91,6 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None,
raise ValueError('Length of levels and labels must be the same.')
if len(levels) == 0:
raise ValueError('Must pass non-zero number of levels/labels')
if len(levels) == 1:
if names:
name = names[0]
else:
name = None
return Index(levels[0], name=name, copy=True).take(labels[0])

result = object.__new__(MultiIndex)

Expand Down Expand Up @@ -1084,10 +1078,6 @@ def from_arrays(cls, arrays, sortorder=None, names=None):
MultiIndex.from_product : Make a MultiIndex from cartesian product
of iterables
"""
if len(arrays) == 1:
name = None if names is None else names[0]
return Index(arrays[0], name=name)

# Check if lengths of all arrays are equal or not,
# raise ValueError, if not
for i in range(1, len(arrays)):
Expand Down
21 changes: 15 additions & 6 deletions pandas/core/reshape/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

from pandas.core.frame import _shared_docs
from pandas.util._decorators import Appender
from pandas.core.index import MultiIndex, _get_na_value
from pandas.core.index import Index, MultiIndex, _get_na_value


class _Unstacker(object):
Expand Down Expand Up @@ -311,10 +311,14 @@ def _unstack_multiple(data, clocs):
recons_labels = decons_obs_group_ids(comp_ids, obs_ids, shape, clabels,
xnull=False)

dummy_index = MultiIndex(levels=rlevels + [obs_ids],
labels=rlabels + [comp_ids],
names=rnames + ['__placeholder__'],
verify_integrity=False)
if rlocs == []:
# Everything is in clocs, so the dummy df has a regular index
dummy_index = Index(obs_ids, name='__placeholder__')
else:
dummy_index = MultiIndex(levels=rlevels + [obs_ids],
labels=rlabels + [comp_ids],
names=rnames + ['__placeholder__'],
verify_integrity=False)

if isinstance(data, Series):
dummy = data.copy()
Expand Down Expand Up @@ -446,7 +450,12 @@ def _slow_pivot(index, columns, values):

def unstack(obj, level, fill_value=None):
if isinstance(level, (tuple, list)):
return _unstack_multiple(obj, level)
if len(level) != 1:
# _unstack_multiple only handles MultiIndexes,
# and isn't needed for a single level
return _unstack_multiple(obj, level)
else:
level = level[0]

if isinstance(obj, DataFrame):
if isinstance(obj.index, MultiIndex):
Expand Down
6 changes: 5 additions & 1 deletion pandas/core/sparse/scipy_sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,11 @@ def robust_get_level_values(i):
labels_to_i = Series(labels_to_i)
if len(subset) > 1:
labels_to_i.index = MultiIndex.from_tuples(labels_to_i.index)
labels_to_i.index.names = [index.names[i] for i in subset]
labels_to_i.index.names = [index.names[i] for i in subset]
else:
labels_to_i.index = Index(x[0] for x in labels_to_i.index)
labels_to_i.index.name = index.names[subset[0]]

labels_to_i.name = 'value'
return (labels_to_i)

Expand Down
7 changes: 6 additions & 1 deletion pandas/core/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -1452,7 +1452,12 @@ def cons_row(x):

if expand:
result = list(result)
return MultiIndex.from_tuples(result, names=name)
out = MultiIndex.from_tuples(result, names=name)
if out.nlevels == 1:
# We had all tuples of length-one, which are
# better represented as a regular Index.
out = out.get_level_values(0)
return out
else:
return Index(result, name=name)
else:
Expand Down
13 changes: 7 additions & 6 deletions pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
is_scalar, is_categorical_dtype)
from pandas.core.dtypes.missing import isna
from pandas.core.dtypes.cast import astype_nansafe
from pandas.core.index import Index, MultiIndex, RangeIndex
from pandas.core.index import (Index, MultiIndex, RangeIndex,
_ensure_index_from_sequences)
from pandas.core.series import Series
from pandas.core.frame import DataFrame
from pandas.core.categorical import Categorical
Expand Down Expand Up @@ -1444,7 +1445,8 @@ def _agg_index(self, index, try_parse_dates=True):
arr, _ = self._infer_types(arr, col_na_values | col_na_fvalues)
arrays.append(arr)

index = MultiIndex.from_arrays(arrays, names=self.index_names)
names = self.index_names
index = _ensure_index_from_sequences(arrays, names)

return index

Expand Down Expand Up @@ -1808,7 +1810,7 @@ def read(self, nrows=None):
try_parse_dates=True)
arrays.append(values)

index = MultiIndex.from_arrays(arrays)
index = _ensure_index_from_sequences(arrays)

if self.usecols is not None:
names = self._filter_usecols(names)
Expand Down Expand Up @@ -3138,9 +3140,8 @@ def _get_empty_meta(columns, index_col, index_names, dtype=None):
if index_col is None or index_col is False:
index = Index([])
else:
index = [Series([], dtype=dtype[index_name])
for index_name in index_names]
index = MultiIndex.from_arrays(index, names=index_names)
data = [Series([], dtype=dtype[name]) for name in index_names]
index = _ensure_index_from_sequences(data, names=index_names)
index_col.sort()
for i, n in enumerate(index_col):
columns.pop(n - i)
Expand Down
18 changes: 17 additions & 1 deletion pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
DataFrame, Float64Index, Int64Index,
CategoricalIndex, DatetimeIndex, TimedeltaIndex,
PeriodIndex, isna)
from pandas.core.index import _get_combined_index
from pandas.core.index import _get_combined_index, _ensure_index_from_sequences
from pandas.util.testing import assert_almost_equal
from pandas.compat.numpy import np_datetime64_compat

Expand Down Expand Up @@ -2112,3 +2112,19 @@ def test_intersect_str_dates(self):
res = i2.intersection(i1)

assert len(res) == 0


class TestIndexUtils(object):
# Tests for index helper functions such as _ensure_index_from_sequences.

# Each case is (input sequences, names, expected index): the two cases with
# a single sequence expect a plain Index (unnamed, then named), and the two
# cases with two sequences expect a MultiIndex (unnamed, then named).
@pytest.mark.parametrize('data, names, expected', [
([[1, 2, 3]], None, Index([1, 2, 3])),
([[1, 2, 3]], ['name'], Index([1, 2, 3], name='name')),
([['a', 'a'], ['c', 'd']], None,
MultiIndex([['a'], ['c', 'd']], [[0, 0], [0, 1]])),
([['a', 'a'], ['c', 'd']], ['L1', 'L2'],
MultiIndex([['a'], ['c', 'd']], [[0, 0], [0, 1]],
names=['L1', 'L2'])),
])
def test_ensure_index_from_sequences(self, data, names, expected):
# The helper's result is checked against the expected index with
# tm.assert_index_equal.
result = _ensure_index_from_sequences(data, names)
tm.assert_index_equal(result, expected)
20 changes: 9 additions & 11 deletions pandas/tests/indexes/test_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,15 +537,12 @@ def test_astype(self):
self.index.astype(np.dtype(int))

def test_constructor_single_level(self):
single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']],
labels=[[0, 1, 2, 3]], names=['first'])
assert isinstance(single_level, Index)
assert not isinstance(single_level, MultiIndex)
assert single_level.name == 'first'

single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']],
labels=[[0, 1, 2, 3]])
assert single_level.name is None
result = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']],
labels=[[0, 1, 2, 3]], names=['first'])
assert isinstance(result, MultiIndex)
expected = Index(['foo', 'bar', 'baz', 'qux'], name='first')
tm.assert_index_equal(result.levels[0], expected)
assert result.names == ['first']

def test_constructor_no_levels(self):
tm.assert_raises_regex(ValueError, "non-zero number "
Expand Down Expand Up @@ -768,8 +765,9 @@ def test_from_arrays_empty(self):

# 1 level
result = MultiIndex.from_arrays(arrays=[[]], names=['A'])
assert isinstance(result, MultiIndex)
expected = Index([], name='A')
tm.assert_index_equal(result, expected)
tm.assert_index_equal(result.levels[0], expected)

# N levels
for N in [2, 3]:
Expand Down Expand Up @@ -830,7 +828,7 @@ def test_from_product_empty(self):
# 1 level
result = MultiIndex.from_product([[]], names=['A'])
expected = pd.Index([], name='A')
tm.assert_index_equal(result, expected)
tm.assert_index_equal(result.levels[0], expected)

# 2 levels
l1 = [[], ['foo', 'bar', 'baz'], []]
Expand Down
4 changes: 4 additions & 0 deletions pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1909,7 +1909,11 @@ def keyfunc(x):

# convert tuples to index
if nentries == 1:
# we have a single level of tuples, i.e. a regular Index
index = Index(tuples[0], name=names[0])
elif nlevels == 1:
name = None if names is None else names[0]
index = Index((x[0] for x in tuples), name=name)
else:
index = MultiIndex.from_tuples(tuples, names=names)
return index
Expand Down

0 comments on commit 0f8205c

Please sign in to comment.