Add allow_sets-kwarg to is_list_like (pandas-dev#23065)

tm9k1 · Nov 19, 2018 · c16fcc4 · c16fcc4
1 parent f490328
commit c16fcc4
Show file tree

Hide file tree

Showing 5 changed files with 85 additions and 26 deletions.
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
@@ -198,6 +198,8 @@ Other Enhancements
 - :meth:`round`, :meth:`ceil`, and meth:`floor` for :class:`DatetimeIndex` and :class:`Timestamp` now support an ``ambiguous`` argument for handling datetimes that are rounded to ambiguous times (:issue:`18946`)
 - :class:`Resampler` now is iterable like :class:`GroupBy` (:issue:`15314`).
 - :meth:`Series.resample` and :meth:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`).
+- :meth:`pandas.core.dtypes.is_list_like` has gained a keyword ``allow_sets`` which is ``True`` by default; if ``False``,
+  all instances of ``set`` will not be considered "list-like" anymore (:issue:`23061`)
 - :meth:`Index.to_frame` now supports overriding column name(s) (:issue:`22580`).
 - New attribute :attr:`__git_version__` will return git commit sha of current build (:issue:`21295`).
 - Compatibility with Matplotlib 3.0 (:issue:`22790`).

diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py
@@ -141,6 +141,7 @@ def lfilter(*args, **kwargs):
     Mapping = collections.abc.Mapping
     Sequence = collections.abc.Sequence
     Sized = collections.abc.Sized
+    Set = collections.abc.Set
 
 else:
     # Python 2
@@ -201,6 +202,7 @@ def get_range_parameters(data):
     Mapping = collections.Mapping
     Sequence = collections.Sequence
     Sized = collections.Sized
+    Set = collections.Set
 
 if PY2:
     def iteritems(obj, **kw):

diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
@@ -16,10 +16,10 @@
     ABCSparseArray, ABCSparseSeries, ABCCategoricalIndex, ABCIndexClass,
     ABCDateOffset)
 from pandas.core.dtypes.inference import (  # noqa:F401
-    is_bool, is_integer, is_hashable, is_iterator, is_float,
-    is_dict_like, is_scalar, is_string_like, is_list_like, is_number,
-    is_file_like, is_re, is_re_compilable, is_sequence, is_nested_list_like,
-    is_named_tuple, is_array_like, is_decimal, is_complex, is_interval)
+    is_bool, is_integer, is_float, is_number, is_decimal, is_complex,
+    is_re, is_re_compilable, is_dict_like, is_string_like, is_file_like,
+    is_list_like, is_nested_list_like, is_sequence, is_named_tuple,
+    is_hashable, is_iterator, is_array_like, is_scalar, is_interval)
 
 _POSSIBLY_CAST_DTYPES = {np.dtype(t).name
                          for t in ['O', 'int8', 'uint8', 'int16', 'uint16',

diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py
@@ -5,7 +5,7 @@
 from numbers import Number
 from pandas import compat
 from pandas.compat import (PY2, string_types, text_type,
-                           string_and_binary_types, re_type)
+                           string_and_binary_types, re_type, Set)
 from pandas._libs import lib
 
 is_bool = lib.is_bool
@@ -247,7 +247,7 @@ def is_re_compilable(obj):
         return True
 
 
-def is_list_like(obj):
+def is_list_like(obj, allow_sets=True):
     """
     Check if the object is list-like.
 
@@ -259,6 +259,10 @@ def is_list_like(obj):
     Parameters
     ----------
     obj : The object to check.
+    allow_sets : boolean, default True
+        If this parameter is False, sets will not be considered list-like
+
+        .. versionadded:: 0.24.0
 
     Returns
     -------
@@ -283,11 +287,15 @@ def is_list_like(obj):
     False
     """
 
-    return (isinstance(obj, compat.Iterable) and
+    return (isinstance(obj, compat.Iterable)
             # we do not count strings/unicode/bytes as list-like
-            not isinstance(obj, string_and_binary_types) and
+            and not isinstance(obj, string_and_binary_types)
+
             # exclude zero-dimensional numpy arrays, effectively scalars
-            not (isinstance(obj, np.ndarray) and obj.ndim == 0))
+            and not (isinstance(obj, np.ndarray) and obj.ndim == 0)
+
+            # exclude sets if allow_sets is False
+            and not (allow_sets is False and isinstance(obj, Set)))
 
 
 def is_array_like(obj):

diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py
@@ -48,6 +48,70 @@ def coerce(request):
     return request.param
 
 
+# collect all objects to be tested for list-like-ness; use tuples of objects,
+# whether they are list-like or not (special casing for sets), and their ID
+ll_params = [
+    ([1],                       True,  'list'),                 # noqa: E241
+    ([],                        True,  'list-empty'),           # noqa: E241
+    ((1, ),                     True,  'tuple'),                # noqa: E241
+    (tuple(),                   True,  'tuple-empty'),          # noqa: E241
+    ({'a': 1},                  True,  'dict'),                 # noqa: E241
+    (dict(),                    True,  'dict-empty'),           # noqa: E241
+    ({'a', 1},                  'set', 'set'),                  # noqa: E241
+    (set(),                     'set', 'set-empty'),            # noqa: E241
+    (frozenset({'a', 1}),       'set', 'frozenset'),            # noqa: E241
+    (frozenset([]),             'set', 'frozenset-empty'),      # noqa: E241
+    (iter([1, 2]),              True,  'iterator'),             # noqa: E241
+    (iter([]),                  True,  'iterator-empty'),       # noqa: E241
+    ((x for x in [1, 2]),       True,  'generator'),            # noqa: E241
+    ((x for x in []),           True,  'generator-empty'),      # noqa: E241
+    (Series([1]),               True,  'Series'),               # noqa: E241
+    (Series([]),                True,  'Series-empty'),         # noqa: E241
+    (Series(['a']).str,         True,  'StringMethods'),        # noqa: E241
+    (Series([], dtype='O').str, True,  'StringMethods-empty'),  # noqa: E241
+    (Index([1]),                True,  'Index'),                # noqa: E241
+    (Index([]),                 True,  'Index-empty'),          # noqa: E241
+    (DataFrame([[1]]),          True,  'DataFrame'),            # noqa: E241
+    (DataFrame(),               True,  'DataFrame-empty'),      # noqa: E241
+    (np.ndarray((2,) * 1),      True,  'ndarray-1d'),           # noqa: E241
+    (np.array([]),              True,  'ndarray-1d-empty'),     # noqa: E241
+    (np.ndarray((2,) * 2),      True,  'ndarray-2d'),           # noqa: E241
+    (np.array([[]]),            True,  'ndarray-2d-empty'),     # noqa: E241
+    (np.ndarray((2,) * 3),      True,  'ndarray-3d'),           # noqa: E241
+    (np.array([[[]]]),          True,  'ndarray-3d-empty'),     # noqa: E241
+    (np.ndarray((2,) * 4),      True,  'ndarray-4d'),           # noqa: E241
+    (np.array([[[[]]]]),        True,  'ndarray-4d-empty'),     # noqa: E241
+    (np.array(2),               False, 'ndarray-0d'),           # noqa: E241
+    (1,                         False, 'int'),                  # noqa: E241
+    (b'123',                    False, 'bytes'),                # noqa: E241
+    (b'',                       False, 'bytes-empty'),          # noqa: E241
+    ('123',                     False, 'string'),               # noqa: E241
+    ('',                        False, 'string-empty'),         # noqa: E241
+    (str,                       False, 'string-type'),          # noqa: E241
+    (object(),                  False, 'object'),               # noqa: E241
+    (np.nan,                    False, 'NaN'),                  # noqa: E241
+    (None,                      False, 'None')                  # noqa: E241
+]
+objs, expected, ids = zip(*ll_params)
+
+
+@pytest.fixture(params=zip(objs, expected), ids=ids)
+def maybe_list_like(request):
+    return request.param
+
+
+def test_is_list_like(maybe_list_like):
+    obj, expected = maybe_list_like
+    expected = True if expected == 'set' else expected
+    assert inference.is_list_like(obj) == expected
+
+
+def test_is_list_like_disallow_sets(maybe_list_like):
+    obj, expected = maybe_list_like
+    expected = False if expected == 'set' else expected
+    assert inference.is_list_like(obj, allow_sets=False) == expected
+
+
 def test_is_sequence():
     is_seq = inference.is_sequence
     assert (is_seq((1, 2)))
@@ -64,23 +128,6 @@ def __getitem__(self):
     assert (not is_seq(A()))
 
 
-@pytest.mark.parametrize(
-    "ll",
-    [
-        [], [1], (1, ), (1, 2), {'a': 1},
-        {1, 'a'}, Series([1]),
-        Series([]), Series(['a']).str,
-        np.array([2])])
-def test_is_list_like_passes(ll):
-    assert inference.is_list_like(ll)
-
-
-@pytest.mark.parametrize(
-    "ll", [1, '2', object(), str, np.array(2)])
-def test_is_list_like_fails(ll):
-    assert not inference.is_list_like(ll)
-
-
 def test_is_array_like():
     assert inference.is_array_like(Series([]))
     assert inference.is_array_like(Series([1, 2]))