-
-
Notifications
You must be signed in to change notification settings - Fork 18.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add allow_sets-kwarg to is_list_like #23065
Changes from 10 commits
5508857
544b7ec
ae9a45b
3d65d25
15e3265
1941376
7871397
8efee57
0826f34
5686c77
cb588d6
3796080
3647bdd
d5ef14f
4b91d2e
514abd9
d1ff6ab
ece9deb
13e0983
2f5e927
ab3ce96
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,7 +5,7 @@ | |
from numbers import Number | ||
from pandas import compat | ||
from pandas.compat import (PY2, string_types, text_type, | ||
string_and_binary_types, re_type) | ||
string_and_binary_types, re_type, Set) | ||
from pandas._libs import lib | ||
|
||
is_bool = lib.is_bool | ||
|
@@ -247,7 +247,7 @@ def is_re_compilable(obj): | |
return True | ||
|
||
|
||
def is_list_like(obj): | ||
def is_list_like(obj, strict=False): | ||
""" | ||
Check if the object is list-like. | ||
|
@@ -259,6 +259,8 @@ def is_list_like(obj): | |
Parameters | ||
---------- | ||
obj : The object to check. | ||
strict : boolean, default False | ||
If this parameter is True, sets will not be considered list-like | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please add a `versionadded` tag. |
||
Returns | ||
------- | ||
|
@@ -283,11 +285,13 @@ def is_list_like(obj): | |
False | ||
""" | ||
|
||
return (isinstance(obj, compat.Iterable) and | ||
return (isinstance(obj, compat.Iterable) | ||
# we do not count strings/unicode/bytes as list-like | ||
not isinstance(obj, string_and_binary_types) and | ||
and not isinstance(obj, string_and_binary_types) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is not correct; leave the `and` where it was. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. PEP8 is clear about this (https://www.python.org/dev/peps/pep-0008/#should-a-line-break-before-or-after-a-binary-operator): binary operators (like `and`) should follow the line break rather than precede it. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, changing this is in principle fine; we have been following that PEP8 rule recently (typically we only want such changes on lines that are already touched by the PR, but since you are already touching the function some lines below, I would say it is fine). Note that this is a recent change in PEP8, so you will see many places in the code that do it differently. |
||
# exclude zero-dimensional numpy arrays, effectively scalars | ||
not (isinstance(obj, np.ndarray) and obj.ndim == 0)) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Aside from adding the kwarg everywhere, this is the only substantial change of this PR. |
||
and not (isinstance(obj, np.ndarray) and obj.ndim == 0) | ||
# exclude sets if ordered_only | ||
and not (strict and isinstance(obj, Set))) | ||
|
||
|
||
def is_array_like(obj): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -66,20 +66,39 @@ def __getitem__(self): | |
@pytest.mark.parametrize( | ||
"ll", | ||
[ | ||
[], [1], (1, ), (1, 2), {'a': 1}, | ||
{1, 'a'}, Series([1]), | ||
Series([]), Series(['a']).str, | ||
np.array([2])]) | ||
[], [1], tuple(), (1, ), (1, 2), {'a': 1}, {1, 'a'}, np.array([2]), | ||
Series([1]), Series([]), Series(['a']).str, Index([]), Index([1]), | ||
DataFrame(), DataFrame([[1]]), iter([1, 2]), (x for x in [1, 2]), | ||
np.ndarray((2,) * 2), np.ndarray((2,) * 3), np.ndarray((2,) * 4) | ||
]) | ||
def test_is_list_like_passes(ll): | ||
assert inference.is_list_like(ll) | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"ll", [1, '2', object(), str, np.array(2)]) | ||
@pytest.mark.parametrize("ll", [1, '2', object(), str, np.array(2)]) | ||
def test_is_list_like_fails(ll): | ||
assert not inference.is_list_like(ll) | ||
|
||
|
||
@pytest.mark.parametrize( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would prefer to have 2 tests total to avoid the duplication of the args here (IOW, 1 for allow_sets=True and 1 for False).
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not sure if my solution is what you had in mind, but I gave it a shot.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I didn't see the earlier version, but I don't think this is what Jeff had in mind. If we want to de-duplicate the arguments, you would need a fixture giving them:
@pytest.fixture(params=...)
def maybe_list_like(request):
    return request.param
Each of the params would be a tuple like `(obj, expected)`. Then we would have two tests. In the first we do:
obj, expected = ...
if expected:
    expected = True
assert is_list_like(obj) is expected
and in the second:
if expected is None:
    expected = False
assert is_list_like(obj, include_sets=False) is expected
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, @TomAugspurger's suggestion is good here. The issue is that we can't list the args twice. |
||
"ll", | ||
[ | ||
[], [1], tuple(), (1, ), (1, 2), {'a': 1}, np.array([2]), | ||
Series([1]), Series([]), Series(['a']).str, Index([]), Index([1]), | ||
DataFrame(), DataFrame([[1]]), iter([1, 2]), (x for x in [1, 2]), | ||
np.ndarray((2,) * 2), np.ndarray((2,) * 3), np.ndarray((2,) * 4) | ||
]) | ||
def test_is_list_like_strict_passes(ll): | ||
assert inference.is_list_like(ll, strict=True) | ||
|
||
|
||
@pytest.mark.parametrize("ll", [1, '2', object(), str, np.array(2), | ||
{1, 'a'}, frozenset({1, 'a'})]) | ||
def test_is_list_like_strict_fails(ll): | ||
# GH 23061 | ||
assert not inference.is_list_like(ll, strict=True) | ||
|
||
|
||
def test_is_array_like(): | ||
assert inference.is_array_like(Series([])) | ||
assert inference.is_array_like(Series([1, 2])) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is somewhat of an artefact of the version with `is_ordered_list_like`, where I tried to group these methods by similarity (i.e. scalar dtypes, regexes, containers), but I decided to keep it because I think it helps. I can revert that part, of course.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes, on any change, pls try to do the minimal changeset. This will lessen reviewer burden and make things go faster.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"yes, please try to do minimal changeset [next time]" or "yes please revert"?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fine as is for now.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok for now, but generally pls don't change unrelated things.