Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: tuple-of-tuples indexing results in NumPy VisibleDeprecationWarning #35437

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,9 +138,12 @@ def is_bool_indexer(key: Any) -> bool:
return True
elif isinstance(key, list):
try:
arr = np.asarray(key)
# https://github.com/pandas-dev/pandas/issues/35434
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=np.VisibleDeprecationWarning)
Comment on lines +142 to +143
Copy link
Contributor

@TomAugspurger TomAugspurger Jul 28, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Getting these can be somewhat expensive. Does this slow down DataFrame.__getitem__ or Series.__getitem__ at all?

Copy link
Member Author

@simonjayhawkins simonjayhawkins Jul 28, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not free, but the overhead is not significant for `ser[[tup]]`:

         1187 function calls (1183 primitive calls) in 0.002 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      4/2    0.000    0.000    0.001    0.000 base.py:289(__new__)
      281    0.000    0.000    0.000    0.000 {built-in method builtins.isinstance}
       14    0.000    0.000    0.000    0.000 {built-in method numpy.array}
        4    0.000    0.000    0.000    0.000 {pandas._libs.lib.infer_dtype}
       94    0.000    0.000    0.000    0.000 generic.py:10(_check)
        1    0.000    0.000    0.002    0.002 {built-in method builtins.exec}
        5    0.000    0.000    0.000    0.000 common.py:221(asarray_tuplesafe)
        2    0.000    0.000    0.000    0.000 common.py:97(is_bool_indexer)
        2    0.000    0.000    0.000    0.000 {method 'reduce' of 'numpy.ufunc' objects}
        1    0.000    0.000    0.000    0.000 indexing.py:1257(_validate_read_indexer)
        7    0.000    0.000    0.000    0.000 warnings.py:458(__enter__)
        6    0.000    0.000    0.000    0.000 _dtype.py:321(_name_get)
        1    0.000    0.000    0.000    0.000 {pandas._libs.algos.take_1d_int64_int64}
       20    0.000    0.000    0.000    0.000 common.py:1460(is_extension_array_dtype)
        1    0.000    0.000    0.000    0.000 algorithms.py:1586(take_nd)
        1    0.000    0.000    0.000    0.000 {method 'get_indexer' of 'pandas._libs.index.IndexEngine' objects}
       23    0.000    0.000    0.000    0.000 base.py:256(is_dtype)
        1    0.000    0.000    0.002    0.002 series.py:910(_get_with)
        1    0.000    0.000    0.000    0.000 base.py:2951(get_indexer)
      144    0.000    0.000    0.000    0.000 {built-in method builtins.getattr}
       20    0.000    0.000    0.000    0.000 base.py:413(find)
        7    0.000    0.000    0.000    0.000 warnings.py:181(_add_filter)
        1    0.000    0.000    0.000    0.000 generic.py:4493(_reindex_with_indexers)
        2    0.000    0.000    0.000    0.000 cast.py:441(maybe_promote)
        1    0.000    0.000    0.002    0.002 series.py:868(__getitem__)
        1    0.000    0.000    0.002    0.002 indexing.py:1078(_getitem_axis)
       21    0.000    0.000    0.000    0.000 common.py:1600(_is_dtype_type)
        2    0.000    0.000    0.000    0.000 base.py:5718(_maybe_cast_data_without_dtype)
        7    0.000    0.000    0.000    0.000 dtypes.py:1113(is_dtype)
        1    0.000    0.000    0.000    0.000 managers.py:1267(_slice_take_blocks_ax0)
        2    0.000    0.000    0.000    0.000 cast.py:1559(construct_1d_object_array_from_listlike)
        3    0.000    0.000    0.000    0.000 {built-in method numpy.empty}
        3    0.000    0.000    0.000    0.000 generic.py:377(_get_axis)
        1    0.000    0.000    0.001    0.001 indexing.py:1208(_get_listlike_indexer)
        1    0.000    0.000    0.000    0.000 blocks.py:1233(take_nd)
       76    0.000    0.000    0.000    0.000 {built-in method builtins.issubclass}
        9    0.000    0.000    0.000    0.000 common.py:1565(_get_dtype)
        6    0.000    0.000    0.000    0.000 _dtype.py:24(_kind_name)
        9    0.000    0.000    0.000    0.000 common.py:530(is_categorical_dtype)
        7    0.000    0.000    0.000    0.000 dtypes.py:901(is_dtype)
        9    0.000    0.000    0.000    0.000 common.py:492(is_interval_dtype)
        1    0.000    0.000    0.000    0.000 algorithms.py:1457(_get_take_nd_function)
        7    0.000    0.000    0.000    0.000 warnings.py:165(simplefilter)

what's the best approach?

arr = np.asarray(key)
return arr.dtype == np.bool_ and len(arr) == len(key)
except TypeError: # pragma: no cover
except (TypeError, ValueError): # pragma: no cover
return False

return False
Expand Down Expand Up @@ -225,7 +228,15 @@ def asarray_tuplesafe(values, dtype=None):
if isinstance(values, list) and dtype in [np.object_, object]:
return construct_1d_object_array_from_listlike(values)

result = np.asarray(values, dtype=dtype)
try:
# https://github.com/pandas-dev/pandas/issues/35434
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=np.VisibleDeprecationWarning)
result = np.asarray(values, dtype=dtype)
except ValueError:
# we get here with a list-like of nested values if dtype=None
# for numpy < 1.18
return construct_1d_object_array_from_listlike(values)

if issubclass(result.dtype.type, str):
result = np.asarray(values, dtype=object)
Expand Down
35 changes: 35 additions & 0 deletions pandas/core/indexers.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,41 @@
)
from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries

from pandas.core.dtypes.common import is_scalar, is_iterator
import pandas.core.common as com


class Indexer:
    """
    Thin wrapper around an indexing key that caches checks made on it.

    Parameters
    ----------
    key : object
        The key handed to an indexing operation. ``list`` and ``tuple``
        keys are first passed through ``unpack_1tuple``; any other key is
        stored unchanged.

    Attributes
    ----------
    key : object
        The (possibly unpacked) key. If it is an iterator, it is replaced
        by a ``list`` of its items the first time ``is_bool_indexer`` is
        evaluated.
    """

    # ``None`` until the iterator check has run; ``False`` afterwards,
    # because by then any iterator key has been materialized into a list.
    _is_iterator = None
    # ``None`` until computed; afterwards the cached boolean-indexer result.
    _is_bool_indexer = None

    def __init__(self, key):
        self.key = unpack_1tuple(key) if isinstance(key, (list, tuple)) else key

    @property
    def is_scalar(self):
        """
        Whether the wrapped key is a scalar, as reported by ``is_scalar``.
        """
        return is_scalar(self.key)

    @property
    def is_bool_indexer(self):
        """
        Whether the wrapped key is a boolean indexer.

        The answer comes from ``com.is_bool_indexer`` and is cached, so the
        check runs at most once per instance. An iterator key is consumed
        into a list (and stored back on ``self.key``) before the check so
        that it can still be used afterwards.
        """
        cached = self._is_bool_indexer
        if cached is None:
            if self._is_iterator is None:
                # Materialize a one-shot iterator before inspecting it.
                if is_iterator(self.key):
                    self.key = list(self.key)
                self._is_iterator = False

            cached = com.is_bool_indexer(self.key)
            self._is_bool_indexer = cached
        return cached


# -----------------------------------------------------------
# Indexer Identification

Expand Down
17 changes: 11 additions & 6 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@
is_dict_like,
is_extension_array_dtype,
is_integer,
is_iterator,
is_list_like,
is_object_dtype,
is_scalar,
Expand Down Expand Up @@ -78,7 +77,7 @@
sanitize_array,
)
from pandas.core.generic import NDFrame
from pandas.core.indexers import deprecate_ndim_indexing, unpack_1tuple
from pandas.core.indexers import deprecate_ndim_indexing, unpack_1tuple, Indexer
from pandas.core.indexes.accessors import CombinedDatetimelikeProperties
from pandas.core.indexes.api import Float64Index, Index, MultiIndex, ensure_index
import pandas.core.indexes.base as ibase
Expand Down Expand Up @@ -897,17 +896,23 @@ def __getitem__(self, key):
# in the first level of our MultiIndex
return self._get_values_tuple(key)

if is_iterator(key):
key = list(key)
# if is_iterator(key):
# key = list(key)

_key = Indexer(key)

if com.is_bool_indexer(key):
if _key.is_bool_indexer:
key = check_bool_indexer(self.index, key)
key = np.asarray(key, dtype=bool)
return self._get_values(key)

return self._get_with(key)
return self._get_with(_key)

def _get_with(self, key):
# breakpoint()
_key = key
key = _key.key

# other: fancy integer or otherwise
if isinstance(key, slice):
# _convert_slice_indexer to determine if this slice is positional
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1110,3 +1110,12 @@ def test_setitem_categorical():
{"h": pd.Categorical(["m", "n"]).reorder_categories(["n", "m"])}
)
tm.assert_frame_equal(df, expected)


def test_nested_tuple_no_warning():
    """Selecting with a list holding a nested-tuple label emits no warning."""
    # https://github.com/pandas-dev/pandas/issues/35434
    nested_label = ("A", ("B", 2))
    expected = pd.Series([42], index=[nested_label])

    with tm.assert_produces_warning(None):
        selected = expected[[nested_label]]

    tm.assert_series_equal(selected, expected)