From 282b76ef16e8b421f34fa74c9ec3813647424c5b Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Tue, 29 Jun 2021 06:44:28 -0700 Subject: [PATCH] Backport PR #41283: TYP Series and DataFrame currently type-check as hashable (#42299) Co-authored-by: Marco Edward Gorelli --- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/core/arrays/base.py | 6 ++++-- pandas/core/frame.py | 5 ++++- pandas/core/generic.py | 9 ++++----- pandas/core/indexes/base.py | 7 ++++--- pandas/core/reshape/pivot.py | 21 ++++++++++----------- pandas/core/series.py | 1 - pandas/tests/frame/test_api.py | 2 +- pandas/tests/series/test_api.py | 2 +- 9 files changed, 29 insertions(+), 25 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 0641b32383125..60dc7096c9d1e 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -707,6 +707,7 @@ Other API changes - Added new ``engine`` and ``**engine_kwargs`` parameters to :meth:`DataFrame.to_sql` to support other future "SQL engines". Currently we still only use ``SQLAlchemy`` under the hood, but more engines are planned to be supported such as `turbodbc `_ (:issue:`36893`) - Removed redundant ``freq`` from :class:`PeriodIndex` string representation (:issue:`41653`) - :meth:`ExtensionDtype.construct_array_type` is now a required method instead of an optional one for :class:`ExtensionDtype` subclasses (:issue:`24860`) +- Calling ``hash`` on non-hashable pandas objects will now raise ``TypeError`` with the built-in error message (e.g. ``unhashable type: 'Series'``). Previously it would raise a custom message such as ``'Series' objects are mutable, thus they cannot be hashed``. 
Furthermore, ``isinstance(<Series>, collections.abc.Hashable)`` will now return ``False`` (:issue:`40013`) - :meth:`.Styler.from_custom_template` now has two new arguments for template names, and removed the old ``name``, due to template inheritance having been introducing for better parsing (:issue:`42053`). Subclassing modifications to Styler attributes are also needed. .. _whatsnew_130.api_breaking.build: diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 888c7cbbffb59..96bd4280f4da4 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1296,8 +1296,10 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs): """ raise TypeError(f"cannot perform {name} with type {self.dtype}") - def __hash__(self) -> int: - raise TypeError(f"unhashable type: {repr(type(self).__name__)}") + # https://github.com/python/typeshed/issues/2148#issuecomment-520783318 + # Incompatible types in assignment (expression has type "None", base class + # "object" defined the type as "Callable[[object], int]") + __hash__: None # type: ignore[assignment] # ------------------------------------------------------------------------ # Non-Optimized Default Methods diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e555625dbba95..cd127d9c9ec63 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6186,7 +6186,10 @@ def f(vals) -> tuple[np.ndarray, int]: return labels.astype("i8", copy=False), len(shape) if subset is None: - subset = self.columns + # Incompatible types in assignment + # (expression has type "Index", variable has type "Sequence[Any]") + # (pending on https://github.com/pandas-dev/pandas/issues/28770) + subset = self.columns # type: ignore[assignment] elif ( not np.iterable(subset) or isinstance(subset, str) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 7070228feeefa..da4feb9640626 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1873,11 +1873,10 @@ def 
_drop_labels_or_levels(self, keys, axis: int = 0): # ---------------------------------------------------------------------- # Iteration - def __hash__(self) -> int: - raise TypeError( - f"{repr(type(self).__name__)} objects are mutable, " - f"thus they cannot be hashed" - ) + # https://github.com/python/typeshed/issues/2148#issuecomment-520783318 + # Incompatible types in assignment (expression has type "None", base class + # "object" defined the type as "Callable[[object], int]") + __hash__: None # type: ignore[assignment] def __iter__(self): """ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e213952c04c52..dd123ae82ce49 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4540,9 +4540,10 @@ def __contains__(self, key: Any) -> bool: except (OverflowError, TypeError, ValueError): return False - @final - def __hash__(self): - raise TypeError(f"unhashable type: {repr(type(self).__name__)}") + # https://github.com/python/typeshed/issues/2148#issuecomment-520783318 + # Incompatible types in assignment (expression has type "None", base class + # "object" defined the type as "Callable[[object], int]") + __hash__: None # type: ignore[assignment] @final def __setitem__(self, key, value): diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 51556fda6da04..7a5c2677307e2 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -482,7 +482,7 @@ def pivot( if columns is None: raise TypeError("pivot() missing 1 required argument: 'columns'") - columns = com.convert_to_list_like(columns) + columns_listlike = com.convert_to_list_like(columns) if values is None: if index is not None: @@ -494,28 +494,27 @@ def pivot( # error: Unsupported operand types for + ("List[Any]" and "ExtensionArray") # error: Unsupported left operand type for + ("ExtensionArray") indexed = data.set_index( - cols + columns, append=append # type: ignore[operator] + cols + columns_listlike, append=append # 
type: ignore[operator] ) else: if index is None: - index = [Series(data.index, name=data.index.name)] + index_list = [Series(data.index, name=data.index.name)] else: - index = com.convert_to_list_like(index) - index = [data[idx] for idx in index] + index_list = [data[idx] for idx in com.convert_to_list_like(index)] - data_columns = [data[col] for col in columns] - index.extend(data_columns) - index = MultiIndex.from_arrays(index) + data_columns = [data[col] for col in columns_listlike] + index_list.extend(data_columns) + multiindex = MultiIndex.from_arrays(index_list) if is_list_like(values) and not isinstance(values, tuple): # Exclude tuple because it is seen as a single column name values = cast(Sequence[Hashable], values) indexed = data._constructor( - data[values]._values, index=index, columns=values + data[values]._values, index=multiindex, columns=values ) else: - indexed = data._constructor_sliced(data[values]._values, index=index) - return indexed.unstack(columns) + indexed = data._constructor_sliced(data[values]._values, index=multiindex) + return indexed.unstack(columns_listlike) def crosstab( diff --git a/pandas/core/series.py b/pandas/core/series.py index 9b21f4fa967ec..43738831981d2 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -305,7 +305,6 @@ class Series(base.IndexOpsMixin, generic.NDFrame): hasnans = property( # type: ignore[assignment] base.IndexOpsMixin.hasnans.func, doc=base.IndexOpsMixin.hasnans.__doc__ ) - __hash__ = generic.NDFrame.__hash__ _mgr: SingleManager div: Callable[[Series, Any], Series] rdiv: Callable[[Series, Any], Series] diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 76cfd77d254f2..49649c1487f13 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -91,7 +91,7 @@ def test_not_hashable(self): empty_frame = DataFrame() df = DataFrame([1]) - msg = "'DataFrame' objects are mutable, thus they cannot be hashed" + msg = "unhashable type: 'DataFrame'" 
with pytest.raises(TypeError, match=msg): hash(df) with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index eddf57c1e88f3..b49c209a59a06 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -101,7 +101,7 @@ def test_index_tab_completion(self, index): def test_not_hashable(self): s_empty = Series(dtype=object) s = Series([1]) - msg = "'Series' objects are mutable, thus they cannot be hashed" + msg = "unhashable type: 'Series'" with pytest.raises(TypeError, match=msg): hash(s_empty) with pytest.raises(TypeError, match=msg):