Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-90562: Improve zero argument support for super() in dataclasses when slots=True #124692

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
Open
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 54 additions & 13 deletions Lib/dataclasses.py
Original file line number Diff line number Diff line change
@@ -1222,11 +1222,6 @@ def _get_slots(cls):


def _update_func_cell_for__class__(f, oldcls, newcls):
# Returns True if we update a cell, else False.
Bobronium marked this conversation as resolved.
Show resolved Hide resolved
if f is None:
# f will be None in the case of a property where not all of
# fget, fset, and fdel are used. Nothing to do in that case.
return False
try:
idx = f.__code__.co_freevars.index("__class__")
except ValueError:
@@ -1235,13 +1230,38 @@ def _update_func_cell_for__class__(f, oldcls, newcls):
# Fix the cell to point to the new class, if it's already pointing
# at the old class. I'm not convinced that the "is oldcls" test
# is needed, but other than performance can't hurt.
closure = f.__closure__[idx]
if closure.cell_contents is oldcls:
closure.cell_contents = newcls
cell = f.__closure__[idx]
if cell.cell_contents is oldcls:
cell.cell_contents = newcls
return True
return False


def _find_inner_functions(obj, _seen=None, _depth=0):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why the leading underscores on _seen and _depth?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should indicate that these arguments are only used within the function (in recursive calls) and not outside of the function body.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that the whole function is private, so it can only be used inside CPython by us :)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, you've got a point. Removed the underscores.

if _seen is None:
_seen = set()
if id(obj) in _seen:
return None
_seen.add(id(obj))

_depth += 1
# Normally just inspection of a descriptor object itself should be enough,
# and we should encounter the function as its attribute,
# but in case function was wrapped (e.g. functools.partition was used),
Bobronium marked this conversation as resolved.
Show resolved Hide resolved
# we want to dive at least one level deeper.
if _depth > 2:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are depth 1 and 2 special? What is this test (and _depth in general) trying to accomplish? Please add comments explaining what's going on here, it's not at all clear from the code.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added an explanation and few test cases for this behaviour. Caught two bugs in the process \o/

return None

for attr in dir(obj):
value = getattr(obj, attr, None)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This case is always complex, since getattr can evaluate properties and other objects. There are two ways of hanling this case:

  1. Allow all exceptions to raise
  2. Hide all exceptions from user here

I am not quite sure which one is better here.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for bringing this to my attention!

After some hacking, I've decided to go with the third option: removing any possibility of user defined code execution.

if value is None:
continue
if isinstance(value, types.FunctionType):
yield inspect.unwrap(value)
return
yield from _find_inner_functions(value, _seen, _depth)


def _create_slots(defined_fields, inherited_slots, field_names, weakref_slot):
# The slots for our class. Remove slots from our base classes. Add
# '__weakref__' if weakref_slot was given, unless it is already present.
@@ -1317,7 +1337,11 @@ def _add_slots(cls, is_frozen, weakref_slot, defined_fields):
# (the newly created one, which we're returning) and not the
# original class. We can break out of this loop as soon as we
# make an update, since all closures for a class will share a
# given cell.
# given cell. First we try to find a pure function or a property,
# and then fallback to inspecting custom descriptors
# if no pure function or property is found.

custom_descriptors_to_check = []
for member in newcls.__dict__.values():
# If this is a wrapped function, unwrap it.
member = inspect.unwrap(member)
@@ -1326,10 +1350,27 @@ def _add_slots(cls, is_frozen, weakref_slot, defined_fields):
if _update_func_cell_for__class__(member, cls, newcls):
break
elif isinstance(member, property):
if (_update_func_cell_for__class__(member.fget, cls, newcls)
or _update_func_cell_for__class__(member.fset, cls, newcls)
or _update_func_cell_for__class__(member.fdel, cls, newcls)):
break
for f in member.fget, member.fset, member.fdel:
if f is None:
continue
# Unwrap once more in case function
# was wrapped before it became property.
f = inspect.unwrap(f)
if _update_func_cell_for__class__(f, cls, newcls):
break
ericvsmith marked this conversation as resolved.
Show resolved Hide resolved
elif hasattr(member, "__get__") and not inspect.ismemberdescriptor(
member
):
# We don't want to inspect custom descriptors just yet
# there's still a chance we'll encounter a pure function
# or a property and won't have to use slower recursive search.
custom_descriptors_to_check.append(member)
else:
# Now let's ensure custom descriptors won't be left out.
for descriptor in custom_descriptors_to_check:
for f in _find_inner_functions(descriptor):
if _update_func_cell_for__class__(f, cls, newcls):
break

return newcls

151 changes: 151 additions & 0 deletions Lib/test/test_dataclasses/__init__.py
Original file line number Diff line number Diff line change
@@ -13,6 +13,8 @@
import weakref
import traceback
import unittest
from functools import partial
from functools import update_wrapper
Bobronium marked this conversation as resolved.
Show resolved Hide resolved
from unittest.mock import Mock
from typing import ClassVar, Any, List, Union, Tuple, Dict, Generic, TypeVar, Optional, Protocol, DefaultDict
from typing import get_type_hints
@@ -5031,6 +5033,155 @@ def foo(self):

A().foo()

def test_wrapped_property(self):
def mydecorator(f):
@wraps(f)
def wrapper(*args, **kwargs):
return f(*args, **kwargs)
return wrapper

class B:
@property
def foo(self):
return "bar"

@dataclass(slots=True)
class A(B):
@property
@mydecorator
def foo(self):
return super().foo

self.assertEqual(A().foo, "bar")

def test_custom_descriptor(self):
class CustomDescriptor:
def __init__(self, f):
self._f = f

def __get__(self, instance, owner):
return self._f(instance)

class B:
def foo(self):
return "bar"

@dataclass(slots=True)
class A(B):
@CustomDescriptor
def foo(cls):
return super().foo()

self.assertEqual(A().foo, "bar")

def test_custom_descriptor_wrapped(self):
class CustomDescriptor:
def __init__(self, f):
self._f = update_wrapper(lambda *args, **kwargs: f(*args, **kwargs), f)

def __get__(self, instance, owner):
return self._f(instance)

class B:
def foo(self):
return "bar"

@dataclass(slots=True)
class A(B):
@CustomDescriptor
def foo(cls):
return super().foo()

self.assertEqual(A().foo, "bar")

def test_custom_nested_descriptor(self):
class CustomFunctionWrapper:
def __init__(self, f):
self._f = f

def __call__(self, *args, **kwargs):
return self._f(*args, **kwargs)

class CustomDescriptor:
def __init__(self, f):
self._wrapper = CustomFunctionWrapper(f)

def __get__(self, instance, owner):
return self._wrapper(instance)

class B:
def foo(self):
return "bar"

@dataclass(slots=True)
class A(B):
@CustomDescriptor
def foo(cls):
return super().foo()

self.assertEqual(A().foo, "bar")

def test_custom_nested_descriptor_with_partial(self):
class CustomDescriptor:
def __init__(self, f):
self._wrapper = partial(f, value="bar")

def __get__(self, instance, owner):
return self._wrapper(instance)

class B:
def foo(self, value):
return value

@dataclass(slots=True)
class A(B):
@CustomDescriptor
def foo(self, value):
return super().foo(value)

self.assertEqual(A().foo, "bar")

def test_custom_too_nested_descriptor(self):
class UnnecessaryNestedWrapper:
def __init__(self, wrapper):
self._wrapper = wrapper

def __call__(self, *args, **kwargs):
return self._wrapper(*args, **kwargs)

class CustomFunctionWrapper:
def __init__(self, f):
self._f = f

def __call__(self, *args, **kwargs):
return self._f(*args, **kwargs)

class CustomDescriptor:
def __init__(self, f):
self._wrapper = UnnecessaryNestedWrapper(CustomFunctionWrapper(f))

def __get__(self, instance, owner):
return self._wrapper(instance)

class B:
def foo(self):
return "bar"

@dataclass(slots=True)
class A(B):
@CustomDescriptor
def foo(cls):
return super().foo()

with self.assertRaises(TypeError) as context:
A().foo

expected_error_message = (
'super(type, obj): obj (instance of A) is not '
'an instance or subtype of type (A).'
)
self.assertEqual(context.exception.args, (expected_error_message,))

def test_remembered_class(self):
# Apply the dataclass decorator manually (not when the class
# is created), so that we can keep a reference to the
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Modify dataclasses to enable zero argument support for ``super()`` when ``slots=True`` is
specified and custom descriptor is used or ``property`` function is wrapped.