Skip to content

Commit

Permalink
Further MultiValuedValue optimization (#469)
Browse files Browse the repository at this point in the history
I noticed significant time being spent repeatedly constructing the set of known subvalues. On a profiled run on our maiin internal codebase, this makes pyanalyze ~30% faster.
  • Loading branch information
JelleZijlstra authored Feb 7, 2022
1 parent 0bf4700 commit a92fb16
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 26 deletions.
1 change: 1 addition & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

## Unreleased

- Optimize type compatibility checks on large unions (#469)
- Detect incorrect key types passed to `dict.__getitem__` (#468)
- Pick up the signature of `open()` from typeshed correctly (#463)
- Do not strip away generic parameters explicitly set to
Expand Down
59 changes: 33 additions & 26 deletions pyanalyze/value.py
Original file line number Diff line number Diff line change
Expand Up @@ -1323,16 +1323,43 @@ class MultiValuedValue(Value):
raw_vals: InitVar[Iterable[Value]]
vals: Tuple[Value, ...] = field(init=False)
"""The underlying values of the union."""
_known_subvals: Optional[Tuple[Set[Tuple[object, type]], Sequence[Value]]] = field(
init=False, repr=False, hash=False, compare=False
)

def __post_init__(self, raw_vals: Iterable[Value]) -> None:
object.__setattr__(
self,
"vals",
tuple(chain.from_iterable(flatten_values(val) for val in raw_vals)),
)
object.__setattr__(self, "_known_subvals", self._get_known_subvals())

def _get_known_subvals(
self,
) -> Optional[Tuple[Set[Tuple[object, type]], Sequence[Value]]]:
# Not worth it for small unions
if len(self.vals) < 10:
return None
# Optimization for comparing Unions containing large unions of literals.
try:
# Include the type to avoid e.g. 1 and True matching
known_values = {
(subval.val, type(subval.val))
for subval in self.vals
if isinstance(subval, KnownValue)
}
except TypeError:
return None # not hashable
else:
# Make remaining check not consider the KnownValues again
remaining_vals = [
subval for subval in self.vals if not isinstance(subval, KnownValue)
]
return known_values, remaining_vals

def substitute_typevars(self, typevars: TypeVarMap) -> Value:
if not self.vals:
if not self.vals or not typevars:
return self
return MultiValuedValue(
[val.substitute_typevars(typevars) for val in self.vals]
Expand All @@ -1359,35 +1386,15 @@ def can_assign(self, other: Value, ctx: CanAssignContext) -> CanAssign:
return {}
else:
my_vals = self.vals
# Optimization for large unions of literals. We could perhaps cache this set,
# but that's more complicated. Empirically this is already much faster.
# The number 20 is arbitrary. I noticed the bottleneck in production on a
# Union with nearly 500 values.
if isinstance(other, KnownValue) and len(my_vals) > 20:
if isinstance(other, KnownValue) and self._known_subvals is not None:
known_values, my_vals = self._known_subvals
try:
# Include the type to avoid e.g. 1 and True matching
known_values = {
(subval.val, type(subval.val))
for subval in my_vals
if isinstance(subval, KnownValue)
}
is_present = (other.val, type(other.val)) in known_values
except TypeError:
pass # not hashable
else:
try:
is_present = (other.val, type(other.val)) in known_values
except TypeError:
pass # not hashable
else:
if is_present:
return {}
else:
# Make remaining check not consider the KnownValues again
my_vals = [
subval
for subval in my_vals
if not isinstance(subval, KnownValue)
]
if is_present:
return {}

bounds_maps = []
errors = []
Expand Down

0 comments on commit a92fb16

Please sign in to comment.