Commit

use sort_key_ir for most things
tybug committed Dec 27, 2024
1 parent 2530e74 commit 4657a4f
Showing 5 changed files with 49 additions and 50 deletions.
3 changes: 3 additions & 0 deletions hypothesis-python/RELEASE.rst
@@ -0,0 +1,3 @@
+RELEASE_TYPE: patch
+
+The shrinker now uses the `typed choice sequence` (:issue:`3921`) to determine counterexample complexity. We expect this to mostly match the previous ordering, but it may result in reporting different counterexamples in some cases.
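For intuition, here is a toy sketch of what ordering by a typed choice sequence means. This is not Hypothesis's internal API: complexity_index below is a made-up stand-in for the internal choice_to_index, which ranks a choice within its own ordered value space (0 being the simplest allowed choice).

    def complexity_index(value):
        # Hypothetical stand-in for choice_to_index(value, kwargs):
        # smaller index means a simpler choice.
        if isinstance(value, bool):
            return int(value)  # False is simpler than True
        if isinstance(value, int):
            return abs(value)  # integers shrink towards zero
        raise NotImplementedError

    def toy_sort_key(choices):
        # Same shape as the new sort key: length first, then element-wise
        # complexity of each choice.
        return (len(choices), tuple(complexity_index(v) for v in choices))

    # A counterexample drawn from fewer choices is always simpler:
    assert toy_sort_key([1000]) < toy_sort_key([0, 0])
    # At equal length, earlier choices dominate the comparison:
    assert toy_sort_key([2, True]) < toy_sort_key([3, False])
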
4 changes: 2 additions & 2 deletions hypothesis-python/src/hypothesis/core.py
@@ -85,7 +85,7 @@
ensure_free_stackframes,
gc_cumulative_time,
)
-from hypothesis.internal.conjecture.shrinker import sort_key
+from hypothesis.internal.conjecture.shrinker import sort_key, sort_key_ir
from hypothesis.internal.entropy import deterministic_PRNG
from hypothesis.internal.escalation import (
InterestingOrigin,
@@ -1226,7 +1226,7 @@ def run_engine(self):
if runner.interesting_examples:
self.falsifying_examples = sorted(
runner.interesting_examples.values(),
-                key=lambda d: sort_key(d.buffer),
+                key=lambda d: sort_key_ir(d.ir_nodes),
reverse=True,
)
else:
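A quick illustration of the sort above, using stand-in objects rather than the real ConjectureResult: sort_key_ir returns (number of choices, per-choice indices), so with reverse=True the most complex counterexample sorts first and the simplest last.

    from dataclasses import dataclass

    @dataclass
    class FakeResult:
        # Hypothetical stand-in for ConjectureResult; ir_key plays the
        # role of sort_key_ir(d.ir_nodes).
        ir_key: tuple

    results = [
        FakeResult((2, (0, 0))),
        FakeResult((1, (5,))),
        FakeResult((3, (1, 0, 2))),
    ]
    # reverse=True, as in run_engine: most complex counterexample first.
    ordered = sorted(results, key=lambda d: d.ir_key, reverse=True)
    assert [r.ir_key[0] for r in ordered] == [3, 2, 1]
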
12 changes: 6 additions & 6 deletions hypothesis-python/src/hypothesis/internal/conjecture/engine.py
@@ -74,7 +74,7 @@
startswith,
)
from hypothesis.internal.conjecture.pareto import NO_SCORE, ParetoFront, ParetoOptimiser
-from hypothesis.internal.conjecture.shrinker import Shrinker, sort_key
+from hypothesis.internal.conjecture.shrinker import Shrinker, sort_key, sort_key_ir
from hypothesis.internal.healthcheck import fail_health_check
from hypothesis.reporting import base_report, report

@@ -562,8 +562,8 @@ def test_function(self, data: ConjectureData) -> None:
if v < existing_score:
continue

-                if v > existing_score or sort_key(data.buffer) < sort_key(
-                    existing_example.buffer
+                if v > existing_score or sort_key_ir(data.ir_nodes) < sort_key_ir(
+                    existing_example.ir_nodes
):
data_as_result = data.as_result()
assert not isinstance(data_as_result, _Overrun)
@@ -619,7 +619,7 @@ def test_function(self, data: ConjectureData) -> None:
if self.first_bug_found_at is None:
self.first_bug_found_at = self.call_count
else:
-                if sort_key(data.buffer) < sort_key(existing.buffer):
+                if sort_key_ir(data.ir_nodes) < sort_key_ir(existing.ir_nodes):
self.shrinks += 1
self.downgrade_buffer(existing.buffer)
self.__data_cache.unpin(existing.buffer)
@@ -1376,7 +1376,7 @@ def shrink_interesting_examples(self) -> None:
self.finish_shrinking_deadline = time.perf_counter() + MAX_SHRINKING_SECONDS

for prev_data in sorted(
-            self.interesting_examples.values(), key=lambda d: sort_key(d.buffer)
+            self.interesting_examples.values(), key=lambda d: sort_key_ir(d.ir_nodes)
):
assert prev_data.status == Status.INTERESTING
data = self.new_conjecture_data_ir(prev_data.ir_nodes)
@@ -1393,7 +1393,7 @@ def shrink_interesting_examples(self) -> None:
for k, v in self.interesting_examples.items()
if k not in self.shrunk_examples
),
-                key=lambda kv: (sort_key(kv[1].buffer), sort_key(repr(kv[0]))),
+                key=lambda kv: (sort_key_ir(kv[1].ir_nodes), sort_key(repr(kv[0]))),
)
self.debug(f"Shrinking {target!r}: {data.choices}")

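The two-level key in the last hunk decides which interesting example to shrink next. A hedged sketch with made-up data: the primary ordering is the typed choice sequence's sort key, and sort_key(repr(origin)) is only a deterministic tie-break between distinct bugs of equal complexity.

    def sort_key(s):
        # Same shape as the shrinker's string sort key: shortlex ordering.
        return (len(s), s)

    # Made-up origins mapped to made-up sort_key_ir-style keys.
    interesting = {
        "ValueError at foo.py:10": (2, (1, 0)),
        "KeyError at bar.py:22": (2, (1, 0)),  # ties with the ValueError entry
        "AssertionError at baz.py:5": (3, (0, 0, 0)),
    }
    target, ir_key = min(
        interesting.items(),
        key=lambda kv: (kv[1], sort_key(repr(kv[0]))),
    )
    # Both two-choice examples tie on complexity; the shortlex-smaller
    # repr wins, making the choice of next shrink target deterministic.
    assert target == "KeyError at bar.py:22"
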
12 changes: 7 additions & 5 deletions hypothesis-python/src/hypothesis/internal/conjecture/pareto.py
@@ -14,7 +14,7 @@

from hypothesis.internal.conjecture.data import ConjectureData, ConjectureResult, Status
from hypothesis.internal.conjecture.junkdrawer import LazySequenceCopy, swap
-from hypothesis.internal.conjecture.shrinker import sort_key
+from hypothesis.internal.conjecture.shrinker import sort_key_ir

NO_SCORE = float("-inf")

@@ -45,10 +45,12 @@ def dominance(left, right):
more structured or failing tests it can be useful to track, and future work
will depend on it more."""

-    if left.buffer == right.buffer:
+    left_key = sort_key_ir(left.ir_nodes)
+    right_key = sort_key_ir(right.ir_nodes)
+    if left_key == right_key:
return DominanceRelation.EQUAL

-    if sort_key(right.buffer) < sort_key(left.buffer):
+    if right_key < left_key:
result = dominance(left=right, right=left)
if result == DominanceRelation.LEFT_DOMINATES:
return DominanceRelation.RIGHT_DOMINATES
@@ -60,7 +62,7 @@ def dominance(left, right):
return result

# Either left is better or there is no dominance relationship.
-    assert sort_key(left.buffer) < sort_key(right.buffer)
+    assert left_key < right_key

# The right is more interesting
if left.status < right.status:
@@ -126,7 +128,7 @@ def __init__(self, random):
self.__random = random
self.__eviction_listeners = []

-        self.front = SortedList(key=lambda d: sort_key(d.buffer))
+        self.front = SortedList(key=lambda d: sort_key_ir(d.ir_nodes))
self.__pending = None

def add(self, data):
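A simplified sketch of the dominance logic above, with stand-in sort keys and a single optimisation score instead of real ConjectureResult objects. The real function also compares Status and every target score, so this only shows the shape of the computation: normalise so the simpler side is on the left, then ask whether it is also no worse on the score.

    from enum import Enum

    class Dom(Enum):
        NO_DOMINANCE = 0
        EQUAL = 1
        LEFT_DOMINATES = 2
        RIGHT_DOMINATES = 3

    def toy_dominance(left_key, right_key, left_score, right_score):
        # Normalise so the left side has the smaller (or equal) sort key,
        # mirroring the swap-and-recurse step in dominance() above.
        if right_key < left_key:
            swapped = toy_dominance(right_key, left_key, right_score, left_score)
            return {
                Dom.LEFT_DOMINATES: Dom.RIGHT_DOMINATES,
                Dom.RIGHT_DOMINATES: Dom.LEFT_DOMINATES,
            }.get(swapped, swapped)
        if left_key == right_key and left_score == right_score:
            return Dom.EQUAL
        if left_score >= right_score:
            return Dom.LEFT_DOMINATES  # simpler, and no worse on the score
        if left_key == right_key:
            return Dom.RIGHT_DOMINATES  # equal complexity, strictly better score
        return Dom.NO_DOMINANCE  # simpler but worse-scoring: incomparable

    assert toy_dominance((1, (0,)), (2, (0, 0)), 5, 5) is Dom.LEFT_DOMINATES
    assert toy_dominance((2, (0, 0)), (1, (0,)), 9, 0) is Dom.NO_DOMINANCE
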
68 changes: 31 additions & 37 deletions hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py
@@ -15,7 +15,7 @@
import attr

from hypothesis.internal.compat import int_from_bytes, int_to_bytes
-from hypothesis.internal.conjecture.choice import choice_from_index
+from hypothesis.internal.conjecture.choice import choice_from_index, choice_to_index
from hypothesis.internal.conjecture.data import (
ConjectureData,
ConjectureResult,
@@ -80,6 +80,13 @@ def sort_key(buffer: SortKeyT) -> tuple[int, SortKeyT]:
return (len(buffer), buffer)


+def sort_key_ir(nodes: list[IRNode]) -> tuple[int, tuple[int, ...]]:
+    return (
+        len(nodes),
+        tuple(choice_to_index(node.value, node.kwargs) for node in nodes),
+    )


SHRINK_PASS_DEFINITIONS: dict[str, "ShrinkPassDefinition"] = {}
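A toy model of why sort_key_ir ranks nodes through choice_to_index rather than comparing raw values: each choice is ranked within its own ordered value space, so index 0 always means "the simplest choice permitted here". toy_choice_to_index is invented for this sketch; only its shape matches the real helper.

    def toy_choice_to_index(value, kwargs):
        # Invented miniature of choice_to_index: rank a value within its
        # own ordered choice space, so 0 is the simplest permitted choice.
        if isinstance(value, bool):
            return int(value)
        if isinstance(value, int):
            return abs(value - kwargs.get("shrink_towards", 0))
        raise NotImplementedError

    def toy_sort_key_ir(nodes):
        # nodes are (value, kwargs) pairs standing in for IRNode.
        return (len(nodes), tuple(toy_choice_to_index(v, kw) for v, kw in nodes))

    # With shrink_towards=10, drawing 10 is the simplest integer choice,
    # so it ranks below 3 even though 3 < 10 as a raw value:
    assert toy_sort_key_ir([(10, {"shrink_towards": 10})]) < toy_sort_key_ir(
        [(3, {"shrink_towards": 10})]
    )
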


@@ -305,7 +312,7 @@ def __init__(
self.__derived_values: dict = {}
self.__pending_shrink_explanation = None

-        self.initial_size = len(initial.buffer)
+        self.initial_size = len(initial.choices)

# We keep track of the current best example on the shrink_target
# attribute.
@@ -401,7 +408,7 @@ def consider_new_tree(self, tree: Sequence[IRNode]) -> bool:
if startswith(tree, self.nodes):
return True

-        if startswith(self.nodes, tree):
+        if sort_key_ir(self.nodes) < sort_key_ir(tree):
return False

previous = self.shrink_target
@@ -445,7 +452,7 @@ def incorporate_test_data(self, data):
return
if (
self.__predicate(data)
-            and sort_key(data.buffer) < sort_key(self.shrink_target.buffer)
+            and sort_key_ir(data.ir_nodes) < sort_key_ir(self.shrink_target.ir_nodes)
and self.__allow_transition(self.shrink_target, data)
):
self.update_shrink_target(data)
@@ -474,29 +481,14 @@ def shrink(self):
This method is "mostly idempotent" - calling it twice is unlikely to
have any effect, though it has a non-zero probability of doing so.
"""
-        # We assume that if an all-zero block of bytes is an interesting
-        # example then we're not going to do better than that.
-        # This might not technically be true: e.g. for integers() | booleans()
-        # the simplest example is actually [1, 0]. Missing this case is fairly
-        # harmless and this allows us to make various simplifying assumptions
-        # about the structure of the data (principally that we're never
-        # operating on a block of all zero bytes so can use non-zeroness as a
-        # signpost of complexity).
-        if not any(self.shrink_target.buffer) or self.incorporate_new_buffer(
-            bytes(len(self.shrink_target.buffer))
-        ):
+        # We assume that if an all-trivial example is interesting then
+        # we're not going to do better than that. This might not technically be true:
+        # e.g. in tuples(booleans(), booleans()) | booleans() the simplest example
+        # is [1, False] but the all-trivial example is [0, False, False].
+        if all(node.trivial for node in self.nodes):
self.explain()
return

-        # There are multiple buffers that represent the same counterexample, eg
-        # n=2 (from the 16 bit integer bucket) and n=2 (from the 32 bit integer
-        # bucket). Before we start shrinking, we need to normalize to the minimal
-        # such buffer, else a buffer-smaller but ir-larger value may be chosen
-        # as the minimal counterexample.
-        data = self.engine.new_conjecture_data_ir(self.nodes)
-        self.engine.test_function(data)
-        self.incorporate_test_data(data.as_result())

try:
self.greedy_shrink()
except StopShrinking:
@@ -509,7 +501,7 @@ def s(n):
def s(n):
return "s" if n != 1 else ""

-        total_deleted = self.initial_size - len(self.shrink_target.buffer)
+        total_deleted = self.initial_size - len(self.shrink_target.choices)
calls = self.engine.call_count - self.initial_calls
misaligned = self.engine.misaligned_count - self.initial_misaligned

@@ -518,7 +510,7 @@ def s(n):
"Shrink pass profiling\n"
"---------------------\n\n"
f"Shrinking made a total of {calls} call{s(calls)} of which "
f"{self.shrinks} shrank and {misaligned} were misaligned. This deleted {total_deleted} bytes out "
f"{self.shrinks} shrank and {misaligned} were misaligned. This deleted {total_deleted} choices out "
f"of {self.initial_size}."
)
for useful in [True, False]:
@@ -540,7 +532,7 @@ def s(n):
self.debug(
f" * {p.name} made {p.calls} call{s(p.calls)} of which "
f"{p.shrinks} shrank and {p.misaligned} were misaligned, "
f"deleting {p.deletions} byte{s(p.deletions)}."
f"deleting {p.deletions} choice{s(p.deletions)}."
)
self.debug("")
self.explain()
@@ -797,7 +789,7 @@ def fixate_shrink_passes(self, passes):
# the length are the best.
if self.shrink_target is before_sp:
reordering[sp] = 1
-                elif len(self.buffer) < len(before_sp.buffer):
+                elif len(self.choices) < len(before_sp.choices):
reordering[sp] = -1
else:
reordering[sp] = 0
@@ -988,7 +980,7 @@ def __changed_nodes(self):
assert prev_target is not new_target
prev_nodes = prev_target.ir_nodes
new_nodes = new_target.ir_nodes
-        assert sort_key(new_target.buffer) < sort_key(prev_target.buffer)
+        assert sort_key_ir(new_target.ir_nodes) < sort_key_ir(prev_target.ir_nodes)

if len(prev_nodes) != len(new_nodes) or any(
n1.ir_type != n2.ir_type for n1, n2 in zip(prev_nodes, new_nodes)
@@ -1186,11 +1178,11 @@ def remove_discarded(self):

for ex in self.shrink_target.examples:
if (
-                ex.length > 0
+                ex.ir_length > 0
and ex.discarded
-                and (not discarded or ex.start >= discarded[-1][-1])
+                and (not discarded or ex.ir_start >= discarded[-1][-1])
):
-                discarded.append((ex.start, ex.end))
+                discarded.append((ex.ir_start, ex.ir_end))

# This can happen if we have discards but they are all of
# zero length. This shouldn't happen very often so it's
@@ -1199,11 +1191,11 @@
if not discarded:
break

-            attempt = bytearray(self.shrink_target.buffer)
+            attempt = list(self.nodes)
for u, v in reversed(discarded):
del attempt[u:v]

-            if not self.incorporate_new_buffer(attempt):
+            if not self.consider_new_tree(tuple(attempt)):
return False
return True

@@ -1563,7 +1555,7 @@ def test_not_equal(x, y):
],
)
),
-            key=lambda i: st.buffer[examples[i].start : examples[i].end],
+            key=lambda i: sort_key_ir(
+                st.ir_nodes[examples[i].ir_start : examples[i].ir_end]
+            ),
)

def run_node_program(self, i, description, original, repeats=1):
@@ -1670,7 +1664,7 @@ def step(self, *, random_order=False):
initial_shrinks = self.shrinker.shrinks
initial_calls = self.shrinker.calls
initial_misaligned = self.shrinker.misaligned
-        size = len(self.shrinker.shrink_target.buffer)
+        size = len(self.shrinker.shrink_target.choices)
self.shrinker.engine.explain_next_call_as(self.name)

if random_order:
@@ -1687,7 +1681,7 @@
self.calls += self.shrinker.calls - initial_calls
self.misaligned += self.shrinker.misaligned - initial_misaligned
self.shrinks += self.shrinker.shrinks - initial_shrinks
-        self.deletions += size - len(self.shrinker.shrink_target.buffer)
+        self.deletions += size - len(self.shrinker.shrink_target.choices)
self.shrinker.engine.clear_call_explanation()
return True

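A toy model of the new guard in consider_new_tree above: a candidate tree is only worth trying if it is not strictly more complex than the current nodes under the typed-choice-sequence order, which also catches same-length-but-more-complex candidates that a pure prefix test would miss. The helpers are invented for this sketch; a node is reduced to a bare complexity index.

    def toy_sort_key_ir(indices):
        # A node is reduced to its complexity index for this sketch.
        return (len(indices), tuple(indices))

    current = [0, 2, 1]

    def worth_trying(candidate):
        # Mirrors the new guard: reject unless the candidate is at least
        # as simple, i.e. unless sort_key_ir(current) < sort_key_ir(candidate).
        return not (toy_sort_key_ir(current) < toy_sort_key_ir(candidate))

    assert worth_trying([0, 2, 0])         # simpler at the final choice
    assert worth_trying([0, 2])            # shorter is always simpler
    assert not worth_trying([0, 2, 1, 0])  # longer: never an improvement
    assert not worth_trying([0, 3, 0])     # same length but more complex
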
