Skip to content

Commit

Permalink
Merge pull request #4001 from tybug/shrinker-adjacent-improvements
Browse files Browse the repository at this point in the history
Shrinker-adjacent improvements
  • Loading branch information
Zac-HD authored May 23, 2024
2 parents 259813a + ff523f7 commit 66d84b5
Show file tree
Hide file tree
Showing 8 changed files with 97 additions and 42 deletions.
3 changes: 3 additions & 0 deletions hypothesis-python/RELEASE.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
RELEASE_TYPE: patch

This patch fixes one of our shrinking passes getting into a rare ``O(n)`` case instead of ``O(log(n))``.
21 changes: 12 additions & 9 deletions hypothesis-python/src/hypothesis/internal/conjecture/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,8 @@ class BooleanKWargs(TypedDict):
IntegerKWargs, FloatKWargs, StringKWargs, BytesKWargs, BooleanKWargs
]
IRTypeName: TypeAlias = Literal["integer", "string", "boolean", "float", "bytes"]
InvalidAt: TypeAlias = Tuple[IRTypeName, IRKWargsType]
# ir_type, kwargs, forced
InvalidAt: TypeAlias = Tuple[IRTypeName, IRKWargsType, Optional[IRType]]


class ExtraInformation:
Expand Down Expand Up @@ -2084,7 +2085,7 @@ def draw_integer(
)

if self.ir_tree_nodes is not None and observe:
node = self._pop_ir_tree_node("integer", kwargs)
node = self._pop_ir_tree_node("integer", kwargs, forced=forced)
if forced is None:
assert isinstance(node.value, int)
forced = node.value
Expand Down Expand Up @@ -2141,7 +2142,7 @@ def draw_float(
)

if self.ir_tree_nodes is not None and observe:
node = self._pop_ir_tree_node("float", kwargs)
node = self._pop_ir_tree_node("float", kwargs, forced=forced)
if forced is None:
assert isinstance(node.value, float)
forced = node.value
Expand Down Expand Up @@ -2183,7 +2184,7 @@ def draw_string(
},
)
if self.ir_tree_nodes is not None and observe:
node = self._pop_ir_tree_node("string", kwargs)
node = self._pop_ir_tree_node("string", kwargs, forced=forced)
if forced is None:
assert isinstance(node.value, str)
forced = node.value
Expand Down Expand Up @@ -2219,7 +2220,7 @@ def draw_bytes(
kwargs: BytesKWargs = self._pooled_kwargs("bytes", {"size": size})

if self.ir_tree_nodes is not None and observe:
node = self._pop_ir_tree_node("bytes", kwargs)
node = self._pop_ir_tree_node("bytes", kwargs, forced=forced)
if forced is None:
assert isinstance(node.value, bytes)
forced = node.value
Expand Down Expand Up @@ -2261,7 +2262,7 @@ def draw_boolean(
kwargs: BooleanKWargs = self._pooled_kwargs("boolean", {"p": p})

if self.ir_tree_nodes is not None and observe:
node = self._pop_ir_tree_node("boolean", kwargs)
node = self._pop_ir_tree_node("boolean", kwargs, forced=forced)
if forced is None:
assert isinstance(node.value, bool)
forced = node.value
Expand Down Expand Up @@ -2302,7 +2303,9 @@ def _pooled_kwargs(self, ir_type, kwargs):
POOLED_KWARGS_CACHE[key] = kwargs
return kwargs

def _pop_ir_tree_node(self, ir_type: IRTypeName, kwargs: IRKWargsType) -> IRNode:
def _pop_ir_tree_node(
self, ir_type: IRTypeName, kwargs: IRKWargsType, *, forced: Optional[IRType]
) -> IRNode:
assert self.ir_tree_nodes is not None

if self._node_index == len(self.ir_tree_nodes):
Expand All @@ -2321,7 +2324,7 @@ def _pop_ir_tree_node(self, ir_type: IRTypeName, kwargs: IRKWargsType) -> IRNode
# (in fact, it is possible that giving up early here results in more time
# for useful shrinks to run).
if node.ir_type != ir_type:
invalid_at = (ir_type, kwargs)
invalid_at = (ir_type, kwargs, forced)
self.invalid_at = invalid_at
self.observer.mark_invalid(invalid_at)
self.mark_invalid(f"(internal) want a {ir_type} but have a {node.ir_type}")
Expand All @@ -2330,7 +2333,7 @@ def _pop_ir_tree_node(self, ir_type: IRTypeName, kwargs: IRKWargsType) -> IRNode
# that is allowed by the expected kwargs, then we can coerce this node
# into an aligned one by using its value. It's unclear how useful this is.
if not ir_value_permitted(node.value, node.ir_type, kwargs):
invalid_at = (ir_type, kwargs)
invalid_at = (ir_type, kwargs, forced)
self.invalid_at = invalid_at
self.observer.mark_invalid(invalid_at)
self.mark_invalid(f"(internal) got a {ir_type} but outside the valid range")
Expand Down
22 changes: 10 additions & 12 deletions hypothesis-python/src/hypothesis/internal/conjecture/datatree.py
Original file line number Diff line number Diff line change
Expand Up @@ -555,16 +555,13 @@ def _repr_pretty_(self, p, cycle):
p.text(_node_pretty(ir_type, value, kwargs, forced=i in self.forced))
indent += 2

if isinstance(self.transition, Branch):
with p.indent(indent):
if len(self.values) > 0:
p.break_()
p.pretty(self.transition)

if isinstance(self.transition, (Killed, Conclusion)):
with p.indent(indent):
if len(self.values) > 0:
p.break_()
if self.transition is not None:
p.pretty(self.transition)
else:
p.text("unknown")


class DataTree:
Expand Down Expand Up @@ -843,8 +840,8 @@ def simulate_test_function(self, data):
tree. This will likely change in future."""
node = self.root

def draw(ir_type, kwargs, *, forced=None):
if ir_type == "float" and forced is not None:
def draw(ir_type, kwargs, *, forced=None, convert_forced=True):
if ir_type == "float" and forced is not None and convert_forced:
forced = int_to_float(forced)

draw_func = getattr(data, f"draw_{ir_type}")
Expand All @@ -869,9 +866,9 @@ def draw(ir_type, kwargs, *, forced=None):
data.conclude_test(t.status, t.interesting_origin)
elif node.transition is None:
if node.invalid_at is not None:
(ir_type, kwargs) = node.invalid_at
(ir_type, kwargs, forced) = node.invalid_at
try:
draw(ir_type, kwargs)
draw(ir_type, kwargs, forced=forced, convert_forced=False)
except StopTest:
if data.invalid_at is not None:
raise
Expand Down Expand Up @@ -1021,7 +1018,8 @@ def draw_boolean(
self.draw_value("boolean", value, was_forced=was_forced, kwargs=kwargs)

def mark_invalid(self, invalid_at: InvalidAt) -> None:
self.__current_node.invalid_at = invalid_at
if self.__current_node.transition is None:
self.__current_node.invalid_at = invalid_at

def draw_value(
self,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ def __init__(
self.shrinks: int = 0
self.finish_shrinking_deadline: Optional[float] = None
self.call_count: int = 0
self.misaligned_count: int = 0
self.valid_examples: int = 0
self.random: Random = random or Random(getrandbits(128))
self.database_key: Optional[bytes] = database_key
Expand Down Expand Up @@ -418,6 +419,8 @@ def test_function(self, data: ConjectureData) -> None:
}
self.stats_per_test_case.append(call_stats)
self._cache(data)
if data.invalid_at is not None: # pragma: no branch # coverage bug?
self.misaligned_count += 1

self.debug_data(data)

Expand Down
28 changes: 15 additions & 13 deletions hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,7 @@ def __init__(
# it's time to stop shrinking.
self.max_stall = 200
self.initial_calls = self.engine.call_count
self.initial_misaligned = self.engine.misaligned_count
self.calls_at_last_shrink = self.initial_calls

self.passes_by_name: Dict[str, ShrinkPass] = {}
Expand Down Expand Up @@ -383,6 +384,10 @@ def calls(self):
test function."""
return self.engine.call_count

@property
def misaligned(self):
return self.engine.misaligned_count

def check_calls(self):
if self.calls - self.calls_at_last_shrink >= self.max_stall:
raise StopShrinking
Expand Down Expand Up @@ -501,13 +506,14 @@ def s(n):

total_deleted = self.initial_size - len(self.shrink_target.buffer)
calls = self.engine.call_count - self.initial_calls
misaligned = self.engine.misaligned_count - self.initial_misaligned

self.debug(
"---------------------\n"
"Shrink pass profiling\n"
"---------------------\n\n"
f"Shrinking made a total of {calls} call{s(calls)} of which "
f"{self.shrinks} shrank. This deleted {total_deleted} bytes out "
f"{self.shrinks} shrank and {misaligned} were misaligned. This deleted {total_deleted} bytes out "
f"of {self.initial_size}."
)
for useful in [True, False]:
Expand All @@ -527,16 +533,9 @@ def s(n):
continue

self.debug(
" * %s made %d call%s of which "
"%d shrank, deleting %d byte%s."
% (
p.name,
p.calls,
s(p.calls),
p.shrinks,
p.deletions,
s(p.deletions),
)
f" * {p.name} made {p.calls} call{s(p.calls)} of which "
f"{p.shrinks} shrank and {p.misaligned} were misaligned, "
f"deleting {p.deletions} byte{s(p.deletions)}."
)
self.debug("")
self.explain()
Expand Down Expand Up @@ -1321,7 +1320,7 @@ def boost(k):

@defines_shrink_pass()
def lower_blocks_together(self, chooser):
block = chooser.choose(self.blocks, lambda b: not b.all_zero)
block = chooser.choose(self.blocks, lambda b: not b.trivial)

# Choose the next block to be up to eight blocks onwards. We don't
# want to go too far (to avoid quadratic time) but it's worth a
Expand All @@ -1330,7 +1329,7 @@ def lower_blocks_together(self, chooser):
next_block = self.blocks[
chooser.choose(
range(block.index + 1, min(len(self.blocks), block.index + 9)),
lambda j: not self.blocks[j].all_zero,
lambda j: not self.blocks[j].trivial,
)
]

Expand Down Expand Up @@ -1623,6 +1622,7 @@ class ShrinkPass:
last_prefix = attr.ib(default=())
successes = attr.ib(default=0)
calls = attr.ib(default=0)
misaligned = attr.ib(default=0)
shrinks = attr.ib(default=0)
deletions = attr.ib(default=0)

Expand All @@ -1633,6 +1633,7 @@ def step(self, *, random_order=False):

initial_shrinks = self.shrinker.shrinks
initial_calls = self.shrinker.calls
initial_misaligned = self.shrinker.misaligned
size = len(self.shrinker.shrink_target.buffer)
self.shrinker.engine.explain_next_call_as(self.name)

Expand All @@ -1648,6 +1649,7 @@ def step(self, *, random_order=False):
)
finally:
self.calls += self.shrinker.calls - initial_calls
self.misaligned += self.shrinker.misaligned - initial_misaligned
self.shrinks += self.shrinker.shrinks - initial_shrinks
self.deletions += size - len(self.shrinker.shrink_target.buffer)
self.shrinker.engine.clear_call_explanation()
Expand Down
46 changes: 43 additions & 3 deletions hypothesis-python/tests/conjecture/test_data_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -598,6 +598,10 @@ def test_datatree_repr(bool_kwargs, int_kwargs):
observer.draw_boolean(False, was_forced=True, kwargs=bool_kwargs)
observer.conclude_test(Status.INTERESTING, interesting_origin=origin)

observer = tree.new_observer()
observer.draw_boolean(False, was_forced=False, kwargs=bool_kwargs)
observer.draw_integer(5, was_forced=False, kwargs=int_kwargs)

assert (
pretty.pretty(tree)
== textwrap.dedent(
Expand All @@ -610,13 +614,15 @@ def test_datatree_repr(bool_kwargs, int_kwargs):
integer 0 {int_kwargs}
boolean False [forced] {bool_kwargs}
Conclusion (Status.INTERESTING, {origin})
integer 5 {int_kwargs}
unknown
"""
).strip()
)


def _draw(data, node):
return getattr(data, f"draw_{node.ir_type}")(**node.kwargs)
def _draw(data, node, *, forced=None):
return getattr(data, f"draw_{node.ir_type}")(**node.kwargs, forced=forced)


@given(ir_nodes(), ir_nodes())
Expand All @@ -635,7 +641,7 @@ def test_misaligned_nodes_after_valid_draw(node, misaligned_node):
tree.simulate_test_function(data)
assert data.status is Status.INVALID

assert data.invalid_at == (node.ir_type, node.kwargs)
assert data.invalid_at == (node.ir_type, node.kwargs, None)


@given(ir_nodes(was_forced=False), ir_nodes(was_forced=False))
Expand Down Expand Up @@ -703,3 +709,37 @@ def test_simulate_non_invalid_conclude_is_unseen_behavior(node, misaligned_node)
tree.simulate_test_function(data)

assert data.status is Status.OVERRUN


@given(ir_nodes(), ir_nodes())
@settings(suppress_health_check=[HealthCheck.too_slow])
def test_simulating_inherits_invalid_forced_status(node, misaligned_node):
assume(misaligned_node.ir_type != node.ir_type)

# we have some logic in DataTree.simulate_test_function to "peek ahead" and
# make sure it simulates invalid nodes correctly. But if it does so without
# respecting whether the invalid node was forced or not, and this simulation
# is observed by an observer, this can cause flaky errors later due to a node
# going from unforced to forced.

tree = DataTree()

def test_function(ir_nodes):
data = ConjectureData.for_ir_tree(ir_nodes, observer=tree.new_observer())
_draw(data, node)
_draw(data, node, forced=node.value)

# (1) set up a misaligned node at index 1
with pytest.raises(StopTest):
test_function([node, misaligned_node])

# (2) simulate an aligned tree. the datatree peeks ahead here using invalid_at
# due to (1).
data = ConjectureData.for_ir_tree([node, node], observer=tree.new_observer())
with pytest.raises(PreviouslyUnseenBehaviour):
tree.simulate_test_function(data)

# (3) run the same aligned tree without simulating. this uses the actual test
# function's draw and forced value. This would flaky error if it did not match
# what the datatree peeked ahead with in (2).
test_function([node, node])
9 changes: 9 additions & 0 deletions hypothesis-python/tests/conjecture/test_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -1661,3 +1661,12 @@ def test(data):
runner.tree.simulate_test_function(ConjectureData.for_ir_tree([node_0]))
runner.cached_test_function_ir([node_0])
assert runner.call_count == 3


def test_mildly_complicated_strategy():
# there are some code paths in engine.py that are easily covered by any mildly
# compliated strategy and aren't worth testing explicitly for. This covers
# those.
n = 5
s = st.lists(st.integers(), min_size=n)
assert minimal(s, lambda x: sum(x) >= 2 * n) == [0, 0, 0, 0, n * 2]
7 changes: 2 additions & 5 deletions hypothesis-python/tests/cover/test_simple_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,11 +122,8 @@ def test_dictionaries_of_fixed_length(n):

@pytest.mark.parametrize("n", range(10))
def test_lists_of_lower_bounded_length(n):
x = minimal(lists(integers(), min_size=n), lambda x: sum(x) >= 2 * n)
assert n <= len(x) <= 2 * n
assert all(t >= 0 for t in x)
assert len(x) == n or all(t > 0 for t in x)
assert sum(x) == 2 * n
l = minimal(lists(integers(), min_size=n), lambda x: sum(x) >= 2 * n)
assert l == [] if n == 0 else [0] * (n - 1) + [n * 2]


@flaky(min_passes=1, max_runs=3)
Expand Down

0 comments on commit 66d84b5

Please sign in to comment.