Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use typed choice sequence in database #4241

Merged
merged 5 commits into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .readthedocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,5 @@ python:
- path: hypothesis-python/
extra_requirements:
- all
sphinx:
configuration: hypothesis-python/docs/conf.py
5 changes: 5 additions & 0 deletions hypothesis-python/RELEASE.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
RELEASE_TYPE: minor

The :doc:`Hypothesis example database <database>` now uses a new internal format to store examples. This new format is not compatible with the previous format, so stored entries will not carry over.

The database is best thought of as a cache that may be invalidated at times. Instead of relying on it for correctness, we recommend using :obj:`@example <hypothesis.example>` to specify explicit examples. When using databases across environments (such as connecting a :class:`~hypothesis.database.GitHubArtifactDatabase` database in CI to your local environment), we recommend using the same version of Hypothesis for each where possible, for maximum reproducibility.
15 changes: 7 additions & 8 deletions hypothesis-python/src/hypothesis/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@
ensure_free_stackframes,
gc_cumulative_time,
)
from hypothesis.internal.conjecture.shrinker import sort_key, sort_key_ir
from hypothesis.internal.conjecture.shrinker import sort_key_ir
from hypothesis.internal.entropy import deterministic_PRNG
from hypothesis.internal.escalation import (
InterestingOrigin,
Expand Down Expand Up @@ -352,9 +352,8 @@ def decode_failure(blob: bytes) -> Sequence[ChoiceT]:
f"Could not decode blob {blob!r}: Invalid start byte {prefix!r}"
)

try:
choices = ir_from_bytes(decoded)
except Exception:
choices = ir_from_bytes(decoded)
if choices is None:
raise InvalidArgument(f"Invalid serialized choice sequence for blob {blob!r}")

return choices
Expand Down Expand Up @@ -1873,13 +1872,13 @@ def fuzz_one_input(
except (StopTest, UnsatisfiedAssumption):
return None
except BaseException:
buffer = bytes(data.buffer)
known = minimal_failures.get(data.interesting_origin)
if settings.database is not None and (
known is None or sort_key(buffer) <= sort_key(known)
known is None
or sort_key_ir(data.ir_nodes) <= sort_key_ir(known)
):
settings.database.save(database_key, buffer)
minimal_failures[data.interesting_origin] = buffer
settings.database.save(database_key, ir_to_bytes(data.choices))
minimal_failures[data.interesting_origin] = data.ir_nodes
raise
return bytes(data.buffer)

Expand Down
20 changes: 17 additions & 3 deletions hypothesis-python/src/hypothesis/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -768,8 +768,7 @@ def ir_to_bytes(ir: Iterable[ChoiceT], /) -> bytes:
return b"".join(parts)


def ir_from_bytes(buffer: bytes, /) -> list[ChoiceT]:
"""Deserialize a bytestring to a list of IR elements. Inverts ir_to_bytes."""
def _ir_from_bytes(buffer: bytes, /) -> tuple[ChoiceT, ...]:
# See above for an explanation of the format.
parts: list[ChoiceT] = []
idx = 0
Expand Down Expand Up @@ -797,4 +796,19 @@ def ir_from_bytes(buffer: bytes, /) -> list[ChoiceT]:
else:
assert tag == 4
parts.append(chunk.decode(errors="surrogatepass"))
return parts
return tuple(parts)


def ir_from_bytes(buffer: bytes, /) -> Optional[tuple[ChoiceT, ...]]:
    """
    Decode a serialized choice sequence; this is the inverse of ir_to_bytes.

    On success the decoded choices are returned as a tuple. If ``buffer``
    cannot be parsed as a serialized choice sequence, None is returned
    instead of propagating the parsing error.
    """
    try:
        choices = _ir_from_bytes(buffer)
    except Exception:
        # Parsing failed, e.g. because the serialization format changed or
        # because the database contains junk data. Signal that with None.
        choices = None
    return choices
49 changes: 48 additions & 1 deletion hypothesis-python/src/hypothesis/internal/conjecture/choice.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from hypothesis.errors import ChoiceTooLarge
from hypothesis.internal.conjecture.floats import float_to_lex, lex_to_float
from hypothesis.internal.conjecture.utils import identity
from hypothesis.internal.floats import make_float_clamper, sign_aware_lte
from hypothesis.internal.floats import float_to_int, make_float_clamper, sign_aware_lte
from hypothesis.internal.intervalsets import IntervalSet

T = TypeVar("T")
Expand Down Expand Up @@ -67,6 +67,9 @@ class BooleanKWargs(TypedDict):
IntegerKWargs, FloatKWargs, StringKWargs, BytesKWargs, BooleanKWargs
]
ChoiceNameT: "TypeAlias" = Literal["integer", "string", "boolean", "float", "bytes"]
ChoiceKeyT: "TypeAlias" = Union[
int, str, bytes, tuple[Literal["bool"], bool], tuple[Literal["float"], int]
]


def _size_to_index(size: int, *, alphabet_size: int) -> int:
Expand Down Expand Up @@ -454,3 +457,47 @@ def choice_permitted(choice: ChoiceT, kwargs: ChoiceKwargsT) -> bool:
return True
else:
raise NotImplementedError(f"unhandled type {type(choice)} with value {choice}")


def choices_key(choices: Sequence[ChoiceT]) -> tuple[ChoiceKeyT, ...]:
    """Return a tuple holding the choice_key of every choice, in order."""
    return tuple(map(choice_key, choices))


def choice_key(choice: ChoiceT) -> ChoiceKeyT:
    """
    Return a key for ``choice`` that is suitable for equality comparison.

    Booleans and floats are wrapped in a tagged tuple; every other choice is
    returned unchanged.
    """
    if isinstance(choice, bool):
        # Tag booleans so that choice_key(0) != choice_key(False).
        return ("bool", choice)
    if isinstance(choice, float):
        # Go through float_to_int so that -0.0/0.0, signaling/nonsignaling
        # nans, etc. stay distinct, and tag the result so it cannot collide
        # with the key of a real integer choice.
        return ("float", float_to_int(choice))
    return choice


def choice_equal(choice1: ChoiceT, choice2: ChoiceT) -> bool:
    """
    Return whether two choices of the same type have equal choice_key values.

    Both arguments are asserted to be of exactly the same type.
    """
    assert type(choice1) is type(choice2), (choice1, choice2)
    first_key = choice_key(choice1)
    second_key = choice_key(choice2)
    return first_key == second_key


def choice_kwargs_equal(
    ir_type: ChoiceNameT, kwargs1: ChoiceKwargsT, kwargs2: ChoiceKwargsT
) -> bool:
    """Return whether both kwargs produce the same choice_kwargs_key for ``ir_type``."""
    first_key = choice_kwargs_key(ir_type, kwargs1)
    second_key = choice_kwargs_key(ir_type, kwargs2)
    return first_key == second_key


def choice_kwargs_key(ir_type, kwargs):
    """
    Build a tuple key out of the kwargs of a choice of type ``ir_type``.

    * "float": the bounds are passed through float_to_int, followed by
      ``allow_nan`` and ``smallest_nonzero_magnitude``.
    * "integer": ``min_value``, ``max_value``, ``weights`` (converted to a
      tuple unless it is None) and ``shrink_towards``.
    * anything else: the kwargs values, ordered by their sorted key names.
    """
    if ir_type == "float":
        lower = float_to_int(kwargs["min_value"])
        upper = float_to_int(kwargs["max_value"])
        return (
            lower,
            upper,
            kwargs["allow_nan"],
            kwargs["smallest_nonzero_magnitude"],
        )

    if ir_type == "integer":
        lower = kwargs["min_value"]
        upper = kwargs["max_value"]
        weights = kwargs["weights"]
        if weights is not None:
            weights = tuple(weights)
        return (lower, upper, weights, kwargs["shrink_towards"])

    return tuple([kwargs[name] for name in sorted(kwargs)])
53 changes: 12 additions & 41 deletions hypothesis-python/src/hypothesis/internal/conjecture/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,11 @@
FloatKWargs,
IntegerKWargs,
StringKWargs,
choice_equal,
choice_from_index,
choice_key,
choice_kwargs_equal,
choice_kwargs_key,
choice_permitted,
)
from hypothesis.internal.conjecture.floats import float_to_lex, lex_to_float
Expand Down Expand Up @@ -647,15 +651,15 @@ def trivial(self) -> bool:

if self.ir_type != "float":
zero_value = choice_from_index(0, self.ir_type, self.kwargs)
return ir_value_equal(self.value, zero_value)
return choice_equal(self.value, zero_value)
else:
kwargs = cast(FloatKWargs, self.kwargs)
min_value = kwargs["min_value"]
max_value = kwargs["max_value"]
shrink_towards = 0.0

if min_value == -math.inf and max_value == math.inf:
return ir_value_equal(self.value, shrink_towards)
return choice_equal(self.value, shrink_towards)

if (
not math.isinf(min_value)
Expand All @@ -666,7 +670,7 @@ def trivial(self) -> bool:
# one closest to shrink_towards
shrink_towards = max(math.ceil(min_value), shrink_towards)
shrink_towards = min(math.floor(max_value), shrink_towards)
return ir_value_equal(self.value, float(shrink_towards))
return choice_equal(self.value, float(shrink_towards))

# the real answer here is "the value in [min_value, max_value] with
# the lowest denominator when represented as a fraction".
Expand All @@ -680,17 +684,17 @@ def __eq__(self, other: object) -> bool:

return (
self.ir_type == other.ir_type
and ir_value_equal(self.value, other.value)
and ir_kwargs_equal(self.ir_type, self.kwargs, other.kwargs)
and choice_equal(self.value, other.value)
and choice_kwargs_equal(self.ir_type, self.kwargs, other.kwargs)
and self.was_forced == other.was_forced
)

def __hash__(self) -> int:
return hash(
(
self.ir_type,
ir_value_key(self.value),
ir_kwargs_key(self.ir_type, self.kwargs),
choice_key(self.value),
choice_kwargs_key(self.ir_type, self.kwargs),
self.was_forced,
)
)
Expand Down Expand Up @@ -726,39 +730,6 @@ def ir_size(ir: Iterable[Union[IRNode, NodeTemplate, ChoiceT]]) -> int:
return size


def ir_value_key(v):
    """
    Return a comparison key for the value ``v``.

    Values whose type is exactly float are mapped through float_to_int;
    every other value is returned unchanged.
    """
    return float_to_int(v) if type(v) is float else v


def ir_kwargs_key(ir_type, kwargs):
    """
    Build a tuple key out of the kwargs of a node of type ``ir_type``.

    Float bounds go through float_to_int, integer ``weights`` are converted
    to a tuple unless None, and all other types use their kwargs values
    ordered by sorted key name.
    """
    if ir_type == "float":
        min_key = float_to_int(kwargs["min_value"])
        max_key = float_to_int(kwargs["max_value"])
        return (
            min_key,
            max_key,
            kwargs["allow_nan"],
            kwargs["smallest_nonzero_magnitude"],
        )

    if ir_type == "integer":
        min_value = kwargs["min_value"]
        max_value = kwargs["max_value"]
        weights = kwargs["weights"]
        weights_key = None if weights is None else tuple(weights)
        return (min_value, max_value, weights_key, kwargs["shrink_towards"])

    values = []
    for name in sorted(kwargs):
        values.append(kwargs[name])
    return tuple(values)


def ir_value_equal(v1, v2):
    """
    Return whether two values of the same type have equal ir_value_key values.

    Both arguments are asserted to be of exactly the same type.
    """
    assert type(v1) is type(v2), (v1, v2)
    left_key = ir_value_key(v1)
    right_key = ir_value_key(v2)
    return left_key == right_key


def ir_kwargs_equal(ir_type, kwargs1, kwargs2):
    """Return whether both kwargs produce the same ir_kwargs_key for ``ir_type``."""
    left_key = ir_kwargs_key(ir_type, kwargs1)
    right_key = ir_kwargs_key(ir_type, kwargs2)
    return left_key == right_key


@dataclass_transform()
@attr.s(slots=True)
class ConjectureResult:
Expand Down Expand Up @@ -1969,7 +1940,7 @@ def _pooled_kwargs(self, ir_type, kwargs):
if self.provider.avoid_realization:
return kwargs

key = (ir_type, *ir_kwargs_key(ir_type, kwargs))
key = (ir_type, *choice_kwargs_key(ir_type, kwargs))
try:
return POOLED_KWARGS_CACHE[key]
except KeyError:
Expand Down
Loading
Loading