diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst new file mode 100644 index 0000000000..42943ff894 --- /dev/null +++ b/hypothesis-python/RELEASE.rst @@ -0,0 +1,5 @@ +RELEASE_TYPE: patch + +This release improves the behaviour of the :func:`~hypothesis.strategies.characters` strategy +when shrinking, by changing which characters are considered smallest to prefer more "normal" ascii characters +where available. diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/dfa/__init__.py b/hypothesis-python/src/hypothesis/internal/conjecture/dfa/__init__.py new file mode 100644 index 0000000000..c84dc4411e --- /dev/null +++ b/hypothesis-python/src/hypothesis/internal/conjecture/dfa/__init__.py @@ -0,0 +1,115 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Most of this work is copyright (C) 2013-2020 David R. MacIver +# (david@drmaciver.com), but it contains contributions by others. See +# CONTRIBUTING.rst for a full list of people who may hold copyright, and +# consult the git log if you need to determine who owns an individual +# contribution. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. +# +# END HEADER + +import threading +from collections import deque + + +class DFA: + """Base class for implementations of deterministic finite + automata. + + This is abstract to allow for the possibility of states + being calculated lazily as we traverse the DFA (which + we make heavy use of in our L* implementation - see + lstar.py for details). + + States can be of any hashable type. + """ + + def __init__(self): + self.__caches = threading.local() + + @property + def start(self): + """Returns the starting state.""" + raise NotImplementedError() + + def is_accepting(self, i): + """Returns if state ``i`` is an accepting one.""" + raise NotImplementedError() + + def transition(self, i, c): + """Returns the state that i transitions to on reading + character c from a string.""" + raise NotImplementedError() + + def transitions(self, i): + """Iterates over all pairs (byte, state) of transitions + which do not lead to dead states.""" + for c, j in self.__raw_transitions(i): + if not self.is_dead(j): + yield c, j + + def matches(self, s): + """Returns whether the string ``s`` is accepted + by this automaton.""" + i = self.start + for c in s: + i = self.transition(i, c) + return self.is_accepting(i) + + def is_dead(self, i): + """Returns True if no strings can be accepted + when starting from state ``i``.""" + if self.is_accepting(i): + return False + + try: + cache = self.__caches.dead + except AttributeError: + cache = {} + self.__caches.dead = cache + + try: + return cache[i] + except KeyError: + pass + seen = set() + pending = deque([i]) + result = True + while pending: + j = pending.popleft() + if j in seen: + continue + seen.add(j) + if self.is_accepting(j): + result = False + break + else: + for _, k in self.__raw_transitions(j): + pending.append(k) + if result: + for j in seen: + cache[j] = True + else: + cache[i] = False + return result + + def all_matching_strings(self): + """Iterate over all strings matched by this automaton + in shortlex-ascending order.""" + queue = deque([(self.start, b"")]) + while queue: + i, path = queue.popleft() + if self.is_accepting(i): + yield path + for c, j in self.transitions(i): + queue.append((j, path + 
bytes([c]))) + + def __raw_transitions(self, i): + for c in range(256): + j = self.transition(i, c) + yield c, j diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/dfa/lstar.py b/hypothesis-python/src/hypothesis/internal/conjecture/dfa/lstar.py new file mode 100644 index 0000000000..fd1e7ee237 --- /dev/null +++ b/hypothesis-python/src/hypothesis/internal/conjecture/dfa/lstar.py @@ -0,0 +1,221 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Most of this work is copyright (C) 2013-2020 David R. MacIver +# (david@drmaciver.com), but it contains contributions by others. See +# CONTRIBUTING.rst for a full list of people who may hold copyright, and +# consult the git log if you need to determine who owns an individual +# contribution. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. +# +# END HEADER + +from hypothesis.internal.conjecture.dfa import DFA +from hypothesis.internal.conjecture.junkdrawer import find_integer + +""" +This module contains an implementation of the L* algorithm +for learning a deterministic finite automaton based on an +unknown membership function and a series of examples of +strings that may or may not satisfy it. + +The two relevant papers for understanding this are: + +* Angluin, Dana. "Learning regular sets from queries and counterexamples." + Information and computation 75.2 (1987): 87-106. +* Rivest, Ronald L., and Robert E. Schapire. "Inference of finite automata + using homing sequences." Information and Computation 103.2 (1993): 299-347. + Note that we only use the material from section 4.5 "Improving Angluin's L* + algorithm" (page 318), and all of the rest of the material on homing + sequences can be skipped. + +The former explains the core algorithm, the latter a modification +we use (which we have further modified) which allows it to +be implemented more efficiently. + +We have two major departures from the paper: + +1. We learn the automaton lazily as we traverse it. This is particularly + valuable because if we make many corrections on the same string we only + have to learn the transitions that correspond to the string we are + correcting on. +2. We make use of our ``find_integer`` method rather than a binary search + as proposed in the Rivest and Schapire paper, as we expect that + usually most strings will be mispredicted near the beginning. + +A note on performance: This code is not really fast enough for +us to ever want to run in production on large strings, and this +is somewhat intrinsic. We should only use it in testing or for +learning languages offline that we can record for later use. 
+ +""" + + +class LStar: + def __init__(self, member): + self.__experiments = [] + self.__cache = {} + self.__member = member + + self.__add_experiment(b"") + + def member(self, s): + """Check whether this string is a member of the language + to be learned.""" + s = bytes(s) + try: + return self.__cache[s] + except KeyError: + return self.__cache.setdefault(s, self.__member(s)) + + @property + def generation(self): + """Return an integer value that will be incremented + every time the DFA we predict changes.""" + return len(self.__experiments) + + @property + def dfa(self): + """Returns our current model of a DFA for matching + the language we are learning.""" + if self.__dfa is None: + self.__dfa = ExperimentDFA(self.member, self.__experiments) + return self.__dfa + + def learn(self, s): + """Learn to give the correct answer on this string. + That is, after this method completes we will have + ``self.dfa.matches(s) == self.member(s)``. + + Note that we do not guarantee that this will remain + true in the event that learn is called again with + a different string. It is in principle possible that + future learning will cause us to make a mistake on + this string. However, repeatedly calling learn on + each of a set of strings until the generation stops + changing is guaranteed to terminate. + """ + s = bytes(s) + correct_outcome = self.member(s) + + # We don't want to check this inside the loop because it potentially + # causes us to evaluate more of the states than we actually need to, + # but if our model is mostly correct then this will be faster because + # we only need to evaluate strings that are of the form + # ``state + experiment``, which will generally be cached and/or needed + # later. + if self.dfa.matches(s) == correct_outcome: + return + + # In the papers they assume that we only run this process + # once, but this is silly - often when you've got a messy + # string it will be wrong for many different reasons. + # + # Thus we iterate this to a fixed point where we repair + # the DFA by repeatedly adding experiments until the DFA + # agrees with the membership function on this string. + while True: + dfa = self.dfa + + states = [dfa.start] + + def seems_right(n): + """After reading n characters from s, do we seem to be + in the right state? + + We determine this by replacing the first n characters + of s with the label of the state we expect to be in. + If we are in the right state, that will replace a substring + with an equivalent one so must produce the same answer. + """ + if n > len(s): + return False + + # Populate enough of the states list to know where we are. + while n >= len(states): + states.append(dfa.transition(states[-1], s[len(states) - 1])) + + return self.member(dfa.label(states[n]) + s[n:]) == correct_outcome + + n = find_integer(seems_right) + + # We got to the end without ever finding ourself in a bad + # state, so we must correctly match this string. + if n == len(s): + assert dfa.matches(s) == correct_outcome + break + + # Reading n characters does not put us in a bad state but + # reading n + 1 does. This means that the remainder of + # the string that we have not read yet is an experiment + # that allows us to distinguish the state that we ended + # up in from the state that we should have ended up in. 
+ self.__add_experiment(s[n + 1 :]) + + def __add_experiment(self, e): + self.__experiments.append(e) + self.__dfa = None + + +class ExperimentDFA(DFA): + """This implements a lazily calculated DFA where states + are labelled by some string that reaches them, and are + distinguished by a membership test and a set of experiments.""" + + def __init__(self, member, experiments): + DFA.__init__(self) + self.__experiments = tuple(experiments) + self.__member = member + + self.__states = [b""] + self.__rows_to_states = {tuple(map(member, experiments)): 0} + self.__transition_cache = {} + + def label(self, i): + return self.__states[i] + + @property + def start(self): + return 0 + + def is_accepting(self, i): + return self.__member(self.__states[i]) + + def transition(self, i, c): + key = (i, c) + try: + return self.__transition_cache[key] + except KeyError: + pass + s = self.__states[i] + + # t is either the string that labels our destination + # state or one equivalent to it. + t = s + bytes([c]) + + # A row is a tuple of booleans that corresponds to + # the information our experiments can reveal about + # this string. Two strings with different rows *must* + # correspond to different states in the DFA for our + # membership function, because the same path out + # of them (taken by one of the experiments) leads to + # different results. + row = tuple(self.__member(t + e) for e in self.__experiments) + try: + # If we have seen this row before, assume that this + # state is equivalent to that already discovered one. + # If it is not, we will have to find a new experiment + # to reveal that, + result = self.__rows_to_states[row] + except KeyError: + # This string is definitely not equivalent to any of + # those visited before, so it must be a new state and + # we add it to our list of states. + result = len(self.__states) + self.__states.append(t) + self.__rows_to_states[row] = result + self.__transition_cache[key] = result + return result diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py index 473b493906..7f12e2d6fb 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py @@ -87,7 +87,14 @@ class RunIsComplete(Exception): class ConjectureRunner: - def __init__(self, test_function, settings=None, random=None, database_key=None): + def __init__( + self, + test_function, + settings=None, + random=None, + database_key=None, + ignore_limits=False, + ): self._test_function = test_function self.settings = settings or Settings() self.shrinks = 0 @@ -96,6 +103,7 @@ def __init__(self, test_function, settings=None, random=None, database_key=None) self.valid_examples = 0 self.random = random or Random(getrandbits(128)) self.database_key = database_key + self.ignore_limits = ignore_limits # Global dict of per-phase statistics, and a list of per-call stats # which transfer to the global dict at the end of each phase. 
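
As a rough usage sketch (not part of the patch itself): the LStar learner introduced above is driven by constructing it with a membership predicate, correcting it with learn() until the generation counter stops changing, and then querying the inferred DFA. The import path is the new module added in this diff; the predicate and example strings below are arbitrary illustrative choices.

    import itertools

    from hypothesis.internal.conjecture.dfa.lstar import LStar

    # Learn the language "byte strings of length exactly two".
    learner = LStar(lambda s: len(s) == 2)

    # Correct the model on some example strings until the predicted DFA
    # stabilises.  learn() guarantees that, immediately afterwards,
    # dfa.matches(s) agrees with the membership predicate on s, and the
    # generation counter is bumped whenever a new experiment is added,
    # so an unchanged generation over a full pass means convergence on
    # these examples.
    examples = [b"", b"\x00", b"\x00\x00", b"\x00\x00\x00"]
    prev = -1
    while learner.generation != prev:
        prev = learner.generation
        for s in examples:
            learner.learn(s)

    dfa = learner.dfa
    assert dfa.matches(b"\x00\x00")
    assert not dfa.matches(b"\x00")
    assert not dfa.matches(b"\x00\x00\x00")

    # Matching strings can also be enumerated in shortlex-ascending order.
    first_few = list(itertools.islice(dfa.all_matching_strings(), 3))
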
@@ -273,7 +281,8 @@ def test_function(self, data): self.exit_with(ExitReason.max_shrinks) if ( - self.finish_shrinking_deadline is not None + not self.ignore_limits + and self.finish_shrinking_deadline is not None and self.finish_shrinking_deadline < time.perf_counter() ): # See https://github.com/HypothesisWorks/hypothesis/issues/2340 @@ -558,6 +567,8 @@ def reuse_existing_examples(self): break def exit_with(self, reason): + if self.ignore_limits: + return self.statistics["stopped-because"] = reason.describe(self.settings) if self.best_observed_targets: self.statistics["targets"] = dict(self.best_observed_targets) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/junkdrawer.py b/hypothesis-python/src/hypothesis/internal/conjecture/junkdrawer.py index 6612178f9e..004be35ad1 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/junkdrawer.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/junkdrawer.py @@ -233,3 +233,41 @@ def stack_depth_of_caller(): frame = frame.f_back size += 1 return size + + +def find_integer(f): + """Finds a (hopefully large) integer such that f(n) is True and f(n + 1) is + False. + + f(0) is assumed to be True and will not be checked. + """ + # We first do a linear scan over the small numbers and only start to do + # anything intelligent if f(4) is true. This is because it's very hard to + # win big when the result is small. If the result is 0 and we try 2 first + # then we've done twice as much work as we needed to! + for i in range(1, 5): + if not f(i): + return i - 1 + + # We now know that f(4) is true. We want to find some number for which + # f(n) is *not* true. + # lo is the largest number for which we know that f(lo) is true. + lo = 4 + + # Exponential probe upwards until we find some value hi such that f(hi) + # is not true. Subsequently we maintain the invariant that hi is the + # smallest number for which we know that f(hi) is not true. + hi = 5 + while f(hi): + lo = hi + hi *= 2 + + # Now binary search until lo + 1 = hi. At that point we have f(lo) and not + # f(lo + 1), as desired.. 
+ while lo + 1 < hi: + mid = (lo + hi) // 2 + if f(mid): + lo = mid + else: + hi = mid + return lo diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/optimiser.py b/hypothesis-python/src/hypothesis/internal/conjecture/optimiser.py index c4389c1ebe..d2fd28e151 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/optimiser.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/optimiser.py @@ -16,7 +16,7 @@ from hypothesis.internal.compat import int_from_bytes, int_to_bytes from hypothesis.internal.conjecture.data import Status from hypothesis.internal.conjecture.engine import BUFFER_SIZE, NO_SCORE -from hypothesis.internal.conjecture.shrinking.common import find_integer +from hypothesis.internal.conjecture.junkdrawer import find_integer class Optimiser: diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py b/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py index 630f3d1486..81ee09b051 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py @@ -25,9 +25,12 @@ float_to_lex, lex_to_float, ) -from hypothesis.internal.conjecture.junkdrawer import binary_search, replace_all +from hypothesis.internal.conjecture.junkdrawer import ( + binary_search, + find_integer, + replace_all, +) from hypothesis.internal.conjecture.shrinking import Float, Integer, Lexical, Ordering -from hypothesis.internal.conjecture.shrinking.common import find_integer if False: from typing import Dict # noqa diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/shrinking/common.py b/hypothesis-python/src/hypothesis/internal/conjecture/shrinking/common.py index d6bce01dfc..ba2a20af88 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/shrinking/common.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/shrinking/common.py @@ -16,44 +16,6 @@ """This module implements various useful common functions for shrinking tasks.""" -def find_integer(f): - """Finds a (hopefully large) integer such that f(n) is True and f(n + 1) is - False. - - f(0) is assumed to be True and will not be checked. - """ - # We first do a linear scan over the small numbers and only start to do - # anything intelligent if f(4) is true. This is because it's very hard to - # win big when the result is small. If the result is 0 and we try 2 first - # then we've done twice as much work as we needed to! - for i in range(1, 5): - if not f(i): - return i - 1 - - # We now know that f(4) is true. We want to find some number for which - # f(n) is *not* true. - # lo is the largest number for which we know that f(lo) is true. - lo = 4 - - # Exponential probe upwards until we find some value hi such that f(hi) - # is not true. Subsequently we maintain the invariant that hi is the - # smallest number for which we know that f(hi) is not true. - hi = 5 - while f(hi): - lo = hi - hi *= 2 - - # Now binary search until lo + 1 = hi. At that point we have f(lo) and not - # f(lo + 1), as desired.. 
- while lo + 1 < hi: - mid = (lo + hi) // 2 - if f(mid): - lo = mid - else: - hi = mid - return lo - - class Shrinker: """A Shrinker object manages a single value and a predicate it should satisfy, and attempts to improve it in some direction, making it smaller diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/shrinking/integer.py b/hypothesis-python/src/hypothesis/internal/conjecture/shrinking/integer.py index 601dd39643..7ae11c150b 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/shrinking/integer.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/shrinking/integer.py @@ -13,7 +13,8 @@ # # END HEADER -from hypothesis.internal.conjecture.shrinking.common import Shrinker, find_integer +from hypothesis.internal.conjecture.junkdrawer import find_integer +from hypothesis.internal.conjecture.shrinking.common import Shrinker """ This module implements a shrinker for non-negative integers. diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/shrinking/ordering.py b/hypothesis-python/src/hypothesis/internal/conjecture/shrinking/ordering.py index 9946c17a5b..b32abb02a8 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/shrinking/ordering.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/shrinking/ordering.py @@ -13,7 +13,8 @@ # # END HEADER -from hypothesis.internal.conjecture.shrinking.common import Shrinker, find_integer +from hypothesis.internal.conjecture.junkdrawer import find_integer +from hypothesis.internal.conjecture.shrinking.common import Shrinker def identity(v): diff --git a/hypothesis-python/src/hypothesis/strategies/_internal/strings.py b/hypothesis-python/src/hypothesis/strategies/_internal/strings.py index 45b3c1a90d..63eccfdb90 100644 --- a/hypothesis-python/src/hypothesis/strategies/_internal/strings.py +++ b/hypothesis-python/src/hypothesis/strategies/_internal/strings.py @@ -15,7 +15,7 @@ from hypothesis.errors import InvalidArgument from hypothesis.internal import charmap -from hypothesis.internal.conjecture.utils import integer_range +from hypothesis.internal.conjecture.utils import biased_coin, integer_range from hypothesis.internal.intervalsets import IntervalSet from hypothesis.strategies._internal.strategies import ( MappedSearchStrategy, @@ -61,11 +61,46 @@ def __init__( ) self.intervals = IntervalSet(intervals) self.zero_point = self.intervals.index_above(ord("0")) + self.Z_point = min( + self.intervals.index_above(ord("Z")), len(self.intervals) - 1 + ) def do_draw(self, data): - i = integer_range(data, 0, len(self.intervals) - 1, center=self.zero_point) + if len(self.intervals) > 256: + if biased_coin(data, 0.2): + i = integer_range(data, 256, len(self.intervals) - 1) + else: + i = integer_range(data, 0, 255) + else: + i = integer_range(data, 0, len(self.intervals) - 1) + + i = self.rewrite_integer(i) + return chr(self.intervals[i]) + def rewrite_integer(self, i): + # We would like it so that, where possible, shrinking replaces + # characters with simple ascii characters, so we rejig this + # bit so that the smallest values are 0, 1, 2, ..., Z. + # + # Imagine that numbers are laid out as abc0yyyZ... + # this rearranges them so that they are laid out as + # 0yyyZcba..., which gives a better shrinking order. + if i <= self.Z_point: + # We want to rewrite the integers [0, n] inclusive + # to [zero_point, Z_point]. 
+ n = self.Z_point - self.zero_point + if i <= n: + i += self.zero_point + else: + # We want to rewrite the integers [n + 1, Z_point] to + # [zero_point, 0] (reversing the order so that codepoints below + # zero_point shrink upwards). + i = self.zero_point - (i - n) + assert i < self.zero_point + assert 0 <= i <= self.Z_point + return i + class StringStrategy(MappedSearchStrategy): """A strategy for text strings, defined in terms of a strategy for lists of diff --git a/hypothesis-python/tests/conjecture/test_engine.py b/hypothesis-python/tests/conjecture/test_engine.py index 4b8a4e72b2..a3e24144c0 100644 --- a/hypothesis-python/tests/conjecture/test_engine.py +++ b/hypothesis-python/tests/conjecture/test_engine.py @@ -1572,3 +1572,18 @@ def test(data): d2 = runner.cached_test_function(b"", extend=8) assert d1.status == Status.OVERRUN assert d2.status == Status.VALID + + +def test_can_be_set_to_ignore_limits(): + def test(data): + data.draw_bits(8) + + with deterministic_PRNG(): + runner = ConjectureRunner( + test, settings=settings(TEST_SETTINGS, max_examples=1), ignore_limits=True + ) + + for c in range(256): + runner.cached_test_function([c]) + + assert runner.tree.is_exhausted diff --git a/hypothesis-python/tests/conjecture/test_lstar.py b/hypothesis-python/tests/conjecture/test_lstar.py new file mode 100644 index 0000000000..1bc3c7c35b --- /dev/null +++ b/hypothesis-python/tests/conjecture/test_lstar.py @@ -0,0 +1,127 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Most of this work is copyright (C) 2013-2020 David R. MacIver +# (david@drmaciver.com), but it contains contributions by others. See +# CONTRIBUTING.rst for a full list of people who may hold copyright, and +# consult the git log if you need to determine who owns an individual +# contribution. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. +# +# END HEADER + +import itertools + +from hypothesis.internal.conjecture.dfa.lstar import LStar + + +def test_can_learn_simple_predicate(): + learner = LStar(lambda s: len(s) >= 3) + + learner.learn(bytes(3)) + + dfa = learner.dfa + assert dfa.start == 0 + assert dfa.transition(0, 0) == 1 + assert dfa.transition(1, 0) == 2 + assert dfa.transition(2, 0) == 3 + assert dfa.transition(3, 0) == 3 + + assert not dfa.is_accepting(0) + assert not dfa.is_accepting(1) + assert not dfa.is_accepting(2) + assert dfa.is_accepting(3) + + +def test_relearning_does_not_change_generation(): + learner = LStar(lambda s: len(s) >= 3) + + prev = learner.generation + learner.learn(bytes(3)) + assert prev != learner.generation + + prev = learner.generation + learner.learn(bytes(3)) + assert prev == learner.generation + + +def test_can_learn_dead_nodes(): + learner = LStar(lambda s: len(s) == 4 and max(s) <= 1) + + learner.learn(bytes(4)) + + assert learner.dfa.matches(bytes(4)) + assert learner.dfa.matches(bytes([1] * 4)) + + # Need a length 5 string to distinguish this from + # something that just loops back to zero. 
+ learner.learn([2, 0, 0, 0, 0]) + + dfa = learner.dfa + + assert dfa.is_dead(dfa.transition(dfa.start, 2)) + assert dfa.is_dead(dfa.transition(dfa.start, 3)) + + +def test_iterates_over_learned_strings(): + upper_bound = bytes([1, 2]) + learner = LStar(lambda s: len(s) == 2 and max(s) <= 5 and s <= upper_bound) + learner.learn(upper_bound) + learner.learn([1, 2, 0]) + learner.learn([6, 1, 2]) + learner.learn([1, 3]) + learner.learn([0, 5]) + learner.learn([0, 6]) + + dfa = learner.dfa + n = 9 + matches = list(itertools.islice(dfa.all_matching_strings(), n + 1)) + assert len(matches) == n + + +def test_iteration_with_dead_nodes(): + learner = LStar(lambda s: len(s) == 3 and max(s) <= 1 and s[1] == 0) + learner.learn([1, 0, 1]) + learner.learn([1, 1, 1]) + learner.learn([0, 1, 1]) + learner.learn([1, 1, 0]) + learner.learn([1, 1, 1, 0, 1]) + + dfa = learner.dfa + i = dfa.transition(dfa.start, 1) + assert not dfa.is_dead(i) + assert dfa.is_dead(dfa.transition(i, 2)) + + assert list(learner.dfa.all_matching_strings()) == [ + bytes([0, 0, 0]), + bytes([0, 0, 1]), + bytes([1, 0, 0]), + bytes([1, 0, 1]), + ] + + +def test_learning_is_just_checking_when_fully_explored(): + count = [0] + + def accept(s): + count[0] += 1 + return len(s) <= 5 and all(c == 0 for c in s) + + learner = LStar(accept) + + for c in [0, 1]: + for n in range(10): + learner.learn(bytes([c]) * n) + + assert list(learner.dfa.all_matching_strings()) == [bytes(n) for n in range(6)] + + (prev,) = count + + learner.learn([2] * 11) + + calls = count[0] - prev + + assert calls == 1 diff --git a/hypothesis-python/tests/cover/test_text.py b/hypothesis-python/tests/cover/test_text.py new file mode 100644 index 0000000000..a4945e30a8 --- /dev/null +++ b/hypothesis-python/tests/cover/test_text.py @@ -0,0 +1,23 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Most of this work is copyright (C) 2013-2020 David R. MacIver +# (david@drmaciver.com), but it contains contributions by others. See +# CONTRIBUTING.rst for a full list of people who may hold copyright, and +# consult the git log if you need to determine who owns an individual +# contribution. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. +# +# END HEADER + +from hypothesis.strategies._internal.strings import OneCharStringStrategy + + +def test_rewriting_integers_covers_right_range(): + strategy = OneCharStringStrategy() + + rewritten = [strategy.rewrite_integer(i) for i in range(256)] + assert sorted(rewritten) == sorted(range(256)) diff --git a/hypothesis-python/tests/quality/test_shrinking_order.py b/hypothesis-python/tests/quality/test_shrinking_order.py new file mode 100644 index 0000000000..a151a4bd9b --- /dev/null +++ b/hypothesis-python/tests/quality/test_shrinking_order.py @@ -0,0 +1,135 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Most of this work is copyright (C) 2013-2020 David R. MacIver +# (david@drmaciver.com), but it contains contributions by others. See +# CONTRIBUTING.rst for a full list of people who may hold copyright, and +# consult the git log if you need to determine who owns an individual +# contribution. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. 
If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. +# +# END HEADER + +from itertools import islice +from random import Random + +from hypothesis import HealthCheck, Verbosity, settings, strategies as st +from hypothesis.internal.conjecture.data import ConjectureData, Status +from hypothesis.internal.conjecture.dfa.lstar import LStar +from hypothesis.internal.conjecture.engine import BUFFER_SIZE, ConjectureRunner +from hypothesis.internal.conjecture.junkdrawer import uniform + +LEARNERS = {} + + +def learner_for(strategy): + """Returns an LStar learner that predicts whether a buffer + corresponds to a discard free choice sequence leading to + a valid value for this strategy.""" + try: + return LEARNERS[strategy] + except KeyError: + pass + + def test_function(data): + data.draw(strategy) + data.mark_interesting() + + runner = ConjectureRunner( + test_function, + settings=settings( + database=None, + verbosity=Verbosity.quiet, + suppress_health_check=HealthCheck.all(), + ), + random=Random(0), + ignore_limits=True, + ) + + def predicate(s): + result = runner.cached_test_function(s) + if result.status < Status.VALID: + return False + if result.has_discards: + return False + return result.buffer == s + + learner = LStar(predicate) + + runner.run() + + (v,) = runner.interesting_examples.values() + + # We make sure the learner has properly learned small examples. + # This is all fairly ad hoc but is mostly designed to get it + # to understand what the smallest example is and avoid any + # loops at the beginning of the DFA that don't really exist. + learner.learn(v.buffer) + + for n in [1, 2, 3]: + for _ in range(5): + learner.learn(uniform(runner.random, n) + v.buffer) + + prev = -1 + while learner.generation != prev: + prev = learner.generation + + for _ in range(10): + s = uniform(runner.random, len(v.buffer)) + bytes(BUFFER_SIZE) + learner.learn(s) + data = runner.cached_test_function(s) + if data.status >= Status.VALID: + learner.learn(data.buffer) + + LEARNERS[strategy] = learner + return learner + + +def iter_values(strategy, unique_by=lambda s: s): + """Iterate over the values that can be generated by ``strategy`` + in what is, as best as we can figure, shortlex-ascending order. + + The same value may have multiple, redundant, representations, + and we don't want to yield it more than once, so we deduplicate. + If the value is not hashable, pass some suitable key (e.g. repr) + as unique_by. + """ + learner = learner_for(strategy) + + seen = set() + + while True: + for s in learner.dfa.all_matching_strings(): + if not learner.member(s): + # This string matched the DFA but didn't + # satisfy the membership test. We relearn + # the string, improving our learner, and + # restart the loop. + learner.learn(s) + break + result = ConjectureData.for_buffer(s).draw(strategy) + key = unique_by(result) + if key in seen: + continue + seen.add(key) + yield result + else: + break + + +def test_characters_start_with_the_digits(): + assert list(islice(iter_values(st.characters()), 10)) == [ + "0", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + ]
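
For intuition about the index rewriting that produces the digits-first shrink order exercised by the test above, here is a small standalone sketch (illustrative only, not part of the patch). It mirrors OneCharStringStrategy.rewrite_integer under the simplifying assumption that the interval set is a contiguous codepoint range starting at 0, so an index and its codepoint coincide.

    ZERO_POINT = ord("0")  # 48
    Z_POINT = ord("Z")     # 90

    def rewrite(i):
        # Mirrors rewrite_integer above: indices 0..(Z_POINT - ZERO_POINT)
        # are mapped onto the codepoints "0".."Z", and the indices after
        # that walk back down from just below "0" towards codepoint 0.
        if i <= Z_POINT:
            n = Z_POINT - ZERO_POINT
            if i <= n:
                i += ZERO_POINT
            else:
                i = ZERO_POINT - (i - n)
        return i

    order = [chr(rewrite(i)) for i in range(16)]
    assert order[:10] == [str(d) for d in range(10)]  # shrinking prefers "0".."9"
    assert order[10:16] == list(":;<=>?")             # then the characters after "9"
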