From 4657a4f0b8c0b4ff22d705a4594de68dedf1081d Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Thu, 26 Dec 2024 20:36:24 -0500 Subject: [PATCH 1/7] use sort_key_ir for most things --- hypothesis-python/RELEASE.rst | 3 + hypothesis-python/src/hypothesis/core.py | 4 +- .../hypothesis/internal/conjecture/engine.py | 12 ++-- .../hypothesis/internal/conjecture/pareto.py | 12 ++-- .../internal/conjecture/shrinker.py | 68 +++++++++---------- 5 files changed, 49 insertions(+), 50 deletions(-) create mode 100644 hypothesis-python/RELEASE.rst diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst new file mode 100644 index 0000000000..2e99882c60 --- /dev/null +++ b/hypothesis-python/RELEASE.rst @@ -0,0 +1,3 @@ +RELEASE_TYPE: patch + +The shrinker now uses the `typed choice sequence` (:issue:`3921`) to determine counterexample complexity. We expect this to mostly match the previous ordering, but it may result in reporting different counterexamples in some cases. diff --git a/hypothesis-python/src/hypothesis/core.py b/hypothesis-python/src/hypothesis/core.py index cfb569e4c8..b15272f92b 100644 --- a/hypothesis-python/src/hypothesis/core.py +++ b/hypothesis-python/src/hypothesis/core.py @@ -85,7 +85,7 @@ ensure_free_stackframes, gc_cumulative_time, ) -from hypothesis.internal.conjecture.shrinker import sort_key +from hypothesis.internal.conjecture.shrinker import sort_key, sort_key_ir from hypothesis.internal.entropy import deterministic_PRNG from hypothesis.internal.escalation import ( InterestingOrigin, @@ -1226,7 +1226,7 @@ def run_engine(self): if runner.interesting_examples: self.falsifying_examples = sorted( runner.interesting_examples.values(), - key=lambda d: sort_key(d.buffer), + key=lambda d: sort_key_ir(d.ir_nodes), reverse=True, ) else: diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py index 894a8b67ee..f8c8a1255d 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py @@ -74,7 +74,7 @@ startswith, ) from hypothesis.internal.conjecture.pareto import NO_SCORE, ParetoFront, ParetoOptimiser -from hypothesis.internal.conjecture.shrinker import Shrinker, sort_key +from hypothesis.internal.conjecture.shrinker import Shrinker, sort_key, sort_key_ir from hypothesis.internal.healthcheck import fail_health_check from hypothesis.reporting import base_report, report @@ -562,8 +562,8 @@ def test_function(self, data: ConjectureData) -> None: if v < existing_score: continue - if v > existing_score or sort_key(data.buffer) < sort_key( - existing_example.buffer + if v > existing_score or sort_key_ir(data.ir_nodes) < sort_key_ir( + existing_example.ir_nodes ): data_as_result = data.as_result() assert not isinstance(data_as_result, _Overrun) @@ -619,7 +619,7 @@ def test_function(self, data: ConjectureData) -> None: if self.first_bug_found_at is None: self.first_bug_found_at = self.call_count else: - if sort_key(data.buffer) < sort_key(existing.buffer): + if sort_key_ir(data.ir_nodes) < sort_key_ir(existing.ir_nodes): self.shrinks += 1 self.downgrade_buffer(existing.buffer) self.__data_cache.unpin(existing.buffer) @@ -1376,7 +1376,7 @@ def shrink_interesting_examples(self) -> None: self.finish_shrinking_deadline = time.perf_counter() + MAX_SHRINKING_SECONDS for prev_data in sorted( - self.interesting_examples.values(), key=lambda d: sort_key(d.buffer) + self.interesting_examples.values(), key=lambda d: 
sort_key_ir(d.ir_nodes) ): assert prev_data.status == Status.INTERESTING data = self.new_conjecture_data_ir(prev_data.ir_nodes) @@ -1393,7 +1393,7 @@ def shrink_interesting_examples(self) -> None: for k, v in self.interesting_examples.items() if k not in self.shrunk_examples ), - key=lambda kv: (sort_key(kv[1].buffer), sort_key(repr(kv[0]))), + key=lambda kv: (sort_key_ir(kv[1].ir_nodes), sort_key(repr(kv[0]))), ) self.debug(f"Shrinking {target!r}: {data.choices}") diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/pareto.py b/hypothesis-python/src/hypothesis/internal/conjecture/pareto.py index a0451d7f86..647a91ae94 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/pareto.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/pareto.py @@ -14,7 +14,7 @@ from hypothesis.internal.conjecture.data import ConjectureData, ConjectureResult, Status from hypothesis.internal.conjecture.junkdrawer import LazySequenceCopy, swap -from hypothesis.internal.conjecture.shrinker import sort_key +from hypothesis.internal.conjecture.shrinker import sort_key_ir NO_SCORE = float("-inf") @@ -45,10 +45,12 @@ def dominance(left, right): more structured or failing tests it can be useful to track, and future work will depend on it more.""" - if left.buffer == right.buffer: + left_key = sort_key_ir(left.ir_nodes) + right_key = sort_key_ir(right.ir_nodes) + if left_key == right_key: return DominanceRelation.EQUAL - if sort_key(right.buffer) < sort_key(left.buffer): + if right_key < left_key: result = dominance(left=right, right=left) if result == DominanceRelation.LEFT_DOMINATES: return DominanceRelation.RIGHT_DOMINATES @@ -60,7 +62,7 @@ def dominance(left, right): return result # Either left is better or there is no dominance relationship. - assert sort_key(left.buffer) < sort_key(right.buffer) + assert left_key < right_key # The right is more interesting if left.status < right.status: @@ -126,7 +128,7 @@ def __init__(self, random): self.__random = random self.__eviction_listeners = [] - self.front = SortedList(key=lambda d: sort_key(d.buffer)) + self.front = SortedList(key=lambda d: sort_key_ir(d.ir_nodes)) self.__pending = None def add(self, data): diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py b/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py index 4d7dd6fc4b..c83096a66c 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py @@ -15,7 +15,7 @@ import attr from hypothesis.internal.compat import int_from_bytes, int_to_bytes -from hypothesis.internal.conjecture.choice import choice_from_index +from hypothesis.internal.conjecture.choice import choice_from_index, choice_to_index from hypothesis.internal.conjecture.data import ( ConjectureData, ConjectureResult, @@ -80,6 +80,13 @@ def sort_key(buffer: SortKeyT) -> tuple[int, SortKeyT]: return (len(buffer), buffer) +def sort_key_ir(nodes: list[IRNode]) -> tuple[int, tuple[int]]: + return ( + len(nodes), + tuple(choice_to_index(node.value, node.kwargs) for node in nodes), + ) + + SHRINK_PASS_DEFINITIONS: dict[str, "ShrinkPassDefinition"] = {} @@ -305,7 +312,7 @@ def __init__( self.__derived_values: dict = {} self.__pending_shrink_explanation = None - self.initial_size = len(initial.buffer) + self.initial_size = len(initial.choices) # We keep track of the current best example on the shrink_target # attribute. 
@@ -401,7 +408,7 @@ def consider_new_tree(self, tree: Sequence[IRNode]) -> bool: if startswith(tree, self.nodes): return True - if startswith(self.nodes, tree): + if sort_key_ir(self.nodes) < sort_key_ir(tree): return False previous = self.shrink_target @@ -445,7 +452,7 @@ def incorporate_test_data(self, data): return if ( self.__predicate(data) - and sort_key(data.buffer) < sort_key(self.shrink_target.buffer) + and sort_key_ir(data.ir_nodes) < sort_key_ir(self.shrink_target.ir_nodes) and self.__allow_transition(self.shrink_target, data) ): self.update_shrink_target(data) @@ -474,29 +481,14 @@ def shrink(self): This method is "mostly idempotent" - calling it twice is unlikely to have any effect, though it has a non-zero probability of doing so. """ - # We assume that if an all-zero block of bytes is an interesting - # example then we're not going to do better than that. - # This might not technically be true: e.g. for integers() | booleans() - # the simplest example is actually [1, 0]. Missing this case is fairly - # harmless and this allows us to make various simplifying assumptions - # about the structure of the data (principally that we're never - # operating on a block of all zero bytes so can use non-zeroness as a - # signpost of complexity). - if not any(self.shrink_target.buffer) or self.incorporate_new_buffer( - bytes(len(self.shrink_target.buffer)) - ): + # We assume that if an all-trivial example is interesting then + # we're not going to do better than that. This might not technically be true: + # e.g. in tuples(booleans(), booleans()) | booleans() the simplest example + # is [1, False] but the all-trivial example is [0, False, False]. + if all(node.trivial for node in self.nodes): self.explain() return - # There are multiple buffers that represent the same counterexample, eg - # n=2 (from the 16 bit integer bucket) and n=2 (from the 32 bit integer - # bucket). Before we start shrinking, we need to normalize to the minimal - # such buffer, else a buffer-smaller but ir-larger value may be chosen - # as the minimal counterexample. - data = self.engine.new_conjecture_data_ir(self.nodes) - self.engine.test_function(data) - self.incorporate_test_data(data.as_result()) - try: self.greedy_shrink() except StopShrinking: @@ -509,7 +501,7 @@ def shrink(self): def s(n): return "s" if n != 1 else "" - total_deleted = self.initial_size - len(self.shrink_target.buffer) + total_deleted = self.initial_size - len(self.shrink_target.choices) calls = self.engine.call_count - self.initial_calls misaligned = self.engine.misaligned_count - self.initial_misaligned @@ -518,7 +510,7 @@ def s(n): "Shrink pass profiling\n" "---------------------\n\n" f"Shrinking made a total of {calls} call{s(calls)} of which " - f"{self.shrinks} shrank and {misaligned} were misaligned. This deleted {total_deleted} bytes out " + f"{self.shrinks} shrank and {misaligned} were misaligned. This deleted {total_deleted} choices out " f"of {self.initial_size}." ) for useful in [True, False]: @@ -540,7 +532,7 @@ def s(n): self.debug( f" * {p.name} made {p.calls} call{s(p.calls)} of which " f"{p.shrinks} shrank and {p.misaligned} were misaligned, " - f"deleting {p.deletions} byte{s(p.deletions)}." + f"deleting {p.deletions} choice{s(p.deletions)}." ) self.debug("") self.explain() @@ -797,7 +789,7 @@ def fixate_shrink_passes(self, passes): # the length are the best. 
if self.shrink_target is before_sp: reordering[sp] = 1 - elif len(self.buffer) < len(before_sp.buffer): + elif len(self.choices) < len(before_sp.choices): reordering[sp] = -1 else: reordering[sp] = 0 @@ -988,7 +980,7 @@ def __changed_nodes(self): assert prev_target is not new_target prev_nodes = prev_target.ir_nodes new_nodes = new_target.ir_nodes - assert sort_key(new_target.buffer) < sort_key(prev_target.buffer) + assert sort_key_ir(new_target.ir_nodes) < sort_key_ir(prev_target.ir_nodes) if len(prev_nodes) != len(new_nodes) or any( n1.ir_type != n2.ir_type for n1, n2 in zip(prev_nodes, new_nodes) @@ -1186,11 +1178,11 @@ def remove_discarded(self): for ex in self.shrink_target.examples: if ( - ex.length > 0 + ex.ir_length > 0 and ex.discarded - and (not discarded or ex.start >= discarded[-1][-1]) + and (not discarded or ex.ir_start >= discarded[-1][-1]) ): - discarded.append((ex.start, ex.end)) + discarded.append((ex.ir_start, ex.ir_end)) # This can happen if we have discards but they are all of # zero length. This shouldn't happen very often so it's @@ -1199,11 +1191,11 @@ def remove_discarded(self): if not discarded: break - attempt = bytearray(self.shrink_target.buffer) + attempt = list(self.nodes) for u, v in reversed(discarded): del attempt[u:v] - if not self.incorporate_new_buffer(attempt): + if not self.consider_new_tree(tuple(attempt)): return False return True @@ -1563,7 +1555,9 @@ def test_not_equal(x, y): ], ) ), - key=lambda i: st.buffer[examples[i].start : examples[i].end], + key=lambda i: sort_key_ir( + st.ir_nodes[examples[i].ir_start : examples[i].ir_end] + ), ) def run_node_program(self, i, description, original, repeats=1): @@ -1670,7 +1664,7 @@ def step(self, *, random_order=False): initial_shrinks = self.shrinker.shrinks initial_calls = self.shrinker.calls initial_misaligned = self.shrinker.misaligned - size = len(self.shrinker.shrink_target.buffer) + size = len(self.shrinker.shrink_target.choices) self.shrinker.engine.explain_next_call_as(self.name) if random_order: @@ -1687,7 +1681,7 @@ def step(self, *, random_order=False): self.calls += self.shrinker.calls - initial_calls self.misaligned += self.shrinker.misaligned - initial_misaligned self.shrinks += self.shrinker.shrinks - initial_shrinks - self.deletions += size - len(self.shrinker.shrink_target.buffer) + self.deletions += size - len(self.shrinker.shrink_target.choices) self.shrinker.engine.clear_call_explanation() return True From 75d1f429b043a69d9fe87a014e8089950f79fdb8 Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Thu, 26 Dec 2024 20:36:36 -0500 Subject: [PATCH 2/7] this test is unsound with a db failure --- hypothesis-python/tests/cover/test_deadline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hypothesis-python/tests/cover/test_deadline.py b/hypothesis-python/tests/cover/test_deadline.py index 3bb4583ba4..e87463238f 100644 --- a/hypothesis-python/tests/cover/test_deadline.py +++ b/hypothesis-python/tests/cover/test_deadline.py @@ -66,7 +66,7 @@ def test_flaky_slow(i): def test_deadlines_participate_in_shrinking(): - @settings(deadline=500, max_examples=1000) + @settings(deadline=500, max_examples=1000, database=None) @given(st.integers(min_value=0)) def slow_if_large(i): if i >= 1000: From a4c8648708034e56e23631a98878c075cee82735 Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Thu, 26 Dec 2024 20:36:39 -0500 Subject: [PATCH 3/7] speed up test_shrinks_downwards_to_integers --- .../tests/quality/test_float_shrinking.py | 22 +++++-------------- 1 file changed, 5 insertions(+), 17 
deletions(-) diff --git a/hypothesis-python/tests/quality/test_float_shrinking.py b/hypothesis-python/tests/quality/test_float_shrinking.py index 7beccc2f65..415746f9ff 100644 --- a/hypothesis-python/tests/quality/test_float_shrinking.py +++ b/hypothesis-python/tests/quality/test_float_shrinking.py @@ -10,14 +10,7 @@ import pytest -from hypothesis import ( - HealthCheck, - Verbosity, - example, - given, - settings, - strategies as st, -) +from hypothesis import example, given, strategies as st from hypothesis.internal.compat import ceil from tests.common.debug import minimal @@ -39,21 +32,16 @@ def test_can_shrink_in_variable_sized_context(n): @example(1.7976931348623157e308) @example(1.5) @given(st.floats(min_value=0, allow_infinity=False, allow_nan=False)) -@settings(deadline=None, suppress_health_check=list(HealthCheck)) def test_shrinks_downwards_to_integers(f): - g = minimal( - st.floats().filter(lambda x: x >= f), - settings=settings(verbosity=Verbosity.quiet, max_examples=10**6), - ) - assert g == ceil(f) + assert minimal(st.floats(min_value=f)) == ceil(f) @example(1) @given(st.integers(1, 2**16 - 1)) -@settings(deadline=None, suppress_health_check=list(HealthCheck), max_examples=10) def test_shrinks_downwards_to_integers_when_fractional(b): g = minimal( - st.floats().filter(lambda x: b < x < 2**53 and int(x) != x), - settings=settings(verbosity=Verbosity.quiet, max_examples=10**6), + st.floats( + min_value=b, max_value=2**53, exclude_min=True, exclude_max=True + ).filter(lambda x: int(x) != x) ) assert g == b + 0.5 From fb74bd732cabcba19648d7ad62526d22ac8a56ce Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Thu, 26 Dec 2024 20:48:17 -0500 Subject: [PATCH 4/7] this test can go back to normal now! --- .../tests/nocover/test_duplication.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/hypothesis-python/tests/nocover/test_duplication.py b/hypothesis-python/tests/nocover/test_duplication.py index ff89a9978f..ce3557fca8 100644 --- a/hypothesis-python/tests/nocover/test_duplication.py +++ b/hypothesis-python/tests/nocover/test_duplication.py @@ -52,11 +52,11 @@ def test(b): test() except ValueError: pass - # There are three circumstances in which a duplicate is allowed: We replay - # the failing test once to check for flakiness, once when shrinking to normalize - # to the minimal buffer, and then we replay the fully minimized failing test - # at the end to display the error. The complication comes from the fact that - # these may or may not be the same test case, so we can see either two test - # cases each run twice or one test case which has been run three times. - assert set(counts.values()) in ({1, 2, 3}, {1, 4}) + # There are two circumstances in which a duplicate is allowed: We replay + # the failing test once to check for flakiness, and then we replay the + # fully minimized failing test at the end to display the error. The + # complication comes from the fact that these may or may not be the same + # test case, so we can see either two test cases each run twice or one + # test case which has been run three times. 
+ assert set(counts.values()) in ({1, 2}, {1, 3}) assert len([k for k, v in counts.items() if v > 1]) <= 2 From 4181f0c1be6b1c3f8eb929fe77c4c98a34e7253c Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Fri, 27 Dec 2024 01:15:17 -0500 Subject: [PATCH 5/7] this fails test_minimizes_errors_in_teardown due to forced being trivial --- .../src/hypothesis/internal/conjecture/shrinker.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py b/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py index c83096a66c..835d4e9daf 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py @@ -481,13 +481,6 @@ def shrink(self): This method is "mostly idempotent" - calling it twice is unlikely to have any effect, though it has a non-zero probability of doing so. """ - # We assume that if an all-trivial example is interesting then - # we're not going to do better than that. This might not technically be true: - # e.g. in tuples(booleans(), booleans()) | booleans() the simplest example - # is [1, False] but the all-trivial example is [0, False, False]. - if all(node.trivial for node in self.nodes): - self.explain() - return try: self.greedy_shrink() From 681df66bcdc2ef5933ac54cda2c0fdbef39658c9 Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Fri, 27 Dec 2024 01:26:05 -0500 Subject: [PATCH 6/7] typing --- .../src/hypothesis/internal/conjecture/shrinker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py b/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py index 835d4e9daf..952598267e 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py @@ -80,7 +80,7 @@ def sort_key(buffer: SortKeyT) -> tuple[int, SortKeyT]: return (len(buffer), buffer) -def sort_key_ir(nodes: list[IRNode]) -> tuple[int, tuple[int]]: +def sort_key_ir(nodes: Sequence[IRNode]) -> tuple[int, tuple[int]]: return ( len(nodes), tuple(choice_to_index(node.value, node.kwargs) for node in nodes), From b089be7402e0ca9226724f80d2e4c7c20eb7b3d0 Mon Sep 17 00:00:00 2001 From: Liam DeVoe Date: Fri, 27 Dec 2024 11:51:34 -0500 Subject: [PATCH 7/7] reword release, typing --- hypothesis-python/RELEASE.rst | 2 +- .../src/hypothesis/internal/conjecture/shrinker.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst index 2e99882c60..b70f64656f 100644 --- a/hypothesis-python/RELEASE.rst +++ b/hypothesis-python/RELEASE.rst @@ -1,3 +1,3 @@ RELEASE_TYPE: patch -The shrinker now uses the `typed choice sequence` (:issue:`3921`) to determine counterexample complexity. We expect this to mostly match the previous ordering, but it may result in reporting different counterexamples in some cases. +The shrinker now uses the typed choice sequence (:issue:`3921`) when ordering failing examples. As a result, Hypothesis may now report a different minimal failing example for some tests. We expect most cases to remain unchanged. 
diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py b/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py index 952598267e..0f462f7951 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py @@ -80,7 +80,7 @@ def sort_key(buffer: SortKeyT) -> tuple[int, SortKeyT]: return (len(buffer), buffer) -def sort_key_ir(nodes: Sequence[IRNode]) -> tuple[int, tuple[int]]: +def sort_key_ir(nodes: Sequence[IRNode]) -> tuple[int, tuple[int, ...]]: return ( len(nodes), tuple(choice_to_index(node.value, node.kwargs) for node in nodes),
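
For reviewers, a sketch of the ordering sort_key_ir imposes on typed choice sequences. Node and toy_choice_index below are simplified stand-ins for IRNode and hypothesis.internal.conjecture.choice.choice_to_index (the real function ranks every choice type, not just bounded integers); this is a toy model of the idea under those assumptions, not the implementation in the patch.

# Toy model of the ordering sort_key_ir imposes on typed choice sequences.
# Node and toy_choice_index are simplified stand-ins for IRNode and
# choice_to_index -- assumptions for illustration, not the patch's code.
from dataclasses import dataclass


@dataclass(frozen=True)
class Node:
    value: int
    min_value: int = 0  # stand-in for the real node's kwargs


def toy_choice_index(node: Node) -> int:
    # The real choice_to_index ranks a drawn value within its strategy's
    # shrink ordering; for integers bounded below, distance from the
    # minimum is a reasonable toy model.
    return node.value - node.min_value


def sort_key_ir(nodes: tuple[Node, ...]) -> tuple[int, tuple[int, ...]]:
    # Fewer choices always wins; ties break lexicographically by each
    # choice's index into its strategy's shrink ordering.
    return (len(nodes), tuple(toy_choice_index(n) for n in nodes))


examples = [
    (Node(5), Node(2)),  # key (2, (5, 2))
    (Node(3),),          # key (1, (3,)) -- shortest, so minimal
    (Node(5), Node(1)),  # key (2, (5, 1))
]
assert min(examples, key=sort_key_ir) == (Node(3),)
assert [sort_key_ir(e) for e in sorted(examples, key=sort_key_ir)] == [
    (1, (3,)),
    (2, (5, 1)),
    (2, (5, 2)),
]

Because length sorts first, a failing example that makes fewer choices always beats one that makes more, regardless of the values drawn -- the same shape as the old buffer-based sort_key, which compared (len(buffer), buffer).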