Merge pull request #3894 from Zac-HD/efficient-stateful

Fix a swarm-testing footgun
HypothesisWorks · Feb 24, 2024 · 202d6af
2 parents 405b7a9 + 77f596f
commit 202d6af
Show file tree

Hide file tree

Showing 8 changed files with 105 additions and 61 deletions.
diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst
@@ -0,0 +1,4 @@
+RELEASE_TYPE: patch
+
+This patch makes stateful testing somewhat less likely to get stuck
+when there are only a few possible rules.
diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/data.py b/hypothesis-python/src/hypothesis/internal/conjecture/data.py
@@ -62,11 +62,14 @@
 from hypothesis.internal.intervalsets import IntervalSet
 
 if TYPE_CHECKING:
+    from typing import TypeAlias
+
     from typing_extensions import dataclass_transform
 
     from hypothesis.strategies import SearchStrategy
     from hypothesis.strategies._internal.strategies import Ex
 else:
+    TypeAlias = object
 
     def dataclass_transform():
         def wrapper(tp):
@@ -94,6 +97,41 @@ def wrapper(tp):
 T = TypeVar("T")
 
 
+class IntegerKWargs(TypedDict):
+    min_value: Optional[int]
+    max_value: Optional[int]
+    weights: Optional[Sequence[float]]
+    shrink_towards: int
+
+
+class FloatKWargs(TypedDict):
+    min_value: float
+    max_value: float
+    allow_nan: bool
+    smallest_nonzero_magnitude: float
+
+
+class StringKWargs(TypedDict):
+    intervals: IntervalSet
+    min_size: int
+    max_size: Optional[int]
+
+
+class BytesKWargs(TypedDict):
+    size: int
+
+
+class BooleanKWargs(TypedDict):
+    p: float
+
+
+IRType: TypeAlias = Union[int, str, bool, float, bytes]
+IRKWargsType: TypeAlias = Union[
+    IntegerKWargs, FloatKWargs, StringKWargs, BytesKWargs, BooleanKWargs
+]
+IRTypeName: TypeAlias = Literal["integer", "string", "boolean", "float", "bytes"]
+
+
 class ExtraInformation:
     """A class for holding shared state on a ``ConjectureData`` that should
     be added to the final ``ConjectureResult``."""
@@ -798,34 +836,6 @@ def as_result(self) -> "_Overrun":
 MAX_DEPTH = 100
 
 
-class IntegerKWargs(TypedDict):
-    min_value: Optional[int]
-    max_value: Optional[int]
-    weights: Optional[Sequence[float]]
-    shrink_towards: int
-
-
-class FloatKWargs(TypedDict):
-    min_value: float
-    max_value: float
-    allow_nan: bool
-    smallest_nonzero_magnitude: float
-
-
-class StringKWargs(TypedDict):
-    intervals: IntervalSet
-    min_size: int
-    max_size: Optional[int]
-
-
-class BytesKWargs(TypedDict):
-    size: int
-
-
-class BooleanKWargs(TypedDict):
-    p: float
-
-
 class DataObserver:
     """Observer class for recording the behaviour of a
     ConjectureData object, primarily used for tracking

diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/datatree.py b/hypothesis-python/src/hypothesis/internal/conjecture/datatree.py
@@ -10,7 +10,7 @@
 
 import itertools
 import math
-from typing import TYPE_CHECKING, List, Literal, Optional, Union
+from typing import List, Optional, Union
 
 import attr
 
@@ -24,23 +24,14 @@
     DataObserver,
     FloatKWargs,
     IntegerKWargs,
+    IRKWargsType,
+    IRType,
+    IRTypeName,
     Status,
     StringKWargs,
 )
 from hypothesis.internal.floats import count_between_floats, float_to_int, int_to_float
 
-if TYPE_CHECKING:
-    from typing import TypeAlias
-else:
-    TypeAlias = object
-
-IRType: TypeAlias = Union[int, str, bool, float, bytes]
-IRKWargsType: TypeAlias = Union[
-    IntegerKWargs, FloatKWargs, StringKWargs, BytesKWargs, BooleanKWargs
-]
-# this would be "IRTypeType", but that's just confusing.
-IRLiteralType: TypeAlias = Literal["integer", "string", "boolean", "float", "bytes"]
-
 
 class PreviouslyUnseenBehaviour(HypothesisException):
     pass
@@ -336,7 +327,7 @@ class TreeNode:
     # have the same length. The values at index i belong to node i.
     kwargs: List[IRKWargsType] = attr.ib(factory=list)
     values: List[IRType] = attr.ib(factory=list)
-    ir_types: List[IRLiteralType] = attr.ib(factory=list)
+    ir_types: List[IRTypeName] = attr.ib(factory=list)
 
     # The indices of nodes which had forced values.
     #
@@ -885,7 +876,7 @@ def draw_boolean(
 
     def draw_value(
         self,
-        ir_type: IRLiteralType,
+        ir_type: IRTypeName,
         value: IRType,
         *,
         was_forced: bool,

diff --git a/hypothesis-python/src/hypothesis/stateful.py b/hypothesis-python/src/hypothesis/stateful.py
@@ -358,7 +358,6 @@ def invariants(cls):
         return cls._invariants_per_class[cls]
 
     def _repr_step(self, rule, data, result):
-        self.step_count = getattr(self, "step_count", 0) + 1
         output_assignment = ""
         if rule.targets:
             if isinstance(result, MultipleResults):
@@ -431,7 +430,7 @@ def runTest(self):
         return StateMachineTestCase
 
 
-@attr.s()
+@attr.s(repr=False)
 class Rule:
     targets = attr.ib()
     function = attr.ib(repr=get_pretty_function_description)
@@ -451,6 +450,11 @@ def __attrs_post_init__(self):
             self.arguments_strategies[k] = v
         self.bundles = tuple(bundles)
 
+    def __repr__(self) -> str:
+        rep = get_pretty_function_description
+        bits = [f"{k}={rep(v)}" for k, v in attr.asdict(self).items() if v]
+        return f"{self.__class__.__name__}({', '.join(bits)})"
+
 
 self_strategy = st.runner()
 
@@ -937,7 +941,8 @@ def __init__(self, machine):
         self.rules = list(machine.rules())
 
         self.enabled_rules_strategy = st.shared(
-            FeatureStrategy(), key=("enabled rules", machine)
+            FeatureStrategy(at_least_one_of={r.function.__name__ for r in self.rules}),
+            key=("enabled rules", machine),
         )
 
         # The order is a bit arbitrary. Primarily we're trying to group rules
@@ -965,17 +970,16 @@ def do_draw(self, data):
 
         feature_flags = data.draw(self.enabled_rules_strategy)
 
-        # Note: The order of the filters here is actually quite important,
-        # because checking is_enabled makes choices, so increases the size of
-        # the choice sequence. This means that if we are in a case where many
-        # rules are invalid we will make a lot more choices if we ask if they
-        # are enabled before we ask if they are valid, so our test cases will
-        # be artificially large.
-        rule = data.draw(
-            st.sampled_from(self.rules)
-            .filter(self.is_valid)
-            .filter(lambda r: feature_flags.is_enabled(r.function.__name__))
-        )
+        def rule_is_enabled(r):
+            # Note: The order of the two checks here is actually quite important:
+            # is_valid comes first so that `and` short-circuits before is_enabled,
+            # which makes choices and so increases the size of the choice sequence.
+            # If many rules are invalid, asking whether they are enabled before
+            # asking whether they are valid would make a lot more choices, so our
+            # test cases would be artificially large.
+            return self.is_valid(r) and feature_flags.is_enabled(r.function.__name__)
+
+        rule = data.draw(st.sampled_from(self.rules).filter(rule_is_enabled))
 
         arguments = {}
         for k, strat in rule.arguments_strategies.items():

diff --git a/hypothesis-python/src/hypothesis/strategies/_internal/collections.py b/hypothesis-python/src/hypothesis/strategies/_internal/collections.py
@@ -13,6 +13,7 @@
 
 from hypothesis.errors import InvalidArgument
 from hypothesis.internal.conjecture import utils as cu
+from hypothesis.internal.conjecture.engine import BUFFER_SIZE
 from hypothesis.internal.conjecture.junkdrawer import LazySequenceCopy
 from hypothesis.internal.conjecture.utils import combine_labels
 from hypothesis.internal.filtering import get_integer_predicate_bounds
@@ -142,6 +143,10 @@ def __init__(self, elements, min_size=0, max_size=float("inf")):
         self.min_size = min_size or 0
         self.max_size = max_size if max_size is not None else float("inf")
         assert 0 <= self.min_size <= self.max_size
+        if min_size > BUFFER_SIZE:
+            raise InvalidArgument(
+                f"min_size={min_size:_d} is larger than Hypothesis is designed to handle"
+            )
         self.average_size = min(
             max(self.min_size * 2, self.min_size + 5),
             0.5 * (self.min_size + self.max_size),

diff --git a/hypothesis-python/src/hypothesis/strategies/_internal/featureflags.py b/hypothesis-python/src/hypothesis/strategies/_internal/featureflags.py
@@ -31,7 +31,7 @@ class FeatureFlags:
     required disabled features.
     """
 
-    def __init__(self, data=None, enabled=(), disabled=()):
+    def __init__(self, data=None, enabled=(), disabled=(), at_least_one_of=()):
         self.__data = data
         self.__is_disabled = {}
 
@@ -52,13 +52,18 @@ def __init__(self, data=None, enabled=(), disabled=()):
         # features will be enabled. This is so that we shrink in the direction
         # of more features being enabled.
         if self.__data is not None:
-            self.__p_disabled = data.draw_integer(0, 255) / 255.0
+            self.__p_disabled = data.draw_integer(0, 254) / 255
         else:
             # If data is None we're in example mode so all that matters is the
             # enabled/disabled lists above. We set this up so that everything
             # else is enabled by default.
             self.__p_disabled = 0.0
 
+        # The naive approach can lead to disabling e.g. every single rule on a
+        # RuleBasedStateMachine, which aborts the test as unable to make progress.
+        # Track the set of possible names, and ensure that at least one is enabled.
+        self.__at_least_one_of = set(at_least_one_of)
+
     def is_enabled(self, name):
         """Tests whether the feature named ``name`` should be enabled on this
         test run."""
@@ -81,10 +86,19 @@ def is_enabled(self, name):
         # of the test case where we originally decided, the next point at
         # which we make this decision just makes the decision it previously
         # made.
+        oneof = self.__at_least_one_of
         is_disabled = self.__data.draw_boolean(
-            self.__p_disabled, forced=self.__is_disabled.get(name)
+            self.__p_disabled,
+            forced=(
+                False
+                if len(oneof) == 1 and name in oneof
+                else self.__is_disabled.get(name)
+            ),
         )
         self.__is_disabled[name] = is_disabled
+        if name in oneof and not is_disabled:
+            oneof.clear()
+        oneof.discard(name)
         data.stop_example()
         return not is_disabled
 
@@ -100,5 +114,9 @@ def __repr__(self):
 
 
 class FeatureStrategy(SearchStrategy):
+    def __init__(self, at_least_one_of=()):
+        super().__init__()
+        self._at_least_one_of = frozenset(at_least_one_of)
+
     def do_draw(self, data):
-        return FeatureFlags(data)
+        return FeatureFlags(data, at_least_one_of=self._at_least_one_of)
diff --git a/hypothesis-python/tests/cover/test_direct_strategies.py b/hypothesis-python/tests/cover/test_direct_strategies.py
@@ -119,6 +119,12 @@ def fn_ktest(*fnkwargs):
     (ds.lists, {"elements": ds.integers(), "unique_by": ()}),
     (ds.lists, {"elements": ds.integers(), "unique_by": (1,)}),
     (ds.lists, {"elements": ds.sampled_from([0, 1]), "min_size": 3, "unique": True}),
+    (ds.lists, {"elements": ds.none(), "min_size": 100_000}),
+    (ds.lists, {"elements": ds.none(), "min_size": 100_000, "unique": True}),
+    (
+        ds.lists,
+        {"elements": ds.sampled_from([1, 2]), "min_size": 100_000, "unique": True},
+    ),
     (ds.text, {"min_size": 10, "max_size": 9}),
     (ds.text, {"alphabet": [1]}),
     (ds.text, {"alphabet": ["abc"]}),
@@ -128,6 +134,7 @@ def fn_ktest(*fnkwargs):
     (ds.text, {"alphabet": ds.sampled_from([123, 456])}),
     (ds.text, {"alphabet": ds.builds(lambda: "abc")}),
     (ds.text, {"alphabet": ds.builds(lambda: 123)}),
+    (ds.text, {"alphabet": "abc", "min_size": 100_000}),
     (ds.from_regex, {"regex": 123}),
     (ds.from_regex, {"regex": b"abc", "alphabet": "abc"}),
     (ds.from_regex, {"regex": b"abc", "alphabet": b"def"}),

diff --git a/hypothesis-python/tests/cover/test_feature_flags.py b/hypothesis-python/tests/cover/test_feature_flags.py
@@ -78,3 +78,8 @@ def test_repr_can_be_evalled(data):
 
     for f in more_features:
         assert flags2.is_enabled(f)
+
+
+@given(FeatureStrategy(at_least_one_of={"a", "b", "c"}))
+def test_can_avoid_disabling_every_flag(flags):
+    assert any(flags.is_enabled(k) for k in {"a", "b", "c"})