Skip to content

Commit

Permalink
Merge pull request #3894 from Zac-HD/efficient-stateful
Browse files Browse the repository at this point in the history
Fix a swarm-testing footgun
  • Loading branch information
Zac-HD authored Feb 24, 2024
2 parents 405b7a9 + 77f596f commit 202d6af
Show file tree
Hide file tree
Showing 8 changed files with 105 additions and 61 deletions.
4 changes: 4 additions & 0 deletions hypothesis-python/RELEASE.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
RELEASE_TYPE: patch

This patch makes stateful testing somewhat less likely to get stuck
when there are only a few possible rules.
66 changes: 38 additions & 28 deletions hypothesis-python/src/hypothesis/internal/conjecture/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,14 @@
from hypothesis.internal.intervalsets import IntervalSet

if TYPE_CHECKING:
from typing import TypeAlias

from typing_extensions import dataclass_transform

from hypothesis.strategies import SearchStrategy
from hypothesis.strategies._internal.strategies import Ex
else:
TypeAlias = object

def dataclass_transform():
def wrapper(tp):
Expand Down Expand Up @@ -94,6 +97,41 @@ def wrapper(tp):
T = TypeVar("T")


class IntegerKWargs(TypedDict):
    """Typed-dict schema with the keys recorded for an integer draw.

    One of the members of the ``IRKWargsType`` union.
    NOTE(review): presumably mirrors the keyword arguments of the integer
    draw primitive and pairs with the "integer" IR type name -- confirm.
    """

    # Optional bounds. NOTE(review): ``None`` presumably means "unbounded on
    # that side" -- confirm against the code that consumes these kwargs.
    min_value: Optional[int]
    max_value: Optional[int]
    # Optional sequence of float weights; how they map onto values is not
    # visible here -- TODO confirm.
    weights: Optional[Sequence[float]]
    # Always present (not Optional), unlike the bounds above.
    shrink_towards: int


class FloatKWargs(TypedDict):
    """Typed-dict schema with the keys recorded for a float draw.

    One of the members of the ``IRKWargsType`` union.
    NOTE(review): presumably mirrors the keyword arguments of the float
    draw primitive and pairs with the "float" IR type name -- confirm.
    """

    # Both bounds are required floats here (no Optional), in contrast to the
    # integer schema.
    min_value: float
    max_value: float
    allow_nan: bool
    # NOTE(review): exact meaning is not visible in this file section;
    # presumably a lower limit on the magnitude of non-zero results -- confirm.
    smallest_nonzero_magnitude: float


class StringKWargs(TypedDict):
    """Typed-dict schema with the keys recorded for a string draw.

    One of the members of the ``IRKWargsType`` union.
    NOTE(review): presumably mirrors the keyword arguments of the string
    draw primitive and pairs with the "string" IR type name -- confirm.
    """

    # An ``IntervalSet`` (imported from hypothesis.internal.intervalsets).
    # NOTE(review): presumably the set of permitted characters -- confirm.
    intervals: IntervalSet
    min_size: int
    # Optional upper size. NOTE(review): ``None`` presumably means
    # "no upper limit" -- confirm.
    max_size: Optional[int]


class BytesKWargs(TypedDict):
    """Typed-dict schema with the single key recorded for a bytes draw.

    One of the members of the ``IRKWargsType`` union.
    NOTE(review): presumably pairs with the "bytes" IR type name -- confirm.
    """

    # NOTE(review): presumably the number of bytes to draw -- confirm.
    size: int


class BooleanKWargs(TypedDict):
    """Typed-dict schema with the single key recorded for a boolean draw.

    One of the members of the ``IRKWargsType`` union.
    NOTE(review): presumably pairs with the "boolean" IR type name -- confirm.
    """

    # NOTE(review): presumably the probability argument of the boolean draw
    # primitive (which outcome it weights is not visible here) -- confirm.
    p: float


# The Python types a single IR value can have.
IRType: TypeAlias = Union[int, str, bool, float, bytes]
# Any one of the per-type kwargs TypedDict schemas defined above.
IRKWargsType: TypeAlias = Union[
    IntegerKWargs, FloatKWargs, StringKWargs, BytesKWargs, BooleanKWargs
]
# The closed set of string names used to identify an IR type.
IRTypeName: TypeAlias = Literal["integer", "string", "boolean", "float", "bytes"]


class ExtraInformation:
"""A class for holding shared state on a ``ConjectureData`` that should
be added to the final ``ConjectureResult``."""
Expand Down Expand Up @@ -798,34 +836,6 @@ def as_result(self) -> "_Overrun":
MAX_DEPTH = 100


class IntegerKWargs(TypedDict):
min_value: Optional[int]
max_value: Optional[int]
weights: Optional[Sequence[float]]
shrink_towards: int


class FloatKWargs(TypedDict):
min_value: float
max_value: float
allow_nan: bool
smallest_nonzero_magnitude: float


class StringKWargs(TypedDict):
intervals: IntervalSet
min_size: int
max_size: Optional[int]


class BytesKWargs(TypedDict):
size: int


class BooleanKWargs(TypedDict):
p: float


class DataObserver:
"""Observer class for recording the behaviour of a
ConjectureData object, primarily used for tracking
Expand Down
21 changes: 6 additions & 15 deletions hypothesis-python/src/hypothesis/internal/conjecture/datatree.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

import itertools
import math
from typing import TYPE_CHECKING, List, Literal, Optional, Union
from typing import List, Optional, Union

import attr

Expand All @@ -24,23 +24,14 @@
DataObserver,
FloatKWargs,
IntegerKWargs,
IRKWargsType,
IRType,
IRTypeName,
Status,
StringKWargs,
)
from hypothesis.internal.floats import count_between_floats, float_to_int, int_to_float

if TYPE_CHECKING:
from typing import TypeAlias
else:
TypeAlias = object

IRType: TypeAlias = Union[int, str, bool, float, bytes]
IRKWargsType: TypeAlias = Union[
IntegerKWargs, FloatKWargs, StringKWargs, BytesKWargs, BooleanKWargs
]
# this would be "IRTypeType", but that's just confusing.
IRLiteralType: TypeAlias = Literal["integer", "string", "boolean", "float", "bytes"]


class PreviouslyUnseenBehaviour(HypothesisException):
    # Marker exception: adds no state or behaviour beyond HypothesisException.
    # NOTE(review): the raise sites are not visible in this section;
    # presumably signals that a replay reached a point the recorded tree has
    # no data for -- confirm.
    pass
Expand Down Expand Up @@ -336,7 +327,7 @@ class TreeNode:
# have the same length. The values at index i belong to node i.
kwargs: List[IRKWargsType] = attr.ib(factory=list)
values: List[IRType] = attr.ib(factory=list)
ir_types: List[IRLiteralType] = attr.ib(factory=list)
ir_types: List[IRTypeName] = attr.ib(factory=list)

# The indices of nodes which had forced values.
#
Expand Down Expand Up @@ -885,7 +876,7 @@ def draw_boolean(

def draw_value(
self,
ir_type: IRLiteralType,
ir_type: IRTypeName,
value: IRType,
*,
was_forced: bool,
Expand Down
32 changes: 18 additions & 14 deletions hypothesis-python/src/hypothesis/stateful.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,6 @@ def invariants(cls):
return cls._invariants_per_class[cls]

def _repr_step(self, rule, data, result):
self.step_count = getattr(self, "step_count", 0) + 1
output_assignment = ""
if rule.targets:
if isinstance(result, MultipleResults):
Expand Down Expand Up @@ -431,7 +430,7 @@ def runTest(self):
return StateMachineTestCase


@attr.s()
@attr.s(repr=False)
class Rule:
targets = attr.ib()
function = attr.ib(repr=get_pretty_function_description)
Expand All @@ -451,6 +450,11 @@ def __attrs_post_init__(self):
self.arguments_strategies[k] = v
self.bundles = tuple(bundles)

def __repr__(self) -> str:
    """Return ``ClassName(field=value, ...)`` listing only truthy fields.

    Fields come from ``attr.asdict(self)``; each value that survives the
    truthiness filter is rendered with ``get_pretty_function_description``.
    """
    describe = get_pretty_function_description
    parts = []
    for field_name, field_value in attr.asdict(self).items():
        # Falsy values (empty collections, None, ...) are left out entirely
        # to keep the repr short.
        if not field_value:
            continue
        parts.append(f"{field_name}={describe(field_value)}")
    joined = ", ".join(parts)
    return f"{self.__class__.__name__}({joined})"


self_strategy = st.runner()

Expand Down Expand Up @@ -937,7 +941,8 @@ def __init__(self, machine):
self.rules = list(machine.rules())

self.enabled_rules_strategy = st.shared(
FeatureStrategy(), key=("enabled rules", machine)
FeatureStrategy(at_least_one_of={r.function.__name__ for r in self.rules}),
key=("enabled rules", machine),
)

# The order is a bit arbitrary. Primarily we're trying to group rules
Expand Down Expand Up @@ -965,17 +970,16 @@ def do_draw(self, data):

feature_flags = data.draw(self.enabled_rules_strategy)

# Note: The order of the filters here is actually quite important,
# because checking is_enabled makes choices, so increases the size of
# the choice sequence. This means that if we are in a case where many
# rules are invalid we will make a lot more choices if we ask if they
# are enabled before we ask if they are valid, so our test cases will
# be artificially large.
rule = data.draw(
st.sampled_from(self.rules)
.filter(self.is_valid)
.filter(lambda r: feature_flags.is_enabled(r.function.__name__))
)
def rule_is_enabled(r):
    # Ordering matters: validity is checked first, and the feature flag is
    # only consulted for rules that passed. Asking is_enabled makes choices
    # and so grows the choice sequence; if many rules are invalid, asking
    # about enablement first would make far more choices and leave test
    # cases artificially large.
    valid = self.is_valid(r)
    if not valid:
        # Hand back the validity result itself, exactly as a short-circuited
        # ``and`` would.
        return valid
    return feature_flags.is_enabled(r.function.__name__)

rule = data.draw(st.sampled_from(self.rules).filter(rule_is_enabled))

arguments = {}
for k, strat in rule.arguments_strategies.items():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

from hypothesis.errors import InvalidArgument
from hypothesis.internal.conjecture import utils as cu
from hypothesis.internal.conjecture.engine import BUFFER_SIZE
from hypothesis.internal.conjecture.junkdrawer import LazySequenceCopy
from hypothesis.internal.conjecture.utils import combine_labels
from hypothesis.internal.filtering import get_integer_predicate_bounds
Expand Down Expand Up @@ -142,6 +143,10 @@ def __init__(self, elements, min_size=0, max_size=float("inf")):
self.min_size = min_size or 0
self.max_size = max_size if max_size is not None else float("inf")
assert 0 <= self.min_size <= self.max_size
if min_size > BUFFER_SIZE:
raise InvalidArgument(
f"min_size={min_size:_d} is larger than Hypothesis is designed to handle"
)
self.average_size = min(
max(self.min_size * 2, self.min_size + 5),
0.5 * (self.min_size + self.max_size),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class FeatureFlags:
required disabled features.
"""

def __init__(self, data=None, enabled=(), disabled=()):
def __init__(self, data=None, enabled=(), disabled=(), at_least_one_of=()):
self.__data = data
self.__is_disabled = {}

Expand All @@ -52,13 +52,18 @@ def __init__(self, data=None, enabled=(), disabled=()):
# features will be enabled. This is so that we shrink in the direction
# of more features being enabled.
if self.__data is not None:
self.__p_disabled = data.draw_integer(0, 255) / 255.0
self.__p_disabled = data.draw_integer(0, 254) / 255
else:
# If data is None we're in example mode so all that matters is the
# enabled/disabled lists above. We set this up so that everything
# else is enabled by default.
self.__p_disabled = 0.0

# The naive approach can lead to disabling e.g. every single rule on a
# RuleBasedStateMachine, which aborts the test as unable to make progress.
# Track the set of possible names, and ensure that at least one is enabled.
self.__at_least_one_of = set(at_least_one_of)

def is_enabled(self, name):
"""Tests whether the feature named ``name`` should be enabled on this
test run."""
Expand All @@ -81,10 +86,19 @@ def is_enabled(self, name):
# of the test case where we originally decided, the next point at
# which we make this decision just makes the decision it previously
# made.
oneof = self.__at_least_one_of
is_disabled = self.__data.draw_boolean(
self.__p_disabled, forced=self.__is_disabled.get(name)
self.__p_disabled,
forced=(
False
if len(oneof) == 1 and name in oneof
else self.__is_disabled.get(name)
),
)
self.__is_disabled[name] = is_disabled
if name in oneof and not is_disabled:
oneof.clear()
oneof.discard(name)
data.stop_example()
return not is_disabled

Expand All @@ -100,5 +114,9 @@ def __repr__(self):


class FeatureStrategy(SearchStrategy):
    """Strategy whose draws are ``FeatureFlags`` objects.

    ``at_least_one_of`` is an optional iterable of feature names. It is
    stored as a frozenset at construction time and forwarded to every
    ``FeatureFlags`` instance this strategy draws.
    """

    def __init__(self, at_least_one_of=()):
        super().__init__()
        # Frozen copy: the strategy may be drawn from many times, so the
        # stored names must not be mutated between draws.
        self._at_least_one_of = frozenset(at_least_one_of)

    def do_draw(self, data):
        # Always forward the configured names. A leftover plain
        # ``return FeatureFlags(data)`` ahead of this line would make the
        # forwarding unreachable and silently drop ``at_least_one_of``.
        return FeatureFlags(data, at_least_one_of=self._at_least_one_of)
7 changes: 7 additions & 0 deletions hypothesis-python/tests/cover/test_direct_strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,12 @@ def fn_ktest(*fnkwargs):
(ds.lists, {"elements": ds.integers(), "unique_by": ()}),
(ds.lists, {"elements": ds.integers(), "unique_by": (1,)}),
(ds.lists, {"elements": ds.sampled_from([0, 1]), "min_size": 3, "unique": True}),
(ds.lists, {"elements": ds.none(), "min_size": 100_000}),
(ds.lists, {"elements": ds.none(), "min_size": 100_000, "unique": True}),
(
ds.lists,
{"elements": ds.sampled_from([1, 2]), "min_size": 100_000, "unique": True},
),
(ds.text, {"min_size": 10, "max_size": 9}),
(ds.text, {"alphabet": [1]}),
(ds.text, {"alphabet": ["abc"]}),
Expand All @@ -128,6 +134,7 @@ def fn_ktest(*fnkwargs):
(ds.text, {"alphabet": ds.sampled_from([123, 456])}),
(ds.text, {"alphabet": ds.builds(lambda: "abc")}),
(ds.text, {"alphabet": ds.builds(lambda: 123)}),
(ds.text, {"alphabet": "abc", "min_size": 100_000}),
(ds.from_regex, {"regex": 123}),
(ds.from_regex, {"regex": b"abc", "alphabet": "abc"}),
(ds.from_regex, {"regex": b"abc", "alphabet": b"def"}),
Expand Down
5 changes: 5 additions & 0 deletions hypothesis-python/tests/cover/test_feature_flags.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,8 @@ def test_repr_can_be_evalled(data):

for f in more_features:
assert flags2.is_enabled(f)


@given(FeatureStrategy(at_least_one_of={"a", "b", "c"}))
def test_can_avoid_disabling_every_flag(flags):
    """At least one of the names given as ``at_least_one_of`` is enabled."""
    found_enabled = False
    for k in {"a", "b", "c"}:
        # Stop querying at the first enabled flag, as ``any`` would.
        if flags.is_enabled(k):
            found_enabled = True
            break
    assert found_enabled

0 comments on commit 202d6af

Please sign in to comment.