diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst new file mode 100644 index 0000000000..b289060100 --- /dev/null +++ b/hypothesis-python/RELEASE.rst @@ -0,0 +1,3 @@ +RELEASE_TYPE: minor + +This release improves Hypothesis' handling of ExceptionGroup - it's now able to detect marker detections if they're inside a group and attempts to resolve them. Note that this handling is still a work in progress and might not handle edge cases optimally. Please open issues if you encounter any problems or unexpected behavior with it. diff --git a/hypothesis-python/src/hypothesis/core.py b/hypothesis-python/src/hypothesis/core.py index 188fb6d1a4..4e02174032 100644 --- a/hypothesis-python/src/hypothesis/core.py +++ b/hypothesis-python/src/hypothesis/core.py @@ -9,7 +9,6 @@ # obtain one at https://mozilla.org/MPL/2.0/. """This module provides the core primitives of Hypothesis, such as given.""" - import base64 import contextlib import datetime @@ -24,7 +23,7 @@ import warnings import zlib from collections import defaultdict -from collections.abc import Coroutine, Hashable +from collections.abc import Coroutine, Generator, Hashable from functools import partial from random import Random from typing import ( @@ -58,6 +57,7 @@ FlakyFailure, FlakyReplay, Found, + Frozen, HypothesisException, HypothesisWarning, InvalidArgument, @@ -735,6 +735,69 @@ def execute(data, function): return default_executor +@contextlib.contextmanager +def unwrap_markers_from_group() -> Generator[None, None, None]: + # This function is a crude solution, a better way of resolving it would probably + # be to rewrite a bunch of exception handlers to use except*. 
+ T = TypeVar("T", bound=BaseException) + + def _flatten_group(excgroup: BaseExceptionGroup[T]) -> list[T]: + found_exceptions: list[T] = [] + for exc in excgroup.exceptions: + if isinstance(exc, BaseExceptionGroup): + found_exceptions.extend(_flatten_group(exc)) + else: + found_exceptions.append(exc) + return found_exceptions + + try: + yield + except BaseExceptionGroup as excgroup: + frozen_exceptions, non_frozen_exceptions = excgroup.split(Frozen) + + # group only contains Frozen, reraise the group + # it doesn't matter what we raise, since any exceptions get disregarded + # and reraised as StopTest if data got frozen. + if non_frozen_exceptions is None: + raise + # in all other cases they are discarded + + # Can RewindRecursive end up in this group? + _, user_exceptions = non_frozen_exceptions.split( + lambda e: isinstance(e, (StopTest, HypothesisException)) + ) + + # this might contain marker exceptions, or internal errors, but not frozen. + if user_exceptions is not None: + raise + + # single marker exception - reraise it + flattened_non_frozen_exceptions: list[BaseException] = _flatten_group( + non_frozen_exceptions + ) + if len(flattened_non_frozen_exceptions) == 1: + e = flattened_non_frozen_exceptions[0] + # preserve the cause of the original exception to not hinder debugging + # note that __context__ is still lost though + raise e from e.__cause__ + + # multiple marker exceptions. If we re-raise the whole group we break + # a bunch of logic so ....? + stoptests, non_stoptests = non_frozen_exceptions.split(StopTest) + + # TODO: stoptest+hypothesisexception ...? Is it possible? If so, what do? + + if non_stoptests: + # TODO: multiple marker exceptions is easy to produce, but the logic in the + # engine does not handle it... so we just reraise the first one for now. 
+ e = _flatten_group(non_stoptests)[0] + raise e from e.__cause__ + assert stoptests is not None + + # multiple stoptests: raising the one with the lowest testcounter + raise min(_flatten_group(stoptests), key=lambda s_e: s_e.testcounter) + + class StateForActualGivenExecution: def __init__(self, stuff, test, settings, random, wrapped_test): self.test_runner = get_executor(stuff.selfy) @@ -808,7 +871,7 @@ def execute_once( @proxies(self.test) def test(*args, **kwargs): - with ensure_free_stackframes(): + with unwrap_markers_from_group(), ensure_free_stackframes(): return self.test(*args, **kwargs) else: @@ -820,7 +883,7 @@ def test(*args, **kwargs): arg_gctime = gc_cumulative_time() start = time.perf_counter() try: - with ensure_free_stackframes(): + with unwrap_markers_from_group(), ensure_free_stackframes(): result = self.test(*args, **kwargs) finally: finish = time.perf_counter() @@ -1221,6 +1284,7 @@ def run_engine(self): ran_example.slice_comments = falsifying_example.slice_comments tb = None origin = None + assert info is not None assert info._expected_exception is not None try: with with_reporter(fragments.append): diff --git a/hypothesis-python/tests/cover/test_exceptiongroup.py b/hypothesis-python/tests/cover/test_exceptiongroup.py new file mode 100644 index 0000000000..195b259413 --- /dev/null +++ b/hypothesis-python/tests/cover/test_exceptiongroup.py @@ -0,0 +1,157 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. 
import pytest

from hypothesis import errors, given, strategies as st
from hypothesis.internal.compat import BaseExceptionGroup, ExceptionGroup
from hypothesis.strategies import DataObject


def test_discard_frozen() -> None:
    @given(st.data())
    def discard_frozen(data: DataObject) -> None:
        # Accessing .conjecture_data is internal API. Other possible ways of freezing
        # data might go through ConjectureRunner.cached_test_function_ir or
        # ConjectureRunner.test_function
        data.conjecture_data.freeze()
        # Raising Frozen doesn't actually do anything, what matters is
        # whether the data is frozen.
        raise ExceptionGroup("", [errors.Frozen()])

    discard_frozen()


def test_discard_multiple_frozen() -> None:
    @given(st.data())
    def discard_multiple_frozen(data: DataObject) -> None:
        data.conjecture_data.freeze()
        raise ExceptionGroup("", [errors.Frozen(), errors.Frozen()])

    discard_multiple_frozen()


def test_user_error_and_frozen() -> None:
    # a user error next to Frozen: the whole group is reraised untouched
    @given(st.data())
    def user_error_and_frozen(data: DataObject) -> None:
        raise ExceptionGroup("", [errors.Frozen(), TypeError()])

    with pytest.raises(ExceptionGroup) as excinfo:
        user_error_and_frozen()
    e = excinfo.value
    assert isinstance(e, ExceptionGroup)
    assert len(e.exceptions) == 2
    assert isinstance(e.exceptions[0], errors.Frozen)
    assert isinstance(e.exceptions[1], TypeError)


def test_user_error_and_stoptest() -> None:
    # if the code base had "proper" handling of exceptiongroups, the StopTest would
    # probably be handled by an except*.
    # TODO: which I suppose is an argument in favor of stripping it??
    @given(st.data())
    def user_error_and_stoptest(data: DataObject) -> None:
        raise BaseExceptionGroup(
            "", [errors.StopTest(data.conjecture_data.testcounter), TypeError()]
        )

    with pytest.raises(BaseExceptionGroup) as excinfo:
        user_error_and_stoptest()
    e = excinfo.value
    assert isinstance(e, BaseExceptionGroup)
    assert len(e.exceptions) == 2
    assert isinstance(e.exceptions[0], errors.StopTest)
    assert isinstance(e.exceptions[1], TypeError)


def test_lone_user_error() -> None:
    # we don't want to unwrap exceptiongroups, since they might contain
    # useful debugging info
    @given(st.data())
    def lone_user_error(data: DataObject) -> None:
        raise ExceptionGroup("foo", [TypeError()])

    with pytest.raises(ExceptionGroup) as excinfo:
        lone_user_error()
    e = excinfo.value
    assert isinstance(e, ExceptionGroup)
    assert len(e.exceptions) == 1
    assert isinstance(e.exceptions[0], TypeError)


def test_nested_stoptest() -> None:
    # a single StopTest is found even when it sits inside a nested group
    @given(st.data())
    def nested_stoptest(data: DataObject) -> None:
        raise BaseExceptionGroup(
            "",
            [
                BaseExceptionGroup(
                    "", [errors.StopTest(data.conjecture_data.testcounter)]
                )
            ],
        )

    nested_stoptest()


def test_frozen_and_stoptest() -> None:
    # frozen+stoptest => strip frozen and let engine handle StopTest
    # actually.. I don't think I've got a live repro for this either.
    @given(st.data())
    def frozen_and_stoptest(data: DataObject) -> None:
        raise BaseExceptionGroup(
            "", [errors.StopTest(data.conjecture_data.testcounter), errors.Frozen()]
        )

    frozen_and_stoptest()


def test_multiple_stoptest_1() -> None:
    # multiple stoptest, reraise the one with lowest testcounter
    @given(st.data())
    def multiple_stoptest(data: DataObject) -> None:
        c = data.conjecture_data.testcounter
        raise BaseExceptionGroup("", [errors.StopTest(c), errors.StopTest(c + 1)])

    multiple_stoptest()


def test_multiple_stoptest_2() -> None:
    # the lower value is raised, which does not match data.conjecture_data.testcounter
    # so it is not handled by the engine
    @given(st.data())
    def multiple_stoptest_2(data: DataObject) -> None:
        c = data.conjecture_data.testcounter
        raise BaseExceptionGroup("", [errors.StopTest(c), errors.StopTest(c - 1)])

    with pytest.raises(errors.StopTest):
        multiple_stoptest_2()


def test_stoptest_and_hypothesisexception() -> None:
    # current code raises the first hypothesisexception and throws away stoptest
    @given(st.data())
    def stoptest_and_hypothesisexception(data: DataObject) -> None:
        c = data.conjecture_data.testcounter
        raise BaseExceptionGroup("", [errors.StopTest(c), errors.Flaky()])

    with pytest.raises(errors.Flaky):
        stoptest_and_hypothesisexception()


def test_multiple_hypothesisexception() -> None:
    # this can happen in several ways, see nocover/test_exceptiongroup.py
    # The group holds two HypothesisExceptions and no StopTest, so this covers
    # the "several markers" path rather than repeating the StopTest+Flaky case
    # above. Current code reraises only the first exception of the group.
    @given(st.data())
    def multiple_hypothesisexception(data: DataObject) -> None:
        raise ExceptionGroup("", [errors.Flaky(), errors.InvalidArgument()])

    with pytest.raises(errors.Flaky):
        multiple_hypothesisexception()
# This file is part of Hypothesis, which may be found at
# https://github.com/HypothesisWorks/hypothesis/
#
# Copyright the Hypothesis Authors.
# Individual contributors are listed in AUTHORS.rst and the git log.
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.

import asyncio
import sys
from typing import Callable

import pytest

from hypothesis import errors, given, reject, strategies as st
from hypothesis.internal.compat import ExceptionGroup
from hypothesis.strategies import DataObject

# this file is not typechecked by mypy, which only runs py310

if sys.version_info < (3, 11):
    # NOTE(review): the tail of this call and the header of the first test were
    # lost to text extraction (everything between "<" and ">" was stripped).
    # The message suffix and the test name below are reconstructed - confirm
    # against the upstream file.
    pytest.skip(
        "asyncio.TaskGroup not available on <py311", allow_module_level=True
    )


def test_exceptiongroup_discard_frozen() -> None:
    # One task can never satisfy its filter; the nested TaskGroups wrap the
    # resulting exceptions in nested groups. The test expects Unsatisfiable.
    @given(st.data())
    def test_function(data: DataObject) -> None:
        async def task(pred: Callable[[bool], bool]) -> None:
            data.draw(st.booleans().filter(pred))

        async def _main() -> None:
            async with asyncio.TaskGroup():
                async with asyncio.TaskGroup() as tg2:
                    tg2.create_task(task(bool))
                    tg2.create_task(task(lambda _: False))

        asyncio.run(_main())

    with pytest.raises(errors.Unsatisfiable):
        test_function()


def test_exceptiongroup_user_originated() -> None:
    # groups raised by user code must reach the caller unmodified
    @given(st.data())
    def test_function(data):
        raise ExceptionGroup("foo", [ValueError(), ValueError()])

    with pytest.raises(ExceptionGroup) as exc_info:
        test_function()
    e = exc_info.value
    # check the type first so a wrong exception type fails with a clear message
    assert isinstance(e, ExceptionGroup)
    assert e.message == "foo"
    assert len(e.exceptions) == 2
    assert all(isinstance(child_e, ValueError) for child_e in e.exceptions)

    @given(st.data())
    def test_single_exc_group(data):
        raise ExceptionGroup("important message for user", [ValueError()])

    with pytest.raises(ExceptionGroup) as exc_info:
        test_single_exc_group()
    e = exc_info.value
    assert isinstance(e, ExceptionGroup)
    assert e.message == "important message for user"
    assert len(e.exceptions) == 1
    assert isinstance(e.exceptions[0], ValueError)


def test_exceptiongroup_multiple_stop() -> None:
    @given(st.data())
    def test_function(data):
        async def task(d: DataObject) -> None:
            d.conjecture_data.mark_invalid()

        async def _main(d: DataObject) -> None:
            async with asyncio.TaskGroup() as tg:
                tg.create_task(task(d))
                tg.create_task(task(d))

        asyncio.run(_main(data))

    # multiple stoptests -> single stoptest -> unsatisfiable
    with pytest.raises(errors.Unsatisfiable):
        test_function()


def test_exceptiongroup_stop_and_hypothesisexception() -> None:
    # group with stoptest+invalidargument -> invalidargument
    @given(st.data())
    def test_function(data):
        async def task_stoptest(d: DataObject) -> None:
            # only mark some runs as invalid to not raise Unsatisfiable
            if d.draw(st.integers(min_value=0, max_value=1)) == 1:
                d.conjecture_data.mark_invalid()

        async def task_invalid_argument(d: DataObject) -> None:
            d.draw(st.integers(max_value=2, min_value=3))

        async def _main(d: DataObject) -> None:
            async with asyncio.TaskGroup() as tg:
                tg.create_task(task_stoptest(d))
                tg.create_task(task_invalid_argument(d))

        asyncio.run(_main(data))

    with pytest.raises(errors.InvalidArgument):
        test_function()


def test_exceptiongroup_multiple_hypothesisexception() -> None:
    # multiple UnsatisfiedAssumption => first one is reraised => engine suppresses it

    @given(st.integers(min_value=0, max_value=3))
    def test_function(val: int) -> None:
        async def task(value: int) -> None:
            if value == 0:
                reject()

        async def _main(value: int) -> None:
            async with asyncio.TaskGroup() as tg:
                tg.create_task(task(value))
                tg.create_task(task(value))

        asyncio.run(_main(val))

    test_function()


def test_exceptiongroup_multiple_InvalidArgument() -> None:
    # multiple InvalidArgument => only first one is reraised... which seems bad.
    # But raising a group might break ghostwriter(?)

    @given(st.data())
    def test_function(data: DataObject) -> None:
        async def task1(d: DataObject) -> None:
            d.draw(st.integers(max_value=1, min_value=3))

        async def task2(d: DataObject) -> None:
            d.draw(st.integers(max_value=2, min_value=3))

        async def _main(d: DataObject) -> None:
            async with asyncio.TaskGroup() as tg:
                tg.create_task(task1(d))
                tg.create_task(task2(d))

        asyncio.run(_main(data))

    with pytest.raises(errors.InvalidArgument):
        test_function()