HypothesisWorks · jakkdl · Sep 19, 2024 · Sep 24, 2024 · Sep 25, 2024 · Sep 25, 2024
diff --git a/hypothesis-python/src/hypothesis/core.py b/hypothesis-python/src/hypothesis/core.py
@@ -9,6 +9,7 @@
 # obtain one at https://mozilla.org/MPL/2.0/.
 
 """This module provides the core primitives of Hypothesis, such as given."""
+from __future__ import annotations
 
 import base64
 import contextlib
@@ -32,13 +33,9 @@
     BinaryIO,
     Callable,
     Coroutine,
+    Generator,
     Hashable,
-    List,
-    Optional,
-    Tuple,
-    Type,
     TypeVar,
-    Union,
     overload,
 )
 from unittest import TestCase
@@ -61,6 +58,7 @@
     FlakyFailure,
     FlakyReplay,
     Found,
+    Frozen,
     HypothesisException,
     HypothesisWarning,
     InvalidArgument,
@@ -148,7 +146,6 @@
 else:  # pragma: no cover
     EllipsisType = type(Ellipsis)
 
-
 TestFunc = TypeVar("TestFunc", bound=Callable)
 
 
@@ -178,7 +175,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
         if not (args or kwargs):
             raise InvalidArgument("An example must provide at least one argument")
 
-        self.hypothesis_explicit_examples: List[Example] = []
+        self.hypothesis_explicit_examples: list[Example] = []
         self._this_example = Example(tuple(args), kwargs)
 
     def __call__(self, test: TestFunc) -> TestFunc:
@@ -192,10 +189,8 @@ def xfail(
         condition: bool = True,  # noqa: FBT002
         *,
         reason: str = "",
-        raises: Union[
-            Type[BaseException], Tuple[Type[BaseException], ...]
-        ] = BaseException,
-    ) -> "example":
+        raises: type[BaseException] | tuple[type[BaseException], ...] = BaseException,
+    ) -> example:
         """Mark this example as an expected failure, similarly to
         :obj:`pytest.mark.xfail(strict=True) <pytest.mark.xfail>`.
 
@@ -264,7 +259,7 @@ def test(x):
             )
         return self
 
-    def via(self, whence: str, /) -> "example":
+    def via(self, whence: str, /) -> example:
         """Attach a machine-readable label noting whence this example came.
 
         The idea is that tools will be able to add ``@example()`` cases for you, e.g.
@@ -768,6 +763,61 @@ def execute(data, function):
     return default_executor
 
 
+@contextlib.contextmanager
+def unwrap_exception_group() -> Generator[None]:
+    T = TypeVar("T", bound=BaseException)
+
+    def _flatten_group(excgroup: BaseExceptionGroup[T]) -> list[T]:
+        found_exceptions = []
+        for exc in excgroup.exceptions:
+            if isinstance(exc, BaseExceptionGroup):
+                found_exceptions.extend(_flatten_group(exc))
+            else:
+                found_exceptions.append(exc)
+        return found_exceptions
+
+    try:
+        yield
+    except BaseExceptionGroup as excgroup:
+        frozen_exceptions, non_frozen_exceptions = excgroup.split(Frozen)
+
+        # group only contains Frozen, reraise the group
+        # it doesn't matter what we raise, since any exceptions get disregarded
+        # and reraised as StopTest if data got frozen.
+        if non_frozen_exceptions is None:
+            raise
+        # in all other cases they are discarded
+
+        # Can RewindRecursive end up in this group?
+        _, user_exceptions = non_frozen_exceptions.split(
+            lambda e: isinstance(e, (StopTest, HypothesisException))
+        )
+
+        # this might contain marker exceptions, or internal errors, but not frozen.
+        if user_exceptions is not None:
+            raise
+
+        # single marker exception - reraise it
+        flattened_non_frozen_exceptions = _flatten_group(non_frozen_exceptions)
+        if len(flattened_non_frozen_exceptions) == 1:
+            raise flattened_non_frozen_exceptions[0] from None
+
+        # multiple marker exceptions. If we re-raise the whole group we break
+        # a bunch of logic so ....?
+        stoptests, non_stoptests = non_frozen_exceptions.split(StopTest)
+
+        # TODO: stoptest+hypothesisexception ...? Is it possible? If so, what do?
+
+        if non_stoptests:
+            # TODO: multiple marker exceptions is easy to produce, but the logic in the
+            # engine does not handle it... so we just reraise the first one for now.
+            raise _flatten_group(non_stoptests)[0] from None
+        assert stoptests is not None
+
+        # multiple stoptests: raising the one with the lowest testcounter
+        raise min(_flatten_group(stoptests), key=lambda s_e: s_e.testcounter)
+
+
 except failure_exceptions_to_catch() as e: 
     # If an unhandled (i.e., non-Hypothesis) error was raised by 
     # Hypothesis-internal code, re-raise it as a fatal error instead 
     # of treating it as a test failure. 
     filepath = traceback.extract_tb(e.__traceback__)[-1][0] 
     if is_hypothesis_file(filepath) and not isinstance(e, HypothesisException): 
         raise 
     if data.frozen: 
         # This can happen if an error occurred in a finally 
         # block somewhere, suppressing our original StopTest. 
         # We raise a new one here to resume normal operation. 
         raise StopTest(data.testcounter) from e 
 except failure_exceptions_to_catch() as e: 
     # If an unhandled (i.e., non-Hypothesis) error was raised by 
     # Hypothesis-internal code, re-raise it as a fatal error instead 
     # of treating it as a test failure. 
     filepath = traceback.extract_tb(e.__traceback__)[-1][0] 
     if is_hypothesis_file(filepath) and not isinstance(e, HypothesisException): 
         raise 
  
     if data.frozen: 
         # This can happen if an error occurred in a finally 
         # block somewhere, suppressing our original StopTest. 
         # We raise a new one here to resume normal operation. 
         raise StopTest(data.testcounter) from e 
 class StateForActualGivenExecution:
     def __init__(self, stuff, test, settings, random, wrapped_test):
         self.test_runner = get_executor(stuff.selfy)
@@ -841,7 +891,7 @@ def execute_once(
 
             @proxies(self.test)
             def test(*args, **kwargs):
-                with ensure_free_stackframes():
+                with unwrap_exception_group(), ensure_free_stackframes():
                     return self.test(*args, **kwargs)
 
         else:
@@ -853,7 +903,7 @@ def test(*args, **kwargs):
                 arg_gctime = gc_cumulative_time()
                 start = time.perf_counter()
                 try:
-                    with ensure_free_stackframes():
+                    with unwrap_exception_group(), ensure_free_stackframes():
                         result = self.test(*args, **kwargs)
                 finally:
                     finish = time.perf_counter()
@@ -1086,6 +1136,9 @@ def _execute_once_for_engine(self, data: ConjectureData) -> None:
                 # This can happen if an error occurred in a finally
                 # block somewhere, suppressing our original StopTest.
                 # We raise a new one here to resume normal operation.
+                # TODO: this should maybe inspect the stacktrace to see that the above
+                # mentioned story is true? I.e. reraise as StopTest iff there is a
+                # StopTest somewhere in the tree of e.__context__
                 raise StopTest(data.testcounter) from e
             else:
                 # The test failed by raising an exception, so we inform the
@@ -1146,7 +1199,7 @@ def _execute_once_for_engine(self, data: ConjectureData) -> None:
             self._timing_features = {}
 
     def _deliver_information_message(
-        self, *, type: str, title: str, content: Union[str, dict]
+        self, *, type: str, title: str, content: str | dict
     ) -> None:
         deliver_json_blob(
             {
@@ -1249,6 +1302,7 @@ def run_engine(self):
             ran_example.slice_comments = falsifying_example.slice_comments
             tb = None
             origin = None
+            assert info is not None
             assert info._expected_exception is not None
             try:
                 with with_reporter(fragments.append):
@@ -1407,7 +1461,7 @@ class HypothesisHandle:
     @property
     def fuzz_one_input(
         self,
-    ) -> Callable[[Union[bytes, bytearray, memoryview, BinaryIO]], Optional[bytes]]:
+    ) -> Callable[[bytes | bytearray | memoryview | BinaryIO], bytes | None]:
         """Run the test as a fuzz target, driven with the `buffer` of bytes.
 
         Returns None if buffer invalid for the strategy, canonical pruned
@@ -1428,7 +1482,7 @@ def fuzz_one_input(
 def given(
     _: EllipsisType, /
 ) -> Callable[
-    [Callable[..., Optional[Coroutine[Any, Any, None]]]], Callable[[], None]
+    [Callable[..., Coroutine[Any, Any, None] | None]], Callable[[], None]
 ]:  # pragma: no cover
     ...
 
@@ -1437,26 +1491,24 @@ def given(
 def given(
     *_given_arguments: SearchStrategy[Any],
 ) -> Callable[
-    [Callable[..., Optional[Coroutine[Any, Any, None]]]], Callable[..., None]
+    [Callable[..., Coroutine[Any, Any, None] | None]], Callable[..., None]
 ]:  # pragma: no cover
     ...
 
 
 @overload
 def given(
-    **_given_kwargs: Union[SearchStrategy[Any], EllipsisType],
+    **_given_kwargs: SearchStrategy[Any] | EllipsisType,
 ) -> Callable[
-    [Callable[..., Optional[Coroutine[Any, Any, None]]]], Callable[..., None]
+    [Callable[..., Coroutine[Any, Any, None] | None]], Callable[..., None]
 ]:  # pragma: no cover
     ...
 
 
 def given(
-    *_given_arguments: Union[SearchStrategy[Any], EllipsisType],
-    **_given_kwargs: Union[SearchStrategy[Any], EllipsisType],
-) -> Callable[
-    [Callable[..., Optional[Coroutine[Any, Any, None]]]], Callable[..., None]
-]:
+    *_given_arguments: SearchStrategy[Any] | EllipsisType,
+    **_given_kwargs: SearchStrategy[Any] | EllipsisType,
+) -> Callable[[Callable[..., Coroutine[Any, Any, None] | None]], Callable[..., None]]:
     """A decorator for turning a test function that accepts arguments into a
     randomized test.
 
@@ -1725,7 +1777,7 @@ def wrapped_test(*arguments, **kwargs):
                 raise SKIP_BECAUSE_NO_EXAMPLES
 
         def _get_fuzz_target() -> (
-            Callable[[Union[bytes, bytearray, memoryview, BinaryIO]], Optional[bytes]]
+            Callable[[bytes | bytearray | memoryview | BinaryIO], bytes | None]
         ):
             # Because fuzzing interfaces are very performance-sensitive, we use a
             # somewhat more complicated structure here.  `_get_fuzz_target()` is
@@ -1757,8 +1809,8 @@ def _get_fuzz_target() -> (
             minimal_failures: dict = {}
 
             def fuzz_one_input(
-                buffer: Union[bytes, bytearray, memoryview, BinaryIO]
-            ) -> Optional[bytes]:
+                buffer: bytes | bytearray | memoryview | BinaryIO,
+            ) -> bytes | None:
                 # This inner part is all that the fuzzer will actually run,
                 # so we keep it as small and as fast as possible.
                 if isinstance(buffer, io.IOBase):
@@ -1812,9 +1864,9 @@ def find(
     specifier: SearchStrategy[Ex],
     condition: Callable[[Any], bool],
     *,
-    settings: Optional[Settings] = None,
-    random: Optional[Random] = None,
-    database_key: Optional[bytes] = None,
+    settings: Settings | None = None,
+    random: Random | None = None,
+    database_key: bytes | None = None,
 ) -> Ex:
     """Returns the minimal example from the given strategy ``specifier`` that
     matches the predicate function ``condition``."""
@@ -1837,7 +1889,7 @@ def find(
         )
     specifier.validate()
 
-    last: List[Ex] = []
+    last: list[Ex] = []
 
     @settings
     @given(specifier)

diff --git a/hypothesis-python/src/hypothesis/internal/escalation.py b/hypothesis-python/src/hypothesis/internal/escalation.py
@@ -13,9 +13,10 @@
 import sys
 import textwrap
 import traceback
+from functools import partial
 from inspect import getframeinfo
 from pathlib import Path
-from typing import Dict, NamedTuple, Optional, Type
+from typing import Dict, NamedTuple, Optional, Tuple, Type
 
 import hypothesis
 from hypothesis.errors import _Trimmable
@@ -107,20 +108,44 @@ def __str__(self) -> str:
         return f"{self.exc_type.__name__} at {self.filename}:{self.lineno}{ctx}{group}"
 
     @classmethod
-    def from_exception(cls, exception: BaseException, /) -> "InterestingOrigin":
+    def from_exception(
+        cls,
+        exception: BaseException,
+        /,
+        handled_exceptions: Tuple[BaseException, ...] = (),
+    ) -> "InterestingOrigin":
+        if exception in handled_exceptions:
+            exception.__context__ = None
         filename, lineno = None, None
         if tb := get_trimmed_traceback(exception):
             filename, lineno, *_ = traceback.extract_tb(tb)[-1]
+
+        # Quick dirty fix for #4115
+        handled_exceptions = (*handled_exceptions, exception)
+
         return cls(
             type(exception),
             filename,
             lineno,
             # Note that if __cause__ is set it is always equal to __context__, explicitly
             # to support introspection when debugging, so we can use that unconditionally.
-            cls.from_exception(exception.__context__) if exception.__context__ else (),
+            (
+                cls.from_exception(
+                    exception.__context__, handled_exceptions=handled_exceptions
+                )
+                if exception.__context__
+                else ()
+            ),
             # We distinguish exception groups by the inner exceptions, as for __context__
             (
-                tuple(map(cls.from_exception, exception.exceptions))
+                tuple(
+                    map(
+                        partial(
+                            cls.from_exception, handled_exceptions=handled_exceptions
+                        ),
+                        exception.exceptions,
+                    )
+                )
                 if isinstance(exception, BaseExceptionGroup)
                 else ()
             ),