Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

first draft of handling marker exceptions wrapped in exceptiongroup #4110

Draft
wants to merge 11 commits into
base: master
Choose a base branch
from
116 changes: 84 additions & 32 deletions hypothesis-python/src/hypothesis/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
# obtain one at https://mozilla.org/MPL/2.0/.

"""This module provides the core primitives of Hypothesis, such as given."""
from __future__ import annotations
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because Hypothesis does a lot of runtime introspection, we prefer to avoid from __future__ import annotations - which will eventually go away anyway, c.f. PEPs 649 and 749.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do you also want to avoid it in test files?


import base64
import contextlib
Expand All @@ -32,13 +33,9 @@
BinaryIO,
Callable,
Coroutine,
Generator,
Hashable,
List,
Optional,
Tuple,
Type,
TypeVar,
Union,
overload,
)
from unittest import TestCase
Expand All @@ -61,6 +58,7 @@
FlakyFailure,
FlakyReplay,
Found,
Frozen,
HypothesisException,
HypothesisWarning,
InvalidArgument,
Expand Down Expand Up @@ -148,7 +146,6 @@
else: # pragma: no cover
EllipsisType = type(Ellipsis)


TestFunc = TypeVar("TestFunc", bound=Callable)


Expand Down Expand Up @@ -178,7 +175,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
if not (args or kwargs):
raise InvalidArgument("An example must provide at least one argument")

self.hypothesis_explicit_examples: List[Example] = []
self.hypothesis_explicit_examples: list[Example] = []
self._this_example = Example(tuple(args), kwargs)

def __call__(self, test: TestFunc) -> TestFunc:
Expand All @@ -192,10 +189,8 @@ def xfail(
condition: bool = True, # noqa: FBT002
*,
reason: str = "",
raises: Union[
Type[BaseException], Tuple[Type[BaseException], ...]
] = BaseException,
) -> "example":
raises: type[BaseException] | tuple[type[BaseException], ...] = BaseException,
) -> example:
"""Mark this example as an expected failure, similarly to
:obj:`pytest.mark.xfail(strict=True) <pytest.mark.xfail>`.

Expand Down Expand Up @@ -264,7 +259,7 @@ def test(x):
)
return self

def via(self, whence: str, /) -> "example":
def via(self, whence: str, /) -> example:
"""Attach a machine-readable label noting whence this example came.

The idea is that tools will be able to add ``@example()`` cases for you, e.g.
Expand Down Expand Up @@ -768,6 +763,61 @@ def execute(data, function):
return default_executor


@contextlib.contextmanager
def unwrap_exception_group() -> Generator[None]:
T = TypeVar("T", bound=BaseException)

def _flatten_group(excgroup: BaseExceptionGroup[T]) -> list[T]:
found_exceptions = []
for exc in excgroup.exceptions:
if isinstance(exc, BaseExceptionGroup):
found_exceptions.extend(_flatten_group(exc))
else:
found_exceptions.append(exc)
return found_exceptions

try:
yield
except BaseExceptionGroup as excgroup:
frozen_exceptions, non_frozen_exceptions = excgroup.split(Frozen)

# group only contains Frozen, reraise the group
# it doesn't matter what we raise, since any exceptions get disregarded
# and reraised as StopTest if data got frozen.
if non_frozen_exceptions is None:
raise
# in all other cases they are discarded

# Can RewindRecursive end up in this group?
_, user_exceptions = non_frozen_exceptions.split(
lambda e: isinstance(e, (StopTest, HypothesisException))
)

# this might contain marker exceptions, or internal errors, but not frozen.
if user_exceptions is not None:
raise

# single marker exception - reraise it
flattened_non_frozen_exceptions = _flatten_group(non_frozen_exceptions)
if len(flattened_non_frozen_exceptions) == 1:
raise flattened_non_frozen_exceptions[0] from None

# multiple marker exceptions. If we re-raise the whole group we break
# a bunch of logic so ....?
stoptests, non_stoptests = non_frozen_exceptions.split(StopTest)

# TODO: stoptest+hypothesisexception ...? Is it possible? If so, what do?

if non_stoptests:
# TODO: multiple marker exceptions is easy to produce, but the logic in the
# engine does not handle it... so we just reraise the first one for now.
raise _flatten_group(non_stoptests)[0] from None
assert stoptests is not None

# multiple stoptests: raising the one with the lowest testcounter
raise min(_flatten_group(stoptests), key=lambda s_e: s_e.testcounter)


Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, the semantics here (generally) are a good question.

I'd opine:

  • If all exceptions are Frozen, raise the first (as above).
  • Else, if exactly one exception is non-Frozen, raise that one [*].
  • Otherwise, reconstruct and raise a group with just the non-frozen ones (maybe .with_traceback(eg.__traceback__)).

[*] hm, I see, that would cause an undesired unwrapping if user code raises a single-exception ExceptionGroup. Maybe the second step should be "if exactly one ... and that exception is a StopException or HypothesisException.

Copy link
Author

@jakkdl jakkdl Sep 24, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If all exceptions are Frozen, raise the first (as above).

Actually, I think this could cause confusion. If I spawn three tasks and I only get one Frozen, I will assume that the-thing-that-caused-Frozen only happened in one of the tasks. We should probably err on the side of not unwrapping unless necessary to get internal exception handling to work.

edit: nvm, I thought frozen didn't get swallowed by @given

edit2: I'm confused about how hypothesis works 😅

edit3: Aaaahhh.

except failure_exceptions_to_catch() as e:
# If an unhandled (i.e., non-Hypothesis) error was raised by
# Hypothesis-internal code, re-raise it as a fatal error instead
# of treating it as a test failure.
filepath = traceback.extract_tb(e.__traceback__)[-1][0]
if is_hypothesis_file(filepath) and not isinstance(e, HypothesisException):
raise
if data.frozen:
# This can happen if an error occurred in a finally
# block somewhere, suppressing our original StopTest.
# We raise a new one here to resume normal operation.
raise StopTest(data.testcounter) from e

This means that if we got our data frozen, any exceptions (no matter if inside a group or not) get disregarded. So it doesn't matter what we do if there's only frozen exceptions

edit4: and this is very bad, because it can suppress TypeError and anything. Gotta rejig some logic here

edit5: except that's what the code currently does in sync code, so... ?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another question is if we get multiple HypothesisException/StopException, if that's possible?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we get multiple StopTest, re-raise the one with the lowest _testcounter i.e. the first one to occur.

class StateForActualGivenExecution:
def __init__(self, stuff, test, settings, random, wrapped_test):
self.test_runner = get_executor(stuff.selfy)
Expand Down Expand Up @@ -841,7 +891,7 @@ def execute_once(

@proxies(self.test)
def test(*args, **kwargs):
with ensure_free_stackframes():
with unwrap_exception_group(), ensure_free_stackframes():
return self.test(*args, **kwargs)

else:
Expand All @@ -853,7 +903,7 @@ def test(*args, **kwargs):
arg_gctime = gc_cumulative_time()
start = time.perf_counter()
try:
with ensure_free_stackframes():
with unwrap_exception_group(), ensure_free_stackframes():
result = self.test(*args, **kwargs)
finally:
finish = time.perf_counter()
Expand Down Expand Up @@ -1086,6 +1136,9 @@ def _execute_once_for_engine(self, data: ConjectureData) -> None:
# This can happen if an error occurred in a finally
# block somewhere, suppressing our original StopTest.
# We raise a new one here to resume normal operation.
# TODO: this should maybe inspect the stacktrace to see that the above
# mentioned story is true? I.e. reraise as StopTest iff there is a
# StopTest somewhere in the tree of e.__context__
Comment on lines +1149 to +1151
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alas, raise Whatever() from None would defeat this inspection.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that would only override the __cause__ no? And the comment specifically talks about errors happening in finally, where you rarely raise explicit exceptions.
It seems pretty dangerous to ignore all exceptions in case the data is frozen, shouldn't the user be informed if there's errors happening in their teardown code?
Although thinking about it more, the problem is perhaps that the StopTest.__cause__ is not getting reraised/logged?

raise StopTest(data.testcounter) from e
else:
# The test failed by raising an exception, so we inform the
Expand Down Expand Up @@ -1146,7 +1199,7 @@ def _execute_once_for_engine(self, data: ConjectureData) -> None:
self._timing_features = {}

def _deliver_information_message(
self, *, type: str, title: str, content: Union[str, dict]
self, *, type: str, title: str, content: str | dict
) -> None:
deliver_json_blob(
{
Expand Down Expand Up @@ -1249,6 +1302,7 @@ def run_engine(self):
ran_example.slice_comments = falsifying_example.slice_comments
tb = None
origin = None
assert info is not None
assert info._expected_exception is not None
try:
with with_reporter(fragments.append):
Expand Down Expand Up @@ -1407,7 +1461,7 @@ class HypothesisHandle:
@property
def fuzz_one_input(
self,
) -> Callable[[Union[bytes, bytearray, memoryview, BinaryIO]], Optional[bytes]]:
) -> Callable[[bytes | bytearray | memoryview | BinaryIO], bytes | None]:
"""Run the test as a fuzz target, driven with the `buffer` of bytes.

Returns None if buffer invalid for the strategy, canonical pruned
Expand All @@ -1428,7 +1482,7 @@ def fuzz_one_input(
def given(
_: EllipsisType, /
) -> Callable[
[Callable[..., Optional[Coroutine[Any, Any, None]]]], Callable[[], None]
[Callable[..., Coroutine[Any, Any, None] | None]], Callable[[], None]
]: # pragma: no cover
...

Expand All @@ -1437,26 +1491,24 @@ def given(
def given(
*_given_arguments: SearchStrategy[Any],
) -> Callable[
[Callable[..., Optional[Coroutine[Any, Any, None]]]], Callable[..., None]
[Callable[..., Coroutine[Any, Any, None] | None]], Callable[..., None]
]: # pragma: no cover
...


@overload
def given(
**_given_kwargs: Union[SearchStrategy[Any], EllipsisType],
**_given_kwargs: SearchStrategy[Any] | EllipsisType,
) -> Callable[
[Callable[..., Optional[Coroutine[Any, Any, None]]]], Callable[..., None]
[Callable[..., Coroutine[Any, Any, None] | None]], Callable[..., None]
]: # pragma: no cover
...


def given(
*_given_arguments: Union[SearchStrategy[Any], EllipsisType],
**_given_kwargs: Union[SearchStrategy[Any], EllipsisType],
) -> Callable[
[Callable[..., Optional[Coroutine[Any, Any, None]]]], Callable[..., None]
]:
*_given_arguments: SearchStrategy[Any] | EllipsisType,
**_given_kwargs: SearchStrategy[Any] | EllipsisType,
) -> Callable[[Callable[..., Coroutine[Any, Any, None] | None]], Callable[..., None]]:
"""A decorator for turning a test function that accepts arguments into a
randomized test.

Expand Down Expand Up @@ -1725,7 +1777,7 @@ def wrapped_test(*arguments, **kwargs):
raise SKIP_BECAUSE_NO_EXAMPLES

def _get_fuzz_target() -> (
Callable[[Union[bytes, bytearray, memoryview, BinaryIO]], Optional[bytes]]
Callable[[bytes | bytearray | memoryview | BinaryIO], bytes | None]
):
# Because fuzzing interfaces are very performance-sensitive, we use a
# somewhat more complicated structure here. `_get_fuzz_target()` is
Expand Down Expand Up @@ -1757,8 +1809,8 @@ def _get_fuzz_target() -> (
minimal_failures: dict = {}

def fuzz_one_input(
buffer: Union[bytes, bytearray, memoryview, BinaryIO]
) -> Optional[bytes]:
buffer: bytes | bytearray | memoryview | BinaryIO,
) -> bytes | None:
# This inner part is all that the fuzzer will actually run,
# so we keep it as small and as fast as possible.
if isinstance(buffer, io.IOBase):
Expand Down Expand Up @@ -1812,9 +1864,9 @@ def find(
specifier: SearchStrategy[Ex],
condition: Callable[[Any], bool],
*,
settings: Optional[Settings] = None,
random: Optional[Random] = None,
database_key: Optional[bytes] = None,
settings: Settings | None = None,
random: Random | None = None,
database_key: bytes | None = None,
) -> Ex:
"""Returns the minimal example from the given strategy ``specifier`` that
matches the predicate function ``condition``."""
Expand All @@ -1837,7 +1889,7 @@ def find(
)
specifier.validate()

last: List[Ex] = []
last: list[Ex] = []

@settings
@given(specifier)
Expand Down
33 changes: 29 additions & 4 deletions hypothesis-python/src/hypothesis/internal/escalation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@
import sys
import textwrap
import traceback
from functools import partial
from inspect import getframeinfo
from pathlib import Path
from typing import Dict, NamedTuple, Optional, Type
from typing import Dict, NamedTuple, Optional, Tuple, Type

import hypothesis
from hypothesis.errors import _Trimmable
Expand Down Expand Up @@ -107,20 +108,44 @@ def __str__(self) -> str:
return f"{self.exc_type.__name__} at {self.filename}:{self.lineno}{ctx}{group}"

@classmethod
def from_exception(cls, exception: BaseException, /) -> "InterestingOrigin":
def from_exception(
cls,
exception: BaseException,
/,
handled_exceptions: Tuple[BaseException, ...] = (),
) -> "InterestingOrigin":
if exception in handled_exceptions:
exception.__context__ = None
filename, lineno = None, None
if tb := get_trimmed_traceback(exception):
filename, lineno, *_ = traceback.extract_tb(tb)[-1]

# Quick dirty fix for #4115
handled_exceptions = (*handled_exceptions, exception)

return cls(
type(exception),
filename,
lineno,
# Note that if __cause__ is set it is always equal to __context__, explicitly
# to support introspection when debugging, so we can use that unconditionally.
cls.from_exception(exception.__context__) if exception.__context__ else (),
(
cls.from_exception(
exception.__context__, handled_exceptions=handled_exceptions
)
if exception.__context__
else ()
),
# We distinguish exception groups by the inner exceptions, as for __context__
(
tuple(map(cls.from_exception, exception.exceptions))
tuple(
map(
partial(
cls.from_exception, handled_exceptions=handled_exceptions
),
exception.exceptions,
)
)
if isinstance(exception, BaseExceptionGroup)
else ()
),
Expand Down
Loading
Loading