Skip to content

Commit

Permalink
feat(grouping): Exception Groups (#48653)
Browse files Browse the repository at this point in the history
Implements issue grouping and titling changes required for processing
exception groups per [Sentry RFC
79](https://github.com/getsentry/rfcs/blob/main/text/0079-exception-groups.md#sentry-issue-grouping).

Part of #37716

A few notes:

- The issue titling requirements were implemented by dropping a
`main_exception_id` on the event data in the exception grouping logic,
and then later using it in the `ErrorEvent.extract_metadata` function.
If present, it uses the corresponding exception for metadata including
the title. This is working well, but just wanted to check if this is
safe, or if there's a better way.

- In the RFC, I had indicated that disparate top-level exceptions would
be grouped under their immediate parent exception, which might not
necessarily be the root exception. This turned out to be difficult to
implement, and didn't add much value. Instead, I always use the root
exception. This seems to work well enough. I included a comment at the
appropriate place in the code, in case it comes up in the future.

- I only modified the `NewStyle` strategy. I'm not sure if `Legacy` or
other strategies should be updated as well?

- I fixed a related bug in `exception.py`, which was that the first
exception was being used to create default issue tags instead of the
last. This should be done regardless of exception groups, as it corrects
the `handled` and `mechanism` event tags such that they are for the
outermost (latest) exception. Tests are updated to match this change as
well.
  • Loading branch information
mattjohnsonpint authored May 22, 2023
1 parent adcc680 commit f4fa54e
Show file tree
Hide file tree
Showing 127 changed files with 3,307 additions and 28 deletions.
27 changes: 26 additions & 1 deletion src/sentry/eventtypes/error.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,32 @@ class ErrorEvent(BaseEvent):
key = "error"

def extract_metadata(self, data):
exception = get_path(data, "exception", "values", -1)

exceptions = get_path(data, "exception", "values")
if not exceptions:
return {}

# When there are multiple exceptions, we need to pick one to extract the metadata from.
# If the event data has been marked with a main_exception_id, then we should be able to
# find the exception with the matching metadata.exception_id and use that one.
# This can be the case for some exception groups.

# Otherwise, the default behavior is to use the last one in the list.

main_exception_id = get_path(data, "main_exception_id")
exception = (
next(
exception
for exception in exceptions
if get_path(exception, "mechanism", "exception_id") == main_exception_id
)
if main_exception_id
else None
)

if not exception:
exception = get_path(exceptions, -1)

if not exception:
return {}

Expand Down
142 changes: 135 additions & 7 deletions src/sentry/grouping/strategies/newstyle.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import itertools
import re
from typing import Any, Dict, List, Optional
from typing import Any, Dict, Generator, List, Optional

import sentry_sdk

from sentry.eventstore.models import Event
from sentry.grouping.component import GroupingComponent, calculate_tree_label
Expand All @@ -13,8 +16,9 @@
from sentry.grouping.strategies.message import trim_message_for_grouping
from sentry.grouping.strategies.similarity_encoders import ident_encoder, text_shingle_encoder
from sentry.grouping.strategies.utils import has_url_origin, remove_non_stacktrace_variants
from sentry.grouping.utils import hash_from_values
from sentry.interfaces.exception import Exception as ChainedException
from sentry.interfaces.exception import SingleException
from sentry.interfaces.exception import Mechanism, SingleException
from sentry.interfaces.stacktrace import Frame, Stacktrace
from sentry.interfaces.threads import Threads
from sentry.stacktraces.platform import get_behavior_family_for_platform
Expand Down Expand Up @@ -706,19 +710,40 @@ def single_exception(
def chained_exception(
interface: ChainedException, event: Event, context: GroupingContext, **meta: Any
) -> ReturnedVariants:

# Get all the exceptions to consider.
all_exceptions = interface.exceptions()

# Get the grouping components for all exceptions up front, as we'll need them in a few places and only want to compute them once.
exception_components = {
id(exception): context.get_grouping_component(exception, event=event, **meta)
for exception in all_exceptions
}

# Filter the exceptions according to rules for handling exception groups.
with sentry_sdk.start_span(
op="grouping.strategies.newstyle.filter_exceptions_for_exception_groups"
) as span:
try:
exceptions = filter_exceptions_for_exception_groups(
all_exceptions, exception_components, event
)
except Exception:
# We shouldn't have exceptions here. But if we do, just record it and continue with the original list.
sentry_sdk.capture_exception()
span.set_status("internal_error")
exceptions = all_exceptions

# Case 1: we have a single exception, use the single exception
# component directly to avoid a level of nesting
exceptions = interface.exceptions()
if len(exceptions) == 1:
return context.get_grouping_component(exceptions[0], event=event, **meta)
return exception_components[id(exceptions[0])]

# Case 2: produce a component for each chained exception
by_name: Dict[str, List[GroupingComponent]] = {}

for exception in exceptions:
for name, component in context.get_grouping_component(
exception, event=event, **meta
).items():
for name, component in exception_components[id(exception)].items():
by_name.setdefault(name, []).append(component)

rv = {}
Expand All @@ -733,6 +758,109 @@ def chained_exception(
return rv


# See https://github.com/getsentry/rfcs/blob/main/text/0079-exception-groups.md#sentry-issue-grouping
def filter_exceptions_for_exception_groups(
exceptions: List[SingleException],
exception_components: Dict[int, GroupingComponent],
event: Event,
) -> List[SingleException]:

# This function only filters exceptions if there are at least two exceptions.
if len(exceptions) <= 1:
return exceptions

# Reconstruct the tree of exceptions if the required data is present.
class ExceptionTreeNode:
def __init__(
self,
exception: Optional[SingleException] = None,
children: Optional[List[SingleException]] = None,
):
self.exception = exception
self.children = children if children else []

exception_tree: Dict[int, ExceptionTreeNode] = {}
for exception in reversed(exceptions):
mechanism: Mechanism = exception.mechanism
if mechanism and mechanism.exception_id is not None:
node = exception_tree.setdefault(
mechanism.exception_id, ExceptionTreeNode()
).exception = exception
node.exception = exception
if mechanism.parent_id is not None:
parent_node = exception_tree.setdefault(mechanism.parent_id, ExceptionTreeNode())
parent_node.children.append(exception)
else:
# At least one exception is missing mechanism ids, so we can't continue with the filter.
# Exit early to not waste perf.
return exceptions

# This gets the child exceptions for an exception using the exception_id from the mechanism.
# That data is guaranteed to exist at this point.
def get_child_exceptions(exception: SingleException) -> List[SingleException]:
exception_id = exception.mechanism.exception_id
node = exception_tree.get(exception_id)
return node.children if node else []

# This recursive generator gets the "top-level exceptions", and is used below.
# "Top-level exceptions are those that are the first descendants of the root that are not exception groups.
# For examples, see https://github.com/getsentry/rfcs/blob/main/text/0079-exception-groups.md#sentry-issue-grouping
def get_top_level_exceptions(
exception: SingleException,
) -> Generator[SingleException, None, None]:
if exception.mechanism.is_exception_group:
children = get_child_exceptions(exception)
yield from itertools.chain.from_iterable(
get_top_level_exceptions(child) for child in children
)
else:
yield exception

# This recursive generator gets the "first-path" of exceptions, and is used below.
# The first path follows from the root to a leaf node, but only following the first child of each node.
def get_first_path(exception: SingleException) -> Generator[SingleException, None, None]:
yield exception
children = get_child_exceptions(exception)
if children:
yield from get_first_path(children[0])

# Traverse the tree recursively from the root exception to get all "top-level exceptions" and sort for consistency.
top_level_exceptions = sorted(
get_top_level_exceptions(exception_tree[0].exception),
key=lambda exception: str(exception.type),
reverse=True,
)

# Figure out the distinct top-level exceptions, grouping by the hash of the grouping component values.
distinct_top_level_exceptions = [
next(group)
for _, group in itertools.groupby(
top_level_exceptions,
key=lambda exception: hash_from_values(exception_components[id(exception)].values())
or "",
)
]

# If there's only one distinct top-level exception in the group,
# use it and its first-path children, but throw out the exception group and any copies.
# For example, Group<['Da', 'Da', 'Da']> should just be treated as a single 'Da'.
# We'll also set the main_exception_id, which is used in the extract_metadata function
# in src/sentry/eventtypes/error.py - which will ensure the issue is titled by this
# item rather than the exception group.
if len(distinct_top_level_exceptions) == 1:
main_exception = distinct_top_level_exceptions[0]
event.data["main_exception_id"] = main_exception.mechanism.exception_id
return list(get_first_path(main_exception))

# When there's more than one distinct top-level exception, return one of each of them AND the root exception group.
# NOTE: This deviates from the original RFC, because finding a common ancestor that shares
# one of each top-level exception that is _not_ the root is overly complicated.
# Also, it's more likely the stack trace of the root exception will be more meaningful
# than one of an inner exception group.
distinct_top_level_exceptions.append(exception_tree[0].exception)
return distinct_top_level_exceptions


@chained_exception.variant_processor
def chained_exception_variant_processor(
variants: ReturnedVariants, context: GroupingContext, **meta: Any
Expand Down
6 changes: 3 additions & 3 deletions src/sentry/interfaces/exception.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,15 +465,15 @@ def to_string(self, event, is_public=False, **kwargs):
return ("".join(output)).strip()

def get_stacktrace(self, *args, **kwargs):
exc = self.values[0]
exc = self.values[-1]
if exc.stacktrace:
return exc.stacktrace.get_stacktrace(*args, **kwargs)
return ""

def iter_tags(self):
if not self.values or not self.values[0]:
if not self.values or not self.values[-1]:
return

mechanism = self.values[0].mechanism
mechanism = self.values[-1].mechanism
if mechanism:
yield from mechanism.iter_tags()
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---
created: '2023-04-19T16:08:28.133847Z'
created: '2023-05-05T19:28:49.089651Z'
creator: sentry
source: tests/sentry/event_manager/interfaces/test_exception.py
---
Expand All @@ -9,9 +9,7 @@ get_api_context:
hasSystemFrames: true
values:
- mechanism:
exception_id: 1
is_exception_group: true
parent_id: 0
exception_id: 0
source: __context__
type: generic
module: foo.bar
Expand Down Expand Up @@ -41,12 +39,13 @@ get_api_context:
threadId: null
type: ValueError
value: hello world
tags:
- - mechanism
- generic
to_json:
values:
- mechanism:
exception_id: 1
is_exception_group: true
parent_id: 0
exception_id: 0
source: __context__
type: generic
module: foo.bar
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
---
created: '2023-05-05T19:28:49.100372Z'
creator: sentry
source: tests/sentry/event_manager/interfaces/test_exception.py
---
errors: null
get_api_context:
excOmitted: null
hasSystemFrames: true
values:
- mechanism:
exception_id: 1
handled: true
parent_id: 0
source: __context__
type: chained
module: foo.bar
rawStacktrace: null
stacktrace:
frames:
- absPath: foo/baz.py
colNo: null
context: []
errors: null
filename: foo/baz.py
function: null
inApp: true
instructionAddr: null
lineNo: 1
module: null
package: null
platform: null
rawFunction: null
symbol: null
symbolAddr: null
trust: null
vars: null
framesOmitted: null
hasSystemFrames: true
registers: null
threadId: null
type: ValueError
value: hello world
- mechanism:
exception_id: 0
handled: false
is_exception_group: true
source: __context__
type: generic
module: foo.bar
rawStacktrace: null
stacktrace:
frames:
- absPath: foo/baz.py
colNo: null
context: []
errors: null
filename: foo/baz.py
function: null
inApp: true
instructionAddr: null
lineNo: 1
module: null
package: null
platform: null
rawFunction: null
symbol: null
symbolAddr: null
trust: null
vars: null
framesOmitted: null
hasSystemFrames: true
registers: null
threadId: null
type: ValueError
value: hello world
tags:
- - handled
- 'no'
- - mechanism
- generic
to_json:
values:
- mechanism:
exception_id: 1
handled: true
parent_id: 0
source: __context__
type: chained
module: foo.bar
stacktrace:
frames:
- abs_path: foo/baz.py
filename: foo/baz.py
in_app: true
lineno: 1
type: ValueError
value: hello world
- mechanism:
exception_id: 0
handled: false
is_exception_group: true
source: __context__
type: generic
module: foo.bar
stacktrace:
frames:
- abs_path: foo/baz.py
filename: foo/baz.py
in_app: true
lineno: 1
type: ValueError
value: hello world
to_string: "ValueError: hello world\n File \"foo/baz.py\", line 1\n\nValueError:\
\ hello world\n File \"foo/baz.py\", line 1"
Loading

0 comments on commit f4fa54e

Please sign in to comment.