Skip to content

Commit

Permalink
[Exception Replay] Normalized exception hashing for more fine-grained…
Browse files Browse the repository at this point in the history
… aggregation (#5872)

## Summary of changes
In Exception Replay, the exception could be in one of several phases.
Two of these phases are `Done` and `Invalidated`.
- `Done`: The exception has already been captured and is waiting for the
next epoch to be woken up.
- `Invalidated`: None of the frames could be captured, reporting tags
that tend to assist in understanding the reasoning for troubleshooting.

To be able to detect those phases as quickly as possible, a cache is used
for a lookup before performing intensive calculations on the
`System.Exception` object itself, since the execution path is hot and
runs as part of unwinding the request with an exception. The lookup key
used to be simply an **Fnv1a** hash of the `exception.ToString()` of the
exception reaching the service root span.
Basing the hashing on `exception.ToString()` led to scenarios where two
identical exceptions seemed different because of one of various factors:
non-deterministic participating frames, exception messages, PDB info
(file path + line number), etc.

To be able to determine quickly which phase the exception is in
without performing costly computations, a new way of hashing is required
that cleanses the exceptions in such a way that two _similar_ exceptions
fall into the same case, even though their stack traces are not
identical. Also, the new algorithm should be as performant as possible
with as few temporary allocations as possible - it runs every
time a service root span is finalized with an exception.

## Reason for change
Improve the experience of Exception Replay in cases where we failed to report an
exception because we could not determine whether the exception is in the `Done` /
`Invalidated` phase, as a result of its previous occurrence looking a
bit different.

## Implementation details
A new class, `ExceptionNormalizer`, has been added that takes as input
the string representation of the exception alongside its outermost
exception type, and one level deep of inner exception. It cleanses the
exception of the aforementioned attributes, and performs a more
fine-grained hash that should have a better distribution based on the
actual exception, leaving out all the non-relevant bits that might
differ.

## Test coverage

[ExceptionNormalizerTests](https://github.com/DataDog/dd-trace-dotnet/blob/821e4860632a8fcb258bbbe74506249cb6865659/tracer/test/Datadog.Trace.Debugger.IntegrationTests/ExceptionNormalizerTests.cs)
with approvals on the hash + string representing the cleansed stack
trace.

## Other details
Fixes #DEBUG-2674
  • Loading branch information
GreenMatan committed Aug 13, 2024
1 parent abd12ae commit 0c18aa7
Show file tree
Hide file tree
Showing 42 changed files with 540 additions and 60 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,17 @@ internal static class CachedDoneExceptions
{
private static readonly CachedItems _cachedDoneExceptions = new CachedItems();

internal static void Add(string item)
internal static void Add(int item)
{
_cachedDoneExceptions.Add(item);
}

internal static bool Remove(string item)
internal static bool Remove(int item)
{
return _cachedDoneExceptions.Remove(item);
}

internal static bool Contains(string item)
internal static bool Contains(int item)
{
return _cachedDoneExceptions.Contains(item);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,48 +18,46 @@ namespace Datadog.Trace.Debugger.ExceptionAutoInstrumentation
{
internal class CachedItems
{
private readonly HashSet<int> cache = new();
private readonly ReaderWriterLockSlim cacheLocker = new();
private readonly HashSet<int> _cache = new();
private readonly ReaderWriterLockSlim _cacheLocker = new();

internal void Add(string item)
internal void Add(int item)
{
cacheLocker.EnterWriteLock();
_cacheLocker.EnterWriteLock();
try
{
cache.Add(Hash(item));
_cache.Add(item);
}
finally
{
cacheLocker.ExitWriteLock();
_cacheLocker.ExitWriteLock();
}
}

internal bool Remove(string item)
internal bool Remove(int item)
{
cacheLocker.EnterWriteLock();
_cacheLocker.EnterWriteLock();
try
{
return cache.Remove(Hash(item));
return _cache.Remove(item);
}
finally
{
cacheLocker.ExitWriteLock();
_cacheLocker.ExitWriteLock();
}
}

internal bool Contains(string item)
internal bool Contains(int item)
{
cacheLocker.EnterReadLock();
_cacheLocker.EnterReadLock();
try
{
return cache.Contains(Hash(item));
return _cache.Contains(item);
}
finally
{
cacheLocker.ExitReadLock();
_cacheLocker.ExitReadLock();
}
}

private int Hash(string item) => Fnv1aHash.GetFNVHashCode(StringEncoding.UTF8.GetBytes(item));
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
// <copyright file="ExceptionNormalizer.cs" company="Datadog">
// Unless explicitly stated otherwise all files in this repository are licensed under the Apache 2 License.
// This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2017 Datadog, Inc.
// </copyright>

using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Text;
using Datadog.Trace.VendoredMicrosoftCode.System;
using Fnv1aHash = Datadog.Trace.VendoredMicrosoftCode.System.Reflection.Internal.Hash;
using MemoryExtensions = Datadog.Trace.Debugger.Helpers.MemoryExtensions;

#nullable enable
namespace Datadog.Trace.Debugger.ExceptionAutoInstrumentation
{
internal class ExceptionNormalizer
{
    /// <summary>
    /// Produces a hash for an exception out of its type names and the stack-frame lines of its string form.
    /// The hash is seeded with <c>Fnv1aHash.FnvOffsetBias</c>, fed the outer exception type, then the inner
    /// exception type (when supplied), and finally every line of <paramref name="exceptionString"/> that,
    /// once leading whitespace is trimmed, starts with <c>at </c>. Any other line (error messages, custom
    /// text attached to the exception) does not participate in the hash.
    /// Each frame line is truncated at its first <c> in </c> so that trailing file/line info is left out, and frames
    /// containing <c>lambda_</c>, <c>at Microsoft.</c>, <c>at Datadog.</c> or <c>at System.</c> are skipped entirely.
    /// Used to aggregate same/similar exceptions that only differ by non-relevant bits.
    /// </summary>
    /// <param name="exceptionString">String representation of the exception; must not be null or empty.</param>
    /// <param name="outerExceptionType">Type name of the outermost exception.</param>
    /// <param name="innerExceptionType">Type name of the inner exception, or <c>null</c> when there is none.</param>
    /// <returns>The hash of the cleansed exception.</returns>
    /// <exception cref="ArgumentException">Thrown when <paramref name="exceptionString"/> is null or empty.</exception>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal int NormalizeAndHashException(string exceptionString, string outerExceptionType, string? innerExceptionType)
    {
        if (string.IsNullOrEmpty(exceptionString))
        {
            throw new ArgumentException(@"Exception string cannot be null or empty", nameof(exceptionString));
        }

        // Markers used to recognise and filter stack-frame lines.
        var frameMarker = VendoredMicrosoftCode.System.MemoryExtensions.AsSpan("at ");
        var sourceInfoMarker = VendoredMicrosoftCode.System.MemoryExtensions.AsSpan(" in ");
        var lambdaMarker = VendoredMicrosoftCode.System.MemoryExtensions.AsSpan("lambda_");
        var microsoftMarker = VendoredMicrosoftCode.System.MemoryExtensions.AsSpan("at Microsoft.");
        var datadogMarker = VendoredMicrosoftCode.System.MemoryExtensions.AsSpan("at Datadog.");
        var systemMarker = VendoredMicrosoftCode.System.MemoryExtensions.AsSpan("at System.");

        // The exception type names are hashed first, ahead of any frame.
        var hash = HashLine(VendoredMicrosoftCode.System.MemoryExtensions.AsSpan(outerExceptionType), Fnv1aHash.FnvOffsetBias);
        if (innerExceptionType is not null)
        {
            hash = HashLine(VendoredMicrosoftCode.System.MemoryExtensions.AsSpan(innerExceptionType), hash);
        }

        var remaining = VendoredMicrosoftCode.System.MemoryExtensions.AsSpan(exceptionString);
        while (!remaining.IsEmpty)
        {
            // Peel the next line off the front of the remaining text.
            VendoredMicrosoftCode.System.ReadOnlySpan<char> currentLine;
            var lineBreakAt = remaining.IndexOfAny('\r', '\n');
            if (lineBreakAt < 0)
            {
                // No more line breaks: the rest of the text is the last line.
                currentLine = remaining;
                remaining = default;
            }
            else
            {
                currentLine = remaining.Slice(0, lineBreakAt);
                remaining = remaining.Slice(lineBreakAt + 1);

                // Also consume a '\n' that directly follows the break (e.g. the second half of "\r\n").
                if (!remaining.IsEmpty && remaining[0] == '\n')
                {
                    remaining = remaining.Slice(1);
                }
            }

            // Only frame lines (those starting with `at ` after leading whitespace) are hashed.
            if (!VendoredMicrosoftCode.System.MemoryExtensions.StartsWith(currentLine.TrimStart(), frameMarker, StringComparison.Ordinal))
            {
                continue;
            }

            // Drop everything from the first " in " onwards, when present.
            var sourceInfoAt = VendoredMicrosoftCode.System.MemoryExtensions.IndexOf(currentLine, sourceInfoMarker, StringComparison.Ordinal);
            if (sourceInfoAt > 0)
            {
                currentLine = currentLine.Slice(0, sourceInfoAt);
            }

            // Frames matching any of these markers are left out of the hash.
            var isExcludedFrame =
                VendoredMicrosoftCode.System.MemoryExtensions.Contains(currentLine, lambdaMarker, StringComparison.Ordinal) ||
                VendoredMicrosoftCode.System.MemoryExtensions.Contains(currentLine, microsoftMarker, StringComparison.Ordinal) ||
                VendoredMicrosoftCode.System.MemoryExtensions.Contains(currentLine, datadogMarker, StringComparison.Ordinal) ||
                VendoredMicrosoftCode.System.MemoryExtensions.Contains(currentLine, systemMarker, StringComparison.Ordinal);

            if (!isExcludedFrame)
            {
                hash = HashLine(currentLine, hash);
            }
        }

        return hash;
    }

    /// <summary>
    /// Folds every character of <paramref name="line"/>, in order, into <paramref name="fnvHashCode"/>
    /// using <c>Fnv1aHash.Combine</c> and returns the resulting hash.
    /// </summary>
    /// <param name="line">The characters to add to the hash.</param>
    /// <param name="fnvHashCode">The hash accumulated so far.</param>
    /// <returns>The accumulated hash after all characters of <paramref name="line"/> were combined.</returns>
    protected virtual int HashLine(VendoredMicrosoftCode.System.ReadOnlySpan<char> line, int fnvHashCode)
    {
        var position = 0;
        while (position < line.Length)
        {
            fnvHashCode = Fnv1aHash.Combine((uint)line[position], fnvHashCode);
            position++;
        }

        return fnvHashCode;
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,17 @@ namespace Datadog.Trace.Debugger.ExceptionAutoInstrumentation
internal static class ExceptionReplayDiagnosticTagNames
{
public const string Eligible = nameof(Eligible);
public const string EmptyShadowStack = nameof(EmptyShadowStack);
public const string NotEligible = nameof(NotEligible);
public const string ExceptionTrackManagerNotInitialized = nameof(ExceptionTrackManagerNotInitialized);
public const string NotRootSpan = nameof(NotRootSpan);
public const string ExceptionObjectIsNull = nameof(ExceptionObjectIsNull);
public const string NonSupportedExceptionType = nameof(NonSupportedExceptionType);
public const string CachedDoneExceptionCase = nameof(CachedDoneExceptionCase);
public const string CachedInvalidatedExceptionCase = nameof(CachedInvalidatedExceptionCase);
public const string InvalidatedExceptionCase = nameof(InvalidatedExceptionCase);
public const string CircuitBreakerIsOpen = nameof(CircuitBreakerIsOpen);
public const string NonCachedDoneExceptionCase = nameof(NonCachedDoneExceptionCase);
public const string NotSupportedExceptionType = nameof(NotSupportedExceptionType);
public const string NoCustomerFrames = nameof(NoCustomerFrames);
public const string NoFramesToInstrument = nameof(NoFramesToInstrument);
public const string EmptyCallStackTreeWhileCollecting = nameof(EmptyCallStackTreeWhileCollecting);
public const string InvalidatedCase = nameof(InvalidatedCase);
Expand Down
Loading

0 comments on commit 0c18aa7

Please sign in to comment.