Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Exception Replay] Better communicate non-captured exceptions #5371

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -7,54 +7,32 @@
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;

#nullable enable
namespace Datadog.Trace.Debugger.ExceptionAutoInstrumentation
{
internal class CachedDoneExceptions
/// <summary>
/// Acts as a proxy to a static `CachedItems` object to make the Exception that are in done cases easily accesible, throughout the codebase
/// without references detouring.
/// </summary>
internal static class CachedDoneExceptions
{
private static readonly HashSet<string> DoneExceptions = new();
private static readonly ReaderWriterLockSlim DoneExceptionsLocker = new();
private static readonly CachedItems _cachedDoneExceptions = new CachedItems();

internal static void Add(string exceptionToString)
internal static void Add(string item)
{
DoneExceptionsLocker.EnterWriteLock();
try
{
DoneExceptions.Add(exceptionToString);
}
finally
{
DoneExceptionsLocker.ExitWriteLock();
}
_cachedDoneExceptions.Add(item);
}

internal static bool Remove(string exceptionToString)
internal static bool Remove(string item)
{
DoneExceptionsLocker.EnterWriteLock();
try
{
return DoneExceptions.Remove(exceptionToString);
}
finally
{
DoneExceptionsLocker.ExitWriteLock();
}
return _cachedDoneExceptions.Remove(item);
}

internal static bool Contains(string exceptionToString)
internal static bool Contains(string item)
{
DoneExceptionsLocker.EnterReadLock();
try
{
return DoneExceptions.Contains(exceptionToString);
}
finally
{
DoneExceptionsLocker.ExitReadLock();
}
return _cachedDoneExceptions.Contains(item);
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// <copyright file="CachedItems.cs" company="Datadog">
// Unless explicitly stated otherwise all files in this repository are licensed under the Apache 2 License.
// This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2017 Datadog, Inc.
// </copyright>

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Datadog.Trace.Vendors.MessagePack;
using Fnv1aHash = Datadog.Trace.VendoredMicrosoftCode.System.Reflection.Internal.Hash;

#nullable enable
namespace Datadog.Trace.Debugger.ExceptionAutoInstrumentation
{
internal class CachedItems
{
private readonly HashSet<int> cache = new();
private readonly ReaderWriterLockSlim cacheLocker = new();

internal void Add(string item)
{
cacheLocker.EnterWriteLock();
try
{
cache.Add(Hash(item));
}
finally
{
cacheLocker.ExitWriteLock();
}
}

internal bool Remove(string item)
{
cacheLocker.EnterWriteLock();
try
{
return cache.Remove(Hash(item));
}
finally
{
cacheLocker.ExitWriteLock();
}
}

internal bool Contains(string item)
{
cacheLocker.EnterReadLock();
try
{
return cache.Contains(Hash(item));
}
finally
{
cacheLocker.ExitReadLock();
}
}

private int Hash(string item) => Fnv1aHash.GetFNVHashCode(StringEncoding.UTF8.GetBytes(item));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ internal enum ExceptionCollectionState : byte
Initializing,
Collecting,
Finalizing,
Invalidated,
None
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ internal class ExceptionTrackManager
private static readonly int MaxFramesToCapture = ExceptionDebugging.Settings.MaximumFramesToCapture;
private static readonly TimeSpan RateLimit = ExceptionDebugging.Settings.RateLimit;
private static readonly BasicCircuitBreaker ReportingCircuitBreaker = new(ExceptionDebugging.Settings.MaxExceptionAnalysisLimit, TimeSpan.FromSeconds(1));
private static CachedItems _invalidatedCases = new CachedItems();
private static Task? _exceptionProcessorTask;
private static bool _isInitialized;

Expand Down Expand Up @@ -81,12 +82,21 @@ public static void Report(Span span, Exception? exception)

private static void ReportInternal(Exception exception, ErrorOriginKind errorOrigin, Span rootSpan)
{
if (CachedDoneExceptions.Contains(exception.ToString()))
var exToString = exception.ToString();

if (CachedDoneExceptions.Contains(exToString))
{
// Quick exit.
return;
}

if (_invalidatedCases.Contains(exToString))
{
Log.Information("Encountered invalidated exception.ToString(), exception: {Exception}", exToString);
ProcessException(exception, errorOrigin, rootSpan);
return;
}

if (ReportingCircuitBreaker.Trip() == CircuitBreakerState.Opened)
{
if (Log.IsEnabled(LogEventLevel.Debug))
Expand Down Expand Up @@ -141,58 +151,79 @@ private static void ProcessException(Exception exception, ErrorOriginKind errorO
if (trackedExceptionCase.IsDone)
{
}
else if (trackedExceptionCase.IsInvalidated)
{
if (rootSpan == null)
{
Log.Warning("rootSpan is null while processing invalidated case. Should not happen. exception: {Exception}", exception.ToString());
return;
}

var allFrames = trackedExceptionCase.ExceptionCase.ExceptionId.StackTrace;
var allProbes = trackedExceptionCase.ExceptionCase.Probes;
var frameIndex = allFrames.Length - 1;
var debugErrorPrefix = "_dd.debug.error";
var assignIndex = 0;

while (frameIndex >= 0)
{
var participatingFrame = allFrames[frameIndex--];
var noCaptureReason = GetNoCaptureReason(participatingFrame, allProbes.FirstOrDefault(p => p.Method.Equals(participatingFrame.MethodIdentifier)));

if (noCaptureReason != string.Empty)
{
TagMissingFrame(rootSpan, $"{debugErrorPrefix}.{assignIndex}.", participatingFrame.Method, noCaptureReason);
}

assignIndex += 1;
}

if (trackedExceptionCase.Revert())
{
CachedDoneExceptions.Add(trackedExceptionCase.ExceptionToString);
}
}
else if (trackedExceptionCase.IsCollecting)
{
Log.Information("Exception case re-occurred, data can be collected. Exception details: {FullName} {Message}.", exception.GetType().FullName, exception.Message);

var shouldCheckWhyThereAreNoFrames = false;

if (rootSpan == null)
{
Log.Information("The RootSpan is null. The exception might be reported from async processing. Should not happen. Exception: {Exception}", exception.ToString());
return;
Log.Information("The RootSpan is null. Exception: {Exception}", exception.ToString());
shouldCheckWhyThereAreNoFrames = true;
}

if (!ShadowStackHolder.IsShadowStackTrackingEnabled)
{
Log.Warning("The shadow stack is not enabled, while processing IsCollecting state of an exception. Exception details: {FullName} {Message}.", exception.GetType().FullName, exception.Message);
return;
shouldCheckWhyThereAreNoFrames = true;
}

var resultCallStackTree = ShadowStackHolder.ShadowStack!.CreateResultReport(exceptionPath: exception);
var resultCallStackTree = shouldCheckWhyThereAreNoFrames ? null : ShadowStackHolder.ShadowStack!.CreateResultReport(exceptionPath: exception);
if (resultCallStackTree == null || !resultCallStackTree.Frames.Any())
{
Log.Error("ExceptionTrackManager: Received an empty tree from the shadow stack for exception: {Exception}.", exception.ToString());
Log.Error("ExceptionTrackManager: Checking why there are no frames captured for exception: {Exception}.", exception.ToString());

// If we failed to instrument all the probes.
if (trackedExceptionCase.ExceptionCase.Probes.All(p => p.IsInstrumented && (p.ProbeStatus == Status.ERROR || p.ProbeStatus == Status.BLOCKED || (p.ProbeStatus == Status.INSTALLED && p.MayBeOmittedFromCallStack))))
if (trackedExceptionCase.ExceptionCase.Probes.All(p => p.IsInstrumented && (p.ProbeStatus == Status.ERROR || p.ProbeStatus == Status.BLOCKED || p.MayBeOmittedFromCallStack)))
{
var allFrames = trackedExceptionCase.ExceptionCase.ExceptionId.StackTrace;
var allProbes = trackedExceptionCase.ExceptionCase.Probes;
var frameIndex = allFrames.Length - 1;
var debugErrorPrefix = "_dd.debug.error";
var assignIndex = 0;

while (frameIndex >= 0)
{
var participatingFrame = allFrames[frameIndex--];
var noCaptureReason = GetNoCaptureReason(participatingFrame, allProbes.FirstOrDefault(p => p.Method.Equals(participatingFrame.MethodIdentifier)));

if (noCaptureReason != string.Empty)
{
TagMissingFrame(rootSpan, $"{debugErrorPrefix}.{assignIndex}.", participatingFrame.Method, noCaptureReason);
}

assignIndex += 1;
}

Log.Information("Reverting the exception case of the empty stack tree since none of the methods were instrumented, for exception: {Name}, Message: {Message}, StackTrace: {StackTrace}", exception.GetType().Name, exception.Message, exception.StackTrace);
if (trackedExceptionCase.Revert())
{
CachedDoneExceptions.Add(trackedExceptionCase.ExceptionToString);
}
Log.Information("Invalidating the exception case of the empty stack tree since none of the methods were instrumented, for exception: {Name}, Message: {Message}, StackTrace: {StackTrace}", exception.GetType().Name, exception.Message, exception.StackTrace);
trackedExceptionCase.InvalidateCase();
_invalidatedCases.Add(exception.ToString());
}

return;
}
else
{
if (rootSpan == null)
{
Log.Warning("The RootSpan is null in the branch of extracing snapshots. Should not happen. Exception: {Exception}", exception.ToString());
return;
}

// Attach tags to the root span
var debugErrorPrefix = "_dd.debug.error";
rootSpan.Tags.SetTag("error.debug_info_captured", "true");
Expand Down Expand Up @@ -293,10 +324,10 @@ private static string GetNoCaptureReason(ParticipatingFrame frame, ExceptionDebu

if (probe != null)
{
if (probe.ProbeStatus == Status.INSTALLED && probe.MayBeOmittedFromCallStack)
if (probe.MayBeOmittedFromCallStack)
{
// Frame is non-optimized in .NET 6+.
noCaptureReason = $"The method {frame.Method.GetFullyQualifiedName()} could not be captured.";
noCaptureReason = $"The method {frame.Method.GetFullyQualifiedName()} could not be captured because it resides in a module that is not optimized. In .NET 6 and later versions, the code must be compiled with optimizations enabled.";
}

if (probe.ProbeStatus == Status.ERROR)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ public TrackedExceptionCase(ExceptionIdentifier exceptionId, string exceptionToS

public bool IsDone => TrackingExceptionCollectionState == ExceptionCollectionState.Finalizing || TrackingExceptionCollectionState == ExceptionCollectionState.Done;

public bool IsInvalidated => TrackingExceptionCollectionState == ExceptionCollectionState.Invalidated;

public DateTime StartCollectingTime { get; private set; }

public ExceptionCase ExceptionCase { get; private set; }
Expand Down Expand Up @@ -97,6 +99,11 @@ public bool Revert()
return false;
}

public void InvalidateCase()
{
EndInProgressState(ExceptionCollectionState.Invalidated);
}

private bool BeginTeardown()
{
return BeginInProgressState(ExceptionCollectionState.Finalizing);
Expand Down
Loading