From 06c8024f455ab0ab43aa87ee34d682c728df52b4 Mon Sep 17 00:00:00 2001 From: Matan Green Date: Sun, 31 Mar 2024 15:47:03 +0300 Subject: [PATCH] Better handling exceptions that could not be captured by Exception Replay --- .../CachedDoneExceptions.cs | 46 +++------ .../CachedItems.cs | 65 +++++++++++++ .../ExceptionCollectionState.cs | 1 + .../ExceptionTrackManager.cs | 97 ++++++++++++------- .../TrackedExceptionCase.cs | 7 ++ 5 files changed, 149 insertions(+), 67 deletions(-) create mode 100644 tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/CachedItems.cs diff --git a/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/CachedDoneExceptions.cs b/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/CachedDoneExceptions.cs index ddd3d5e7b238..88b83c3584c8 100644 --- a/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/CachedDoneExceptions.cs +++ b/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/CachedDoneExceptions.cs @@ -7,54 +7,32 @@ using System.Collections.Generic; using System.Linq; using System.Text; -using System.Threading; using System.Threading.Tasks; #nullable enable namespace Datadog.Trace.Debugger.ExceptionAutoInstrumentation { - internal class CachedDoneExceptions + /// + /// Acts as a proxy to a static `CachedItems` object to make the Exception that are in done cases easily accesible, throughout the codebase + /// without references detouring. + /// + internal static class CachedDoneExceptions { - private static readonly HashSet DoneExceptions = new(); - private static readonly ReaderWriterLockSlim DoneExceptionsLocker = new(); + private static readonly CachedItems _cachedDoneExceptions = new CachedItems(); - internal static void Add(string exceptionToString) + internal static void Add(string item) { - DoneExceptionsLocker.EnterWriteLock(); - try - { - DoneExceptions.Add(exceptionToString); - } - finally - { - DoneExceptionsLocker.ExitWriteLock(); - } + _cachedDoneExceptions.Add(item); } - internal static bool Remove(string exceptionToString) + internal static bool Remove(string item) { - DoneExceptionsLocker.EnterWriteLock(); - try - { - return DoneExceptions.Remove(exceptionToString); - } - finally - { - DoneExceptionsLocker.ExitWriteLock(); - } + return _cachedDoneExceptions.Remove(item); } - internal static bool Contains(string exceptionToString) + internal static bool Contains(string item) { - DoneExceptionsLocker.EnterReadLock(); - try - { - return DoneExceptions.Contains(exceptionToString); - } - finally - { - DoneExceptionsLocker.ExitReadLock(); - } + return _cachedDoneExceptions.Contains(item); } } } diff --git a/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/CachedItems.cs b/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/CachedItems.cs new file mode 100644 index 000000000000..ffb54f81eda6 --- /dev/null +++ b/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/CachedItems.cs @@ -0,0 +1,65 @@ +// +// Unless explicitly stated otherwise all files in this repository are licensed under the Apache 2 License. +// This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2017 Datadog, Inc. +// + +using System; +using System.Collections.Generic; +using System.ComponentModel; +using System.Linq; +using System.Text; +using System.Threading; +using System.Threading.Tasks; +using Datadog.Trace.Vendors.MessagePack; +using Fnv1aHash = Datadog.Trace.VendoredMicrosoftCode.System.Reflection.Internal.Hash; + +#nullable enable +namespace Datadog.Trace.Debugger.ExceptionAutoInstrumentation +{ + internal class CachedItems + { + private readonly HashSet cache = new(); + private readonly ReaderWriterLockSlim cacheLocker = new(); + + internal void Add(string item) + { + cacheLocker.EnterWriteLock(); + try + { + cache.Add(Hash(item)); + } + finally + { + cacheLocker.ExitWriteLock(); + } + } + + internal bool Remove(string item) + { + cacheLocker.EnterWriteLock(); + try + { + return cache.Remove(Hash(item)); + } + finally + { + cacheLocker.ExitWriteLock(); + } + } + + internal bool Contains(string item) + { + cacheLocker.EnterReadLock(); + try + { + return cache.Contains(Hash(item)); + } + finally + { + cacheLocker.ExitReadLock(); + } + } + + private int Hash(string item) => Fnv1aHash.GetFNVHashCode(StringEncoding.UTF8.GetBytes(item)); + } +} diff --git a/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/ExceptionCollectionState.cs b/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/ExceptionCollectionState.cs index 10686c701689..d920ca03999a 100644 --- a/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/ExceptionCollectionState.cs +++ b/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/ExceptionCollectionState.cs @@ -18,6 +18,7 @@ internal enum ExceptionCollectionState : byte Initializing, Collecting, Finalizing, + Invalidated, None } } diff --git a/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/ExceptionTrackManager.cs b/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/ExceptionTrackManager.cs index d29f7f1ddd5b..f578332efbdd 100644 --- a/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/ExceptionTrackManager.cs +++ b/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/ExceptionTrackManager.cs @@ -37,6 +37,7 @@ internal class ExceptionTrackManager private static readonly int MaxFramesToCapture = ExceptionDebugging.Settings.MaximumFramesToCapture; private static readonly TimeSpan RateLimit = ExceptionDebugging.Settings.RateLimit; private static readonly BasicCircuitBreaker ReportingCircuitBreaker = new(ExceptionDebugging.Settings.MaxExceptionAnalysisLimit, TimeSpan.FromSeconds(1)); + private static CachedItems _invalidatedCases = new CachedItems(); private static Task? _exceptionProcessorTask; private static bool _isInitialized; @@ -81,12 +82,21 @@ public static void Report(Span span, Exception? exception) private static void ReportInternal(Exception exception, ErrorOriginKind errorOrigin, Span rootSpan) { - if (CachedDoneExceptions.Contains(exception.ToString())) + var exToString = exception.ToString(); + + if (CachedDoneExceptions.Contains(exToString)) { // Quick exit. return; } + if (_invalidatedCases.Contains(exToString)) + { + Log.Information("Encountered invalidated exception.ToString(), exception: {Exception}", exToString); + ProcessException(exception, errorOrigin, rootSpan); + return; + } + if (ReportingCircuitBreaker.Trip() == CircuitBreakerState.Opened) { if (Log.IsEnabled(LogEventLevel.Debug)) @@ -141,58 +151,79 @@ private static void ProcessException(Exception exception, ErrorOriginKind errorO if (trackedExceptionCase.IsDone) { } + else if (trackedExceptionCase.IsInvalidated) + { + if (rootSpan == null) + { + Log.Warning("rootSpan is null while processing invalidated case. Should not happen. exception: {Exception}", exception.ToString()); + return; + } + + var allFrames = trackedExceptionCase.ExceptionCase.ExceptionId.StackTrace; + var allProbes = trackedExceptionCase.ExceptionCase.Probes; + var frameIndex = allFrames.Length - 1; + var debugErrorPrefix = "_dd.debug.error"; + var assignIndex = 0; + + while (frameIndex >= 0) + { + var participatingFrame = allFrames[frameIndex--]; + var noCaptureReason = GetNoCaptureReason(participatingFrame, allProbes.FirstOrDefault(p => p.Method.Equals(participatingFrame.MethodIdentifier))); + + if (noCaptureReason != string.Empty) + { + TagMissingFrame(rootSpan, $"{debugErrorPrefix}.{assignIndex}.", participatingFrame.Method, noCaptureReason); + } + + assignIndex += 1; + } + + if (trackedExceptionCase.Revert()) + { + CachedDoneExceptions.Add(trackedExceptionCase.ExceptionToString); + } + } else if (trackedExceptionCase.IsCollecting) { Log.Information("Exception case re-occurred, data can be collected. Exception details: {FullName} {Message}.", exception.GetType().FullName, exception.Message); + var shouldCheckWhyThereAreNoFrames = false; + if (rootSpan == null) { - Log.Information("The RootSpan is null. The exception might be reported from async processing. Should not happen. Exception: {Exception}", exception.ToString()); - return; + Log.Information("The RootSpan is null. Exception: {Exception}", exception.ToString()); + shouldCheckWhyThereAreNoFrames = true; } if (!ShadowStackHolder.IsShadowStackTrackingEnabled) { Log.Warning("The shadow stack is not enabled, while processing IsCollecting state of an exception. Exception details: {FullName} {Message}.", exception.GetType().FullName, exception.Message); - return; + shouldCheckWhyThereAreNoFrames = true; } - var resultCallStackTree = ShadowStackHolder.ShadowStack!.CreateResultReport(exceptionPath: exception); + var resultCallStackTree = shouldCheckWhyThereAreNoFrames ? null : ShadowStackHolder.ShadowStack!.CreateResultReport(exceptionPath: exception); if (resultCallStackTree == null || !resultCallStackTree.Frames.Any()) { - Log.Error("ExceptionTrackManager: Received an empty tree from the shadow stack for exception: {Exception}.", exception.ToString()); + Log.Error("ExceptionTrackManager: Checking why there are no frames captured for exception: {Exception}.", exception.ToString()); // If we failed to instrument all the probes. - if (trackedExceptionCase.ExceptionCase.Probes.All(p => p.IsInstrumented && (p.ProbeStatus == Status.ERROR || p.ProbeStatus == Status.BLOCKED || (p.ProbeStatus == Status.INSTALLED && p.MayBeOmittedFromCallStack)))) + if (trackedExceptionCase.ExceptionCase.Probes.All(p => p.IsInstrumented && (p.ProbeStatus == Status.ERROR || p.ProbeStatus == Status.BLOCKED || p.MayBeOmittedFromCallStack))) { - var allFrames = trackedExceptionCase.ExceptionCase.ExceptionId.StackTrace; - var allProbes = trackedExceptionCase.ExceptionCase.Probes; - var frameIndex = allFrames.Length - 1; - var debugErrorPrefix = "_dd.debug.error"; - var assignIndex = 0; - - while (frameIndex >= 0) - { - var participatingFrame = allFrames[frameIndex--]; - var noCaptureReason = GetNoCaptureReason(participatingFrame, allProbes.FirstOrDefault(p => p.Method.Equals(participatingFrame.MethodIdentifier))); - - if (noCaptureReason != string.Empty) - { - TagMissingFrame(rootSpan, $"{debugErrorPrefix}.{assignIndex}.", participatingFrame.Method, noCaptureReason); - } - - assignIndex += 1; - } - - Log.Information("Reverting the exception case of the empty stack tree since none of the methods were instrumented, for exception: {Name}, Message: {Message}, StackTrace: {StackTrace}", exception.GetType().Name, exception.Message, exception.StackTrace); - if (trackedExceptionCase.Revert()) - { - CachedDoneExceptions.Add(trackedExceptionCase.ExceptionToString); - } + Log.Information("Invalidating the exception case of the empty stack tree since none of the methods were instrumented, for exception: {Name}, Message: {Message}, StackTrace: {StackTrace}", exception.GetType().Name, exception.Message, exception.StackTrace); + trackedExceptionCase.InvalidateCase(); + _invalidatedCases.Add(exception.ToString()); } + + return; } else { + if (rootSpan == null) + { + Log.Warning("The RootSpan is null in the branch of extracing snapshots. Should not happen. Exception: {Exception}", exception.ToString()); + return; + } + // Attach tags to the root span var debugErrorPrefix = "_dd.debug.error"; rootSpan.Tags.SetTag("error.debug_info_captured", "true"); @@ -293,10 +324,10 @@ private static string GetNoCaptureReason(ParticipatingFrame frame, ExceptionDebu if (probe != null) { - if (probe.ProbeStatus == Status.INSTALLED && probe.MayBeOmittedFromCallStack) + if (probe.MayBeOmittedFromCallStack) { // Frame is non-optimized in .NET 6+. - noCaptureReason = $"The method {frame.Method.GetFullyQualifiedName()} could not be captured."; + noCaptureReason = $"The method {frame.Method.GetFullyQualifiedName()} could not be captured because it resides in a module that is not optimized. In .NET 6 and later versions, the code must be compiled with optimizations enabled."; } if (probe.ProbeStatus == Status.ERROR) diff --git a/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/TrackedExceptionCase.cs b/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/TrackedExceptionCase.cs index 4d1677abe96c..4193288fcb50 100644 --- a/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/TrackedExceptionCase.cs +++ b/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/TrackedExceptionCase.cs @@ -41,6 +41,8 @@ public TrackedExceptionCase(ExceptionIdentifier exceptionId, string exceptionToS public bool IsDone => TrackingExceptionCollectionState == ExceptionCollectionState.Finalizing || TrackingExceptionCollectionState == ExceptionCollectionState.Done; + public bool IsInvalidated => TrackingExceptionCollectionState == ExceptionCollectionState.Invalidated; + public DateTime StartCollectingTime { get; private set; } public ExceptionCase ExceptionCase { get; private set; } @@ -97,6 +99,11 @@ public bool Revert() return false; } + public void InvalidateCase() + { + EndInProgressState(ExceptionCollectionState.Invalidated); + } + private bool BeginTeardown() { return BeginInProgressState(ExceptionCollectionState.Finalizing);