diff --git a/tracer/src/Datadog.Trace/Configuration/ConfigurationKeys.Debugger.cs b/tracer/src/Datadog.Trace/Configuration/ConfigurationKeys.Debugger.cs
index 68b9ce2e712e..701d8ca576e6 100644
--- a/tracer/src/Datadog.Trace/Configuration/ConfigurationKeys.Debugger.cs
+++ b/tracer/src/Datadog.Trace/Configuration/ConfigurationKeys.Debugger.cs
@@ -111,6 +111,13 @@ internal static class Debugger
///
///
public const string ExceptionDebuggingCaptureFullCallStack = "DD_EXCEPTION_DEBUGGING_CAPTURE_FULL_CALLSTACK";
+
+ ///
+ /// Configuration key for the interval used to track exceptions
+ /// Default value is 1h.
+ ///
+ ///
+ public const string RateLimitSeconds = "DD_EXCEPTION_DEBUGGING_RATE_LIMIT_SECONDS";
}
}
}
diff --git a/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/CachedDoneExceptions.cs b/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/CachedDoneExceptions.cs
new file mode 100644
index 000000000000..03af093d4308
--- /dev/null
+++ b/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/CachedDoneExceptions.cs
@@ -0,0 +1,59 @@
+//
+// Unless explicitly stated otherwise all files in this repository are licensed under the Apache 2 License.
+// This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2017 Datadog, Inc.
+//
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading;
+using System.Threading.Tasks;
+
+namespace Datadog.Trace.Debugger.ExceptionAutoInstrumentation
+{
+ internal class CachedDoneExceptions
+ {
+ private static readonly HashSet DoneExceptions = new();
+ private static readonly ReaderWriterLockSlim DoneExceptionsLocker = new();
+
+ internal static void Add(Exception exception)
+ {
+ DoneExceptionsLocker.EnterWriteLock();
+ try
+ {
+ DoneExceptions.Add(exception.ToString());
+ }
+ finally
+ {
+ DoneExceptionsLocker.ExitWriteLock();
+ }
+ }
+
+ internal static bool Remove(string exceptionToString)
+ {
+ DoneExceptionsLocker.EnterWriteLock();
+ try
+ {
+ return DoneExceptions.Remove(exceptionToString);
+ }
+ finally
+ {
+ DoneExceptionsLocker.ExitWriteLock();
+ }
+ }
+
+ internal static bool Contains(Exception exception)
+ {
+ DoneExceptionsLocker.EnterReadLock();
+ try
+ {
+ return DoneExceptions.Contains(exception.ToString());
+ }
+ finally
+ {
+ DoneExceptionsLocker.ExitReadLock();
+ }
+ }
+ }
+}
diff --git a/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/ExceptionCaseInstrumentationManager.cs b/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/ExceptionCaseInstrumentationManager.cs
new file mode 100644
index 000000000000..4676d464f47e
--- /dev/null
+++ b/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/ExceptionCaseInstrumentationManager.cs
@@ -0,0 +1,191 @@
+//
+// Unless explicitly stated otherwise all files in this repository are licensed under the Apache 2 License.
+// This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2017 Datadog, Inc.
+//
+
+using System;
+using System.Collections.Concurrent;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Reflection;
+using System.Threading;
+using System.Threading.Tasks;
+using Datadog.Trace.Debugger.Configurations.Models;
+using Datadog.Trace.Debugger.Expressions;
+using Datadog.Trace.Debugger.Helpers;
+using Datadog.Trace.Debugger.PInvoke;
+using Datadog.Trace.Debugger.RateLimiting;
+using Datadog.Trace.Debugger.Sink.Models;
+using Datadog.Trace.Debugger.Symbols;
+using Datadog.Trace.Logging;
+using Datadog.Trace.Telemetry.Metrics;
+using Datadog.Trace.Util;
+using Datadog.Trace.VendoredMicrosoftCode.System.Buffers;
+using Datadog.Trace.Vendors.Serilog.Events;
+
+namespace Datadog.Trace.Debugger.ExceptionAutoInstrumentation
+{
+ internal class ExceptionCaseInstrumentationManager
+ {
+ private static readonly IDatadogLogger Log = DatadogLogging.GetLoggerFor();
+ private static readonly ConcurrentDictionary MethodToProbe = new();
+
+ private static int _maxFramesToCapture = ExceptionDebuggingSettings.DefaultMaxFramesToCapture;
+
+ public static void Initialize(int maxFramesToCapture)
+ {
+ _maxFramesToCapture = maxFramesToCapture;
+ }
+
+ internal static ExceptionCase Instrument(ExceptionIdentifier exceptionId)
+ {
+ Log.Information("Instrumenting {ExceptionId}", exceptionId);
+
+ var participatingUserMethods = GetMethodsToRejit(exceptionId.StackTrace);
+
+ var uniqueMethods = participatingUserMethods
+ .Distinct(EqualityComparer.Default)
+ .ToArray();
+
+ var neverSeenBeforeMethods = uniqueMethods
+ .Where(frame => !MethodToProbe.ContainsKey(frame))
+ .ToArray();
+
+ foreach (var frame in neverSeenBeforeMethods)
+ {
+ MethodToProbe.TryAdd(frame, new ExceptionDebuggingProbe(frame));
+ }
+
+ var probes = participatingUserMethods.Select((m, frameIndex) => MethodToProbe[m]).ToArray();
+
+ var thresholdIndex = participatingUserMethods.Count - _maxFramesToCapture;
+ var targetMethods = new HashSet();
+
+ for (var index = 0; index < probes.Length; index++)
+ {
+ if (ShouldInstrumentFrameAtIndex(index))
+ {
+ targetMethods.Add(probes[index].Method);
+ }
+ }
+
+ var newCase = new ExceptionCase(exceptionId, probes);
+
+ foreach (var method in uniqueMethods)
+ {
+ var probe = MethodToProbe[method];
+ probe.AddExceptionCase(newCase, targetMethods.Contains(method));
+ }
+
+ // TODO decide if a sampler is needed, ExceptionProbeProcessor does not use any sampler for now.
+ // TODO InnerExceptions poses struggle in ExceptionProbeProcessor leaving logic.
+ // TODO Capture arguments on exit upon first leave, collect lightweight snapshot for subsequent re-entrances.
+ // TODO AsyncLocal cleansing when done dealing with exception from the Exception Debugging instrumentation (ShadowStack cleansing)
+ // TODO In ExceptionProbeProcessor.ShouldProcess, maybe negotiate with the ShadowStack to determine if the top of the stack
+ // TODO is relevant for the specific exception case it manages. Maybe instead of ShouldProcess we can do that
+ // TODO in the Process method, in the branch where the exception type is checked to see if the previous method is relevant.
+ // TODO there's a gotcha in doing it - it might be the next method has not been instrumented (failed to instrument)
+ // TODO so it won't be there because it should. We will have to accommodate for that by checking the probe status and cache it.
+ // TODO When leaving with an exception, we can negotiate with the ShadowStack to determine if the previous frame
+ // TODO Is holding the same exception instance (either as inner / itself) to better decide if we should keep on collecting
+ // TODO or not.
+ // TODO Multiple AppDomains issue. The ProbeProcessor might not be there. Also relevant for DI probes. To assess how big
+ // TODO the issue is, we should determine how many people are using .NET Framework .VS. .NET Core.
+ // TODO For Exception Debugging we can possibly choose to ditch this altogether since if the same exception will
+ // TODO happen multiple times in different AppDomains, then they will all capture the exception. The only problem is
+ // TODO over-instrumenting which is not ideal.
+ // TODO In AsyncMethodProbe Invoker, is it always MultiProbe even when there is only one?
+ // TODO What do you do with empty shadow stack? meaning, all the participating methods has failed in the instrumentation process OR they are all 3rd party code?
+ // TODO There might be two different exceptions, that yield the same snapshots. Consider A -> B -> C with exception "InvalidOperationException"
+ // TODO and K -> B -> D with exception "InvalidOperationException". If we fail to instrument: A, B, K, D then there will be the same causality chain for both exceptions.
+ // TODO That's why ExceptionTrackManager is the only place where snapshots are uploaded, based on the exception in hand, to be able to stop tracking an exception
+ // TODO and keep on tracking the other.
+ // TODO For Lightweight/Full snapshot capturing:
+ // TODO Consider keeping a cache in ShadowStackTree's AsyncLocal (in ShadowStackContainer), where the cached key
+ // TODO will be the hash of parents & children (Enter/Leave) and the MethodToken of the method. This way,
+ // TODO the method that is leaving with an interesting exception can ask this AsyncLocal (top-thread-tree) cache
+ // TODO if it's hash (EnterHash+LeaveHash+MethodToken) is in there. If it is, collect lightweight snapshot.
+ // TODO if it's not, collect full snapshot.
+ // TODO In this technique we will have to verify AsyncLocal safety in terms of memory leaking and the cleansing timing.
+ // TODO we don't want this cache to be alive for a longer time than is needed or being reused by another execution
+ // TODO context in a later time. This cache will have to be thread-safe since many threads may access it at the same
+ // TODO time. Consider using Readers/Writer lock pattern or another one that is prioritizing readings than writings.
+ // TODO Or any other lock-free pattern that may be suitable in this case.
+ // TODO Better handle multiple exceptions related to concurrency - AggregateException. It's InnerException &
+ // TODO InnerExceptions properties.
+
+ return newCase;
+
+ bool ShouldInstrumentFrameAtIndex(int i)
+ {
+ return i == 0 || i >= thresholdIndex || participatingUserMethods.Count <= _maxFramesToCapture + 1;
+ }
+ }
+
+ private static List GetMethodsToRejit(ParticipatingFrame[] allFrames)
+ {
+ var methodsToRejit = new List();
+
+ foreach (var frame in allFrames)
+ {
+ try
+ {
+ // HasMethod?
+
+ if (frame.State == ParticipatingFrameState.Blacklist)
+ {
+ continue;
+ }
+
+ var frameMethod = frame.Method;
+ if (frameMethod.IsAbstract)
+ {
+ continue;
+ }
+
+ methodsToRejit.Add(frame.MethodIdentifier);
+ }
+ catch (Exception ex)
+ {
+ Log.Error(ex, "Failed to instrument frame the frame: {FrameToRejit}", frame);
+ }
+ }
+
+ return methodsToRejit;
+ }
+
+ internal static void Revert(ExceptionCase @case)
+ {
+ Log.Information("Reverting {ExceptionCase}", @case);
+
+ foreach (var probe in @case.Probes)
+ {
+ probe.RemoveExceptionCase(@case);
+ }
+
+ var revertProbeIds = new HashSet();
+
+ foreach (var processor in @case.Processors.Keys)
+ {
+ if (processor.ExceptionDebuggingProcessor.RemoveProbeProcessor(processor) == 0)
+ {
+ MethodToProbe.TryRemove(processor.ExceptionDebuggingProcessor.Method, out _);
+ revertProbeIds.Add(processor.ExceptionDebuggingProcessor.ProbeId);
+ }
+ }
+
+ if (revertProbeIds.Count > 0)
+ {
+ Log.Information("ExceptionTrackManager: Reverting {RevertCount} Probes.", revertProbeIds.Count.ToString());
+
+ var removeProbesRequests = revertProbeIds.Select(p => new NativeRemoveProbeRequest(p)).ToArray();
+ DebuggerNativeMethods.InstrumentProbes(
+ Array.Empty(),
+ Array.Empty(),
+ Array.Empty(),
+ removeProbesRequests);
+ }
+ }
+ }
+}
diff --git a/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/ExceptionCaseScheduler.cs b/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/ExceptionCaseScheduler.cs
new file mode 100644
index 000000000000..5cc971c5c913
--- /dev/null
+++ b/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/ExceptionCaseScheduler.cs
@@ -0,0 +1,98 @@
+//
+// Unless explicitly stated otherwise all files in this repository are licensed under the Apache 2 License.
+// This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2017 Datadog, Inc.
+//
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Threading;
+using Datadog.Trace.Logging;
+
+namespace Datadog.Trace.Debugger.ExceptionAutoInstrumentation
+{
+ internal class ExceptionCaseScheduler
+ {
+ private static readonly IDatadogLogger Log = DatadogLogging.GetLoggerFor();
+ private static readonly List ScheduledExceptions = new();
+ private static readonly object Lock = new();
+ private static Timer _timer;
+
+ public ExceptionCaseScheduler()
+ {
+ _timer = new Timer(TimerCallback, null, Timeout.Infinite, Timeout.Infinite);
+ }
+
+ public void Schedule(TrackedExceptionCase doneCase, TimeSpan delay)
+ {
+ var dueTime = DateTime.UtcNow.Add(delay);
+ var scheduledTask = new ScheduledException { Case = doneCase, DueTime = dueTime };
+
+ lock (Lock)
+ {
+ ScheduledExceptions.Add(scheduledTask);
+ ScheduledExceptions.Sort();
+ if (ScheduledExceptions[0] == scheduledTask)
+ {
+ SetNextTimer(dueTime);
+ }
+ }
+ }
+
+ private void TimerCallback(object state)
+ {
+ try
+ {
+ SafeTimerCallback(state);
+ }
+ catch (Exception ex)
+ {
+ Log.Error(ex, "There was an error while processing the Exception Cases scheduler.");
+ }
+ }
+
+ private void SafeTimerCallback(object state)
+ {
+ var casesToInstrument = new List();
+
+ lock (Lock)
+ {
+ var now = DateTime.UtcNow;
+ var dueTasks = ScheduledExceptions.TakeWhile(e => e.DueTime <= now).ToList();
+ foreach (var task in dueTasks)
+ {
+ casesToInstrument.Add(task.Case);
+ ScheduledExceptions.Remove(task);
+ }
+
+ if (ScheduledExceptions.Any())
+ {
+ SetNextTimer(ScheduledExceptions[0].DueTime);
+ }
+ }
+
+ foreach (var @case in casesToInstrument)
+ {
+ @case.Instrument();
+ }
+ }
+
+ private void SetNextTimer(DateTime dueTime)
+ {
+ var delay = Math.Max((dueTime - DateTime.UtcNow).TotalMilliseconds, 0);
+ _timer.Change((int)delay, Timeout.Infinite);
+ }
+
+ private class ScheduledException : IComparable
+ {
+ public TrackedExceptionCase Case { get; set; }
+
+ public DateTime DueTime { get; set; }
+
+ public int CompareTo(ScheduledException other)
+ {
+ return DueTime.CompareTo(other?.DueTime);
+ }
+ }
+ }
+}
diff --git a/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/ExceptionDebuggingSettings.cs b/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/ExceptionDebuggingSettings.cs
index 5266509e35f3..7a6e18e701aa 100644
--- a/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/ExceptionDebuggingSettings.cs
+++ b/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/ExceptionDebuggingSettings.cs
@@ -18,6 +18,7 @@ namespace Datadog.Trace.Debugger.ExceptionAutoInstrumentation
internal class ExceptionDebuggingSettings
{
public const int DefaultMaxFramesToCapture = 3;
+ public const int DefaultRateLimitSeconds = 60 * 60; // 1 hour
public ExceptionDebuggingSettings(IConfigurationSource? source, IConfigurationTelemetry telemetry)
{
@@ -34,6 +35,13 @@ public ExceptionDebuggingSettings(IConfigurationSource? source, IConfigurationTe
.Value;
MaximumFramesToCapture = CaptureFullCallStack ? int.MaxValue : maximumFramesToCapture;
+
+ var seconds = config
+ .WithKeys(ConfigurationKeys.Debugger.RateLimitSeconds)
+ .AsInt32(DefaultRateLimitSeconds, x => x > 0)
+ .Value;
+
+ RateLimit = TimeSpan.FromSeconds(seconds);
}
public bool Enabled { get; }
@@ -42,6 +50,8 @@ public ExceptionDebuggingSettings(IConfigurationSource? source, IConfigurationTe
public bool CaptureFullCallStack { get; }
+ public TimeSpan RateLimit { get; }
+
public static ExceptionDebuggingSettings FromSource(IConfigurationSource source, IConfigurationTelemetry telemetry)
{
return new ExceptionDebuggingSettings(source, telemetry);
diff --git a/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/ExceptionTrackManager.cs b/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/ExceptionTrackManager.cs
index f6d02e12cd10..e28cdbc00b5a 100644
--- a/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/ExceptionTrackManager.cs
+++ b/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/ExceptionTrackManager.cs
@@ -31,14 +31,13 @@ internal class ExceptionTrackManager
{
private static readonly IDatadogLogger Log = DatadogLogging.GetLoggerFor();
private static readonly ConcurrentDictionary TrackedExceptionCases = new();
- private static readonly ConcurrentDictionary MethodToProbe = new();
private static readonly ConcurrentQueue ExceptionProcessQueue = new();
private static readonly SemaphoreSlim WorkAvailable = new(0, int.MaxValue);
private static readonly CancellationTokenSource Cts = new();
- private static readonly HashSet CachedDoneExceptions = new();
- private static readonly ReaderWriterLockSlim DoneExceptionsLocker = new();
+ private static readonly ExceptionCaseScheduler ExceptionsScheduler = new();
+ private static readonly int MaxFramesToCapture = ExceptionDebugging.Settings.MaximumFramesToCapture;
+ private static readonly TimeSpan RateLimit = ExceptionDebugging.Settings.RateLimit;
private static Task _exceptionProcessorTask;
- private static int _maxFramesToCapture;
private static async Task StartExceptionProcessingAsync(CancellationToken cancellationToken)
{
@@ -75,7 +74,7 @@ public static void Report(Span span, Exception exception)
private static void ReportInternal(Exception exception, ErrorOriginType errorOrigin, Span rootSpan)
{
- if (IsInDoneState(exception))
+ if (CachedDoneExceptions.Contains(exception))
{
// Quick exit.
return;
@@ -93,22 +92,6 @@ private static void ReportInternal(Exception exception, ErrorOriginType errorOri
}
}
- ///
- /// Quick lookup for early exit.
- ///
- private static bool IsInDoneState(Exception exception)
- {
- DoneExceptionsLocker.EnterReadLock();
- try
- {
- return CachedDoneExceptions.Contains(exception.ToString());
- }
- finally
- {
- DoneExceptionsLocker.ExitReadLock();
- }
- }
-
private static void ProcessException(Exception exception, ErrorOriginType errorOrigin, Span rootSpan)
{
var allParticipatingFrames = GetAllExceptionRelatedStackFrames(exception);
@@ -136,19 +119,11 @@ private static void ProcessException(Exception exception, ErrorOriginType errorO
var exceptionId = new ExceptionIdentifier(exceptionTypes, allParticipatingFramesFlattened, errorOrigin);
- var trackedExceptionCase = TrackedExceptionCases.GetOrAdd(exceptionId, _ => new TrackedExceptionCase(exceptionId, TimeSpan.FromSeconds(1)));
+ var trackedExceptionCase = TrackedExceptionCases.GetOrAdd(exceptionId, _ => new TrackedExceptionCase(exceptionId, exception.ToString()));
if (trackedExceptionCase.IsDone)
{
- DoneExceptionsLocker.EnterWriteLock();
- try
- {
- CachedDoneExceptions.Add(exception.ToString());
- }
- finally
- {
- DoneExceptionsLocker.ExitWriteLock();
- }
+ CachedDoneExceptions.Add(exception);
}
else if (trackedExceptionCase.IsCollecting)
{
@@ -170,7 +145,13 @@ private static void ProcessException(Exception exception, ErrorOriginType errorO
if (resultCallStackTree == null || !resultCallStackTree.Frames.Any())
{
Log.Error("ExceptionTrackManager: Received an empty tree from the shadow stack for exception: {Exception}.", exception.ToString());
- System.Diagnostics.Debugger.Break();
+
+ // If we failed to instrument all the probes.
+ if (trackedExceptionCase.ExceptionCase.Probes.All(p => p.IsInstrumented && (p.ProbeStatus == Status.ERROR || p.ProbeStatus == Status.BLOCKED)))
+ {
+ Log.Information("Reverting the exception case of the empty stack tree since none of the methods were instrumented, for exception: {Name}, Message: {Message}, StackTrace: {StackTrace}", exception.GetType().Name, exception.Message, exception.StackTrace);
+ trackedExceptionCase.Revert();
+ }
}
else
{
@@ -190,7 +171,7 @@ private static void ProcessException(Exception exception, ErrorOriginType errorO
var assignIndex = 0;
// Upload tail frames
- while (frameIndex >= 0 && capturedFrameIndex >= 0 && assignIndex < _maxFramesToCapture)
+ while (frameIndex >= 0 && capturedFrameIndex >= 0 && assignIndex < MaxFramesToCapture)
{
var frame = capturedFrames[capturedFrameIndex];
@@ -206,7 +187,7 @@ private static void ProcessException(Exception exception, ErrorOriginType errorO
}
// Upload head frame
- if (capturedFrames.Count > _maxFramesToCapture)
+ if (capturedFrames.Count > MaxFramesToCapture)
{
var frame = capturedFrames[0];
@@ -222,53 +203,15 @@ private static void ProcessException(Exception exception, ErrorOriginType errorO
TagAndUpload(rootSpan, prefix, frame);
}
- if (trackedExceptionCase.BeginTeardown())
- {
- foreach (var probe in trackedExceptionCase.ExceptionCase.Probes)
- {
- probe.RemoveExceptionCase(trackedExceptionCase.ExceptionCase);
- }
-
- var revertProbeIds = new HashSet();
-
- foreach (var processor in trackedExceptionCase.ExceptionCase.Processors.Keys)
- {
- if (processor.ExceptionDebuggingProcessor.RemoveProbeProcessor(processor) == 0)
- {
- MethodToProbe.TryRemove(processor.ExceptionDebuggingProcessor.Method, out _);
- revertProbeIds.Add(processor.ExceptionDebuggingProcessor.ProbeId);
- }
- }
-
- if (revertProbeIds.Count > 0)
- {
- Log.Information("ExceptionTrackManager: Reverting {RevertCount} Probes.", revertProbeIds.Count.ToString());
-
- var removeProbesRequests = revertProbeIds.Select(p => new NativeRemoveProbeRequest(p)).ToArray();
- DebuggerNativeMethods.InstrumentProbes(
- Array.Empty(),
- Array.Empty(),
- Array.Empty(),
- removeProbesRequests);
- }
-
- trackedExceptionCase.EndTeardown();
- }
+ Log.Information("Reverting an exception case for exception: {Name}, Message: {Message}, StackTrace: {StackTrace}", exception.GetType().Name, exception.Message, exception.StackTrace);
+ trackedExceptionCase.Revert();
+ ExceptionsScheduler.Schedule(trackedExceptionCase, RateLimit);
}
}
else
{
- // else - If there is a concurrent initialization or tearing down, ignore this case
- if (!trackedExceptionCase.Initialized())
- {
- return;
- }
-
Log.Information("New exception case occurred, initiating data collection for exception: {Name}, Message: {Message}, StackTrace: {StackTrace}", exception.GetType().Name, exception.Message, exception.StackTrace);
-
- trackedExceptionCase.ExceptionCase = InstrumentFrames(trackedExceptionCase.ExceptionIdentifier);
-
- trackedExceptionCase.BeginCollect();
+ trackedExceptionCase.Instrument();
}
}
@@ -317,7 +260,7 @@ private static bool IsSupportedExceptionType(Exception ex) =>
public static void Initialize()
{
- _maxFramesToCapture = ExceptionDebugging.Settings.MaximumFramesToCapture;
+ ExceptionCaseInstrumentationManager.Initialize(MaxFramesToCapture);
_exceptionProcessorTask = Task.Factory.StartNew(
async () => await StartExceptionProcessingAsync(Cts.Token).ConfigureAwait(false), TaskCreationOptions.LongRunning)
@@ -333,121 +276,6 @@ public static bool IsSupportedExceptionType(Type ex) =>
ex != typeof(TypeLoadException) &&
ex != typeof(OutOfMemoryException);
- private static List GetMethodsToRejit(ParticipatingFrame[] allFrames)
- {
- var methodsToRejit = new List();
-
- foreach (var frame in allFrames)
- {
- try
- {
- // HasMethod?
-
- if (frame.State == ParticipatingFrameState.Blacklist)
- {
- continue;
- }
-
- var frameMethod = frame.Method;
- if (frameMethod.IsAbstract)
- {
- continue;
- }
-
- methodsToRejit.Add(frame.MethodIdentifier);
- }
- catch (Exception ex)
- {
- Log.Error(ex, "Failed to instrument frame the frame: {FrameToRejit}", frame);
- }
- }
-
- return methodsToRejit;
- }
-
- private static ExceptionCase InstrumentFrames(ExceptionIdentifier exceptionIdentifier)
- {
- var participatingUserMethods = GetMethodsToRejit(exceptionIdentifier.StackTrace);
-
- var uniqueMethods = participatingUserMethods
- .Distinct(EqualityComparer.Default)
- .ToArray();
-
- var neverSeenBeforeMethods = uniqueMethods
- .Where(frame => !MethodToProbe.ContainsKey(frame))
- .ToArray();
-
- foreach (var frame in neverSeenBeforeMethods)
- {
- MethodToProbe.TryAdd(frame, new ExceptionDebuggingProbe(frame));
- }
-
- var probes = participatingUserMethods.Select((m, frameIndex) => MethodToProbe[m]).ToArray();
-
- var thresholdIndex = participatingUserMethods.Count - _maxFramesToCapture;
- var targetMethods = new HashSet();
-
- for (var index = 0; index < probes.Length; index++)
- {
- if (ShouldInstrumentFrameAtIndex(index))
- {
- targetMethods.Add(probes[index].Method);
- }
- }
-
- var newCase = new ExceptionCase(exceptionIdentifier, probes);
-
- foreach (var method in uniqueMethods)
- {
- var probe = MethodToProbe[method];
- probe.AddExceptionCase(newCase, targetMethods.Contains(method));
- }
-
- // TODO decide if a sampler is needed, ExceptionProbeProcessor does not use any sampler for now.
- // TODO InnerExceptions poses struggle in ExceptionProbeProcessor leaving logic.
- // TODO Capture arguments on exit upon first leave, collect lightweight snapshot for subsequent re-entrances.
- // TODO AsyncLocal cleansing when done dealing with exception from the Exception Debugging instrumentation (ShadowStack cleansing)
- // TODO In ExceptionProbeProcessor.ShouldProcess, maybe negotiate with the ShadowStack to determine if the top of the stack
- // TODO is relevant for the specific exception case it manages. Maybe instead of ShouldProcess we can do that
- // TODO in the Process method, in the branch where the exception type is checked to see if the previous method is relevant.
- // TODO there's a gotcha in doing it - it might be the next method has not been instrumented (failed to instrument)
- // TODO so it won't be there because it should. We will have to accommodate for that by checking the probe status and cache it.
- // TODO When leaving with an exception, we can negotiate with the ShadowStack to determine if the previous frame
- // TODO Is holding the same exception instance (either as inner / itself) to better decide if we should keep on collecting
- // TODO or not.
- // TODO Multiple AppDomains issue. The ProbeProcessor might not be there. Also relevant for DI probes. To assess how big
- // TODO the issue is, we should determine how many people are using .NET Framework .VS. .NET Core.
- // TODO For Exception Debugging we can possibly choose to ditch this altogether since if the same exception will
- // TODO happen multiple times in different AppDomains, then they will all capture the exception. The only problem is
- // TODO over-instrumenting which is not ideal.
- // TODO In AsyncMethodProbe Invoker, is it always MultiProbe even when there is only one?
- // TODO What do you do with empty shadow stack? meaning, all the participating methods has failed in the instrumentation process OR they are all 3rd party code?
- // TODO There might be two different exceptions, that yield the same snapshots. Consider A -> B -> C with exception "InvalidOperationException"
- // TODO and K -> B -> D with exception "InvalidOperationException". If we fail to instrument: A, B, K, D then there will be the same causality chain for both exceptions.
- // TODO That's why ExceptionTrackManager is the only place where snapshots are uploaded, based on the exception in hand, to be able to stop tracking an exception
- // TODO and keep on tracking the other.
- // TODO For Lightweight/Full snapshot capturing:
- // TODO Consider keeping a cache in ShadowStackTree's AsyncLocal (in ShadowStackContainer), where the cached key
- // TODO will be the hash of parents & children (Enter/Leave) and the MethodToken of the method. This way,
- // TODO the method that is leaving with an interesting exception can ask this AsyncLocal (top-thread-tree) cache
- // TODO if it's hash (EnterHash+LeaveHash+MethodToken) is in there. If it is, collect lightweight snapshot.
- // TODO if it's not, collect full snapshot.
- // TODO In this technique we will have to verify AsyncLocal safety in terms of memory leaking and the cleansing timing.
- // TODO we don't want this cache to be alive for a longer time than is needed or being reused by another execution
- // TODO context in a later time. This cache will have to be thread-safe since many threads may access it at the same
- // TODO time. Consider using Readers/Writer lock pattern or another one that is prioritizing readings than writings.
- // TODO Or any other lock-free pattern that may be suitable in this case.
- // TODO Better handle multiple exceptions related to concurrency - AggregateException. It's InnerException &
- // TODO InnerExceptions properties.
-
- return newCase;
-
- bool ShouldInstrumentFrameAtIndex(int i)
- {
- return i == 0 || i >= thresholdIndex || participatingUserMethods.Count <= _maxFramesToCapture + 1;
- }
- }
-
private static void Stop()
{
Cts.Cancel();
diff --git a/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/MethodUniqueIdentifier.cs b/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/MethodUniqueIdentifier.cs
index 23f8872e3629..ab773d01ec3f 100644
--- a/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/MethodUniqueIdentifier.cs
+++ b/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/MethodUniqueIdentifier.cs
@@ -76,6 +76,13 @@ public override int GetHashCode()
{
return _hashCode;
}
+
+ public override string ToString()
+ {
+ var probesInfo = Probes == null ? "null" : $"{Probes.Length} probes";
+ var processorsCount = Processors?.Count ?? 0;
+ return $"ExceptionCase: ExceptionId={ExceptionId}, Probes=[{probesInfo}], Processors={processorsCount}";
+ }
}
internal readonly struct ExceptionIdentifier : IEquatable
@@ -135,6 +142,13 @@ public override int GetHashCode()
{
return _hashCode;
}
+
+ public override string ToString()
+ {
+ var exceptionTypes = string.Join(", ", ExceptionTypes.Select(t => t.FullName));
+ var stackTrace = string.Join("; ", StackTrace.Select(frame => frame.ToString()));
+ return $"ErrorOrigin: {ErrorOrigin}, ExceptionTypes: [{exceptionTypes}], StackTrace: [{stackTrace}]";
+ }
}
internal class ExceptionDebuggingProbe
diff --git a/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/ParticipatingFrame.cs b/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/ParticipatingFrame.cs
index 58af16323d44..154093c51c74 100644
--- a/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/ParticipatingFrame.cs
+++ b/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/ParticipatingFrame.cs
@@ -62,5 +62,13 @@ public override bool Equals(object obj)
return Equals((ParticipatingFrame)obj);
}
+
+ public override string ToString()
+ {
+ var methodName = Method?.DeclaringType?.FullName + "." + Method?.Name ?? "Unknown Method";
+ var blacklistStatus = IsInBlackList ? "Blacklisted" : "Not Blacklisted";
+ var ilOffsetInfo = ILOffset != UndefinedIlOffset ? $"IL Offset: {ILOffset}" : "IL Offset: Undefined";
+ return $"ParticipatingFrame: Method={methodName}, State={State}, {ilOffsetInfo}, {blacklistStatus}";
+ }
}
}
diff --git a/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/TrackedExceptionCase.cs b/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/TrackedExceptionCase.cs
index cbb611792dbb..05f2b9a462b6 100644
--- a/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/TrackedExceptionCase.cs
+++ b/tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/TrackedExceptionCase.cs
@@ -5,7 +5,6 @@
using System;
using System.Threading;
-using Datadog.Trace.Debugger.RateLimiting;
namespace Datadog.Trace.Debugger.ExceptionAutoInstrumentation
{
@@ -21,12 +20,12 @@ internal class TrackedExceptionCase
{
private volatile int _initializationOrTearDownInProgress;
- public TrackedExceptionCase(ExceptionIdentifier exceptionId, TimeSpan windowDuration)
+ public TrackedExceptionCase(ExceptionIdentifier exceptionId, string exceptionToString)
{
ExceptionIdentifier = exceptionId;
ErrorHash = MD5HashProvider.GetHash(exceptionId);
+ ExceptionToString = exceptionToString;
StartCollectingTime = DateTime.MaxValue;
- Sampler = new AdaptiveSampler(windowDuration, 1, 180, 16, null);
}
public bool IsCollecting => TrackingExceptionCollectionState == ExceptionCollectionState.Collecting;
@@ -35,15 +34,15 @@ public TrackedExceptionCase(ExceptionIdentifier exceptionId, TimeSpan windowDura
public string ErrorHash { get; }
+ public string ExceptionToString { get; }
+
public ExceptionCollectionState TrackingExceptionCollectionState { get; private set; } = ExceptionCollectionState.None;
public bool IsDone => TrackingExceptionCollectionState == ExceptionCollectionState.Finalizing || TrackingExceptionCollectionState == ExceptionCollectionState.Done;
public DateTime StartCollectingTime { get; private set; }
- public ExceptionCase ExceptionCase { get; set; }
-
- public AdaptiveSampler Sampler { get; }
+ public ExceptionCase ExceptionCase { get; private set; }
private bool BeginInProgressState(ExceptionCollectionState exceptionCollectionState)
{
@@ -62,25 +61,47 @@ private void EndInProgressState(ExceptionCollectionState exceptionCollectionStat
_initializationOrTearDownInProgress = 0;
}
- public bool Initialized()
+ private bool Initialized()
{
return BeginInProgressState(ExceptionCollectionState.Initializing);
}
- public void BeginCollect()
+ public void Instrument()
+ {
+ // else - If there is a concurrent initialization or tearing down, ignore this case
+ if (Initialized())
+ {
+ var @case = ExceptionCaseInstrumentationManager.Instrument(ExceptionIdentifier);
+ BeginCollect(@case);
+ CachedDoneExceptions.Remove(ExceptionToString);
+ }
+ }
+
+ private void BeginCollect(ExceptionCase @case)
{
+ ExceptionCase = @case;
EndInProgressState(ExceptionCollectionState.Collecting);
StartCollectingTime = DateTime.UtcNow;
}
- public bool BeginTeardown()
+ public void Revert()
+ {
+ if (BeginTeardown())
+ {
+ ExceptionCaseInstrumentationManager.Revert(ExceptionCase);
+ EndTeardown();
+ }
+ }
+
+ private bool BeginTeardown()
{
return BeginInProgressState(ExceptionCollectionState.Finalizing);
}
- public void EndTeardown()
+ private void EndTeardown()
{
EndInProgressState(ExceptionCollectionState.Done);
+ ExceptionCase = default;
}
public override int GetHashCode()