-
Notifications
You must be signed in to change notification settings - Fork 140
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added scheduling capability to Exception Cases that turn to Done
- Loading branch information
1 parent
025f8c6
commit a135834
Showing
9 changed files
with
438 additions
and
202 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
59 changes: 59 additions & 0 deletions
59
tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/CachedDoneExceptions.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
// <copyright file="CachedDoneExceptions.cs" company="Datadog"> | ||
// Unless explicitly stated otherwise all files in this repository are licensed under the Apache 2 License. | ||
// This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2017 Datadog, Inc. | ||
// </copyright> | ||
|
||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Text; | ||
using System.Threading; | ||
using System.Threading.Tasks; | ||
|
||
namespace Datadog.Trace.Debugger.ExceptionAutoInstrumentation | ||
{ | ||
/// <summary>
/// Static cache of exceptions that are considered "done", keyed by the text returned from
/// <see cref="Exception.ToString"/>. All access to the underlying set is guarded by a
/// <see cref="ReaderWriterLockSlim"/>: <see cref="Add"/> and <see cref="Remove"/> take the write lock,
/// <see cref="Contains"/> takes the read lock.
/// </summary>
internal class CachedDoneExceptions
{
    private static readonly HashSet<string> DoneExceptions = new();
    private static readonly ReaderWriterLockSlim DoneExceptionsLocker = new();

    /// <summary>
    /// Records the given exception as done.
    /// </summary>
    /// <param name="exception">The exception whose <see cref="Exception.ToString"/> text is stored.</param>
    internal static void Add(Exception exception)
    {
        // Build the key before taking the lock: Exception.ToString() formats the stack trace and
        // inner exceptions, and there is no reason to block readers while that happens.
        var key = exception.ToString();

        DoneExceptionsLocker.EnterWriteLock();
        try
        {
            DoneExceptions.Add(key);
        }
        finally
        {
            DoneExceptionsLocker.ExitWriteLock();
        }
    }

    /// <summary>
    /// Removes a previously recorded exception from the cache.
    /// </summary>
    /// <param name="exceptionToString">The <see cref="Exception.ToString"/> text that was stored by <see cref="Add"/>.</param>
    /// <returns><c>true</c> if the entry was present and has been removed; otherwise <c>false</c>.</returns>
    internal static bool Remove(string exceptionToString)
    {
        DoneExceptionsLocker.EnterWriteLock();
        try
        {
            return DoneExceptions.Remove(exceptionToString);
        }
        finally
        {
            DoneExceptionsLocker.ExitWriteLock();
        }
    }

    /// <summary>
    /// Checks whether the given exception has been recorded as done.
    /// </summary>
    /// <param name="exception">The exception to look up by its <see cref="Exception.ToString"/> text.</param>
    /// <returns><c>true</c> if an entry with the same text is in the cache; otherwise <c>false</c>.</returns>
    internal static bool Contains(Exception exception)
    {
        // Same as in Add: keep the formatting work outside of the critical section.
        var key = exception.ToString();

        DoneExceptionsLocker.EnterReadLock();
        try
        {
            return DoneExceptions.Contains(key);
        }
        finally
        {
            DoneExceptionsLocker.ExitReadLock();
        }
    }
}
} |
191 changes: 191 additions & 0 deletions
191
...atadog.Trace/Debugger/ExceptionAutoInstrumentation/ExceptionCaseInstrumentationManager.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,191 @@ | ||
// <copyright file="ExceptionCaseInstrumentationManager.cs" company="Datadog"> | ||
// Unless explicitly stated otherwise all files in this repository are licensed under the Apache 2 License. | ||
// This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2017 Datadog, Inc. | ||
// </copyright> | ||
|
||
using System; | ||
using System.Collections.Concurrent; | ||
using System.Collections.Generic; | ||
using System.Diagnostics; | ||
using System.Linq; | ||
using System.Reflection; | ||
using System.Threading; | ||
using System.Threading.Tasks; | ||
using Datadog.Trace.Debugger.Configurations.Models; | ||
using Datadog.Trace.Debugger.Expressions; | ||
using Datadog.Trace.Debugger.Helpers; | ||
using Datadog.Trace.Debugger.PInvoke; | ||
using Datadog.Trace.Debugger.RateLimiting; | ||
using Datadog.Trace.Debugger.Sink.Models; | ||
using Datadog.Trace.Debugger.Symbols; | ||
using Datadog.Trace.Logging; | ||
using Datadog.Trace.Telemetry.Metrics; | ||
using Datadog.Trace.Util; | ||
using Datadog.Trace.VendoredMicrosoftCode.System.Buffers; | ||
using Datadog.Trace.Vendors.Serilog.Events; | ||
|
||
namespace Datadog.Trace.Debugger.ExceptionAutoInstrumentation | ||
{ | ||
/// <summary>
/// Creates and reverts <see cref="ExceptionCase"/> instrumentation. Keeps a static map from method
/// identifier to the <see cref="ExceptionDebuggingProbe"/> that represents that method, so that several
/// exception cases sharing a method reuse the same probe.
/// </summary>
internal class ExceptionCaseInstrumentationManager
{
    private static readonly IDatadogLogger Log = DatadogLogging.GetLoggerFor<ExceptionCaseInstrumentationManager>();
    private static readonly ConcurrentDictionary<MethodUniqueIdentifier, ExceptionDebuggingProbe> MethodToProbe = new();

    private static int _maxFramesToCapture = ExceptionDebuggingSettings.DefaultMaxFramesToCapture;

    /// <summary>
    /// Overrides the number of frames to capture that <see cref="Instrument"/> uses when selecting target methods.
    /// </summary>
    /// <param name="maxFramesToCapture">The new frame limit.</param>
    public static void Initialize(int maxFramesToCapture)
    {
        _maxFramesToCapture = maxFramesToCapture;
    }

    /// <summary>
    /// Builds an <see cref="ExceptionCase"/> for the given exception identifier: collects the methods of its
    /// stack trace that are eligible for rejit, resolves (or creates) a probe for each of them and registers
    /// the new case on every distinct probe.
    /// </summary>
    /// <param name="exceptionId">The exception identifier whose stack trace frames are instrumented.</param>
    /// <returns>The newly created exception case.</returns>
    internal static ExceptionCase Instrument(ExceptionIdentifier exceptionId)
    {
        Log.Information("Instrumenting {ExceptionId}", exceptionId);

        var participatingUserMethods = GetMethodsToRejit(exceptionId.StackTrace);

        var uniqueMethods = participatingUserMethods
                           .Distinct(EqualityComparer<MethodUniqueIdentifier>.Default)
                           .ToArray();

        // Resolve every distinct method to its probe exactly once and keep the reference locally.
        // Reading MethodToProbe again later (via the indexer) could throw KeyNotFoundException if
        // Revert removes the entry concurrently; GetOrAdd also avoids the ContainsKey + TryAdd double lookup.
        var probeByMethod = new Dictionary<MethodUniqueIdentifier, ExceptionDebuggingProbe>();
        foreach (var method in uniqueMethods)
        {
            probeByMethod[method] = MethodToProbe.GetOrAdd(method, m => new ExceptionDebuggingProbe(m));
        }

        // One probe per participating frame, in frame order (a method may appear more than once).
        var probes = participatingUserMethods.Select(m => probeByMethod[m]).ToArray();

        var thresholdIndex = participatingUserMethods.Count - _maxFramesToCapture;
        var targetMethods = new HashSet<MethodUniqueIdentifier>();

        for (var index = 0; index < probes.Length; index++)
        {
            if (ShouldInstrumentFrameAtIndex(index))
            {
                targetMethods.Add(probes[index].Method);
            }
        }

        var newCase = new ExceptionCase(exceptionId, probes);

        foreach (var method in uniqueMethods)
        {
            probeByMethod[method].AddExceptionCase(newCase, targetMethods.Contains(method));
        }

        // TODO decide if a sampler is needed, ExceptionProbeProcessor does not use any sampler for now.
        // TODO InnerExceptions poses struggle in ExceptionProbeProcessor leaving logic.
        // TODO Capture arguments on exit upon first leave, collect lightweight snapshot for subsequent re-entrances.
        // TODO AsyncLocal cleansing when done dealing with exception from the Exception Debugging instrumentation (ShadowStack cleansing)
        // TODO In ExceptionProbeProcessor.ShouldProcess, maybe negotiate with the ShadowStack to determine if the top of the stack
        // TODO is relevant for the specific exception case it manages. Maybe instead of ShouldProcess we can do that
        // TODO in the Process method, in the branch where the exception type is checked to see if the previous method is relevant.
        // TODO there's a gotcha in doing it - it might be the next method has not been instrumented (failed to instrument)
        // TODO so it won't be there because it should. We will have to accommodate for that by checking the probe status and cache it.
        // TODO When leaving with an exception, we can negotiate with the ShadowStack to determine if the previous frame
        // TODO Is holding the same exception instance (either as inner / itself) to better decide if we should keep on collecting
        // TODO or not.
        // TODO Multiple AppDomains issue. The ProbeProcessor might not be there. Also relevant for DI probes. To assess how big
        // TODO the issue is, we should determine how many people are using .NET Framework .VS. .NET Core.
        // TODO For Exception Debugging we can possibly choose to ditch this altogether since if the same exception will
        // TODO happen multiple times in different AppDomains, then they will all capture the exception. The only problem is
        // TODO over-instrumenting which is not ideal.
        // TODO In AsyncMethodProbe Invoker, is it always MultiProbe even when there is only one?
        // TODO What do you do with empty shadow stack? meaning, all the participating methods has failed in the instrumentation process OR they are all 3rd party code?
        // TODO There might be two different exceptions, that yield the same snapshots. Consider A -> B -> C with exception "InvalidOperationException"
        // TODO and K -> B -> D with exception "InvalidOperationException". If we fail to instrument: A, B, K, D then there will be the same causality chain for both exceptions.
        // TODO That's why ExceptionTrackManager is the only place where snapshots are uploaded, based on the exception in hand, to be able to stop tracking an exception
        // TODO and keep on tracking the other.
        // TODO For Lightweight/Full snapshot capturing:
        // TODO Consider keeping a cache in ShadowStackTree's AsyncLocal (in ShadowStackContainer), where the cached key
        // TODO will be the hash of parents & children (Enter/Leave) and the MethodToken of the method. This way,
        // TODO the method that is leaving with an interesting exception can ask this AsyncLocal (top-thread-tree) cache
        // TODO if it's hash (EnterHash+LeaveHash+MethodToken) is in there. If it is, collect lightweight snapshot.
        // TODO if it's not, collect full snapshot.
        // TODO In this technique we will have to verify AsyncLocal safety in terms of memory leaking and the cleansing timing.
        // TODO we don't want this cache to be alive for a longer time than is needed or being reused by another execution
        // TODO context in a later time. This cache will have to be thread-safe since many threads may access it at the same
        // TODO time. Consider using Readers/Writer lock pattern or another one that is prioritizing readings than writings.
        // TODO Or any other lock-free pattern that may be suitable in this case.
        // TODO Better handle multiple exceptions related to concurrency - AggregateException. It's InnerException &
        // TODO InnerExceptions properties.

        return newCase;

        // A frame is a target when it is the frame at index 0, when it is one of the last
        // _maxFramesToCapture frames, or when the whole trace has at most _maxFramesToCapture + 1 frames.
        bool ShouldInstrumentFrameAtIndex(int i)
        {
            return i == 0 || i >= thresholdIndex || participatingUserMethods.Count <= _maxFramesToCapture + 1;
        }
    }

    /// <summary>
    /// Selects the frames that should be rejitted: frames in the <see cref="ParticipatingFrameState.Blacklist"/>
    /// state and frames whose method is abstract are skipped. A frame that throws while being inspected is
    /// logged and skipped as well.
    /// </summary>
    /// <param name="allFrames">All frames of the exception's stack trace.</param>
    /// <returns>The method identifiers of the eligible frames, in frame order (duplicates are kept).</returns>
    private static List<MethodUniqueIdentifier> GetMethodsToRejit(ParticipatingFrame[] allFrames)
    {
        var methodsToRejit = new List<MethodUniqueIdentifier>();

        foreach (var frame in allFrames)
        {
            try
            {
                // NOTE(review): the original author left a "HasMethod?" question here - if frame.Method can
                // be null, the IsAbstract access below throws and the frame ends up in the catch block.
                if (frame.State == ParticipatingFrameState.Blacklist)
                {
                    continue;
                }

                var frameMethod = frame.Method;
                if (frameMethod.IsAbstract)
                {
                    continue;
                }

                methodsToRejit.Add(frame.MethodIdentifier);
            }
            catch (Exception ex)
            {
                Log.Error(ex, "Failed to instrument the frame: {FrameToRejit}", frame);
            }
        }

        return methodsToRejit;
    }

    /// <summary>
    /// Detaches the given exception case from all of its probes and asks the native side to remove the
    /// probes that are no longer needed.
    /// </summary>
    /// <param name="case">The exception case to revert.</param>
    internal static void Revert(ExceptionCase @case)
    {
        Log.Information("Reverting {ExceptionCase}", @case);

        foreach (var probe in @case.Probes)
        {
            probe.RemoveExceptionCase(@case);
        }

        var revertProbeIds = new HashSet<string>();

        foreach (var processor in @case.Processors.Keys)
        {
            // NOTE(review): RemoveProbeProcessor presumably returns the number of processors still attached;
            // 0 is treated here as "nobody uses this probe anymore" - confirm against its implementation.
            if (processor.ExceptionDebuggingProcessor.RemoveProbeProcessor(processor) == 0)
            {
                MethodToProbe.TryRemove(processor.ExceptionDebuggingProcessor.Method, out _);
                revertProbeIds.Add(processor.ExceptionDebuggingProcessor.ProbeId);
            }
        }

        if (revertProbeIds.Count > 0)
        {
            Log.Information("ExceptionTrackManager: Reverting {RevertCount} Probes.", revertProbeIds.Count.ToString());

            // Only removal requests are sent; the three "add" lists are intentionally empty.
            var removeProbesRequests = revertProbeIds.Select(p => new NativeRemoveProbeRequest(p)).ToArray();
            DebuggerNativeMethods.InstrumentProbes(
                Array.Empty<NativeMethodProbeDefinition>(),
                Array.Empty<NativeLineProbeDefinition>(),
                Array.Empty<NativeSpanProbeDefinition>(),
                removeProbesRequests);
        }
    }
}
} |
98 changes: 98 additions & 0 deletions
98
tracer/src/Datadog.Trace/Debugger/ExceptionAutoInstrumentation/ExceptionCaseScheduler.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
// <copyright file="ExceptionCaseScheduler.cs" company="Datadog"> | ||
// Unless explicitly stated otherwise all files in this repository are licensed under the Apache 2 License. | ||
// This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2017 Datadog, Inc. | ||
// </copyright> | ||
|
||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Threading; | ||
using Datadog.Trace.Logging; | ||
|
||
namespace Datadog.Trace.Debugger.ExceptionAutoInstrumentation | ||
{ | ||
/// <summary>
/// Schedules <see cref="TrackedExceptionCase"/> instances to be instrumented after a delay.
/// Pending cases are kept in a static list ordered by due time, and a single one-shot
/// <see cref="Timer"/> is armed for the earliest one.
/// </summary>
internal class ExceptionCaseScheduler
{
    private static readonly IDatadogLogger Log = DatadogLogging.GetLoggerFor<ExceptionCaseScheduler>();
    private static readonly List<ScheduledException> ScheduledExceptions = new();
    private static readonly object Lock = new();
    private static Timer _timer;

    /// <summary>
    /// Initializes a new instance of the <see cref="ExceptionCaseScheduler"/> class.
    /// The shared timer is created by the first instance only.
    /// </summary>
    public ExceptionCaseScheduler()
    {
        // The timer and the queue are static, so re-creating the timer for every instance would orphan a
        // timer that may already be armed for the head of the queue. Create it once and reuse it.
        lock (Lock)
        {
            _timer ??= new Timer(TimerCallback, null, Timeout.Infinite, Timeout.Infinite);
        }
    }

    /// <summary>
    /// Queues the given case so that <c>Instrument()</c> is invoked on it once <paramref name="delay"/> has elapsed.
    /// </summary>
    /// <param name="doneCase">The case to instrument when it becomes due.</param>
    /// <param name="delay">How long to wait, measured from now (UTC).</param>
    public void Schedule(TrackedExceptionCase doneCase, TimeSpan delay)
    {
        var dueTime = DateTime.UtcNow.Add(delay);
        var scheduledTask = new ScheduledException { Case = doneCase, DueTime = dueTime };

        lock (Lock)
        {
            ScheduledExceptions.Add(scheduledTask);
            ScheduledExceptions.Sort();

            // Only re-arm the timer when the new entry became the earliest one.
            if (ScheduledExceptions[0] == scheduledTask)
            {
                SetNextTimer(dueTime);
            }
        }
    }

    /// <summary>
    /// Timer entry point. Logs any exception instead of letting it escape onto the timer thread.
    /// </summary>
    private void TimerCallback(object state)
    {
        try
        {
            SafeTimerCallback(state);
        }
        catch (Exception ex)
        {
            Log.Error(ex, "There was an error while processing the Exception Cases scheduler.");
        }
    }

    /// <summary>
    /// Dequeues every case whose due time has passed, re-arms the timer for the next pending case
    /// and then instruments the dequeued cases outside of the lock.
    /// </summary>
    private void SafeTimerCallback(object state)
    {
        var casesToInstrument = new List<TrackedExceptionCase>();

        lock (Lock)
        {
            var now = DateTime.UtcNow;

            // The list is kept sorted by due time, so the due entries form a prefix of it.
            var dueCount = 0;
            while (dueCount < ScheduledExceptions.Count && ScheduledExceptions[dueCount].DueTime <= now)
            {
                casesToInstrument.Add(ScheduledExceptions[dueCount].Case);
                dueCount++;
            }

            // Drop the whole prefix at once instead of removing item by item.
            ScheduledExceptions.RemoveRange(0, dueCount);

            if (ScheduledExceptions.Count > 0)
            {
                SetNextTimer(ScheduledExceptions[0].DueTime);
            }
        }

        foreach (var @case in casesToInstrument)
        {
            try
            {
                @case.Instrument();
            }
            catch (Exception ex)
            {
                // The cases were already removed from the queue; a failure in one of them
                // must not prevent the remaining ones from being instrumented.
                Log.Error(ex, "There was an error while instrumenting a scheduled Exception Case.");
            }
        }
    }

    /// <summary>
    /// Arms the one-shot timer so that it fires at <paramref name="dueTime"/> (immediately if it is in the past).
    /// </summary>
    private void SetNextTimer(DateTime dueTime)
    {
        var delayMs = (dueTime - DateTime.UtcNow).TotalMilliseconds;

        // Timer.Change(int, int) rejects negative values other than Timeout.Infinite and an out-of-range
        // double-to-int cast is not well defined, so clamp to [0, int.MaxValue]. If the timer fires
        // before the entry is actually due, the callback simply re-arms it for the remaining time.
        var clampedDelay = (int)Math.Min(Math.Max(delayMs, 0), int.MaxValue);
        _timer.Change(clampedDelay, Timeout.Infinite);
    }

    /// <summary>
    /// A queued case together with the UTC time at which it becomes due. Ordered by due time.
    /// </summary>
    private class ScheduledException : IComparable<ScheduledException>
    {
        public TrackedExceptionCase Case { get; set; }

        public DateTime DueTime { get; set; }

        /// <summary>
        /// Orders entries by <see cref="DueTime"/>; any entry sorts after <c>null</c>.
        /// </summary>
        public int CompareTo(ScheduledException other)
        {
            // Explicit null check keeps the call on DateTime.CompareTo(DateTime) and avoids boxing a DateTime?.
            return other is null ? 1 : DueTime.CompareTo(other.DueTime);
        }
    }
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.