From 69de9cf4c31cd8774393fc116d7309486786e427 Mon Sep 17 00:00:00 2001 From: nohwnd Date: Fri, 21 Aug 2020 14:02:13 +0200 Subject: [PATCH 1/2] Run each dump in a task in netclient dumper --- .../NetClientHangDumper.cs | 54 ++++++++++--------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/src/Microsoft.TestPlatform.Extensions.BlameDataCollector/NetClientHangDumper.cs b/src/Microsoft.TestPlatform.Extensions.BlameDataCollector/NetClientHangDumper.cs index 2a7d0e16d6..4f54ba70eb 100644 --- a/src/Microsoft.TestPlatform.Extensions.BlameDataCollector/NetClientHangDumper.cs +++ b/src/Microsoft.TestPlatform.Extensions.BlameDataCollector/NetClientHangDumper.cs @@ -4,9 +4,12 @@ namespace Microsoft.TestPlatform.Extensions.BlameDataCollector { using System; + using System.Collections.Generic; using System.Diagnostics; using System.IO; using System.Linq; + using System.Threading; + using System.Threading.Tasks; using Microsoft.Diagnostics.NETCore.Client; using Microsoft.VisualStudio.TestPlatform.ObjectModel; using Microsoft.VisualStudio.TestPlatform.Utilities; @@ -40,35 +43,36 @@ public void Dump(int processId, string outputDirectory, DumpTypeOption type) var bottomUpTree = processTree.OrderByDescending(t => t.Level).Select(t => t.Process); + // Do not suspend processes with NetClient dumper it stops the diagnostic thread running in + // them and hang dump request will get stuck forever, because the process is not co-operating. + // Instead we start one task per dump asynchronously, and hope that the parent process will start dumping + // before the child process is done dumping. This way if the parent is waiting for the children to exit, + // we will be dumping it before it observes the child exiting and we get a more accurate results. If we did not + // do this, then parent that is awaiting child might exit before we get to dumping it. + var tasks = new List(); + var timeout = new CancellationTokenSource(); + timeout.CancelAfter(TimeSpan.FromSeconds(90)); foreach (var p in bottomUpTree) { - try - { - p.Suspend(); - } - catch (Exception ex) - { - EqtTrace.Error($"NetClientHangDumper.Dump: Error suspending process {p.Id} - {p.ProcessName}: {ex}."); - } - } - - foreach (var p in bottomUpTree) - { - try - { - var outputFile = Path.Combine(outputDirectory, $"{p.ProcessName}_{p.Id}_{DateTime.Now:yyyyMMddTHHmmss}_hangdump.dmp"); - EqtTrace.Verbose($"NetClientHangDumper.CollectDump: Selected dump type {type}. Dumping {process.Id} - {process.ProcessName} in {outputFile}. "); + tasks.Add(Task.Run( + () => + { + try + { + var outputFile = Path.Combine(outputDirectory, $"{p.ProcessName}_{p.Id}_{DateTime.Now:yyyyMMddTHHmmss}_hangdump.dmp"); + EqtTrace.Verbose($"NetClientHangDumper.CollectDump: Selected dump type {type}. Dumping {process.Id} - {process.ProcessName} in {outputFile}. "); - var client = new DiagnosticsClient(p.Id); + var client = new DiagnosticsClient(p.Id); - // Connecting the dump generation logging to verbose output to avoid changing the interfaces again -> EqtTrace.IsVerboseEnabled - // before we test this on some big repo. - client.WriteDump(type == DumpTypeOption.Full ? DumpType.Full : DumpType.Normal, outputFile, logDumpGeneration: false); - } - catch (Exception ex) - { - EqtTrace.Error($"NetClientHangDumper.Dump: Error dumping process {p.Id} - {p.ProcessName}: {ex}."); - } + // Connecting the dump generation logging to verbose output to avoid changing the interfaces again -> EqtTrace.IsVerboseEnabled + // before we test this on some big repo. + client.WriteDump(type == DumpTypeOption.Full ? DumpType.Full : DumpType.Normal, outputFile, logDumpGeneration: false); + } + catch (Exception ex) + { + EqtTrace.Error($"NetClientHangDumper.Dump: Error dumping process {p.Id} - {p.ProcessName}: {ex}."); + } + }, timeout.Token); } foreach (var p in bottomUpTree) From f2df542352003aed1e82c60968b57ce968cf8e22 Mon Sep 17 00:00:00 2001 From: nohwnd Date: Fri, 21 Aug 2020 14:02:58 +0200 Subject: [PATCH 2/2] More reasonable timeout --- .../NetClientHangDumper.cs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/Microsoft.TestPlatform.Extensions.BlameDataCollector/NetClientHangDumper.cs b/src/Microsoft.TestPlatform.Extensions.BlameDataCollector/NetClientHangDumper.cs index 4f54ba70eb..ef2f75ac0d 100644 --- a/src/Microsoft.TestPlatform.Extensions.BlameDataCollector/NetClientHangDumper.cs +++ b/src/Microsoft.TestPlatform.Extensions.BlameDataCollector/NetClientHangDumper.cs @@ -51,7 +51,7 @@ public void Dump(int processId, string outputDirectory, DumpTypeOption type) // do this, then parent that is awaiting child might exit before we get to dumping it. var tasks = new List(); var timeout = new CancellationTokenSource(); - timeout.CancelAfter(TimeSpan.FromSeconds(90)); + timeout.CancelAfter(TimeSpan.FromMinutes(5)); foreach (var p in bottomUpTree) { tasks.Add(Task.Run( @@ -72,7 +72,16 @@ public void Dump(int processId, string outputDirectory, DumpTypeOption type) { EqtTrace.Error($"NetClientHangDumper.Dump: Error dumping process {p.Id} - {p.ProcessName}: {ex}."); } - }, timeout.Token); + }, timeout.Token)); + } + + try + { + Task.WhenAll(tasks).GetAwaiter().GetResult(); + } + catch (TaskCanceledException) + { + EqtTrace.Error($"NetClientHangDumper.Dump: Hang dump timed out."); } foreach (var p in bottomUpTree) @@ -87,6 +96,6 @@ public void Dump(int processId, string outputDirectory, DumpTypeOption type) EqtTrace.Error($"NetClientHangDumper.Dump: Error killing process {p.Id} - {p.ProcessName}: {ex}."); } } - } + } } }