Skip to content

Commit

Permalink
Fix JobObjectWrapper detection (dotnet#670)
Browse files Browse the repository at this point in the history
  • Loading branch information
sebastienros authored Dec 14, 2023
1 parent 014339f commit 06f7982
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 82 deletions.
177 changes: 96 additions & 81 deletions src/Microsoft.Crank.Agent/Startup.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1536,15 +1536,13 @@ async Task StopJobAsync(bool abortCollection = false)

if (process != null && !process.HasExited)
{
var processId = process.Id;

// Invoking Stop should also abort collection, but only if the job failed.
// The normal workflow is to stop collection using the TraceCollecting state
if (abortCollection)
{
if (job.Collect)
{
// Abort all Perfview processes
// Abort all PerfView processes
if (OperatingSystem == OperatingSystem.Windows)
{
var logFilename = Path.Combine(workingDirectory, "perfview.log");
Expand Down Expand Up @@ -1576,105 +1574,124 @@ async Task StopJobAsync(bool abortCollection = false)
}
}

if (OperatingSystem == OperatingSystem.Linux)
{
Log.Info($"Invoking SIGTERM ...");
// Stop a child process first if any

Mono.Unix.Native.Syscall.kill(process.Id, Mono.Unix.Native.Signum.SIGTERM);
foreach (var processId in job.AllProcessIds)
{
Process localProcess;

var waitForShutdownDelay = Task.Delay(TimeSpan.FromSeconds(5));
while (!process.HasExited && !waitForShutdownDelay.IsCompletedSuccessfully)
try
{
await Task.Delay(200);
localProcess = Process.GetProcessById(processId);
}
catch (ArgumentException)
{
// Happens if the process is not running anymore
continue;
}

if (!process.HasExited)
if (OperatingSystem == OperatingSystem.Linux)
{
Log.Info($"Invoking SIGINT ...");
Log.Info($"Invoking SIGTERM ...");

Mono.Unix.Native.Syscall.kill(process.Id, Mono.Unix.Native.Signum.SIGINT);
Mono.Unix.Native.Syscall.kill(processId, Mono.Unix.Native.Signum.SIGTERM);

waitForShutdownDelay = Task.Delay(TimeSpan.FromSeconds(5));
while (!process.HasExited && !waitForShutdownDelay.IsCompletedSuccessfully)
var waitForShutdownDelay = Task.Delay(TimeSpan.FromSeconds(5));

while (!localProcess.HasExited && !waitForShutdownDelay.IsCompletedSuccessfully)
{
await Task.Delay(200);
localProcess.Refresh();
}

if (!localProcess.HasExited)
{
Log.Info($"Invoking SIGINT ...");

Mono.Unix.Native.Syscall.kill(localProcess.Id, Mono.Unix.Native.Signum.SIGINT);

waitForShutdownDelay = Task.Delay(TimeSpan.FromSeconds(5));
while (!localProcess.HasExited && !waitForShutdownDelay.IsCompletedSuccessfully)
{
await Task.Delay(200);
localProcess.Refresh();
}
}
}
}

if (OperatingSystem == OperatingSystem.Windows)
{
if (!process.HasExited)
if (OperatingSystem == OperatingSystem.Windows)
{
Log.Info("Sending CTRL+C ...");
if (!localProcess.HasExited)
{
Log.Info("Sending CTRL+C ...");

SendCtrlCSignalToProcess(process);
SendCtrlCSignalToProcess(localProcess);
}
}
}

if (!process.HasExited)
{
try
if (!localProcess.HasExited)
{
// Tentatively invoke the shutdown endpoint on the client application
var response = await _httpClient.GetAsync(new Uri(new Uri(job.Url), "/shutdown"));

// Shutdown invoked successfully, wait for the application to stop by itself
if (response.StatusCode == HttpStatusCode.OK)
try
{
var epoch = DateTime.UtcNow;
// Tentatively invoke the shutdown endpoint on the client application
var response = await _httpClient.GetAsync(new Uri(new Uri(job.Url), "/shutdown"));

do
// Shutdown invoked successfully, wait for the application to stop by itself
if (response.StatusCode == HttpStatusCode.OK)
{
Log.Info("Shutdown successfully invoked, waiting for graceful shutdown ...");
await Task.Delay(1000);
var epoch = DateTime.UtcNow;

} while (!process.HasExited && (DateTime.UtcNow - epoch < TimeSpan.FromSeconds(5)));
do
{
Log.Info("Shutdown successfully invoked, waiting for graceful shutdown ...");
await Task.Delay(1000);

} while (!localProcess.HasExited && (DateTime.UtcNow - epoch < TimeSpan.FromSeconds(5)));
}
}
catch
{
Log.Info($"/shutdown endpoint failed... '{job.Url}/shutdown'");
}
}
catch

if (!localProcess.HasExited)
{
Log.Info($"/shutdown endpoint failed... '{job.Url}/shutdown'");
}
}
Log.Info($"Forcing process to stop ...");
localProcess.CloseMainWindow();

if (!process.HasExited)
{
Log.Info($"Forcing process to stop ...");
process.CloseMainWindow();
if (!localProcess.HasExited)
{
localProcess.Kill();
}

if (!process.HasExited)
{
process.Kill();
}
localProcess.Dispose();

process.Dispose();
do
{
Log.Info($"Waiting for process {processId} to stop ...");

do
{
Log.Info($"Waiting for process {processId} to stop ...");
await Task.Delay(1000);

await Task.Delay(1000);
try
{
localProcess.Refresh();
}
catch
{
localProcess = null;
}

try
{
process = Process.GetProcessById(processId);
process.Refresh();
}
catch
{
process = null;
}
} while (localProcess != null && !localProcess.HasExited);
}
else
{
job.ExitCode = process.ExitCode;
}

} while (process != null && !process.HasExited);
}
else
{
job.ExitCode = process.ExitCode;
Log.Info($"Process has stopped ({job.Service}:{job.Id})");
}

Log.Info($"Process has stopped ({job.Service}:{job.Id})");


job.State = JobState.Stopped;

Expand Down Expand Up @@ -4554,6 +4571,8 @@ private static async Task<Process> StartProcess(string hostname, string benchmar

process.ErrorDataReceived += (_, e) =>
{
const string processIdMarker = "##ChildProcessId:";

if (e != null && e.Data != null)
{
var log = "[STDERR] " + e.Data;
Expand All @@ -4569,7 +4588,6 @@ private static async Task<Process> StartProcess(string hostname, string benchmar
}

// Detect the app is wrapping a child process
var processIdMarker = "##ChildProcessId:";
if (e.Data.StartsWith(processIdMarker)
&& int.TryParse(e.Data.Substring(processIdMarker.Length), out var childProcessId))
{
Expand All @@ -4589,7 +4607,7 @@ private static async Task<Process> StartProcess(string hostname, string benchmar

if (job.DotNetTrace)
{
StartDotNetTrace(process.Id, job);
StartDotNetTrace(job);
}
}

Expand Down Expand Up @@ -4680,7 +4698,7 @@ void RunAndTrace()

if (job.DotNetTrace)
{
StartDotNetTrace(process.Id, job);
StartDotNetTrace(job);
}
}
}
Expand Down Expand Up @@ -4714,16 +4732,16 @@ public static List<int> CalculateCpuList(string cpuSet)

private static async Task StartCountersAsync(Job job, JobContext context)
{
if (job.ProcessId == 0)
if (job.ActiveProcessId == 0)
{
throw new ArgumentException($"Undefined process id for '{job.Service}'");
}

Log.Info("Starting counters");
Log.Info($"Starting counters for process {job.ActiveProcessId}");

var metricsEventSourceSessionId = Guid.NewGuid().ToString();

var client = new DiagnosticsClient(job.ProcessId);
var client = new DiagnosticsClient(job.ActiveProcessId);

var providerNames = job.Counters.Select(x => x.Provider).Distinct().ToArray();

Expand Down Expand Up @@ -4751,18 +4769,15 @@ private static async Task StartCountersAsync(Job job, JobContext context)
const long TimeSeriesValues = 0x2;
var metrics = string.Join(",", providerNames);

var defaultMaxHitograms = 10;
var defaultMaxTimeSeries = 1000;

var metricsEventSourceProvider =
new EventPipeProvider("System.Diagnostics.Metrics", EventLevel.Informational, TimeSeriesValues,
new Dictionary<string, string>()
{
{ "SessionId", metricsEventSourceSessionId },
{ "Metrics", metrics },
{ "RefreshInterval", job.MeasurementsIntervalSec.ToString() },
{ "MaxTimeSeries", defaultMaxHitograms.ToString() },
{ "MaxHistograms", defaultMaxTimeSeries.ToString() }
{ "MaxTimeSeries", "10" },
{ "MaxHistograms", "1000" }
}
);

Expand Down Expand Up @@ -5112,12 +5127,12 @@ private static void StartCollection(string workingDirectory, Job job)
}
}

private static void StartDotNetTrace(int processId, Job job)
private static void StartDotNetTrace(Job job)
{
job.PerfViewTraceFile = Path.Combine(job.BasePath, "trace.nettrace");

dotnetTraceManualReset = new ManualResetEvent(false);
dotnetTraceTask = Collect(dotnetTraceManualReset, processId, new FileInfo(job.PerfViewTraceFile), 256, job.DotNetTraceProviders, TimeSpan.MaxValue);
dotnetTraceTask = Collect(dotnetTraceManualReset, job.ActiveProcessId, new FileInfo(job.PerfViewTraceFile), 256, job.DotNetTraceProviders, TimeSpan.MaxValue);
}

private static async Task UseMonoRuntimeAsync(string runtimeVersion, string outputFolder, string mode)
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.Crank.JobOjectWrapper/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
Console.WriteLine($"Args: {process.StartInfo.Arguments}");

process.Start();
Console.WriteLine($"##ChildProcessId:{process.Id}");
Console.Error.WriteLine($"##ChildProcessId:{process.Id}");
process.WaitForExit();

await Task.Delay(1000);
2 changes: 2 additions & 0 deletions src/Microsoft.Crank.Models/Job.cs
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,8 @@ public Source Source
public string BasePath { get; set; }
public int ProcessId { get; set; }
public int ChildProcessId { get; set; }
/// <summary>
/// Returns <see cref="ChildProcessId"/> when it is greater than zero;
/// otherwise returns <see cref="ProcessId"/>.
/// </summary>
public int ActiveProcessId
{
    get
    {
        if (ChildProcessId > 0)
        {
            return ChildProcessId;
        }

        return ProcessId;
    }
}
/// <summary>
/// Returns a newly allocated array on every access: <see cref="ChildProcessId"/> followed by
/// <see cref="ProcessId"/> when <see cref="ChildProcessId"/> is greater than zero; otherwise
/// a single-element array containing only <see cref="ProcessId"/>.
/// </summary>
public int[] AllProcessIds
{
    get
    {
        if (ChildProcessId <= 0)
        {
            return new[] { ProcessId };
        }

        return new[] { ChildProcessId, ProcessId };
    }
}
public Dictionary<string, string> EnvironmentVariables { get; set; } = new Dictionary<string, string>();
public Dictionary<string, string> PackageReferences { get; set; } = new Dictionary<string, string>();
public List<string> BuildArguments { get; set; } = new List<string>();
Expand Down

0 comments on commit 06f7982

Please sign in to comment.