Skip to content

Commit

Permalink
Time-constrain user commands in XHarness workloads (#7936)
Browse files Browse the repository at this point in the history
We have to make sure that user commands are time-constrained so that we have buffer to call clean-up and send the telemetry at the end of the job.

We do this by bumping the original work item timeout and setting a timeout for the injected user commands.
  • Loading branch information
premun authored Sep 24, 2021
1 parent fcd1fdb commit 8f58bb3
Show file tree
Hide file tree
Showing 11 changed files with 121 additions and 29 deletions.
2 changes: 1 addition & 1 deletion src/Common/Microsoft.Arcade.Common/ZipArchiveManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public async Task AddContentToArchive(string archivePath, string targetFilename,
await content.CopyToAsync(targetStream);
}

private static Stream GetResourceFileContent<TAssembly>(string resourceFileName)
public static Stream GetResourceFileContent<TAssembly>(string resourceFileName)
{
Assembly assembly = typeof(TAssembly).Assembly;
return assembly.GetManifestResourceStream($"{assembly.GetName().Name}.{resourceFileName}");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ public void AndroidXHarnessWorkItemIsCreated()

var workItem = _task.WorkItems.First();
workItem.GetMetadata("Identity").Should().Be("System.Foo");
workItem.GetMetadata("Timeout").Should().Be("00:15:42");
workItem.GetMetadata("Timeout").Should().Be("00:16:02");

var payloadArchive = workItem.GetMetadata("PayloadArchive");
payloadArchive.Should().NotBeNullOrEmpty();
Expand Down Expand Up @@ -178,7 +178,7 @@ public void ZippedApkIsProvided()

var workItem = _task.WorkItems.First();
workItem.GetMetadata("Identity").Should().Be("System.Foo");
workItem.GetMetadata("Timeout").Should().Be("00:15:42");
workItem.GetMetadata("Timeout").Should().Be("00:16:02");

var payloadArchive = workItem.GetMetadata("PayloadArchive");
payloadArchive.Should().NotBeNullOrEmpty();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ public void AppleXHarnessWorkItemIsCreated()

var workItem = _task.WorkItems.First();
workItem.GetMetadata("Identity").Should().Be("System.Foo");
workItem.GetMetadata("Timeout").Should().Be("00:15:42");
workItem.GetMetadata("Timeout").Should().Be("00:16:02");

var payloadArchive = workItem.GetMetadata("PayloadArchive");
payloadArchive.Should().NotBeNullOrEmpty();
Expand Down Expand Up @@ -225,7 +225,7 @@ public void ZippedAppIsProvided()

var workItem = _task.WorkItems.First();
workItem.GetMetadata("Identity").Should().Be("System.Foo");
workItem.GetMetadata("Timeout").Should().Be("00:15:42");
workItem.GetMetadata("Timeout").Should().Be("00:16:02");

var payloadArchive = workItem.GetMetadata("PayloadArchive");
payloadArchive.Should().NotBeNullOrEmpty();
Expand Down
26 changes: 24 additions & 2 deletions src/Microsoft.DotNet.Helix/Sdk/CreateXHarnessAndroidWorkItems.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ public static class MetadataNames

private const string PosixAndroidWrapperScript = "xharness-helix-job.android.sh";
private const string NonPosixAndroidWrapperScript = "xharness-helix-job.android.ps1";
private const string NonPosixAndroidHeaderScript = "xharness-helix-job.android.header.ps1";

/// <summary>
/// Boolean true if this is a posix shell, false if not.
Expand Down Expand Up @@ -105,7 +106,13 @@ private async Task<ITaskItem> PrepareWorkItem(IZipArchiveManager zipArchiveManag
apkName = apkName.Replace(".zip", ".apk");
}

string command = GetHelixCommand(appPackage, apkName, androidPackageName, testTimeout, expectedExitCode);
string command = GetHelixCommand(appPackage, apkName, androidPackageName, workItemTimeout, testTimeout, expectedExitCode);

if (!IsPosixShell)
{
// For windows, we need to add a .ps1 header to turn the script into a cmdlet
customCommands = GetPowerShellHeader() + customCommands;
}

string workItemZip = await CreatePayloadArchive(
zipArchiveManager,
Expand Down Expand Up @@ -150,7 +157,13 @@ private string GetDefaultCommand(ITaskItem appPackage, int expectedExitCode)
$"{ devOutArg } { instrumentationArg } { exitCodeArg } { extraArguments } { passthroughArgs }";
}

private string GetHelixCommand(ITaskItem appPackage, string apkName, string androidPackageName, TimeSpan xHarnessTimeout, int expectedExitCode)
private string GetHelixCommand(
ITaskItem appPackage,
string apkName,
string androidPackageName,
TimeSpan workItemTimeout,
TimeSpan xHarnessTimeout,
int expectedExitCode)
{
appPackage.TryGetMetadata(MetadataNames.AndroidInstrumentationName, out string androidInstrumentationName);
appPackage.TryGetMetadata(MetadataNames.DeviceOutputPath, out string deviceOutputPath);
Expand All @@ -163,6 +176,7 @@ private string GetHelixCommand(ITaskItem appPackage, string apkName, string andr
string dash = IsPosixShell ? "--" : "-";
string xharnessRunCommand = $"{xharnessHelixWrapperScript} " +
$"{dash}app \"{apkName}\" " +
$"{dash}command_timeout {(int)workItemTimeout.TotalSeconds} " +
$"{dash}timeout \"{xHarnessTimeout}\" " +
$"{dash}package_name \"{androidPackageName}\" " +
(expectedExitCode != 0 ? $" {dash}expected_exit_code \"{expectedExitCode}\" " : string.Empty) +
Expand All @@ -173,5 +187,13 @@ private string GetHelixCommand(ITaskItem appPackage, string apkName, string andr

return xharnessRunCommand;
}

private static string GetPowerShellHeader()
{
using Stream stream = ZipArchiveManager.GetResourceFileContent<CreateXHarnessAndroidWorkItems>(
ScriptNamespace + NonPosixAndroidHeaderScript);
using StreamReader reader = new(stream);
return reader.ReadToEnd();
}
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using System;
using System.IO;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Arcade.Common;
using Microsoft.Build.Framework;
Expand Down Expand Up @@ -168,11 +169,11 @@ private async Task<ITaskItem> PrepareWorkItem(
if (customCommands == null)
{
// When no user commands are specified, we add the default `apple test ...` command
customCommands = GetDefaultCommand(target, includesTestRunner, resetSimulator);
customCommands = GetDefaultCommand(includesTestRunner, resetSimulator);
}

string appName = isAlreadyArchived ? $"{fileSystem.GetFileNameWithoutExtension(appFolderPath)}.app" : fileSystem.GetFileName(appFolderPath);
string helixCommand = GetHelixCommand(appName, target, testTimeout, launchTimeout, includesTestRunner, expectedExitCode, resetSimulator);
string helixCommand = GetHelixCommand(appName, target, workItemTimeout, testTimeout, launchTimeout, includesTestRunner, expectedExitCode, resetSimulator);
string payloadArchivePath = await CreatePayloadArchive(
zipArchiveManager,
fileSystem,
Expand All @@ -196,7 +197,7 @@ private bool ValidateAppBundlePath(
return isAlreadyArchived ? fileSystem.FileExists(appBundlePath) : fileSystem.DirectoryExists(appBundlePath);
}

private string GetDefaultCommand(string target, bool includesTestRunner, bool resetSimulator) =>
private string GetDefaultCommand(bool includesTestRunner, bool resetSimulator) =>
$"xharness apple {(includesTestRunner ? "test" : "run")} " +
"--app \"$app\" " +
"--output-directory \"$output_directory\" " +
Expand All @@ -213,6 +214,7 @@ private string GetDefaultCommand(string target, bool includesTestRunner, bool re
private string GetHelixCommand(
string appName,
string target,
TimeSpan workItemTimeout,
TimeSpan testTimeout,
TimeSpan launchTimeout,
bool includesTestRunner,
Expand All @@ -222,6 +224,7 @@ private string GetHelixCommand(
$"chmod +x {EntryPointScript} && ./{EntryPointScript} " +
$"--app \"{appName}\" " +
$"--target \"{target}\" " +
$"--command-timeout {(int)workItemTimeout.TotalSeconds} " +
$"--timeout \"{testTimeout}\" " +
$"--launch-timeout \"{launchTimeout}\" " +
(includesTestRunner ? "--includes-test-runner " : string.Empty) +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@
<EmbeddedResource Include="tools\xharness-runner\xharness-event-reporter.py">
<CopyToOutputDirectory>Never</CopyToOutputDirectory>
</EmbeddedResource>
<EmbeddedResource Include="tools\xharness-runner\xharness-helix-job.android.header.ps1">
<CopyToOutputDirectory>Never</CopyToOutputDirectory>
</EmbeddedResource>
<EmbeddedResource Include="tools\xharness-runner\xharness-helix-job.apple.sh">
<CopyToOutputDirectory>Never</CopyToOutputDirectory>
</EmbeddedResource>
Expand Down
6 changes: 5 additions & 1 deletion src/Microsoft.DotNet.Helix/Sdk/XharnessTaskBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ public abstract class XHarnessTaskBase : MSBuildTaskBase
{
private static readonly TimeSpan s_defaultWorkItemTimeout = TimeSpan.FromMinutes(20);
private static readonly TimeSpan s_defaultTestTimeout = TimeSpan.FromMinutes(12);
protected static readonly TimeSpan s_telemetryBuffer = TimeSpan.FromSeconds(20); // extra time to send the XHarness telemetry

public class MetadataName
{
Expand All @@ -23,7 +24,7 @@ public class MetadataName
public const string CustomCommands = "CustomCommands";
}

private const string ScriptNamespace = "tools.xharness_runner.";
protected const string ScriptNamespace = "tools.xharness_runner.";
private const string CustomCommandsScript = "command";
private const string DiagnosticsScript = "xharness-event-reporter.py";

Expand Down Expand Up @@ -102,6 +103,9 @@ protected Build.Utilities.TaskItem CreateTaskItem(string workItemName, string pa
{
Log.LogMessage($"Creating work item with properties Identity: {workItemName}, Payload: {payloadArchivePath}, Command: {command}");

// Leave some time at the end of the work item to send the telemetry (in case it times out)
timeout += s_telemetryBuffer;

return new(workItemName, new Dictionary<string, string>()
{
{ "Identity", workItemName },
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
<#
This script is used as a payload of Helix jobs that execute Android workloads through XHarness on Windows systems.
This file is a header of script that gets populated with user's custom commands.
This script is separate as it is executed with a timeout in its own process.
#>

param (
[Parameter(Mandatory)]
[string]$output_directory,
[Parameter(Mandatory)]
[string]$app,
[Parameter(Mandatory)]
[string]$timeout,
[Parameter()]
[string]$package_name = $null,
[Parameter()]
[int]$expected_exit_code = 0,
[Parameter()]
[string]$device_output_path = $null,
[Parameter()]
[string]$instrumentation = $null
)

$ErrorActionPreference="Continue"

# The xharness alias
function xharness() {
dotnet exec $Env:XHARNESS_CLI_PATH @args
}

# User can call this when they detect a problem they think is caused by the infrastructure
function report_infrastructure_failure($message) {
Write-Output "Infrastructural problem reported by the user, requesting retry+reboot: $message"

& "$Env:HELIX_PYTHONPATH" -c "from helix.workitemutil import request_infra_retry; request_infra_retry('Retrying because we could not enumerate all Android devices')"
& "$Env:HELIX_PYTHONPATH" -c "from helix.workitemutil import request_reboot; request_reboot('Rebooting to allow Android emulator or device to restart')"
}

Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ param (
[string]$app,
[Parameter(Mandatory)]
[string]$timeout,
[Parameter(Mandatory)]
[int]$command_timeout, # in seconds
[Parameter()]
[string]$package_name = $null,
[Parameter()]
Expand All @@ -22,24 +24,33 @@ param (
[string]$instrumentation = $null
)

$ErrorActionPreference="Stop"

[Diagnostics.CodeAnalysis.SuppressMessageAttribute("PSUseDeclaredVarsMoreThanAssignments", "")] # Variable used in sourced script
$output_directory=$Env:HELIX_WORKITEM_UPLOAD_ROOT

# The xharness alias
function xharness() {
dotnet exec $Env:XHARNESS_CLI_PATH @args
}

$ErrorActionPreference="Continue"

# Act out the actual commands
. "$PSScriptRoot\command.ps1"
# We have to time constrain them to create buffer for the end of this script
$psinfo = [System.Diagnostics.ProcessStartInfo]::new()
$psinfo.FileName = "powershell"
$psinfo.Arguments = " -ExecutionPolicy ByPass -NoProfile -File `"$PSScriptRoot\command.ps1`" -output_directory `"$Env:HELIX_WORKITEM_UPLOAD_ROOT`" -app `"$app`" -timeout `"$timeout`" -package_name `"$package_name`" -expected_exit_code `"$expected_exit_code`" -device_output_path `"$device_output_path`" -instrumentation `"$instrumentation`""
$psinfo.RedirectStandardError = $false
$psinfo.RedirectStandardOutput = $false
$psinfo.UseShellExecute = $false

$ErrorActionPreference="Continue"
$process = [System.Diagnostics.Process]::new()
$process.StartInfo = $psinfo
$process.Start()

$exit_code=$LASTEXITCODE
Wait-Process -InputObject $process -TimeOut $command_timeout -ErrorVariable ev -ErrorAction SilentlyContinue

if ($ev) {
$exit_code = -3
Stop-Process -InputObject $process -Force
$process.WaitForExit()
[Console]::Out.Flush()
Write-Output "User command timed out after $command_timeout seconds!"
} else {
$exit_code = $process.ExitCode
Write-Output "User command ended with $exit_code"
}

$retry=$false
$reboot=$false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ echo "XHarness Helix Job Wrapper calling '$@'"
set -x

app=''
command_timeout=20
timeout=''
package_name=''
expected_exit_code=0
Expand All @@ -26,6 +27,10 @@ while [[ $# -gt 0 ]]; do
app="$2"
shift
;;
--command_timeout)
command_timeout="$2"
shift
;;
--timeout)
timeout="$2"
shift
Expand Down Expand Up @@ -67,8 +72,8 @@ function xharness() {
dotnet exec $XHARNESS_CLI_PATH "$@"
}

# Act out the actual commands
source command.sh
# Act out the actual commands (and time constrain them to create buffer for the end of this script)
source command.sh & PID=$! ; (sleep $command_timeout && kill $PID 2> /dev/null & ) ; wait $PID

exit_code=$?

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ app=''
target=''
timeout=''
launch_timeout=''
command_timeout=20
xcode_version=''
app_arguments=''
expected_exit_code=0
Expand All @@ -31,6 +32,10 @@ while [[ $# -gt 0 ]]; do
timeout="$2"
shift
;;
--command-timeout)
command_timeout="$2"
shift
;;
--launch-timeout)
launch_timeout="$2"
shift
Expand Down Expand Up @@ -139,8 +144,8 @@ function xharness() {
dotnet exec "$XHARNESS_CLI_PATH" "$@"
}

# Act out the actual commands
source command.sh
# Act out the actual commands (and time constrain them to create buffer for the end of this script)
source command.sh & PID=$! ; (sleep $command_timeout && kill $PID 2> /dev/null & ) ; wait $PID
exit_code=$?

# Exit code values - https://github.com/dotnet/xharness/blob/main/src/Microsoft.DotNet.XHarness.Common/CLI/ExitCode.cs
Expand Down Expand Up @@ -183,7 +188,7 @@ find "$output_directory" -name "*.log" -maxdepth 1 -size 0 -print -delete
# Rename test result XML so that AzDO reporter recognizes it
test_results=$(ls "$output_directory"/xunit-*.xml)
if [ -f "$test_results" ]; then
echo "Found test results in $output_directory/$test_results. Renaming to testResults.xml to prepare for Helix upload"
echo "Found test results in $test_results. Renaming to testResults.xml to prepare for Helix upload"

# Prepare test results for Helix to pick up
mv "$test_results" "$output_directory/testResults.xml"
Expand Down

0 comments on commit 8f58bb3

Please sign in to comment.