From 77610ffd98d8a043822c3f059785cbc5220c13cf Mon Sep 17 00:00:00 2001 From: Premek Vysoky Date: Fri, 1 Oct 2021 12:17:09 +0200 Subject: [PATCH 1/5] Enable requesting retry/reboot via files on Windows XHarness workloads --- .../xharness-helix-job.android.header.ps1 | 4 ++-- .../xharness-helix-job.android.ps1 | 18 +++++++++++++----- .../xharness-helix-job.android.sh | 8 ++++++++ .../xharness-helix-job.apple.sh | 4 ++-- .../xharness-runner/xharness-runner.apple.sh | 4 ++-- 5 files changed, 27 insertions(+), 11 deletions(-) diff --git a/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.header.ps1 b/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.header.ps1 index a8033609966..dec6d7c9db4 100644 --- a/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.header.ps1 +++ b/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.header.ps1 @@ -33,7 +33,7 @@ function xharness() { function report_infrastructure_failure($message) { Write-Output "Infrastructural problem reported by the user, requesting retry+reboot: $message" - & "$Env:HELIX_PYTHONPATH" -c "from helix.workitemutil import request_infra_retry; request_infra_retry('Retrying because we could not enumerate all Android devices')" - & "$Env:HELIX_PYTHONPATH" -c "from helix.workitemutil import request_reboot; request_reboot('Rebooting to allow Android emulator or device to restart')" + New-Item -Path "$Env:HELIX_WORKITEM_ROOT" -Name ".retry" -ItemType "file" + New-Item -Path "$Env:HELIX_WORKITEM_ROOT" -Name ".reboot" -ItemType "file" } diff --git a/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.ps1 b/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.ps1 index 8c0a4e21a9b..4eb7f197e39 100644 --- a/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.ps1 +++ b/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.ps1 @@ -52,8 +52,8 @@ if ($ev) { Write-Output "User command ended with $exit_code" } -$retry=$false -$reboot=$false +$retry = $false +$reboot = $false switch ($exit_code) { @@ -61,8 +61,8 @@ switch ($exit_code) 85 { Write-Error "Encountered ADB_DEVICE_ENUMERATION_FAILURE. This is typically not a failure of the work item. We will run it again and reboot this computer to help its devices" Write-Error "If this occurs repeatedly, please check for architectural mismatch, e.g. sending x86 or x86_64 APKs to an arm64_v8a-only queue." - $retry=$true - $reboot=$true + $retry = $true + $reboot = $true Break } @@ -70,11 +70,19 @@ switch ($exit_code) 78 { Write-Error "Encountered PACKAGE_INSTALLATION_FAILURE. This is typically not a failure of the work item. We will try it again on another Helix agent" Write-Error "If this occurs repeatedly, please check for architectural mismatch, e.g. requesting installation on arm64_v8a-only queue for x86 or x86_64 APKs." - $retry=$true + $retry = $true Break } } +if (Test-Path -Path "$Env:HELIX_WORKITEM_ROOT\.retry" -PathType Leaf) { + $retry = $true; +} + +if (Test-Path -Path "$Env:HELIX_WORKITEM_ROOT\.reboot" -PathType Leaf) { + $reboot = $true; +} + if ($retry) { & "$Env:HELIX_PYTHONPATH" -c "from helix.workitemutil import request_infra_retry; request_infra_retry('Retrying because we could not enumerate all Android devices')" } diff --git a/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.sh b/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.sh index ce025ebc736..a49d55d0d05 100644 --- a/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.sh +++ b/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.sh @@ -105,6 +105,14 @@ case "$exit_code" in ;; esac +if [ -f "$HELIX_WORKITEM_ROOT/.retry" ]; then + retry=true +fi + +if [ -f "$HELIX_WORKITEM_ROOT/.reboot" ]; then + reboot=true +fi + if [ "$retry" == true ]; then "$HELIX_PYTHONPATH" -c "from helix.workitemutil import request_infra_retry; request_infra_retry('Retrying because we could not enumerate all Android devices')" fi diff --git a/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.apple.sh b/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.apple.sh index c87d0375930..54e4b2328ba 100644 --- a/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.apple.sh +++ b/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.apple.sh @@ -44,11 +44,11 @@ exit_code=$? # We usually also ask the work item to be re-tried on a different machine # Since we run the payload script using launchctl, env vars such as PYTHON_PATH are not set there and we have to do this part here # We signal this by creating files -if [ -f './.retry' ]; then +if [ -f "$HELIX_WORKITEM_ROOT/.retry" ]; then "$HELIX_PYTHONPATH" -c "from helix.workitemutil import request_infra_retry; request_infra_retry('Retrying work item because XHarness workload requested it')" fi -if [ -f './.reboot' ]; then +if [ -f "$HELIX_WORKITEM_ROOT/.reboot" ]; then "$HELIX_PYTHONPATH" -c "from helix.workitemutil import request_reboot; request_reboot('Rebooting because XHarness workload requested it)" fi diff --git a/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-runner.apple.sh b/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-runner.apple.sh index 0f9188a88ab..111e45c337c 100644 --- a/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-runner.apple.sh +++ b/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-runner.apple.sh @@ -147,8 +147,8 @@ function xharness() { function report_infrastructure_failure() { echo "Infrastructural problem reported by the user, requesting retry+reboot: $1" - touch './.retry' - touch './.reboot' + touch "$HELIX_WORKITEM_ROOT/.retry" + touch "$HELIX_WORKITEM_ROOT/.reboot" } # Act out the actual commands (and time constrain them to create buffer for the end of this script) From 3b3694fe5347006902e7c74defa38bad8714a1db Mon Sep 17 00:00:00 2001 From: Premek Vysoky Date: Fri, 1 Oct 2021 12:31:23 +0200 Subject: [PATCH 2/5] Missing `'` --- .../Sdk/tools/xharness-runner/xharness-helix-job.apple.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.apple.sh b/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.apple.sh index 54e4b2328ba..8da10de3b17 100644 --- a/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.apple.sh +++ b/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.apple.sh @@ -49,7 +49,7 @@ if [ -f "$HELIX_WORKITEM_ROOT/.retry" ]; then fi if [ -f "$HELIX_WORKITEM_ROOT/.reboot" ]; then - "$HELIX_PYTHONPATH" -c "from helix.workitemutil import request_reboot; request_reboot('Rebooting because XHarness workload requested it)" + "$HELIX_PYTHONPATH" -c "from helix.workitemutil import request_reboot; request_reboot('Rebooting because XHarness workload requested it')" fi exit $exit_code From 2d581cb82daf545a4e22817cebbbcc28d311ed74 Mon Sep 17 00:00:00 2001 From: Premek Vysoky Date: Fri, 1 Oct 2021 12:35:23 +0200 Subject: [PATCH 3/5] Add -Force --- .../xharness-runner/xharness-helix-job.android.header.ps1 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.header.ps1 b/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.header.ps1 index dec6d7c9db4..ca84761923b 100644 --- a/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.header.ps1 +++ b/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.header.ps1 @@ -33,7 +33,7 @@ function xharness() { function report_infrastructure_failure($message) { Write-Output "Infrastructural problem reported by the user, requesting retry+reboot: $message" - New-Item -Path "$Env:HELIX_WORKITEM_ROOT" -Name ".retry" -ItemType "file" - New-Item -Path "$Env:HELIX_WORKITEM_ROOT" -Name ".reboot" -ItemType "file" + New-Item -Path "$Env:HELIX_WORKITEM_ROOT" -Name ".retry" -ItemType "file" -Force + New-Item -Path "$Env:HELIX_WORKITEM_ROOT" -Name ".reboot" -ItemType "file" -Force } From 4767f16d7832d0e942a1989bb932ebbb56ffe72b Mon Sep 17 00:00:00 2001 From: Premek Vysoky Date: Fri, 1 Oct 2021 13:38:57 +0200 Subject: [PATCH 4/5] Accept message inside of .retry/.reboot files --- .../xharness-helix-job.android.header.ps1 | 3 +++ .../xharness-helix-job.android.ps1 | 14 +++++++++++-- .../xharness-helix-job.android.sh | 21 +++++++++++++++++-- .../xharness-helix-job.apple.sh | 16 ++++++++++++-- .../xharness-runner/xharness-runner.apple.sh | 4 ++-- 5 files changed, 50 insertions(+), 8 deletions(-) diff --git a/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.header.ps1 b/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.header.ps1 index ca84761923b..42aa6640e9e 100644 --- a/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.header.ps1 +++ b/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.header.ps1 @@ -35,5 +35,8 @@ function report_infrastructure_failure($message) { New-Item -Path "$Env:HELIX_WORKITEM_ROOT" -Name ".retry" -ItemType "file" -Force New-Item -Path "$Env:HELIX_WORKITEM_ROOT" -Name ".reboot" -ItemType "file" -Force + + $message | Out-File -FilePath "$Env:HELIX_WORKITEM_ROOT\.retry" + $message | Out-File -FilePath "$Env:HELIX_WORKITEM_ROOT\.reboot" } diff --git a/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.ps1 b/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.ps1 index 4eb7f197e39..7b82076a871 100644 --- a/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.ps1 +++ b/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.ps1 @@ -77,18 +77,28 @@ switch ($exit_code) if (Test-Path -Path "$Env:HELIX_WORKITEM_ROOT\.retry" -PathType Leaf) { $retry = $true; + $retry_message = Get-Content -Path "$Env:HELIX_WORKITEM_ROOT\.retry" } if (Test-Path -Path "$Env:HELIX_WORKITEM_ROOT\.reboot" -PathType Leaf) { $reboot = $true; + $reboot_message = Get-Content -Path "$Env:HELIX_WORKITEM_ROOT\.reboot" } if ($retry) { - & "$Env:HELIX_PYTHONPATH" -c "from helix.workitemutil import request_infra_retry; request_infra_retry('Retrying because we could not enumerate all Android devices')" + if ([string]::IsNullOrEmpty($retry_message)) { + $retry_message = 'Retrying because we could not enumerate all Android devices' + } + + & "$Env:HELIX_PYTHONPATH" -c "from helix.workitemutil import request_infra_retry; request_infra_retry('$retry_message')" } if ($reboot) { - & "$Env:HELIX_PYTHONPATH" -c "from helix.workitemutil import request_reboot; request_reboot('Rebooting to allow Android emulator or device to restart')" + if ([string]::IsNullOrEmpty($reboot_message)) { + $reboot_message = 'Rebooting to allow Android emulator to restart' + } + + & "$Env:HELIX_PYTHONPATH" -c "from helix.workitemutil import request_reboot; request_reboot('$reboot_message')" } exit $exit_code diff --git a/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.sh b/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.sh index a49d55d0d05..dd879e3ed29 100644 --- a/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.sh +++ b/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.sh @@ -72,6 +72,13 @@ function xharness() { dotnet exec $XHARNESS_CLI_PATH "$@" } +function report_infrastructure_failure() { + echo "Infrastructural problem reported by the user, requesting retry+reboot: $1" + + echo "$1" > "$HELIX_WORKITEM_ROOT/.retry" + echo "$1" > "$HELIX_WORKITEM_ROOT/.reboot" +} + # Act out the actual commands (and time constrain them to create buffer for the end of this script) source command.sh & PID=$! ; (sleep $command_timeout && kill $PID 2> /dev/null & ) ; wait $PID @@ -107,18 +114,28 @@ esac if [ -f "$HELIX_WORKITEM_ROOT/.retry" ]; then retry=true + retry_message=$(cat "$HELIX_WORKITEM_ROOT/.retry") fi if [ -f "$HELIX_WORKITEM_ROOT/.reboot" ]; then reboot=true + reboot_message=$(cat "$HELIX_WORKITEM_ROOT/.reboot") fi if [ "$retry" == true ]; then - "$HELIX_PYTHONPATH" -c "from helix.workitemutil import request_infra_retry; request_infra_retry('Retrying because we could not enumerate all Android devices')" + if [ -z "$retry_message" ]; then + retry_message='Retrying because we could not enumerate all Android devices' + fi + + "$HELIX_PYTHONPATH" -c "from helix.workitemutil import request_infra_retry; request_infra_retry('$retry_message')" fi if [ "$reboot" == true ]; then - "$HELIX_PYTHONPATH" -c "from helix.workitemutil import request_reboot; request_reboot('Rebooting to allow Android emulator to restart')" + if [ -z "$reboot_message" ]; then + reboot_message='Rebooting to allow Android emulator to restart' + fi + + "$HELIX_PYTHONPATH" -c "from helix.workitemutil import request_reboot; request_reboot('$reboot_message')" fi exit $exit_code diff --git a/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.apple.sh b/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.apple.sh index 8da10de3b17..ad968c960af 100644 --- a/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.apple.sh +++ b/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.apple.sh @@ -45,11 +45,23 @@ exit_code=$? # Since we run the payload script using launchctl, env vars such as PYTHON_PATH are not set there and we have to do this part here # We signal this by creating files if [ -f "$HELIX_WORKITEM_ROOT/.retry" ]; then - "$HELIX_PYTHONPATH" -c "from helix.workitemutil import request_infra_retry; request_infra_retry('Retrying work item because XHarness workload requested it')" + retry_message=$(cat "$HELIX_WORKITEM_ROOT/.retry") + + if [ -z "$retry_message" ]; then + retry_message='Retrying because we could not enumerate all Android devices' + fi + + "$HELIX_PYTHONPATH" -c "from helix.workitemutil import request_infra_retry; request_infra_retry('$retry_message')" fi if [ -f "$HELIX_WORKITEM_ROOT/.reboot" ]; then - "$HELIX_PYTHONPATH" -c "from helix.workitemutil import request_reboot; request_reboot('Rebooting because XHarness workload requested it')" + reboot_message=$(cat "$HELIX_WORKITEM_ROOT/.reboot") + + if [ -z "$reboot_message" ]; then + reboot_message='Rebooting to allow Android emulator to restart' + fi + + "$HELIX_PYTHONPATH" -c "from helix.workitemutil import request_reboot; request_reboot('$reboot_message')" fi exit $exit_code diff --git a/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-runner.apple.sh b/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-runner.apple.sh index 111e45c337c..836db2e74dd 100644 --- a/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-runner.apple.sh +++ b/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-runner.apple.sh @@ -147,8 +147,8 @@ function xharness() { function report_infrastructure_failure() { echo "Infrastructural problem reported by the user, requesting retry+reboot: $1" - touch "$HELIX_WORKITEM_ROOT/.retry" - touch "$HELIX_WORKITEM_ROOT/.reboot" + echo "$1" > "$HELIX_WORKITEM_ROOT/.retry" + echo "$1" > "$HELIX_WORKITEM_ROOT/.reboot" } # Act out the actual commands (and time constrain them to create buffer for the end of this script) From df68bfba4fa4f6b0edee7a578b4747dfabb0f721 Mon Sep 17 00:00:00 2001 From: Premek Vysoky Date: Fri, 1 Oct 2021 14:32:33 +0200 Subject: [PATCH 5/5] Escape \ and ' --- .../xharness-runner/xharness-helix-job.android.header.ps1 | 4 ++-- .../Sdk/tools/xharness-runner/xharness-helix-job.android.sh | 4 ++-- .../Sdk/tools/xharness-runner/xharness-helix-job.apple.sh | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.header.ps1 b/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.header.ps1 index 42aa6640e9e..e91764050f2 100644 --- a/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.header.ps1 +++ b/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.header.ps1 @@ -36,7 +36,7 @@ function report_infrastructure_failure($message) { New-Item -Path "$Env:HELIX_WORKITEM_ROOT" -Name ".retry" -ItemType "file" -Force New-Item -Path "$Env:HELIX_WORKITEM_ROOT" -Name ".reboot" -ItemType "file" -Force - $message | Out-File -FilePath "$Env:HELIX_WORKITEM_ROOT\.retry" - $message | Out-File -FilePath "$Env:HELIX_WORKITEM_ROOT\.reboot" + $message -replace "['\\]" | Out-File -FilePath "$Env:HELIX_WORKITEM_ROOT\.retry" + $message -replace "['\\]" | Out-File -FilePath "$Env:HELIX_WORKITEM_ROOT\.reboot" } diff --git a/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.sh b/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.sh index dd879e3ed29..5754e39f42f 100644 --- a/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.sh +++ b/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.android.sh @@ -114,12 +114,12 @@ esac if [ -f "$HELIX_WORKITEM_ROOT/.retry" ]; then retry=true - retry_message=$(cat "$HELIX_WORKITEM_ROOT/.retry") + retry_message=$(cat "$HELIX_WORKITEM_ROOT/.retry" | tr -d "'\\\\") fi if [ -f "$HELIX_WORKITEM_ROOT/.reboot" ]; then reboot=true - reboot_message=$(cat "$HELIX_WORKITEM_ROOT/.reboot") + reboot_message=$(cat "$HELIX_WORKITEM_ROOT/.reboot" | tr -d "'\\\\") fi if [ "$retry" == true ]; then diff --git a/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.apple.sh b/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.apple.sh index ad968c960af..7ed61784d5e 100644 --- a/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.apple.sh +++ b/src/Microsoft.DotNet.Helix/Sdk/tools/xharness-runner/xharness-helix-job.apple.sh @@ -45,7 +45,7 @@ exit_code=$? # Since we run the payload script using launchctl, env vars such as PYTHON_PATH are not set there and we have to do this part here # We signal this by creating files if [ -f "$HELIX_WORKITEM_ROOT/.retry" ]; then - retry_message=$(cat "$HELIX_WORKITEM_ROOT/.retry") + retry_message=$(cat "$HELIX_WORKITEM_ROOT/.retry" | tr -d "'\\\\") if [ -z "$retry_message" ]; then retry_message='Retrying because we could not enumerate all Android devices' @@ -55,7 +55,7 @@ if [ -f "$HELIX_WORKITEM_ROOT/.retry" ]; then fi if [ -f "$HELIX_WORKITEM_ROOT/.reboot" ]; then - reboot_message=$(cat "$HELIX_WORKITEM_ROOT/.reboot") + reboot_message=$(cat "$HELIX_WORKITEM_ROOT/.reboot" | tr -d "'\\\\") if [ -z "$reboot_message" ]; then reboot_message='Rebooting to allow Android emulator to restart'