Skip to content

Commit

Permalink
[Merge-on-Red] - Implement Test Process Watcher (#78742)
Browse files Browse the repository at this point in the history
Initial implementation of the test watcher that looks out for hangs and freezes during test runs.
  • Loading branch information
ivdiazsa authored Mar 10, 2023
1 parent f52e277 commit 728fd85
Show file tree
Hide file tree
Showing 6 changed files with 186 additions and 9 deletions.
6 changes: 6 additions & 0 deletions src/coreclr/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,11 @@ else()
endif()
endif()

#----------------------------------------------------
# Build the test watchdog alongside the CLR
#----------------------------------------------------
add_subdirectory("${CLR_SRC_NATIVE_DIR}/watchdog" test-watchdog)

# Add this subdir. We install the headers for the jit.
add_subdirectory(pal/prebuilt/inc)

Expand Down Expand Up @@ -275,3 +280,4 @@ endif(NOT CLR_CMAKE_HOST_MACCATALYST AND NOT CLR_CMAKE_HOST_IOS AND NOT CLR_CMAK
if(CLR_CROSS_COMPONENTS_BUILD)
include(crosscomponents.cmake)
endif(CLR_CROSS_COMPONENTS_BUILD)

4 changes: 4 additions & 0 deletions src/native/watchdog/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Build the `watchdog` executable from the single source file that lives next to this list file.
# NOTE(review): add_executable_clr is a repo-provided wrapper, presumably around add_executable — confirm.
add_executable_clr(watchdog ${CMAKE_CURRENT_LIST_DIR}/watchdog.cpp)
# Register the binary for installation under two install components (`hosts` and `nativeaot`),
# each with destination `.`.
# NOTE(review): exact install layout depends on the repo's install_clr helper — verify there.
install_clr(TARGETS watchdog DESTINATIONS . COMPONENT hosts)
install_clr(TARGETS watchdog DESTINATIONS . COMPONENT nativeaot)

136 changes: 136 additions & 0 deletions src/native/watchdog/watchdog.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

#include <climits>
#include <cstdio>
#include <cstdlib>
#include <errno.h>
#include <signal.h>

#ifdef TARGET_WINDOWS

#include <windows.h>
#include <string>

#else // !TARGET_WINDOWS

#include <chrono>
#include <sys/wait.h>
#include <thread>
#include <unistd.h>
#include <vector>

#endif // TARGET_WINDOWS

int run_timed_process(const long, const int, const char *[]);

// Entry point of the test watchdog.
//
// Usage: watchdog <timeout-in-seconds> <executable> [args...]
//
// Launches <executable> with the remaining arguments and waits for it for at
// most <timeout-in-seconds>. The watchdog's own exit code is whatever
// run_timed_process() returns (the child's exit code, or an errno-style code
// when the child could not be run or timed out). EXIT_FAILURE is returned when
// the watchdog's own command line is malformed.
#ifdef TARGET_X86
int __cdecl main(const int argc, const char *argv[])
#else
int main(const int argc, const char *argv[])
#endif
{
    if (argc < 3)
    {
        printf("There are missing arguments. Got %d instead of 3+ :(\n", argc);
        return EXIT_FAILURE;
    }

    // Parse the timeout strictly: strtol() on its own silently turns garbage
    // into 0 (an immediate timeout) and happily accepts negative values.
    char *parse_end = nullptr;
    errno = 0;
    const long timeout_sec = strtol(argv[1], &parse_end, 10);

    const bool not_a_number = (parse_end == argv[1]) || (*parse_end != '\0');
    const bool out_of_range = (errno != 0)
                              || (timeout_sec <= 0)
                              || (timeout_sec > LONG_MAX / 1000L); // seconds -> ms must not overflow

    if (not_a_number || out_of_range)
    {
        printf("Invalid timeout '%s'. Expected a positive number of seconds.\n", argv[1]);
        return EXIT_FAILURE;
    }

    // Everything after the timeout is the command line of the process to watch.
    const int exit_code = run_timed_process(timeout_sec * 1000L, argc - 2, &argv[2]);

    printf("App Exit Code: %d\n", exit_code);
    return exit_code;
}

#ifdef TARGET_WINDOWS
// Appends `arg` to `cmdline`, quoting/escaping it so that the child's C runtime
// parses it back into exactly one argument. Arguments without whitespace or
// quotes are appended verbatim. Otherwise the argument is wrapped in double
// quotes, embedded quotes are backslash-escaped, and any run of backslashes
// that ends up directly in front of a quote is doubled.
static void append_quoted_arg(std::string &cmdline, const char *arg)
{
    const std::string text(arg);

    if (!text.empty() && text.find_first_of(" \t\n\v\"") == std::string::npos)
    {
        cmdline.append(text);
        return;
    }

    cmdline.push_back('"');

    for (size_t i = 0; ; i++)
    {
        size_t backslashes = 0;

        while (i < text.size() && text[i] == '\\')
        {
            i++;
            backslashes++;
        }

        if (i == text.size())
        {
            // These backslashes precede the closing quote we add below.
            cmdline.append(backslashes * 2, '\\');
            break;
        }

        if (text[i] == '"')
        {
            // Escape the backslashes and the quote itself.
            cmdline.append(backslashes * 2 + 1, '\\');
        }
        else
        {
            // Backslashes not followed by a quote are literal.
            cmdline.append(backslashes, '\\');
        }

        cmdline.push_back(text[i]);
    }

    cmdline.push_back('"');
}
#endif // TARGET_WINDOWS

// Runs the given command line as a child process and waits for it to finish
// for at most `timeout_ms` milliseconds.
//
// Parameters:
//   timeout_ms - Maximum time to wait for the child, in milliseconds.
//   proc_argc  - Number of entries in `proc_argv` (must be at least 1).
//   proc_argv  - Executable path followed by its arguments.
//
// Returns:
//   - The child's exit code when it exits by itself within the timeout.
//   - 128 + signal number when the child is terminated by a signal (Unix).
//   - 127 when the executable could not be launched (Unix).
//   - ETIMEDOUT when the timeout expires; the child is killed in that case.
//   - EINVAL when no command was given, or the OS error code when creating or
//     waiting for the child fails.
int run_timed_process(const long timeout_ms, const int proc_argc, const char *proc_argv[])
{
    if (proc_argc < 1 || proc_argv == nullptr || proc_argv[0] == nullptr)
    {
        printf("No process to run was specified.\n");
        return EINVAL;
    }

#ifdef TARGET_WINDOWS
    // CreateProcess takes one flat command line, so every argument has to be
    // re-quoted or paths/arguments with spaces would be split up again.
    std::string cmdline;

    for (int i = 0; i < proc_argc; i++)
    {
        if (i > 0)
            cmdline.push_back(' ');

        append_quoted_arg(cmdline, proc_argv[i]);
    }

    STARTUPINFOA startup_info;
    PROCESS_INFORMATION proc_info;

    ZeroMemory(&startup_info, sizeof(startup_info));
    startup_info.cb = sizeof(startup_info);
    ZeroMemory(&proc_info, sizeof(proc_info));

    if (!CreateProcessA(NULL, &cmdline[0], NULL, NULL, FALSE, 0, NULL, NULL,
                        &startup_info, &proc_info))
    {
        int error_code = GetLastError();
        printf("Process creation failed... Code %d.\n", error_code);
        return error_code;
    }

    int result;
    const DWORD wait_code = WaitForSingleObject(proc_info.hProcess, static_cast<DWORD>(timeout_ms));

    if (wait_code == WAIT_OBJECT_0)
    {
        // The child finished on its own: report its real exit code.
        DWORD exit_code = 0;

        if (GetExitCodeProcess(proc_info.hProcess, &exit_code))
            result = static_cast<int>(exit_code);
        else
            result = static_cast<int>(GetLastError());
    }
    else
    {
        if (wait_code == WAIT_TIMEOUT)
        {
            printf("Child process took too long. Timed out... Exiting...\n");
            result = ETIMEDOUT;
        }
        else
        {
            result = static_cast<int>(GetLastError());
            printf("Waiting for the process failed... Code %d.\n", result);
        }

        // Never leave a hung child behind; that is the whole point of the watchdog.
        TerminateProcess(proc_info.hProcess, static_cast<UINT>(result));
    }

    CloseHandle(proc_info.hProcess);
    CloseHandle(proc_info.hThread);
    return result;

#else // !TARGET_WINDOWS

    // execv() expects a NULL-terminated argument vector.
    std::vector<const char*> args(proc_argv, proc_argv + proc_argc);
    args.push_back(nullptr);

    const pid_t child_pid = fork();

    if (child_pid < 0)
    {
        const int fork_error = errno;
        printf("Fork failed... Code %d.\n", fork_error);
        return fork_error;
    }

    if (child_pid == 0)
    {
        // Child: become the requested program. execv() only returns on failure.
        execv(args[0], const_cast<char* const*>(args.data()));

        const int exec_error = errno;
        fprintf(stderr, "Failed to launch '%s'... Code %d.\n", args[0], exec_error);

        // The child must terminate right here. Falling through into the
        // parent's code would end in kill(0, SIGKILL), which signals the whole
        // process group. _exit() also skips the stdio buffers and atexit
        // handlers inherited from the parent. 127 mirrors the shell's
        // "command could not be executed" convention.
        _exit(127);
    }

    // Parent: poll the child until it terminates or the deadline passes.
    const auto check_interval = std::chrono::milliseconds(25);
    const auto deadline = std::chrono::steady_clock::now() + std::chrono::milliseconds(timeout_ms);

    for (;;)
    {
        int child_status = 0;
        const pid_t wait_code = waitpid(child_pid, &child_status, WNOHANG);

        if (wait_code == -1)
        {
            if (errno == EINTR)
                continue; // Interrupted by a signal; just ask again.

            const int wait_error = errno;
            printf("Waiting for the child process failed... Code %d.\n", wait_error);
            return wait_error;
        }

        if (wait_code == child_pid)
        {
            if (WIFEXITED(child_status))
                return WEXITSTATUS(child_status);

            // Crashes (SIGSEGV, SIGABRT, ...) end up here. Use the same
            // 128 + signal convention shells use for their exit status.
            if (WIFSIGNALED(child_status))
                return 128 + WTERMSIG(child_status);
        }

        if (std::chrono::steady_clock::now() >= deadline)
            break;

        std::this_thread::sleep_for(check_interval);
    }

    printf("Child process took too long. Timed out... Exiting...\n");

    // Deliberately no blocking waitpid() afterwards: a child stuck in the
    // kernel would make the watchdog itself hang.
    kill(child_pid, SIGKILL);
    return ETIMEDOUT;

#endif // TARGET_WINDOWS
}

23 changes: 19 additions & 4 deletions src/tests/Common/CLRTest.Execute.Bash.targets
Original file line number Diff line number Diff line change
Expand Up @@ -187,12 +187,18 @@ fi
<Command><![CDATA[ export __DotEnv="${i#*=}"
if [ ! -f "$__DotEnv" ]
then
echo "The Debugger FullPath %5C%22${__DotEnv}%5C%22 does not exist"
echo "The dotenv file FullPath %5C%22${__DotEnv}%5C%22 does not exist"
usage
fi
export __DotEnvArg=-e ${__DotEnv}]]></Command>
<Description>A dotenv file to pass to corerun to set environment variables for the test run.</Description>
</BashCLRTestExecutionScriptArgument>

<BashCLRTestExecutionScriptArgument Include="usewatcher">
<HasParam>false</HasParam>
<Command><![CDATA[ _RunWithWatcher=1]]></Command>
<Description>Run the tests using the test watcher.</Description>
</BashCLRTestExecutionScriptArgument>
</ItemGroup>

<PropertyGroup>
Expand Down Expand Up @@ -250,10 +256,11 @@ then
exit 1
fi
# Copy CORECLR native binaries to $LinkBin,
# Copy CORECLR native binaries and the test watcher to $LinkBin,
# so that we can run the test based on that directory
cp $CORE_ROOT/*.so $LinkBin/
cp $CORE_ROOT/corerun $LinkBin/
cp $CORE_ROOT/watchdog $LinkBin/
# Copy some files that may be arguments
for f in *.txt;
Expand Down Expand Up @@ -283,6 +290,7 @@ fi
</PropertyGroup>
<PropertyGroup>
<CLRTestRunFile Condition="'$(CLRTestIsHosted)'=='true'">"$CORE_ROOT/corerun" $(CoreRunArgs) ${__DotEnvArg}</CLRTestRunFile>
<WatcherRunFile>"$CORE_ROOT/watchdog" 300</WatcherRunFile>

<!-- Note that this overwrites CLRTestBashPreCommands rather than adding to it. -->
<CLRTestBashPreCommands Condition="'$(CLRTestKind)' == 'BuildAndRun' and '$(TargetArchitecture)' == 'wasm'"><![CDATA[
Expand Down Expand Up @@ -318,6 +326,9 @@ fi
if [ ! -z "$CLRCustomTestLauncher" ];
then
LAUNCHER="$CLRCustomTestLauncher $PWD/"
elif [ "$_RunWithWatcher" == 1 ];
then
LAUNCHER="$(WatcherRunFile) $(CLRTestRunFile)"
else
LAUNCHER="$_DebuggerFullPath $_DebuggerArgsSeparator $(CLRTestRunFile)"
fi
Expand Down Expand Up @@ -346,8 +357,11 @@ $(BashLinkerTestLaunchCmds)
if [ ! -z "$CLRCustomTestLauncher" ];
then
LAUNCHER="$CLRCustomTestLauncher $PWD/"
elif [ "$_RunWithWatcher" == 1 ];
then
LAUNCHER="$(WatcherRunFile) $(CLRTestRunFile)"
else
LAUNCHER="$_DebuggerFullPath $(CLRTestRunFile)"
LAUNCHER="$_DebuggerFullPath $_DebuggerArgsSeparator $(CLRTestRunFile)"
fi
$(BashIlrtTestLaunchCmds)
Expand Down Expand Up @@ -484,7 +498,7 @@ usage()
for i in "$@"
do
case $i in
-?|-h|--help)
-?|-h|--help|/?|/h|/help)
usage
%3B%3B
@(BashCLRTestExecutionScriptArgument -> ' -%(Identity)%(ParamText)|/%(Identity)%(ParamText))
Expand Down Expand Up @@ -534,6 +548,7 @@ ReleaseLock()
}
cd "$%28dirname "${BASH_SOURCE[0]}")"
LockFile="lock"
_RunWithWatcher=0
# The __TestEnv variable may be used to specify a script to source before the test.
Expand Down
18 changes: 16 additions & 2 deletions src/tests/Common/CLRTest.Execute.Batch.targets
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,14 @@ Exit /b 0
]]></Command>
<Description>Set CORE_ROOT to the specified value before running the test.</Description>
</BatchCLRTestExecutionScriptArgument>

<BatchCLRTestExecutionScriptArgument Include="usewatcher">
<HasParam>false</HasParam>
<Command><![CDATA[
set /A _RunWithWatcher=1
]]></Command>
<Description>Run the tests using the test watcher.</Description>
</BatchCLRTestExecutionScriptArgument>
</ItemGroup>

<PropertyGroup>
Expand Down Expand Up @@ -260,17 +268,18 @@ IF defined DoLink (
Exit /b 1
)
REM Copy CORECLR native binaries to %LinkBin%, so that we can run the test based on that directory
REM Copy CORECLR native binaries and the test watcher to %LinkBin%, so that we can run the test based on that directory
copy %CORE_ROOT%\clrjit.dll %LinkBin% > nul 2> nul
copy %CORE_ROOT%\coreclr.dll %LinkBin% > nul 2> nul
copy %CORE_ROOT%\mscorrc.dll %LinkBin% > nul 2> nul
copy %CORE_ROOT%\CoreRun.exe %LinkBin% > nul 2> nul
copy %CORE_ROOT%\watchdog.exe %LinkBin% > nul 2> nul
REM Copy some files that may be arguments
copy *.txt %LinkBin% > nul 2> nul
set ExePath=%LinkBin%\$(InputAssemblyName)
set CORE_ROOT=%scriptPath%LinkBin%
set CORE_ROOT=%scriptPath%\%LinkBin%
)
]]>
</BatchLinkerTestLaunchCmds>
Expand All @@ -289,6 +298,8 @@ if defined DoLink (
</PropertyGroup>
<PropertyGroup>
<CLRTestRunFile Condition="'$(CLRTestIsHosted)'=='true'">"%CORE_ROOT%\corerun.exe" $(CoreRunArgs) %__DotEnvArg%</CLRTestRunFile>
<WatcherRunFile>"%CORE_ROOT%\watchdog.exe" 300</WatcherRunFile>

<BatchCopyCoreShimLocalCmds Condition="'$(CLRTestScriptLocalCoreShim)' == 'true'"><![CDATA[
REM Local CoreShim requested - see MSBuild property 'CLRTestScriptLocalCoreShim'
ECHO Copying '%CORE_ROOT%\CoreShim.dll'...
Expand All @@ -301,6 +312,8 @@ $(BatchCopyCoreShimLocalCmds)
IF NOT "%CLRCustomTestLauncher%"=="" (
set LAUNCHER=call %CLRCustomTestLauncher% %scriptPath%
) ELSE IF %_RunWithWatcher% EQU 1 (
set LAUNCHER=$(WatcherRunFile) $(CLRTestRunFile)
) ELSE (
set LAUNCHER=%_DebuggerFullPath% $(CLRTestRunFile)
)
Expand Down Expand Up @@ -425,6 +438,7 @@ setlocal ENABLEDELAYEDEXPANSION
set "lockFolder=%~dp0\lock"
pushd %~dp0
set "scriptPath=%~dp0"
set /A _RunWithWatcher=0
$(BatchCLRTestArgPrep)
$(BatchCLRTestExitCodePrep)
Expand Down
8 changes: 5 additions & 3 deletions src/tests/Common/helixpublishwitharcade.proj
Original file line number Diff line number Diff line change
Expand Up @@ -411,8 +411,8 @@
<HelixCommandLines Condition="'$(TestWrapperTargetsWindows)' != 'true'" Include="export TEST_HARNESS_STRIPE_TO_EXECUTE=.0.1" />
<HelixCommandLines Condition="'$(TestWrapperTargetsWindows)' != 'true'" Include="chmod +x $(_MergedWrapperRunScriptRelative)" />
<!-- Force assemblies to lazy-load for LLVM AOT test runs to enable using tests that fail at AOT time (and as a result can't be AOTd) -->
<HelixCommandLines Condition="'$(RuntimeVariant)' == 'llvmfullaot'" Include="$(_MergedWrapperRunScriptPrefix)$(_MergedWrapperRunScriptRelative) --aot-lazy-assembly-load" />
<HelixCommandLines Condition="'$(RuntimeVariant)' != 'llvmfullaot'" Include="$(_MergedWrapperRunScriptPrefix)$(_MergedWrapperRunScriptRelative)" />
<HelixCommandLines Condition="'$(RuntimeVariant)' == 'llvmfullaot'" Include="$(_MergedWrapperRunScriptPrefix)$(_MergedWrapperRunScriptRelative) -usewatcher --aot-lazy-assembly-load" />
<HelixCommandLines Condition="'$(RuntimeVariant)' != 'llvmfullaot'" Include="$(_MergedWrapperRunScriptPrefix)$(_MergedWrapperRunScriptRelative) -usewatcher" />
<HelixCommandLines Include="$(XUnitLogCheckerCommand)" />
</ItemGroup>

Expand Down Expand Up @@ -722,7 +722,9 @@

<ItemGroup Condition=" '$(UsesHelixSdk)' == 'true' ">
<HelixCorrelationPayload Include="$(CoreRootDirectory)" />
<HelixCorrelationPayload Include="$(XUnitLogCheckerDirectory)" />

<!-- Browser-Wasm follows a very different workflow, which is currently out of scope of the Log Checker. -->
<HelixCorrelationPayload Include="$(XUnitLogCheckerDirectory)" Condition="'$(TargetsBrowser)' != 'true'" />

<LegacyPayloads Include="$([System.IO.Directory]::GetDirectories($(LegacyPayloadsRootDirectory)))" Condition="Exists('$(LegacyPayloadsRootDirectory)')" />
<LegacyPayloads Update="@(LegacyPayloads)">
Expand Down

0 comments on commit 728fd85

Please sign in to comment.