From db90ab46162b5576cf48f88c7e5716272499fc66 Mon Sep 17 00:00:00 2001 From: Taekyung Heo <7621438+TaekyungHeo@users.noreply.github.com> Date: Fri, 6 Sep 2024 11:50:51 -0400 Subject: [PATCH 1/2] Fix bug in generating NCCL test reports on K8s --- src/cloudai/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/cloudai/__init__.py b/src/cloudai/__init__.py index a652626b..19852423 100644 --- a/src/cloudai/__init__.py +++ b/src/cloudai/__init__.py @@ -103,7 +103,9 @@ Registry().add_strategy(InstallStrategy, [SlurmSystem], [NcclTest], NcclTestSlurmInstallStrategy) Registry().add_strategy(InstallStrategy, [SlurmSystem], [NeMoLauncher], NeMoLauncherSlurmInstallStrategy) -Registry().add_strategy(ReportGenerationStrategy, [SlurmSystem], [NcclTest], NcclTestReportGenerationStrategy) +Registry().add_strategy( + ReportGenerationStrategy, [SlurmSystem, KubernetesSystem], [NcclTest], NcclTestReportGenerationStrategy +) Registry().add_strategy(CommandGenStrategy, [StandaloneSystem], [Sleep], SleepStandaloneCommandGenStrategy) Registry().add_strategy(CommandGenStrategy, [SlurmSystem], [Sleep], SleepSlurmCommandGenStrategy) Registry().add_strategy(JsonGenStrategy, [KubernetesSystem], [Sleep], SleepKubernetesJsonGenStrategy) From a80dd2e742e8dbde7d41f066edf608caff961d93 Mon Sep 17 00:00:00 2001 From: Taekyung Heo <7621438+TaekyungHeo@users.noreply.github.com> Date: Mon, 9 Sep 2024 10:10:15 -0400 Subject: [PATCH 2/2] Update tests/test_init.py:test_strategies Co-authored-by: Andrei Maslennikov --- tests/test_init.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_init.py b/tests/test_init.py index deb3a65b..25a7b51d 100644 --- a/tests/test_init.py +++ b/tests/test_init.py @@ -120,6 +120,7 @@ def test_runners(): ((ReportGenerationStrategy, SlurmSystem, ChakraReplay), ChakraReplayReportGenerationStrategy), ((ReportGenerationStrategy, SlurmSystem, JaxToolbox), JaxToolboxReportGenerationStrategy), ((ReportGenerationStrategy, SlurmSystem, NcclTest), NcclTestReportGenerationStrategy), + ((ReportGenerationStrategy, KubernetesSystem, NcclTest), NcclTestReportGenerationStrategy), ((ReportGenerationStrategy, SlurmSystem, NeMoLauncher), NeMoLauncherReportGenerationStrategy), ((ReportGenerationStrategy, SlurmSystem, Sleep), SleepReportGenerationStrategy), ((ReportGenerationStrategy, SlurmSystem, UCCTest), UCCTestReportGenerationStrategy),