contrib/intel: simplify and expand ZE testing

Since not all of the fabtests supported device memory, the CI used a wrapper script to run only the subset of tests that did. Now that all of the runfabtests support FI_HMEM properly, this simplifies the ZE testing by going through the regular run_fabtests path. The --device parameter is removed and replaced with the --way parameter, which indicates which direction to test (h2d, d2d, or xd2d; the default is None). This simplifies the code path and enables the ZE testing to run the full test suite, rather than the subset, increasing our coverage.

Signed-off-by: Alexia Ingerson <alexia.ingerson@intel.com>
ofiwg · Oct 14, 2023 · 301c13e · 301c13e
1 parent 95aa987
commit 301c13e
Show file tree

Hide file tree

Showing 4 changed files with 71 additions and 130 deletions.
diff --git a/contrib/intel/jenkins/Jenkinsfile b/contrib/intel/jenkins/Jenkinsfile
@@ -31,16 +31,22 @@ def slurm_batch(partition, node_num, output, command) {
 }
 
 def run_fabtests(stage_name, partition, node_num, prov, util=null,
-                 user_env=null) {
+                 user_env=null, way=null) {
   def command = "python3.9 ${RUN_LOCATION}/runtests.py"
   def opts = "--prov=${prov} --test=fabtests"
+  def modes = BUILD_MODES
   if (util)
     opts = "${opts} --util=${util}"
 
   if (user_env)
     opts = "${opts} --user_env ${user_env}"
 
-  for (mode in BUILD_MODES) {
+  if (way) {
+    opts = "${opts} --way ${way}"
+    modes = ["reg"]
+  }
+
+  for (mode in modes) {
     echo "Running $stage_name fabtests $mode"
     slurm_batch("${partition}", "${node_num}",
                 "${env.LOG_DIR}/${stage_name}_fabtests_${mode}",
@@ -670,27 +676,9 @@ pipeline {
           steps {
             script {
               dir (RUN_LOCATION) {
-                def providers = [["shm", null]]
-                def directions = ["h2d", "d2d", "xd2d"]
-                def base_cmd = "python3.9 runtests.py --device=ze"
-                def prefix = "${env.LOG_DIR}/ze_v3_"
-                def suffix = "_reg"
-                for (prov in providers) {
-                  for (way in directions) {
-                    if (prov[1]) {
-                      echo "Running ${prov[0]}-${prov[1]} ze"
-                      slurm_batch("fabrics-ci", "1",
-                                "${prefix}${prov[0]}-${prov[1]}_${way}${suffix}",
-                                """${base_cmd} --prov=${prov[0]} \
-                                --util=${prov[1]} --way=${way}""")
-                    } else {
-                      echo "Running ${prov[0]} ze"
-                      slurm_batch("fabrics-ci", "1",
-                                  "${prefix}${prov[0]}_${way}${suffix}",
-                                  "${base_cmd} --prov=${prov[0]} --way=${way}")
-                    }
-                  }
-                }
+                run_fabtests("ze_v3_shm", "fabrics-ci", "1", "shm", null, null, "h2d")
+                run_fabtests("ze_v3_shm", "fabrics-ci", "1", "shm", null, null, "d2d")
+                run_fabtests("ze_v3_shm", "fabrics-ci", "1", "shm", null, null, "xd2d")
               }
             }
           }

diff --git a/contrib/intel/jenkins/run.py b/contrib/intel/jenkins/run.py
@@ -31,13 +31,13 @@ def fi_info_test(core, hosts, mode, user_env, log_file, util):
     fi_info_test.execute_cmd()
     print('-------------------------------------------------------------------')
 
-def fabtests(core, hosts, mode, user_env, log_file, util):
+def fabtests(core, hosts, mode, user_env, log_file, util, way):
 
     runfabtest = tests.Fabtest(jobname=jbname,buildno=bno,
                                testname='runfabtests', core_prov=core,
                                fabric=fab, hosts=hosts, ofi_build_mode=mode,
                                user_env=user_env, log_file=log_file,
-                               util_prov=util)
+                               util_prov=util, way=way)
 
     print('-------------------------------------------------------------------')
     if (runfabtest.execute_condn):
@@ -94,22 +94,6 @@ def multinodetest(core, hosts, mode, user_env, log_file, util):
               .format(runmultinodetest.testname))
     print("-------------------------------------------------------------------")
 
-def ze_fabtests(core, hosts, mode, way, user_env, log_file, util):
-
-    runzefabtests = tests.ZeFabtests(jobname=jbname,buildno=bno,
-                                     testname="ze test", core_prov=core,
-                                     fabric=fab, hosts=hosts,
-                                     ofi_build_mode=mode, user_env=user_env,
-                                     log_file=log_file, util_prov=util)
-
-    print('-------------------------------------------------------------------')
-    if (runzefabtests.execute_condn):
-        print(f"Running ze {way} tests for {core}-{util}-{fab}")
-        runzefabtests.execute_cmd(way)
-    else:
-        print(f"Skipping {core} {runzefabtests.testname} as execute condition fails")
-    print('-------------------------------------------------------------------')
-
 def intel_mpi_benchmark(core, hosts, mpi, mode, group, user_env, log_file, util):
 
     imb = tests.IMBtests(jobname=jbname, buildno=bno,

diff --git a/contrib/intel/jenkins/runtests.py b/contrib/intel/jenkins/runtests.py
@@ -28,9 +28,8 @@ def __call__(self, parser, namespace, values, option_string=None):
 
 parser.add_argument('--imb_grp', help="IMB test group 1:[MPI1, P2P], \
                     2:[EXT, IO], 3:[NBC, RMA, MT]", choices=['1', '2', '3'])
-parser.add_argument('--device', help="optional gpu device", choices=['ze'])
 parser.add_argument('--way', help="direction to run with device option",
-                    choices=['h2d', 'd2d', 'xd2d'], default='h2d')
+                    choices=['h2d', 'd2d', 'xd2d'], default=None)
 parser.add_argument('--user_env', help="Run with additional environment " \
                     "variables", nargs='*', action=ParseDict, default={})
 parser.add_argument('--mpi', help="Select mpi to use for middlewares",
@@ -43,7 +42,6 @@ def __call__(self, parser, namespace, values, option_string=None):
 args_core = args.prov
 
 args_util = args.util
-args_device = args.device
 user_env = args.user_env
 log_file = args.log_file
 weekly = args.weekly
@@ -103,56 +101,51 @@ def __call__(self, parser, namespace, values, option_string=None):
 os.chdir('/tmp/')
 
 if(args_core):
-    if (args.device != 'ze'):
-        if (run_test == 'all' or run_test == 'fi_info'):
-            run.fi_info_test(args_core, hosts, ofi_build_mode,
-                             user_env, log_file, util=args.util)
-
-        if (run_test == 'all' or run_test == 'fabtests'):
-            run.fabtests(args_core, hosts, ofi_build_mode, user_env, log_file,
-                         args_util)
-
-        if (run_test == 'all' or run_test == 'shmem'):
-            run.shmemtest(args_core, hosts, ofi_build_mode, user_env, log_file,
-                          args_util)
-
-        if (run_test == 'all' or run_test == 'oneccl'):
-            run.oneccltest(args_core, hosts, ofi_build_mode, user_env, log_file,
-                           args_util)
-
-        if (run_test == 'all' or run_test == 'onecclgpu'):
-            run.oneccltestgpu(args_core, hosts, ofi_build_mode,
-                              user_env, log_file, args_util)
-
-        if (run_test == 'all' or run_test == 'daos'):
-            run.daos_cart_tests(args_core, hosts, ofi_build_mode,
+    if (run_test == 'all' or run_test == 'fi_info'):
+        run.fi_info_test(args_core, hosts, ofi_build_mode,
+                         user_env, log_file, util=args.util)
+
+    if (run_test == 'all' or run_test == 'fabtests'):
+        run.fabtests(args_core, hosts, ofi_build_mode, user_env, log_file,
+                     args_util, way)
+
+    if (run_test == 'all' or run_test == 'shmem'):
+        run.shmemtest(args_core, hosts, ofi_build_mode, user_env, log_file,
+                      args_util)
+
+    if (run_test == 'all' or run_test == 'oneccl'):
+        run.oneccltest(args_core, hosts, ofi_build_mode, user_env, log_file,
+                       args_util)
+
+    if (run_test == 'all' or run_test == 'onecclgpu'):
+        run.oneccltestgpu(args_core, hosts, ofi_build_mode,
+                          user_env, log_file, args_util)
+
+    if (run_test == 'all' or run_test == 'daos'):
+        run.daos_cart_tests(args_core, hosts, ofi_build_mode,
+                            user_env, log_file, args_util)
+
+    if (run_test == 'all' or run_test == 'multinode'):
+        run.multinodetest(args_core, hosts, ofi_build_mode,
+                          user_env, log_file, args_util)
+
+    if (run_test == 'all' or run_test == 'mpichtestsuite'):
+        run.mpich_test_suite(args_core, hosts, mpi,
+                             ofi_build_mode, user_env, log_file,
+                             args_util, weekly)
+
+    if (run_test == 'all' or run_test == 'IMB'):
+        run.intel_mpi_benchmark(args_core, hosts, mpi,
+                                ofi_build_mode, imb_group,
                                 user_env, log_file, args_util)
 
-        if (run_test == 'all' or run_test == 'multinode'):
-            run.multinodetest(args_core, hosts, ofi_build_mode,
-                              user_env, log_file, args_util)
-
-        if (run_test == 'all' or run_test == 'mpichtestsuite'):
-            run.mpich_test_suite(args_core, hosts, mpi,
-                                ofi_build_mode, user_env, log_file,
-                                args_util, weekly)
-
-        if (run_test == 'all' or run_test == 'IMB'):
-            run.intel_mpi_benchmark(args_core, hosts, mpi,
-                                    ofi_build_mode, imb_group,
-                                    user_env, log_file, args_util)
-
-        if (run_test == 'all' or run_test == 'osu'):
-            run.osu_benchmark(args_core, hosts, mpi,
-                                ofi_build_mode, user_env, log_file,
-                                args_util)
-
-        if (run_test == 'all' or run_test == 'dmabuf'):
-            run.dmabuftests(args_core, hosts, ofi_build_mode,
-                              user_env, log_file, args_util)
-    else:
-        run.ze_fabtests(args_core, hosts, ofi_build_mode, way, user_env, log_file,
-                        args_util)
+    if (run_test == 'all' or run_test == 'osu'):
+        run.osu_benchmark(args_core, hosts, mpi,
+                          ofi_build_mode, user_env, log_file,
+                          args_util)
 
+    if (run_test == 'all' or run_test == 'dmabuf'):
+        run.dmabuftests(args_core, hosts, ofi_build_mode,
+                        user_env, log_file, args_util)
 else:
     print("Error : Specify a core provider to run tests")
diff --git a/contrib/intel/jenkins/tests.py b/contrib/intel/jenkins/tests.py
@@ -16,7 +16,8 @@
 class Test:
 
     def __init__ (self, jobname, buildno, testname, core_prov, fabric,
-                  hosts, ofi_build_mode, user_env, log_file, mpitype=None, util_prov=None):
+                  hosts, ofi_build_mode, user_env, log_file, mpitype=None,
+                  util_prov=None, way=None):
         self.jobname = jobname
         self.buildno = buildno
         self.testname = testname
@@ -48,6 +49,7 @@ def __init__ (self, jobname, buildno, testname, core_prov, fabric,
                                    f'{self.jobname}/{self.buildno}/'\
                                    'log_dir'
         self.env = user_env
+        self.way = way
 
         self.mpi = ''
         if (self.mpi_type == 'impi'):
@@ -101,10 +103,12 @@ def execute_cmd(self):
 class Fabtest(Test):
 
     def __init__(self, jobname, buildno, testname, core_prov, fabric,
-                 hosts, ofi_build_mode, user_env, log_file, util_prov=None):
+                 hosts, ofi_build_mode, user_env, log_file, util_prov=None,
+                 way=None):
 
         super().__init__(jobname, buildno, testname, core_prov, fabric,
-                         hosts, ofi_build_mode, user_env, log_file, None, util_prov)
+                         hosts, ofi_build_mode, user_env, log_file, None,
+                         util_prov, way)
         self.fabtestpath = f'{self.libfab_installpath}/bin'
         self.fabtestconfigpath = f'{self.libfab_installpath}/share/fabtests'
 
@@ -152,6 +156,13 @@ def options(self):
         else:
             opts += "-t all "
 
+        if (self.way == 'h2d'):
+            opts += "-C \"-H\" -L \"-D ze\" "
+        elif (self.way == 'd2d'):
+            opts += "-C \"-D ze\" -L \"-D ze\" "
+        elif (self.way == 'xd2d'):
+            opts += "-C \"-D ze\" -L \"-D ze -i 1\" "
+
         if (self.core_prov == 'sockets' and self.ofi_build_mode == 'reg'):
             complex_test_file = f'{self.libfab_installpath}/share/fabtests/'\
                                 f'test_configs/{self.core_prov}/quick.test'
@@ -352,41 +363,6 @@ def execute_cmd(self):
         common.run_command(outputcmd)
         os.chdir(curdir)
 
-class ZeFabtests(Test):
-    def __init__(self, jobname, buildno, testname, core_prov, fabric,
-                 hosts, ofi_build_mode, user_env, log_file, util_prov=None):
-
-        super().__init__(jobname, buildno, testname, core_prov, fabric,
-                         hosts, ofi_build_mode, user_env, log_file, None, util_prov)
-
-        self.fabtestpath = f'{self.libfab_installpath}/bin'
-        self.zefabtest_script_path = f'{cloudbees_config.ze_testpath}'
-        self.fabtestconfigpath = f'{self.libfab_installpath}/share/fabtests'
-
-    @property
-    def cmd(self):
-        return f'{self.zefabtest_script_path}/runfabtests_ze.sh '
-
-    def options(self, test_name):
-        opts = f"-p {self.fabtestpath} "
-        opts += f"-B {self.fabtestpath} "
-        opts += f"-t {test_name} "
-        opts += f"{self.server} {self.client} "
-        return opts
-
-    @property
-    def execute_condn(self):
-        return True if (self.core_prov == 'shm') else False
-
-    def execute_cmd(self, test_name):
-        curdir = os.getcwd()
-        os.chdir(self.fabtestconfigpath)
-        command = self.cmd + self.options(test_name)
-        outputcmd = shlex.split(command)
-        common.run_command(outputcmd)
-        os.chdir(curdir)
-
-
 class OMPI:
     def __init__(self, core_prov, hosts, libfab_installpath, nw_interface,
                  server, client, environ, middlewares_path, util_prov=None):