Skip to content

Commit

Permalink
contrib/intel: simplify and expand ZE testing
Browse files Browse the repository at this point in the history
Since not all of the fabtests supported device memory, the CI used
a wrapper script to run a subset of tests that were supported.
Now that all of the tests run by runfabtests support FI_HMEM properly, this
commit simplifies the ZE testing by routing it through the regular run_fabtests path.
The --device parameter is removed; the --way parameter, which indicates which
direction to test (h2d, d2d, xd2d; default now None), selects device testing on its own.
This simplifies the code path and enables the ZE testing to run the
full testsuite, rather than the subset, increasing our coverage.

Signed-off-by: Alexia Ingerson <alexia.ingerson@intel.com>
  • Loading branch information
aingerson committed Oct 14, 2023
1 parent 95aa987 commit 301c13e
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 130 deletions.
34 changes: 11 additions & 23 deletions contrib/intel/jenkins/Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,22 @@ def slurm_batch(partition, node_num, output, command) {
}

def run_fabtests(stage_name, partition, node_num, prov, util=null,
user_env=null) {
user_env=null, way=null) {
def command = "python3.9 ${RUN_LOCATION}/runtests.py"
def opts = "--prov=${prov} --test=fabtests"
def modes = BUILD_MODES
if (util)
opts = "${opts} --util=${util}"

if (user_env)
opts = "${opts} --user_env ${user_env}"

for (mode in BUILD_MODES) {
if (way) {
opts = "${opts} --way ${way}"
modes = ["reg"]
}

for (mode in modes) {
echo "Running $stage_name fabtests $mode"
slurm_batch("${partition}", "${node_num}",
"${env.LOG_DIR}/${stage_name}_fabtests_${mode}",
Expand Down Expand Up @@ -670,27 +676,9 @@ pipeline {
steps {
script {
dir (RUN_LOCATION) {
def providers = [["shm", null]]
def directions = ["h2d", "d2d", "xd2d"]
def base_cmd = "python3.9 runtests.py --device=ze"
def prefix = "${env.LOG_DIR}/ze_v3_"
def suffix = "_reg"
for (prov in providers) {
for (way in directions) {
if (prov[1]) {
echo "Running ${prov[0]}-${prov[1]} ze"
slurm_batch("fabrics-ci", "1",
"${prefix}${prov[0]}-${prov[1]}_${way}${suffix}",
"""${base_cmd} --prov=${prov[0]} \
--util=${prov[1]} --way=${way}""")
} else {
echo "Running ${prov[0]} ze"
slurm_batch("fabrics-ci", "1",
"${prefix}${prov[0]}_${way}${suffix}",
"${base_cmd} --prov=${prov[0]} --way=${way}")
}
}
}
// run_fabtests() derives the log path from stage_name and the build mode only
// ("${env.LOG_DIR}/${stage_name}_fabtests_${mode}"), and passing `way` forces
// the mode list to ["reg"]. Put the direction in the stage name so the three
// runs do not all target the same log file.
for (way in ["h2d", "d2d", "xd2d"]) {
run_fabtests("ze_v3_shm_${way}", "fabrics-ci", "1", "shm", null, null, way)
}
}
}
}
Expand Down
20 changes: 2 additions & 18 deletions contrib/intel/jenkins/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,13 @@ def fi_info_test(core, hosts, mode, user_env, log_file, util):
fi_info_test.execute_cmd()
print('-------------------------------------------------------------------')

def fabtests(core, hosts, mode, user_env, log_file, util):
def fabtests(core, hosts, mode, user_env, log_file, util, way):

runfabtest = tests.Fabtest(jobname=jbname,buildno=bno,
testname='runfabtests', core_prov=core,
fabric=fab, hosts=hosts, ofi_build_mode=mode,
user_env=user_env, log_file=log_file,
util_prov=util)
util_prov=util, way=way)

print('-------------------------------------------------------------------')
if (runfabtest.execute_condn):
Expand Down Expand Up @@ -94,22 +94,6 @@ def multinodetest(core, hosts, mode, user_env, log_file, util):
.format(runmultinodetest.testname))
print("-------------------------------------------------------------------")

def ze_fabtests(core, hosts, mode, way, user_env, log_file, util):
    """Run the ZE fabtests for one core provider and one direction.

    Builds a ``tests.ZeFabtests`` object from the arguments plus the
    module-level ``jbname``, ``bno`` and ``fab`` values (defined elsewhere
    in this module), then either executes it or reports that it was skipped.

    Args:
        core: core provider name, forwarded as ``core_prov``.
        hosts: hosts to run on, forwarded unchanged.
        mode: libfabric build mode, forwarded as ``ofi_build_mode``.
        way: direction to test; passed straight to ``execute_cmd``.
        user_env: extra environment settings, forwarded unchanged.
        log_file: log destination, forwarded unchanged.
        util: utility provider name, forwarded as ``util_prov``.
    """
    banner = '-------------------------------------------------------------------'

    ze_test = tests.ZeFabtests(
        jobname=jbname,
        buildno=bno,
        testname="ze test",
        core_prov=core,
        fabric=fab,
        hosts=hosts,
        ofi_build_mode=mode,
        user_env=user_env,
        log_file=log_file,
        util_prov=util,
    )

    print(banner)
    if not ze_test.execute_condn:
        # The test object decides whether this provider is eligible at all.
        print(f"Skipping {core} {ze_test.testname} as execute condition fails")
    else:
        print(f"Running ze {way} tests for {core}-{util}-{fab}")
        ze_test.execute_cmd(way)
    print(banner)

def intel_mpi_benchmark(core, hosts, mpi, mode, group, user_env, log_file, util):

imb = tests.IMBtests(jobname=jbname, buildno=bno,
Expand Down
95 changes: 44 additions & 51 deletions contrib/intel/jenkins/runtests.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,8 @@ def __call__(self, parser, namespace, values, option_string=None):

parser.add_argument('--imb_grp', help="IMB test group 1:[MPI1, P2P], \
2:[EXT, IO], 3:[NBC, RMA, MT]", choices=['1', '2', '3'])
parser.add_argument('--device', help="optional gpu device", choices=['ze'])
parser.add_argument('--way', help="direction to run with device option",
choices=['h2d', 'd2d', 'xd2d'], default='h2d')
choices=['h2d', 'd2d', 'xd2d'], default=None)
parser.add_argument('--user_env', help="Run with additional environment " \
"variables", nargs='*', action=ParseDict, default={})
parser.add_argument('--mpi', help="Select mpi to use for middlewares",
Expand All @@ -43,7 +42,6 @@ def __call__(self, parser, namespace, values, option_string=None):
args_core = args.prov

args_util = args.util
args_device = args.device
user_env = args.user_env
log_file = args.log_file
weekly = args.weekly
Expand Down Expand Up @@ -103,56 +101,51 @@ def __call__(self, parser, namespace, values, option_string=None):
os.chdir('/tmp/')

if(args_core):
if (args.device != 'ze'):
if (run_test == 'all' or run_test == 'fi_info'):
run.fi_info_test(args_core, hosts, ofi_build_mode,
user_env, log_file, util=args.util)

if (run_test == 'all' or run_test == 'fabtests'):
run.fabtests(args_core, hosts, ofi_build_mode, user_env, log_file,
args_util)

if (run_test == 'all' or run_test == 'shmem'):
run.shmemtest(args_core, hosts, ofi_build_mode, user_env, log_file,
args_util)

if (run_test == 'all' or run_test == 'oneccl'):
run.oneccltest(args_core, hosts, ofi_build_mode, user_env, log_file,
args_util)

if (run_test == 'all' or run_test == 'onecclgpu'):
run.oneccltestgpu(args_core, hosts, ofi_build_mode,
user_env, log_file, args_util)

if (run_test == 'all' or run_test == 'daos'):
run.daos_cart_tests(args_core, hosts, ofi_build_mode,
if (run_test == 'all' or run_test == 'fi_info'):
run.fi_info_test(args_core, hosts, ofi_build_mode,
user_env, log_file, util=args.util)

if (run_test == 'all' or run_test == 'fabtests'):
run.fabtests(args_core, hosts, ofi_build_mode, user_env, log_file,
args_util, way)

if (run_test == 'all' or run_test == 'shmem'):
run.shmemtest(args_core, hosts, ofi_build_mode, user_env, log_file,
args_util)

if (run_test == 'all' or run_test == 'oneccl'):
run.oneccltest(args_core, hosts, ofi_build_mode, user_env, log_file,
args_util)

if (run_test == 'all' or run_test == 'onecclgpu'):
run.oneccltestgpu(args_core, hosts, ofi_build_mode,
user_env, log_file, args_util)

if (run_test == 'all' or run_test == 'daos'):
run.daos_cart_tests(args_core, hosts, ofi_build_mode,
user_env, log_file, args_util)

if (run_test == 'all' or run_test == 'multinode'):
run.multinodetest(args_core, hosts, ofi_build_mode,
user_env, log_file, args_util)

if (run_test == 'all' or run_test == 'mpichtestsuite'):
run.mpich_test_suite(args_core, hosts, mpi,
ofi_build_mode, user_env, log_file,
args_util, weekly)

if (run_test == 'all' or run_test == 'IMB'):
run.intel_mpi_benchmark(args_core, hosts, mpi,
ofi_build_mode, imb_group,
user_env, log_file, args_util)

if (run_test == 'all' or run_test == 'multinode'):
run.multinodetest(args_core, hosts, ofi_build_mode,
user_env, log_file, args_util)

if (run_test == 'all' or run_test == 'mpichtestsuite'):
run.mpich_test_suite(args_core, hosts, mpi,
ofi_build_mode, user_env, log_file,
args_util, weekly)

if (run_test == 'all' or run_test == 'IMB'):
run.intel_mpi_benchmark(args_core, hosts, mpi,
ofi_build_mode, imb_group,
user_env, log_file, args_util)

if (run_test == 'all' or run_test == 'osu'):
run.osu_benchmark(args_core, hosts, mpi,
ofi_build_mode, user_env, log_file,
args_util)

if (run_test == 'all' or run_test == 'dmabuf'):
run.dmabuftests(args_core, hosts, ofi_build_mode,
user_env, log_file, args_util)
else:
run.ze_fabtests(args_core, hosts, ofi_build_mode, way, user_env, log_file,
args_util)
if (run_test == 'all' or run_test == 'osu'):
run.osu_benchmark(args_core, hosts, mpi,
ofi_build_mode, user_env, log_file,
args_util)

if (run_test == 'all' or run_test == 'dmabuf'):
run.dmabuftests(args_core, hosts, ofi_build_mode,
user_env, log_file, args_util)
else:
print("Error : Specify a core provider to run tests")
52 changes: 14 additions & 38 deletions contrib/intel/jenkins/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
class Test:

def __init__ (self, jobname, buildno, testname, core_prov, fabric,
hosts, ofi_build_mode, user_env, log_file, mpitype=None, util_prov=None):
hosts, ofi_build_mode, user_env, log_file, mpitype=None,
util_prov=None, way=None):
self.jobname = jobname
self.buildno = buildno
self.testname = testname
Expand Down Expand Up @@ -48,6 +49,7 @@ def __init__ (self, jobname, buildno, testname, core_prov, fabric,
f'{self.jobname}/{self.buildno}/'\
'log_dir'
self.env = user_env
self.way = way

self.mpi = ''
if (self.mpi_type == 'impi'):
Expand Down Expand Up @@ -101,10 +103,12 @@ def execute_cmd(self):
class Fabtest(Test):

def __init__(self, jobname, buildno, testname, core_prov, fabric,
hosts, ofi_build_mode, user_env, log_file, util_prov=None):
hosts, ofi_build_mode, user_env, log_file, util_prov=None,
way=None):

super().__init__(jobname, buildno, testname, core_prov, fabric,
hosts, ofi_build_mode, user_env, log_file, None, util_prov)
hosts, ofi_build_mode, user_env, log_file, None,
util_prov, way)
self.fabtestpath = f'{self.libfab_installpath}/bin'
self.fabtestconfigpath = f'{self.libfab_installpath}/share/fabtests'

Expand Down Expand Up @@ -152,6 +156,13 @@ def options(self):
else:
opts += "-t all "

if (self.way == 'h2d'):
opts += "-C \"-H\" -L \"-D ze\" "
elif (self.way == 'd2d'):
opts += "-C \"-D ze\" -L \"-D ze\" "
elif (self.way == 'xd2d'):
opts += "-C \"-D ze\" -L \"-D ze -i 1\" "

if (self.core_prov == 'sockets' and self.ofi_build_mode == 'reg'):
complex_test_file = f'{self.libfab_installpath}/share/fabtests/'\
f'test_configs/{self.core_prov}/quick.test'
Expand Down Expand Up @@ -352,41 +363,6 @@ def execute_cmd(self):
common.run_command(outputcmd)
os.chdir(curdir)

class ZeFabtests(Test):
    """Run the ``runfabtests_ze.sh`` wrapper script for the shm provider.

    The command line is assembled from ``cmd`` and ``options()`` and is
    executed from the fabtests config directory. ``execute_condn`` limits
    the test to ``core_prov == 'shm'``.
    """

    def __init__(self, jobname, buildno, testname, core_prov, fabric,
                 hosts, ofi_build_mode, user_env, log_file, util_prov=None):
        """Initialise the base ``Test`` and derive the paths used to run.

        All arguments are forwarded to ``Test.__init__``; the positional
        ``None`` fills the base class's ``mpitype`` slot.
        """
        super().__init__(jobname, buildno, testname, core_prov, fabric,
                         hosts, ofi_build_mode, user_env, log_file, None,
                         util_prov)

        # libfab_installpath is presumably set by Test.__init__ -- the base
        # class body is not fully visible here; confirm.
        self.fabtestpath = f'{self.libfab_installpath}/bin'
        self.zefabtest_script_path = f'{cloudbees_config.ze_testpath}'
        self.fabtestconfigpath = f'{self.libfab_installpath}/share/fabtests'

    @property
    def cmd(self):
        """Return the wrapper script path followed by a trailing space."""
        return f'{self.zefabtest_script_path}/runfabtests_ze.sh '

    def options(self, test_name):
        """Return the argument string for the wrapper script.

        Args:
            test_name: value passed to the script's ``-t`` option.

        Returns:
            A space-terminated string holding ``-p`` and ``-B`` (both set to
            the fabtests bin directory), ``-t test_name``, then the server
            and client.
        """
        opts = f"-p {self.fabtestpath} "
        opts += f"-B {self.fabtestpath} "
        opts += f"-t {test_name} "
        opts += f"{self.server} {self.client} "
        return opts

    @property
    def execute_condn(self):
        """Return True only when the core provider is ``shm``."""
        return self.core_prov == 'shm'

    def execute_cmd(self, test_name):
        """Run the wrapper script from the fabtests config directory.

        The previous working directory is restored even if the command
        raises, so a failed run does not leave the process in the wrong
        directory for later tests.

        Args:
            test_name: value passed to ``options()`` for the ``-t`` option.
        """
        curdir = os.getcwd()
        os.chdir(self.fabtestconfigpath)
        try:
            command = self.cmd + self.options(test_name)
            outputcmd = shlex.split(command)
            common.run_command(outputcmd)
        finally:
            os.chdir(curdir)


class OMPI:
def __init__(self, core_prov, hosts, libfab_installpath, nw_interface,
server, client, environ, middlewares_path, util_prov=None):
Expand Down

0 comments on commit 301c13e

Please sign in to comment.