Skip to content

Commit

Permalink
Merge pull request #4131 from Flamefire/gpu_info-fix
Browse files Browse the repository at this point in the history
Avoid exception in `get_gpu_info`
  • Loading branch information
branfosj authored Dec 4, 2022
2 parents 13f3982 + 2c2f07d commit 7150262
Showing 1 changed file with 17 additions and 8 deletions.
25 changes: 17 additions & 8 deletions easybuild/tools/systemtools.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@

from easybuild.base import fancylogger
from easybuild.tools.build_log import EasyBuildError, print_warning
from easybuild.tools.config import IGNORE
from easybuild.tools.filetools import is_readable, read_file, which
from easybuild.tools.py2vs3 import OrderedDict, string_type
from easybuild.tools.run import run_cmd
Expand Down Expand Up @@ -608,14 +609,19 @@ def get_gpu_info():
"""
Get the GPU info
"""
gpu_info = {}
os_type = get_os_type()
if get_os_type() != LINUX:
_log.info("Only know how to get GPU info on Linux, assuming no GPUs are present")
return {}

if os_type == LINUX:
gpu_info = {}
if not which('nvidia-smi', on_error=IGNORE):
_log.info("nvidia-smi not found. Cannot detect NVIDIA GPUs")
else:
try:
cmd = "nvidia-smi --query-gpu=gpu_name,driver_version --format=csv,noheader"
_log.debug("Trying to determine NVIDIA GPU info on Linux via cmd '%s'", cmd)
out, ec = run_cmd(cmd, force_in_dry_run=True, trace=False, stream_output=False)
out, ec = run_cmd(cmd, simple=False, log_ok=False, log_all=False,
force_in_dry_run=True, trace=False, stream_output=False)
if ec == 0:
for line in out.strip().split('\n'):
nvidia_gpu_info = gpu_info.setdefault('NVIDIA', {})
Expand All @@ -627,16 +633,21 @@ def get_gpu_info():
_log.debug("Exception was raised when running nvidia-smi: %s", err)
_log.info("No NVIDIA GPUs detected")

if not which('rocm-smi', on_error=IGNORE):
_log.info("rocm-smi not found. Cannot detect AMD GPUs")
else:
try:
cmd = "rocm-smi --showdriverversion --csv"
_log.debug("Trying to determine AMD GPU driver on Linux via cmd '%s'", cmd)
out, ec = run_cmd(cmd, force_in_dry_run=True, trace=False, stream_output=False)
out, ec = run_cmd(cmd, simple=False, log_ok=False, log_all=False,
force_in_dry_run=True, trace=False, stream_output=False)
if ec == 0:
amd_driver = out.strip().split('\n')[1].split(',')[1]

cmd = "rocm-smi --showproductname --csv"
_log.debug("Trying to determine AMD GPU info on Linux via cmd '%s'", cmd)
out, ec = run_cmd(cmd, force_in_dry_run=True, trace=False, stream_output=False)
out, ec = run_cmd(cmd, simple=False, log_ok=False, log_all=False,
force_in_dry_run=True, trace=False, stream_output=False)
if ec == 0:
for line in out.strip().split('\n')[1:]:
amd_card_series = line.split(',')[1]
Expand All @@ -650,8 +661,6 @@ def get_gpu_info():
except Exception as err:
_log.debug("Exception was raised when running rocm-smi: %s", err)
_log.info("No AMD GPUs detected")
else:
_log.info("Only know how to get GPU info on Linux, assuming no GPUs are present")

return gpu_info

Expand Down

0 comments on commit 7150262

Please sign in to comment.