Merge pull request #1283 from spcl/gpu-ux
GPU User Experience Improvements
tbennun authored Jun 29, 2023
2 parents 672fc30 + 5d2ce3e commit 81b3e4e
Showing 35 changed files with 1,146 additions and 524 deletions.
7 changes: 4 additions & 3 deletions dace/codegen/codegen.py
@@ -31,7 +31,7 @@ def generate_headers(sdfg: SDFG, frame: framecode.DaCeCodeGenerator) -> str:
exit_params = (sdfg.name, sdfg.name)
proto += 'typedef void * %sHandle_t;\n' % sdfg.name
proto += 'extern "C" %sHandle_t __dace_init_%s(%s);\n' % init_params
proto += 'extern "C" void __dace_exit_%s(%sHandle_t handle);\n' % exit_params
proto += 'extern "C" int __dace_exit_%s(%sHandle_t handle);\n' % exit_params
proto += 'extern "C" void __program_%s(%sHandle_t handle%s);\n' % params
return proto

@@ -69,15 +69,16 @@ def generate_dummy(sdfg: SDFG, frame: framecode.DaCeCodeGenerator) -> str:
int main(int argc, char **argv) {{
{sdfg.name}Handle_t handle;
int err;
{allocations}
handle = __dace_init_{sdfg.name}({init_params});
__program_{sdfg.name}(handle{params});
__dace_exit_{sdfg.name}(handle);
err = __dace_exit_{sdfg.name}(handle);
{deallocations}
return 0;
return err;
}}
'''

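With this change, the generated __dace_exit_<program> entry point reports a status code that callers can propagate, and the generated dummy main returns it from main(). A minimal sketch of driving the compiled library by hand and checking that status (illustrative only, not part of the commit; the library path and the argument-less program name "myprog" are hypothetical):

import ctypes

lib = ctypes.CDLL('./libmyprog.so')
lib.__dace_init_myprog.restype = ctypes.c_void_p
lib.__dace_exit_myprog.restype = ctypes.c_int

handle = lib.__dace_init_myprog()
lib.__program_myprog(ctypes.c_void_p(handle))
err = lib.__dace_exit_myprog(ctypes.c_void_p(handle))
if err != 0:
    raise RuntimeError(f'__dace_exit_myprog returned error code {err}')
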
45 changes: 45 additions & 0 deletions dace/codegen/common.py
@@ -144,3 +144,48 @@ def _try_execute(cmd: str) -> bool:
'set the DaCe configuration entry ``compiler.cuda.backend`` '
'or the ``DACE_compiler_cuda_backend`` environment variable '
'to either "cuda" or "hip".')


def get_gpu_runtime_library() -> ctypes.CDLL:
backend = get_gpu_backend()
if backend == 'cuda':
libpath = ctypes.util.find_library('cudart')
if os.name == 'nt' and not libpath: # Windows-based search
for version in (12, 11, 10, 9):
libpath = ctypes.util.find_library(f'cudart64_{version}0')
if libpath:
break
elif backend == 'hip':
libpath = ctypes.util.find_library('amdhip64')
else:
raise RuntimeError(f'Cannot obtain GPU runtime library for backend {backend}')

if not libpath:
envname = 'PATH' if os.name == 'nt' else 'LD_LIBRARY_PATH'
raise RuntimeError(f'GPU runtime library for {backend} not found. Please set the {envname} '
'environment variable to point to the libraries.')

return ctypes.CDLL(libpath)


def get_gpu_runtime_error_string(err: int) -> str:
lib = get_gpu_runtime_library()

# Obtain the error string
geterrorstring = getattr(lib, f'{get_gpu_backend()}GetErrorString')
geterrorstring.restype = ctypes.c_char_p
return geterrorstring(err).decode('utf-8')


def get_gpu_runtime_last_error() -> str:
lib = get_gpu_runtime_library()

getlasterror = getattr(lib, f'{get_gpu_backend()}GetLastError')
res: int = getlasterror()
if res == 0:
return None

# Obtain the error string
geterrorstring = getattr(lib, f'{get_gpu_backend()}GetErrorString')
geterrorstring.restype = ctypes.c_char_p
return geterrorstring(res).decode('utf-8')
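
A minimal usage sketch of the new helpers (illustrative only, not part of the commit; assumes a CUDA or HIP runtime library is installed and discoverable):

from dace.codegen import common

# None is returned when the last GPU runtime call succeeded
pending = common.get_gpu_runtime_last_error()
if pending is not None:
    print(f'Pending GPU runtime error: {pending}')

# Translate a raw status code into the runtime's error string
# (for example, code 700 corresponds to an illegal memory access on CUDA)
print(common.get_gpu_runtime_error_string(700))
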
46 changes: 41 additions & 5 deletions dace/codegen/compiled_sdfg.py
@@ -5,14 +5,14 @@
import re
import shutil
import subprocess
from typing import Any, Callable, Dict, List, Tuple, Optional, Type
from typing import Any, Callable, Dict, List, Tuple, Optional, Type, Union
import warnings

import numpy as np
import sympy as sp

from dace import data as dt, dtypes, hooks, symbolic
from dace.codegen import exceptions as cgx
from dace.codegen import exceptions as cgx, common
from dace.config import Config
from dace.frontend import operations

@@ -22,6 +22,7 @@ class ReloadableDLL(object):
A reloadable shared object (or dynamically linked library), which
bypasses Python's dynamic library reloading issues.
"""

def __init__(self, library_filename, program_name):
"""
Creates a new reloadable shared object.
@@ -181,6 +182,7 @@ def __init__(self, sdfg, lib: ReloadableDLL, argnames: List[str] = None):
self._init = lib.get_symbol('__dace_init_{}'.format(sdfg.name))
self._init.restype = ctypes.c_void_p
self._exit = lib.get_symbol('__dace_exit_{}'.format(sdfg.name))
self._exit.restype = ctypes.c_int
self._cfunc = lib.get_symbol('__program_{}'.format(sdfg.name))

# Cache SDFG return values
@@ -197,6 +199,17 @@ def __init__(self, sdfg, lib: ReloadableDLL, argnames: List[str] = None):
self._free_symbols = self._sdfg.free_symbols
self.argnames = argnames

self.has_gpu_code = False
for _, _, aval in self._sdfg.arrays_recursive():
if aval.storage in dtypes.GPU_STORAGES:
self.has_gpu_code = True
break
if not self.has_gpu_code:
for node, _ in self._sdfg.all_nodes_recursive():
if getattr(node, 'schedule', False) in dtypes.GPU_SCHEDULES:
self.has_gpu_code = True
break

def get_exported_function(self, name: str, restype=None) -> Optional[Callable[..., Any]]:
"""
Tries to find a symbol by name in the compiled SDFG, and convert it to a callable function
Expand Down Expand Up @@ -297,8 +310,20 @@ def initialize(self, *args, **kwargs):

def finalize(self):
if self._exit is not None:
self._exit(self._libhandle)
res: int = self._exit(self._libhandle)
self._initialized = False
if res != 0:
raise RuntimeError(
f'An error was detected after running "{self._sdfg.name}": {self._get_error_text(res)}')

def _get_error_text(self, result: Union[str, int]) -> str:
if self.has_gpu_code:
if isinstance(result, int):
result = common.get_gpu_runtime_error_string(result)
return (f'{result}. Consider enabling synchronous debugging mode (environment variable: '
'DACE_compiler_cuda_syncdebug=1) to see where the issue originates from.')
else:
return result

def __call__(self, *args, **kwargs):
# Update arguments from ordered list
@@ -312,11 +337,23 @@ def __call__(self, *args, **kwargs):
if self._initialized is False:
self._lib.load()
self._initialize(initargtuple)

with hooks.invoke_compiled_sdfg_call_hooks(self, argtuple):
if self.do_not_execute is False:
self._cfunc(self._libhandle, *argtuple)

if self.has_gpu_code:
# Optionally get errors from call
try:
lasterror = common.get_gpu_runtime_last_error()
except RuntimeError as ex:
warnings.warn(f'Could not get last error from GPU runtime: {ex}')
lasterror = None

if lasterror is not None:
raise RuntimeError(
f'An error was detected when calling "{self._sdfg.name}": {self._get_error_text(lasterror)}')

return self._convert_return_values()
except (RuntimeError, TypeError, UnboundLocalError, KeyError, cgx.DuplicateDLLError, ReferenceError):
self._lib.unload()
@@ -545,7 +582,6 @@ def _initialize_return_values(self, kwargs):
arr = self._create_array(*shape_desc)
self._return_arrays.append(arr)


def _convert_return_values(self):
# Return the values as they would be from a Python function
if self._return_arrays is None or len(self._return_arrays) == 0:
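
With these changes, GPU runtime failures surface as Python exceptions when a compiled SDFG is called or finalized, instead of passing silently. A minimal end-to-end sketch (illustrative only, not part of the commit; the program below is hypothetical and requires a GPU-enabled build):

import numpy as np
import dace

@dace.program
def doubler(a: dace.float64[1024]):
    a[:] = a * 2

sdfg = doubler.to_sdfg()
sdfg.apply_gpu_transformations()
compiled = sdfg.compile()

try:
    compiled(a=np.random.rand(1024))
except RuntimeError as ex:
    # The message now contains the runtime's error string and suggests
    # setting DACE_compiler_cuda_syncdebug=1 to locate the failing call.
    print(ex)
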
10 changes: 5 additions & 5 deletions dace/codegen/targets/cpp.py
@@ -1318,7 +1318,7 @@ def presynchronize_streams(sdfg, dfg, state_id, node, callsite_stream):
if hasattr(e.src, "_cuda_stream") and e.src._cuda_stream != 'nullptr':
cudastream = "__state->gpu_context->streams[%d]" % e.src._cuda_stream
callsite_stream.write(
"%sStreamSynchronize(%s);" % (common.get_gpu_backend(), cudastream),
"DACE_GPU_CHECK(%sStreamSynchronize(%s));" % (common.get_gpu_backend(), cudastream),
sdfg,
state_id,
[e.src, e.dst],
@@ -1356,9 +1356,9 @@ def synchronize_streams(sdfg, dfg, state_id, node, scope_exit, callsite_stream,
if isinstance(desc, data.Array) and desc.start_offset != 0:
ptrname = f'({ptrname} - {sym2cpp(desc.start_offset)})'
if Config.get_bool('compiler', 'cuda', 'syncdebug'):
callsite_stream.write(f'DACE_CUDA_CHECK({backend}FreeAsync({ptrname}, {cudastream}));\n', sdfg,
callsite_stream.write(f'DACE_GPU_CHECK({backend}FreeAsync({ptrname}, {cudastream}));\n', sdfg,
state_id, scope_exit)
callsite_stream.write(f'DACE_CUDA_CHECK({backend}DeviceSynchronize());')
callsite_stream.write(f'DACE_GPU_CHECK({backend}DeviceSynchronize());')
else:
callsite_stream.write(f'{backend}FreeAsync({ptrname}, {cudastream});\n', sdfg, state_id, scope_exit)
to_remove.add((sd, name))
@@ -1380,8 +1380,8 @@ def synchronize_streams(sdfg, dfg, state_id, node, scope_exit, callsite_stream,
if (isinstance(edge.dst, nodes.AccessNode) and hasattr(edge.dst, '_cuda_stream')
and edge.dst._cuda_stream != node._cuda_stream):
callsite_stream.write(
"""{backend}EventRecord(__state->gpu_context->events[{ev}], {src_stream});
{backend}StreamWaitEvent(__state->gpu_context->streams[{dst_stream}], __state->gpu_context->events[{ev}], 0);""".format(
"""DACE_GPU_CHECK({backend}EventRecord(__state->gpu_context->events[{ev}], {src_stream}));
DACE_GPU_CHECK({backend}StreamWaitEvent(__state->gpu_context->streams[{dst_stream}], __state->gpu_context->events[{ev}], 0));""".format(
ev=edge._cuda_event if hasattr(edge, "_cuda_event") else 0,
src_stream=cudastream,
dst_stream=edge.dst._cuda_stream,
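
In these hunks the generated stream-synchronization, event, and deallocation calls are wrapped in DACE_GPU_CHECK (replacing DACE_CUDA_CHECK, and added where calls were previously unchecked), matching the backend-agnostic naming used elsewhere in this PR. A short sketch of turning on synchronous debugging from Python so the wrapped calls report the failing call site immediately (illustrative only; the DACE_compiler_cuda_syncdebug=1 environment variable mentioned in the error message above is the equivalent setting):

from dace.config import Config

# Assumption for illustration: Config.set accepts the same key hierarchy
# that Config.get_bool uses in the diff above.
Config.set('compiler', 'cuda', 'syncdebug', value=True)
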
(Diffs for the remaining changed files are not shown.)
