Skip to content

Commit

Permalink
Conditionally add LTO-able objects for PTX prints
Browse files Browse the repository at this point in the history
  • Loading branch information
isVoid committed Oct 30, 2024
1 parent b42c67d commit afcce87
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 25 deletions.
37 changes: 15 additions & 22 deletions numba_cuda/numba/cuda/codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import os
import subprocess
import tempfile
from warnings import warn

CUDA_TRIPLE = 'nvptx64-nvidia-cuda'

Expand Down Expand Up @@ -179,7 +178,7 @@ def get_ltoir(self, cc=None):

return ltoir

def _link_all(self, linker, cc):
def _link_all(self, linker, cc, ignore_nonlto=False):
if linker.lto:
ltoir = self.get_ltoir(cc=cc)
linker.add_ltoir(ltoir)
Expand All @@ -188,9 +187,11 @@ def _link_all(self, linker, cc):
linker.add_ptx(ptx.encode())

for path in self._linking_files:
linker.add_file_guess_ext(path)
linker.add_file_guess_ext(path, ignore_nonlto)
if self.needs_cudadevrt:
linker.add_file_guess_ext(get_cudalib('cudadevrt', static=True))
linker.add_file_guess_ext(
get_cudalib('cudadevrt', static=True), ignore_nonlto
)

def get_cubin(self, cc=None):
cc = self._ensure_cc(cc)
Expand All @@ -206,30 +207,22 @@ def get_cubin(self, cc=None):
additional_flags=["-ptx"],
lto=self._lto
)
self._link_all(linker, cc)

try:
ptx = linker.get_linked_ptx().decode('utf-8')

print(("ASSEMBLY (AFTER LTO) %s" % self._name).center(80, '-'))
print(ptx)
print('=' * 80)
except driver.LinkerError as e:
if linkererr_cause := getattr(e, "__cause__", None):
if "-ptx requires that all inputs have LTOIR" in str(
linkererr_cause
):
warn(
"Linker input contains non-LTOIR objects, nvjitlink"
" cannot generate LTO-ed PTX."
)
# The `-ptx` flag is meant for viewing the optimized PTX of LTO objects,
# so non-LTO objects are not passed to the linker here.
self._link_all(linker, cc, ignore_nonlto=True)

ptx = linker.get_linked_ptx().decode('utf-8')

print(("ASSEMBLY (AFTER LTO) %s" % self._name).center(80, '-'))
print(ptx)
print('=' * 80)

linker = driver.Linker.new(
max_registers=self._max_registers,
cc=cc,
lto=self._lto
)
self._link_all(linker, cc)
self._link_all(linker, cc, ignore_nonlto=False)
cubin = linker.complete()

self._cubin_cache[cc] = cubin
Expand Down
24 changes: 22 additions & 2 deletions numba_cuda/numba/cuda/cudadrv/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
from .drvapi import API_PROTOTYPES
from .drvapi import cu_occupancy_b2d_size, cu_stream_callback_pyobj, cu_uuid
from .mappings import FILE_EXTENSION_MAP
from .linkable_code import LinkableCode
from .linkable_code import LinkableCode, LTOIR
from numba.cuda.cudadrv import enums, drvapi, nvrtc

USE_NV_BINDING = config.CUDA_USE_NVIDIA_BINDING
Expand Down Expand Up @@ -2683,12 +2683,18 @@ def add_cu_file(self, path):
cu = f.read()
self.add_cu(cu, os.path.basename(path))

def add_file_guess_ext(self, path_or_code):
def add_file_guess_ext(self, path_or_code, ignore_nonlto=False):
"""
Add a file or LinkableCode object to the link. If a file is
passed, the type will be inferred from the extension. A LinkableCode
object represents a file already in memory.
When `ignore_nonlto` is set to true, do not add code that will not
be LTO-ed in the linking process. This is useful for inspecting the
LTO-ed portion of the PTX when the linker is given a mix of objects,
some of which can be LTO-ed and some of which cannot.
"""

if isinstance(path_or_code, str):
ext = pathlib.Path(path_or_code).suffix
if ext == '':
Expand All @@ -2704,6 +2710,13 @@ def add_file_guess_ext(self, path_or_code):
"Don't know how to link file with extension "
f"{ext}"
)
if ignore_nonlto and kind != FILE_EXTENSION_MAP["ltoir"]:
warnings.warn(
f"Not adding {path_or_code} as it is not optimizable "
"at link time, and `ignore_nonlto == True`."
)
return

self.add_file(path_or_code, kind)
return
else:
Expand All @@ -2716,6 +2729,13 @@ def add_file_guess_ext(self, path_or_code):
if path_or_code.kind == "cu":
self.add_cu(path_or_code.data, path_or_code.name)
else:
if ignore_nonlto and not isinstance(path_or_code.kind, LTOIR):
warnings.warn(
f"Not adding {path_or_code.name} as it is not "
"optimizable at link time, and `ignore_nonlto == True`."
)
return

self.add_data(
path_or_code.data, path_or_code.kind, path_or_code.name
)
Expand Down
3 changes: 2 additions & 1 deletion numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,8 @@ def kernel(result):
assert result[0] == 3

assert len(w) == 1
self.assertIn("cannot generate LTO-ed PTX", str(w[0].message))
self.assertIn("it is not optimizable at link time, and "
"`ignore_nonlto == True`", str(w[0].message))

config.DUMP_ASSEMBLY = False

Expand Down

0 comments on commit afcce87

Please sign in to comment.