Use pynvjitlink for MVC

This commit moves some of the logic from the pynvjitlink patch.py into numba-cuda so that it can use pynvjitlink without patching.
NVIDIA · Oct 21, 2024 · cd94f0d · cd94f0d
1 parent d3abb3a
commit cd94f0d
Show file tree

Hide file tree

Showing 18 changed files with 907 additions and 57 deletions.
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
@@ -19,8 +19,10 @@ jobs:
       - compute-matrix
       - build-conda
       - test-conda
+      - test-conda-pynvjitlink
       - build-wheels
       - test-wheels
+      - test-wheels-pynvjitlink
       - build-docs
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.08
@@ -57,6 +59,16 @@ jobs:
       script: "ci/test_conda.sh"
       run_codecov: false
       matrix_filter: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
+  test-conda-pynvjitlink:  # conda test run driven by ci/test_conda_pynvjitlink.sh
+    needs:
+      - build-conda
+      - compute-matrix
+    uses: ./.github/workflows/conda-python-tests.yaml
+    with:  # matrix_filter below is a literal filter (amd64 / CUDA 12.5.1 / Python 3.11), not the computed TEST_MATRIX
+      build_type: pull-request
+      script: "ci/test_conda_pynvjitlink.sh"
+      run_codecov: false
+      matrix_filter: map(select(.ARCH == "amd64" and .CUDA_VER == "12.5.1" and .PY_VER == "3.11"))
   build-wheels:
     needs:
       - compute-matrix
@@ -71,7 +83,15 @@ jobs:
     uses: ./.github/workflows/wheels-test.yaml
     with:
       build_type: pull-request
-      script: "ci/test_wheel.sh"
+      script: "ci/test_wheel.sh false"
+  test-wheels-pynvjitlink:  # wheel test run driven by ci/test_wheel_pynvjitlink.sh
+    needs:
+      - build-wheels
+    uses: ./.github/workflows/wheels-test.yaml
+    with:  # matrix_filter below is a literal filter (amd64 / CUDA 12.5.1 / Python 3.12)
+      build_type: pull-request
+      script: "ci/test_wheel_pynvjitlink.sh"
+      matrix_filter: map(select(.ARCH == "amd64" and .CUDA_VER == "12.5.1" and .PY_VER == "3.12"))
   build-docs:
     needs:
       - build-conda

diff --git a/.gitignore b/.gitignore
@@ -3,3 +3,5 @@ __pycache__
 build
 .*.swp
 *.so
+numba_cuda/numba/cuda/tests/cudadrv/test_device_functions.*
+numba_cuda/numba/cuda/tests/cudadrv/undefined_extern.*
diff --git a/ci/test_conda_pynvjitlink.sh b/ci/test_conda_pynvjitlink.sh
@@ -0,0 +1,90 @@
+#!/bin/bash
+# Copyright (c) 2024, NVIDIA CORPORATION
+#
+# Conda CI test run for numba-cuda with pynvjitlink enabled: creates a test
+# environment, installs numba-cuda from the local conda-repo channel, adds
+# pynvjitlink, builds the test binaries and runs the numba.cuda test suite
+# with ENABLE_PYNVJITLINK=1.
+
+set -euo pipefail
+
+. /opt/conda/etc/profile.d/conda.sh
+
+# Pick the toolkit packages from the major component of CUDA_VER.
+if [ "${CUDA_VER%.*.*}" = "11" ]; then
+  CTK_PACKAGES="cudatoolkit"
+else
+  CTK_PACKAGES="cuda-nvcc-impl cuda-nvrtc"
+fi
+
+rapids-logger "Install testing dependencies"
+# TODO: Replace with rapids-dependency-file-generator
+# CTK_PACKAGES stays unquoted on purpose: it may hold several package names.
+rapids-mamba-retry create -n test \
+    c-compiler \
+    cxx-compiler \
+    ${CTK_PACKAGES} \
+    cuda-python \
+    cuda-version=${CUDA_VER%.*} \
+    make \
+    psutil \
+    pytest \
+    python=${RAPIDS_PY_VERSION}
+
+# Temporarily allow unbound variables for conda activation.
+set +u
+conda activate test
+set -u
+
+rapids-mamba-retry install -c "$(pwd)/conda-repo" numba-cuda
+
+RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}/
+mkdir -p "${RAPIDS_TESTS_DIR}"
+pushd "${RAPIDS_TESTS_DIR}"
+
+rapids-print-env
+
+rapids-logger "Check GPU usage"
+nvidia-smi
+
+rapids-logger "Show Numba system info"
+python -m numba --sysinfo
+
+# From here on a failing command sets EXITCODE=1 through the ERR trap instead
+# of aborting the script (errexit is switched off).
+EXITCODE=0
+trap "EXITCODE=1" ERR
+set +e
+
+
+rapids-logger "Install pynvjitlink"
+set +u
+rapids-mamba-retry install -c rapidsai pynvjitlink
+set -u
+
+rapids-logger "Build tests"
+
+# Locate the test-binary directory inside the installed numba_cuda package.
+# os.path.dirname is used instead of str.rstrip('__init__.py'), which strips a
+# set of characters rather than a suffix.
+PY_SCRIPT="
+import os
+import numba_cuda
+root = os.path.dirname(numba_cuda.__file__)
+test_dir = os.path.join(root, 'numba', 'cuda', 'tests', 'test_binary_generation', '')
+print(test_dir)
+"
+
+NUMBA_CUDA_TEST_BIN_DIR=$(python -c "$PY_SCRIPT")
+pushd "$NUMBA_CUDA_TEST_BIN_DIR"
+make
+popd
+
+
+rapids-logger "Run Tests"
+ENABLE_PYNVJITLINK=1 NUMBA_CUDA_TEST_BIN_DIR="$NUMBA_CUDA_TEST_BIN_DIR" python -m numba.runtests numba.cuda.tests -v
+
+popd
+
+rapids-logger "Test script exiting with value: $EXITCODE"
+exit ${EXITCODE}
diff --git a/ci/test_wheel_pynvjitlink.sh b/ci/test_wheel_pynvjitlink.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+# Copyright (c) 2023-2024, NVIDIA CORPORATION
+#
+# Wheel CI test run for numba-cuda with pynvjitlink enabled: installs the test
+# dependencies, pynvjitlink and the built wheel, builds the test binaries and
+# runs the numba.cuda test suite with ENABLE_PYNVJITLINK=1.
+
+set -euo pipefail
+
+rapids-logger "Install testing dependencies"
+# TODO: Replace with rapids-dependency-file-generator
+python -m pip install \
+    psutil \
+    cuda-python \
+    pytest
+
+rapids-logger "Install pynvjitlink"
+python -m pip install pynvjitlink-cu12
+
+# The wheel is installed before the test binaries are built: the build step
+# imports numba_cuda to find its test directory, so the package under test
+# must already be present.
+rapids-logger "Install wheel"
+package=$(realpath wheel/numba_cuda*.whl)
+echo "Package path: $package"
+python -m pip install "$package"
+
+rapids-logger "Build tests"
+# Locate the test-binary directory inside the installed numba_cuda package.
+# os.path.dirname is used instead of str.rstrip('__init__.py'), which strips a
+# set of characters rather than a suffix.
+PY_SCRIPT="
+import os
+import numba_cuda
+root = os.path.dirname(numba_cuda.__file__)
+test_dir = os.path.join(root, 'numba', 'cuda', 'tests', 'test_binary_generation', '')
+print(test_dir)
+"
+
+NUMBA_CUDA_TEST_BIN_DIR=$(python -c "$PY_SCRIPT")
+pushd "$NUMBA_CUDA_TEST_BIN_DIR"
+make
+popd
+
+rapids-logger "Check GPU usage"
+nvidia-smi
+
+RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}/
+mkdir -p "${RAPIDS_TESTS_DIR}"
+pushd "${RAPIDS_TESTS_DIR}"
+
+rapids-logger "Show Numba system info"
+python -m numba --sysinfo
+
+rapids-logger "Run Tests"
+ENABLE_PYNVJITLINK=1 NUMBA_CUDA_TEST_BIN_DIR="$NUMBA_CUDA_TEST_BIN_DIR" python -m numba.runtests numba.cuda.tests -v
+
+popd
diff --git a/numba_cuda/numba/cuda/codegen.py b/numba_cuda/numba/cuda/codegen.py
@@ -59,11 +59,18 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
     get_cufunc), which may be of different compute capabilities.
     """
 
-    def __init__(self, codegen, name, entry_name=None, max_registers=None,
-                 nvvm_options=None):
+    def __init__(
+        self,
+        codegen,
+        name,
+        entry_name=None,
+        max_registers=None,
+        lto=False,
+        nvvm_options=None
+    ):
         """
         codegen:
             Codegen object.
         name:
             Name of the function in the source.
         entry_name:
@@ -103,6 +110,7 @@ def __init__(self, codegen, name, entry_name=None, max_registers=None,
         self._cufunc_cache = {}
 
         self._max_registers = max_registers
+        self._lto = lto
         if nvvm_options is None:
             nvvm_options = {}
         self._nvvm_options = nvvm_options
@@ -178,7 +186,9 @@ def get_cubin(self, cc=None):
         if cubin:
             return cubin
 
-        linker = driver.Linker.new(max_registers=self._max_registers, cc=cc)
+        linker = driver.Linker.new(
+            max_registers=self._max_registers, cc=cc, lto=self._lto
+        )
 
         if linker.lto:
             ltoir = self.get_ltoir(cc=cc)
diff --git a/.gitignore b/.gitignore
@@ -3,3 +3,5 @@ __pycache__
 build
 .*.swp
 *.so
+numba_cuda/numba/cuda/tests/cudadrv/test_device_functions.*
+numba_cuda/numba/cuda/tests/cudadrv/undefined_extern.*