Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Fix ptx file discovery in editable installs #14767

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions python/cudf/cudf/core/udf/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.

import os
from typing import Any, Callable, Dict
Expand All @@ -17,10 +17,7 @@

import rmm

from cudf._lib.strings_udf import (
column_from_udf_string_array,
column_to_string_view_array,
)
from cudf._lib import strings_udf
from cudf.api.types import is_scalar
from cudf.core.column.column import as_column
from cudf.core.dtypes import dtype
Expand Down Expand Up @@ -63,7 +60,10 @@
# Module-level LRU cache, bounded to 32 entries.
# NOTE(review): presumably holds compiled UDF kernels — confirm against the code that populates it.
precompiled: cachetools.LRUCache = cachetools.LRUCache(maxsize=32)
# Module-level registry, initially empty.
# NOTE(review): key/value types are not visible in this hunk — confirm against callers.
launch_arg_getters: Dict[Any, Any] = {}

# Looks up the "shim_"-prefixed PTX file starting from this module's own directory.
# NOTE(review): in this flattened diff this appears to be the pre-change line; the
# assignment that follows rebinds _PTX_FILE, so only the later value is kept.
_PTX_FILE = _get_ptx_file(os.path.dirname(__file__), "shim_")
# Looks up the "shim_"-prefixed PTX file via "<dir of strings_udf>/../core/udf",
# i.e. relative to where the strings_udf module lives rather than relative to
# this source file.
_PTX_FILE = _get_ptx_file(
os.path.join(os.path.dirname(strings_udf.__file__), "..", "core", "udf"),
"shim_",
)


@_cudf_nvtx_annotate
Expand Down Expand Up @@ -319,7 +319,7 @@ def _return_arr_from_dtype(dtype, size):

# Turn a UDF output buffer `col` into a column matching the return type `retty`.
# NOTE(review): this is a flattened diff hunk — the two consecutive string-path
# returns below look like the pre-change and post-change versions of one line;
# confirm which one the target file should keep.
def _post_process_output_col(col, retty):
if retty == _cudf_str_dtype:
# String results are converted with column_from_udf_string_array
# (unqualified name first, then the strings_udf-qualified form).
return column_from_udf_string_array(col)
return strings_udf.column_from_udf_string_array(col)
# Any other return type is passed to as_column together with retty.
return as_column(col, retty)


Expand Down Expand Up @@ -361,7 +361,7 @@ def set_malloc_heap_size(size=None):

# Thin wrapper that forwards `col` to column_to_string_view_array and returns
# its result unchanged.
# NOTE(review): this is a flattened diff hunk — the two returns below look like
# the pre-change (unqualified) and post-change (strings_udf-qualified) versions
# of the same line; confirm which one the target file should keep.
def column_to_string_view_array_init_heap(col):
# lazily allocate heap only when a string needs to be returned
return column_to_string_view_array(col)
return strings_udf.column_to_string_view_array(col)


class UDFError(RuntimeError):
Expand Down
24 changes: 19 additions & 5 deletions python/cudf/cudf/utils/_numba.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.

import glob
import os
import sys
import warnings
from functools import lru_cache

from numba import config as numba_config

Expand All @@ -20,9 +21,20 @@ def patch_numba_linker_pynvjitlink():
)


# Path to shim_60.ptx, built relative to the directory that contains this
# module ("<this dir>/../core/udf/shim_60.ptx"). The path is not normalized.
CC_60_PTX_FILE = os.path.join(
os.path.dirname(__file__), "../core/udf/shim_60.ptx"
)
# The path is computed inside a cached function so that strings_udf is only
# imported on first call. Importing it at module scope would close the
# circular import chain _lib.* -> cudautils -> _numba -> _lib.strings_udf.
@lru_cache
def _get_cc_60_ptx_file():
    """Return the path to ``shim_60.ptx`` located via ``cudf._lib``.

    The path is assembled as ``<dir of strings_udf>/../core/udf/shim_60.ptx``
    and is returned without normalization. ``lru_cache`` memoizes the result,
    so the import and path construction happen only once.
    """
    from cudf._lib import strings_udf

    lib_dir = os.path.dirname(strings_udf.__file__)
    relative_parts = ("..", "core", "udf", "shim_60.ptx")
    return os.path.join(lib_dir, *relative_parts)


def _get_best_ptx_file(archs, max_compute_capability):
Expand Down Expand Up @@ -119,7 +131,9 @@ def _setup_numba():
versions = safe_get_versions()
if versions != NO_DRIVER:
driver_version, runtime_version = versions
ptx_toolkit_version = _get_cuda_version_from_ptx_file(CC_60_PTX_FILE)
ptx_toolkit_version = _get_cuda_version_from_ptx_file(
_get_cc_60_ptx_file()
)

# MVC is required whenever any PTX is newer than the driver
# This could be the shipped PTX file or the PTX emitted by
Expand Down
29 changes: 2 additions & 27 deletions python/cudf/udf_cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# =============================================================================
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
Expand Down Expand Up @@ -55,30 +55,6 @@ target_compile_options(
target_link_libraries(cudf_strings_udf PUBLIC cudf::cudf)
install(TARGETS cudf_strings_udf DESTINATION ./cudf/_lib/)

# Copy the generated PTX file from its generator-specific location in the build tree into a
# specified location in the build tree from which we can install it.
#
# Arguments:
#   target      - target whose $<TARGET_OBJECTS:...> output is the file to copy
#   destination - directory to create (if needed) and copy into
#   new_name    - file name given to the copy inside `destination`
#
# Side effect: defines a custom target named `<target>_cp_ptx` that is part of ALL.
function(copy_ptx_to_location target destination new_name)
# Path of the helper script; the name embeds the lower-cased build configuration.
set(cmake_generated_file
"${CMAKE_CURRENT_BINARY_DIR}/cmake/cp_${target}_$<LOWER_CASE:$<CONFIG>>_ptx.cmake"
)
# Write the helper script with file(GENERATE) so the generator expressions in OUTPUT and
# CONTENT can be evaluated. The script creates `destination` and copies the object file to
# `destination/new_name`. `\${ptx_path}` is escaped so it is expanded when the script runs,
# not while this function executes.
file(
GENERATE
OUTPUT "${cmake_generated_file}"
CONTENT
"
set(ptx_path \"$<TARGET_OBJECTS:${target}>\")
file(MAKE_DIRECTORY \"${destination}\")
file(COPY_FILE \${ptx_path} \"${destination}/${new_name}\")"
)

# Run the helper script in CMake script mode (-P) as part of the default build; the DEPENDS
# entry ties the copy to the target's object file.
add_custom_target(
${target}_cp_ptx ALL
COMMAND ${CMAKE_COMMAND} -P "${cmake_generated_file}"
DEPENDS $<TARGET_OBJECTS:${target}>
COMMENT "Copying PTX files to '${destination}'"
)
endfunction()

# Create the shim library for each architecture.
set(SHIM_CUDA_FLAGS --expt-relaxed-constexpr -rdc=true)

Expand All @@ -104,10 +80,9 @@ foreach(arch IN LISTS CMAKE_CUDA_ARCHITECTURES)
target_compile_options(${tgt} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${SHIM_CUDA_FLAGS}>")
target_link_libraries(${tgt} PUBLIC cudf::cudf)

copy_ptx_to_location(${tgt} "${CMAKE_CURRENT_BINARY_DIR}/../udf" ${tgt}.ptx)
install(
FILES $<TARGET_OBJECTS:${tgt}>
DESTINATION ./cudf/core/udf/
DESTINATION cudf/core/udf/
RENAME ${tgt}.ptx
)
endforeach()