Skip to content

Commit

Permalink
Reapply "[libc] Remove 'packaged' GPU build support (#100208)"
Browse files Browse the repository at this point in the history
Summary: This reverts commit 550b83d.

Test Plan: 

Reviewers: 

Subscribers: 

Tasks: 

Tags: 


Differential Revision: https://phabricator.intern.facebook.com/D60250773
  • Loading branch information
jhuber6 authored and yuxuanchen1997 committed Jul 25, 2024
1 parent 38ea68b commit 39cc344
Show file tree
Hide file tree
Showing 6 changed files with 10 additions and 140 deletions.
91 changes: 0 additions & 91 deletions libc/cmake/modules/LLVMLibCLibraryRules.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -83,97 +83,6 @@ function(get_all_object_file_deps result fq_deps_list)
set(${result} ${all_deps} PARENT_SCOPE)
endfunction()

# A rule to build a library from a collection of entrypoint objects and bundle
# it into a GPU fatbinary. Every GPU object is wrapped into an offloading binary
# with the 'clang-offload-packager', embedded into an empty host stub object,
# and the resulting host objects are collected into a static library.
# Usage:
#   add_gpu_entrypoint_library(
#     <target name>
#     <base target name>
#     DEPENDS <list of add_entrypoint_object targets>
#   )
# Arguments:
#   target_name      - Name of the static library target that is created.
#   base_target_name - Target that every packaging and embedding step is made
#                      to depend on.
#   DEPENDS          - Entrypoint object targets to include (required).
function(add_gpu_entrypoint_library target_name base_target_name)
  cmake_parse_arguments(
    "ENTRYPOINT_LIBRARY"
    "" # No optional arguments
    "" # No single value arguments
    "DEPENDS" # Multi-value arguments
    ${ARGN}
  )
  if(NOT ENTRYPOINT_LIBRARY_DEPENDS)
    message(FATAL_ERROR "'add_gpu_entrypoint_library' target requires a DEPENDS "
                        "list of 'add_entrypoint_object' targets.")
  endif()

  # The image description handed to the packager only depends on the GPU
  # architecture, so it is computed once instead of once per entrypoint.
  # Failing here avoids emitting a packager command with an empty image prefix.
  if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
    set(prefix --image=arch=generic,triple=nvptx64-nvidia-cuda,feature=+ptx63)
  elseif(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
    set(prefix --image=arch=generic,triple=amdgcn-amd-amdhsa)
  else()
    message(FATAL_ERROR "'add_gpu_entrypoint_library' only supports the NVPTX "
                        "and AMDGPU target architectures.")
  endif()

  get_fq_deps_list(fq_deps_list ${ENTRYPOINT_LIBRARY_DEPENDS})
  get_all_object_file_deps(all_deps "${fq_deps_list}")

  # The GPU 'libc' needs to be exported in a format that can be linked with
  # offloading languages like OpenMP or CUDA. This wraps every GPU object into a
  # fat binary and adds them to a static library.
  set(objects "")
  foreach(dep IN LISTS all_deps)
    # Expands to the object files of 'dep' only if 'dep' names an existing
    # target; otherwise it expands to nothing.
    set(object $<$<STREQUAL:$<TARGET_NAME_IF_EXISTS:${dep}>,${dep}>:$<TARGET_OBJECTS:${dep}>>)

    # Use the component after the last '.' of the fully qualified target name
    # as the file name. If there is no '.', the search yields -1 and the whole
    # name is used.
    string(FIND "${dep}" "." last_dot_loc REVERSE)
    math(EXPR name_loc "${last_dot_loc} + 1")
    string(SUBSTRING "${dep}" ${name_loc} -1 name)

    set(gpubin_file "${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin")
    set(stub_file "${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp")

    # Use the 'clang-offload-packager' to merge these files into a binary blob.
    # The comment uses the plain target name because generator expressions in
    # COMMENT are only evaluated by newer CMake releases.
    add_custom_command(
      OUTPUT "${gpubin_file}"
      COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/binary
      COMMAND ${LIBC_CLANG_OFFLOAD_PACKAGER}
              "${prefix},file=$<JOIN:${object},,file=>" -o
              ${gpubin_file}
      DEPENDS ${dep} ${base_target_name}
      COMMENT "Packaging LLVM offloading binary for '${dep}'"
      VERBATIM
    )
    add_custom_target(${dep}.__gpubin__ DEPENDS ${dep} "${gpubin_file}")
    # Only order against the packager when it is built as part of this build.
    if(TARGET clang-offload-packager)
      add_dependencies(${dep}.__gpubin__ clang-offload-packager)
    endif()

    # CMake does not permit setting the name on object files. In order to have
    # human readable names we create an empty stub file with the entrypoint
    # name. This empty file will then have the created binary blob embedded.
    add_custom_command(
      OUTPUT "${stub_file}"
      COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/stubs
      COMMAND ${CMAKE_COMMAND} -E touch ${stub_file}
      DEPENDS ${dep} ${dep}.__gpubin__ ${base_target_name}
      VERBATIM
    )
    add_custom_target(${dep}.__stub__
                      DEPENDS ${dep}.__gpubin__ "${stub_file}")

    add_library(${dep}.__fatbin__
      EXCLUDE_FROM_ALL OBJECT
      "${stub_file}"
    )

    # This is always compiled for the LLVM host triple instead of the native GPU
    # triple that is used by default in the build.
    target_compile_options(${dep}.__fatbin__ BEFORE PRIVATE -nostdlib)
    target_compile_options(${dep}.__fatbin__ PRIVATE
                           --target=${LLVM_HOST_TRIPLE}
                           "SHELL:-Xclang -fembed-offload-object=${gpubin_file}")
    add_dependencies(${dep}.__fatbin__
                     ${dep} ${dep}.__stub__ ${dep}.__gpubin__ ${base_target_name})

    # Set the list of newly created fat binaries containing embedded device code.
    list(APPEND objects $<TARGET_OBJECTS:${dep}.__fatbin__>)
  endforeach()

  add_library(
    ${target_name}
    STATIC
    ${objects}
  )
  # NOTE(review): CMake places STATIC library archives according to
  # ARCHIVE_OUTPUT_DIRECTORY, so this property likely has no effect on the
  # archive location. It is kept unchanged so callers that set the archive
  # directory themselves see no difference - confirm before changing.
  set_target_properties(${target_name} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${LIBC_LIBRARY_DIR})
endfunction()

# A rule to build a library from a collection of entrypoint objects and bundle
# it in a single LLVM-IR bitcode file.
# Usage:
Expand Down
19 changes: 0 additions & 19 deletions libc/docs/gpu/building.rst
Original file line number Diff line number Diff line change
Expand Up @@ -151,25 +151,6 @@ Build overview
Once installed, the GPU build will create several files used for different
targets. This section will briefly describe their purpose.

**lib/<host-triple>/libcgpu-amdgpu.a or lib/libcgpu-amdgpu.a**
A static library containing fat binaries supporting AMD GPUs. These are built
using the support described in the `clang documentation
<https://clang.llvm.org/docs/OffloadingDesign.html>`_. These are intended to
be static libraries included natively for offloading languages like CUDA, HIP,
or OpenMP. This implements the standard C library.

**lib/<host-triple>/libmgpu-amdgpu.a or lib/libmgpu-amdgpu.a**
A static library containing fat binaries that implements the standard math
library for AMD GPUs.

**lib/<host-triple>/libcgpu-nvptx.a or lib/libcgpu-nvptx.a**
A static library containing fat binaries that implement the standard C library
for NVIDIA GPUs.

**lib/<host-triple>/libmgpu-nvptx.a or lib/libmgpu-nvptx.a**
A static library containing fat binaries that implement the standard math
library for NVIDIA GPUs.

**include/<target-triple>**
The include directory where all of the generated headers for the target will
go. These definitions are strictly for the GPU when being targeted directly.
Expand Down
15 changes: 8 additions & 7 deletions libc/docs/gpu/using.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,17 @@ described in the `clang documentation
by the OpenMP toolchain, but is currently opt-in for the CUDA and HIP toolchains
through the ``--offload-new-driver`` and ``-fgpu-rdc`` flags.

The installation should contain a static library called ``libcgpu-amdgpu.a`` or
``libcgpu-nvptx.a`` depending on which GPU architectures your build targeted.
These contain fat binaries compatible with the offloading toolchain such that
they can be used directly.
In order to link the GPU runtime, we simply pass this library to the embedded
device linker job. This can be done using the ``-Xoffload-linker`` option, which
forwards an argument to a ``clang`` job used to create the final GPU executable.
The toolchain should pick up the C libraries automatically in most cases, so
this shouldn't be necessary.

.. code-block:: sh
$> clang openmp.c -fopenmp --offload-arch=gfx90a -lcgpu-amdgpu
$> clang cuda.cu --offload-arch=sm_80 --offload-new-driver -fgpu-rdc -lcgpu-nvptx
$> clang hip.hip --offload-arch=gfx940 --offload-new-driver -fgpu-rdc -lcgpu-amdgpu
$> clang openmp.c -fopenmp --offload-arch=gfx90a -Xoffload-linker -lc
$> clang cuda.cu --offload-arch=sm_80 --offload-new-driver -fgpu-rdc -Xoffload-linker -lc
$> clang hip.hip --offload-arch=gfx940 --offload-new-driver -fgpu-rdc -Xoffload-linker -lc
This will automatically link in the needed function definitions if they were
required by the user's application. Normally using the ``-fgpu-rdc`` option
Expand Down
15 changes: 0 additions & 15 deletions libc/lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,20 +40,6 @@ foreach(archive IN ZIP_LISTS
# Add the offloading version of the library for offloading languages. These
# are installed in the standard search path separate from the other libraries.
if(LIBC_TARGET_OS_IS_GPU)
add_gpu_entrypoint_library(
${archive_1}gpu
${archive_1}
DEPENDS
${${archive_2}}
)
set_target_properties(
${archive_1}gpu
PROPERTIES
ARCHIVE_OUTPUT_NAME ${archive_0}gpu-${LIBC_TARGET_ARCHITECTURE}
ARCHIVE_OUTPUT_DIRECTORY ${LLVM_LIBRARY_OUTPUT_INTDIR}
)
list(APPEND added_gpu_archive_targets ${archive_1}gpu)

add_bitcode_entrypoint_library(
${archive_1}bitcode
${archive_1}
Expand All @@ -65,7 +51,6 @@ foreach(archive IN ZIP_LISTS
PROPERTIES
OUTPUT_NAME ${archive_1}.bc
)
add_dependencies(${archive_1}gpu ${archive_1}bitcode)
list(APPEND added_gpu_bitcode_targets ${archive_1}bitcode)
endif()
endforeach()
Expand Down
3 changes: 2 additions & 1 deletion offload/test/libc/assert.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@

// REQUIRES: libc

// NVPTX without LTO uses the implementation in OpenMP currently.
// AMDGPU and NVPTX without LTO use the implementation in OpenMP currently.
// UNSUPPORTED: nvptx64-nvidia-cuda
// UNSUPPORTED: amdgcn-amd-amdhsa
// REQUIRES: gpu

#include <assert.h>
Expand Down
7 changes: 0 additions & 7 deletions offload/test/lit.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -179,13 +179,6 @@ def remove_suffix_if_present(name):
return name

def add_libraries(source):
    """Return `source` with the device runtime archives appended.

    The OpenMP device runtime archive from the intermediate library directory
    is always appended. When the configuration reports libc support and the
    current target starts with 'nvptx' or 'amdgcn', the matching GPU libc
    archive from the library directory is placed in front of it.
    """
    device_rtl = config.llvm_library_intdir + "/libomptarget.devicertl.a"
    if config.libomptarget_has_libc:
        # Checked in order; the first matching target prefix wins.
        libc_archives = (
            ('nvptx', "/libcgpu-nvptx.a "),
            ('amdgcn', "/libcgpu-amdgpu.a "),
        )
        for target_prefix, archive in libc_archives:
            if config.libomptarget_current_target.startswith(target_prefix):
                return source + " " + config.llvm_library_dir + archive + device_rtl
    return source + " " + device_rtl

# Add platform targets
Expand Down

0 comments on commit 39cc344

Please sign in to comment.