Skip to content

Commit

Permalink
ROCm CMake configuration cleanup (#3716)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #3716

Renaming `USE_ROCM` to `FAISS_ENABLE_ROCM` in CMake files, `FAISS_ENABLE_ROCM` in SWIG files, and `USE_AMD_ROCM` in other source files to follow the existing naming convention.

Reviewed By: mnorris11

Differential Revision: D60673731

fbshipit-source-id: 1aaa3f2ff6836830c4eb733ee7f41554f79f9695
  • Loading branch information
ramilbakhshyiev authored and facebook-github-bot committed Aug 2, 2024
1 parent e59d8c3 commit a757309
Show file tree
Hide file tree
Showing 24 changed files with 69 additions and 83 deletions.
14 changes: 4 additions & 10 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ set(FAISS_LANGUAGES CXX)

if(FAISS_ENABLE_GPU)
if (FAISS_ENABLE_ROCM)
set(USE_ROCM TRUE)
list(APPEND FAISS_LANGUAGES HIP)
list(PREPEND CMAKE_MODULE_PATH "/opt/rocm/lib/cmake")
list(PREPEND CMAKE_PREFIX_PATH "/opt/rocm")
Expand Down Expand Up @@ -66,9 +65,9 @@ option(FAISS_ENABLE_PYTHON "Build Python extension." ON)
option(FAISS_ENABLE_C_API "Build C API." OFF)

if(FAISS_ENABLE_GPU)
if(USE_ROCM)
if(FAISS_ENABLE_ROCM)
enable_language(HIP)
add_definitions(-DUSE_ROCM)
add_definitions(-DUSE_AMD_ROCM)
find_package(HIP REQUIRED)
find_package(hipBLAS REQUIRED)
set(GPU_EXT_PREFIX "hip")
Expand All @@ -83,15 +82,10 @@ if(FAISS_ENABLE_RAFT AND NOT TARGET raft::raft)
find_package(raft COMPONENTS compiled distributed)
endif()

if(USE_ROCM)
find_package(HIP REQUIRED)
find_package(hipBLAS REQUIRED)
endif()

add_subdirectory(faiss)

if(FAISS_ENABLE_GPU)
if(USE_ROCM)
if(FAISS_ENABLE_ROCM)
add_subdirectory(faiss/gpu-rocm)
else()
add_subdirectory(faiss/gpu)
Expand All @@ -116,7 +110,7 @@ if(BUILD_TESTING)
add_subdirectory(tests)

if(FAISS_ENABLE_GPU)
if(USE_ROCM)
if(FAISS_ENABLE_ROCM)
add_subdirectory(faiss/gpu-rocm/test)
else()
add_subdirectory(faiss/gpu/test)
Expand Down
2 changes: 1 addition & 1 deletion c_api/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ add_executable(example_c EXCLUDE_FROM_ALL example_c.c)
target_link_libraries(example_c PRIVATE faiss_c)

if(FAISS_ENABLE_GPU)
if(USE_ROCM)
if(FAISS_ENABLE_ROCM)
add_subdirectory(gpu-rocm)
else ()
add_subdirectory(gpu)
Expand Down
12 changes: 6 additions & 6 deletions c_api/gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@ target_sources(faiss_c PRIVATE
file(GLOB FAISS_C_API_GPU_HEADERS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.h")
faiss_install_headers("${FAISS_C_API_GPU_HEADERS}" c_api/gpu)

if (USE_ROCM)
find_package(HIP REQUIRED)
find_package(hipBLAS REQUIRED)
target_link_libraries(faiss_c PUBLIC hip::host roc::hipblas)
if (FAISS_ENABLE_ROCM)
target_link_libraries(faiss_c PUBLIC hip::host roc::hipblas)
else()
find_package(CUDAToolkit REQUIRED)
target_link_libraries(faiss_c PUBLIC CUDA::cudart CUDA::cublas $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft> $<$<BOOL:${FAISS_ENABLE_RAFT}>:nvidia::cutlass::cutlass>)
find_package(CUDAToolkit REQUIRED)
target_link_libraries(faiss_c PUBLIC CUDA::cudart CUDA::cublas
$<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft>
$<$<BOOL:${FAISS_ENABLE_RAFT}>:nvidia::cutlass::cutlass>)
endif()

add_executable(example_gpu_c EXCLUDE_FROM_ALL example_gpu_c.c)
Expand Down
10 changes: 4 additions & 6 deletions faiss/gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ function(generate_ivf_interleaved_code)
"64|2048|8"
)

if (USE_ROCM)
if (FAISS_ENABLE_ROCM)
list(TRANSFORM FAISS_GPU_SRC REPLACE cu$ hip)
endif()

Expand Down Expand Up @@ -294,7 +294,7 @@ if(FAISS_ENABLE_RAFT)
target_compile_definitions(faiss_gpu PUBLIC USE_NVIDIA_RAFT=1)
endif()

if (USE_ROCM)
if (FAISS_ENABLE_ROCM)
list(TRANSFORM FAISS_GPU_SRC REPLACE cu$ hip)
endif()

Expand All @@ -313,10 +313,8 @@ foreach(header ${FAISS_GPU_HEADERS})
)
endforeach()

if (USE_ROCM)
target_link_libraries(faiss_gpu PRIVATE
$<$<BOOL:${USE_ROCM}>:hip::host>
$<$<BOOL:${USE_ROCM}>:roc::hipblas>)
if (FAISS_ENABLE_ROCM)
target_link_libraries(faiss_gpu PRIVATE hip::host roc::hipblas)
target_compile_options(faiss_gpu PRIVATE)
else()
# Prepares a host linker script and enables host linker to support
Expand Down
2 changes: 1 addition & 1 deletion faiss/gpu/GpuFaissAssert.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
/// Assertions
///

#if defined(__CUDA_ARCH__) || defined(USE_ROCM)
#if defined(__CUDA_ARCH__) || defined(USE_AMD_ROCM)
#define GPU_FAISS_ASSERT(X) assert(X)
#define GPU_FAISS_ASSERT_MSG(X, MSG) assert(X)
#define GPU_FAISS_ASSERT_FMT(X, FMT, ...) assert(X)
Expand Down
2 changes: 1 addition & 1 deletion faiss/gpu/StandardGpuResources.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
prop.major,
prop.minor);

#if USE_ROCM
#if USE_AMD_ROCM
// Our code is pre-built with and expects warpSize == 32 or 64, validate
// that
FAISS_ASSERT_FMT(
Expand Down
2 changes: 1 addition & 1 deletion faiss/gpu/impl/PQCodeDistances-inl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
namespace faiss {
namespace gpu {

#if defined(USE_ROCM) && __AMDGCN_WAVEFRONT_SIZE == 64u
#if defined(USE_AMD_ROCM) && __AMDGCN_WAVEFRONT_SIZE == 64u
#define LAUNCH_BOUND 320
#else
#define LAUNCH_BOUND 288
Expand Down
6 changes: 3 additions & 3 deletions faiss/gpu/impl/PQCodeLoad.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ inline __device__ unsigned int getByte(uint64_t v, int pos, int width) {
return getBitfield(v, pos, width);
}

#ifdef USE_ROCM
#ifdef USE_AMD_ROCM

template <int NumSubQuantizers>
struct LoadCode32 {};
Expand Down Expand Up @@ -276,7 +276,7 @@ struct LoadCode32<96> {
}
};

#else // USE_ROCM
#else // USE_AMD_ROCM

template <int NumSubQuantizers>
struct LoadCode32 {};
Expand Down Expand Up @@ -609,7 +609,7 @@ struct LoadCode32<96> {
}
};

#endif // USE_ROCM
#endif // USE_AMD_ROCM

} // namespace gpu
} // namespace faiss
2 changes: 1 addition & 1 deletion faiss/gpu/impl/VectorResidual.cu
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#include <faiss/gpu/utils/DeviceUtils.h>
#include <faiss/gpu/utils/StaticUtils.h>
#include <faiss/impl/FaissAssert.h>
#ifdef USE_ROCM
#ifdef USE_AMD_ROCM
#define CUDART_NAN_F __int_as_float(0x7fffffff)
#else
#include <math_constants.h> // in CUDA SDK, for CUDART_NAN_F
Expand Down
9 changes: 4 additions & 5 deletions faiss/gpu/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,8 @@
# Defines `gtest_discover_tests()`.
include(GoogleTest)
add_library(faiss_gpu_test_helper TestUtils.cpp)
if(USE_ROCM)
target_link_libraries(faiss_gpu_test_helper PUBLIC
faiss gtest $<$<BOOL:${USE_ROCM}>:hip::host>)
if(FAISS_ENABLE_ROCM)
target_link_libraries(faiss_gpu_test_helper PUBLIC faiss gtest hip::host)
else()
find_package(CUDAToolkit REQUIRED)
target_link_libraries(faiss_gpu_test_helper PUBLIC
Expand Down Expand Up @@ -56,9 +55,9 @@ endif()
add_executable(demo_ivfpq_indexing_gpu EXCLUDE_FROM_ALL
demo_ivfpq_indexing_gpu.cpp)

if (USE_ROCM)
if (FAISS_ENABLE_ROCM)
target_link_libraries(demo_ivfpq_indexing_gpu
PRIVATE faiss gtest_main $<$<BOOL:${USE_ROCM}>:hip::host>)
PRIVATE faiss gtest_main hip::host)
else()
target_link_libraries(demo_ivfpq_indexing_gpu
PRIVATE faiss gtest_main CUDA::cudart)
Expand Down
6 changes: 3 additions & 3 deletions faiss/gpu/utils/DeviceDefs.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
namespace faiss {
namespace gpu {

#ifdef USE_ROCM
#ifdef USE_AMD_ROCM

#if __AMDGCN_WAVEFRONT_SIZE == 32u
constexpr int kWarpSize = 32;
Expand All @@ -27,7 +27,7 @@ __forceinline__ __device__ void warpFence() {

#define GPU_MAX_SELECTION_K 2048

#else // USE_ROCM
#else // USE_AMD_ROCM

// We require at least CUDA 8.0 for compilation
#if CUDA_VERSION < 8000
Expand Down Expand Up @@ -56,7 +56,7 @@ __forceinline__ __device__ void warpFence() {
#define GPU_MAX_SELECTION_K 1024
#endif

#endif // USE_ROCM
#endif // USE_AMD_ROCM

} // namespace gpu
} // namespace faiss
2 changes: 1 addition & 1 deletion faiss/gpu/utils/DeviceUtils.cu
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ int getDeviceForAddress(const void* p) {
return -1;
}

#if USE_ROCM
#if USE_AMD_ROCM
if (att.type != hipMemoryTypeHost &&
att.type != hipMemoryTypeUnregistered) {
return att.device;
Expand Down
2 changes: 1 addition & 1 deletion faiss/gpu/utils/Float16.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#include <faiss/gpu/utils/DeviceUtils.h>

// Some compute capabilities have full float16 ALUs.
#if __CUDA_ARCH__ >= 530 || defined(USE_ROCM)
#if __CUDA_ARCH__ >= 530 || defined(USE_AMD_ROCM)
#define FAISS_USE_FULL_FLOAT16 1
#endif // __CUDA_ARCH__ types

Expand Down
2 changes: 1 addition & 1 deletion faiss/gpu/utils/Limits.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ struct Limits<float> {
};

inline __device__ __host__ half kGetHalf(unsigned short v) {
#if CUDA_VERSION >= 9000 || defined(USE_ROCM)
#if CUDA_VERSION >= 9000 || defined(USE_AMD_ROCM)
__half_raw h;
h.x = v;
return __half(h);
Expand Down
6 changes: 3 additions & 3 deletions faiss/gpu/utils/LoadStoreOperators.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
namespace faiss {
namespace gpu {

#ifdef USE_ROCM
#ifdef USE_AMD_ROCM

template <typename T>
struct LoadStore {
Expand Down Expand Up @@ -66,7 +66,7 @@ struct LoadStore<Half8> {
}
};

#else // USE_ROCM
#else // USE_AMD_ROCM

template <typename T>
struct LoadStore {
Expand Down Expand Up @@ -142,7 +142,7 @@ struct LoadStore<Half8> {
}
};

#endif // USE_ROCM
#endif // USE_AMD_ROCM

} // namespace gpu
} // namespace faiss
2 changes: 1 addition & 1 deletion faiss/gpu/utils/MathOperators.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ struct Math<half> {
}

static inline __device__ half zero() {
#if CUDA_VERSION >= 9000 || defined(USE_ROCM)
#if CUDA_VERSION >= 9000 || defined(USE_AMD_ROCM)
return 0;
#else
half h;
Expand Down
8 changes: 4 additions & 4 deletions faiss/gpu/utils/MatrixMult-inl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ namespace gpu {
template <typename T>
struct GetCudaType;

#ifdef USE_ROCM
#ifdef USE_AMD_ROCM
template <>
struct GetCudaType<float> {
static constexpr hipblasDatatype_t Type = HIPBLAS_R_32F;
Expand Down Expand Up @@ -61,7 +61,7 @@ cublasStatus_t rawGemm(
auto cAT = GetCudaType<AT>::Type;
auto cBT = GetCudaType<BT>::Type;

#ifdef USE_ROCM
#ifdef USE_AMD_ROCM
return hipblasGemmEx(
handle,
transa,
Expand Down Expand Up @@ -135,7 +135,7 @@ cublasStatus_t rawGemm(
C,
CUDA_R_32F,
ldc);
#endif // USE_ROCM
#endif // USE_AMD_ROCM
}

template <typename AT, typename BT>
Expand All @@ -162,7 +162,7 @@ cublasStatus_t rawBatchGemm(
auto cBT = GetCudaType<BT>::Type;

// Always accumulate in f32
#ifdef USE_ROCM
#ifdef USE_AMD_ROCM
return hipblasGemmStridedBatchedEx(
handle,
transa,
Expand Down
8 changes: 4 additions & 4 deletions faiss/gpu/utils/PtxUtils.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@
#pragma once

#include <cuda.h>
#ifdef USE_ROCM
#ifdef USE_AMD_ROCM
#include <device_functions.h>
#endif

namespace faiss {
namespace gpu {

#ifdef USE_ROCM
#ifdef USE_AMD_ROCM

#define GET_BITFIELD_U32(OUT, VAL, POS, LEN) \
do { \
Expand Down Expand Up @@ -51,7 +51,7 @@ __device__ __forceinline__ int getLaneId() {
return ::__lane_id();
}

#else // USE_ROCM
#else // USE_AMD_ROCM

// defines to simplify the SASS assembly structure file/line in the profiler
#define GET_BITFIELD_U32(OUT, VAL, POS, LEN) \
Expand Down Expand Up @@ -129,7 +129,7 @@ __device__ __forceinline__ void namedBarrierArrived(int name, int numThreads) {
: "memory");
}

#endif // USE_ROCM
#endif // USE_AMD_ROCM

} // namespace gpu
} // namespace faiss
4 changes: 2 additions & 2 deletions faiss/gpu/utils/Select.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ struct BlockSelect {
__device__ inline void checkThreadQ() {
bool needSort = (numVals == NumThreadQ);

#if CUDA_VERSION < 9000 || defined(USE_ROCM)
#if CUDA_VERSION < 9000 || defined(USE_AMD_ROCM)
needSort = __any(needSort);
#else
needSort = __any_sync(0xffffffff, needSort);
Expand Down Expand Up @@ -484,7 +484,7 @@ struct WarpSelect {
__device__ inline void checkThreadQ() {
bool needSort = (numVals == NumThreadQ);

#if CUDA_VERSION < 9000 || defined(USE_ROCM)
#if CUDA_VERSION < 9000 || defined(USE_AMD_ROCM)
needSort = __any(needSort);
#else
needSort = __any_sync(0xffffffff, needSort);
Expand Down
Loading

0 comments on commit a757309

Please sign in to comment.