Skip to content

Commit

Permalink
Merge pull request PaddlePaddle#76 from YaoCheng8667/dev_xpu_mempool
Browse files Browse the repository at this point in the history
merge from paddlebox master; add xpu_mem_check; fix paddle on infer; Support buddy allocator
  • Loading branch information
YaoCheng8667 authored May 30, 2024
2 parents 8bc6a52 + a253d83 commit 83e62b7
Show file tree
Hide file tree
Showing 98 changed files with 6,280 additions and 1,203 deletions.
56 changes: 39 additions & 17 deletions cmake/cuda.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -6,28 +6,34 @@ if(WITH_NV_JETSON)
add_definitions(-DWITH_NV_JETSON)
set(paddle_known_gpu_archs "53 62 72")
set(paddle_known_gpu_archs10 "53 62 72")
set(paddle_known_gpu_archs11 "53 62 72 87")
set(paddle_known_gpu_archs12 "53 62 72 87 90")
elseif(NEW_RELEASE_ALL)
message("Using New Release Strategy - All Arches Packge")
add_definitions(-DNEW_RELEASE_ALL)
set(paddle_known_gpu_archs "35 50 52 60 61 70 75 80 86")
set(paddle_known_gpu_archs10 "35 50 52 60 61 70 75")
set(paddle_known_gpu_archs "50 52 60 61 70 75 80 86 90")
set(paddle_known_gpu_archs10 "50 52 60 61 70 75")
set(paddle_known_gpu_archs11 "50 60 61 70 75 80")
set(paddle_known_gpu_archs12 "50 60 61 70 75 80 90")
elseif(NEW_RELEASE_PYPI)
message("Using New Release Strategy - Cubin Packge")
add_definitions(-DNEW_RELEASE_PYPI)
set(paddle_known_gpu_archs "35 50 52 60 61 70 75 80 86")
set(paddle_known_gpu_archs "50 52 60 61 70 75 80 86 90")
set(paddle_known_gpu_archs10 "")
set(paddle_known_gpu_archs11 "60 61 70 75 80")
set(paddle_known_gpu_archs11 "61 70 75 80")
set(paddle_known_gpu_archs12 "61 70 75 80 90")
elseif(NEW_RELEASE_JIT)
message("Using New Release Strategy - JIT Packge")
add_definitions(-DNEW_RELEASE_JIT)
set(paddle_known_gpu_archs "35 50 52 60 61 70 75 80 86")
set(paddle_known_gpu_archs10 "35 50 60 70 75")
set(paddle_known_gpu_archs11 "35 50 60 70 75 80")
set(paddle_known_gpu_archs "50 52 60 61 70 75 80 86 90")
set(paddle_known_gpu_archs10 "50 60 70 75")
set(paddle_known_gpu_archs11 "50 60 70 75 80")
set(paddle_known_gpu_archs12 "50 60 70 75 80 90")
else()
set(paddle_known_gpu_archs "35 50 52 60 61 70 75 80")
set(paddle_known_gpu_archs10 "35 50 52 60 61 70 75")
set(paddle_known_gpu_archs "70 80")
set(paddle_known_gpu_archs10 "50 52 60 61 70 75")
set(paddle_known_gpu_archs11 "52 60 61 70 75 80")
set(paddle_known_gpu_archs12 "70 80")
endif()

######################################################################################
Expand Down Expand Up @@ -98,12 +104,12 @@ endfunction()
function(select_nvcc_arch_flags out_variable)
# List of arch names
set(archs_names
"Kepler"
"Maxwell"
"Pascal"
"Volta"
"Turing"
"Ampere"
"Hopper"
"All"
"Manual")
set(archs_name_default "Auto")
Expand Down Expand Up @@ -142,9 +148,7 @@ function(select_nvcc_arch_flags out_variable)
unset(CUDA_ARCH_PTX CACHE)
endif()

if(${CUDA_ARCH_NAME} STREQUAL "Kepler")
set(cuda_arch_bin "30 35")
elseif(${CUDA_ARCH_NAME} STREQUAL "Maxwell")
if(${CUDA_ARCH_NAME} STREQUAL "Maxwell")
if(WITH_NV_JETSON)
set(cuda_arch_bin "53")
else()
Expand All @@ -165,11 +169,17 @@ function(select_nvcc_arch_flags out_variable)
elseif(${CUDA_ARCH_NAME} STREQUAL "Turing")
set(cuda_arch_bin "75")
elseif(${CUDA_ARCH_NAME} STREQUAL "Ampere")
if(${CMAKE_CUDA_COMPILER_VERSION} LESS 11.1) # CUDA 11.0
set(cuda_arch_bin "80")
elseif(${CMAKE_CUDA_COMPILER_VERSION} LESS 12.0) # CUDA 11.1+
set(cuda_arch_bin "80 86")
if(WITH_NV_JETSON)
set(cuda_arch_bin "87")
else()
if(${CMAKE_CUDA_COMPILER_VERSION} LESS 11.1) # CUDA 11.0
set(cuda_arch_bin "80")
else()
set(cuda_arch_bin "80 86")
endif()
endif()
elseif(${CUDA_ARCH_NAME} STREQUAL "Hopper")
set(cuda_arch_bin "90")
elseif(${CUDA_ARCH_NAME} STREQUAL "All")
set(cuda_arch_bin ${paddle_known_gpu_archs})
elseif(${CUDA_ARCH_NAME} STREQUAL "Auto")
Expand All @@ -186,6 +196,13 @@ function(select_nvcc_arch_flags out_variable)
set(cuda_arch_bin ${CUDA_ARCH_BIN})
endif()

# Fork-specific override: cuda_arch_bin is reset here unconditionally, so any
# value assigned to it earlier in this function is discarded.
# sm_90 (Hopper) is only accepted by nvcc starting with CUDA 11.8, so older
# toolkits (e.g. CUDA 11.4) build for 70 and 80 only.
# VERSION_LESS compares dotted versions component-wise; quoting keeps the
# condition well-formed even when the compiler version is empty.
if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS "11.8")
  set(cuda_arch_bin "70 80")
else()
  set(cuda_arch_bin "70 80 90")
endif()

if(NEW_RELEASE_JIT)
set(cuda_arch_ptx "${cuda_arch_ptx}${cuda_arch_bin}")
set(cuda_arch_bin "")
Expand Down Expand Up @@ -249,6 +266,11 @@ elseif(${CMAKE_CUDA_COMPILER_VERSION} LESS 12.0) # CUDA 11.2+
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
elseif(${CMAKE_CUDA_COMPILER_VERSION} LESS 13.0) # CUDA 12.0+
set(paddle_known_gpu_archs "${paddle_known_gpu_archs12} 90")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
endif()

if(NOT ${CMAKE_CUDA_COMPILER_VERSION} LESS 10.0)
Expand Down
14 changes: 12 additions & 2 deletions cmake/external/gloo.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,18 @@ set(GLOO_LIBRARY_DIR
"${GLOO_INSTALL_DIR}/lib"
CACHE PATH "gloo library directory." FORCE)
# As we add extra features for gloo, we use the non-official repo
set(GLOO_REPOSITORY ${GIT_URL}/sandyhouse/gloo.git)
set(GLOO_TAG v0.0.2)
# Default gloo fork and tag: used for CPU-only builds and for CUDA < 12.0.
set(GLOO_REPOSITORY ${GIT_URL}/sandyhouse/gloo.git)
set(GLOO_TAG v0.0.2)
# GPU builds with CUDA 12.0 or newer switch to a different fork and tag.
# VERSION_LESS performs a component-wise version comparison, and quoting the
# expansion avoids a malformed condition when the CUDA version is empty
# (an empty version compares as less than 12.0 and keeps the defaults).
if(WITH_GPU AND NOT "${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS "12.0")
  set(GLOO_REPOSITORY ${GIT_URL}/ziyoujiyi/gloo.git)
  set(GLOO_TAG v0.0.3)
endif()
set(GLOO_LIBRARIES
"${GLOO_INSTALL_DIR}/lib/libgloo.a"
CACHE FILEPATH "gloo library." FORCE)
Expand Down
10 changes: 9 additions & 1 deletion cmake/external/warpctc.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,15 @@ set(WARPCTC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/warpctc)
# in case of low internet speed
#set(WARPCTC_REPOSITORY https://gitee.com/tianjianhe/warp-ctc.git)
set(WARPCTC_REPOSITORY ${GIT_URL}/baidu-research/warp-ctc.git)
set(WARPCTC_TAG 37ece0e1bbe8a0019a63ac7e6462c36591c66a5b)
# Default warp-ctc commit: used for CPU-only builds and for CUDA < 12.0.
set(WARPCTC_TAG 37ece0e1bbe8a0019a63ac7e6462c36591c66a5b)
# GPU builds with CUDA 12.0 or newer pin a different commit.
# VERSION_LESS performs a component-wise version comparison, and quoting the
# expansion avoids a malformed condition when the CUDA version is empty
# (an empty version compares as less than 12.0 and keeps the default).
if(WITH_GPU AND NOT "${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS "12.0")
  set(WARPCTC_TAG bdc2b4550453e0ef2d3b5190f9c6103a84eff184)
endif()

set(WARPCTC_INCLUDE_DIR
"${WARPCTC_INSTALL_DIR}/include"
Expand Down
8 changes: 6 additions & 2 deletions cmake/external/xpu.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ set(XPU_API_LIB_NAME "libxpuapi.so")
set(XPU_API_PLUGIN_NAME "libxpuplugin.so")
set(XPU_RT_LIB_NAME "libxpurt.so")
set(XPU_RT_ALIAS_LIB_NAME "libxpurt.so.1")
set(XPU_ML_LIB_NAME "libxpuml.so")
set(XPU_ML_ALIAS_LIB_NAME "libxpuml.so.1")

if(NOT DEFINED XPU_BASE_URL)
set(XPU_BASE_URL_WITHOUT_DATE
Expand Down Expand Up @@ -128,6 +130,8 @@ set(XPU_API_LIB "${XPU_LIB_DIR}/${XPU_API_LIB_NAME}")
set(XPU_API_PLUGIN "${XPU_LIB_DIR}/${XPU_API_PLUGIN_NAME}")
set(XPU_RT_LIB "${XPU_LIB_DIR}/${XPU_RT_LIB_NAME}")
set(XPU_RT_ALIAS_LIB "${XPU_LIB_DIR}/${XPU_RT_ALIAS_LIB_NAME}")
# Full paths of the XPU ML library and its alias. These are built from a fixed
# directory under THIRD_PARTY_PATH rather than from XPU_LIB_DIR.
# NOTE(review): the "xre-bdcentos_x86_64" directory name is hard-coded;
# presumably it matches the layout of the downloaded XRE package - confirm
# for other platforms/package variants.
set(XPU_ML_LIB "${THIRD_PARTY_PATH}/xpu/src/extern_xpu/xre-bdcentos_x86_64/so/${XPU_ML_LIB_NAME}")
set(XPU_ML_ALIAS_LIB "${THIRD_PARTY_PATH}/xpu/src/extern_xpu/xre-bdcentos_x86_64/so/${XPU_ML_ALIAS_LIB_NAME}")

set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${XPU_INSTALL_DIR}/lib")

Expand Down Expand Up @@ -173,9 +177,9 @@ if(WITH_XPU_BKCL)
set(XPU_BKCL_LIB "${XPU_LIB_DIR}/${XPU_BKCL_LIB_NAME}")
set(XPU_BKCL_INC_DIR "${THIRD_PARTY_PATH}/install/xpu/include")
include_directories(${XPU_BKCL_INC_DIR})
target_link_libraries(xpulib -Wl,--push-state,--no-as-needed ${XPU_API_LIB} ${XPU_API_PLUGIN} ${XPU_RT_LIB} ${XPU_BKCL_LIB} -Wl,--pop-state)
target_link_libraries(xpulib -Wl,--push-state,--no-as-needed ${XPU_API_LIB} ${XPU_API_PLUGIN} ${XPU_RT_LIB} ${XPU_BKCL_LIB} ${XPU_ML_LIB} -Wl,--pop-state)
else()
target_link_libraries(xpulib -Wl,--push-state,--no-as-needed ${XPU_API_LIB} ${XPU_API_PLUGIN} ${XPU_RT_LIB} -Wl,--pop-state)
target_link_libraries(xpulib -Wl,--push-state,--no-as-needed ${XPU_API_LIB} ${XPU_API_PLUGIN} ${XPU_RT_LIB} ${XPU_ML_LIB} -Wl,--pop-state)
endif()

add_dependencies(xpulib ${XPU_PROJECT})
Expand Down
3 changes: 1 addition & 2 deletions cmake/third_party.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -317,8 +317,7 @@ if(WITH_ONNXRUNTIME)
endif()

if(WITH_GPU)
if(${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0 OR ${CMAKE_CUDA_COMPILER_VERSION}
GREATER_EQUAL 11.6)
# The external cub package is downloaded and registered as a third-party
# dependency only for CUDA toolkits older than 11.0.
# VERSION_LESS compares dotted versions component-wise; quoting keeps the
# condition well-formed even when the compiler version is empty.
if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS "11.0")
  include(external/cub) # download cub
  list(APPEND third_party_deps extern_cub)
endif()
Expand Down
24 changes: 24 additions & 0 deletions cmake/version.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,27 @@ math(EXPR PADDLE_VERSION_INTEGER "${PADDLE_MAJOR_VER} * 1000000
add_definitions(-DPADDLE_VERSION=${PADDLE_VERSION})
add_definitions(-DPADDLE_VERSION_INTEGER=${PADDLE_VERSION_INTEGER})
message(STATUS "Paddle version is ${PADDLE_VERSION}")

# Expose the git branch name and commit hash of the source tree as the
# PADDLE_BRANCH_NAME / PADDLE_COMMIT_HASH compile definitions.
# Both values are initialised to empty strings, which is what gets reported
# and defined when git is not found.
set(BRANCH_NAME "")
set(COMMIT_HASH "")

find_package(Git QUIET)
if(GIT_FOUND)
  # Short name of the ref that HEAD points to (git errors are suppressed).
  execute_process(
    COMMAND ${GIT_EXECUTABLE} symbolic-ref --short -q HEAD
    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
    OUTPUT_VARIABLE BRANCH_NAME
    OUTPUT_STRIP_TRAILING_WHITESPACE
    ERROR_QUIET)

  # Full hash of the most recent commit (git errors are suppressed).
  execute_process(
    COMMAND ${GIT_EXECUTABLE} log -1 --pretty=format:%H
    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
    OUTPUT_VARIABLE COMMIT_HASH
    OUTPUT_STRIP_TRAILING_WHITESPACE
    ERROR_QUIET)
endif()

message(STATUS "Git version is ${BRANCH_NAME}:${COMMIT_HASH}")
add_definitions(-DPADDLE_BRANCH_NAME="${BRANCH_NAME}")
add_definitions(-DPADDLE_COMMIT_HASH="${COMMIT_HASH}")
Loading

0 comments on commit 83e62b7

Please sign in to comment.