Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add xpu2 compiler #37254

Merged
merged 49 commits into from
Jan 29, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
15700b8
Add XPU compiler for paddle, test=develop
Liu-xiandong Nov 11, 2021
3ce9a61
clean code
Liu-xiandong Nov 11, 2021
b90bc3c
clean useless code
Liu-xiandong Nov 11, 2021
9ed1f1b
clean useless code
Liu-xiandong Nov 11, 2021
99ddb5e
clean useless code
Liu-xiandong Nov 11, 2021
163f920
test
Liu-xiandong Nov 16, 2021
46bd3cb
add include path
Liu-xiandong Nov 16, 2021
931edb5
use clang compiler
Liu-xiandong Nov 19, 2021
408419c
xpu2.cmake
Liu-xiandong Nov 19, 2021
dd0aef3
XPU2 compiler passed
Liu-xiandong Nov 19, 2021
8a9cdad
update
Liu-xiandong Nov 22, 2021
a49660e
update after pten
Liu-xiandong Nov 22, 2021
9c0a1dd
combination the WITH_XPU and WITH_XPU2
Liu-xiandong Nov 23, 2021
d473766
update the fuse operation in WITH_XPU and WITH_XPU2
Liu-xiandong Nov 26, 2021
78af7ff
update
Liu-xiandong Dec 6, 2021
007c570
Merge branch 'develop' into Add_xpu_compiler
Liu-xiandong Dec 6, 2021
76d8d51
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
Liu-xiandong Dec 6, 2021
bdaa02c
update
Liu-xiandong Dec 7, 2021
d818563
update
Liu-xiandong Dec 21, 2021
d461b0a
fix the merge error
Liu-xiandong Dec 21, 2021
a93cece
--no-verify
Liu-xiandong Dec 21, 2021
27ff9d8
update
Liu-xiandong Dec 21, 2021
14a0382
update the code
Liu-xiandong Dec 23, 2021
7a871a2
update the code
Liu-xiandong Dec 24, 2021
56e2052
add run_kp_kernel flag
Liu-xiandong Dec 28, 2021
bfe52ab
update
Liu-xiandong Dec 28, 2021
83da9f5
update
Liu-xiandong Dec 28, 2021
8b9c96c
fix prepared type_ bug
Liu-xiandong Dec 29, 2021
12522e1
clean and update the code
Liu-xiandong Jan 5, 2022
90abd9c
reset the kernel_primitives
Liu-xiandong Jan 5, 2022
50caa48
update
Liu-xiandong Jan 5, 2022
f717f72
update
Liu-xiandong Jan 5, 2022
1970e17
clean the code
Liu-xiandong Jan 5, 2022
51f32e1
delete useless comment
Liu-xiandong Jan 5, 2022
5576ba9
fix the bug in WITH_XPU
Liu-xiandong Jan 6, 2022
cec1cb0
update
Liu-xiandong Jan 10, 2022
4181edb
update
Liu-xiandong Jan 13, 2022
3f73106
fix conflict
Liu-xiandong Jan 25, 2022
03896bc
modify the abi
Liu-xiandong Jan 25, 2022
6feb7ab
fix the xpu_op conflict
Liu-xiandong Jan 25, 2022
0c4d097
delete some useless code
Liu-xiandong Jan 25, 2022
15cd90d
fix conflict
Liu-xiandong Jan 27, 2022
ebe3313
Parameter automation in xpu compilation
Liu-xiandong Jan 27, 2022
16914fa
Parameter automation in xpu compilation
Liu-xiandong Jan 27, 2022
f1bb460
delete kps in cmake
Liu-xiandong Jan 27, 2022
24e7fe5
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
Liu-xiandong Jan 28, 2022
720af78
delete useless comment
Liu-xiandong Jan 28, 2022
b55d391
clean the code
Liu-xiandong Jan 28, 2022
2541c1b
clean the code
Liu-xiandong Jan 28, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ option(WITH_ONEMKL "Compile PaddlePaddle with oneMKL" OFF)
option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND})
option(WITH_TENSORRT "Compile PaddlePaddle with NVIDIA TensorRT" OFF)
option(WITH_XPU "Compile PaddlePaddle with BAIDU KUNLUN XPU" OFF)
option(WITH_XPU_KP "Compile PaddlePaddle with BAIDU XPU compiler " OFF)
option(WITH_MLU "Compile PaddlePaddle with CAMBRICON MLU" OFF)
option(WITH_WIN_DUMP_DBG "Compile with windows core dump debug mode" OFF)
option(WITH_ASCEND "Compile PaddlePaddle with ASCEND" OFF)
Expand All @@ -59,6 +60,9 @@ include(generic) # simplify cmake module
if (WITH_GPU AND WITH_XPU)
Liu-xiandong marked this conversation as resolved.
Show resolved Hide resolved
message(FATAL_ERROR "Error when compile GPU and XPU at the same time")
endif()
# The CUDA toolchain and the XPU kernel-primitive (KP) toolchain cannot be
# enabled in the same build tree; stop configuration early. The message names
# the option the user actually set (WITH_XPU_KP) so it can be found and fixed.
if (WITH_GPU AND WITH_XPU_KP)
  message(FATAL_ERROR "Error when compile GPU and XPU_KP at the same time")
endif()
if (WITH_GPU AND WITH_ASCEND)
message(FATAL_ERROR "Error when compile GPU and ASCEND at the same time")
endif()
Expand Down Expand Up @@ -273,6 +277,14 @@ if (NOT WITH_GPU AND WITH_NCCL)
"Disable NCCL when compiling without GPU" FORCE)
endif()

# WITH_XPU_KP builds on top of the regular XPU support, so switch WITH_XPU on
# when the user asked for WITH_XPU_KP alone. WITH_XPU is declared through
# option(), i.e. it is a BOOL cache entry; it is written back as BOOL so the
# forced update does not change its cache type to a free-form string.
if (WITH_XPU_KP AND NOT WITH_XPU)
  message(WARNING
      "Enable WITH_XPU when compiling with WITH_XPU_KP. Force WITH_XPU=ON.")
  set(WITH_XPU ON CACHE BOOL
      "Enable WITH_XPU when compiling with WITH_XPU_KP" FORCE)
endif()

if (NOT WITH_XPU AND WITH_XPU_BKCL)
MESSAGE(WARNING
"Disable BKCL when compiling without XPU. Force WITH_XPU_BKCL=OFF.")
Expand Down Expand Up @@ -317,6 +329,10 @@ if(WITH_ROCM)
include(miopen) # set miopen libraries, must before configure
endif(WITH_ROCM)

# Load the xpu_kp CMake module only when the build is configured with
# WITH_XPU_KP; other configurations never include it.
if(WITH_XPU_KP)
include(xpu_kp)
endif()

if (NOT WITH_ROCM AND WITH_RCCL)
MESSAGE(WARNING
"Disable RCCL when compiling without ROCM. Force WITH_RCCL=OFF.")
Expand Down
5 changes: 5 additions & 0 deletions cmake/configure.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,11 @@ if(WITH_XPU)
add_definitions(-DPADDLE_WITH_XPU)
endif()

# For WITH_XPU_KP builds: log the fact at configure time and add the
# PADDLE_WITH_XPU_KP preprocessor definition for the sources configured below.
if(WITH_XPU_KP)
message(STATUS "Compile with XPU_KP!")
add_definitions(-DPADDLE_WITH_XPU_KP)
endif()

if(WITH_IPU)
message(STATUS "Compile with IPU!")
add_definitions(-DPADDLE_WITH_IPU)
Expand Down
75 changes: 75 additions & 0 deletions cmake/generic.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,81 @@ function(hip_test TARGET_NAME)
endif()
endfunction(hip_test)

# xpu_library(<target> [STATIC|SHARED] [SRCS <src>...] [DEPS <dep>...])
#
# Declares a library built for XPU kernel primitives. The function is a no-op
# unless WITH_XPU_KP is enabled.
#
#   SRCS  Source files handed to xpu_add_library(). When given, a static
#         library is created; requesting SHARED/shared is a fatal error.
#   DEPS  Targets the library depends on; they are both ordered before the
#         library (add_dependencies) and linked into it (target_link_libraries).
#
# When only DEPS are given, a dummy static library is generated that merely
# aggregates the dependencies. Passing neither SRCS nor DEPS is a fatal error.
#
# Example:
#   xpu_library(my_kp_op SRCS my_kp_op.cc my_kp_op.xpu DEPS tensor)
function(xpu_library TARGET_NAME)
  if (WITH_XPU_KP)
    set(options STATIC static SHARED shared)
    set(oneValueArgs "")
    set(multiValueArgs SRCS DEPS)
    cmake_parse_arguments(xpu_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

    if(xpu_library_SRCS)
      if (xpu_library_SHARED OR xpu_library_shared) # build *.so
        message(FATAL_ERROR "XPU kernel currently does not support dynamic links")
      else()
        xpu_add_library(${TARGET_NAME} STATIC ${xpu_library_SRCS} DEPENDS ${xpu_library_DEPS})
        find_fluid_modules(${TARGET_NAME})
      endif()
      if (xpu_library_DEPS)
        add_dependencies(${TARGET_NAME} ${xpu_library_DEPS})
        target_link_libraries(${TARGET_NAME} ${xpu_library_DEPS})
      endif()
    else()
      if (xpu_library_DEPS)
        list(REMOVE_DUPLICATES xpu_library_DEPS)
        # NOTE(review): target_SRCS is not set inside this function; it expands
        # to whatever the calling scope provides (possibly nothing) — confirm
        # that generate_dummy_static_lib copes with an empty FILE_PATH.
        generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:xpu_library")
        target_link_libraries(${TARGET_NAME} ${xpu_library_DEPS})
        add_dependencies(${TARGET_NAME} ${xpu_library_DEPS})
      else()
        # FATAL_ERROR (not "FATAL") is the message mode that aborts
        # configuration; an unknown mode word is just printed as text.
        message(FATAL_ERROR "Please specify source file or library in xpu_library.")
      endif()
    endif()
  endif()
endfunction(xpu_library)

# xpu_binary(<target> SRCS <src>... [DEPS <dep>...])
#
# Adds an executable for XPU kernel-primitive builds. Nothing is declared
# unless WITH_XPU_KP is enabled.
#
#   SRCS  Source files of the executable.
#   DEPS  Targets to link against and to build beforehand. common_link() is
#         applied to the executable only when DEPS is non-empty.
function(xpu_binary TARGET_NAME)
  # Guard clause: the helper is inert for non-KP configurations.
  if (NOT WITH_XPU_KP)
    return()
  endif()

  # No option flags and no single-value keywords; only the two list keywords.
  cmake_parse_arguments(kp_bin "" "" "SRCS;DEPS" ${ARGN})

  add_executable(${TARGET_NAME} ${kp_bin_SRCS})

  if (kp_bin_DEPS)
    target_link_libraries(${TARGET_NAME} ${kp_bin_DEPS})
    add_dependencies(${TARGET_NAME} ${kp_bin_DEPS})
    common_link(${TARGET_NAME})
  endif()
endfunction(xpu_binary)

# xpu_test(<target> SRCS <src>... [DEPS <dep>...])
#
# Builds a gtest-based executable for XPU kernel-primitive code and registers
# it with CTest under the target's own name.
#
#   SRCS  Source files of the test executable.
#   DEPS  Extra targets to link and to build first, in addition to the common
#         test dependencies (paddle_gtest_main, lod_tensor, memory, gtest,
#         gflags, glog).
#
# The test is declared only when WITH_XPU_KP and WITH_TESTING are both on and
# the environment variable CI_SKIP_CPP_TEST is not "ON".
function(xpu_test TARGET_NAME)
  # The environment variable `CI_SKIP_CPP_TEST` is used to skip the compilation
  # and execution of test in CI. `CI_SKIP_CPP_TEST` is set to ON when no files
  # other than *.py are modified.
  if (WITH_XPU_KP AND WITH_TESTING AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON")
    # Set `options` explicitly: left unset, "${options}" would pick up whatever
    # value happens to exist in the caller's scope.
    set(options "")
    set(oneValueArgs "")
    set(multiValueArgs SRCS DEPS)
    cmake_parse_arguments(xpu_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
    add_executable(${TARGET_NAME} ${xpu_test_SRCS})
    # "-pthread -ldl -lrt" is defined in CMAKE_CXX_LINK_EXECUTABLE
    target_link_options(${TARGET_NAME} PRIVATE -pthread -ldl -lrt)
    get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
    target_link_libraries(${TARGET_NAME} ${xpu_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog ${os_dependency_modules})
    add_dependencies(${TARGET_NAME} ${xpu_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
    common_link(${TARGET_NAME})
    add_test(${TARGET_NAME} ${TARGET_NAME})
    # ENVIRONMENT is a list property. Setting it three times without APPEND
    # keeps only the last value, so all variables are assigned in one call.
    set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT
      FLAGS_cpu_deterministic=true
      FLAGS_init_allocated_mem=true
      FLAGS_cudnn_deterministic=true)
  endif()
endfunction(xpu_test)

function(go_library TARGET_NAME)
set(options STATIC static SHARED shared)
set(oneValueArgs "")
Expand Down
22 changes: 19 additions & 3 deletions cmake/operators.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ function(op_library TARGET)
set(cu_cc_srcs)
set(hip_cc_srcs)
set(xpu_cc_srcs)
set(xpu_kp_cc_srcs)
set(npu_cc_srcs)
set(mlu_cc_srcs)
set(cudnn_cu_cc_srcs)
Expand Down Expand Up @@ -120,6 +121,11 @@ function(op_library TARGET)
list(APPEND xpu_cc_srcs ${XPU_FILE}.cc)
endif()
endif()
if(WITH_XPU_KP)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.xpu)
list(APPEND xpu_kp_cc_srcs ${TARGET}.xpu)
endif()
endif()
if(WITH_ASCEND_CL)
string(REPLACE "_op" "_op_npu" NPU_FILE "${TARGET}")
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${NPU_FILE}.cc)
Expand Down Expand Up @@ -154,18 +160,22 @@ function(op_library TARGET)
list(APPEND mkldnn_cc_srcs ${src})
elseif(WITH_XPU AND ${src} MATCHES ".*_op_xpu.cc$")
list(APPEND xpu_cc_srcs ${src})
elseif(WITH_XPU_KP AND ${src} MATCHES ".*\\.xpu$")
list(APPEND xpu_kp_cc_srcs ${src})
elseif(WITH_ASCEND_CL AND ${src} MATCHES ".*_op_npu.cc$")
list(APPEND npu_cc_srcs ${src})
elseif(WITH_MLU AND ${src} MATCHES ".*_op_mlu.cc$")
list(APPEND mlu_cc_srcs ${src})
elseif(${src} MATCHES ".*\\.cc$")
list(APPEND cc_srcs ${src})
else()
message(FATAL_ERROR "${TARGET} Source file ${src} should only be .cc or .cu")
message(FATAL_ERROR "${TARGET} Source file ${src} should only be .cc or .cu or .xpu")
endif()
endforeach()
endif()


list(LENGTH xpu_cc_srcs xpu_cc_srcs_len)
list(LENGTH xpu_kp_cc_srcs xpu_kp_cc_srcs_len)
list(LENGTH cc_srcs cc_srcs_len)
if (${cc_srcs_len} EQUAL 0)
message(FATAL_ERROR "The op library ${TARGET} should contains at least one .cc file")
Expand Down Expand Up @@ -231,6 +241,8 @@ function(op_library TARGET)
list(REMOVE_ITEM hip_srcs "decode_jpeg_op.cu")
hip_library(${TARGET} SRCS ${cc_srcs} ${hip_cc_srcs} ${miopen_cu_cc_srcs} ${miopen_cu_srcs} ${mkldnn_cc_srcs} ${hip_srcs} DEPS ${op_library_DEPS}
${op_common_deps})
elseif (WITH_XPU_KP AND ${xpu_kp_cc_srcs_len} GREATER 0)
xpu_library(${TARGET} SRCS ${cc_srcs} ${mkldnn_cc_srcs} ${xpu_cc_srcs} ${xpu_kp_cc_srcs} DEPS ${op_library_DEPS} ${op_common_deps})
else()
# Unity Build relies on global option `WITH_UNITY_BUILD` and local option `UNITY`.
if(WITH_UNITY_BUILD AND op_library_UNITY)
Expand Down Expand Up @@ -359,6 +371,11 @@ function(op_library TARGET)
endif()
endif()

# pybind USE_OP_DEVICE_KERNEL for XPU KP
if (WITH_XPU_KP AND ${xpu_kp_cc_srcs_len} GREATER 0)
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, KP);\n")
endif()

# pybind USE_OP_DEVICE_KERNEL for NPU
if (WITH_ASCEND_CL AND ${npu_cc_srcs_len} GREATER 0)
foreach(npu_src ${npu_cc_srcs})
Expand Down Expand Up @@ -438,7 +455,6 @@ function(op_library TARGET)
endif()
endfunction()


function(register_operators)
set(options "")
set(oneValueArgs "")
Expand Down
Loading