[Cherry-pick] Move sum op to PHI && Fix MetaTensor's bug when run infermeta #49341

Closed
wants to merge 243 commits into from
243 commits
29c44eb
[cherry-pick] Allow manaully set py_reader name in standalone executo…
From00 Sep 13, 2022
0903020
cherry pick softmax infer kernel (#45957)
JZZ-NOTE Sep 13, 2022
e223cf7
delete new executor log (#45917)
pangyoki Sep 14, 2022
9d5003d
Fix compile (#45996) (#46027)
b3602sss Sep 14, 2022
20d168d
fix transformer bug, test=kunlun (#45983)
ykkk2333 Sep 14, 2022
925e84b
[chery-pick] Fix namespace error (#45925) (#46029)
engineer1109 Sep 14, 2022
2fac8ab
set device id before op run (#45994)
zhiqiu Sep 14, 2022
2ca6590
cherry pick delay tensorrt log (#45958)
JZZ-NOTE Sep 14, 2022
5130b0a
merge python lib (#46013)
JZZ-NOTE Sep 14, 2022
61a3e30
fix trt multiclass_nms3 (#45166) (#46034)
zhangjun Sep 15, 2022
91677eb
Fix arm fp16 compile error (#45991) (#46048)
chenwhql Sep 15, 2022
526e032
Modify callstacklevel flag for c++ (#46059)
JZZ-NOTE Sep 15, 2022
9012e8b
fix distributed bug caused by fill_any_like (#45978) (#46041)
Charles-hit Sep 15, 2022
2680a71
Delete eigen header in data_type.h (#46036) (#46066)
zyfncg Sep 15, 2022
0793311
General Plugin Mechanism (#45355) (#46070)
weishengying Sep 15, 2022
903c87b
Support 0 shapes input Tensor for MKL slice (#45930) (#46072)
0x45f Sep 15, 2022
0048695
[ Dy2Static ] Fix bugs when select inputs meeting different shape or …
2742195759 Sep 15, 2022
8caaf85
[Cherry-pick] Normalize yaml name and label (#46052)
chenwhql Sep 16, 2022
e25e947
[cherry-pick][jit] Jit skip forward (#45926)
zh794390558 Sep 16, 2022
4e09e40
(cherry-pick)Fix split infershape in static mode and add convert rule…
Charles-hit Sep 16, 2022
2c2cd79
modify windows build script (#46099)
JZZ-NOTE Sep 16, 2022
a76fa41
V2.4 - cherry-pick (#46126)
ziyoujiyi Sep 17, 2022
f6dd201
fix compilation errors on mac arm64 (#46135)
yuanlehome Sep 17, 2022
4261ae3
Unify core avx and core_noavx to libpaddle (#46095) (#46113)
chenwhql Sep 19, 2022
a5d4f57
[Cherry-pick] fix bug for TransformedDistribution (#46157)
MayYouBeProsperous Sep 19, 2022
7a6db0a
rename fleetx, develop=document_fix (#46141)
ForFishes Sep 19, 2022
dabb8f2
Revert "Simplify size op impl (#45808)" (#46168)
chenwhql Sep 19, 2022
f4ec156
convfusion_cache (#46054)
xiaoxiaohehe001 Sep 19, 2022
e468e93
[Eager] Optimize log (#45783) (#46133)
JiabinYang Sep 19, 2022
860f607
fix broadcast kernel (#46158)
sneaxiy Sep 19, 2022
c5cc427
[Cherry-pick][Auto Parallel] Improve the APIs (#46164)
aoyulong Sep 19, 2022
e5dc9d6
refactor mp. (#45803) (#46121)
wuhuachaocoding Sep 19, 2022
db368d5
Add INT8 support for fused_multi_transformer_op (#45284) (#46169)
minghaoBD Sep 19, 2022
6b59a07
fix_recover_remove_padding kernel (#46050) (#46198)
Wangzheee Sep 19, 2022
855fdde
fix (#46125)
zhaocaibei123 Sep 19, 2022
1c7e95c
[Cherry-pick] Support bmm and bmm_grad in xpu (#45887) (#46132)
JiabinYang Sep 19, 2022
7f0c1f0
Remove redundant code in pe engine (#46110) (#46145)
0x45f Sep 19, 2022
adab3c5
(cherry-pick)support some op backward refuse forward (#46201)
Charles-hit Sep 19, 2022
707d838
cherry-pick 46152 (#46183)
jiweibo Sep 19, 2022
a056601
Add symbolic shape deduction function for general Plugin mechanism (#…
weishengying Sep 19, 2022
d1ce974
[cherry-pick] [dy2static] support user to use decorator in their prog…
feifei-111 Sep 19, 2022
45a3c65
[JitLayer]Save property meta file to correct path (#46131) (#46195)
0x45f Sep 19, 2022
ad8beaa
[cherry-pick] add abs,mean,sum,ge,gt,pow,etc higher-order differentia…
cxxly Sep 19, 2022
be84cac
[vision.ops.nms] Fix return order error and duplicate results with sp…
RichardWooSJTU Sep 19, 2022
4bced24
Recompute unify incubate (#46073) (#46210)
wuhuachaocoding Sep 19, 2022
ac8cce2
Fix wrong eigen header include (#46082) (#46202)
zyfncg Sep 20, 2022
7712ce1
[Cherry-pick] Fix static check (#46253)
sljlp Sep 20, 2022
c0324e8
[Cherry-pick] Update layoutautotune for inplace (#45826) (#46226)
AnnaTrainingG Sep 20, 2022
4f28a4c
Run_program_op add scope cache & reuse (#45813) (#46223)
zhangbo9674 Sep 20, 2022
c384b00
[Inference] fix preln_residual_bias_fuse_pass bug in TNT_small model …
zhoutianzi666 Sep 20, 2022
bc92d5f
(cherry-pick)Support some op refuse forward and fix some bugs (#46211)
Charles-hit Sep 20, 2022
38c0fd0
[Eager] Fix linspace error in amp (#46088) (#46206)
JiabinYang Sep 20, 2022
fd8ec4a
[Cherry-pick] Sparse add InferMeta (#46235)
Sep 20, 2022
5034030
Fix TransDataBackend Error when call unsqueeze using MKL Tensor (#460…
0x45f Sep 20, 2022
1418a71
[cherry-pick] Refine thread pool config of interpretercore (#46219)
zhiqiu Sep 20, 2022
654807c
[Paddle-TRT] Support matmul_v2 in Paddle-TensorRT (#46177)
zhoutianzi666 Sep 20, 2022
759736d
[Release/2.4][Cherry-pick] Fix bug of reduce_sum op (#46160)
GhostScreaming Sep 20, 2022
da173c4
[Cherry-pick] Fix amp error cp (#46272)
JiabinYang Sep 20, 2022
c43ebfc
[Cherry-Pick][AutoParallel] change import way and fix strategy (#46270)
zhaoyinglia Sep 20, 2022
42e56f6
[PolishComments] Polish some code comments (#46032) (#46261)
jiahy0825 Sep 20, 2022
adb2f5e
[Paddle-TRT] Full support for ops with persistable input (#45545) (#4…
zhoutianzi666 Sep 20, 2022
a43f960
[cherry-pick][xpu] update xdnn activations (#46282)
houj04 Sep 20, 2022
230b9a8
[Paddle-TRT][Cherry-Pick]Fix cast bug (#46293)
zhoutianzi666 Sep 20, 2022
bab1109
Revert "Optimiza params sync between CPU and GPU. (#45805)" (#46274)
jiweibo Sep 20, 2022
461099c
fix miss return error. (#46298)
jiweibo Sep 20, 2022
3e8b322
cherry-pick V2.4 (#46294)
ziyoujiyi Sep 20, 2022
b027652
remove tmp fp32 var for gaussian_random (#46285)
GuoxiaWang Sep 21, 2022
311eb87
update for py3.6 bug (#46325)
wuhuachaocoding Sep 21, 2022
f3d5c85
fix CI sccache install dir (#46267) (#46327)
sljlp Sep 21, 2022
b74c092
[JitLayer]Erase out vars in scope to avoid data rewritinig (#46249) (…
0x45f Sep 21, 2022
cc3e7cd
[Cherry-pick][BugFix]Fix pooling output_size bug if encounter list[Te…
Aurelius84 Sep 21, 2022
083853c
[Auto Parallel] fix lazyinit (#46355) (#46382)
zhaoyinglia Sep 22, 2022
372505b
[Dygraph] Fix bugs of mp in eager mode (#46303) (#46396)
haohongxiang Sep 22, 2022
7eb046c
logger manager (#45909) (#46087)
sljlp Sep 22, 2022
484377c
[Cherry-Pick][BugFix]Fix reduce_mean/min/sum/prod, cumsum grad_op inf…
Aurelius84 Sep 23, 2022
6a50833
fix compile problem (#46354), test=kunlun (#46383)
zyfncg Sep 23, 2022
980292c
fix bug in convert call: tranform the static func. (#46278) (#46365)
2742195759 Sep 23, 2022
cbf3f4b
[BugFix] fixed a bug that deco_name can't be parsed corrected (#46297…
feifei-111 Sep 23, 2022
55f73ba
[OpAttr]Fix dropout2d/3d static API (#46434)
Aurelius84 Sep 23, 2022
0ab2672
Remove audio ParameterError (#46316) (#46468)
SmileGoat Sep 24, 2022
6990edf
[cherrypick] Fix elementwise_sub sign reverse for mkldnn (#46107)
zh794390558 Sep 26, 2022
4a8aa6d
fix conflict (#46388)
feifei-111 Sep 26, 2022
536d9d8
cherry-pick V2.4 (#46358)
ziyoujiyi Sep 26, 2022
8089a1f
change use_calc_stream to sync_op (#46182) (#46493)
LiYuRio Sep 27, 2022
dc3a3f1
tirl bool for jit (#46513)
zh794390558 Sep 27, 2022
0cc2251
[cherry-pick] clear extra attrs of some ops in OpMaker (#45845, #4598…
zyfncg Sep 27, 2022
5711bbe
fix shard_index kernel (#46491) (#46511)
zhaoyinglia Sep 27, 2022
5dab0b0
[AutoParallel] fix amp o1 (#46391) (#46481)
zhaoyinglia Sep 27, 2022
a77a6f6
remove trt_reshape2_matmul_fuse_pass (#46363)
zhoutianzi666 Sep 28, 2022
b2e4211
[cherry-pick] Clear extra attrs of some ops in OpMaker (#46150, #4632…
zyfncg Sep 28, 2022
1c22ed7
Fix libpaddle soname mismatch error (#46344) (#46576)
chenwhql Sep 28, 2022
3f35e63
refine dy2st glog (#46415) (#46438)
zhangbo9674 Sep 28, 2022
f5956be
[CherryPick][Fix] Remove std::trunc() in FloorDivideFunctor and Inver…
Bobholamovic Sep 29, 2022
d90db9b
Fix the half precision problem of general plugin (#46580)
weishengying Sep 29, 2022
d67da3d
[cherry-pick] Open the clip_extra flag in save_inference_model (#46577)
zyfncg Sep 29, 2022
a58663f
[cherry-pick] Add FP16 support for uniform in dygraph mode on Nvidia …
Courtesy-Xs Sep 29, 2022
4282af6
[Dy2Static] refactor the return transformer (#45900) (#46205)
2742195759 Oct 9, 2022
3cc3f60
[PHI] Shape op migration (#46051) (#46724)
Silv3S Oct 10, 2022
44ecae6
[PHI] migrate softmax_grad kernel (#46257) (#46725)
Silv3S Oct 10, 2022
51a91fe
[PHI] Migrate slice, slice_grad, split, pad and pad3d oneDNN kernels …
Silv3S Oct 10, 2022
25d61cd
[cherry-pick] [PHI] Migrate sgd and stack oneDNN kernels (#46374) (#4…
Silv3S Oct 10, 2022
fdd0d6d
[cherry-pick] [PHI] Migrate concat+grad, expand+grad, fill_constant ……
Silv3S Oct 10, 2022
d8daf64
[Dy2St]Fix Regex DeprecationWarning in PY3 (#46829)
Aurelius84 Oct 10, 2022
a0e0341
Fix gather op convert for Paddle-TensorRT (#46779) (#46825)
Oct 10, 2022
d091d1b
optimize Paddle-TRT performance (#46684)
yuanlehome Oct 11, 2022
6a6c749
[BugFix]Fix concat bugs when call onednn kernel (#46518) (#46845)
YuanRisheng Oct 11, 2022
9cc3f69
Cherry pick for dygraph pp (#46876)
FeixLiu Oct 11, 2022
f556549
speedup ChannelClipAndQuantDequantKernelQuantAxis1 kernel (#46471) (#…
ceci3 Oct 11, 2022
7b3837e
Revert pool+grad oneDNN kernel conversion (#45989) (#46860)
Silv3S Oct 11, 2022
2bcbf8b
[cherry-pick] [PHI] relu6_grad kernel (#46501) (#46862)
Silv3S Oct 11, 2022
2c6bd4a
hard_swish grad (#46857)
Silv3S Oct 11, 2022
2190da2
add seed check (#46858)
Silv3S Oct 11, 2022
b051455
set_value_op: add support for complex types (#46885)
Oct 11, 2022
08d233f
cherry pick pr46536 (#46901)
ronny1996 Oct 12, 2022
61273c0
[Cherry-pick]Update layout autotune for module with no modified (#465…
AnnaTrainingG Oct 12, 2022
0280c0b
[cherry-pick] [PHI] transpose2_grad op migration (#46139) (#46873)
Silv3S Oct 13, 2022
100a075
[Cherry-pick] Add fp16 dtype support for set_value op (#46906)
Courtesy-Xs Oct 13, 2022
d90aaa6
interpretercore thread not always spin (#46687) (#46952)
zhangbo9674 Oct 13, 2022
b8677c0
[Paddle-TRT] support new quant format from slim (#46022) (#46979)
zhoutianzi666 Oct 14, 2022
535d757
[cherry-pick 2.4][inference] fix reshape2 opteller (#46871)
zhangjun Oct 14, 2022
4b47265
[BUG]Fix expand_as_v2 bug while X and Y with different dtype (#46950)…
Aurelius84 Oct 14, 2022
2744432
[Dy2St]Remove usless cast operation to speed up FP16 training (#46851…
Aurelius84 Oct 14, 2022
8f1ac7c
Add bmm convert (#47011)
xiaoxiaohehe001 Oct 14, 2022
84333cf
update quantization new format (#46529)
yghstill Oct 14, 2022
82db499
cherry-pick 46942 (#47015)
jiweibo Oct 14, 2022
5c2bea1
Fix nvcc lazy (#46879)
xiaoxiaohehe001 Oct 14, 2022
8bfd45a
[Cherry-Pick]Move valid check from python to kernel (#46980)
ZzSean Oct 17, 2022
a1cdbad
rm fp16 dtype_check (#46739) (#46866)
gglin001 Oct 17, 2022
bd89be1
[IPU] paddle-inference support custom-ops (#45235) (#46868)
gglin001 Oct 17, 2022
d913bc9
fix ut timeout 2 (#45233) (#46867)
gglin001 Oct 17, 2022
8c6c79a
update to sdk3.0 (#46865) (#46892)
gglin001 Oct 17, 2022
7eef05c
[cherry-pick] Fix the bug of exporting model in dygraph QAT (#47028)
yghstill Oct 17, 2022
976af0d
Optimize performance of depthwise_conv (#46896)
ZzSean Oct 17, 2022
10225d2
[cherry-pick]Sparse static graph (#46838)
Oct 17, 2022
5fba2a9
[Cherry-pick] Collective communication APIs (#46922)
HermitSun Oct 17, 2022
5f6b9f1
[Cherry pick] trt pool2d adaptive ifx (#47069)
wwbitejotunn Oct 18, 2022
b84edd9
[cherry-pick] Fix perf issues of mp/pp/fuse in eager mode (#47071)
haohongxiang Oct 18, 2022
5b64214
Cherry pick for sharding (#47061)
FeixLiu Oct 18, 2022
5a44c12
support shape tensor is the input of trt-subgraph (#47066)
zhoutianzi666 Oct 18, 2022
5fef043
[cherry-pick 2.4] add sparse api transpose/reshape/is_same_shape (#47…
zhwesky2010 Oct 18, 2022
2cc8797
Add symbolic shape deduction function for unfold, scatter_nd_add, p_n…
weishengying Oct 18, 2022
de6f15b
reconstruct code for convert_fp16 (#46428) (#47087)
jiweibo Oct 18, 2022
fcb9c0b
[ cherrypick] Construct exec and ctx only once in cond op to speed up…
zh794390558 Oct 19, 2022
f08c104
update audio api examples (#46938) (#47126)
SmileGoat Oct 19, 2022
69515e9
[Dy2St]Fix recurrent op eager deletion pass error in dy2st (#47105) (…
0x45f Oct 19, 2022
1d015f1
Add enable_partial_send_recv switch in pipeline_configs (#46992) (#47…
GhostScreaming Oct 19, 2022
247ef47
[CherryPick] Support TypeHint for function decorated by @to_static (#…
2742195759 Oct 19, 2022
5a9befe
[Dy2Stat]Polish @to_static temporary file directory to speed up trans…
Aurelius84 Oct 19, 2022
66dccd7
Add unsigned int8 scale propagation (#46378) (#47156)
yeliang2258 Oct 19, 2022
23f2a4e
[cherry-pick] strided_slice grad add fp16 support (#47159)
Oct 19, 2022
90b3179
[Cherry-Pick][AutoParallel] auto_parallel cherry-pick to release2.4 (…
zhaoyinglia Oct 19, 2022
c894d91
Fix cannot import `paddle.distributed` in python 3.6 on release/2.4 (…
HermitSun Oct 20, 2022
da7d2f2
[Cherry-pick][Release/2.4] support pure bfloat16 for more ops
sneaxiy Oct 20, 2022
c74bf01
[Cherry-pick][Release/2.4] Fix some operators when the tensor.numel()…
sneaxiy Oct 20, 2022
4c92524
add _get_phi_kernel_name interface (#47033)
JZZ-NOTE Oct 20, 2022
9ed1454
[Cherry-pick] layernorm shift partation enhance (#47086)
wwbitejotunn Oct 20, 2022
09b1923
add get ops scripts (#47049)
JZZ-NOTE Oct 20, 2022
68c4ac3
[Paddle-TRT][Cherry-Pick]Rewrite strided_slice converter using shap…
zhoutianzi666 Oct 20, 2022
c2d344d
[cherry-pick] Fix quantize model deploy bug in MKLDNN (#47119)
yeliang2258 Oct 20, 2022
3d647b1
fix problem of persistable var saving in QAT (#47203)
yghstill Oct 20, 2022
6712e26
Add value check & error message for gather_tree (#47051) (#47221)
FrostML Oct 20, 2022
396427a
[cherry pick] Add FusedMultiTransformer fuse pass for GPT3 (#47150)
heavengate Oct 20, 2022
50d4fa5
[cherry-pick 2.4] remove incubate of all paddle sparse api (#47183)
zhwesky2010 Oct 20, 2022
c0ed872
[Cherry-pick] Simplify conv codes and fix cache and autotune bugs. (#…
Xreki Oct 20, 2022
29c9f02
Add paddle audio dataset & backend (#45939) (#47230)
SmileGoat Oct 20, 2022
8739497
Add infer prune function (#47047)
JZZ-NOTE Oct 21, 2022
001c8a6
add pdsa-2022-001, test=document_fix (#47228)
VigiZhang Oct 21, 2022
d1fedc5
[CustomDevice] turn on WITH_CUSTOM_DEVICE when WITH_PYTHON=ON (#47165)
ronny1996 Oct 21, 2022
281891c
support qat in sharding stage2 (#47169) (#47240)
haohongxiang Oct 21, 2022
d42a1dc
[Paddle-TRT] Fix conv2d (#47034)
zhoutianzi666 Oct 21, 2022
82f1e1b
fix send for old dygraph mode by passing use_calc_stream to the send …
sljlp Oct 24, 2022
5c85f1a
Support BF16 training for sharding (#46846) (#47246)
GhostScreaming Oct 24, 2022
9780eb7
Fix virtualpp with mp/recompute bugs (#47242) (#47249)
FeixLiu Oct 24, 2022
caf2751
fix import in python3.6 (#47275)
zhaoyinglia Oct 24, 2022
e8d6339
Fix hAPI bug of not compatible with LayerHook (#47001) (#47283)
parap1uie-s Oct 24, 2022
6454133
[CodeStyle] add black config to release2.4 (#47146)
SigureMo Oct 24, 2022
d5c6386
[cherry-pick] add prior_box and box_coder for paddle.vision.ops (#46786)
nemonameless Oct 25, 2022
99d8ba4
[geometric] fix english doc (#46485) (#47317)
DesmonDay Oct 25, 2022
942ab42
[Sparse] Fix indices (#47190) (#47226)
Oct 25, 2022
9a6dd8f
[Cherry-pick][Release/2.4]Refine the memory usage of fused_attention …
sneaxiy Oct 26, 2022
7c6550a
Added workaround for elementwise oneDNN kernel (#47080) (#47342)
yeliang2258 Oct 26, 2022
12e6dfc
[Cherry-Pick][Dy2Stat]Fix module loading OSError in multiprocess (#47…
Aurelius84 Oct 26, 2022
85094bc
[Cherry-pick][audio] fix tess split fold (#47350)
SmileGoat Oct 26, 2022
a16ef9f
fix a bug that print log twice (#47336) (#47343)
sljlp Oct 26, 2022
0369cd0
Fix inference performance problem caused by selecting cudnn kernel of…
zyfncg Oct 26, 2022
99cec1a
fix slice bug (#47349) (#47376)
wanghuancoder Oct 27, 2022
b143e00
[cherry-pick] add batch_norm_kernel (#47394)
Oct 27, 2022
94240e2
[Cherry-pick Release/2.4] Fix multi_tensor adam and momentum bug when…
sneaxiy Oct 27, 2022
eec93bd
support multiclass_nms in int8 (#47337)
zhoutianzi666 Oct 28, 2022
0fa8309
[cherry-pick]add sync_batch_norm_bn and deliver indices_dict (#47407)
Oct 28, 2022
c42929c
[Cherry-pick][JIT] Add Predictor for JITLayer (#47379) (#47419)
Aurelius84 Oct 28, 2022
7618cbd
[Dy2St]Fix abnormal growth of memory in train mode and no_grad for Dy…
0x45f Oct 28, 2022
f478844
[JITLayer]Enable OneDNN on CPU and Fix zero shape (#47428) (#47436)
Aurelius84 Oct 29, 2022
26465cd
fix paddle.audio.function.get_window security error (#47453)
SmileGoat Oct 29, 2022
df64e79
[Cherry-pick][Release/2.4]Add fused_allreduce_gradients_with_group fo…
sneaxiy Oct 29, 2022
12b9b03
[cherry-pick] update dygraph PTQ export_model api (#47415)
yghstill Oct 31, 2022
f93e9a5
[Cherry-pick][audio] rm kaiser window in audio get_window function &&…
SmileGoat Oct 31, 2022
4b3589f
2.4/fix engine build (#47462)
zhaoyinglia Oct 31, 2022
0201ccc
fix p2p comm memory release logic (#47497) (#47517)
FeixLiu Nov 1, 2022
5ffd4af
add missing scale parameter (#47522)
sneaxiy Nov 1, 2022
23c05f2
add pdsa-2022-002 (#47486) (#47526)
VigiZhang Nov 1, 2022
601626a
[cherry-pick][code-gen] Support code-gen for opmaker of sparse op (#4…
zyfncg Nov 1, 2022
61953b9
[cherry-pick]Fix english documents of sparse api (#47496)
Nov 1, 2022
7a1cf27
[geometric] Optimize graph sample speed (#47531) (#47548)
DesmonDay Nov 2, 2022
ba4fbe7
[cherry pick] fix memory copy in prepare_data of FusedMultiTransforme…
heavengate Nov 3, 2022
d4bf8b1
support unbalanced data for pipeline (#47199) (#47569)
ForFishes Nov 3, 2022
75088bb
[Sparse] Unified api args name (#47529) (#47627)
Nov 3, 2022
559b975
Fix ComputePropagateScalesMkldnnPass of MKLDNN (#47574) (#47639)
yeliang2258 Nov 3, 2022
99c872f
FC/matmul(v2) + scale fuse pass (#47420)
Silv3S Nov 3, 2022
cfee9c1
[cherry-pick2.4]for CodeStyle (#47608)
Ligoml Nov 4, 2022
72e1eb6
[CherryPick] Cherry pick #45916 #46031 #47299 (#47610)
2742195759 Nov 4, 2022
7145db6
Revert "SparseConv support duplicate coordinates (#44976)" (#45202) (…
Nov 7, 2022
58c47e8
[Cherry-pick][BugFix]Fix set_attr modify underly type (#47500) (#47566)
Aurelius84 Nov 7, 2022
764cea0
【Cherry-pick PR47666】add cudnn error if compiled cudnn version is inc…
pangyoki Nov 7, 2022
d580983
[cherry-pick2.4]fix numpy issue in codeblock examples (#47664)
Ligoml Nov 7, 2022
3a01478
update the split logic for uniform (#47670) (#47705)
FeixLiu Nov 7, 2022
cf668ab
[cherry-pick2.4]docs fix (#47669)
Ligoml Nov 7, 2022
d0e19af
[CHERRY-PICK] Added caching to oneDNN FC and op+unsqueeze2 and op+res…
jakpiase Nov 8, 2022
b3e4571
Support cuda 11 on jetson (#47742)
chalsliu Nov 8, 2022
34f67a8
add fuse_multi_transformer passes to fp16. test=develop (#47733)
heavengate Nov 8, 2022
ea5f44b
[cherry-pick] Squeeze2 and transpose2 fuse using oneDNN(#47712)
zh794390558 Nov 9, 2022
51248f8
[Cherry-pick] remove functions not belong to public-api from __all__ …
zoooo0820 Nov 9, 2022
76b883c
【Cherry-pick PR47743】change cudnn error to cuda error if compiled cud…
pangyoki Nov 10, 2022
ff642c6
[Cherry-pick] Fix python link error (#47811)
chenwhql Nov 10, 2022
2e9e65d
【cherry-pick】update Recompute doc (#47784)
wuhuachaocoding Nov 10, 2022
3a6cc57
Fuse multi transformer layer pass (#47541) (#47830)
RichardWooSJTU Nov 10, 2022
4465ba2
rename fw_bw func name of interleave pp (#47571) (#47862)
haohongxiang Nov 11, 2022
5033b6c
Fix slice bugs in MKLDNN when input dims are zeros (#46671) (#47887)
yeliang2258 Nov 11, 2022
3fa7a73
Fix mac link python (#48017)
wanghuancoder Nov 16, 2022
1490aaa
[cherry-pick2.4]en-docs warning&error fix (#48332)
ustiniankw Nov 24, 2022
b9b7f00
update (#48350)
jiweibo Nov 25, 2022
a2f61fe
Fix wrong eigen header include in data_type.h (#48157) (#48260)
zyfncg Nov 25, 2022
7a0b862
Cherrypick NV fixes to release/2.4 (#48263)
zlsh80826 Nov 28, 2022
9e2ba9b
[cherry-pick] updating mul and matmul with set_mem_desc and fix squee…
yeliang2258 Nov 29, 2022
ddcd1b6
[cherry-pick][Inference] support mixed precision inference (#49077)
yuanlehome Dec 19, 2022
cdab3a4
Fix nullptr to TestFuseGemmEpilogueReluBWDFP* (#48997) (#49090)
ForFishes Dec 20, 2022
fb19648
cherry-pick #75b734 (#49201)
Dec 21, 2022
7c36b88
fix unittests (#49203) (#49210)
Aganlengzi Dec 21, 2022
612bdb1
[Docs]update readme; test=document_fix (#49246)
Ligoml Dec 22, 2022
11c7f57
Fix mixed precision bug (#49239)
yuanlehome Dec 22, 2022
5d29a5b
fix unittest in post training quantization (#49257)
yghstill Dec 22, 2022
b187c95
cherry-pick 45860
YuanRisheng Sep 19, 2022
dfa1d69
[BUG FIX]Fix MetaTensor's bug when run infermeta (#46265)
YuanRisheng Sep 22, 2022
3 changes: 3 additions & 0 deletions .gitignore
@@ -53,6 +53,7 @@ model_test

Testing
tools/__pycache__
tools/nvcc_lazy

# This file is automatically generated.
# TODO(zhiqiang) Move this file to build directory.
@@ -70,7 +71,9 @@ paddle/fluid/pybind/eager_op_function.cc

# these files (directories) are generated before build system generation
paddle/fluid/operators/generated_op.cc
paddle/fluid/operators/generated_sparse_op.cc
paddle/phi/ops/compat/generated_sig.cc
paddle/phi/ops/compat/generated_sparse_sig.cc
paddle/phi/api/yaml/parsed_apis/
python/paddle/utils/code_gen/
paddle/fluid/pybind/tmp_eager_op_function_impl.h
8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
@@ -4,10 +4,10 @@ repos:
hooks:
- id: remove-crlf
files: (?!.*third_party)^.*$ | (?!.*book)^.*$
- repo: https://github.com/google/yapf
rev: v0.32.0
- repo: https://github.com/psf/black.git
rev: 22.8.0
hooks:
- id: yapf
- id: black
files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$
exclude: |
(?x)^(
@@ -24,7 +24,7 @@ repos:
files: (?!.*third_party)^.*$ | (?!.*book)^.*$
- id: end-of-file-fixer
- id: sort-simple-yaml
files: (api|backward|api_[a-z_]+)\.yaml$
files: (op|backward|op_[a-z_]+)\.yaml$
- repo: local
hooks:
- id: clang-format
3 changes: 0 additions & 3 deletions .style.yapf

This file was deleted.

4 changes: 2 additions & 2 deletions CMakeLists.txt
@@ -249,7 +249,7 @@ option(WITH_SYSTEM_BLAS "Use system blas library" OFF)
option(WITH_DISTRIBUTE "Compile with distributed support" OFF)
option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocal" OFF)
option(ON_INFER "Turn on inference optimization and inference-lib generation"
OFF)
ON)
################################ Internal Configurations #######################################
option(WITH_NV_JETSON "Compile PaddlePaddle with NV JETSON" OFF)
option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler and gperftools"
@@ -336,7 +336,7 @@ endif()

if(LINUX
AND NOT WITH_CUSTOM_DEVICE
AND NOT ON_INFER)
AND WITH_PYTHON)
set(WITH_CUSTOM_DEVICE
ON
CACHE BOOL "Enable Custom Device when compiling for Linux" FORCE)
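This hunk flips the default of ON_INFER from OFF to ON and, together with the cherry-picked commit "[CustomDevice] turn on WITH_CUSTOM_DEVICE when WITH_PYTHON=ON (#47165)" listed above, gates custom-device support on WITH_PYTHON instead of NOT ON_INFER. A hedged sketch of an initial-cache file for a C++ inference-only build under the new defaults (the file name and the exact option selection are illustrative assumptions, not part of this PR):

# infer.cmake -- illustrative only; pass with: cmake -C infer.cmake <source-dir>
set(ON_INFER ON CACHE BOOL "" FORCE)     # now the default after this change
set(WITH_PYTHON OFF CACHE BOOL "" FORCE) # also leaves WITH_CUSTOM_DEVICE off, per the guard above
set(WITH_GPU ON CACHE BOOL "" FORCE)     # assumption: a GPU inference build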
8 changes: 4 additions & 4 deletions README.md
@@ -15,12 +15,12 @@ English | [简体中文](./README_cn.md)
Welcome to the PaddlePaddle GitHub.

PaddlePaddle, as the first independent R&D deep learning platform in China, has been officially open-sourced to professional communities since 2016. It is an industrial platform with advanced technologies and rich features that cover core deep learning frameworks, basic model libraries, end-to-end development kits, tools & components as well as service platforms.
PaddlePaddle is originated from industrial practices with dedication and commitments to industrialization. It has been widely adopted by a wide range of sectors including manufacturing, agriculture, enterprise service, and so on while serving more than 4.7 million developers, 180,000 companies and generating 560,000 models. With such advantages, PaddlePaddle has helped an increasing number of partners commercialize AI.
PaddlePaddle is originated from industrial practices with dedication and commitments to industrialization. It has been widely adopted by a wide range of sectors including manufacturing, agriculture, enterprise service, and so on while serving more than 5.35 million developers, 200,000 companies and generating 670,000 models. With such advantages, PaddlePaddle has helped an increasing number of partners commercialize AI.


## Installation

### Latest PaddlePaddle Release: [v2.3](https://github.com/PaddlePaddle/Paddle/tree/release/2.3)
### Latest PaddlePaddle Release: [v2.4](https://github.com/PaddlePaddle/Paddle/tree/release/2.4)

Our vision is to enable deep learning for everyone via PaddlePaddle.
Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest features of PaddlePaddle.
@@ -89,8 +89,8 @@ We provide [English](https://www.paddlepaddle.org.cn/documentation/docs/en/guide

## Courses

- [Server Deployments](https://aistudio.baidu.com/aistudio/course/introduce/19084): Courses intorducing high performance server deployments via local and remote services.
- [Edge Deployments](https://aistudio.baidu.com/aistudio/course/introduce/22690): Courses intorducing edge deployments from mobile, IoT to web and applets.
- [Server Deployments](https://aistudio.baidu.com/aistudio/course/introduce/19084): Courses introducing high performance server deployments via local and remote services.
- [Edge Deployments](https://aistudio.baidu.com/aistudio/course/introduce/22690): Courses introducing edge deployments from mobile, IoT to web and applets.

## Copyright and License
PaddlePaddle is provided under the [Apache-2.0 license](LICENSE).
25 changes: 9 additions & 16 deletions README_cn.md
@@ -15,11 +15,11 @@

欢迎来到 PaddlePaddle GitHub

飞桨(PaddlePaddle)以百度多年的深度学习技术研究和业务应用为基础,是中国首个自主研发、功能完备、 开源开放的产业级深度学习平台,集深度学习核心训练和推理框架、基础模型库、端到端开发套件和丰富的工具组件于一体。目前,飞桨累计开发者477万,服务企业18万家,基于飞桨开源深度学习平台产生了56万个模型。飞桨助力开发者快速实现AI想法,快速上线AI业务。帮助越来越多的行业完成AI赋能,实现产业智能化升级。
飞桨(PaddlePaddle)以百度多年的深度学习技术研究和业务应用为基础,是中国首个自主研发、功能完备、 开源开放的产业级深度学习平台,集深度学习核心训练和推理框架、基础模型库、端到端开发套件和丰富的工具组件于一体。目前,飞桨累计开发者535万,服务企业20万家,基于飞桨开源深度学习平台产生了67万个模型。飞桨助力开发者快速实现AI想法,快速上线AI业务。帮助越来越多的行业完成AI赋能,实现产业智能化升级。

## 安装

### PaddlePaddle最新版本: [v2.3](https://github.com/PaddlePaddle/Paddle/tree/release/2.3)
### PaddlePaddle最新版本: [v2.4](https://github.com/PaddlePaddle/Paddle/tree/release/2.4)

跟进PaddlePaddle最新特性请参考我们的[版本说明](https://github.com/PaddlePaddle/Paddle/releases)

@@ -63,32 +63,25 @@ PaddlePaddle用户可领取**免费Tesla V100在线算力资源**,训练模型
我们提供 [英文](https://www.paddlepaddle.org.cn/documentation/docs/en/guides/index_en.html) 和
[中文](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/index_cn.html) 文档

- [使用指南](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/index_cn.html)
- [使用指南](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/index_cn.html):或许您想从深度学习基础开始学习飞桨

或许您想从深度学习基础开始学习飞桨

- [应用实践](https://www.paddlepaddle.org.cn/documentation/docs/zh/tutorial/index_cn.html)
- [应用实践](https://www.paddlepaddle.org.cn/documentation/docs/zh/tutorial/index_cn.html):使用飞桨搭建您的模型,更高效的完成深度学习任务


- [API Reference](https://www.paddlepaddle.org.cn/documentation/docs/zh/api/index_cn.html)
- [API 文档](https://www.paddlepaddle.org.cn/documentation/docs/zh/api/index_cn.html):新的 API 支持代码更少更简洁的程序

新的API支持代码更少更简洁的程序


- [贡献方式](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/08_contribution/index_cn.html)

欢迎您的贡献!
- [贡献方式](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/08_contribution/index_cn.html):欢迎您的贡献!

## 交流与反馈

- 欢迎您通过[Github Issues](https://github.com/PaddlePaddle/Paddle/issues)来提交问题、报告与建议
- QQ群: 441226485 (PaddlePaddle)
- [论坛](https://aistudio.baidu.com/paddle/forum): 欢迎大家在PaddlePaddle论坛分享在使用PaddlePaddle中遇到的问题和经验, 营造良好的论坛氛围
- [论坛](https://aistudio.baidu.com/paddle/forum): 欢迎大家在PaddlePaddle论坛分享在使用PaddlePaddle中遇到的问题和经验营造良好的论坛氛围

## 课程

- [服务器部署](https://aistudio.baidu.com/aistudio/course/introduce/19084): 详细介绍高性能服务器端部署实操,包含本地端及服务化Serving部署等
- [端侧部署](https://aistudio.baidu.com/aistudio/course/introduce/22690): 详细介绍端侧多场景部署实操,从移端端设备、IoT、网页到小程序部署
- [端侧部署](https://aistudio.baidu.com/aistudio/course/introduce/22690): 详细介绍端侧多场景部署实操,从移动端设备、IoT、网页到小程序部署

## 版权和许可证
PaddlePaddle由[Apache-2.0 license](LICENSE)提供
4 changes: 2 additions & 2 deletions SECURITY.md
@@ -48,7 +48,7 @@ We will indicate the bug fix in the release of PaddlePaddle, and publish the vul

### What is a vulnerability?

In the process of computation graphs in PaddlePaddle, models can perform arbitrary computations , including reading and writing files, communicating with the network, etc. It may cause memory exhaustion, deadlock, etc., which will lead to unexpected behavior of PaddlePaddle. We consider these behavior to be security vulnerabilities only if they are out of the intention of the operation involved.
In the process of computation graphs in PaddlePaddle, models can perform arbitrary computations , including reading and writing files, communicating with the network, etc. It may cause memory exhaustion, deadlock, etc., which will lead to unexpected behavior of PaddlePaddle. We consider these behavior to be security vulnerabilities only if they are out of the intention of the operation involved.



@@ -60,4 +60,4 @@ If malicious input can trigger memory corruption or non-clean exit, such bug is



[security advisories](https://github.com/PaddlePaddle/Paddle/blob/develop/security/README.md)
[security advisories](./security/README.md)
2 changes: 1 addition & 1 deletion SECURITY_cn.md
@@ -46,4 +46,4 @@

如果输入非预期的参数后,对飞桨代码造成了内存破坏,或者非干净退出,这类行为被认定为存在安全问题。

### [安全公告](https://github.com/PaddlePaddle/Paddle/blob/develop/security/README_cn.md)
### [安全公告](./security/README_cn.md)
1 change: 1 addition & 0 deletions cmake/cuda.cmake
@@ -6,6 +6,7 @@ if(WITH_NV_JETSON)
add_definitions(-DWITH_NV_JETSON)
set(paddle_known_gpu_archs "53 62 72")
set(paddle_known_gpu_archs10 "53 62 72")
set(paddle_known_gpu_archs11 "53 62 72")
elseif(NEW_RELEASE_ALL)
message("Using New Release Strategy - All Arches Packge")
add_definitions(-DNEW_RELEASE_ALL)
17 changes: 11 additions & 6 deletions cmake/experiments/cuda_module_loading_lazy.cmake
@@ -1,4 +1,4 @@
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -16,16 +16,15 @@
# cuda moduel lazy loading is supported by CUDA 11.7+
# this experiment option makes Paddle supports lazy loading before CUDA 11.7.

option(EXP_CUDA_MODULE_LOADING_LAZY "enable lazy cuda module loading" OFF)
if(${EXP_CUDA_MODULE_LOADING_LAZY})
if(NOT ${ON_INFER} OR NOT ${LINUX})
if(LINUX)
if(NOT ON_INFER)
message(
"EXP_CUDA_MODULE_LOADING_LAZY only works with ON_INFER=ON on Linux platforms"
)
return()
endif()
if(NOT ${CUDA_FOUND})
message("EXP_CUDA_MODULE_LOADING_LAZY only works with CUDA")
if(NOT WITH_GPU)
message("EXP_CUDA_MODULE_LOADING_LAZY only works with GPU")
return()
endif()
if(${CUDA_VERSION} VERSION_GREATER_EQUAL "11.7")
@@ -41,6 +40,12 @@ if(${EXP_CUDA_MODULE_LOADING_LAZY})
CACHE BOOL "" FORCE)
set(CMAKE_CUDA_FLAGS "--cudart shared")
enable_language(CUDA)
execute_process(
COMMAND "rm" "-rf" "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy"
COMMAND "chmod" "755" "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy.sh"
COMMAND "bash" "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy.sh"
"${CMAKE_SOURCE_DIR}/tools/nvcc_lazy" "${CUDA_TOOLKIT_ROOT_DIR}")
execute_process(COMMAND "chmod" "755" "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy")
set(CUDA_NVCC_EXECUTABLE
"${CMAKE_SOURCE_DIR}/tools/nvcc_lazy"
CACHE FILEPATH "" FORCE)
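Per the comments and guards in this file, the experiment back-ports lazy CUDA module loading to toolkits older than 11.7, and it only takes effect for Linux inference builds with GPU support; when enabled, the build generates a tools/nvcc_lazy wrapper and swaps it in as CUDA_NVCC_EXECUTABLE. A hedged configure-time sketch of turning it on, using only option names visible in this diff and in the top-level CMakeLists.txt (the initial-cache file itself is an illustrative assumption):

# lazy_loading.cmake -- illustrative only; pass with: cmake -C lazy_loading.cmake <source-dir>
set(EXP_CUDA_MODULE_LOADING_LAZY ON CACHE BOOL "enable lazy cuda module loading" FORCE)
set(ON_INFER ON CACHE BOOL "" FORCE) # the guard above requires an inference build
set(WITH_GPU ON CACHE BOOL "" FORCE) # and a GPU build
# With CUDA < 11.7, the hunk above then generates tools/nvcc_lazy via tools/nvcc_lazy.sh
# and forces CUDA_NVCC_EXECUTABLE to point at it.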
4 changes: 2 additions & 2 deletions cmake/external/gloo.cmake
100644 → 100755
@@ -25,8 +25,8 @@ set(GLOO_LIBRARY_DIR
"${GLOO_INSTALL_DIR}/lib"
CACHE PATH "gloo library directory." FORCE)
# As we add extra features for gloo, we use the non-official repo
set(GLOO_REPOSITORY ${GIT_URL}/sandyhouse/gloo.git)
set(GLOO_TAG v0.0.2)
set(GLOO_REPOSITORY ${GIT_URL}/ziyoujiyi/gloo.git)
set(GLOO_TAG v0.0.3)
set(GLOO_LIBRARIES
"${GLOO_INSTALL_DIR}/lib/libgloo.a"
CACHE FILEPATH "gloo library." FORCE)
2 changes: 0 additions & 2 deletions cmake/external/protobuf.cmake
@@ -268,8 +268,6 @@ function(build_protobuf TARGET_NAME BUILD_FOR_HOST)
DOWNLOAD_DIR ${PROTOBUF_SOURCE_DIR}
DOWNLOAD_COMMAND rm -rf arm_protobuf.tar.gz && wget --no-check-certificate
${ARM_PROTOBUF_URL} && tar zxvf arm_protobuf.tar.gz
#DOWNLOAD_COMMAND cp /home/wangbin44/Paddle/build/arm_protobuf.tar.gz .
# && tar zxvf arm_protobuf.tar.gz
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${PROTOBUF_INSTALL_DIR}
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
1 change: 1 addition & 0 deletions cmake/external/rocksdb.cmake
@@ -35,6 +35,7 @@ ExternalProject_Add(
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DWITH_BZ2=OFF
-DPORTABLE=1
-DWITH_GFLAGS=OFF
-DCMAKE_CXX_FLAGS=${ROCKSDB_CMAKE_CXX_FLAGS}
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
6 changes: 3 additions & 3 deletions cmake/external/xpu.cmake
@@ -9,8 +9,8 @@ set(XPU_RT_LIB_NAME "libxpurt.so")

if(NOT DEFINED XPU_BASE_URL)
set(XPU_BASE_URL_WITHOUT_DATE
"https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev")
set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220907")
"https://baidu-kunlun-product.su.bcebos.com/KL-SDK/klsdk-dev")
set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220919")
else()
set(XPU_BASE_URL "${XPU_BASE_URL}")
endif()
@@ -19,7 +19,7 @@ endif()
if(NOT DEFINED XPU_XDNN_BASE_URL)
set(XPU_XDNN_BASE_URL_WITHOUT_DATE
"https://klx-sdk-release-public.su.bcebos.com/xdnn/dev")
set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20220907")
set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20220919")
else()
set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL}")
endif()
1 change: 0 additions & 1 deletion cmake/external/xxhash.cmake
@@ -97,5 +97,4 @@ endif()

add_library(xxhash STATIC IMPORTED GLOBAL)
set_property(TARGET xxhash PROPERTY IMPORTED_LOCATION ${XXHASH_LIBRARIES})
include_directories(${XXHASH_INCLUDE_DIR})
add_dependencies(xxhash extern_xxhash)
57 changes: 56 additions & 1 deletion cmake/operators.cmake
@@ -510,7 +510,7 @@ function(op_library TARGET)
if(WITH_MKLDNN AND ${mkldnn_cc_srcs_len} GREATER 0)
# Append first implemented MKLDNN activation operator
if(${MKLDNN_FILE} STREQUAL "activation_mkldnn_op")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(gelu, MKLDNN);\n")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(softplus, MKLDNN);\n")
elseif(${MKLDNN_FILE} STREQUAL "conv_mkldnn_op")
file(APPEND ${pybind_file}
"USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, FP32);\n")
@@ -611,3 +611,58 @@ function(register_operators)
endif()
endif()
endfunction()

function(prune_pybind_h)
set(op_list ${OP_LIST})

list(APPEND op_list "load_combine")
list(APPEND op_list "tensorrt_engine")

# add fused_op in op_list
list(APPEND op_list "fc")
list(APPEND op_list "conv2d_fusion")
list(APPEND op_list "fusion_seqconv_eltadd_relu")
list(APPEND op_list "fusion_seqpool_cvm_concat")
list(APPEND op_list "fusion_gru")
list(APPEND op_list "fusion_seqexpand_concat_fc")
list(APPEND op_list "fusion_repeated_fc_relu")
list(APPEND op_list "fusion_squared_mat_sub")

# add plugin_op in op_list
list(APPEND op_list "anchor_generator")

file(STRINGS ${pybind_file} op_registry_list)

file(WRITE ${pybind_file_prune} "")
file(
APPEND ${pybind_file_prune}
"// Generated by the paddle/fluid/operators/CMakeLists.txt. DO NOT EDIT!\n"
)

# add USE_OP_ITSELF for all op in op_list
foreach(op_name IN LISTS op_list)
file(APPEND ${pybind_file_prune} "USE_OP_ITSELF(${op_name});\n")
endforeach()

foreach(op_registry IN LISTS op_registry_list)
if(NOT ${op_registry} EQUAL "")
foreach(op_name IN LISTS op_list)
string(FIND ${op_registry} "(${op_name})" index1)
string(FIND ${op_registry} "(${op_name}," index2)
string(FIND ${op_registry} "USE_OP_ITSELF" index3)
if(((NOT ${index1} EQUAL "-1") OR (NOT ${index2} EQUAL "-1"))
AND (${index3} EQUAL "-1"))
file(APPEND ${pybind_file_prune} "${op_registry}\n")
endif()
endforeach()
endif()
endforeach()

file(WRITE ${pybind_file} "")
file(STRINGS ${pybind_file_prune} op_registry_list_tmp)
foreach(op_name IN LISTS op_registry_list_tmp)
if(NOT ${op_name} EQUAL "")
file(APPEND ${pybind_file} "${op_name}\n")
endif()
endforeach()
endfunction()
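The new prune_pybind_h() helper rewrites the generated pybind registration file so that only the operators named in OP_LIST, plus the fused and plugin ops it appends itself, keep their registration macros, and every kept op also gets a USE_OP_ITSELF line. A hypothetical call-site sketch, assuming OP_LIST is passed at configure time and that pybind_file and pybind_file_prune are already defined by the surrounding build scripts (both assumptions, not shown in this hunk):

# Hypothetical usage, e.g.: cmake .. -DON_INFER=ON -DOP_LIST="sum;softmax;matmul_v2"
if(ON_INFER AND DEFINED OP_LIST AND NOT "${OP_LIST}" STREQUAL "")
  prune_pybind_h() # rewrites ${pybind_file} in place, keeping only the selected ops
endif()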