Skip to content

Commit

Permalink
Merge branch 'develop' into l2_distance
Browse files Browse the repository at this point in the history
  • Loading branch information
lcy-seso committed Nov 17, 2017
2 parents 929efdc + 093c526 commit 29fc94b
Show file tree
Hide file tree
Showing 156 changed files with 2,760 additions and 3,706 deletions.
26 changes: 19 additions & 7 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,7 @@ include(simd)
################################ Configurations #######################################
option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND})
option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND})
option(WITH_MKLDNN "Compile PaddlePaddle with mkl-dnn support." ${AVX_FOUND})
option(WITH_MKLML "Compile PaddlePaddle with mklml package." ${AVX_FOUND})
option(WITH_MKL "Compile PaddlePaddle with MKL support." ${AVX_FOUND})
option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON)
option(WITH_TESTING "Compile PaddlePaddle with unit testing" ON)
option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON)
Expand Down Expand Up @@ -82,10 +81,8 @@ if(ANDROID OR IOS)
"Disable PYTHON when cross-compiling for Android and iOS" FORCE)
set(WITH_RDMA OFF CACHE STRING
"Disable RDMA when cross-compiling for Android and iOS" FORCE)
set(WITH_MKLDNN OFF CACHE STRING
"Disable MKLDNN when cross-compiling for Android and iOS" FORCE)
set(WITH_MKLML OFF CACHE STRING
"Disable MKLML package when cross-compiling for Android and iOS" FORCE)
set(WITH_MKL OFF CACHE STRING
"Disable MKL when cross-compiling for Android and iOS" FORCE)

# Compile PaddlePaddle mobile inference library
if (NOT WITH_C_API)
Expand All @@ -111,6 +108,17 @@ else()
set(THIRD_PARTY_BUILD_TYPE Release)
endif()

if(WITH_MKL)
set(WITH_MKLML ON)
set(WITH_MKLDNN ${AVX2_FOUND})
if(NOT WITH_MKLDNN)
message(WARNING "Do not have AVX2 intrinsics and disabled MKL-DNN")
endif()
else()
set(WITH_MKLML OFF)
set(WITH_MKLDNN OFF)
endif()

########################################################################################

include(external/mklml) # download mklml package
Expand Down Expand Up @@ -164,8 +172,12 @@ if(WITH_GPU)
endif(NOT WITH_DSO)
endif(WITH_GPU)

if(WITH_MKLML)
list(APPEND EXTERNAL_LIBS ${MKLML_IOMP_LIB})
endif()

if(WITH_MKLDNN)
list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB} ${MKLDNN_IOMP_LIB})
list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB})
endif()

if(USE_NNPACK)
Expand Down
6 changes: 1 addition & 5 deletions benchmark/paddle/image/run_mkldnn.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
set -e

function train() {
unset OMP_NUM_THREADS MKL_NUM_THREADS
export OMP_DYNAMIC="FALSE"
export KMP_AFFINITY="granularity=fine,compact,0,0"
unset OMP_NUM_THREADS MKL_NUM_THREADS OMP_DYNAMIC KMP_AFFINITY
topology=$1
layer_num=$2
bs=$3
Expand All @@ -14,8 +12,6 @@ function train() {
elif [ $4 == "False" ]; then
thread=`nproc`
# each trainer_count use only 1 core to avoid conflict
export OMP_NUM_THREADS=1
export MKL_NUM_THREADS=1
log="logs/${topology}-${layer_num}-${thread}mklml-${bs}.log"
else
echo "Wrong input $3, use True or False."
Expand Down
29 changes: 8 additions & 21 deletions cmake/configure.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -76,27 +76,14 @@ else()
include_directories(${CUDA_TOOLKIT_INCLUDE})
endif(NOT WITH_GPU)

if(WITH_MKLDNN)
add_definitions(-DPADDLE_USE_MKLDNN)
if (WITH_MKLML AND MKLDNN_IOMP_DIR)
message(STATUS "Enable Intel OpenMP at ${MKLDNN_IOMP_DIR}")
set(OPENMP_FLAGS "-fopenmp")
set(CMAKE_C_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS})
set(CMAKE_CXX_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS})
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENMP_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENMP_FLAGS}")
else()
find_package(OpenMP)
if(OPENMP_FOUND)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
else()
message(WARNING "Can not find OpenMP."
"Some performance features in MKLDNN may not be available")
endif()
endif()

endif(WITH_MKLDNN)
if (WITH_MKLML AND MKLML_IOMP_LIB)
message(STATUS "Enable Intel OpenMP with ${MKLML_IOMP_LIB}")
set(OPENMP_FLAGS "-fopenmp")
set(CMAKE_C_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS})
set(CMAKE_CXX_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS})
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENMP_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENMP_FLAGS}")
endif()

set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SIMD_FLAG}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SIMD_FLAG}")
Expand Down
8 changes: 3 additions & 5 deletions cmake/cross_compiling/ios.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,9 @@ set(IOS_PLATFORM ${IOS_PLATFORM} CACHE STRING "Type of iOS Platform")
# Set the architecture for iOS
if(NOT DEFINED IOS_ARCH)
if(IOS_PLATFORM STREQUAL "OS")
# FIXME(liuyiqun): support "armv7;armv7s;arm64" future
set(IOS_ARCH "arm64")
set(IOS_ARCH "armv7;armv7s;arm64")
elseif(IOS_PLATFORM STREQUAL "SIMULATOR")
# FIXME(liuyiqun): support "i386;x86_64" future
set(IOS_ARCH "x86_64")
set(IOS_ARCH "i386;x86_64")
endif()
endif()
set(CMAKE_OSX_ARCHITECTURES ${IOS_ARCH} CACHE string "Build architecture for iOS")
Expand Down Expand Up @@ -248,7 +246,7 @@ set(IOS_COMPILER_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} ${XCODE_IOS_BITCODE_

# Hidden visibilty is required for cxx on iOS
set(CMAKE_C_FLAGS "${IOS_COMPILER_FLAGS} ${CMAKE_C_FLAGS}" CACHE STRING "C flags")
set(CMAKE_CXX_FLAGS "${IOS_COMPILER_FLAGS} -fvisibility-inlines-hidden ${CMAKE_CXX_FLAGS}" CACHE STRING "CXX flags")
set(CMAKE_CXX_FLAGS "${IOS_COMPILER_FLAGS} -fvisibility=hidden -fvisibility-inlines-hidden ${CMAKE_CXX_FLAGS}" CACHE STRING "CXX flags")

set(IOS_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first")

Expand Down
14 changes: 7 additions & 7 deletions cmake/external/mkldnn.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,9 @@ INCLUDE_DIRECTORIES(${MKLDNN_INC_DIR})

IF(${CBLAS_PROVIDER} STREQUAL "MKLML")
SET(MKLDNN_DEPENDS ${MKLML_PROJECT})
SET(MKLDNN_MKLROOT ${MKLML_ROOT})
SET(MKLDNN_IOMP_LIB ${MKLML_IOMP_LIB})
SET(MKLDNN_IOMP_DIR ${MKLML_LIB_DIR})
MESSAGE(STATUS "Build MKLDNN with ${MKLDNN_MKLROOT}")
MESSAGE(STATUS "Build MKLDNN with MKLML ${MKLML_ROOT}")
ELSE()
MESSAGE(FATAL_ERROR "Should enable MKLML when build MKLDNN")
ENDIF()

SET(MKLDNN_CFLAG "${CMAKE_C_FLAGS} -Wno-error=strict-overflow")
Expand All @@ -57,15 +56,16 @@ ExternalProject_Add(
PREFIX ${MKLDNN_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR}
CMAKE_ARGS -DMKLROOT=${MKLDNN_MKLROOT}
CMAKE_ARGS -DMKLROOT=${MKLML_ROOT}
CMAKE_ARGS -DCMAKE_C_FLAGS=${MKLDNN_CFLAG}
CMAKE_ARGS -DCMAKE_CXX_FLAGS=${MKLDNN_CXXFLAG}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLDNN_INSTALL_DIR}
-DMKLROOT:PATH=${MKLDNN_MKLROOT}
-DMKLROOT:PATH=${MKLML_ROOT}
)

ADD_LIBRARY(mkldnn SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIB})
ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT})
MESSAGE(STATUS "Mkldnn library: ${MKLDNN_LIB}")
MESSAGE(STATUS "MKLDNN library: ${MKLDNN_LIB}")
add_definitions(-DPADDLE_USE_MKLDNN)
LIST(APPEND external_project_dependencies mkldnn)
15 changes: 7 additions & 8 deletions cmake/external/openblas.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ IF(NOT ${CBLAS_FOUND})
"${CBLAS_INSTALL_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}openblas${CMAKE_STATIC_LIBRARY_SUFFIX}"
CACHE FILEPATH "openblas library." FORCE)

SET(OPENBLAS_CC "${CMAKE_C_COMPILER}")
SET(OPENBLAS_CC "${CMAKE_C_COMPILER} -Wno-unused-but-set-variable -Wno-unused-variable")

IF(CMAKE_CROSSCOMPILING)
SET(OPTIONAL_ARGS HOSTCC=${HOST_C_COMPILER})
Expand All @@ -45,15 +45,14 @@ IF(NOT ${CBLAS_FOUND})
SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV8 BINARY=64 USE_THREAD=0)
ENDIF()
ELSEIF(IOS)
# FIXME(liuyiqun): support multiple architectures
SET(OPENBLAS_COMMIT "b5c96fcfcdc82945502a2303116a64d89985daf5")
SET(OPENBLAS_CC "${OPENBLAS_CC} ${CMAKE_C_FLAGS} -isysroot ${CMAKE_OSX_SYSROOT}")
IF(CMAKE_OSX_ARCHITECTURES MATCHES "armv7")
SET(OPENBLAS_CC "${OPENBLAS_CC} -arch armv7")
SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV7 ARM_SOFTFP_ABI=1 USE_THREAD=0)
ELSEIF(CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
IF(CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
SET(OPENBLAS_COMMIT "b5c96fcfcdc82945502a2303116a64d89985daf5")
SET(OPENBLAS_CC "${OPENBLAS_CC} ${CMAKE_C_FLAGS} -isysroot ${CMAKE_OSX_SYSROOT}")
SET(OPENBLAS_CC "${OPENBLAS_CC} -arch arm64")
SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV8 BINARY=64 USE_THREAD=0 CROSS_SUFFIX=${CROSS_SUFFIX})
ELSE()
MESSAGE(FATAL_ERROR "OpenBLAS only support arm64 architectures on iOS. "
"You can set IOS_USE_VECLIB_FOR_BLAS=ON or USE_EIGEN_FOR_BLAS=ON to use other blas library instead.")
ENDIF()
ELSEIF(RPI)
# use hardfp
Expand Down
4 changes: 4 additions & 0 deletions cmake/external/warpctc.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

IF(MOBILE_INFERENCE)
return()
ENDIF()

INCLUDE(ExternalProject)

SET(WARPCTC_SOURCES_DIR ${THIRD_PARTY_PATH}/warpctc)
Expand Down
4 changes: 2 additions & 2 deletions cmake/util.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,8 @@ function(link_paddle_exe TARGET_NAME)
target_link_libraries(${TARGET_NAME} log)
endif(ANDROID)

if(WITH_MKLDNN AND WITH_MKLML AND MKLDNN_IOMP_DIR)
target_link_libraries(${TARGET_NAME} "-L${MKLDNN_IOMP_DIR} -liomp5 -Wl,--as-needed")
if(WITH_MKLML AND MKLML_LIB_DIR AND MKLML_IOMP_LIB)
target_link_libraries(${TARGET_NAME} "-L${MKLML_LIB_DIR} -liomp5 -Wl,--as-needed")
endif()

add_dependencies(${TARGET_NAME} ${external_project_dependencies})
Expand Down
10 changes: 10 additions & 0 deletions doc/api/v2/config/layer.rst
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,16 @@ bilinear_interp
.. autoclass:: paddle.v2.layer.bilinear_interp
:noindex:

dot_prod
---------
.. autoclass:: paddle.v2.layer.dot_prod
:noindex:

out_prod
--------
.. autoclass:: paddle.v2.layer.out_prod
:noindex:

power
-----
.. autoclass:: paddle.v2.layer.power
Expand Down
8 changes: 4 additions & 4 deletions doc/design/mkldnn/README.MD
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,13 @@ Figure 1. PaddlePaddle on IA.
我们把集成方案大致分为了如下几个方面。

### CMake
我们会在`CMakeLists.txt`中会添加`WITH_MKLDNN`的选项,当设置这个值为`ON`的时候会启用编译MKL-DNN功能。同时会自动开启OpenMP用于提高MKL-DNN的性能
我们会在`CMakeLists.txt`中会给用户添加一个`WITH_MKL`的开关,他是负责`WITH_MKLML``WITH_MKLDNN`的总开关

同时,我们会引入`WITH_MKLML`选项,用于选择是否使用MKL-DNN自带的MKLML安装包。这个安装包可以独立于MKL-DNN使用,但是建议在开启MKL-DNN的同时也打开MKLML的开关,这样才能发挥最好的性能
当打开`WITH_MKL`时,会开启MKLML的功能,作为PaddlePaddle的CBLAS和LAPACK库,同时会开启Intel OpenMP用于提高MKLML的性能。 如果系统支持AVX2指令集及以上,同时会开启MKL-DNN功能

所以,我们会在`cmake/external`目录新建`mkldnn.cmake``mklml.cmake`文件,它们会在编译PaddlePaddle的时候下载对应的软件包,并放到PaddlePaddle的third party目录中
当关闭`WITH_MKL`时,MKLML和MKL-DNN功能会同时关闭

**备注**:当`WITH_MKLML=ON`的时候,会优先使用这个包作为PaddlePaddle的CBLAS和LAPACK库,所以会稍微改动`cmake/cblas.cmake`中的逻辑
所以,我们会在`cmake/external`目录新建`mkldnn.cmake``mklml.cmake`文件,它们会在编译PaddlePaddle的时候下载对应的软件包,并放到PaddlePaddle的third party目录中

### Layers
所有MKL-DNN相关的C++ layers,都会按照PaddlePaddle的目录结构存放在
Expand Down
2 changes: 1 addition & 1 deletion doc/howto/dev/write_docs_cn.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ PaddlePaddle的文档构建有两种方式。
cd TO_YOUR_PADDLE_CLONE_PATH
mkdir -p build
cd build
cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_MKLDNN=OFF -DWITH_MKLML=OFF -DWITH_DOC=ON
cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_MKL=OFF -DWITH_DOC=ON
make gen_proto_py
make paddle_docs paddle_docs_cn
Expand Down
2 changes: 1 addition & 1 deletion doc/mobile/cross_compiling_for_android_cn.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# 构建Android平台上的PaddlePaddle库
# Android平台编译指南

用户可通过如下两种方式,交叉编译Android平台上适用的PaddlePaddle库:
- 基于Docker容器的编译方式
Expand Down
12 changes: 6 additions & 6 deletions doc/mobile/cross_compiling_for_ios_cn.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# 构建iOS平台上的PaddlePaddle库
# iOS平台编译指南
交叉编译iOS平台上适用的PaddlePaddle库,需要在MacOS系统上进行。本文的将介绍在MacOS上,从源码交叉编译iOS平台上适用的PaddlePaddle库。

## 准备交叉编译环境
Expand All @@ -25,7 +25,7 @@ iOS平台可选配置参数:
- `IOS_PLATFORM`,可设置为`OS/SIMULATOR`,默认值为`OS`
- `OS`,构建目标为`arm`架构的iPhone或者iPad等物理设备。
- `SIMULATOR`,构建目标为`x86`架构的模拟器平台。
- `IOS_ARCH`,目标架构。针对不同的`IOS_PLATFORM`,可设置的目标架构如下表所示:
- `IOS_ARCH`,目标架构。针对不同的`IOS_PLATFORM`,可设置的目标架构如下表所示,默认编译所有架构

<table class="docutils">
<colgroup>
Expand All @@ -41,11 +41,11 @@ iOS平台可选配置参数:
<tbody valign="top">
<tr class="row-even">
<td>OS</td>
<td>armv7, armv7s, arm64 (默认)</td>
<td>armv7, armv7s, arm64 </td>
</tr>
<tr class="row-odd">
<td>SIMULATOR</td>
<td>i386, x86_64 (默认)</td>
<td>i386, x86_64 </td>
</tr>
</tbody>
</table>
Expand All @@ -66,7 +66,7 @@ iOS平台可选配置参数:
```bash
cmake -DCMAKE_SYSTEM_NAME=iOS \
-DIOS_PLATFORM=OS \
-DIOS_ARCH="arm64" \
-DIOS_ARCH="armv7;arm64" \
-DIOS_ENABLE_BITCODE=ON \
-DIOS_USE_VECLIB_FOR_BLAS=ON \
-DCMAKE_INSTALL_PREFIX=your/path/to/install \
Expand Down Expand Up @@ -112,6 +112,6 @@ $ make install
- `lib`目录,其中包含PaddlePaddle的C-API静态库
- `third_party`目录,其中包含所依赖的所有第三方库

注意,不同架构的PaddlePaddle库建议安装到不同的目录下,然后使用`lipo`工具将多个静态库合并成一个支持多个架构的fat库
注意,如果PaddlePaddle库需要同时支持真机和模拟器,则需要分别编译真机和模拟器版本,然后使用`lipo`工具合并fat库

自此,PaddlePaddle库已经安装完成,用户可将合成的fat库用于深度学习相关的iOS App中,调用方法见C-API文档。
2 changes: 1 addition & 1 deletion doc/mobile/cross_compiling_for_raspberry_cn.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# 构建Raspberry Pi平台上的PaddlePaddle库
# Raspberry Pi平台编译指南

通常有两个方法来构建基于 Rasspberry Pi 的版本:

Expand Down
4 changes: 4 additions & 0 deletions paddle/capi/Main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ static void initPaddle(int argc, char** argv) {

extern "C" {
paddle_error paddle_init(int argc, char** argv) {
static bool isInit = false;
if (isInit) return kPD_NO_ERROR;

std::vector<char*> realArgv;
realArgv.reserve(argc + 1);
realArgv.push_back(strdup(""));
Expand All @@ -37,6 +40,7 @@ paddle_error paddle_init(int argc, char** argv) {
}
initPaddle(argc + 1, realArgv.data());
free(realArgv[0]);
isInit = true;
return kPD_NO_ERROR;
}
}
2 changes: 2 additions & 0 deletions paddle/cuda/include/hl_gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ limitations under the License. */
#include "hl_matrix.h"
#include "hl_sequence.h"
#include "hl_sparse.h"
#ifndef PADDLE_MOBILE_INFERENCE
#include "hl_warpctc_wrap.h"
#endif

#ifdef HPPL_STUB_FUNC
#include "stub/hl_aggregate_stub.h"
Expand Down
Loading

0 comments on commit 29fc94b

Please sign in to comment.