Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

USE_NVRTC -> ENABLE_CUDA_RTC to fix maven build. Add compile-guard to fusion. #16838

Merged
merged 5 commits into from
Nov 20, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -633,14 +633,17 @@ if(USE_CUDA)
else()
list(APPEND CUDA_INCLUDE_DIRS ${INCLUDE_DIRECTORIES})
# define preprocessor macro so that we will not include the generated forcelink header
if(ENABLE_CUDA_RTC)
add_definitions(-DMXNET_ENABLE_CUDA_RTC=1)
endif()
# Create '.cmake' files for cuda compiles given definitions added thus far
mshadow_cuda_compile(cuda_objs ${CUDA})
if(MSVC)
if(ENABLE_CUDA_RTC)
FIND_LIBRARY(CUDA_nvrtc_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32")
list(APPEND mxnet_LINKER_LIBS ${CUDA_nvrtc_LIBRARY})
set(CUDA_cuda_LIBRARY "${CUDA_nvrtc_LIBRARY}/../cuda.lib")
list(APPEND mxnet_LINKER_LIBS ${CUDA_cuda_LIBRARY})
add_definitions(-DMXNET_ENABLE_CUDA_RTC=1)
endif()
FIND_LIBRARY(CUDA_cufft_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32")
list(APPEND mxnet_LINKER_LIBS "${CUDA_cufft_LIBRARY}/../cufft.lib") # For fft operator
Expand All @@ -652,7 +655,6 @@ if(USE_CUDA)
list(APPEND mxnet_LINKER_LIBS cufft cusolver)
if(ENABLE_CUDA_RTC)
list(APPEND mxnet_LINKER_LIBS nvrtc cuda)
add_definitions(-DMXNET_ENABLE_CUDA_RTC=1)
endif()
link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64")
endif()
Expand Down
2 changes: 1 addition & 1 deletion appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ before_build:

set OpenCV_DIR=%APPVEYOR_BUILD_FOLDER%/%MXNET_OPENCV_DIR%/build

cmake .. -DOPENCV_DIR=%OpenCV_DIR% -DUSE_PROFILER=1 -DUSE_CUDA=0 -DUSE_CUDNN=0 -DUSE_NVRTC=0 -DUSE_OPENCV=1 -DUSE_OPENMP=1 -DUSE_BLAS=open -DUSE_LAPACK=1 -DUSE_DIST_KVSTORE=0 -G "Visual Studio 12 2013 Win64"
cmake .. -DOPENCV_DIR=%OpenCV_DIR% -DUSE_PROFILER=1 -DUSE_CUDA=0 -DUSE_CUDNN=0 -DENABLE_CUDA_RTC=0 -DUSE_OPENCV=1 -DUSE_OPENMP=1 -DUSE_BLAS=open -DUSE_LAPACK=1 -DUSE_DIST_KVSTORE=0 -G "Visual Studio 12 2013 Win64"

build_script:
- cmd: >-
Expand Down
12 changes: 6 additions & 6 deletions ci/build_windows.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class BuildFlavour(Enum):
'WIN_CPU': (
'-DUSE_CUDA=OFF '
'-DUSE_CUDNN=OFF '
'-DUSE_NVRTC=OFF '
'-DENABLE_CUDA_RTC=OFF '
'-DUSE_OPENCV=ON '
'-DUSE_OPENMP=ON '
'-DUSE_BLAS=open '
Expand All @@ -67,7 +67,7 @@ class BuildFlavour(Enum):
, 'WIN_CPU_MKLDNN': (
'-DUSE_CUDA=OFF '
'-DUSE_CUDNN=OFF '
'-DUSE_NVRTC=OFF '
'-DENABLE_CUDA_RTC=OFF '
'-DUSE_OPENCV=ON '
'-DUSE_OPENMP=ON '
'-DUSE_BLAS=open '
Expand All @@ -80,7 +80,7 @@ class BuildFlavour(Enum):
, 'WIN_CPU_MKLDNN_MKL': (
'-DUSE_CUDA=OFF '
'-DUSE_CUDNN=OFF '
'-DUSE_NVRTC=OFF '
'-DENABLE_CUDA_RTC=OFF '
'-DUSE_OPENCV=ON '
'-DUSE_OPENMP=ON '
'-DUSE_BLAS=mkl '
Expand All @@ -93,7 +93,7 @@ class BuildFlavour(Enum):
, 'WIN_CPU_MKL': (
'-DUSE_CUDA=OFF '
'-DUSE_CUDNN=OFF '
'-DUSE_NVRTC=OFF '
'-DENABLE_CUDA_RTC=OFF '
'-DUSE_OPENCV=ON '
'-DUSE_OPENMP=ON '
'-DUSE_BLAS=mkl '
Expand All @@ -106,7 +106,7 @@ class BuildFlavour(Enum):
, 'WIN_GPU': (
'-DUSE_CUDA=ON '
'-DUSE_CUDNN=ON '
'-DUSE_NVRTC=ON '
'-DENABLE_CUDA_RTC=ON '
'-DUSE_OPENCV=ON '
'-DUSE_OPENMP=ON '
'-DUSE_BLAS=open '
Expand All @@ -122,7 +122,7 @@ class BuildFlavour(Enum):
, 'WIN_GPU_MKLDNN': (
'-DUSE_CUDA=ON '
'-DUSE_CUDNN=ON '
'-DUSE_NVRTC=ON '
'-DENABLE_CUDA_RTC=ON '
'-DUSE_OPENCV=ON '
'-DUSE_OPENMP=ON '
'-DUSE_BLAS=open '
Expand Down
2 changes: 1 addition & 1 deletion make/maven/maven_darwin_mkl.mk
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ USE_CUDNN = 0
# CUDA_ARCH :=

# whether use cuda runtime compiling for writing kernels in native language (i.e. Python)
USE_NVRTC = 0
ENABLE_CUDA_RTC = 0

# use openmp for parallelization
USE_OPENMP = 0
Expand Down
2 changes: 1 addition & 1 deletion make/maven/maven_linux_cu90mkl.mk
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ USE_NCCL = 1

# whether use cuda runtime compiling for writing kernels in native language (i.e. Python)
USE_NVTX=1
USE_NVRTC = 1
ENABLE_CUDA_RTC = 1

# use openmp for parallelization
USE_OPENMP = 1
Expand Down
2 changes: 1 addition & 1 deletion make/maven/maven_linux_cu92mkl.mk
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ USE_NCCL = 1

# whether use cuda runtime compiling for writing kernels in native language (i.e. Python)
USE_NVTX=1
USE_NVRTC = 1
ENABLE_CUDA_RTC = 1

# use openmp for parallelization
USE_OPENMP = 1
Expand Down
2 changes: 1 addition & 1 deletion make/maven/maven_linux_mkl.mk
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ USE_CUDNN = 0
# CUDA_ARCH :=

# whether use cuda runtime compiling for writing kernels in native language (i.e. Python)
USE_NVRTC = 0
ENABLE_CUDA_RTC = 0

# use openmp for parallelization
USE_OPENMP = 1
Expand Down
5 changes: 5 additions & 0 deletions src/executor/exec_pass.h
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,11 @@ Graph FusePointwiseForward(Graph&& g);
*/
Graph FusePointwiseBackward(Graph&& g);

/*!
* \brief Issue a one-time warning that fusion is not possible for this platform or build.
*/
void WarnFusionNotSupported();

/*!
* \brief Infer shapes in the graph given the information.
* \param graph The input graph.
Expand Down
9 changes: 7 additions & 2 deletions src/executor/graph_executor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -999,7 +999,7 @@ Graph GraphExecutor::InitGraph(nnvm::Symbol symbol,
// setup gradient
nnvm::Graph g = InitFullGraph(symbol, grad_req_types);

#if MXNET_USE_CUDA && !defined(_WIN32)
#if MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC && !defined(_WIN32)
if (default_ctx.dev_mask() == Context::kGPU && dmlc::GetEnv("MXNET_USE_FUSION", true)) {
nnvm::Graph unoptimized_graph;
common::CopyGraph(&unoptimized_graph, g, false);
Expand Down Expand Up @@ -1032,7 +1032,12 @@ Graph GraphExecutor::InitGraph(nnvm::Symbol symbol,
<< "Graph contains duplicate names for some of its inputs - fusion is NOT enabled!";
}
}
#endif // MXNET_USE_CUDA
#else
// Only warn user if MXNET_USE_FUSION env var is explicitly set
if (default_ctx.dev_mask() == Context::kGPU && dmlc::GetEnv("MXNET_USE_FUSION", false)) {
WarnFusionNotSupported();
}
#endif // MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC && !defined(_WIN32)

// create "device" and "context" attrs for the graph
g = AssignContext(g, default_ctx, ctx_map,
Expand Down
22 changes: 19 additions & 3 deletions src/executor/pointwise_fusion_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,26 @@
#include "../operator/fusion/fused_op.h"
#include "../operator/operator_common.h"

#if MXNET_USE_CUDA

namespace mxnet {
namespace exec {

// Logs a one-time warning that dynamic pointwise-op fusion is unavailable in
// this build or on this platform. Declared in src/executor/exec_pass.h and
// invoked from graph_executor.cc / cached_op.cc when the user explicitly sets
// MXNET_USE_FUSION but fusion support was compiled out.
void WarnFusionNotSupported() {
// Ensures the warning is emitted at most once per process.
// NOTE(review): the check-then-set below is not synchronized, so concurrent
// first calls could each log — harmless for a warning; confirm if stricter
// once-semantics are ever required.
static bool issued_warning = false;
if (!issued_warning) {
issued_warning = true;
#if defined(_WIN32)
// On Windows, fusion is disabled unconditionally (see the !defined(_WIN32)
// guards at the fusion call sites), so the message does not mention RTC.
LOG(WARNING) << "Omitting dynamic fused op creation- not enabled on Windows. "
<< "Unset env var MXNET_USE_FUSION=1 to quiet this message.";
#else
// On other platforms, fusion additionally requires the library to be built
// with both USE_CUDA=1 and ENABLE_CUDA_RTC=1.
LOG(WARNING) << "Omitting dynamic fused op creation- needs MXNet lib built with "
<< "USE_CUDA=1 and ENABLE_CUDA_RTC=1. Unset env var MXNET_USE_FUSION=1 "
<< "to quiet this message.";
#endif // defined(_WIN32)
}
}

#if MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC

namespace {
bool IsFusionCompatible(nnvm::Node* n) {
using namespace mxnet::fusion;
Expand Down Expand Up @@ -304,8 +320,8 @@ Graph FusePointwiseBackward(Graph &&g) {
ret.outputs = g.outputs;
return ret;
}
#endif // MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC

} // namespace exec
} // namespace mxnet

#endif // MXNET_USE_CUDA
13 changes: 8 additions & 5 deletions src/imperative/cached_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -167,10 +167,8 @@ void SetRefCounts(nnvm::Graph* fwd_graph, const nnvm::Graph& full_graph) {

void OptimizeGraph(nnvm::Graph * full_graph, nnvm::Graph * fwd_graph, nnvm::Graph * grad_graph,
const Context& context, size_t num_forward_outputs, const bool inlining) {
#if MXNET_USE_CUDA && !defined(_WIN32)
if (context.dev_mask() == kGPU &&
!inlining &&
dmlc::GetEnv("MXNET_USE_FUSION", true)) {
#if MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC && !defined(_WIN32)
if (context.dev_mask() == kGPU && !inlining && dmlc::GetEnv("MXNET_USE_FUSION", true)) {
nnvm::Graph unoptimized_graph;
common::CopyGraph(&unoptimized_graph, *full_graph, false);

Expand Down Expand Up @@ -202,7 +200,12 @@ void OptimizeGraph(nnvm::Graph * full_graph, nnvm::Graph * fwd_graph, nnvm::Grap
<< "Graph contains duplicate names for some of its inputs - fusion is NOT enabled!";
}
}
#endif // MXNET_USE_CUDA
#else
// Only warn user if MXNET_USE_FUSION env var is explicitly set
if (context.dev_mask() == kGPU && !inlining && dmlc::GetEnv("MXNET_USE_FUSION", false)) {
exec::WarnFusionNotSupported();
}
#endif // MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC && !defined(_WIN32)

*fwd_graph = nnvm::Graph();
fwd_graph->outputs = std::vector<nnvm::NodeEntry>(full_graph->outputs.begin(),
Expand Down
4 changes: 2 additions & 2 deletions src/operator/fusion/fused_op-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
#include <map>
#include <vector>

#if MXNET_USE_CUDA
#if MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC

namespace mxnet {

Expand Down Expand Up @@ -992,6 +992,6 @@ const char kernel_end[] = R"code(}

} // namespace mxnet

#endif // MXNET_USE_CUDA
#endif // MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC

#endif // MXNET_OPERATOR_FUSION_FUSED_OP_INL_H_
4 changes: 2 additions & 2 deletions src/operator/fusion/fused_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
#include "../operator_common.h"
#include "../../executor/exec_pass.h"

#if MXNET_USE_CUDA
#if MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC

namespace mxnet {

Expand Down Expand Up @@ -302,4 +302,4 @@ NNVM_REGISTER_OP(_FusedOpOutHelper)

} // namespace mxnet

#endif // MXNET_USE_CUDA
#endif // MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC
5 changes: 5 additions & 0 deletions src/operator/fusion/fused_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
* under the License.
*/

// Additional use of MXNET_USE_CUDA is not needed to guard a '.cu' file.
#if MXNET_ENABLE_CUDA_RTC

#include <sys/stat.h>
#include <nvrtc.h>
#include <cuda.h>
Expand Down Expand Up @@ -787,3 +790,5 @@ NNVM_REGISTER_OP(_FusedOp)
.set_attr<FCompute>("FCompute<gpu>", FusedOpForwardGPU);

} // namespace mxnet

#endif // MXNET_ENABLE_CUDA_RTC
7 changes: 3 additions & 4 deletions src/operator/fusion/fused_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#ifndef MXNET_OPERATOR_FUSION_FUSED_OP_H_
#define MXNET_OPERATOR_FUSION_FUSED_OP_H_


#include <mxnet/operator.h>
#include <nnvm/graph.h>
#include <vector>
Expand All @@ -29,8 +28,7 @@
#include <mutex>
#include <tuple>

#if MXNET_USE_CUDA

#if MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC

namespace mxnet {

Expand Down Expand Up @@ -202,5 +200,6 @@ using FusedOpHelperParamPtr = std::shared_ptr<FusedOpHelperParam>;

} // namespace mxnet

#endif // MXNET_USE_CUDA
#endif // MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC

#endif // MXNET_OPERATOR_FUSION_FUSED_OP_H_