This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

USE_NVRTC -> ENABLE_CUDA_RTC to fix maven build. Add compile-guard to fusion. #16838

Merged · 5 commits · Nov 20, 2019
Changes from 2 commits
appveyor.yml (1 addition, 1 deletion)

@@ -69,7 +69,7 @@ before_build:

set OpenCV_DIR=%APPVEYOR_BUILD_FOLDER%/%MXNET_OPENCV_DIR%/build

-cmake .. -DOPENCV_DIR=%OpenCV_DIR% -DUSE_PROFILER=1 -DUSE_CUDA=0 -DUSE_CUDNN=0 -DUSE_NVRTC=0 -DUSE_OPENCV=1 -DUSE_OPENMP=1 -DUSE_BLAS=open -DUSE_LAPACK=1 -DUSE_DIST_KVSTORE=0 -G "Visual Studio 12 2013 Win64"
+cmake .. -DOPENCV_DIR=%OpenCV_DIR% -DUSE_PROFILER=1 -DUSE_CUDA=0 -DUSE_CUDNN=0 -DENABLE_CUDA_RTC=0 -DUSE_OPENCV=1 -DUSE_OPENMP=1 -DUSE_BLAS=open -DUSE_LAPACK=1 -DUSE_DIST_KVSTORE=0 -G "Visual Studio 12 2013 Win64"

build_script:
- cmd: >-

ci/build_windows.py (6 additions, 6 deletions)

@@ -54,7 +54,7 @@ class BuildFlavour(Enum):
'WIN_CPU': (
'-DUSE_CUDA=OFF '
'-DUSE_CUDNN=OFF '
-'-DUSE_NVRTC=OFF '
+'-DENABLE_CUDA_RTC=OFF '
'-DUSE_OPENCV=ON '
'-DUSE_OPENMP=ON '
'-DUSE_BLAS=open '
@@ -67,7 +67,7 @@ class BuildFlavour(Enum):
, 'WIN_CPU_MKLDNN': (
'-DUSE_CUDA=OFF '
'-DUSE_CUDNN=OFF '
-'-DUSE_NVRTC=OFF '
+'-DENABLE_CUDA_RTC=OFF '
'-DUSE_OPENCV=ON '
'-DUSE_OPENMP=ON '
'-DUSE_BLAS=open '
@@ -80,7 +80,7 @@ class BuildFlavour(Enum):
, 'WIN_CPU_MKLDNN_MKL': (
'-DUSE_CUDA=OFF '
'-DUSE_CUDNN=OFF '
-'-DUSE_NVRTC=OFF '
+'-DENABLE_CUDA_RTC=OFF '
'-DUSE_OPENCV=ON '
'-DUSE_OPENMP=ON '
'-DUSE_BLAS=mkl '
@@ -93,7 +93,7 @@ class BuildFlavour(Enum):
, 'WIN_CPU_MKL': (
'-DUSE_CUDA=OFF '
'-DUSE_CUDNN=OFF '
-'-DUSE_NVRTC=OFF '
+'-DENABLE_CUDA_RTC=OFF '
'-DUSE_OPENCV=ON '
'-DUSE_OPENMP=ON '
'-DUSE_BLAS=mkl '
@@ -106,7 +106,7 @@ class BuildFlavour(Enum):
, 'WIN_GPU': (
'-DUSE_CUDA=ON '
'-DUSE_CUDNN=ON '
-'-DUSE_NVRTC=ON '
+'-DENABLE_CUDA_RTC=ON '
'-DUSE_OPENCV=ON '
'-DUSE_OPENMP=ON '
'-DUSE_BLAS=open '
@@ -122,7 +122,7 @@ class BuildFlavour(Enum):
, 'WIN_GPU_MKLDNN': (
'-DUSE_CUDA=ON '
'-DUSE_CUDNN=ON '
-'-DUSE_NVRTC=ON '
+'-DENABLE_CUDA_RTC=ON '
'-DUSE_OPENCV=ON '
'-DUSE_OPENMP=ON '
'-DUSE_BLAS=open '

make/maven/maven_darwin_mkl.mk (1 addition, 1 deletion)

@@ -77,7 +77,7 @@ USE_CUDNN = 0
# CUDA_ARCH :=

# whether use cuda runtime compiling for writing kernels in native language (i.e. Python)
-USE_NVRTC = 0
+ENABLE_CUDA_RTC = 0

# use openmp for parallelization
USE_OPENMP = 0

make/maven/maven_linux_cu90mkl.mk (1 addition, 1 deletion)

@@ -80,7 +80,7 @@ USE_NCCL = 1

# whether use cuda runtime compiling for writing kernels in native language (i.e. Python)
USE_NVTX=1
-USE_NVRTC = 1
+ENABLE_CUDA_RTC = 1

# use openmp for parallelization
USE_OPENMP = 1

make/maven/maven_linux_cu92mkl.mk (1 addition, 1 deletion)

@@ -80,7 +80,7 @@ USE_NCCL = 1

# whether use cuda runtime compiling for writing kernels in native language (i.e. Python)
USE_NVTX=1
-USE_NVRTC = 1
+ENABLE_CUDA_RTC = 1

# use openmp for parallelization
USE_OPENMP = 1

make/maven/maven_linux_mkl.mk (1 addition, 1 deletion)

@@ -76,7 +76,7 @@ USE_CUDNN = 0
# CUDA_ARCH :=

# whether use cuda runtime compiling for writing kernels in native language (i.e. Python)
-USE_NVRTC = 0
+ENABLE_CUDA_RTC = 0

# use openmp for parallelization
USE_OPENMP = 1

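For context on what the renamed option controls: "cuda runtime compiling" in the comment above refers to NVRTC, NVIDIA's runtime compilation library, which MXNet uses to compile CUDA kernel source generated at runtime (for example for fused ops, or kernels authored from Python). The sketch below is not part of this PR; it is a minimal, generic NVRTC example (the axpy kernel and the compute_52 architecture are arbitrary choices for illustration) showing the kind of work the flag enables. It links against libnvrtc (e.g. -lnvrtc).

// Minimal NVRTC sketch (illustrative, not MXNet code): compile CUDA source at runtime to PTX.
#include <nvrtc.h>
#include <cstdio>
#include <vector>

int main() {
  const char* src =
      "extern \"C\" __global__ void axpy(float a, const float* x, float* y, int n) {\n"
      "  int i = blockIdx.x * blockDim.x + threadIdx.x;\n"
      "  if (i < n) y[i] += a * x[i];\n"
      "}\n";
  nvrtcProgram prog;
  if (nvrtcCreateProgram(&prog, src, "axpy.cu", 0, nullptr, nullptr) != NVRTC_SUCCESS) return 1;
  const char* opts[] = {"--gpu-architecture=compute_52"};  // architecture picked arbitrarily
  nvrtcResult res = nvrtcCompileProgram(prog, 1, opts);
  size_t ptx_size = 0;
  if (res == NVRTC_SUCCESS && nvrtcGetPTXSize(prog, &ptx_size) == NVRTC_SUCCESS) {
    std::vector<char> ptx(ptx_size);
    nvrtcGetPTX(prog, ptx.data());  // the PTX would then be loaded via the CUDA driver API
    std::printf("compiled %zu bytes of PTX\n", ptx_size);
  }
  nvrtcDestroyProgram(&prog);
  return res == NVRTC_SUCCESS ? 0 : 1;
}
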
src/executor/exec_pass.h (5 additions)

@@ -221,6 +221,11 @@ Graph FusePointwiseForward(Graph&& g);
 */
Graph FusePointwiseBackward(Graph&& g);

+/*!
+ * \brief Issue a one-time warning that fusion is not possible for this platform or build.
+ */
+void WarnFusionNotSupported();
+
/*!
 * \brief Infer shapes in the graph given the information.
 * \param graph The input graph.

src/executor/graph_executor.cc (7 additions, 2 deletions)

@@ -999,7 +999,7 @@ Graph GraphExecutor::InitGraph(nnvm::Symbol symbol,
  // setup gradient
  nnvm::Graph g = InitFullGraph(symbol, grad_req_types);

-#if MXNET_USE_CUDA && !defined(_WIN32)
+#if MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC && !defined(_WIN32)
  if (default_ctx.dev_mask() == Context::kGPU && dmlc::GetEnv("MXNET_USE_FUSION", true)) {
    nnvm::Graph unoptimized_graph;
    common::CopyGraph(&unoptimized_graph, g, false);
@@ -1032,7 +1032,12 @@ Graph GraphExecutor::InitGraph(nnvm::Symbol symbol,
        << "Graph contains duplicate names for some of its inputs - fusion is NOT enabled!";
    }
  }
-#endif // MXNET_USE_CUDA
+#else
+  // Only warn user if MXNET_USE_FUSION env var is explicitly set
+  if (default_ctx.dev_mask() == Context::kGPU && dmlc::GetEnv("MXNET_USE_FUSION", false)) {
+    WarnFusionNotSupported();
+  }
+#endif // MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC && !defined(_WIN32)

  // create "device" and "context" attrs for the graph
  g = AssignContext(g, default_ctx, ctx_map,

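Note the asymmetry in the env-var defaults above: when the build supports fusion, dmlc::GetEnv("MXNET_USE_FUSION", true) makes fusion opt-out, while the new #else branch uses a default of false, so the warning fires only when the user explicitly set MXNET_USE_FUSION. Below is a minimal standalone sketch of that pattern, not MXNet code; EnvFlag is a hypothetical stand-in for dmlc::GetEnv.

// Sketch of the compile-guard plus env-var pattern used in InitGraph/OptimizeGraph.
#include <cstdlib>
#include <cstring>
#include <iostream>

static bool EnvFlag(const char* name, bool dflt) {
  const char* v = std::getenv(name);
  if (v == nullptr) return dflt;     // unset: fall back to the default
  return std::strcmp(v, "0") != 0;   // crude truthiness check, good enough for the sketch
}

int main() {
#if MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC && !defined(_WIN32)
  if (EnvFlag("MXNET_USE_FUSION", true)) {   // fusion is opt-out when the build supports it
    std::cout << "running pointwise fusion passes\n";
  }
#else
  if (EnvFlag("MXNET_USE_FUSION", false)) {  // warn only if the user explicitly asked for fusion
    std::cout << "warning: fusion requested but not available in this build\n";
  }
#endif
  return 0;
}
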
src/executor/pointwise_fusion_pass.cc (19 additions, 3 deletions)

@@ -36,10 +36,26 @@
#include "../operator/fusion/fused_op.h"
#include "../operator/operator_common.h"

-#if MXNET_USE_CUDA
-
namespace mxnet {
namespace exec {

+void WarnFusionNotSupported() {
+  static bool issued_warning = false;
+  if (!issued_warning) {
+    issued_warning = true;
+#if defined(_WIN32)
+    LOG(WARNING) << "Omitting dynamic fused op creation- not enabled on Windows. "
+                 << "Unset env var MXNET_USE_FUSION=1 to quiet this message.";
+#else
+    LOG(WARNING) << "Omitting dynamic fused op creation- needs MXNet lib built with "
+                 << "USE_CUDA=1 and ENABLE_CUDA_RTC=1. Unset env var MXNET_USE_FUSION=1 "
+                 << "to quiet this message.";
+#endif // defined(_WIN32)
+  }
+}
+
+#if MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC
+
namespace {
bool IsFusionCompatible(nnvm::Node* n) {
  using namespace mxnet::fusion;
@@ -304,8 +320,8 @@ Graph FusePointwiseBackward(Graph &&g) {
  ret.outputs = g.outputs;
  return ret;
}
+#endif // MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC

} // namespace exec
} // namespace mxnet

-#endif // MXNET_USE_CUDA
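
WarnFusionNotSupported() above guards its log call with a function-local flag so the message is emitted at most once per process. The standalone sketch below (not MXNet code; WarnOnce is a hypothetical name) shows the same idiom using std::call_once, which additionally makes the "only once" guarantee hold under concurrent callers, whereas the PR itself uses a plain static bool.

// One-shot warning idiom, illustrated with std::call_once instead of a static bool.
#include <iostream>
#include <mutex>

void WarnOnce(const char* msg) {
  static std::once_flag flag;                  // shared across all calls in this process
  std::call_once(flag, [msg]() {
    std::cerr << "Warning: " << msg << "\n";   // runs exactly once, even with many threads
  });
}

int main() {
  WarnOnce("fusion not supported in this build");
  WarnOnce("fusion not supported in this build");  // second call prints nothing
  return 0;
}
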
src/imperative/cached_op.cc (8 additions, 5 deletions)

@@ -167,10 +167,8 @@ void SetRefCounts(nnvm::Graph* fwd_graph, const nnvm::Graph& full_graph) {

void OptimizeGraph(nnvm::Graph * full_graph, nnvm::Graph * fwd_graph, nnvm::Graph * grad_graph,
                   const Context& context, size_t num_forward_outputs, const bool inlining) {
-#if MXNET_USE_CUDA && !defined(_WIN32)
-  if (context.dev_mask() == kGPU &&
-      !inlining &&
-      dmlc::GetEnv("MXNET_USE_FUSION", true)) {
+#if MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC && !defined(_WIN32)
+  if (context.dev_mask() == kGPU && !inlining && dmlc::GetEnv("MXNET_USE_FUSION", true)) {
    nnvm::Graph unoptimized_graph;
    common::CopyGraph(&unoptimized_graph, *full_graph, false);

@@ -202,7 +200,12 @@ void OptimizeGraph(nnvm::Graph * full_graph, nnvm::Graph * fwd_graph, nnvm::Grap
        << "Graph contains duplicate names for some of its inputs - fusion is NOT enabled!";
    }
  }
-#endif // MXNET_USE_CUDA
+#else
+  // Only warn user if MXNET_USE_FUSION env var is explicitly set
+  if (context.dev_mask() == kGPU && !inlining && dmlc::GetEnv("MXNET_USE_FUSION", false)) {
+    WarnFusionNotSupported();
+  }
+#endif // MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC && !defined(_WIN32)

  *fwd_graph = nnvm::Graph();
  fwd_graph->outputs = std::vector<nnvm::NodeEntry>(full_graph->outputs.begin(),

src/operator/fusion/fused_op-inl.h (2 additions, 2 deletions)

@@ -24,7 +24,7 @@
#include <map>
#include <vector>

-#if MXNET_USE_CUDA
+#if MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC

namespace mxnet {

@@ -992,6 +992,6 @@ const char kernel_end[] = R"code(}

} // namespace mxnet

-#endif // MXNET_USE_CUDA
+#endif // MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC

#endif // MXNET_OPERATOR_FUSION_FUSED_OP_INL_H_

src/operator/fusion/fused_op.cc (3 additions, 3 deletions)

@@ -17,14 +17,14 @@
 * under the License.
 */

+#if MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC
+
#include <tuple>

#include "./fused_op.h"
#include "../operator_common.h"
#include "../../executor/exec_pass.h"

-#if MXNET_USE_CUDA
-
namespace mxnet {

DMLC_REGISTER_PARAMETER(FusedOpConfig);
@@ -302,4 +302,4 @@ NNVM_REGISTER_OP(_FusedOpOutHelper)

} // namespace mxnet

-#endif // MXNET_USE_CUDA
+#endif // MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC

src/operator/fusion/fused_op.cu (4 additions)

@@ -17,6 +17,8 @@
 * under the License.
 */

+#if MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC
+
#include <sys/stat.h>
#include <nvrtc.h>
#include <cuda.h>
@@ -787,3 +789,5 @@ NNVM_REGISTER_OP(_FusedOp)
.set_attr<FCompute>("FCompute<gpu>", FusedOpForwardGPU);

} // namespace mxnet
+
+#endif // MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC

src/operator/fusion/fused_op.h (3 additions, 4 deletions)

@@ -20,6 +20,7 @@
#ifndef MXNET_OPERATOR_FUSION_FUSED_OP_H_
#define MXNET_OPERATOR_FUSION_FUSED_OP_H_

+#if MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC

#include <mxnet/operator.h>
#include <nnvm/graph.h>
@@ -29,9 +30,6 @@
#include <mutex>
#include <tuple>

-#if MXNET_USE_CUDA
-
-
namespace mxnet {

namespace fusion {
@@ -202,5 +200,6 @@ using FusedOpHelperParamPtr = std::shared_ptr<FusedOpHelperParam>;

} // namespace mxnet

-#endif // MXNET_USE_CUDA
+#endif // MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC
+
#endif // MXNET_OPERATOR_FUSION_FUSED_OP_H_
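
These guards rely on the build defining MXNET_ENABLE_CUDA_RTC; how the ENABLE_CUDA_RTC build option is translated into that compile definition is not shown in this two-commit view. A macro that is never defined evaluates to 0 inside #if, so a build that omits the definition simply compiles the fused-op code out, which is what makes the new guards safe. A tiny self-contained illustration (not MXNet code) of that preprocessor behavior:

// Demonstrates that an undefined macro reads as 0 inside #if.
// Pass -DMXNET_USE_CUDA=1 -DMXNET_ENABLE_CUDA_RTC=1 to the compiler to flip the branch.
#include <iostream>

int main() {
#if MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC
  std::cout << "fused op support compiled in\n";
#else
  std::cout << "fused op support compiled out\n";  // taken when either macro is undefined or 0
#endif
  return 0;
}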