This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

MXNet Extensions enhancements2 #19016

Merged: 51 commits, Sep 1, 2020
Commits (changes shown from 50 of 51 commits)
4fea36d initial commit (Aug 11, 2020)
6d5fce2 Merge branch 'master' of https://github.com/apache/incubator-mxnet in… (Aug 15, 2020)
3cea397 split lib_api.h into lib_api.cc, updated examples for 2.0/gluon (Aug 16, 2020)
ead2684 fixed licenses (Aug 16, 2020)
51ce458 whitespace (Aug 16, 2020)
1ec330b whitespace (Aug 16, 2020)
bee854b modernize (Aug 16, 2020)
c36363e fix modernize (Aug 16, 2020)
53b4136 fix modernize (Aug 16, 2020)
2953891 fix modernize (Aug 16, 2020)
0c0cceb fixed move (Aug 16, 2020)
7cbc99b added lib_api.cc to CMakeLists.txt for example libs (Aug 16, 2020)
6965cd7 working example (Aug 16, 2020)
db84377 Merge branch 'master' of https://github.com/apache/incubator-mxnet in… (Aug 17, 2020)
42d00d0 remove GLOBAL to fix protobuf issue (Aug 17, 2020)
2379eed fixed library unload (Aug 17, 2020)
afa87a7 added test target (Aug 17, 2020)
b2f5a19 fixed sanity (Aug 17, 2020)
68a3733 changed destructor to default (Aug 17, 2020)
f90d8ad Merge branch 'master' of https://github.com/apache/incubator-mxnet in… (Aug 18, 2020)
04e88fb added /LD option for customop_gpu_lib target (Aug 19, 2020)
f4aaa84 Merge branch 'master' of https://github.com/apache/incubator-mxnet in… (Aug 19, 2020)
b9f67ef moved /LD inside the <> (Aug 19, 2020)
4b9a4dc diff compile flags for relu_lib.cu and lib_api.cc (Aug 19, 2020)
4afe182 set CMAKE_VERBOSE_MAKEFILE for debug (Aug 20, 2020)
7a36a40 added -v to ninja (Aug 20, 2020)
8607847 added /MT (Aug 20, 2020)
4165d02 another try (Aug 20, 2020)
55b441f Merge branch 'master' of https://github.com/apache/incubator-mxnet in… (Aug 20, 2020)
f7de08e changed /MT to -MT (Aug 20, 2020)
4b7d119 set flags for cxx separately (Aug 20, 2020)
c3719fd split /LD /MT flags (Aug 20, 2020)
1a79284 Merge branch 'master' of https://github.com/apache/incubator-mxnet in… (Aug 20, 2020)
f6b9082 Merge branch 'master' of https://github.com/apache/incubator-mxnet in… (Aug 21, 2020)
4335985 refactored cuda APIs into header file (Aug 21, 2020)
1ef7b0a removed debugging stuff (Aug 21, 2020)
d7e241b Merge branch 'master' of https://github.com/apache/incubator-mxnet in… (Aug 24, 2020)
cac8fba updated instructions for gpu build (Aug 25, 2020)
36e0a6a moved building into cmakelists (Aug 25, 2020)
caaa011 moved build stuff into separate CMakeLists.txt (Aug 25, 2020)
27c46d7 fixed gpu example (Aug 25, 2020)
591141a fixed license (Aug 25, 2020)
0a4621d added dlmc library dependency (Aug 25, 2020)
c2e534b added nnvm dependency (Aug 25, 2020)
fd9f836 removed nnvm dmlc dependencies, added WINDOWS_EXPORT_ALL_SYMBOLS option (Aug 25, 2020)
545aff6 fixed WINDOWS_EXPORT_ALL_SYMBOLS (Aug 25, 2020)
df85c38 changed nnvm to shared library (Aug 26, 2020)
bc80960 backed out external ops changes (Aug 26, 2020)
ab0cc43 split relu example into separate files to test separate lib_api.h/cc (Aug 28, 2020)
8fd4e2d sanity (Aug 28, 2020)
2431c9e addressed initial review items (Aug 28, 2020)
19 changes: 11 additions & 8 deletions CMakeLists.txt
@@ -700,19 +700,20 @@ endif()
target_compile_definitions(mxnet PUBLIC DMLC_LOG_FATAL_THROW=$<BOOL:${LOG_FATAL_THROW}>)

# extension libraries (custom operators, custom subgraphs) are built by default
-add_library(customop_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/gemm_lib.cc)
-add_library(transposecsr_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/transposecsr_lib.cc)
-add_library(transposerowsp_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/transposerowsp_lib.cc)
-add_library(subgraph_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_subgraph/subgraph_lib.cc)
-add_library(pass_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_pass/pass_lib.cc)
+add_library(customop_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/gemm_lib.cc ${CMAKE_CURRENT_SOURCE_DIR}/src/lib_api.cc)
+add_library(transposecsr_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/transposecsr_lib.cc ${CMAKE_CURRENT_SOURCE_DIR}/src/lib_api.cc)
+add_library(transposerowsp_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/transposerowsp_lib.cc ${CMAKE_CURRENT_SOURCE_DIR}/src/lib_api.cc)
+add_library(subgraph_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_subgraph/subgraph_lib.cc ${CMAKE_CURRENT_SOURCE_DIR}/src/lib_api.cc)
+add_library(pass_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_pass/pass_lib.cc ${CMAKE_CURRENT_SOURCE_DIR}/src/lib_api.cc)

Comment on lines +703 to +708

Contributor Author: Offline discussion with @leezu: in another PR we should move this build code into a CMakeLists.txt for each example and use add_subdirectory to include it, replacing the current Makefiles so there's only one set of build steps per example.
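A minimal sketch of what that per-example build could look like (hypothetical files and paths, assuming the add_subdirectory refactor described above; none of this is in the present PR):

# example/extensions/lib_custom_op/CMakeLists.txt (hypothetical sketch)
add_library(customop_lib SHARED gemm_lib.cc ${CMAKE_SOURCE_DIR}/src/lib_api.cc)
target_include_directories(customop_lib PUBLIC ${CMAKE_SOURCE_DIR}/include)

# the top-level CMakeLists.txt would then shrink to one line per example:
add_subdirectory(example/extensions/lib_custom_op)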

target_include_directories(customop_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet)
target_include_directories(transposecsr_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet)
target_include_directories(transposerowsp_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet)
target_include_directories(subgraph_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet)
target_include_directories(pass_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet)
if(USE_CUDA)
-add_library(customop_gpu_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/relu_lib.cu)
-target_include_directories(customop_gpu_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet)
+add_library(customop_gpu_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/relu_lib.cu ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/relu_lib.cc ${CMAKE_CURRENT_SOURCE_DIR}/src/lib_api.cc)
+target_include_directories(customop_gpu_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op)
endif()
if(UNIX)
if (USE_CUDA)
@@ -730,7 +731,9 @@ elseif(MSVC)
set_target_properties(subgraph_lib PROPERTIES PREFIX "lib")
set_target_properties(pass_lib PROPERTIES PREFIX "lib")
if(USE_CUDA)
-target_compile_options(customop_gpu_lib PUBLIC "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-fPIC>")
+target_compile_options(customop_gpu_lib PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-LD -MT>")
+target_compile_options(customop_gpu_lib PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:/LD>")
+target_compile_options(customop_gpu_lib PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:/MT>")
set_target_properties(customop_gpu_lib PROPERTIES PREFIX "lib")
endif()
endif()
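For context on the flags above (background, not from the diff itself): under MSVC, /LD builds a DLL and /MT links the static multithreaded C runtime, while nvcc only accepts host-compiler flags through -Xcompiler; that is why the CUDA and CXX cases need separate generator expressions, and the commit history ("changed /MT to -MT") suggests the CUDA-side flags also had to be respelled with a leading dash for nvcc to forward them.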
16 changes: 10 additions & 6 deletions config/linux_gpu.cmake
@@ -24,7 +24,8 @@
#
# $ cp config/linux_gpu.cmake config.cmake
#
-# Next modify the according entries, and then compile by
+# Next modify the entries in the config.cmake like MXNET_CUDA_ARCH to set the specific
+# GPU architecture, and then compile by
#
# $ mkdir build; cd build
# $ cmake ..
@@ -42,15 +43,18 @@ set(USE_CUDA ON CACHE BOOL "Build with CUDA support")
set(USE_CUDNN ON CACHE BOOL "Build with cudnn support, if found")

# Target NVIDIA GPU achitecture.
-# Valid options are "Auto" for autodetection, "All" for all available
-# architectures or a list of architectures by compute capability number, such as
-# "7.0" or "7.0;7.5" as well as name, such as "Volta" or "Volta;Turing".
+# Valid options are:
+#   - "Auto" for autodetection, will try and discover which GPU architecture to use by
+#     looking at the available GPUs on the machine that you're building on
+#   - "All" for all available GPU architectures supported by the version of CUDA installed
+#   - "specific GPU architectures" by giving the compute capability number such as
+#     "7.0" or "7.0;7.5" (ie. sm_70 or sm_75) or you can specify the name like:
+#     "Volta" or "Volta;Turing".
# The value specified here is passed to cmake's CUDA_SELECT_NVCC_ARCH_FLAGS to
# obtain the compilation flags for nvcc.
#
# When compiling on a machine without GPU, autodetection will fail and you
-# should instead specify the target architecture manually to avoid excessive
-# compilation times.
+# should instead specify the target architecture manually.
set(MXNET_CUDA_ARCH "Auto" CACHE STRING "Target NVIDIA GPU achitecture")

#---------------------------------------------
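As a concrete illustration of the options described above (example values only, not part of this diff), a config.cmake targeting Volta and Turing cards explicitly would contain:

set(MXNET_CUDA_ARCH "7.0;7.5" CACHE STRING "Target NVIDIA GPU architecture")

or the same selection by name, e.g. passing -DMXNET_CUDA_ARCH="Volta;Turing" on the cmake command line.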
2 changes: 1 addition & 1 deletion example/extensions/lib_api/Makefile
@@ -16,7 +16,7 @@
# under the License.

all:
-g++ -std=c++11 -shared -fPIC init_lib.cc -o libinit_lib.so -I ../../../include/mxnet
+g++ -std=c++11 -shared -fPIC init_lib.cc ../../../src/lib_api.cc -o libinit_lib.so -I ../../../include

test:
g++ -std=c++11 -O3 -o libtest libtest.cc -ldl -I ../../../include/mxnet
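For reference, a minimal sketch of how a built extension library can be sanity-checked outside of MXNet, roughly what the test target's libtest does (the actual libtest.cc is not shown in this diff, so treat the code below as an assumption-laden stand-in):

// libtest_sketch.cc: verify the extension library can be dlopen'd
// build: g++ -std=c++11 -o libtest_sketch libtest_sketch.cc -ldl
#include <dlfcn.h>
#include <iostream>

int main() {
  void* handle = dlopen("./libinit_lib.so", RTLD_LAZY);  // load the lib built above
  if (!handle) {
    std::cerr << "Unable to load library: " << dlerror() << std::endl;
    return 1;
  }
  std::cout << "Library loaded successfully" << std::endl;
  dlclose(handle);
  return 0;
}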
2 changes: 1 addition & 1 deletion example/extensions/lib_api/init_lib.cc
@@ -24,7 +24,7 @@
*/

#include <iostream>
#include "lib_api.h"
#include "mxnet/lib_api.h"

using namespace mxnet::ext;

11 changes: 7 additions & 4 deletions example/extensions/lib_custom_op/Makefile
@@ -18,16 +18,19 @@
all: gemm_lib relu_lib transposecsr_lib transposerowsp_lib

gemm_lib:
-g++ -shared -fPIC -std=c++11 gemm_lib.cc -o libgemm_lib.so -I ../../../include/mxnet
+g++ -shared -fPIC -std=c++11 gemm_lib.cc ../../../src/lib_api.cc -o libgemm_lib.so -I ../../../include

relu_lib:
-nvcc -shared -std=c++11 -Xcompiler -fPIC relu_lib.cu -o librelu_lib.so -I ../../../include/mxnet
+g++ -fPIC -c -std=c++11 relu_lib.cc -o relu_lib.cc.o -I ../../../include
+g++ -fPIC -c -std=c++11 ../../../src/lib_api.cc -o lib_api.cc.o -I ../../../include
+nvcc -c -std=c++11 -Xcompiler -fPIC relu_lib.cu -o relu_lib.cu.o -I ../../../include
+nvcc -shared relu_lib.cc.o lib_api.cc.o relu_lib.cu.o -o librelu_lib.so

transposecsr_lib:
-g++ -shared -fPIC -std=c++11 transposecsr_lib.cc -o libtransposecsr_lib.so -I ../../../include/mxnet
+g++ -shared -fPIC -std=c++11 transposecsr_lib.cc ../../../src/lib_api.cc -o libtransposecsr_lib.so -I ../../../include

transposerowsp_lib:
-g++ -shared -fPIC -std=c++11 transposerowsp_lib.cc -o libtransposerowsp_lib.so -I ../../../include/mxnet
+g++ -shared -fPIC -std=c++11 transposerowsp_lib.cc ../../../src/lib_api.cc -o libtransposerowsp_lib.so -I ../../../include

clean:
rm -rf libgemm_lib.so librelu_lib.so libtransposecsr_lib.so libtransposerowsp_lib.so
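Worth noting about the relu_lib rule above (explanatory note, not from the PR): the C++ sources (relu_lib.cc, lib_api.cc) are compiled by g++ and only the CUDA kernels (relu_lib.cu) go through nvcc, with a final nvcc link combining the three objects into librelu_lib.so. This mirrors the split compile flags introduced in CMakeLists.txt.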
2 changes: 1 addition & 1 deletion example/extensions/lib_custom_op/gemm_lib.cc
@@ -25,7 +25,7 @@

#include <iostream>
#include <utility>
#include "lib_api.h"
#include "mxnet/lib_api.h"

using namespace mxnet::ext;

171 changes: 171 additions & 0 deletions example/extensions/lib_custom_op/relu_lib.cc
@@ -0,0 +1,171 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* Copyright (c) 2020 by Contributors
* \file relu_lib.cc
* \brief simple custom relu and noisy relu operator implemented using CUDA function
*/

#include <iostream>
#include "relu_lib.h"

using namespace mxnet::ext;

MXReturnValue parseAttrs(const std::unordered_map<std::string, std::string>& attrs,
int* num_in, int* num_out) {
*num_in = 1;
*num_out = 1;
return MX_SUCCESS;
}

MXReturnValue inferType(const std::unordered_map<std::string, std::string>& attrs,
std::vector<int>* intypes,
std::vector<int>* outtypes) {
outtypes->at(0) = intypes->at(0);
return MX_SUCCESS;
}

MXReturnValue inferShape(const std::unordered_map<std::string, std::string>& attrs,
std::vector<std::vector<unsigned int>>* inshapes,
std::vector<std::vector<unsigned int>>* outshapes) {
outshapes->at(0) = inshapes->at(0);
return MX_SUCCESS;
}

MXReturnValue forwardCPU(const std::unordered_map<std::string, std::string>& attrs,
std::vector<MXTensor>* inputs,
std::vector<MXTensor>* outputs,
const OpResource& res) {
float* in_data = inputs->at(0).data<float>();
float* out_data = outputs->at(0).data<float>();
for (int i=0; i<inputs->at(0).size(); i++) {
out_data[i] = in_data[i] > 0 ? in_data[i] : 0;
}
return MX_SUCCESS;
}

MXReturnValue backwardCPU(const std::unordered_map<std::string, std::string>& attrs,
std::vector<MXTensor>* inputs,
std::vector<MXTensor>* outputs,
const OpResource& res) {
float* out_grad = inputs->at(0).data<float>();
float* in_data = inputs->at(1).data<float>();
float* in_grad = outputs->at(0).data<float>();
for (int i=0; i<inputs->at(1).size(); i++) {
in_grad[i] = in_data[i] > 0 ? 1 * out_grad[i] : 0;
}
return MX_SUCCESS;
}

REGISTER_OP(my_relu)
.setParseAttrs(parseAttrs)
.setInferType(inferType)
.setInferShape(inferShape)
.setForward(forwardCPU, "cpu")
.setForward(forwardGPU, "gpu")
.setBackward(backwardCPU, "cpu")
.setBackward(backwardGPU, "gpu");


MyStatefulReluCPU::MyStatefulReluCPU(const std::unordered_map<std::string, std::string>& attrs)
: attrs_(attrs) {}

MXReturnValue MyStatefulReluCPU::Forward(std::vector<MXTensor>* inputs,
std::vector<MXTensor>* outputs,
const OpResource& op_res) {
return forwardCPU(attrs_, inputs, outputs, op_res);
}

MXReturnValue MyStatefulReluCPU::Backward(std::vector<MXTensor>* inputs,
std::vector<MXTensor>* outputs,
const OpResource& op_res) {
return backwardCPU(attrs_, inputs, outputs, op_res);
}

MyStatefulReluGPU::MyStatefulReluGPU(const std::unordered_map<std::string, std::string>& attrs)
: attrs_(attrs) {}

MXReturnValue MyStatefulReluGPU::Forward(std::vector<MXTensor>* inputs,
std::vector<MXTensor>* outputs,
const OpResource& op_res) {
return forwardGPU(attrs_, inputs, outputs, op_res);
}

MXReturnValue MyStatefulReluGPU::Backward(std::vector<MXTensor>* inputs,
std::vector<MXTensor>* outputs,
const OpResource& op_res) {
return backwardGPU(attrs_, inputs, outputs, op_res);
}


MXReturnValue createOpStateCPU(const std::unordered_map<std::string, std::string>& attrs,
CustomStatefulOp** op_inst) {
*op_inst = new MyStatefulReluCPU(attrs);
return MX_SUCCESS;
}

MXReturnValue createOpStateGPU(const std::unordered_map<std::string, std::string>& attrs,
CustomStatefulOp** op_inst) {
*op_inst = new MyStatefulReluGPU(attrs);
return MX_SUCCESS;
}

REGISTER_OP(my_state_relu)
.setParseAttrs(parseAttrs)
.setInferType(inferType)
.setInferShape(inferShape)
.setCreateOpState(createOpStateCPU, "cpu")
.setCreateOpState(createOpStateGPU, "gpu");

MXReturnValue noisyForwardCPU(const std::unordered_map<std::string, std::string>& attrs,
std::vector<MXTensor>* inputs,
std::vector<MXTensor>* outputs,
const OpResource& res) {
float* in_data = inputs->at(0).data<float>();
float* out_data = outputs->at(0).data<float>();

mx_cpu_rand_t* states = res.get_cpu_rand_states();
std::normal_distribution<float> dist_normal;

for (int i=0; i<inputs->at(0).size(); ++i) {
float noise = dist_normal(*states);
out_data[i] = in_data[i] + noise > 0 ? in_data[i] + noise : 0;
}
return MX_SUCCESS;
}

REGISTER_OP(my_noisy_relu)
.setParseAttrs(parseAttrs)
.setInferType(inferType)
.setInferShape(inferShape)
.setForward(noisyForwardCPU, "cpu")
.setForward(noisyForwardGPU, "gpu")
.setBackward(backwardCPU, "cpu")
.setBackward(backwardGPU, "gpu");

MXReturnValue initialize(int version) {
if (version >= 20000) {
Contributor: are you sure about this? since gemm_lib is still 10700

Contributor Author: Yes, we should update all the examples to 20000 on master. I'll do that in the next PR.

Contributor: discussed offline; we will change the examples to correspond to master.

std::cout << "MXNet version " << version << " supported" << std::endl;
return MX_SUCCESS;
} else {
MX_ERROR_MSG << "MXNet version " << version << " not supported";
return MX_FAIL;
}
}
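For completeness, a usage sketch for the ops registered above (not part of this diff; assumes librelu_lib.so was built as shown earlier and uses the NDArray frontend as in the existing lib_custom_op examples):

# hypothetical test script: load the extension library and run the custom op
import mxnet as mx

mx.library.load('./librelu_lib.so')            # registers my_relu, my_state_relu, my_noisy_relu
a = mx.nd.array([[-2.0, 0.5], [1.0, -0.1]])    # sample input on CPU
b = mx.nd.my_relu(a)                           # custom op exposed under mx.nd after loading
print(b)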