MXNet Extensions enhancements2 (#19016)
* initial commit

* split lib_api.h into lib_api.cc, updated examples for 2.0/gluon

* fixed licenses

* whitespace

* whitespace

* modernize

* fix modernize

* fix modernize

* fix modernize

* fixed move

* added lib_api.cc to CMakeLists.txt for example libs

* working example

* remove GLOBAL to fix protobuf issue

* fixed library unload

* added test target

* fixed sanity

* changed destructor to default

* added /LD option for customop_gpu_lib target

* moved /LD inside the <>

* diff compile flags for relu_lib.cu and lib_api.cc

* set CMAKE_VERBOSE_MAKEFILE for debug

* added -v to ninja

* added /MT

* another try

* changed /MT to -MT

* set flags for cxx separately

* split /LD /MT flags

* refactored cuda APIs into header file

* removed debugging stuff

* updated instructions for gpu build

* moved building into cmakelists

* moved build stuff into separate CMakeLists.txt

* fixed gpu example

* fixed license

* added dmlc library dependency

* added nnvm dependency

* removed nnvm dmlc dependencies, added WINDOWS_EXPORT_ALL_SYMBOLS option

* fixed WINDOWS_EXPORT_ALL_SYMBOLS

* changed nnvm to shared library

* backed out external ops changes

* split relu example into separate files to test separate lib_api.h/cc

* sanity

* addressed initial review items

Co-authored-by: Ubuntu <ubuntu@ip-172-31-6-220.us-west-2.compute.internal>
samskalicky and Ubuntu authored Sep 1, 2020
1 parent 9268f89 commit 8379740
Showing 23 changed files with 2,150 additions and 1,556 deletions.
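
The theme across the diffs below: lib_api.h was split so the implementations live in src/lib_api.cc, and every extension library now compiles that file alongside its own sources while including the header as mxnet/lib_api.h. A minimal sketch of an extension source under the new layout (my_ext.cc and its build line are hypothetical, mirroring init_lib.cc and the Makefiles below):

// my_ext.cc -- hypothetical minimal extension under the post-split layout.
// Build together with the implementation file, e.g. from the repo root:
//   g++ -std=c++11 -shared -fPIC my_ext.cc src/lib_api.cc -o libmy_ext.so -I include
#include <iostream>
#include "mxnet/lib_api.h"  // header now holds declarations only

using namespace mxnet::ext;

// called once when MXNet loads the library; same hook as in relu_lib.cc below
MXReturnValue initialize(int version) {
  if (version >= 20000) {  // this commit targets MXNet 2.0
    std::cout << "MXNet version " << version << " supported" << std::endl;
    return MX_SUCCESS;
  }
  MX_ERROR_MSG << "MXNet version " << version << " not supported";
  return MX_FAIL;
}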
CMakeLists.txt (19 changes: 11 additions & 8 deletions)
@@ -722,19 +722,20 @@ endif()
target_compile_definitions(mxnet PUBLIC DMLC_LOG_FATAL_THROW=$<BOOL:${LOG_FATAL_THROW}>)

# extension libraries (custom operators, custom subgraphs) are built by default
-add_library(customop_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/gemm_lib.cc)
-add_library(transposecsr_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/transposecsr_lib.cc)
-add_library(transposerowsp_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/transposerowsp_lib.cc)
-add_library(subgraph_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_subgraph/subgraph_lib.cc)
-add_library(pass_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_pass/pass_lib.cc)
+add_library(customop_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/gemm_lib.cc ${CMAKE_CURRENT_SOURCE_DIR}/src/lib_api.cc)
+add_library(transposecsr_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/transposecsr_lib.cc ${CMAKE_CURRENT_SOURCE_DIR}/src/lib_api.cc)
+add_library(transposerowsp_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/transposerowsp_lib.cc ${CMAKE_CURRENT_SOURCE_DIR}/src/lib_api.cc)
+add_library(subgraph_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_subgraph/subgraph_lib.cc ${CMAKE_CURRENT_SOURCE_DIR}/src/lib_api.cc)
+add_library(pass_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_pass/pass_lib.cc ${CMAKE_CURRENT_SOURCE_DIR}/src/lib_api.cc)

target_include_directories(customop_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet)
target_include_directories(transposecsr_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet)
target_include_directories(transposerowsp_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet)
target_include_directories(subgraph_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet)
target_include_directories(pass_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet)
if(USE_CUDA)
-add_library(customop_gpu_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/relu_lib.cu)
-target_include_directories(customop_gpu_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet)
+add_library(customop_gpu_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/relu_lib.cu ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/relu_lib.cc ${CMAKE_CURRENT_SOURCE_DIR}/src/lib_api.cc)
+target_include_directories(customop_gpu_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op)
endif()
if(UNIX)
if (USE_CUDA)
@@ -752,7 +753,9 @@ elseif(MSVC)
set_target_properties(subgraph_lib PROPERTIES PREFIX "lib")
set_target_properties(pass_lib PROPERTIES PREFIX "lib")
if(USE_CUDA)
-target_compile_options(customop_gpu_lib PUBLIC "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-fPIC>")
+target_compile_options(customop_gpu_lib PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-LD -MT>")
+target_compile_options(customop_gpu_lib PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:/LD>")
+target_compile_options(customop_gpu_lib PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:/MT>")
set_target_properties(customop_gpu_lib PROPERTIES PREFIX "lib")
endif()
endif()
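The MSVC branch above is where the /LD and /MT experiments from the commit log landed: a target mixing CUDA and C++ sources needs its host flags delivered per language, through -Xcompiler for nvcc and directly for cl. A generalized sketch of that pattern (my_mixed_lib, kernel.cu, and host.cc are hypothetical names):

# Hypothetical mixed CUDA/C++ shared library on MSVC. nvcc only forwards
# host-compiler flags passed via -Xcompiler, so the same options are
# spelled differently per language.
add_library(my_mixed_lib SHARED kernel.cu host.cc)
target_compile_options(my_mixed_lib PRIVATE
  "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-LD -MT>"  # forwarded to cl by nvcc
  "$<$<COMPILE_LANGUAGE:CXX>:/LD>"                  # given to cl directly
  "$<$<COMPILE_LANGUAGE:CXX>:/MT>")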
config/linux_gpu.cmake (16 changes: 10 additions & 6 deletions)
@@ -24,7 +24,8 @@
#
# $ cp config/linux_gpu.cmake config.cmake
#
-# Next modify the according entries, and then compile by
+# Next modify the entries in the config.cmake like MXNET_CUDA_ARCH to set the specific
+# GPU architecture, and then compile by
#
# $ mkdir build; cd build
# $ cmake ..
@@ -42,15 +43,18 @@ set(USE_CUDA ON CACHE BOOL "Build with CUDA support")
set(USE_CUDNN ON CACHE BOOL "Build with cudnn support, if found")

# Target NVIDIA GPU achitecture.
-# Valid options are "Auto" for autodetection, "All" for all available
-# architectures or a list of architectures by compute capability number, such as
-# "7.0" or "7.0;7.5" as well as name, such as "Volta" or "Volta;Turing".
+# Valid options are:
+# - "Auto" for autodetection, will try and discover which GPU architecture to use by
+#   looking at the available GPUs on the machine that you're building on
+# - "All" for all available GPU architectures supported by the version of CUDA installed
+# - "specific GPU architectures" by giving the compute capability number such as
+#   "7.0" or "7.0;7.5" (ie. sm_70 or sm_75) or you can specify the name like:
+#   "Volta" or "Volta;Turing", be sure not to use quotes (ie. just set to 7.0)
# The value specified here is passed to cmake's CUDA_SELECT_NVCC_ARCH_FLAGS to
# obtain the compilation flags for nvcc.
#
# When compiling on a machine without GPU, autodetection will fail and you
-# should instead specify the target architecture manually to avoid excessive
-# compilation times.
+# should instead specify the target architecture manually.
set(MXNET_CUDA_ARCH "Auto" CACHE STRING "Target NVIDIA GPU achitecture")

#---------------------------------------------
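For example, pinning the architectures instead of autodetecting (useful on a GPU-less build machine, per the note above) is a one-line change in config.cmake; the "7.0;7.5" pair here is just illustrative:

# Volta + Turing, instead of the default "Auto"
set(MXNET_CUDA_ARCH "7.0;7.5" CACHE STRING "Target NVIDIA GPU architecture")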
example/extensions/lib_api/Makefile (2 changes: 1 addition & 1 deletion)
@@ -16,7 +16,7 @@
# under the License.

all:
-g++ -std=c++11 -shared -fPIC init_lib.cc -o libinit_lib.so -I ../../../include/mxnet
+g++ -std=c++11 -shared -fPIC init_lib.cc ../../../src/lib_api.cc -o libinit_lib.so -I ../../../include

test:
g++ -std=c++11 -O3 -o libtest libtest.cc -ldl -I ../../../include/mxnet
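The workflow here is unchanged; only the compile line gained lib_api.cc and the broader include root. A quick round trip (paths assume the repo layout above; the test harness links -ldl, so it loads the built library dynamically):

cd example/extensions/lib_api
make all    # builds libinit_lib.so, now with lib_api.cc compiled in
make test   # builds libtest, which loads the .so at runtime via libdl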
example/extensions/lib_api/init_lib.cc (2 changes: 1 addition & 1 deletion)
@@ -24,7 +24,7 @@
*/

#include <iostream>
#include "lib_api.h"
#include "mxnet/lib_api.h"

using namespace mxnet::ext;

example/extensions/lib_custom_op/Makefile (11 changes: 7 additions & 4 deletions)
@@ -18,16 +18,19 @@
all: gemm_lib relu_lib transposecsr_lib transposerowsp_lib

gemm_lib:
-g++ -shared -fPIC -std=c++11 gemm_lib.cc -o libgemm_lib.so -I ../../../include/mxnet
+g++ -shared -fPIC -std=c++11 gemm_lib.cc ../../../src/lib_api.cc -o libgemm_lib.so -I ../../../include

relu_lib:
-nvcc -shared -std=c++11 -Xcompiler -fPIC relu_lib.cu -o librelu_lib.so -I ../../../include/mxnet
+g++ -fPIC -c -std=c++11 relu_lib.cc -o relu_lib.cc.o -I ../../../include
+g++ -fPIC -c -std=c++11 ../../../src/lib_api.cc -o lib_api.cc.o -I ../../../include
+nvcc -c -std=c++11 -Xcompiler -fPIC relu_lib.cu -o relu_lib.cu.o -I ../../../include
+nvcc -shared relu_lib.cc.o lib_api.cc.o relu_lib.cu.o -o librelu_lib.so

transposecsr_lib:
-g++ -shared -fPIC -std=c++11 transposecsr_lib.cc -o libtransposecsr_lib.so -I ../../../include/mxnet
+g++ -shared -fPIC -std=c++11 transposecsr_lib.cc ../../../src/lib_api.cc -o libtransposecsr_lib.so -I ../../../include

transposerowsp_lib:
-g++ -shared -fPIC -std=c++11 transposerowsp_lib.cc -o libtransposerowsp_lib.so -I ../../../include/mxnet
+g++ -shared -fPIC -std=c++11 transposerowsp_lib.cc ../../../src/lib_api.cc -o libtransposerowsp_lib.so -I ../../../include

clean:
rm -rf libgemm_lib.so librelu_lib.so libtransposecsr_lib.so libtransposerowsp_lib.so
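The relu rule above is the pattern for any mixed CUDA/C++ extension: compile host C++ and device code to objects separately, then let nvcc link the shared library. This keeps lib_api.cc under the host compiler rather than nvcc, matching the "diff compile flags for relu_lib.cu and lib_api.cc" step in the commit log. A generalized sketch (the my_op.* names are hypothetical):

g++  -fPIC -c -std=c++11 my_op.cc -o my_op.cc.o -I ../../../include
g++  -fPIC -c -std=c++11 ../../../src/lib_api.cc -o lib_api.cc.o -I ../../../include
nvcc -c -std=c++11 -Xcompiler -fPIC my_op.cu -o my_op.cu.o -I ../../../include
nvcc -shared my_op.cc.o lib_api.cc.o my_op.cu.o -o libmy_op.so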
example/extensions/lib_custom_op/gemm_lib.cc (2 changes: 1 addition & 1 deletion)
@@ -25,7 +25,7 @@

#include <iostream>
#include <utility>
#include "lib_api.h"
#include "mxnet/lib_api.h"

using namespace mxnet::ext;

example/extensions/lib_custom_op/relu_lib.cc (171 changes: 171 additions & 0 deletions)
@@ -0,0 +1,171 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* Copyright (c) 2020 by Contributors
* \file relu_lib.cc
* \brief simple custom relu and noisy relu operators: CPU implementations and op registration (the CUDA kernels live in relu_lib.cu)
*/

#include <iostream>
#include "relu_lib.h"

using namespace mxnet::ext;

MXReturnValue parseAttrs(const std::unordered_map<std::string, std::string>& attrs,
int* num_in, int* num_out) {
*num_in = 1;
*num_out = 1;
return MX_SUCCESS;
}

MXReturnValue inferType(const std::unordered_map<std::string, std::string>& attrs,
std::vector<int>* intypes,
std::vector<int>* outtypes) {
outtypes->at(0) = intypes->at(0);
return MX_SUCCESS;
}

MXReturnValue inferShape(const std::unordered_map<std::string, std::string>& attrs,
std::vector<std::vector<unsigned int>>* inshapes,
std::vector<std::vector<unsigned int>>* outshapes) {
outshapes->at(0) = inshapes->at(0);
return MX_SUCCESS;
}

MXReturnValue forwardCPU(const std::unordered_map<std::string, std::string>& attrs,
std::vector<MXTensor>* inputs,
std::vector<MXTensor>* outputs,
const OpResource& res) {
float* in_data = inputs->at(0).data<float>();
float* out_data = outputs->at(0).data<float>();
for (int i=0; i<inputs->at(0).size(); i++) {
out_data[i] = in_data[i] > 0 ? in_data[i] : 0;
}
return MX_SUCCESS;
}

MXReturnValue backwardCPU(const std::unordered_map<std::string, std::string>& attrs,
std::vector<MXTensor>* inputs,
std::vector<MXTensor>* outputs,
const OpResource& res) {
float* out_grad = inputs->at(0).data<float>();
float* in_data = inputs->at(1).data<float>();
float* in_grad = outputs->at(0).data<float>();
for (int i=0; i<inputs->at(1).size(); i++) {
in_grad[i] = in_data[i] > 0 ? 1 * out_grad[i] : 0;
}
return MX_SUCCESS;
}

REGISTER_OP(my_relu)
.setParseAttrs(parseAttrs)
.setInferType(inferType)
.setInferShape(inferShape)
.setForward(forwardCPU, "cpu")
.setForward(forwardGPU, "gpu")
.setBackward(backwardCPU, "cpu")
.setBackward(backwardGPU, "gpu");


MyStatefulReluCPU::MyStatefulReluCPU(const std::unordered_map<std::string, std::string>& attrs)
: attrs_(attrs) {}

MXReturnValue MyStatefulReluCPU::Forward(std::vector<MXTensor>* inputs,
std::vector<MXTensor>* outputs,
const OpResource& op_res) {
return forwardCPU(attrs_, inputs, outputs, op_res);
}

MXReturnValue MyStatefulReluCPU::Backward(std::vector<MXTensor>* inputs,
std::vector<MXTensor>* outputs,
const OpResource& op_res) {
return backwardCPU(attrs_, inputs, outputs, op_res);
}

MyStatefulReluGPU::MyStatefulReluGPU(const std::unordered_map<std::string, std::string>& attrs)
: attrs_(attrs) {}

MXReturnValue MyStatefulReluGPU::Forward(std::vector<MXTensor>* inputs,
std::vector<MXTensor>* outputs,
const OpResource& op_res) {
return forwardGPU(attrs_, inputs, outputs, op_res);
}

MXReturnValue MyStatefulReluGPU::Backward(std::vector<MXTensor>* inputs,
std::vector<MXTensor>* outputs,
const OpResource& op_res) {
return backwardGPU(attrs_, inputs, outputs, op_res);
}


MXReturnValue createOpStateCPU(const std::unordered_map<std::string, std::string>& attrs,
CustomStatefulOp** op_inst) {
*op_inst = new MyStatefulReluCPU(attrs);
return MX_SUCCESS;
}

MXReturnValue createOpStateGPU(const std::unordered_map<std::string, std::string>& attrs,
CustomStatefulOp** op_inst) {
*op_inst = new MyStatefulReluGPU(attrs);
return MX_SUCCESS;
}

REGISTER_OP(my_state_relu)
.setParseAttrs(parseAttrs)
.setInferType(inferType)
.setInferShape(inferShape)
.setCreateOpState(createOpStateCPU, "cpu")
.setCreateOpState(createOpStateGPU, "gpu");

MXReturnValue noisyForwardCPU(const std::unordered_map<std::string, std::string>& attrs,
std::vector<MXTensor>* inputs,
std::vector<MXTensor>* outputs,
const OpResource& res) {
float* in_data = inputs->at(0).data<float>();
float* out_data = outputs->at(0).data<float>();

mx_cpu_rand_t* states = res.get_cpu_rand_states();
std::normal_distribution<float> dist_normal;

for (int i=0; i<inputs->at(0).size(); ++i) {
float noise = dist_normal(*states);
out_data[i] = in_data[i] + noise > 0 ? in_data[i] + noise : 0;
}
return MX_SUCCESS;
}

REGISTER_OP(my_noisy_relu)
.setParseAttrs(parseAttrs)
.setInferType(inferType)
.setInferShape(inferShape)
.setForward(noisyForwardCPU, "cpu")
.setForward(noisyForwardGPU, "gpu")
.setBackward(backwardCPU, "cpu")
.setBackward(backwardGPU, "gpu");

MXReturnValue initialize(int version) {
if (version >= 20000) {
std::cout << "MXNet version " << version << " supported" << std::endl;
return MX_SUCCESS;
} else {
MX_ERROR_MSG << "MXNet version " << version << " not supported";
return MX_FAIL;
}
}
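Once built, the operators registered above behave like ordinary MXNet ops after the library is loaded. A hedged usage sketch (mx.library.load is the standard extension-loading entry point; whether custom ops still surface under mx.nd in a 2.0/gluon setup is an assumption carried over from the 1.x examples):

import mxnet as mx

# runs initialize() and registers my_relu, my_state_relu, my_noisy_relu
mx.library.load('librelu_lib.so')

x = mx.nd.random.uniform(-1, 1, shape=(2, 3))
y = mx.nd.my_relu(x)  # dispatches to forwardCPU on the default context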
