From 6150f4a6f257c6b688c7f3366b1a64e69d74e706 Mon Sep 17 00:00:00 2001 From: Kellen Sunderland Date: Sun, 18 Nov 2018 08:49:53 -0800 Subject: [PATCH] [MXNET-703] Fix incorrect predictions, update onnx-tensorrt Updates IR used to pass subgraphs to ONNX3 v8. Fixes a number of bugs including crashes. Adds support for TensorRT 5. --- 3rdparty/onnx-tensorrt | 2 +- Makefile | 3 +- .../Dockerfile.build.ubuntu_gpu_tensorrt | 5 +- ci/docker/install/tensorrt.sh | 4 +- src/executor/onnx_to_tensorrt.cc | 2 +- src/executor/tensorrt_pass.cc | 2 +- src/executor/trt_graph_executor.cc | 2 +- src/operator/contrib/nnvm_to_onnx-inl.h | 2 +- src/operator/contrib/nnvm_to_onnx.cc | 126 +++++++++--------- src/operator/contrib/tensorrt-inl.h | 2 +- 10 files changed, 75 insertions(+), 75 deletions(-) diff --git a/3rdparty/onnx-tensorrt b/3rdparty/onnx-tensorrt index 3d8ee049970e..f1c7aa63d88d 160000 --- a/3rdparty/onnx-tensorrt +++ b/3rdparty/onnx-tensorrt @@ -1 +1 @@ -Subproject commit 3d8ee049970e81ff4935cc7f36b653c0b27bcbbc +Subproject commit f1c7aa63d88d8d8ef70490f2ebb6b33f7450218b diff --git a/Makefile b/Makefile index ad7f0ff3485f..afe5a8ae454b 100644 --- a/Makefile +++ b/Makefile @@ -98,10 +98,9 @@ ifeq ($(ENABLE_TESTCOVERAGE), 1) endif ifeq ($(USE_TENSORRT), 1) - CFLAGS += -I$(ROOTDIR) -I$(TPARTYDIR) -DONNX_NAMESPACE=$(ONNX_NAMESPACE) -DMXNET_USE_TENSORRT=1 + CFLAGS += -I$(ROOTDIR) -I$(TPARTYDIR) -I$(TPARTYDIR)/onnx-tensorrt/third_party/onnx/ -DONNX_NAMESPACE=$(ONNX_NAMESPACE) -DMXNET_USE_TENSORRT=1 LDFLAGS += -lprotobuf -pthread -lonnx -lonnx_proto -lnvonnxparser -lnvonnxparser_runtime -lnvinfer -lnvinfer_plugin endif -# -L/usr/local/lib ifeq ($(DEBUG), 1) NVCCFLAGS += -std=c++11 -Xcompiler -D_FORCE_INLINES -g -G -O0 -ccbin $(CXX) $(MSHADOW_NVCCFLAGS) diff --git a/ci/docker/Dockerfile.build.ubuntu_gpu_tensorrt b/ci/docker/Dockerfile.build.ubuntu_gpu_tensorrt index 255da316041f..26051ea3cd6d 100644 --- a/ci/docker/Dockerfile.build.ubuntu_gpu_tensorrt +++ 
b/ci/docker/Dockerfile.build.ubuntu_gpu_tensorrt @@ -18,7 +18,10 @@ # # Dockerfile to run MXNet on Ubuntu 16.04 for CPU -FROM nvidia/cuda:9.0-cudnn7-devel +FROM nvidia/cuda:10.0-cudnn7-devel + +# Avoid interactive package installers. +ENV DEBIAN_FRONTEND noninteractive WORKDIR /work/deps diff --git a/ci/docker/install/tensorrt.sh b/ci/docker/install/tensorrt.sh index 61e73ef9a62f..1950cad0b52f 100755 --- a/ci/docker/install/tensorrt.sh +++ b/ci/docker/install/tensorrt.sh @@ -26,7 +26,7 @@ pip3 install gluoncv==0.2.0 pushd . cd .. apt-get update -apt-get install -y automake libtool +apt-get install -y automake libtool zip git clone --recursive -b 3.5.1.1 /~https://github.com/google/protobuf.git cd protobuf ./autogen.sh @@ -41,7 +41,7 @@ popd # Install TensorRT echo "TensorRT build enabled. Installing TensorRT." -wget -qO tensorrt.deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/nvinfer-runtime-trt-repo-ubuntu1604-4.0.1-ga-cuda9.0_1-1_amd64.deb +wget -qO tensorrt.deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda10.0_1-1_amd64.deb dpkg -i tensorrt.deb apt-get update apt-get install -y --allow-downgrades libnvinfer-dev diff --git a/src/executor/onnx_to_tensorrt.cc b/src/executor/onnx_to_tensorrt.cc index e3a4ae868ce2..c37b856f9d62 100644 --- a/src/executor/onnx_to_tensorrt.cc +++ b/src/executor/onnx_to_tensorrt.cc @@ -28,7 +28,7 @@ #include "./onnx_to_tensorrt.h" -#include +#include #include #include diff --git a/src/executor/tensorrt_pass.cc b/src/executor/tensorrt_pass.cc index b5fc8d15f7ac..e95b88ae8461 100644 --- a/src/executor/tensorrt_pass.cc +++ b/src/executor/tensorrt_pass.cc @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include "../operator/contrib/nnvm_to_onnx-inl.h" #include "./exec_pass.h" diff --git a/src/executor/trt_graph_executor.cc b/src/executor/trt_graph_executor.cc index 65dbb29792e0..b3291f8c902b 
100644 --- a/src/executor/trt_graph_executor.cc +++ b/src/executor/trt_graph_executor.cc @@ -21,7 +21,7 @@ #include "trt_graph_executor.h" -#include +#include #include #include "./onnx_to_tensorrt.h" #include "../operator/contrib/tensorrt-inl.h" diff --git a/src/operator/contrib/nnvm_to_onnx-inl.h b/src/operator/contrib/nnvm_to_onnx-inl.h index 58f88b051433..3a7d1c4288f4 100644 --- a/src/operator/contrib/nnvm_to_onnx-inl.h +++ b/src/operator/contrib/nnvm_to_onnx-inl.h @@ -38,7 +38,7 @@ #include #include -#include +#include #include #include diff --git a/src/operator/contrib/nnvm_to_onnx.cc b/src/operator/contrib/nnvm_to_onnx.cc index 902466614c7c..f04d8ee9482c 100644 --- a/src/operator/contrib/nnvm_to_onnx.cc +++ b/src/operator/contrib/nnvm_to_onnx.cc @@ -60,15 +60,21 @@ namespace nnvm_to_onnx { op::TRTParam ConvertNnvmGraphToOnnx( const nnvm::Graph& g, std::unordered_map* const shared_buffer) { - op::TRTParam trt_param; - op::tensorrt::NameToIdx_t trt_input_map; - op::tensorrt::InferenceMap_t trt_output_map; + static std::atomic_ulong subgraph_count = { 0 }; + op::TRTParam trt_param; + op::tensorrt::NameToIdx_t trt_input_map; + op::tensorrt::InferenceMap_t trt_output_map; const nnvm::IndexedGraph& ig = g.indexed_graph(); + const auto& storage_types = g.GetAttr("storage_type"); const auto& dtypes = g.GetAttr("dtype"); const auto& shape_inputs = g.GetAttr("shape_inputs"); + // TODO(kellens): At the moment this check always passes no matter the weight dtypes used in your + // graph. We should first iterate over datatypes by name and ensure they're valid types + // (fp16 or fp32) and that they're uniform. Then ensure later conversions set tensor types + // correctly in ONNX. 
for (auto& e : storage_types) { if (e != mshadow::kFloat32) { LOG(FATAL) << "ONNX converter does not support types other than float32 " @@ -78,8 +84,13 @@ op::TRTParam ConvertNnvmGraphToOnnx( ModelProto model_proto; // Need to determine IR versions and features to support - model_proto.set_ir_version(static_cast(2)); + auto opset_proto = model_proto.add_opset_import(); + opset_proto->set_version(static_cast(8)); + model_proto.set_ir_version(static_cast(3)); + GraphProto* graph_proto = model_proto.mutable_graph(); + auto subgraph_name_id = subgraph_count.fetch_add(1); + graph_proto->set_name("MXNetTRTSubgraph" + std::to_string(subgraph_name_id)); std::unordered_map placeholder_shapes = GetPlaceholderShapes(shape_inputs, ig); @@ -174,6 +185,20 @@ void ConvertConvolution(NodeProto* node_proto, const NodeAttrs& attrs, // const bool no_bias = conv_param.no_bias; const dmlc::optional layout = conv_param.layout; + // dilations + AttributeProto* const dilations = node_proto->add_attribute(); + dilations->set_name("dilations"); + dilations->set_type(AttributeProto::INTS); + for (const dim_t kval : dilate) { + dilations->add_ints(static_cast(kval)); + } + + // group + AttributeProto* const group = node_proto->add_attribute(); + group->set_name("group"); + group->set_type(AttributeProto::INT); + group->set_i(static_cast(num_group)); + // kernel shape AttributeProto* const kernel_shape = node_proto->add_attribute(); kernel_shape->set_name("kernel_shape"); @@ -193,14 +218,6 @@ void ConvertConvolution(NodeProto* node_proto, const NodeAttrs& attrs, pads->add_ints(static_cast(kval)); } - // dilations - AttributeProto* const dilations = node_proto->add_attribute(); - dilations->set_name("dilations"); - dilations->set_type(AttributeProto::INTS); - for (const dim_t kval : dilate) { - dilations->add_ints(static_cast(kval)); - } - // strides AttributeProto* const strides = node_proto->add_attribute(); strides->set_name("strides"); @@ -208,12 +225,6 @@ void ConvertConvolution(NodeProto* 
node_proto, const NodeAttrs& attrs, for (const dim_t kval : stride) { strides->add_ints(static_cast(kval)); } - - // group - AttributeProto* const group = node_proto->add_attribute(); - group->set_name("group"); - group->set_type(AttributeProto::INT); - group->set_i(static_cast(num_group)); } // end ConvertConvolution void ConvertPooling(NodeProto* node_proto, const NodeAttrs& attrs, @@ -248,8 +259,12 @@ void ConvertPooling(NodeProto* node_proto, const NodeAttrs& attrs, AttributeProto* const pads = node_proto->add_attribute(); pads->set_name("pads"); pads->set_type(AttributeProto::INTS); - for (int kval : pad) { - pads->add_ints(static_cast(kval)); + + // Convert from MXNet symmetric pads to ONNX non-symmetric by running through padding twice. + for (int i =0; i < 2; i++) { + for (dim_t kval : pad) { + pads->add_ints(static_cast(kval)); + } } // strides @@ -313,11 +328,6 @@ void ConvertFullyConnected(NodeProto* node_proto, const NodeAttrs& attrs, beta->set_type(AttributeProto::FLOAT); beta->set_f(1.0f); - AttributeProto* const broadcast = node_proto->add_attribute(); - broadcast->set_name("broadcast"); - broadcast->set_type(AttributeProto::INT); - broadcast->set_i(1); - AttributeProto* const transA = node_proto->add_attribute(); transA->set_name("transA"); transA->set_type(AttributeProto::INT); @@ -369,11 +379,6 @@ void ConvertBatchNorm(NodeProto* node_proto, const NodeAttrs& attrs, epsilon->set_type(AttributeProto::FLOAT); epsilon->set_f(static_cast(param.eps)); - AttributeProto* const is_test = node_proto->add_attribute(); - is_test->set_name("is_test"); - is_test->set_type(AttributeProto::INT); - is_test->set_i(1); - AttributeProto* const momentum = node_proto->add_attribute(); momentum->set_name("momentum"); momentum->set_type(AttributeProto::FLOAT); @@ -382,31 +387,16 @@ void ConvertBatchNorm(NodeProto* node_proto, const NodeAttrs& attrs, AttributeProto* const spatial = node_proto->add_attribute(); spatial->set_name("spatial"); 
spatial->set_type(AttributeProto::INT); - spatial->set_i(1); - - AttributeProto* const consumed = node_proto->add_attribute(); - consumed->set_name("consumed_inputs"); - consumed->set_type(AttributeProto::INTS); - - for (int i = 0; i < 5; i++) { - int val = (i < 3) ? 0 : 1; - consumed->add_ints(static_cast(val)); - } + // MXNet computes mean and variance per feature for batchnorm. Enabling spatial mode + // (default in ONNX3) implies running batchnorm on all spatial features so we need to explicitly + // disable this for MXNet's BatchNorm. + spatial->set_i(0); } void ConvertElementwiseAdd(NodeProto* node_proto, const NodeAttrs& /*attrs*/, const nnvm::IndexedGraph& /*ig*/, const array_view& /*inputs*/) { node_proto->set_op_type("Add"); - AttributeProto* const axis = node_proto->add_attribute(); - axis->set_name("axis"); - axis->set_type(AttributeProto::INT); - axis->set_i(1); - - AttributeProto* const broadcast = node_proto->add_attribute(); - broadcast->set_name("broadcast"); - broadcast->set_type(AttributeProto::INT); - broadcast->set_i(0); // 1 } std::unordered_map GetPlaceholderShapes( @@ -459,32 +449,40 @@ void ConvertPlaceholder( void ConvertConstant( GraphProto* const graph_proto, const std::string& node_name, std::unordered_map* const shared_buffer) { - NodeProto* const node_proto = graph_proto->add_node(); - node_proto->set_name(node_name); - node_proto->add_output(node_name); - node_proto->set_op_type("Constant"); + TensorProto* const initializer_proto = graph_proto->add_initializer(); + + // Create initializer for constants + initializer_proto->set_name(node_name); + // TODO(kellens): convert to fp16 if needed. 
+ initializer_proto->set_data_type(TensorProto_DataType_FLOAT); const NDArray nd = shared_buffer->find(node_name)->second; const TBlob& blob = nd.data(); const TShape shape = blob.shape_; - const int32_t size = shape.Size(); + for (auto& dim : shape) { + initializer_proto->add_dims(static_cast(dim)); + } + + auto size = shape.Size(); + // TODO(kellens): Note hard coded float32 size assumed. std::shared_ptr shared_data_ptr(new float[size]); float* const data_ptr = shared_data_ptr.get(); nd.SyncCopyToCPU(static_cast(data_ptr), size); - AttributeProto* const tensor_attr = node_proto->add_attribute(); - tensor_attr->set_name("value"); - tensor_attr->set_type(AttributeProto::TENSOR); - - TensorProto* const tensor_proto = tensor_attr->mutable_t(); - tensor_proto->set_data_type(TensorProto_DataType_FLOAT); - for (auto& dim : shape) { - tensor_proto->add_dims(static_cast(dim)); + for (int blob_idx = 0; blob_idx < size; ++blob_idx) { + initializer_proto->add_float_data(data_ptr[blob_idx]); } - for (int blob_idx = 0; blob_idx < size; ++blob_idx) { - tensor_proto->add_float_data(data_ptr[blob_idx]); + // Create inputs for constants. + ValueInfoProto* const input_proto = graph_proto->add_input(); + input_proto->set_name(node_name); + + // TODO(kellens): (fp16 support) + input_proto->mutable_type()->mutable_tensor_type()->set_elem_type(TensorProto_DataType_FLOAT); + for (auto& dim : shape) { + auto new_dim = input_proto->mutable_type()->mutable_tensor_type()->mutable_shape()->add_dim(); + new_dim->set_dim_value(static_cast(dim)); } } diff --git a/src/operator/contrib/tensorrt-inl.h b/src/operator/contrib/tensorrt-inl.h index be335ab1208f..589e133479ff 100644 --- a/src/operator/contrib/tensorrt-inl.h +++ b/src/operator/contrib/tensorrt-inl.h @@ -38,7 +38,7 @@ #include #include -#include +#include #include #include