diff --git a/3rdparty/onnx-tensorrt b/3rdparty/onnx-tensorrt index f1c7aa63d88d..1e209e546061 160000 --- a/3rdparty/onnx-tensorrt +++ b/3rdparty/onnx-tensorrt @@ -1 +1 @@ -Subproject commit f1c7aa63d88d8d8ef70490f2ebb6b33f7450218b +Subproject commit 1e209e546061173ccc37b25bbca69a795c6c86e4 diff --git a/ci/docker/Dockerfile.build.ubuntu_gpu_tensorrt b/ci/docker/Dockerfile.build.ubuntu_gpu_tensorrt index f4844115c0fd..8ad90aedeb6f 100644 --- a/ci/docker/Dockerfile.build.ubuntu_gpu_tensorrt +++ b/ci/docker/Dockerfile.build.ubuntu_gpu_tensorrt @@ -39,3 +39,4 @@ COPY runtime_functions.sh /work/ WORKDIR /work/mxnet ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib +ENV CPLUS_INCLUDE_PATH=${CPLUS_INCLUDE_PATH}:/usr/local/cuda-10.0/targets/x86_64-linux/include/ diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index d3c679455a57..f79f224029b2 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -2065,7 +2065,6 @@ MXNET_DLL int MXExecutorReshapeEx(int partial_shaping, */ MXNET_DLL int MXExecutorGetOptimizedSymbol(ExecutorHandle handle, SymbolHandle *out); - /*! * \brief set a call back to notify the completion of operation */ diff --git a/python/mxnet/contrib/tensorrt.py b/python/mxnet/contrib/tensorrt.py index 4ff39c4b4829..d94600e1cee3 100644 --- a/python/mxnet/contrib/tensorrt.py +++ b/python/mxnet/contrib/tensorrt.py @@ -16,95 +16,50 @@ # under the License. """ Module to enable the use of TensorRT optimized graphs.""" - -import ctypes -import logging import os -from .. import symbol as sym - -from ..base import _LIB, SymbolHandle, MXNetError -from ..base import check_call - - -def set_use_tensorrt(status): +def set_use_fp16(status): """ - Set an environment variable which will enable or disable the use of TensorRT in the backend. - Note: this is useful for A/B testing purposes. - :param status: Boolean, true if TensorRT optimization should be applied, False for legacy - behaviour. + Set an environment variable which will enable or disable the use of FP16 precision in + TensorRT + Note: The mode FP16 force the whole TRT node to be executed in FP16 + :param status: Boolean, True if TensorRT should run in FP16, False for FP32 """ - os.environ["MXNET_USE_TENSORRT"] = str(int(status)) - + os.environ["MXNET_TENSORRT_USE_FP16"] = str(int(status)) -def get_use_tensorrt(): +def get_use_fp16(): """ - Get an environment variable which describes if TensorRT is currently enabled in the backend. - Note: this is useful for A/B testing purposes. - :return: Boolean, true if TensorRT optimization should be applied, False for legacy - behaviour. + Get an environment variable which describes if TensorRT is currently running in FP16 + :return: Boolean, true if TensorRT is running in FP16, False for FP32 """ - return bool(int(os.environ.get("MXNET_USE_TENSORRT", 0)) == 1) + return bool(int(os.environ.get("MXNET_TENSORRT_USE_FP16", 1)) == 1) - -def get_optimized_symbol(executor): +def init_tensorrt_params(sym, arg_params, aux_params): """ - Take an executor's underlying symbol graph and return its generated optimized version. - - Parameters - ---------- - executor : - An executor for which you want to see an optimized symbol. Getting an optimized symbol - is useful to compare and verify the work TensorRT has done against a legacy behaviour. - - Returns - ------- - symbol : nnvm::Symbol - The nnvm symbol optimized. - """ - handle = SymbolHandle() - try: - check_call(_LIB.MXExecutorGetOptimizedSymbol(executor.handle, ctypes.byref(handle))) - result = sym.Symbol(handle=handle) - return result - except MXNetError: - logging.error('Error while trying to fetch TRT optimized symbol for graph. Please ensure ' - 'build was compiled with MXNET_USE_TENSORRT enabled.') - raise - - -def tensorrt_bind(symbol, ctx, all_params, type_dict=None, stype_dict=None, group2ctx=None, - **kwargs): - """Bind current symbol to get an optimized trt executor. - - Parameters - ---------- - symbol : Symbol - The symbol you wish to bind, and optimize with TensorRT. - - ctx : Context - The device context the generated executor to run on. - - all_params : Dict of str->ndarray - A dictionary of mappings from parameter names to parameter NDArrays. - - type_dict : Dict of str->numpy.dtype - Input type dictionary, name->dtype - - stype_dict : Dict of str->str - Input storage type dictionary, name->storage_type - - group2ctx : Dict of string to mx.Context - The dict mapping the `ctx_group` attribute to the context assignment. - - kwargs : Dict of str->shape - Input shape dictionary, name->shape - - Returns - ------- - executor : mxnet.Executor - An optimized TensorRT executor. + Set weights in attributes of TensorRT nodes + :param sym: Symbol, the symbol graph should contains some TensorRT nodes + :param arg_params: arg_params + :param aux_params: aux_params + :return arg_params, aux_params: remaining params that are not in TensorRT nodes """ - kwargs['shared_buffer'] = all_params - return symbol.simple_bind(ctx, type_dict=type_dict, stype_dict=stype_dict, - group2ctx=group2ctx, **kwargs) + for s in sym.get_internals(): + new_params_names = "" + tensorrt_params = {} + if 'subgraph_params_names' in s.list_attr(): + keys = s.list_attr()['subgraph_params_names'].split(';') + for k in keys: + if k in arg_params: + new_params_names += k + ";" + tensorrt_params['subgraph_param_' + k] = arg_params[k] + arg_params.pop(k) + elif k in aux_params: + new_params_names += k + ";" + tensorrt_params['subgraph_param_' + k] = aux_params[k] + aux_params.pop(k) + new_attrs = {} + for k, v in tensorrt_params.items(): + new_attrs[k] = str(v.handle.value) + if len(new_attrs) > 0: + s._set_attr(**new_attrs) + s._set_attr(subgraph_params_names=new_params_names[:-1]) + return arg_params, aux_params diff --git a/src/c_api/c_api_executor.cc b/src/c_api/c_api_executor.cc index 5352fcfe0951..ed3d21082fe8 100644 --- a/src/c_api/c_api_executor.cc +++ b/src/c_api/c_api_executor.cc @@ -29,9 +29,6 @@ #include "./c_api_common.h" #include "../executor/graph_executor.h" #include "../common/utils.h" -#if MXNET_USE_TENSORRT -#include "../executor/trt_graph_executor.h" -#endif // MXNET_USE_TENSORRT int MXExecutorPrint(ExecutorHandle handle, const char **out_str) { Executor *exec = static_cast(handle); @@ -808,38 +805,12 @@ int MXExecutorSimpleBindEx(SymbolHandle symbol_handle, std::vector in_arg_vec; std::vector arg_grad_vec; std::vector aux_state_vec; -#if MXNET_USE_TENSORRT - // If we've built with TensorRT support we by default return an TRTExecutor. - // Users can override this behaviour via env var, which is useful for example for A/B - // performance testing. - if (dmlc::GetEnv("MXNET_USE_TENSORRT", false)) { - *out = exec::TrtGraphExecutor::TensorRTBind(*sym, ctx, ctx_map, &in_arg_ctx_vec, - &arg_grad_ctx_vec, &aux_state_ctx_vec, - &arg_shape_map, &arg_dtype_map, &arg_stype_map, - &grad_req_type_vec, shared_arg_name_set, - &in_arg_vec, &arg_grad_vec, &aux_state_vec, - use_shared_buffer ? &shared_buffer_map : nullptr, - reinterpret_cast(shared_exec_handle)); - } else { - // Checks to see if this env var has been set to true or false by the user. - // If the user is using a TensorRT build, but has not enabled TRT at inference time, warn - // them and describe further steps. - const int unset_indicator = std::numeric_limits::quiet_NaN(); - if (dmlc::GetEnv("MXNET_USE_TENSORRT", unset_indicator) == unset_indicator) { - LOG(INFO) << "TensorRT not enabled by default. Please set the MXNET_USE_TENSORRT " - "environment variable to 1 or call mx.contrib.tensorrt.set_use_tensorrt(True) " - "to enable."; - } -#endif // MXNET_USE_TENSORRT - *out = Executor::SimpleBind(*sym, ctx, ctx_map, in_arg_ctx_vec, arg_grad_ctx_vec, - aux_state_ctx_vec, arg_shape_map, arg_dtype_map, arg_stype_map, - grad_req_type_vec, shared_arg_name_set, &in_arg_vec, - &arg_grad_vec, &aux_state_vec, - use_shared_buffer ? &shared_buffer_map : nullptr, - reinterpret_cast(shared_exec_handle)); -#if MXNET_USE_TENSORRT - } -#endif // MXNET_USE_TENSORRT + *out = Executor::SimpleBind(*sym, ctx, ctx_map, in_arg_ctx_vec, arg_grad_ctx_vec, + aux_state_ctx_vec, arg_shape_map, arg_dtype_map, arg_stype_map, + grad_req_type_vec, shared_arg_name_set, &in_arg_vec, + &arg_grad_vec, &aux_state_vec, + use_shared_buffer ? &shared_buffer_map : nullptr, + reinterpret_cast(shared_exec_handle)); // copy ndarray ptrs to ret->handles so that front end // can access them @@ -1091,14 +1062,9 @@ int MXExecutorGetOptimizedSymbol(ExecutorHandle handle, auto s = new nnvm::Symbol(); API_BEGIN(); -#if MXNET_USE_TENSORRT - auto exec = static_cast(handle); + auto exec = static_cast(handle); *s = exec->GetOptimizedSymbol(); *out = s; -#else - LOG(FATAL) << "GetOptimizedSymbol may only be used when MXNet is compiled with " - "MXNET_USE_TENSORRT enabled. Please re-compile MXNet with TensorRT support."; -#endif // MXNET_USE_TENSORRT API_END_HANDLE_ERROR(delete s); } diff --git a/src/common/serialization.h b/src/common/serialization.h deleted file mode 100644 index c22d8bc82270..000000000000 --- a/src/common/serialization.h +++ /dev/null @@ -1,318 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * Copyright (c) 2015 by Contributors - * \file serialization.h - * \brief Serialization of some STL and nnvm data-structures - * \author Clement Fuji Tsang - */ - -#ifndef MXNET_COMMON_SERIALIZATION_H_ -#define MXNET_COMMON_SERIALIZATION_H_ - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace mxnet { -namespace common { - -template -inline size_t SerializedSize(const T &obj); - -template -inline size_t SerializedSize(const mxnet::Tuple &obj); - -template -inline size_t SerializedSize(const std::map &obj); - -template<> -inline size_t SerializedSize(const std::string &obj); - -template -inline size_t SerializedSize(const std::tuple &obj); - -template -inline void Serialize(const T &obj, char **buffer); - -template -inline void Serialize(const mxnet::Tuple &obj, char **buffer); - -template -inline void Serialize(const std::map &obj, char **buffer); - -template<> -inline void Serialize(const std::string &obj, char **buffer); - -template -inline void Serialize(const std::tuple &obj, char **buffer); - -template -inline void Deserialize(T *obj, const std::string &buffer, size_t *curr_pos); - -template -inline void Deserialize(mxnet::Tuple *obj, const std::string &buffer, size_t *curr_pos); - -template -inline void Deserialize(std::map *obj, const std::string &buffer, size_t *curr_pos); - -template<> -inline void Deserialize(std::string *obj, const std::string &buffer, size_t *curr_pos); - -template -inline void Deserialize(std::tuple *obj, const std::string &buffer, size_t *curr_pos); - - -template -struct is_container { - static const bool value = !std::is_pod::value; -}; - -template -inline size_t SerializedSize(const T &obj) { - return sizeof(T); -} - -template -inline size_t SerializedSize(const mxnet::Tuple &obj) { - if (is_container::value) { - size_t sum_val = 4; - for (const auto& el : obj) { - sum_val += SerializedSize(el); - } - return sum_val; - } else { - return 4 + (obj.ndim() * sizeof(T)); - } -} - -template -inline size_t SerializedSize(const std::map &obj) { - size_t sum_val = 4; - if (is_container::value && is_container::value) { - for (const auto& p : obj) { - sum_val += SerializedSize(p.first) + SerializedSize(p.second); - } - } else if (is_container::value) { - for (const auto& p : obj) { - sum_val += SerializedSize(p.first); - } - sum_val += sizeof(V) * obj.size(); - } else if (is_container::value) { - for (const auto& p : obj) { - sum_val += SerializedSize(p.second); - } - sum_val += sizeof(K) * obj.size(); - } else { - sum_val += (sizeof(K) + sizeof(V)) * obj.size(); - } - return sum_val; -} - -template<> -inline size_t SerializedSize(const std::string &obj) { - return obj.size() + 4; -} - -template -struct serialized_size_tuple { - template - static inline size_t Compute(const std::tuple &obj) { - return SerializedSize(std::get(obj)) + serialized_size_tuple::Compute(obj); - } -}; - -template<> -struct serialized_size_tuple<0> { - template - static inline size_t Compute(const std::tuple &obj) { - return SerializedSize(std::get<0>(obj)); - } -}; - -template -inline size_t SerializedSize(const std::tuple &obj) { - return serialized_size_tuple::Compute(obj); -} - -// Serializer - -template -inline size_t SerializedContainerSize(const T &obj, char **buffer) { - uint32_t size = obj.size(); - std::memcpy(*buffer, &size, 4); - *buffer += 4; - return (size_t) size; -} - -template -inline void Serialize(const T &obj, char **buffer) { - std::memcpy(*buffer, &obj, sizeof(T)); - *buffer += sizeof(T); -} - -template -inline void Serialize(const mxnet::Tuple &obj, char **buffer) { - uint32_t size = obj.ndim(); - std::memcpy(*buffer, &size, 4); - *buffer += 4; - for (auto& el : obj) { - Serialize(el, buffer); - } -} - -template -inline void Serialize(const std::map &obj, char **buffer) { - SerializedContainerSize(obj, buffer); - for (auto& p : obj) { - Serialize(p.first, buffer); - Serialize(p.second, buffer); - } -} - -template<> -inline void Serialize(const std::string &obj, char **buffer) { - auto size = SerializedContainerSize(obj, buffer); - std::memcpy(*buffer, &obj[0], size); - *buffer += size; -} - -template -struct serialize_tuple { - template - static inline void Compute(const std::tuple &obj, char **buffer) { - serialize_tuple::Compute(obj, buffer); - Serialize(std::get(obj), buffer); - } -}; - -template<> -struct serialize_tuple<0> { - template - static inline void Compute(const std::tuple &obj, char **buffer) { - Serialize(std::get<0>(obj), buffer); - } -}; - -template -inline void Serialize(const std::tuple &obj, char **buffer) { - serialize_tuple::Compute(obj, buffer); -} - -// Deserializer - -template -inline size_t DeserializedContainerSize(T *obj, const std::string &buffer, size_t *curr_pos) { - uint32_t size = obj->size(); - std::memcpy(&size, &buffer[*curr_pos], 4); - *curr_pos += 4; - return (size_t) size; -} - -template -inline void Deserialize(T *obj, const std::string &buffer, size_t *curr_pos) { - std::memcpy(obj, &buffer[*curr_pos], sizeof(T)); - *curr_pos += sizeof(T); -} - -template -inline void Deserialize(mxnet::Tuple *obj, const std::string &buffer, size_t *curr_pos) { - uint32_t size = obj->ndim(); - std::memcpy(&size, &buffer[*curr_pos], 4); - *curr_pos += 4; - obj->SetDim(size); - for (size_t i = 0; i < size; ++i) { - Deserialize((*obj)[i], buffer, curr_pos); - } -} - -template -inline void Deserialize(std::map *obj, const std::string &buffer, size_t *curr_pos) { - auto size = DeserializedContainerSize(obj, buffer, curr_pos); - K first; - for (size_t i = 0; i < size; ++i) { - Deserialize(&first, buffer, curr_pos); - Deserialize(&(*obj)[first], buffer, curr_pos); - } -} - -template<> -inline void Deserialize(std::string *obj, const std::string &buffer, size_t *curr_pos) { - auto size = DeserializedContainerSize(obj, buffer, curr_pos); - obj->resize(size); - std::memcpy(&(obj->front()), &buffer[*curr_pos], size); - *curr_pos += size; -} - -template -struct deserialize_tuple { - template - static inline void Compute(std::tuple *obj, - const std::string &buffer, size_t *curr_pos) { - deserialize_tuple::Compute(obj, buffer, curr_pos); - Deserialize(&std::get(*obj), buffer, curr_pos); - } -}; - -template<> -struct deserialize_tuple<0> { - template - static inline void Compute(std::tuple *obj, - const std::string &buffer, size_t *curr_pos) { - Deserialize(&std::get<0>(*obj), buffer, curr_pos); - } -}; - -template -inline void Deserialize(std::tuple *obj, const std::string &buffer, size_t *curr_pos) { - deserialize_tuple::Compute(obj, buffer, curr_pos); -} - - -template -inline void Serialize(const T& obj, std::string* serialized_data) { - serialized_data->resize(SerializedSize(obj)); - char* curr_pos = &(serialized_data->front()); - Serialize(obj, &curr_pos); - CHECK_EQ((int64_t)curr_pos - (int64_t)&(serialized_data->front()), - serialized_data->size()); -} - -template -inline void Deserialize(T* obj, const std::string& serialized_data) { - size_t curr_pos = 0; - Deserialize(obj, serialized_data, &curr_pos); - CHECK_EQ(curr_pos, serialized_data.size()); -} - -} // namespace common -} // namespace mxnet -#endif // MXNET_COMMON_SERIALIZATION_H_ diff --git a/src/executor/exec_pass.h b/src/executor/exec_pass.h index 7e5130f4921c..f544d6ba3392 100644 --- a/src/executor/exec_pass.h +++ b/src/executor/exec_pass.h @@ -209,18 +209,6 @@ Graph InferStorageType(Graph&& graph, StorageTypeVector&& storage_type_inputs = StorageTypeVector(), const std::string& storage_type_attr_key = ""); -#if MXNET_USE_TENSORRT -/*! - * \brief Replace subgraphs by TRT (forward only) - */ -Graph ReplaceSubgraph(Graph&& g, - const std::unordered_set& set_subgraph, - std::unordered_map* const params_map); - -std::vector> GetTrtCompatibleSubsets(const Graph& g, - std::unordered_map* const params_map); -#endif - } // namespace exec } // namespace mxnet diff --git a/src/executor/graph_executor.cc b/src/executor/graph_executor.cc index 4a4505581920..e726d29765ac 100644 --- a/src/executor/graph_executor.cc +++ b/src/executor/graph_executor.cc @@ -102,6 +102,16 @@ void GraphExecutor::Print(std::ostream &os) const { // NOLINT(*) os << "Total " << 11 << " TempSpace resource requested\n"; } +/*! + * \brief Return the "optimized" symbol contained in the executor graph. + */ +nnvm::Symbol GraphExecutor::GetOptimizedSymbol() { + Symbol ret; + ret.outputs = std::vector(graph_.outputs.begin(), + graph_.outputs.begin() + num_forward_outputs_); + return ret.Copy(); +} + void GraphExecutor::SetMonitorCallback(const MonitorCallback& callback, bool monitor_all) { CHECK(callback) << "invalid callback"; monitor_callback_ = callback; diff --git a/src/executor/graph_executor.h b/src/executor/graph_executor.h index b556a2bd0fe9..9a8660916357 100644 --- a/src/executor/graph_executor.h +++ b/src/executor/graph_executor.h @@ -68,6 +68,7 @@ class GraphExecutor : public Executor { const std::unordered_map& arg_grad_map() const override; const std::unordered_map& aux_state_map() const override; void Print(std::ostream &os) const override; // NOLINT(*) + nnvm::Symbol GetOptimizedSymbol(); void SetMonitorCallback(const MonitorCallback& callback, bool monitor_all = false) override; // Initialize the rest of attributes // after setting up arguments. diff --git a/src/executor/tensorrt_pass.cc b/src/executor/tensorrt_pass.cc deleted file mode 100644 index f847d59a1298..000000000000 --- a/src/executor/tensorrt_pass.cc +++ /dev/null @@ -1,596 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * Copyright (c) 2018 by Contributors - * \file tensorrt_pass.cc - * \brief Replace TRT compatible subgraphs by TRT engines - * \author Clement Fuji Tsang - */ - -#if MXNET_USE_TENSORRT - -#include -#include -#include -#include -#include -#include - -#include "../operator/contrib/nnvm_to_onnx-inl.h" -#include "./exec_pass.h" -#include "./onnx_to_tensorrt.h" - -namespace mxnet { -namespace exec { - -using NodePtr = nnvm::NodePtr; - -/*! - * \brief Custom graph class, which will contain bi-directional nodes - * we need to compute DFS and reverse DFS for graph partitioning - */ -class BidirectionalGraph { - public: - struct Node { - nnvm::Node* nnvmptr; - std::vector inputs; - std::vector outputs; - }; - std::vector nodes; - std::unordered_map nnvm2nid; - std::vector outputs; - static const std::unordered_set unconditionalTRTop; - - explicit BidirectionalGraph(const Graph &g) { - auto& idx = g.indexed_graph(); - auto num_nodes = idx.num_nodes(); - nodes.reserve(num_nodes); - nnvm2nid.reserve(num_nodes); - outputs.reserve(idx.outputs().size()); - DFSVisit(g.outputs, [this](const nnvm::NodePtr& n) { - BidirectionalGraph::Node new_node; - new_node.nnvmptr = n.get(); - nnvm2nid[n.get()] = static_cast(nodes.size()); - nodes.emplace_back(std::move(new_node)); - }); - for (const auto& it : nnvm2nid) { - nnvm::Node* nnvmnode = it.first; - uint32_t nid = it.second; - for (auto& n : nnvmnode->inputs) { - uint32_t input_nid = nnvm2nid[n.node.get()]; - nodes[input_nid].outputs.emplace_back(&nodes[nid]); - nodes[nid].inputs.emplace_back(&nodes[input_nid]); - } - } - for (auto& e : g.outputs) { - uint32_t nid = nnvm2nid[e.node.get()]; - outputs.emplace_back(&nodes[nid]); - } - } - - template - void DFS(const std::vector& heads, bool reverse, FVisit fvisit) { - std::unordered_set visited; - std::vector vec(heads.begin(), heads.end()); - visited.reserve(heads.size()); - while (!vec.empty()) { - Node* vertex = vec.back(); - vec.pop_back(); - if (visited.count(vertex) == 0) { - visited.insert(vertex); - fvisit(vertex); - std::vector nexts = reverse ? vertex->inputs : vertex->outputs; - for (Node* node : nexts) { - if (visited.count(node) == 0) { - vec.emplace_back(node); - } - } - } - } - } - - using t_pairset = std::pair, std::unordered_set>; - using t_pairvec = std::pair, std::vector>; - using t_uncomp_map = std::unordered_map>; - - std::unordered_set naive_grow_subgraph(Node* head, - std::unordered_set* set_unused, - t_uncomp_map* uncomp_map) { - std::unordered_set subgraph; - std::unordered_set uncomp_set; - std::deque stack; - stack.emplace_back(head); - while (!stack.empty()) { - Node* vertex = stack.back(); - stack.pop_back(); - if (set_unused->count(vertex) && !uncomp_set.count(vertex)) { - set_unused->erase(vertex); - subgraph.insert(vertex); - uncomp_set.insert((*uncomp_map)[vertex].begin(), (*uncomp_map)[vertex].end()); - for (Node* input : vertex->inputs) { - if (set_unused->count(input) && !uncomp_set.count(input)) { - stack.emplace_back(input); - } - } - for (Node* output : vertex->outputs) { - if (set_unused->count(output) && !uncomp_set.count(output)) { - stack.emplace_back(output); - } - } - } - } - return subgraph; - } - - std::vector> get_subsets( - std::unordered_map* const params_map) { - std::vector> subgraphs; - std::unordered_set set_nonTRTnodes; - std::unordered_set set_allnodes(nodes.size()); - std::vector separation_sets; - for (Node& node : nodes) { - if (!IsTRTCompatible(node.nnvmptr)) { - set_nonTRTnodes.insert(&node); - std::unordered_set in_graph; - std::unordered_set out_graph; - std::vector dummy_head; - dummy_head.emplace_back(&node); - DFS(dummy_head, false, [&out_graph](Node* node) { - out_graph.insert(node); - }); - DFS(dummy_head, true, [&in_graph](Node* node) { - in_graph.insert(node); - }); - separation_sets.emplace_back(std::make_pair(in_graph, out_graph)); - } - set_allnodes.emplace(&node); - } - t_uncomp_map uncomp_map; - std::unordered_set set_TRTnodes; - set_TRTnodes.insert(set_allnodes.begin(), set_allnodes.end()); - for (Node* n : set_nonTRTnodes) { - set_TRTnodes.erase(n); - } - for (Node* n : set_TRTnodes) { - for (t_pairset p : separation_sets) { - if (p.first.count(n)) { - uncomp_map[n].insert(p.second.begin(), p.second.end()); - } else if (p.second.count(n)) { - uncomp_map[n].insert(p.first.begin(), p.first.end()); - } - } - for (Node* nonTRTn : set_nonTRTnodes) { - uncomp_map[n].erase(nonTRTn); - } - } - std::unordered_set set_unused; - set_unused.reserve(set_TRTnodes.size()); - - for (auto& n : set_TRTnodes) { - if (n->nnvmptr->attrs.op != nullptr || params_map->count(n->nnvmptr->attrs.name)) { - set_unused.insert(n); - } - } - std::unordered_set visited; - std::deque stack(outputs.begin(), outputs.end()); - while (!stack.empty()) { - Node* vertex = stack.front(); - stack.pop_front(); - if (!visited.count(vertex)) { - visited.insert(vertex); - if (set_unused.count(vertex)) { - subgraphs.emplace_back(naive_grow_subgraph(vertex, &set_unused, &uncomp_map)); - } - for (Node* input : vertex->inputs) { - stack.emplace_back(input); - } - } - } - - return subgraphs; - } - - - private: - friend class Graph; - - bool IsTRTCompatible(nnvm::Node* nodeptr) { - if (nodeptr->op() == nullptr) { - return true; - } - - const std::string op_name = nodeptr->op()->name; - if (op_name == "Pooling") { - return (nodeptr->attrs.dict.at("pool_type") == "avg" || - nodeptr->attrs.dict.at("pool_type") == "max"); - } - - if (unconditionalTRTop.count(op_name)) { - return true; - } - - if (op_name == "Activation") { - return nodeptr->attrs.dict.at("act_type") == "relu" || - nodeptr->attrs.dict.at("act_type") == "tanh" || - nodeptr->attrs.dict.at("act_type") == "sigmoid"; - } - - return false; - } -}; // class BidirectionalGraph - -/*! - * \brief function which transform std::vector back to Attrs (dmlc::any) - */ -const std::unordered_set BidirectionalGraph::unconditionalTRTop = { - "Convolution", - "BatchNorm", - "elemwise_add", - "elemwise_sub", - "elemwise_mul", - "rsqrt", - "pad", - "Pad", - "mean", - "FullyConnected", - "Flatten", - "SoftmaxOutput", -}; - - -using NodeEntrySet = std::unordered_set; - -/*! - * \brief get the output nodes of the subgraph in the main graph - * \return a vector of the output nodes -*/ -std::vector GetSubgraphNodeEntries(Graph g, - std::unordered_set set_subgraph) { - std::vector outputs; - NodeEntrySet _outputs; - for (auto& e : g.outputs) { - if (set_subgraph.count(e.node.get())) { - _outputs.insert(e); - } - } - DFSVisit(g.outputs, [&set_subgraph, &_outputs](const nnvm::NodePtr &node){ - if (!set_subgraph.count(node.get())) { - for (auto& e : node->inputs) { - if (set_subgraph.count(e.node.get())) { - _outputs.insert(e); - } - } - } - }); - outputs.insert(outputs.begin(), _outputs.begin(), _outputs.end()); - return outputs; -} - - -/*! - * \brief get the nodes outside of the subgraph for which outputs are used in the subgraph - * \return a vector the nodes -*/ -std::vector GetSubgraphInterfaceNodes(Graph g, - std::unordered_set set_subgraph) { - std::vector inputs; - NodeEntrySet _inputs; - DFSVisit(g.outputs, [&set_subgraph, &_inputs](const nnvm::NodePtr &node){ - if (set_subgraph.count(node.get())) { - for (auto& e : node->inputs) { - if (!set_subgraph.count(e.node.get())) { - _inputs.insert(e); - } - } - } - }); - inputs.insert(inputs.begin(), _inputs.begin(), _inputs.end()); - return inputs; -} - -std::unordered_map GetGraphInputsMap(const Graph& g) { - std::unordered_map outputs; - auto& idx = g.indexed_graph(); - outputs.reserve(idx.num_nodes()); - std::vector input_nodes = idx.input_nodes(); - for (size_t i = 0; i < input_nodes.size(); ++i) { - outputs[input_nodes[i]] = static_cast(i); - } - return outputs; -} - -/*! - * \brief Dummy function which creates a fake TensorRT Node - */ -nnvm::NodePtr ConvertNnvmGraphToOnnx(const nnvm::Graph &g, - std::unordered_map* const params_map) { - auto p = nnvm::Node::Create(); - p->attrs.op = nnvm::Op::Get("_trt_op"); - op::ONNXParam onnx_param = op::nnvm_to_onnx::ConvertNnvmGraphToOnnx(g, params_map); - p->attrs.dict["serialized_output_map"] = onnx_param.serialized_output_map; - p->attrs.dict["serialized_input_map"] = onnx_param.serialized_input_map; - p->attrs.dict["serialized_onnx_graph"] = onnx_param.serialized_onnx_graph; - if (p->op()->attr_parser != nullptr) { - p->op()->attr_parser(&(p->attrs)); - } - return p; -} - -/*! - * \brief Update attributes of the graph (such as some inputs properties) - */ -Graph UpdateSubgraphAttrs(Graph&& subgraph, const Graph& g, - const std::unordered_map& old2new, - const nnvm::NodeEntryMap& main_input_entry_to_sub) { - const auto& idx = g.indexed_graph(); - const auto& sub_idx = subgraph.indexed_graph(); - - const auto& shape = g.GetAttr("shape"); - const auto& dtype = g.GetAttr("dtype"); - const auto& storage_type = g.GetAttr("storage_type"); - const auto& shape_inputs = g.GetAttr("shape_inputs"); - const auto& dtype_inputs = g.GetAttr("dtype_inputs"); - const auto& storage_type_inputs = g.GetAttr("storage_type_inputs"); - - mxnet::ShapeVector sub_shape(sub_idx.num_node_entries()); - nnvm::DTypeVector sub_dtype(sub_idx.num_node_entries()); - StorageTypeVector sub_storage_type(sub_idx.num_node_entries()); - mxnet::ShapeVector sub_shape_inputs(sub_idx.input_nodes().size()); - nnvm::DTypeVector sub_dtype_inputs(sub_idx.input_nodes().size()); - StorageTypeVector sub_storage_type_inputs(sub_idx.input_nodes().size()); - - const std::unordered_map inputsindex2pos = GetGraphInputsMap(g); - const std::unordered_map sub_inputsindex2pos = GetGraphInputsMap(subgraph); - // map attributes from graph to subgraph - for (auto& p : old2new) { - const uint32_t nid = idx.node_id(p.first); - const uint32_t sub_nid = sub_idx.node_id(p.second.get()); - const nnvm::Op* op = sub_idx[sub_nid].source->op(); - if (op == nullptr) { // if it's an input node, there is only one output node entry - const uint32_t sub_i = sub_idx.entry_id(sub_nid, 0); - const uint32_t sub_input_i = sub_inputsindex2pos.at(sub_nid); - const uint32_t i = idx.entry_id(nid, 0); - - sub_shape[sub_i] = shape[i]; - sub_dtype[sub_i] = dtype[i]; - sub_storage_type[sub_i] = storage_type[i]; - sub_shape_inputs[sub_input_i] = shape_inputs[inputsindex2pos.at(nid)]; - sub_dtype_inputs[sub_input_i] = dtype_inputs[inputsindex2pos.at(nid)]; - sub_storage_type_inputs[sub_input_i] = storage_type_inputs[inputsindex2pos.at(nid)]; - - } else { - for (size_t oi = 0; oi < op->num_outputs; ++oi) { - const uint32_t sub_i = sub_idx.entry_id(sub_nid, oi); - const uint32_t i = idx.entry_id(nid, oi); - sub_shape[sub_i] = shape[i]; - sub_dtype[sub_i] = dtype[i]; - sub_storage_type[sub_i] = storage_type[i]; - } - } - } - // old2new doesn't contain placeholder / interfaces - for (auto& p : main_input_entry_to_sub) { - nnvm::NodeEntry main_entry = p.first; - nnvm::NodeEntry sub_entry = p.second; - const uint32_t sub_nid = sub_idx.node_id(sub_entry.node.get()); - const uint32_t sub_i = sub_idx.entry_id(sub_entry); - const uint32_t i = idx.entry_id(main_entry); - const uint32_t sub_input_i = sub_inputsindex2pos.at(sub_nid); - sub_shape[sub_i] = shape[i]; - sub_dtype[sub_i] = dtype[i]; - sub_storage_type[sub_i] = storage_type[i]; - sub_shape_inputs[sub_input_i] = sub_shape[sub_i]; - sub_dtype_inputs[sub_input_i] = sub_dtype[sub_i]; - sub_storage_type_inputs[sub_input_i] = sub_storage_type[sub_i]; - } - subgraph.attrs["shape"] = - std::make_shared(std::move(sub_shape)); - subgraph.attrs["dtype"] = - std::make_shared(std::move(sub_dtype)); - subgraph.attrs["storage_type"] = - std::make_shared(std::move(sub_storage_type)); - subgraph.attrs["shape_inputs"] = - std::make_shared(std::move(sub_shape_inputs)); - subgraph.attrs["dtype_inputs"] = - std::make_shared(std::move(sub_dtype_inputs)); - subgraph.attrs["storage_type_inputs"] = - std::make_shared(std::move(sub_storage_type_inputs)); - - return subgraph; -} - -/*! - * \brief Generate a name for a new TRT node, avoid collision if some TRT_nodes are already defined - */ -const std::string GetNewTrtName(const Graph& g, const Graph& subgraph) { - const std::string name_prefix("TRT_node"); - std::unordered_set name_set; - DFSVisit(g.outputs, [&name_set, &name_prefix](const nnvm::NodePtr& node) { - if (node->attrs.name.compare(0, name_prefix.size(), name_prefix) == 0) { - name_set.insert(node->attrs.name); - } - }); - // name inside the subgraph will be avaible as they will be removed - DFSVisit(subgraph.outputs, [&name_set, &name_prefix](const nnvm::NodePtr& node) { - if (node->attrs.name.compare(0, name_prefix.size(), name_prefix) == 0) { - name_set.erase(node->attrs.name); - } - }); - uint32_t name_suffix = 0; - std::string full_name = name_prefix + std::to_string(name_suffix); - while (name_set.count(full_name)) { - full_name = name_prefix + std::to_string(++name_suffix); - } - return full_name; -} - -/*! - * \brief helper function to display what nodes are in a specific subset - */ -void dispNodesSet(Graph g, std::unordered_set s) { - DFSVisit(g.outputs, [&s](const nnvm::NodePtr n){ - if (s.count(n.get())) { - std::cout << " Y " << n->attrs.name << std::endl; - } else { - std::cout << " N " << n->attrs.name << std::endl; - } - }); -} - -/*! - * \brief Replace a set of nodes by a TensorRT node - */ -Graph ReplaceSubgraph(Graph&& g, - const std::unordered_set& set_subgraph, - std::unordered_map* const params_map) { - // Create MXNet subgraph - Graph subgraph; - - const auto sub_outputs_in_main = GetSubgraphNodeEntries(g, set_subgraph); - subgraph.outputs = sub_outputs_in_main; - // old2new will link raw pointer of the nodes in the graph to - // the corresponding shared_ptr of the nodes in the generated subgraph - std::unordered_map old2new; - std::deque stack; - std::unordered_set visited; - int32_t reservation = set_subgraph.size(); - old2new.reserve(reservation); - visited.reserve(reservation); - - // Create the shared_ptr using the same raw pointer don't really matter - for (auto& n : set_subgraph) { - old2new[n] = std::make_shared(*n); - } - - // To generate a subgraph an input have to be replace by data node (no op) - // and it have to be agnostic to the node from which it's an output - // (For exemple even if two inputs are two different outputs from the same node) - nnvm::NodeEntryMap main_input_entry_to_sub; - for (auto& e : GetSubgraphInterfaceNodes(g, set_subgraph)) { - auto node = nnvm::Node::Create(); - node->attrs.name = e.node->attrs.name + "_" + std::to_string(e.index); - auto new_e = nnvm::NodeEntry{node, 0, 0}; - main_input_entry_to_sub[e] = new_e; - } - - for (nnvm::NodeEntry& e : subgraph.outputs) { - e.node = old2new[e.node.get()]; - stack.emplace_back(e.node.get()); - } - // link all nodes in the subgraph to nodes in the subgraph instead of main graph - while (!stack.empty()) { - auto vertex = stack.front(); - stack.pop_front(); - if (!visited.count(vertex)) { - visited.insert(vertex); - for (auto& e : vertex->inputs) { - auto it = main_input_entry_to_sub.find(e); - if (it != main_input_entry_to_sub.end()) { - e = it->second; - } else { - e.node = old2new[e.node.get()]; - } - stack.emplace_back(e.node.get()); - } - } - } - // Remove the control dependencies of the subgraph to nodes that are not in the subgraph - DFSVisit(subgraph.outputs, [&set_subgraph, &old2new](const nnvm::NodePtr& node) { - std::remove_if(node->control_deps.begin(), - node->control_deps.end(), - [&set_subgraph](nnvm::NodePtr n_ptr) { - return !set_subgraph.count(n_ptr.get()); - }); - for (nnvm::NodePtr& n_ptr : node->control_deps) { - n_ptr = old2new[n_ptr.get()]; - } - }); - - subgraph = UpdateSubgraphAttrs(std::move(subgraph), g, old2new, main_input_entry_to_sub); - auto& sub_idx = subgraph.indexed_graph(); - - auto trtnodeptr = ConvertNnvmGraphToOnnx(subgraph, params_map); - trtnodeptr->attrs.name = GetNewTrtName(g, subgraph); - - // Insert new trt node and unplug replaced nodes - std::unordered_map sub_input_entryid_to_main; - for (auto& p : main_input_entry_to_sub) { - sub_input_entryid_to_main[sub_idx.entry_id(p.second)] = p.first; - } - - // Plug the nodes from the main graph as inputs of the trt node - trtnodeptr->inputs.resize(main_input_entry_to_sub.size()); - { - uint32_t counter = 0; - for (uint32_t i : sub_idx.input_nodes()) { - auto it = sub_input_entryid_to_main.find(sub_idx.entry_id(i, 0)); - if (it != sub_input_entryid_to_main.end()) { - trtnodeptr->inputs[counter++] = it->second; - } - } - } - nnvm::NodeEntryMap sub_outputs_in_main_to_pos; - for (uint32_t i = 0; i < sub_outputs_in_main.size(); ++i) { - sub_outputs_in_main_to_pos[sub_outputs_in_main[i]] = i; - } - // Plug the trt node as inputs to the main graph nodes - DFSVisit(g.outputs, [&sub_outputs_in_main_to_pos, &trtnodeptr](const nnvm::NodePtr& n) { - for (auto& e : n->inputs) { - auto it = sub_outputs_in_main_to_pos.find(e); - if (it != sub_outputs_in_main_to_pos.end()) { - e.index = it->second; - e.node = trtnodeptr; - } - } - }); - - for (auto& output : g.outputs) { - auto it = sub_outputs_in_main_to_pos.find(output); - if (it != sub_outputs_in_main_to_pos.end()) { - output.index = it->second; - output.node = trtnodeptr; - } - } - - Graph new_graph; - new_graph.outputs = g.outputs; - return new_graph; -} - -std::vector> GetTrtCompatibleSubsets(const Graph& g, - std::unordered_map* const params_map) { - BidirectionalGraph biG = BidirectionalGraph(g); - std::vector> subsets = biG.get_subsets(params_map); - std::vector> nnvm_subsets(subsets.size(), - std::unordered_set()); - for (size_t i = 0; i < subsets.size(); ++i) { - nnvm_subsets[i].reserve(subsets[i].size()); - for (auto& n : subsets[i]) { - nnvm_subsets[i].insert(n->nnvmptr); - } - } - return nnvm_subsets; -} - -} // namespace exec -} // namespace mxnet - -#endif // MXNET_USE_TENSORRT diff --git a/src/executor/trt_graph_executor.cc b/src/executor/trt_graph_executor.cc deleted file mode 100644 index c923922d5184..000000000000 --- a/src/executor/trt_graph_executor.cc +++ /dev/null @@ -1,443 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#if MXNET_USE_TENSORRT - -#include "trt_graph_executor.h" - -#include -#include -#include "./onnx_to_tensorrt.h" -#include "../operator/contrib/tensorrt-inl.h" -#include "../common/utils.h" -#include "../common/exec_utils.h" - - -namespace mxnet { -namespace exec { - -using namespace mxnet::common; - - /*! - * \brief TrtGraphExecutor initializer for simple bind flow in - * which only certain input shapes and dtypes are provided by users. - * The initializer uses these shapes and dtypes to perform - * shape and dtype inferences, and then create NDArrays - * to populate data entries of the graph. The created NDArrays - * for in_args, arg_grads and aux_states are passed to the - * front end to attach the created executor. - * In front end, if the simple_bind flow is trigger by - * _bind_ith_exec, the shared data arrays of DataParallelExecutorGroup - * and shared executor will be taken into account in creating - * NDArrays for in_args, arg_grads, and aux_states for reusing - * already allocated memory. - * - * This version of an executor exports the computation graph to TensorRT make use of fused - * kernels and other runtime enhancements. TRT will compile the sub-graphs to executable fused - * operators without intervention from the user. Operators in the original graph that are not - * supported by TRT will continue to be executed normally by MXNet. - * - */ -void TrtGraphExecutor::Init(nnvm::Symbol symbol, - const Context& default_ctx, - const std::map& ctx_map, - std::vector *in_arg_ctxes, - std::vector *arg_grad_ctxes, - std::vector *aux_state_ctxes, - std::unordered_map *arg_shape_map, - std::unordered_map *arg_dtype_map, - std::unordered_map *arg_stype_map, - std::vector *grad_req_types, - const std::unordered_set& shared_arg_names, - std::vector* in_arg_vec, - std::vector* arg_grad_vec, - std::vector* aux_state_vec, - std::unordered_map* shared_buffer, - Executor* shared_exec, - const nnvm::NodeEntryMap& feed_dict) { - symbol = symbol.Copy(); - nnvm::Graph g = InitGraph(symbol, default_ctx, ctx_map, *in_arg_ctxes, *arg_grad_ctxes, - *aux_state_ctxes, *grad_req_types); - - if (need_grad_) { - LOG(FATAL) << "You may be attempting to use TensorRT for training. TensorRT is an inference " - "only library. To re-enable legacy MXNet graph execution, which will support " - "training, set the MXNET_USE_TENSORRT environment variable to 0, or call " - "mx.contrib.tensorrt.set_use_tensorrt(False)"; - } - - if (shared_buffer == nullptr || shared_buffer->empty()) { - LOG(FATAL) << "MXNET_USE_TENSORRT = 1 but shared_buffer is empty. " - << "Please provide weights and other parameters, such as " - << "BatchNorm moments, via the shared_buffer, during simple bind call."; - } - - // The following code of shape and dtype inferences and argument - // initialization is for simple_bind only. Regular bind operation - // should do this differently. - - // Initialize arg_shapes and arg_dtypes for shape and type inferences. - // It contains all in_args and aux_states' shapes and types in a certain order. - const nnvm::IndexedGraph& idx = g.indexed_graph(); - mxnet::ShapeVector arg_shapes(idx.input_nodes().size(), mxnet::TShape()); - nnvm::DTypeVector arg_dtypes(idx.input_nodes().size(), -1); - StorageTypeVector arg_stypes(idx.input_nodes().size(), kUndefinedStorage); - for (size_t i = 0; i < num_forward_inputs_; ++i) { - const uint32_t nid = idx.input_nodes().at(i); - const std::string& name = idx[nid].source->attrs.name; - auto it1 = arg_shape_map->find(name); - if (arg_shape_map->end() != it1) { - arg_shapes[i] = it1->second; - } - auto it2 = arg_dtype_map->find(name); - if (arg_dtype_map->end() != it2) { - arg_dtypes[i] = it2->second; - } - auto it3 = arg_stype_map->find(name); - if (arg_stype_map->end() != it3) { - arg_stypes[i] = it3->second; - } - } - g = InferShape(std::move(g), std::move(arg_shapes), "__shape__"); - if (g.GetAttr("shape_num_unknown_nodes") != 0U) { - HandleInferShapeError(num_forward_inputs_, g.indexed_graph(), - g.GetAttr("shape")); - } - - g = InferType(std::move(g), std::move(arg_dtypes), "__dtype__"); - if (g.GetAttr("dtype_num_unknown_nodes") != 0U) { - HandleInferTypeError(num_forward_inputs_, g.indexed_graph(), - g.GetAttr("dtype")); - } - - g = InferStorageType(std::move(g), std::move(arg_stypes), "__storage_type__"); - if (g.GetAttr("storage_type_num_unknown_nodes") != 0U) { - HandleInferStorageTypeError(num_forward_inputs_, g.indexed_graph(), - g.GetAttr("storage_type")); - } - - auto trt_groups = GetTrtCompatibleSubsets(g, shared_buffer); - for (const auto &trt_group : trt_groups) { - if (trt_group.size() > 1) { - g = ReplaceSubgraph(std::move(g), trt_group, shared_buffer); - g = ReinitGraph(std::move(g), default_ctx, ctx_map, in_arg_ctxes, arg_grad_ctxes, - aux_state_ctxes, grad_req_types, arg_shape_map, arg_dtype_map, - arg_stype_map, shared_buffer); - } - } - - InitArguments(g.indexed_graph(), g.GetAttr("shape"), - g.GetAttr("dtype"), - g.GetAttr("storage_type"), - *in_arg_ctxes, *arg_grad_ctxes, *aux_state_ctxes, - *grad_req_types, shared_arg_names, shared_exec, - shared_buffer, in_arg_vec, arg_grad_vec, aux_state_vec); - - // The above code of shape and dtype inferences and argument - // initialization is for simple_bind only. Regular bind operation - // should do this differently. - - // Initialize the rest attributes of the graph. - // This function can be called by regular bind - // operation flow as well. - FinishInitGraph(symbol, g, shared_exec, feed_dict); -} -/*! - * \brief Initialize in_args, arg_grads, and aux_states - * and their data_entry_ of the executor using - * shared_buffer from DataParallelExecutorGroup - * and shared_exec if available. - */ -void TrtGraphExecutor::InitArguments(const nnvm::IndexedGraph& idx, - const mxnet::ShapeVector& inferred_shapes, - const nnvm::DTypeVector& inferred_dtypes, - const StorageTypeVector& inferred_stypes, - const std::vector& in_arg_ctxes, - const std::vector& arg_grad_ctxes, - const std::vector& aux_state_ctxes, - const std::vector& grad_req_types, - const std::unordered_set& shared_arg_names, - const Executor* shared_exec, - std::unordered_map* shared_buffer, - std::vector* in_arg_vec, - std::vector* arg_grad_vec, - std::vector* aux_state_vec) { - // initialize in_args, arg_grads, and aux_states and populate grad_store_ - data_entry_.resize(idx.num_node_entries()); - size_t arg_top = 0, aux_top = 0; - const auto& mutable_nodes = idx.mutable_input_nodes(); - for (size_t i = 0; i < num_forward_inputs_; ++i) { - const uint32_t nid = idx.input_nodes().at(i); - const uint32_t eid = idx.entry_id(nid, 0); - const mxnet::TShape& inferred_shape = inferred_shapes[eid]; - const int inferred_dtype = inferred_dtypes[eid]; - const auto inferred_stype = (NDArrayStorageType) inferred_stypes[eid]; - const std::string& arg_name = idx[nid].source->attrs.name; - // aux_states - if (mutable_nodes.count(nid)) { - if (nullptr != shared_exec) { - const NDArray& aux_nd = shared_exec->aux_state_map().at(arg_name); - CHECK(inferred_stype == kDefaultStorage && aux_nd.storage_type() == kDefaultStorage) - << "Non-default storage type detected when creating auxilliary NDArray. The allocated " - << "memory of shared_exec.aux_array cannot be resued for argument: " - << arg_name << " for the current executor"; - CHECK_EQ(inferred_shape, aux_nd.shape()) - << "Inferred shape does not match shared_exec.aux_array's shape." - " Therefore, the allocated memory for shared_exec.aux_array cannot" - " be resued for creating auxilliary NDArray of the argument: " - << arg_name << " for the current executor"; - CHECK_EQ(inferred_dtype, aux_nd.dtype()) - << "Inferred dtype does not match shared_exec.aux_array's dtype." - " Therefore, the allocated memory for shared_exec.aux_array cannot" - " be resued for creating auxilliary NDArray of the argument: " - << arg_name << " for the current executor"; - aux_state_vec->emplace_back(aux_nd); - } else { - auto it = shared_buffer->find(arg_name); - if (it != shared_buffer->end()) { - aux_state_vec->push_back(std::move(it->second.Copy(aux_state_ctxes[aux_top]))); - } else { - aux_state_vec->push_back(std::move(InitZeros(inferred_stype, inferred_shape, - aux_state_ctxes[aux_top], inferred_dtype))); - } - } // if (has_shared_exec) - data_entry_[eid] = aux_state_vec->back(); - aux_state_map_.emplace(arg_name, aux_state_vec->back()); - ++aux_top; - } else { // in_args and grad for in_args - if (shared_arg_names.count(arg_name)) { // model parameter - // model parameter - if (nullptr != shared_exec) { - const NDArray& in_arg_nd = shared_exec->in_arg_map().at(arg_name); - auto arg_nd_stype = in_arg_nd.storage_type(); - // for model parameter, both default storage and row_sparse storage can be shared - bool shareable_arg_stype = inferred_stype == kDefaultStorage || - inferred_stype == kRowSparseStorage; - // try to reuse memory from shared_exec - CHECK(shareable_arg_stype) << "Inferred storage type " - << common::stype_string(inferred_stype) - << " does not support memory sharing with shared_exec.arg_array"; - CHECK_EQ(inferred_stype, arg_nd_stype) - << "Inferred stype does not match shared_exec.arg_array's stype" - " Therefore, the allocated memory for shared_exec.arg_array cannot" - " be resued for creating NDArray of the argument " - << arg_name << " for the current executor"; - CHECK_EQ(inferred_shape, in_arg_nd.shape()) - << "Inferred shape does not match shared_exec.arg_array's shape" - " Therefore, the allocated memory for shared_exec.arg_array cannot" - " be resued for creating NDArray of the argument " - << arg_name << " for the current executor"; - CHECK_EQ(inferred_dtype, in_arg_nd.dtype()) - << "Inferred dtype does not match shared_exec.arg_array's dtype" - " Therefore, the allocated memory for shared_exec.arg_array cannot" - " be resued for creating NDArray of the argument " - << arg_name << " for the current executor"; - in_arg_vec->emplace_back(in_arg_nd); - } else { - // doesn't have shared_exec, or non-default storage - EmplaceBackZeros(inferred_stype, inferred_shape, in_arg_ctxes[arg_top], - inferred_dtype, in_arg_vec); - } - // gradient for model parameter - if (kNullOp == grad_req_types[arg_top]) { - arg_grad_vec->emplace_back(); - } else { - auto grad_oid = grad_store_.size() + num_forward_outputs_; - auto grad_eid = idx.entry_id(idx.outputs()[grad_oid]); - auto grad_stype = (NDArrayStorageType) inferred_stypes[grad_eid]; - if (nullptr != shared_exec && grad_stype == kDefaultStorage && - shared_exec->arg_grad_map().at(arg_name).storage_type() == kDefaultStorage) { - // try to reuse memory from shared_exec - arg_grad_vec->emplace_back(shared_exec->arg_grad_map().at(arg_name)); - } else { - // no need to reuse memory from shared_exec for gradient of non-default storage - EmplaceBackZeros(grad_stype, inferred_shape, arg_grad_ctxes[arg_top], - inferred_dtype, arg_grad_vec); - } - grad_store_.emplace_back(grad_req_types[arg_top], arg_grad_vec->back()); - } - } else { // !shared_arg_names.count(arg_name) - // model parameter, row_sparse ndarray sharing enabled - auto it = shared_buffer->find(arg_name); - if (it != shared_buffer->end()) { - in_arg_vec->push_back(std::move(it->second.Copy(in_arg_ctxes[arg_top]))); - } else { - in_arg_vec->push_back(std::move(InitZeros(inferred_stype, inferred_shape, - in_arg_ctxes[arg_top], inferred_dtype))); - } - // gradient for model parameter, row_sparse ndarray sharing disabled - if (kNullOp == grad_req_types[arg_top]) { - arg_grad_vec->emplace_back(); - } else { - auto grad_oid = grad_store_.size() + num_forward_outputs_; - auto grad_eid = idx.entry_id(idx.outputs()[grad_oid]); - auto grad_stype = (NDArrayStorageType) inferred_stypes[grad_eid]; - bool enable_row_sparse_sharing = false; - arg_grad_vec->emplace_back(ReshapeOrCreate("grad of " + arg_name, inferred_shape, - inferred_dtype, grad_stype, - arg_grad_ctxes[arg_top], shared_buffer, - enable_row_sparse_sharing)); - grad_store_.emplace_back(grad_req_types[arg_top], arg_grad_vec->back()); - } // if (kNullOp == grad_req_types[arg_top]) - } // if (shared_arg_names.count(arg_name)) - in_arg_map_.emplace(arg_name, in_arg_vec->back()); - if (!arg_grad_vec->back().is_none()) { - arg_grad_map_.emplace(arg_name, arg_grad_vec->back()); - } - data_entry_[eid] = in_arg_vec->back(); - ++arg_top; - } - } -} - - - /*! - * \brief This function is triggered after each tensorrt subgraph replacement pass. - * Reset arguments of GraphExecutor::Init(...) as some variables (weights and biases) - * are absorbed into the TRT engine it also it reruns attributes inferences accordingly - * to the new topology. - */ -Graph TrtGraphExecutor::ReinitGraph(Graph&& g, const Context &default_ctx, - const std::map &ctx_map, - std::vector *in_arg_ctxes, - std::vector *arg_grad_ctxes, - std::vector *aux_state_ctxes, - std::vector *grad_req_types, - std::unordered_map *arg_shape_map, - std::unordered_map *arg_dtype_map, - std::unordered_map *arg_stype_map, - std::unordered_map *params_map) { - std::unordered_set to_remove_params; - for (auto& el : *params_map) { - to_remove_params.insert(el.first); - } - - DFSVisit(g.outputs, [&to_remove_params](const nnvm::NodePtr n) { - to_remove_params.erase(n->attrs.name); - }); - - for (auto& el : to_remove_params) { - params_map->erase(el); - arg_shape_map->erase(el); - arg_dtype_map->erase(el); - arg_stype_map->erase(el); - } - const auto &idx = g.indexed_graph(); - num_forward_inputs_ = idx.input_nodes().size(); - in_arg_ctxes->resize(num_forward_inputs_ - idx.mutable_input_nodes().size()); - arg_grad_ctxes->resize(num_forward_inputs_ - idx.mutable_input_nodes().size()); - grad_req_types->resize(num_forward_inputs_ - idx.mutable_input_nodes().size()); - aux_state_ctxes->resize(idx.mutable_input_nodes().size()); - - // create "device" and "context" attrs for the graph - g = AssignContext(g, default_ctx, ctx_map, *in_arg_ctxes, *arg_grad_ctxes, - *aux_state_ctxes, *grad_req_types, num_forward_inputs_, - num_forward_outputs_); - - // get number of nodes used in forward pass - num_forward_nodes_ = 0; - for (size_t i = 0; i < num_forward_outputs_; ++i) { - num_forward_nodes_ = std::max( - num_forward_nodes_, static_cast(idx.outputs()[i].node_id + 1)); - } - mxnet::ShapeVector arg_shapes(idx.input_nodes().size(), mxnet::TShape()); - nnvm::DTypeVector arg_dtypes(idx.input_nodes().size(), -1); - StorageTypeVector arg_stypes(idx.input_nodes().size(), kUndefinedStorage); - for (size_t i = 0; i < num_forward_inputs_; ++i) { - const uint32_t nid = idx.input_nodes().at(i); - const std::string &name = idx[nid].source->attrs.name; - auto it1 = arg_shape_map->find(name); - if (arg_shape_map->end() != it1) { - arg_shapes[i] = it1->second; - } - auto it2 = arg_dtype_map->find(name); - if (arg_dtype_map->end() != it2) { - arg_dtypes[i] = it2->second; - } - auto it3 = arg_stype_map->find(name); - if (arg_stype_map->end() != it3) { - arg_stypes[i] = it3->second; - } - } - g = InferShape(std::move(g), std::move(arg_shapes), "__shape__"); - if (g.GetAttr("shape_num_unknown_nodes") != 0U) { - HandleInferShapeError(num_forward_inputs_, g.indexed_graph(), - g.GetAttr("shape")); - } - - g = InferType(std::move(g), std::move(arg_dtypes), "__dtype__"); - if (g.GetAttr("dtype_num_unknown_nodes") != 0U) { - HandleInferTypeError(num_forward_inputs_, g.indexed_graph(), - g.GetAttr("dtype")); - } - - g = InferStorageType(std::move(g), std::move(arg_stypes), "__storage_type__"); - - if (g.GetAttr("storage_type_num_unknown_nodes") != 0U) { - HandleInferStorageTypeError(num_forward_inputs_, g.indexed_graph(), - g.GetAttr("storage_type")); - } - - return g; -} - - -/*! - * \brief Return the "optimized" symbol contained in the graph. - * For optimization pass such as TensorRT pass - */ -nnvm::Symbol TrtGraphExecutor::GetOptimizedSymbol() { - Symbol ret; - ret.outputs = std::vector(graph_.outputs.begin(), - graph_.outputs.begin() + num_forward_outputs_); - return ret.Copy(); -} - -Executor *TrtGraphExecutor::TensorRTBind(nnvm::Symbol symbol, - const Context &default_ctx, - const std::map &group2ctx, - std::vector *in_arg_ctxes, - std::vector *arg_grad_ctxes, - std::vector *aux_state_ctxes, - std::unordered_map - *arg_shape_map, - std::unordered_map *arg_dtype_map, - std::unordered_map *arg_stype_map, - std::vector *grad_req_types, - const std::unordered_set ¶m_names, - std::vector *in_args, - std::vector *arg_grads, - std::vector *aux_states, - std::unordered_map *shared_buffer, - Executor *shared_exec) { - auto exec = new exec::TrtGraphExecutor(); - exec->Init(std::move(symbol), default_ctx, group2ctx, - in_arg_ctxes, arg_grad_ctxes, aux_state_ctxes, - arg_shape_map, arg_dtype_map, arg_stype_map, - grad_req_types, param_names, - in_args, arg_grads, aux_states, - shared_buffer, shared_exec); - return exec; -} - -} // namespace exec - -} // namespace mxnet - -#endif // MXNET_USE_TENSORRT diff --git a/src/executor/trt_graph_executor.h b/src/executor/trt_graph_executor.h deleted file mode 100644 index a4ec5bf657ae..000000000000 --- a/src/executor/trt_graph_executor.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef MXNET_EXECUTOR_TRT_GRAPH_EXECUTOR_H_ -#define MXNET_EXECUTOR_TRT_GRAPH_EXECUTOR_H_ - -#if MXNET_USE_TENSORRT - -#include -#include -#include - -#include "./graph_executor.h" - -namespace mxnet { - -namespace exec { - -class TrtGraphExecutor : public GraphExecutor { - public: - static Executor* TensorRTBind(nnvm::Symbol symbol, - const Context& default_ctx, - const std::map& group2ctx, - std::vector *in_arg_ctxes, - std::vector* arg_grad_ctxes, - std::vector* aux_state_ctxes, - std::unordered_map* arg_shape_map, - std::unordered_map* arg_dtype_map, - std::unordered_map* arg_stype_map, - std::vector* grad_req_types, - const std::unordered_set& param_names, - std::vector* in_args, - std::vector* arg_grads, - std::vector* aux_states, - std::unordered_map* - shared_data_arrays = nullptr, - Executor* shared_exec = nullptr); - - virtual void Init(nnvm::Symbol symbol, - const Context& default_ctx, - const std::map& ctx_map, - std::vector *in_arg_ctxes, - std::vector *arg_grad_ctxes, - std::vector *aux_state_ctxes, - std::unordered_map *arg_shape_map, - std::unordered_map *arg_dtype_map, - std::unordered_map *arg_stype_map, - std::vector *grad_req_types, - const std::unordered_set& shared_arg_names, - std::vector* in_arg_vec, - std::vector* arg_grad_vec, - std::vector* aux_state_vec, - std::unordered_map* shared_buffer = nullptr, - Executor* shared_exec = nullptr, - const nnvm::NodeEntryMap& feed_dict - = nnvm::NodeEntryMap()); - - // Returns symbol representing the TRT optimized graph for comparison purposes. - nnvm::Symbol GetOptimizedSymbol(); - - protected: - Graph ReinitGraph(Graph&& g, const Context &default_ctx, - const std::map &ctx_map, - std::vector *in_arg_ctxes, - std::vector *arg_grad_ctxes, - std::vector *aux_state_ctxes, - std::vector *grad_req_types, - std::unordered_map *arg_shape_map, - std::unordered_map *arg_dtype_map, - std::unordered_map *arg_stype_map, - std::unordered_map *params_map); - - void InitArguments(const nnvm::IndexedGraph& idx, - const mxnet::ShapeVector& inferred_shapes, - const nnvm::DTypeVector& inferred_dtypes, - const StorageTypeVector& inferred_stypes, - const std::vector& in_arg_ctxes, - const std::vector& arg_grad_ctxes, - const std::vector& aux_state_ctxes, - const std::vector& grad_req_types, - const std::unordered_set& shared_arg_names, - const Executor* shared_exec, - std::unordered_map* shared_buffer, - std::vector* in_arg_vec, - std::vector* arg_grad_vec, - std::vector* aux_state_vec) override; -}; - -} // namespace exec - -} // namespace mxnet - -#endif // MXNET_USE_TENSORRT - -#endif // MXNET_EXECUTOR_TRT_GRAPH_EXECUTOR_H_ diff --git a/src/operator/contrib/tensorrt-inl.h b/src/operator/contrib/tensorrt-inl.h deleted file mode 100644 index 062d22e35795..000000000000 --- a/src/operator/contrib/tensorrt-inl.h +++ /dev/null @@ -1,79 +0,0 @@ -#ifndef MXNET_OPERATOR_CONTRIB_TENSORRT_INL_H_ -#define MXNET_OPERATOR_CONTRIB_TENSORRT_INL_H_ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * Copyright (c) 2018 by Contributors - * \file tensorrt-inl.h - * \brief TensorRT Operator - * \author Marek Kolodziej, Clement Fuji Tsang -*/ - -#if MXNET_USE_TENSORRT - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "nnvm_to_onnx-inl.h" -#include "../operator_common.h" -#include "../../common/utils.h" -#include "../../common/serialization.h" -#include "../../executor/exec_pass.h" -#include "../../executor/graph_executor.h" -#include "../../executor/onnx_to_tensorrt.h" - -namespace mxnet { -namespace op { - -using namespace nnvm; -using int64 = ::google::protobuf::int64; - - -using trt_name_to_idx = std::map; - - -struct TRTEngineParam { - nvinfer1::IExecutionContext* trt_executor; - std::vector > binding_map; -}; - -} // namespace op -} // namespace mxnet - -#endif // MXNET_USE_TENSORRT - -#endif // MXNET_OPERATOR_CONTRIB_TENSORRT_INL_H_ diff --git a/src/operator/contrib/tensorrt.cc b/src/operator/contrib/tensorrt.cc deleted file mode 100644 index 5b3df70fd825..000000000000 --- a/src/operator/contrib/tensorrt.cc +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * Copyright (c) 2018 by Contributors - * \file trt.cc - * \brief TensorRT operation registration - * \author Marek Kolodziej, Clement Fuji Tsang -*/ - -#if MXNET_USE_TENSORRT - -#include "./tensorrt-inl.h" - -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "../../common/serialization.h" -#include "../../common/utils.h" - -namespace mxnet { -namespace op { - -OpStatePtr GetPtrMapping(nvinfer1::ICudaEngine* trt_engine, - nnvm_to_onnx::NameToIdx_t input_map, - nnvm_to_onnx::NameToIdx_t output_map) { - TRTEngineParam param; - for (int b = 0; b < trt_engine->getNbBindings(); ++b) { - const std::string& binding_name = trt_engine->getBindingName(b); - if (trt_engine->bindingIsInput(b)) { - param.binding_map.emplace_back(input_map[binding_name], - nnvm_to_onnx::TypeIO::Inputs); - } else { - param.binding_map.emplace_back(output_map[binding_name], - nnvm_to_onnx::TypeIO::Outputs); - } - } - param.trt_executor = trt_engine->createExecutionContext(); - return OpStatePtr::Create(param); -} - -OpStatePtr TRTCreateState(const nnvm::NodeAttrs& attrs, Context /*ctx*/, - const mxnet::ShapeVector& /*ishape*/, - const std::vector& /*itype*/) { - const auto& node_param = nnvm::get(attrs.parsed); - - ::onnx::ModelProto model_proto; - bool success = model_proto.ParseFromString(node_param.serialized_onnx_graph); - if (!success) { - LOG(FATAL) << "Problems parsing serialized ONNX model."; - } - auto graph = model_proto.graph(); - auto first_input_type = graph.input(0).type().tensor_type(); - auto dim_value = first_input_type.shape().dim(0).dim_value(); - auto batch_size = static_cast(dim_value); - // Need to set up max workspace size based on device properties - nvinfer1::ICudaEngine* const trt_engine = ::onnx_to_tensorrt::onnxToTrtCtx( - node_param.serialized_onnx_graph, batch_size, 1 << 30); - - nnvm_to_onnx::NameToIdx_t output_map; - for (auto& el : node_param.output_map) { - output_map[el.first] = std::get<0>(el.second); - } - return GetPtrMapping(trt_engine, node_param.input_map, output_map); -} - -void TRTParamParser(nnvm::NodeAttrs* attrs) { - ONNXParam param_; - - try { - param_.Init(attrs->dict); - common::Deserialize(¶m_.input_map, param_.serialized_input_map); - common::Deserialize(¶m_.output_map, param_.serialized_output_map); - param_.onnx_pb_graph.ParseFromString(param_.serialized_onnx_graph); - } catch (const dmlc::ParamError& e) { - std::ostringstream os; - os << e.what(); - os << ", in operator " << attrs->op->name << "(" - << "name=\"" << attrs->name << "\""; - for (const auto& k : attrs->dict) { - os << ", " << k.first << "=\"" << k.second << "\""; - } - os << ")"; - throw dmlc::ParamError(os.str()); - } - - attrs->parsed = std::move(param_); -} - -inline bool TRTInferShape(const NodeAttrs& attrs, mxnet::ShapeVector* /*in_shape*/, - mxnet::ShapeVector* out_shape) { - const auto &node_param = nnvm::get(attrs.parsed); - for (auto& el : node_param.output_map) { - (*out_shape)[std::get<0>(el.second)] = std::get<1>(el.second); - } - return true; -} - -inline bool TRTInferStorageType(const NodeAttrs& /*attrs*/, const int /*dev_mask*/, - DispatchMode* dispatch_mode, - std::vector* /*in_storage_type*/, - std::vector* out_storage_type) { - return storage_type_assign(out_storage_type, mxnet::kDefaultStorage, - dispatch_mode, DispatchMode::kFCompute); -} - -inline bool TRTInferType(const NodeAttrs& attrs, std::vector* /*in_dtype*/, - std::vector* out_dtype) { - const auto& node_param = nnvm::get(attrs.parsed); - for (auto& el : node_param.output_map) { - (*out_dtype)[std::get<0>(el.second)] = std::get<3>(el.second); - } - return true; -} - -inline std::vector TRTListInputNames(const NodeAttrs& attrs) { - std::vector output; - const auto& node_param = nnvm::get(attrs.parsed); - output.resize(node_param.input_map.size()); - for (auto& el : node_param.input_map) { - output[el.second] = el.first; - } - return output; -} - -inline std::vector TRTListOutputNames(const NodeAttrs& attrs) { - std::vector output; - const auto& node_param = nnvm::get(attrs.parsed); - output.resize(node_param.output_map.size()); - for (auto& el : node_param.output_map) { - output[std::get<0>(el.second)] = el.first; - } - return output; -} - -NNVM_REGISTER_OP(_trt_op) - .describe(R"code(TRT operation (one engine) -)code" ADD_FILELINE) - .set_num_inputs([](const NodeAttrs& attrs) { - const auto& node_param = nnvm::get(attrs.parsed); - return node_param.input_map.size(); - }) - .set_num_outputs([](const NodeAttrs& attrs) { - const auto& node_param = nnvm::get(attrs.parsed); - return node_param.output_map.size(); - }) - .set_attr_parser(TRTParamParser) - .set_attr("FInferShape", TRTInferShape) - .set_attr("FInferType", TRTInferType) - .set_attr("FListInputNames", TRTListInputNames) - .set_attr("FListOutputNames", TRTListOutputNames) - .set_attr("FCreateOpState", TRTCreateState) - .set_attr("FInferStorageType", TRTInferStorageType); - -} // namespace op -} // namespace mxnet - -#endif // MXNET_USE_TENSORRT diff --git a/src/operator/subgraph/build_subgraph.cc b/src/operator/subgraph/build_subgraph.cc index 32ea341d0834..28b89613ee86 100644 --- a/src/operator/subgraph/build_subgraph.cc +++ b/src/operator/subgraph/build_subgraph.cc @@ -509,9 +509,9 @@ void FindOutputEntries(nnvm::Graph* g, void CutGraphInputs(const std::vector &input_entries, std::vector *orig_entries, const bool skip_var = false) { - orig_entries->resize(input_entries.size()); + orig_entries->reserve(input_entries.size()); // map for creating unique var nodes for deduplicating entries from the same node - std::unordered_map name_count_map; + std::unordered_map new_node_map; for (size_t i = 0; i < input_entries.size(); ++i) { nnvm::NodeEntry *e = input_entries[i]; // If the node is a variable itself, we may want to skip the node. @@ -519,19 +519,17 @@ void CutGraphInputs(const std::vector &input_entries, continue; } - orig_entries->at(i) = *e; nnvm::Symbol sym; sym.outputs.push_back(*e); const auto output_names = sym.ListOutputNames(); CHECK_EQ(output_names.size(), 1U); const std::string& var_name = output_names[0]; - auto it = name_count_map.find(var_name); - if (name_count_map.end() == it) { - name_count_map.emplace(var_name, 0); - } else { - ++(it->second); + auto it = new_node_map.find(var_name); + if (it == new_node_map.end()) { + orig_entries->push_back(*e); + new_node_map[var_name] = nnvm::CreateVariableNode(var_name); } - nnvm::NodePtr n = nnvm::CreateVariableNode(var_name + std::to_string(name_count_map[var_name])); + nnvm::NodePtr n = new_node_map[var_name]; *e = nnvm::NodeEntry{n, 0, 0}; } } diff --git a/src/operator/contrib/nnvm_to_onnx-inl.h b/src/operator/subgraph/tensorrt/nnvm_to_onnx-inl.h similarity index 61% rename from src/operator/contrib/nnvm_to_onnx-inl.h rename to src/operator/subgraph/tensorrt/nnvm_to_onnx-inl.h index 052948521ba8..4a88aee886db 100644 --- a/src/operator/contrib/nnvm_to_onnx-inl.h +++ b/src/operator/subgraph/tensorrt/nnvm_to_onnx-inl.h @@ -1,5 +1,5 @@ -#ifndef MXNET_OPERATOR_CONTRIB_NNVM_TO_ONNX_INL_H_ -#define MXNET_OPERATOR_CONTRIB_NNVM_TO_ONNX_INL_H_ +#ifndef MXNET_OPERATOR_SUBGRAPH_TENSORRT_NNVM_TO_ONNX_INL_H_ +#define MXNET_OPERATOR_SUBGRAPH_TENSORRT_NNVM_TO_ONNX_INL_H_ /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -20,76 +20,23 @@ */ /*! - * Copyright (c) 2018 by Contributors - * \file tensorrt-inl.h - * \brief TensorRT Operator + * Copyright (c) 2019 by Contributors + * \file nnvm_to_onnx-inl.h + * \brief Conversion from NNVM to ONNX for TensorRT * \author Marek Kolodziej, Clement Fuji Tsang */ #if MXNET_USE_TENSORRT -#include -#include -#include -#include -#include #include -#include #include #include -#include -#include -#include -#include -#include -#include -#include #include -#include "../operator_common.h" -#include "../../common/utils.h" -#include "../../common/serialization.h" - namespace mxnet { namespace op { - -namespace nnvm_to_onnx { - enum class TypeIO { Inputs = 0, Outputs = 1 }; - using NameToIdx_t = std::map; - using InferenceTuple_t = std::tuple; - using InferenceMap_t = std::map; -} // namespace nnvm_to_onnx - -struct ONNXParam : public dmlc::Parameter { - std::string serialized_onnx_graph; - std::string serialized_input_map; - std::string serialized_output_map; - nnvm_to_onnx::NameToIdx_t input_map; - nnvm_to_onnx::InferenceMap_t output_map; - ::onnx::ModelProto onnx_pb_graph; - - ONNXParam() = default; - - ONNXParam(const ::onnx::ModelProto& onnx_graph, - const nnvm_to_onnx::InferenceMap_t& input_map, - const nnvm_to_onnx::NameToIdx_t& output_map) { - common::Serialize(input_map, &serialized_input_map); - common::Serialize(output_map, &serialized_output_map); - onnx_graph.SerializeToString(&serialized_onnx_graph); - } - -DMLC_DECLARE_PARAMETER(ONNXParam) { - DMLC_DECLARE_FIELD(serialized_onnx_graph) - .describe("Serialized ONNX graph"); - DMLC_DECLARE_FIELD(serialized_input_map) - .describe("Map from inputs to topological order as input."); - DMLC_DECLARE_FIELD(serialized_output_map) - .describe("Map from outputs to order in g.outputs."); - } -}; - namespace nnvm_to_onnx { using namespace nnvm; @@ -99,24 +46,26 @@ using int64 = ::google::protobuf::int64; std::unordered_map GetPlaceholderShapes(const ShapeVector& shape_inputs, const nnvm::IndexedGraph& ig); +std::unordered_map GetPlaceholderDTypes(const DTypeVector& +dtype_inputs, + const nnvm::IndexedGraph& ig); + std::unordered_map GetOutputLookup(const nnvm::IndexedGraph& ig); void ConvertPlaceholder( const std::string& node_name, - const std::unordered_map& placeholder_shapes, + const std::unordered_map& placeholder_shapes, + const std::unordered_map& placeholder_dtypes, GraphProto* graph_proto); void ConvertConstant(GraphProto* graph_proto, const std::string& node_name, - std::unordered_map* shared_buffer); + const std::unordered_map* const params_map); -void ConvertOutput(op::nnvm_to_onnx::InferenceMap_t* trt_output_map, - GraphProto* graph_proto, +void ConvertOutput(GraphProto* graph_proto, const std::unordered_map::iterator& out_iter, - const std::string& node_name, - const nnvm::Graph& g, - const StorageTypeVector& storage_types, - const DTypeVector& dtypes); + const std::string& node_name, const ShapeVector& shapes, + const DTypeVector& dtypes, const nnvm::IndexedGraph &ig); typedef void (*ConverterFunction)(NodeProto *node_proto, const NodeAttrs &attrs, @@ -137,6 +86,11 @@ void ConvertPooling(NodeProto *node_proto, const nnvm::IndexedGraph &ig, const array_view &inputs); +void ConvertRelu(NodeProto *node_proto, + const NodeAttrs &attrs, + const nnvm::IndexedGraph &ig, + const array_view &inputs); + void ConvertActivation(NodeProto *node_proto, const NodeAttrs &attrs, const nnvm::IndexedGraph &ig, @@ -157,6 +111,11 @@ void ConvertFlatten(NodeProto *node_proto, const nnvm::IndexedGraph &ig, const array_view &inputs); +void ConvertDropout(NodeProto *node_proto, + const NodeAttrs &attrs, + const nnvm::IndexedGraph &ig, + const array_view &inputs); + void ConvertBatchNorm(NodeProto *node_proto, const NodeAttrs &attrs, const nnvm::IndexedGraph &ig, @@ -167,19 +126,39 @@ void ConvertElementwiseAdd(NodeProto *node_proto, const nnvm::IndexedGraph &ig, const array_view &inputs); -ONNXParam ConvertNnvmGraphToOnnx( - const nnvm::Graph &g, - std::unordered_map* shared_buffer); +void ConvertConcatenate(NodeProto *node_proto, + const NodeAttrs &attrs, + const nnvm::IndexedGraph &ig, + const array_view &inputs); + +void ConvertClip(NodeProto *node_proto, + const NodeAttrs &attrs, + const nnvm::IndexedGraph &ig, + const array_view &inputs); + +void ConvertPad(NodeProto* node_proto, + const NodeAttrs & attrs, + const nnvm::IndexedGraph &ig, + const array_view &inputs); + +std::string ConvertNnvmGraphToOnnx(const nnvm::Graph &g, + const std::unordered_map* const params_map); static const std::unordered_map converter_map = { - {"Convolution", ConvertConvolution}, - {"Pooling", ConvertPooling}, {"Activation", ConvertActivation}, - {"FullyConnected", ConvertFullyConnected}, - {"SoftmaxOutput", ConvertSoftmaxOutput}, - {"Flatten", ConvertFlatten}, {"BatchNorm", ConvertBatchNorm}, - {"elemwise_add", ConvertElementwiseAdd}}; + {"clip", ConvertClip}, + {"Convolution", ConvertConvolution}, + {"Concat", ConvertConcatenate}, + {"Dropout", ConvertDropout}, + {"elemwise_add", ConvertElementwiseAdd}, + {"Flatten", ConvertFlatten}, + {"FullyConnected", ConvertFullyConnected}, + {"Pad", ConvertPad}, + {"Pooling", ConvertPooling}, + {"relu", ConvertRelu}, + {"SoftmaxOutput", ConvertSoftmaxOutput} +}; } // namespace nnvm_to_onnx } // namespace op @@ -187,4 +166,4 @@ static const std::unordered_map converter_map = #endif // MXNET_USE_TENSORRT -#endif // MXNET_OPERATOR_CONTRIB_NNVM_TO_ONNX_INL_H_ +#endif // MXNET_OPERATOR_SUBGRAPH_TENSORRT_NNVM_TO_ONNX_INL_H_ diff --git a/src/operator/contrib/nnvm_to_onnx.cc b/src/operator/subgraph/tensorrt/nnvm_to_onnx.cc similarity index 68% rename from src/operator/contrib/nnvm_to_onnx.cc rename to src/operator/subgraph/tensorrt/nnvm_to_onnx.cc index 0c8bd79490e3..da89c2b476ee 100644 --- a/src/operator/contrib/nnvm_to_onnx.cc +++ b/src/operator/subgraph/tensorrt/nnvm_to_onnx.cc @@ -18,9 +18,9 @@ */ /*! - * Copyright (c) 2018 by Contributors - * \file trt.cc - * \brief TensorRT operation registration + * Copyright (c) 2019 by Contributors + * \file nnvm_to_onnx.cc + * \brief Conversion from NNVM to ONNX for TensorRT * \author Marek Kolodziej, Clement Fuji Tsang */ @@ -32,21 +32,17 @@ #include #include -#include -#include -#include -#include -#include - -#include "../../common/serialization.h" -#include "../../common/utils.h" -#include "../../ndarray/ndarray_function.h" -#include "../../operator/nn/activation-inl.h" -#include "../../operator/nn/batch_norm-inl.h" -#include "../../operator/nn/convolution-inl.h" -#include "../../operator/nn/fully_connected-inl.h" -#include "../../operator/nn/pooling-inl.h" -#include "../../operator/softmax_output-inl.h" +#include "../../../common/utils.h" +#include "../../../ndarray/ndarray_function.h" +#include "../../pad-inl.h" +#include "../../nn/activation-inl.h" +#include "../../nn/batch_norm-inl.h" +#include "../../nn/convolution-inl.h" +#include "../../nn/fully_connected-inl.h" +#include "../../nn/pooling-inl.h" +#include "../../nn/concat-inl.h" +#include "../../softmax_output-inl.h" +#include "../../tensor/matrix_op-inl.h" #if MXNET_USE_TENSORRT_ONNX_CHECKER #include @@ -54,36 +50,21 @@ namespace mxnet { namespace op { - -DMLC_REGISTER_PARAMETER(ONNXParam); - namespace nnvm_to_onnx { -op::ONNXParam ConvertNnvmGraphToOnnx( +std::string ConvertNnvmGraphToOnnx( const nnvm::Graph& g, - std::unordered_map* const shared_buffer) { + const std::unordered_map* const params_map) { static std::atomic_ulong subgraph_count = { 0 }; - op::ONNXParam onnx_param; - op::nnvm_to_onnx::NameToIdx_t onnx_input_map; - op::nnvm_to_onnx::InferenceMap_t onnx_output_map; + std::string serialized_onnx_graph; const nnvm::IndexedGraph& ig = g.indexed_graph(); - const auto& storage_types = g.GetAttr("storage_type"); const auto& dtypes = g.GetAttr("dtype"); - const auto& shape_inputs = g.GetAttr("shape_inputs"); - - // TODO(kellens): At the moment this check always passes no matter the weight dtypes used in your - // graph. We should first iterate over datatypes by name and ensure they're valid types - // (fp16 or fp32) and that they're uniform. Then ensure later conversions set tensor types - // correctly in ONNX. - for (auto& e : storage_types) { - if (e != mshadow::kFloat32) { - LOG(FATAL) << "ONNX converter does not support types other than float32 " - "right now."; - } - } + const auto& shapes = g.GetAttr("shape"); + const auto& dtype_inputs = g.GetAttr("dtype_inputs"); + const auto& shape_inputs = g.GetAttr("shape_inputs"); ModelProto model_proto; @@ -104,9 +85,9 @@ op::ONNXParam ConvertNnvmGraphToOnnx( auto subgraph_name_id = subgraph_count.fetch_add(1); graph_proto->set_name("MXNetTRTSubgraph" + std::to_string(subgraph_name_id)); - std::unordered_map placeholder_shapes = - GetPlaceholderShapes(shape_inputs, ig); - std::unordered_map output_lookup = GetOutputLookup(ig); + auto placeholder_shapes = GetPlaceholderShapes(shape_inputs, ig); + auto placeholder_dtypes = GetPlaceholderDTypes(dtype_inputs, ig); + auto output_lookup = GetOutputLookup(ig); uint32_t current_input = 0; // Can't do a foreach over IndexedGraph since it doesn't implement begin(), etc. @@ -121,18 +102,17 @@ op::ONNXParam ConvertNnvmGraphToOnnx( // placeholder if (source->is_variable()) { // Is this a placeholder? - if (shared_buffer->count(node_name) == 0) { + if (params_map->count(node_name) == 0) { // This fixes the problem with a SoftmaxOutput node during inference, but it's hacky. // Need to figure out how to properly fix it. if (node_name.find("label") != std::string::npos) { current_input++; continue; } - onnx_input_map.emplace(node_name, current_input++); - ConvertPlaceholder(node_name, placeholder_shapes, graph_proto); + ConvertPlaceholder(node_name, placeholder_shapes, placeholder_dtypes, graph_proto); } else { // If it's not a placeholder, then by exclusion it's a constant. - ConvertConstant(graph_proto, node_name, shared_buffer); + ConvertConstant(graph_proto, node_name, params_map); } // is_placeholder } else { // It's an op, rather than a "variable" (constant or placeholder) @@ -163,23 +143,18 @@ op::ONNXParam ConvertNnvmGraphToOnnx( auto out_iter = output_lookup.find(node_name); // We found an output if (out_iter != output_lookup.end()) { - ConvertOutput(&onnx_output_map, graph_proto, out_iter, node_name, g, - storage_types, dtypes); + ConvertOutput(graph_proto, out_iter, node_name, shapes, dtypes, ig); } // output found } // conversion function exists } // loop over i from 0 to num_nodes - model_proto.SerializeToString(&onnx_param.serialized_onnx_graph); - common::Serialize(onnx_input_map, - &onnx_param.serialized_input_map); - common::Serialize(onnx_output_map, - &onnx_param.serialized_output_map); + model_proto.SerializeToString(&serialized_onnx_graph); #if MXNET_USE_TENSORRT_ONNX_CHECKER onnx::checker::check_model(model_proto); #endif // MXNET_USE_TENSORRT_ONNX_CHECKER - return onnx_param; + return serialized_onnx_graph; } void ConvertConvolution(NodeProto* node_proto, const NodeAttrs& attrs, @@ -225,9 +200,10 @@ void ConvertConvolution(NodeProto* node_proto, const NodeAttrs& attrs, pads->set_name("pads"); pads->set_type(AttributeProto::INTS); - for (const dim_t kval : pad) { - pads->add_ints(static_cast(kval)); - pads->add_ints(static_cast(kval)); + for (int i =0; i < 2; i++) { + for (dim_t kval : pad) { + pads->add_ints(static_cast(kval)); + } } // strides @@ -295,6 +271,12 @@ void ConvertPooling(NodeProto* node_proto, const NodeAttrs& attrs, // not global pooling } // end ConvertPooling +void ConvertRelu(NodeProto* node_proto, const NodeAttrs& /*attrs*/, + const nnvm::IndexedGraph& /*ig*/, + const array_view& /*inputs*/) { + node_proto->set_op_type("Relu"); +} + void ConvertActivation(NodeProto* node_proto, const NodeAttrs& attrs, const nnvm::IndexedGraph& /*ig*/, const array_view& /*inputs*/) { @@ -411,7 +393,41 @@ void ConvertElementwiseAdd(NodeProto* node_proto, const NodeAttrs& /*attrs*/, node_proto->set_op_type("Add"); } -std::unordered_map GetPlaceholderShapes( +void ConvertConcatenate(NodeProto* node_proto, const NodeAttrs& attrs, + const nnvm::IndexedGraph& /*ig*/, + const array_view& /*inputs*/) { + const auto& _param = nnvm::get(attrs.parsed); + node_proto->set_op_type("Concat"); + node_proto->set_name(attrs.name); + // axis + AttributeProto* const axis = node_proto->add_attribute(); + axis->set_name("axis"); + axis->set_type(AttributeProto::INT); + axis->set_i(static_cast(_param.dim)); +} + +inline TensorProto_DataType ConvertDType(int dtype) { + switch (dtype) { + case mshadow::kFloat64: + return TensorProto_DataType_DOUBLE; + case mshadow::kFloat32: + return TensorProto_DataType_FLOAT; + case mshadow::kFloat16: + return TensorProto_DataType_FLOAT16; + case mshadow::kUint8: + return TensorProto_DataType_UINT8; + case mshadow::kInt32: + return TensorProto_DataType_INT32; + case mshadow::kInt8: + return TensorProto_DataType_INT8; + case mshadow::kInt64: + return TensorProto_DataType_INT64; + default: + return TensorProto_DataType_UNDEFINED; + } +} + +std::unordered_map GetPlaceholderShapes( const ShapeVector& shape_inputs, const nnvm::IndexedGraph& ig) { std::unordered_map placeholder_shapes; for (uint32_t i = 0; i < shape_inputs.size(); ++i) { @@ -425,6 +441,17 @@ std::unordered_map GetPlaceholderShapes( return placeholder_shapes; } +std::unordered_map GetPlaceholderDTypes( + const DTypeVector& dtype_inputs, const nnvm::IndexedGraph& ig) { + std::unordered_map placeholder_dtypes; + for (uint32_t i = 0; i < dtype_inputs.size(); ++i) { + std::string name = ig[ig.input_nodes()[i]].source->attrs.name; + int dtype = dtype_inputs[i]; + placeholder_dtypes.emplace(name, dtype); + } + return placeholder_dtypes; +} + std::unordered_map GetOutputLookup( const nnvm::IndexedGraph& ig) { std::unordered_map output_lookup; @@ -442,17 +469,17 @@ std::unordered_map GetOutputLookup( void ConvertPlaceholder( const std::string& node_name, - const std::unordered_map& placeholder_shapes, + const std::unordered_map& placeholder_shapes, + const std::unordered_map& placeholder_dtypes, GraphProto* const graph_proto) { auto val_info_proto = graph_proto->add_input(); auto type_proto = val_info_proto->mutable_type()->mutable_tensor_type(); auto shape_proto = type_proto->mutable_shape(); val_info_proto->set_name(node_name); - // Will support fp16, etc. in the near future - type_proto->set_elem_type(TensorProto_DataType_FLOAT); auto entry_shape = placeholder_shapes.find(node_name)->second; - + auto entry_dtype = placeholder_dtypes.find(node_name)->second; + type_proto->set_elem_type(ConvertDType(entry_dtype)); for (const auto& elem : entry_shape) { TensorShapeProto_Dimension* const tsp_dim = shape_proto->add_dim(); tsp_dim->set_dim_value(static_cast(elem)); @@ -461,38 +488,49 @@ void ConvertPlaceholder( void ConvertConstant( GraphProto* const graph_proto, const std::string& node_name, - std::unordered_map* const shared_buffer) { - TensorProto* const initializer_proto = graph_proto->add_initializer(); + const std::unordered_map* const params_map) { + TensorProto* const initializer_proto = graph_proto->add_initializer(); // Create initializer for constants initializer_proto->set_name(node_name); - // TODO(kellens): convert to fp16 if needed. - initializer_proto->set_data_type(TensorProto_DataType_FLOAT); - const NDArray nd = shared_buffer->find(node_name)->second; + const NDArray nd = params_map->find(node_name)->second; const TBlob& blob = nd.data(); - const mxnet::TShape shape = blob.shape_; + const TShape shape = blob.shape_; + const auto dtype = ConvertDType(nd.dtype()); + initializer_proto->set_data_type(dtype); for (auto& dim : shape) { initializer_proto->add_dims(static_cast(dim)); } auto size = shape.Size(); - // TODO(kellens): Note hard coded float32 size assumed. - std::shared_ptr shared_data_ptr(new float[size]); - float* const data_ptr = shared_data_ptr.get(); - nd.SyncCopyToCPU(static_cast(data_ptr), size); - for (size_t blob_idx = 0; blob_idx < size; ++blob_idx) { - initializer_proto->add_float_data(data_ptr[blob_idx]); + if (dtype == TensorProto_DataType_FLOAT) { + std::shared_ptr shared_data_ptr(new float[size]); + float* const data_ptr = shared_data_ptr.get(); + nd.SyncCopyToCPU(static_cast(data_ptr), size); + + for (size_t blob_idx = 0; blob_idx < size; ++blob_idx) { + initializer_proto->add_float_data(data_ptr[blob_idx]); + } + } else if (dtype == TensorProto_DataType_FLOAT16) { + std::shared_ptr shared_data_ptr(new uint16_t[size]); + uint16_t* const data_ptr = shared_data_ptr.get(); + nd.SyncCopyToCPU(static_cast(data_ptr), size); + for (size_t blob_idx = 0; blob_idx < size; ++blob_idx) { + initializer_proto->add_int32_data( + reinterpret_cast(data_ptr)[blob_idx]); + } + } else { + LOG(FATAL) << "dtype not supported for variables: " << node_name; } // Create inputs for constants. ValueInfoProto* const input_proto = graph_proto->add_input(); input_proto->set_name(node_name); - // TODO(kellens): (fp16 support) - input_proto->mutable_type()->mutable_tensor_type()->set_elem_type(TensorProto_DataType_FLOAT); + input_proto->mutable_type()->mutable_tensor_type()->set_elem_type(dtype); for (auto& dim : shape) { auto new_dim = input_proto->mutable_type()->mutable_tensor_type()->mutable_shape()->add_dim(); new_dim->set_dim_value(static_cast(dim)); @@ -500,37 +538,98 @@ void ConvertConstant( } void ConvertOutput( - op::nnvm_to_onnx::InferenceMap_t* const output_map, GraphProto* const graph_proto, const std::unordered_map::iterator& out_iter, - const std::string& node_name, const nnvm::Graph& g, - const StorageTypeVector& storage_types, const DTypeVector& dtypes) { - const nnvm::IndexedGraph& ig = g.indexed_graph(); + const std::string& node_name, const ShapeVector& shapes, + const DTypeVector& dtypes, const nnvm::IndexedGraph &ig) { uint32_t out_idx = ig.entry_id(ig.outputs()[out_iter->second]); - mxnet::TShape out_shape = g.GetAttr("shape")[out_idx]; - int storage_type = storage_types[out_idx]; int dtype = dtypes[out_idx]; - - // This should work with fp16 as well - op::nnvm_to_onnx::InferenceTuple_t out_tuple{out_iter->second, out_shape, storage_type, - dtype}; - - output_map->emplace(node_name, out_tuple); - auto graph_out = graph_proto->add_output(); auto tensor_type = graph_out->mutable_type()->mutable_tensor_type(); auto tensor_shape_proto = tensor_type->mutable_shape(); graph_out->set_name(node_name); // Also support fp16. - tensor_type->set_elem_type(TensorProto_DataType_FLOAT); + tensor_type->set_elem_type(ConvertDType(dtype)); - for (int64_t dim_shp : out_shape) { + for (int64_t dim_shp : shapes[out_idx]) { TensorShapeProto_Dimension* const tsp_dim = tensor_shape_proto->add_dim(); tsp_dim->set_dim_value(static_cast(dim_shp)); } } +void ConvertClip(NodeProto* node_proto, const NodeAttrs& attrs, + const nnvm::IndexedGraph& /*ig*/, + const array_view& /*inputs*/) { + const auto param = nnvm::get(attrs.parsed); + + node_proto->set_op_type("Clip"); + + // max + AttributeProto* const a_max = node_proto->add_attribute(); + a_max->set_name("max"); + a_max->set_type(AttributeProto::FLOAT); + a_max->set_f(static_cast(param.a_max)); + + // min + AttributeProto* const a_min = node_proto->add_attribute(); + a_min->set_name("min"); + a_min->set_type(AttributeProto::FLOAT); + a_min->set_f(static_cast(param.a_min)); +} + +void ConvertPad(NodeProto* node_proto, const NodeAttrs& attrs, + const nnvm::IndexedGraph& /*ig*/, + const array_view& /*inputs*/) { + const auto param = nnvm::get(attrs.parsed); + + node_proto->set_op_type("Pad"); + + // mode + AttributeProto* const mode = node_proto->add_attribute(); + mode->set_name("mode"); + mode->set_type(AttributeProto::STRING); + switch (param.mode) { + case op::pad_enum::kConstant: + mode->set_s("constant"); + break; + case op::pad_enum::kEdge: + mode->set_s("edge"); + break; + case op::pad_enum::kReflect: + mode->set_s("reflect"); + break; + default: + throw dmlc::Error("Such mode of padding doesn't exist doesn't exist"); + } + + // pads + AttributeProto* const pads = node_proto->add_attribute(); + pads->set_name("pads"); + pads->set_type(AttributeProto::INTS); + + std::vector pad_begin; + std::vector pad_end; + for (int st = 0; st < 2; ++st) { + for (auto it = param.pad_width.begin() + st; + it != param.pad_width.end(); it += 2) { + pads->add_ints(static_cast(*it)); + } + } + + // value + AttributeProto* const value = node_proto->add_attribute(); + value->set_name("value"); + value->set_type(AttributeProto::FLOAT); + value->set_f(param.constant_value); +} + +void ConvertDropout(NodeProto* node_proto, const NodeAttrs& attrs, + const nnvm::IndexedGraph& /*ig*/, + const array_view& /*inputs*/) { + node_proto->set_op_type("Dropout"); +} + } // namespace nnvm_to_onnx } // namespace op } // namespace mxnet diff --git a/src/executor/onnx_to_tensorrt.cc b/src/operator/subgraph/tensorrt/onnx_to_tensorrt.cc similarity index 89% rename from src/executor/onnx_to_tensorrt.cc rename to src/operator/subgraph/tensorrt/onnx_to_tensorrt.cc index f7fbc8f81359..7dbc54bc1a63 100644 --- a/src/executor/onnx_to_tensorrt.cc +++ b/src/operator/subgraph/tensorrt/onnx_to_tensorrt.cc @@ -18,7 +18,7 @@ */ /*! - * Copyright (c) 2018 by Contributors + * Copyright (c) 2019 by Contributors * \file onnx_to_tensorrt.cc * \brief TensorRT integration with the MXNet executor * \author Marek Kolodziej, Clement Fuji Tsang @@ -36,6 +36,9 @@ #include #include #include +#include +#include + #include #include @@ -80,7 +83,7 @@ void PrintVersion() { << NV_TENSORRT_PATCH << endl; } -nvinfer1::ICudaEngine* onnxToTrtCtx( +std::tuple onnxToTrtCtx( const std::string& onnx_model, int32_t max_batch_size, size_t max_workspace_size, @@ -91,14 +94,13 @@ nvinfer1::ICudaEngine* onnxToTrtCtx( TRT_Logger trt_logger(verbosity); auto trt_builder = InferObject(nvinfer1::createInferBuilder(trt_logger)); auto trt_network = InferObject(trt_builder->createNetwork()); - auto trt_parser = InferObject(nvonnxparser::createParser(trt_network.get(), trt_logger)); + auto trt_parser = nvonnxparser::createParser(trt_network.get(), trt_logger); ::ONNX_NAMESPACE::ModelProto parsed_model; // We check for a valid parse, but the main effect is the side effect // of populating parsed_model if (!parsed_model.ParseFromString(onnx_model)) { throw dmlc::Error("Could not parse ONNX from string"); } - if ( !trt_parser->parse(onnx_model.c_str(), onnx_model.size()) ) { size_t nerror = trt_parser->getNbErrors(); for ( size_t i=0; i < nerror; ++i ) { @@ -127,19 +129,18 @@ nvinfer1::ICudaEngine* onnxToTrtCtx( } throw dmlc::Error("Cannot parse ONNX into TensorRT Engine"); } - - bool fp16 = trt_builder->platformHasFastFp16(); - + if (dmlc::GetEnv("MXNET_TENSORRT_USE_FP16", true)) { + if (trt_builder->platformHasFastFp16()) { + trt_builder->setFp16Mode(true); + } else { + LOG(WARNING) << "TensorRT can't use fp16 on this platform"; + } + } trt_builder->setMaxBatchSize(max_batch_size); trt_builder->setMaxWorkspaceSize(max_workspace_size); - if ( fp16 && dmlc::GetEnv("MXNET_TENSORRT_USE_FP16_FOR_FP32", false) ) { - LOG(INFO) << "WARNING: TensorRT using fp16 given original MXNet graph in fp32 !!!"; - trt_builder->setHalf2Mode(true); - } - trt_builder->setDebugSync(debug_builder); nvinfer1::ICudaEngine* trt_engine = trt_builder->buildCudaEngine(*trt_network.get()); - return trt_engine; + return std::make_tuple(trt_engine, trt_parser); } } // namespace onnx_to_tensorrt diff --git a/src/executor/onnx_to_tensorrt.h b/src/operator/subgraph/tensorrt/onnx_to_tensorrt.h similarity index 88% rename from src/executor/onnx_to_tensorrt.h rename to src/operator/subgraph/tensorrt/onnx_to_tensorrt.h index 259cfce7c332..3e8ea1bf9ee1 100644 --- a/src/executor/onnx_to_tensorrt.h +++ b/src/operator/subgraph/tensorrt/onnx_to_tensorrt.h @@ -1,5 +1,5 @@ -#ifndef MXNET_EXECUTOR_ONNX_TO_TENSORRT_H_ -#define MXNET_EXECUTOR_ONNX_TO_TENSORRT_H_ +#ifndef MXNET_OPERATOR_SUBGRAPH_TENSORRT_ONNX_TO_TENSORRT_H_ +#define MXNET_OPERATOR_SUBGRAPH_TENSORRT_ONNX_TO_TENSORRT_H_ /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -20,7 +20,7 @@ */ /*! - * Copyright (c) 2018 by Contributors + * Copyright (c) 2019 by Contributors * \file onnx_to_tensorrt.h * \brief TensorRT integration with the MXNet executor * \author Marek Kolodziej, Clement Fuji Tsang @@ -28,13 +28,15 @@ #if MXNET_USE_TENSORRT +#include +#include + #include #include -#include #include #include - -#include "../operator/contrib/tensorrt-inl.h" +#include +#include namespace onnx_to_tensorrt { @@ -64,7 +66,7 @@ class TRT_Logger : public nvinfer1::ILogger { } }; -nvinfer1::ICudaEngine* onnxToTrtCtx( +std::tuple onnxToTrtCtx( const std::string& onnx_model, int32_t max_batch_size = 32, size_t max_workspace_size = 1L << 30, @@ -74,4 +76,4 @@ nvinfer1::ICudaEngine* onnxToTrtCtx( #endif // MXNET_USE_TENSORRT -#endif // MXNET_EXECUTOR_ONNX_TO_TENSORRT_H_ +#endif // MXNET_OPERATOR_SUBGRAPH_TENSORRT_ONNX_TO_TENSORRT_H_ diff --git a/src/operator/subgraph/tensorrt/tensorrt-inl.h b/src/operator/subgraph/tensorrt/tensorrt-inl.h new file mode 100644 index 000000000000..e258d892aaba --- /dev/null +++ b/src/operator/subgraph/tensorrt/tensorrt-inl.h @@ -0,0 +1,240 @@ +#ifndef MXNET_OPERATOR_SUBGRAPH_TENSORRT_TENSORRT_INL_H_ +#define MXNET_OPERATOR_SUBGRAPH_TENSORRT_TENSORRT_INL_H_ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file tensorrt-inl.h + * \brief TensorRT operation registration + * \author Marek Kolodziej, Clement Fuji Tsang +*/ + +#if MXNET_USE_TENSORRT + +#include + +#include +#include +#include + +#include "../common.h" +#include "../subgraph_property.h" +#include "nnvm_to_onnx-inl.h" +#include "./onnx_to_tensorrt.h" + +namespace mxnet { +namespace op { + +using int64 = ::google::protobuf::int64; + +struct TRTParam { + std::unordered_map inputs_to_idx; + std::unordered_map outputs_to_idx; + std::unordered_map params_map; +}; + +struct TRTEngineParam { + TRTEngineParam(nvinfer1::ICudaEngine* trt_engine, + nvonnxparser::IParser* _parser, + const std::unordered_map input_map, + const std::unordered_map output_map) { + binding_order = std::make_shared > >(); + bindings = std::make_shared >(); + binding_order->reserve(trt_engine->getNbBindings()); + bindings->resize(trt_engine->getNbBindings()); + for (int b = 0; b < trt_engine->getNbBindings(); ++b) { + const std::string& binding_name = trt_engine->getBindingName(b); + if (trt_engine->bindingIsInput(b)) { + binding_order->emplace_back(input_map.at(binding_name), true); + } else { + binding_order->emplace_back(output_map.at(binding_name), false); + } + } + trt_executor = trt_engine->createExecutionContext(); + trt_parser = _parser; + } + + ~TRTEngineParam() { + trt_parser->destroy(); + trt_executor->destroy(); + } + nvinfer1::IExecutionContext* trt_executor; + nvonnxparser::IParser* trt_parser; + std::shared_ptr > > binding_order; + std::shared_ptr > bindings; +}; + +class TensorrtSelector : public SubgraphSelector { + public: + const std::unordered_set unconditionalTRTops = { + "BatchNorm", + "clip", + "Concat", + "Convolution", + "Dropout", + "elemwise_add", + "elemwise_sub", + "elemwise_mul", + "Flatten", + "FullyConnected", + "mean", + "Pad", + "relu", + "rsqrt", + "SoftmaxOutput" + }; + + const std::unordered_set withWeightsOps = { + "BatchNorm", + "Convolution", + "FullyConnected" + }; + + bool isTRTCompatible(const nnvm::Node &n) { + const std::string op_name = n.op()->name; + if (op_name == "Pooling") { + return (n.attrs.dict.at("pool_type") == "avg" || + n.attrs.dict.at("pool_type") == "max"); + } + + if (unconditionalTRTops.count(op_name)) { + return true; + } + + if (op_name == "Activation") { + return n.attrs.dict.at("act_type") == "relu" || + n.attrs.dict.at("act_type") == "tanh" || + n.attrs.dict.at("act_type") == "sigmoid"; + } + + return false; + } + + bool Select(const nnvm::Node &n) override { + return !n.is_variable() && isTRTCompatible(n); + } + + bool SelectInput(const nnvm::Node &n, const nnvm::Node &new_node) override { + if (new_node.is_variable()) { + if (withWeightsOps.count(n.op()->name)) { + return n.inputs[0].node->attrs.name != new_node.attrs.name; + } else { + return false; + } + } + return isTRTCompatible(new_node); + } + + bool SelectOutput(const nnvm::Node &n, const nnvm::Node &new_node) override { + return isTRTCompatible(new_node); + } + + std::vector Filter(const std::vector& candidates) override { + bool found_one = false; + // TensorRT is interesting with at least 2 operations + for (auto& n : candidates) { + if (!n->is_variable()) { + if (found_one) { + return candidates; + } else { + found_one = true; + } + } + } + return std::vector(); + } +}; + +class TensorrtProperty : public SubgraphProperty { + public: + static SubgraphPropertyPtr Create() { + return std::make_shared(); + } + + nnvm::NodePtr CreateSubgraphNode(const nnvm::Symbol &sym, + const int subgraph_id) const override { + nnvm::NodePtr n = nnvm::Node::Create(); + nnvm::Symbol new_sym; + std::unique_copy(sym.outputs.begin(), sym.outputs.end(), + std::back_inserter(new_sym.outputs), []( + nnvm::NodeEntry lhs, nnvm::NodeEntry rhs) { + return lhs.index == rhs.index && lhs.node.get() == rhs.node.get(); + }); + n->attrs.name = "TensorRT" + std::to_string(subgraph_id); + n->attrs.op = Op::Get("_TensorRT"); + CHECK(n->attrs.op); + n->attrs.subgraphs.emplace_back(std::make_shared(new_sym)); + std::ostringstream params_oss; + for (auto &e : new_sym.ListInputNames(nnvm::Symbol::kAll)) { + params_oss << e << ";"; + } + auto tensorrt_params_names = params_oss.str(); + tensorrt_params_names.pop_back(); + n->attrs.dict["subgraph_params_names"] = tensorrt_params_names; + TRTParam param; + n->attrs.parsed = param; + n->op()->attr_parser(&(n->attrs)); + return n; + } + + SubgraphSelectorPtr CreateSubgraphSelector() const override { + return std::make_shared(); + } + + void ConnectSubgraphOutputs(const nnvm::NodePtr subgraph_node, \ + std::vector* output_entries) const override { + std::vector& outputs = subgraph_node->attrs.subgraphs[0]->outputs; + TRTParam& _params = nnvm::get(subgraph_node->attrs.parsed); + for (size_t i = 0; i < outputs.size(); i++) { + auto& o = outputs[i]; + for (auto& e : *output_entries) { + if (o.index == e->index && o.node.get() == e->node.get()) { + e->index = i; + e->node = subgraph_node; + // TODO(cfujitsang): For future support this would fail + // if the node have multiple outputs + _params.outputs_to_idx[o.node->attrs.name] = i; + } + } + } + subgraph_node->attrs.parsed = std::move(_params); + } + + void ConnectSubgraphInputs(const nnvm::NodePtr subgraph_node, + std::vector* input_entries, + std::vector* orig_input_entries) const override { + TRTParam& _params = nnvm::get(subgraph_node->attrs.parsed); + subgraph_node->inputs.clear(); + subgraph_node->inputs.resize(orig_input_entries->size()); + for (size_t i = 0; i < orig_input_entries->size(); ++i) { + subgraph_node->inputs[i] = orig_input_entries->at(i); + _params.inputs_to_idx[input_entries->at(i)->node->attrs.name] = i; + } + subgraph_node->attrs.parsed = std::move(_params); + } +}; + + +} // namespace op +} // namespace mxnet + +#endif // MXNET_USE_TENSORRT + +#endif // MXNET_OPERATOR_SUBGRAPH_TENSORRT_TENSORRT_INL_H_ diff --git a/src/operator/subgraph/tensorrt/tensorrt.cc b/src/operator/subgraph/tensorrt/tensorrt.cc new file mode 100644 index 000000000000..30fcee007cfc --- /dev/null +++ b/src/operator/subgraph/tensorrt/tensorrt.cc @@ -0,0 +1,336 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file tensorrt.cc + * \brief TensorRT operation registration + * \author Marek Kolodziej, Clement Fuji Tsang +*/ + +#if MXNET_USE_TENSORRT + +#include "./tensorrt-inl.h" + +namespace mxnet { +namespace op { + +inline uint32_t TRTNumInputs(const nnvm::NodeAttrs& attrs) { + const TRTParam& param = nnvm::get(attrs.parsed); + const auto inputs_to_idx = param.inputs_to_idx; + return inputs_to_idx.size(); +} + +inline std::vector TRTListInputNames(const nnvm::NodeAttrs& attrs) { + std::vector outputs; + const TRTParam& param = nnvm::get(attrs.parsed); + const auto inputs_to_idx = param.inputs_to_idx; + for (auto& p : inputs_to_idx) { + outputs[p.second] = p.first; + } + return outputs; +} + +inline bool TRTInferShape(const nnvm::NodeAttrs& attrs, + std::vector *in_shapes, + std::vector *out_shapes) { + using namespace exec; + const nnvm::Symbol subgraph_sym = *(attrs.subgraphs[0]); + const TRTParam& param = nnvm::get(attrs.parsed); + auto params_map = param.params_map; + auto inputs_to_idx = param.inputs_to_idx; + nnvm::Graph g; + g.outputs = subgraph_sym.outputs; + const auto& idx_g = g.indexed_graph(); + CHECK_EQ(idx_g.input_nodes().size(), in_shapes->size() + params_map.size()); + CHECK_EQ(idx_g.outputs().size(), out_shapes->size()); + + // Put the input and output shapes to the shape vector. + mxnet::ShapeVector shapes(idx_g.num_node_entries()); + const auto &input_nids = idx_g.input_nodes(); + CHECK_EQ(input_nids.size(), in_shapes->size() + params_map.size()); + for (size_t i = 0; i < input_nids.size(); i++) { + auto node = idx_g[input_nids[i]].source; + auto eid = idx_g.entry_id(input_nids[i], 0); + auto it_params = params_map.find(node->attrs.name); + auto it_inputs = inputs_to_idx.find(node->attrs.name); + if (it_params != params_map.end()) { + shapes[eid] = it_params->second.shape(); + } else if (it_inputs != inputs_to_idx.end()) { + shapes[eid] = in_shapes->at(it_inputs->second); + } else { + LOG(FATAL) << node->attrs.name << " shape information is missing for attributes inference"; + } + } + CHECK_EQ(g.outputs.size(), out_shapes->size()); + for (size_t i = 0; i < out_shapes->size(); i++) { + auto eid = idx_g.entry_id(g.outputs[i]); + shapes[eid] = out_shapes->at(i); + } + + // Infer shape of the graph. + g.attrs["shape"] = std::make_shared(std::move(shapes)); + g = exec::InferShape(std::move(g)); + // Copy the inferred shape back to the input shapes and the output shapes. + shapes = g.GetAttr("shape"); + // assign to in_shapes + for (size_t i = 0; i < input_nids.size(); ++i) { + const auto node = idx_g[input_nids[i]].source; + const auto eid = idx_g.entry_id(input_nids[i], 0); + auto it = inputs_to_idx.find(node->attrs.name); + if (it != inputs_to_idx.end()) { + SHAPE_ASSIGN_CHECK(*in_shapes, it->second, shapes[eid]); + } + } + // assign to out_shapes + for (size_t i = 0; i < g.outputs.size(); ++i) { + const auto eid = idx_g.entry_id(g.outputs[i]); + SHAPE_ASSIGN_CHECK(*out_shapes, i, shapes[eid]); + } + // Check if we have inferred the shapes correctly. + return g.GetAttr("shape_num_unknown_nodes") == 0; +} + +inline bool TRTInferType(const nnvm::NodeAttrs& attrs, + std::vector *in_types, + std::vector *out_types) { + const nnvm::Symbol subgraph_sym = *(attrs.subgraphs[0]); + const TRTParam& param = nnvm::get(attrs.parsed); + auto params_map = param.params_map; + auto inputs_to_idx = param.inputs_to_idx; + + nnvm::Graph g; + g.outputs = subgraph_sym.outputs; + const auto& idx_g = g.indexed_graph(); + CHECK_EQ(idx_g.input_nodes().size(), in_types->size() + params_map.size()); + CHECK_EQ(idx_g.outputs().size(), out_types->size()); + + // Put the input and output data types to the dtype vector. + nnvm::DTypeVector types(idx_g.num_node_entries(), -1); + const auto &input_nids = idx_g.input_nodes(); + CHECK_EQ(input_nids.size(), in_types->size() + params_map.size()); + for (size_t i = 0; i < input_nids.size(); i++) { + auto node = idx_g[input_nids[i]].source; + auto eid = idx_g.entry_id(input_nids[i], 0); + auto it_params = params_map.find(node->attrs.name); + auto it_inputs = inputs_to_idx.find(node->attrs.name); + if (it_params != params_map.end()) { + types[eid] = it_params->second.dtype(); + } else if (it_inputs != inputs_to_idx.end()) { + types[eid] = in_types->at(it_inputs->second); + } else { + LOG(FATAL) << node->attrs.name + << " dtype information is missing for attributes inference"; + } + } + CHECK_EQ(g.outputs.size(), out_types->size()); + for (size_t i = 0; i < out_types->size(); i++) { + auto eid = idx_g.entry_id(g.outputs[i]); + types[eid] = out_types->at(i); + } + + // Infer data type of the graph. + g.attrs["dtype"] = std::make_shared(std::move(types)); + g = exec::InferType(std::move(g)); + + types = g.GetAttr("dtype"); + // assign to in_types + for (size_t i = 0; i < input_nids.size(); ++i) { + const auto node = idx_g[input_nids[i]].source; + const auto eid = idx_g.entry_id(input_nids[i], 0); + auto it = inputs_to_idx.find(node->attrs.name); + if (it != inputs_to_idx.end()) { + TYPE_ASSIGN_CHECK(*in_types, it->second, types[eid]); + } + } + // assign to out_types + for (size_t i = 0; i < g.outputs.size(); ++i) { + const auto eid = idx_g.entry_id(g.outputs[i]); + TYPE_ASSIGN_CHECK(*out_types, i, types[eid]); + } + + // Check if we have inferred the dtypes correctly. + return g.GetAttr("dtype_num_unknown_nodes") == 0; +} + +inline bool TRTInferStorageType(const nnvm::NodeAttrs& attrs, + const int dev_mask, + DispatchMode* dispatch_mode, + std::vector* in_stypes, + std::vector* out_stypes) { + const nnvm::Symbol subgraph_sym = *(attrs.subgraphs[0]); + const TRTParam& param = nnvm::get(attrs.parsed); + auto params_map = param.params_map; + auto inputs_to_idx = param.inputs_to_idx; + nnvm::Graph g; + g.outputs = subgraph_sym.outputs; + const auto& idx_g = g.indexed_graph(); + CHECK_EQ(idx_g.input_nodes().size(), in_stypes->size() + params_map.size()); + CHECK_EQ(idx_g.outputs().size(), out_stypes->size()); + exec::DevMaskVector dev_masks(idx_g.num_node_entries(), dev_mask); + + // Put the input and output storages to the storage vector. + StorageTypeVector stypes(idx_g.num_node_entries(), kUndefinedStorage); + const auto &input_nids = idx_g.input_nodes(); + CHECK_EQ(input_nids.size(), in_stypes->size() + params_map.size()); + for (size_t i = 0; i < input_nids.size(); i++) { + auto node = idx_g[input_nids[i]].source; + auto eid = idx_g.entry_id(input_nids[i], 0); + auto it_params = params_map.find(node->attrs.name); + auto it_inputs = inputs_to_idx.find(node->attrs.name); + if (it_params != params_map.end()) { + stypes[eid] = it_params->second.storage_type(); + } else if (it_inputs != inputs_to_idx.end()) { + stypes[eid] = in_stypes->at(it_inputs->second); + } else { + LOG(FATAL) << node->attrs.name + << " storage type information is missing for attributes inference"; + } + } + CHECK_EQ(g.outputs.size(), out_stypes->size()); + for (size_t i = 0; i < out_stypes->size(); i++) { + auto eid = idx_g.entry_id(g.outputs[i]); + stypes[eid] = out_stypes->at(i); + } + + // Infer storage type of the graph. + bool dev_match = g.attrs.count("dev_mask") && + g.GetAttr("dev_mask") == dev_masks; + if (!dev_match) { + g.attrs["dev_mask"] = std::make_shared(std::move(dev_masks)); + } + g.attrs["storage_type"] = std::make_shared(std::move(stypes)); + g = exec::InferStorageType(std::move(g)); + + stypes = g.GetAttr("storage_type"); + // assign to in_types + for (size_t i = 0; i < input_nids.size(); ++i) { + const auto node = idx_g[input_nids[i]].source; + const auto eid = idx_g.entry_id(input_nids[i], 0); + auto it = inputs_to_idx.find(node->attrs.name); + if (it != inputs_to_idx.end()) { + STORAGE_TYPE_ASSIGN_CHECK(*in_stypes, it->second, stypes[eid]); + } + } + + DISPATCH_MODE_ASSIGN_CHECK(dispatch_mode, 0, DispatchMode::kFComputeEx); + // assign to out_types + for (size_t i = 0; i < g.outputs.size(); ++i) { + const auto eid = idx_g.entry_id(g.outputs[i]); + STORAGE_TYPE_ASSIGN_CHECK(*out_stypes, i, stypes[eid]); + } + // Check if we have inferred the storages correctly. + return g.GetAttr("storage_type_num_unknown_nodes") == 0; +} + +void TRTParamParser(nnvm::NodeAttrs* attrs) { + TRTParam& _param = nnvm::get(attrs->parsed); + std::string prefix = "subgraph_param_"; + std::string str_dtype, str_shape, str_pointer, _tmp; + for (auto it = attrs->dict.begin(); it != attrs->dict.end();) { + std::string attrs_name = it->first; + if (std::equal(prefix.begin(), prefix.end(), attrs_name.begin())) { + std::string param_name = attrs_name.substr(prefix.size(), + attrs_name.size() - prefix.size()); + // TODO(cfujitsang): find a less dirty way to give weights + NDArray *cache = reinterpret_cast(stol(it->second)); + _param.params_map.emplace(param_name, cache->Copy(Context())); + _param.params_map[param_name].WaitToRead(); + it = attrs->dict.erase(it); + } else { + ++it; + } + } + attrs->parsed = std::move(_param); +} + +OpStatePtr TRTCreateState(const nnvm::NodeAttrs& attrs, Context ctx, + const std::vector& in_shape, + const std::vector& in_type) { + const auto& node_param = nnvm::get(attrs.parsed); + nnvm::Graph graph; + graph.outputs = attrs.subgraphs[0]->outputs; + uint32_t max_batch_size = dmlc::GetEnv("MXNET_TENSORRT_MAX_BATCH_SIZE", in_shape[0][0]); + if (max_batch_size < in_shape[0][0]) { + LOG(INFO) << "Warning: max batch size changed to be is: " << in_shape[0][0] + << " instead of: " << max_batch_size; + max_batch_size = in_shape[0][0]; + } + const auto& params_map = node_param.params_map; + const auto& inputs_to_idx = node_param.inputs_to_idx; + const auto& outputs_to_idx = node_param.outputs_to_idx; + const auto& idx_g = graph.indexed_graph(); + const auto& input_nids = idx_g.input_nodes(); + mxnet::ShapeVector shape_inputs(input_nids.size()); + nnvm::DTypeVector dtype_inputs(input_nids.size()); + for (int i = 0; i < input_nids.size(); ++i) { + auto node = idx_g[input_nids[i]].source; + auto it_params = params_map.find(node->attrs.name); + auto it_inputs = inputs_to_idx.find(node->attrs.name); + if (it_params != params_map.end()) { + shape_inputs[i] = it_params->second.shape(); + dtype_inputs[i] = it_params->second.dtype(); + } else if (it_inputs != inputs_to_idx.end()) { + shape_inputs[i] = in_shape[it_inputs->second]; + dtype_inputs[i] = in_type[it_inputs->second]; + } else { + LOG(FATAL) << node->attrs.name << " attribute is missing for attributes inference"; + } + } + mxnet::ShapeVector out_shape(graph.outputs.size()); + nnvm::DTypeVector out_type(graph.outputs.size(), -1); + mxnet::ShapeVector _in_shape(in_shape.begin(), in_shape.end()); + nnvm::DTypeVector _in_type(in_type.begin(), in_type.end()); + TRTInferShape(attrs, &_in_shape, &out_shape); + TRTInferType(attrs, &_in_type, &out_type); + nnvm::DTypeVector dtypes(idx_g.num_node_entries()); + mxnet::ShapeVector shapes(idx_g.num_node_entries()); + for (int i = 0; i < graph.outputs.size(); ++i) { + auto eid = idx_g.entry_id(graph.outputs[i]); + dtypes[eid] = out_type[i]; + shapes[eid] = out_shape[i]; + } + graph.attrs["dtype_inputs"] = std::make_shared(std::move(dtype_inputs)); + graph.attrs["shape_inputs"] = std::make_shared(std::move(shape_inputs)); + graph.attrs["dtype"] = std::make_shared(std::move(dtypes)); + graph.attrs["shape"] = std::make_shared(std::move(shapes)); + auto onnx_graph = op::nnvm_to_onnx::ConvertNnvmGraphToOnnx(graph, ¶ms_map); + auto trt_tuple = ::onnx_to_tensorrt::onnxToTrtCtx(onnx_graph, max_batch_size, 1 << 30); + return OpStatePtr::Create(std::get<0>(trt_tuple), std::get<1>(trt_tuple), + inputs_to_idx, outputs_to_idx); +} + +NNVM_REGISTER_OP(_TensorRT) + .describe(R"code(TRT operation (one engine) +)code" ADD_FILELINE) + .set_num_inputs(TRTNumInputs) + .set_num_outputs(DefaultSubgraphOpNumOutputs) + .set_attr_parser(TRTParamParser) + .set_attr("FInferShape", TRTInferShape) + .set_attr("FInferType", TRTInferType) + .set_attr("FListInputNames", TRTListInputNames) + .set_attr("FListOutputNames", DefaultSubgraphOpListOutputs) + .set_attr("FCreateOpState", TRTCreateState) + .set_attr("FInferStorageType", TRTInferStorageType); + +MXNET_REGISTER_SUBGRAPH_PROPERTY(TensorRT, TensorrtProperty); +} // namespace op +} // namespace mxnet + +#endif // MXNET_USE_TENSORRT diff --git a/src/operator/contrib/tensorrt.cu b/src/operator/subgraph/tensorrt/tensorrt.cu similarity index 69% rename from src/operator/contrib/tensorrt.cu rename to src/operator/subgraph/tensorrt/tensorrt.cu index 9a9c3c024366..4a5b23b3a9f7 100644 --- a/src/operator/contrib/tensorrt.cu +++ b/src/operator/subgraph/tensorrt/tensorrt.cu @@ -19,8 +19,8 @@ /*! * Copyright (c) 2018 by Contributors - * \file trt.cu - * \brief TensorRT GPU operation + * \file tensorrt.cu + * \brief TensorRT GPU operation registration * \author Marek Kolodziej, Clement Fuji Tsang */ @@ -41,30 +41,26 @@ namespace op { } while (0) void TRTCompute(const OpStatePtr& state, const OpContext& ctx, - const std::vector& inputs, const std::vector& req, - const std::vector& outputs) { + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { using namespace mshadow; using namespace mshadow::expr; - - Stream* s = ctx.get_stream(); - cudaStream_t cuda_s = Stream::GetStream(s); + cudaStream_t cuda_s = Stream::GetStream(ctx.get_stream()); const auto& param = state.get_state(); - std::vector bindings; - bindings.reserve(param.binding_map.size()); - for (auto& p : param.binding_map) { - if (p.second == nnvm_to_onnx::TypeIO::Inputs) { - bindings.emplace_back(inputs[p.first].dptr_); + for (size_t i = 0; i < param.binding_order->size(); ++i) { + auto& p = param.binding_order->at(i); + if (p.second == true) { + param.bindings->at(i) = inputs[p.first].dptr_; } else { - bindings.emplace_back(outputs[p.first].dptr_); + param.bindings->at(i) = outputs[p.first].dptr_; } } - const int batch_size = static_cast(inputs[0].shape_[0]); - param.trt_executor->enqueue(batch_size, bindings.data(), cuda_s, nullptr); - CHECK_CUDART(cudaStreamSynchronize(cuda_s)); + param.trt_executor->enqueue(batch_size, param.bindings->data(), cuda_s, nullptr); } -NNVM_REGISTER_OP(_trt_op) +NNVM_REGISTER_OP(_TensorRT) .set_attr("FStatefulCompute", TRTCompute); } // namespace op diff --git a/tests/cpp/misc/serialization.cc b/tests/cpp/misc/serialization.cc deleted file mode 100644 index 2509a43c27ee..000000000000 --- a/tests/cpp/misc/serialization.cc +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include -#include <../../../src/common/serialization.h> - -using namespace mxnet; -using namespace std; - -/* - * Test that used datastruct are properly serialized and deserialized - */ - -TEST(SerializerTest, InputMapCorrect) { - std::map input_map; - input_map.emplace("input_0", 2); - input_map.emplace("another_input", 0); - input_map.emplace("last_input", 1); - std::string serialized_data; - common::Serialize(input_map, &serialized_data); - std::map deserialized_input_map; - common::Deserialize(&deserialized_input_map, serialized_data); - ASSERT_EQ(input_map.size(), deserialized_input_map.size()); - for (auto& p : input_map) { - auto it = deserialized_input_map.find(p.first); - ASSERT_NE(it, deserialized_input_map.end()); - ASSERT_EQ(it->second, p.second); - } -} - -TEST(SerializerTest, OutputMapCorrect) { - std::map > output_map; - output_map.emplace("output_0", std::make_tuple(1, mxnet::TShape({23, 12, 63, 432}), 0, 1)); - output_map.emplace("another_output", std::make_tuple(2, mxnet::TShape({23, 123}), 14, -23)); - output_map.emplace("last_output", std::make_tuple(0, mxnet::TShape(1, 0), -1, 0)); - std::string serialized_data; - common::Serialize(output_map, &serialized_data); - std::map > deserialized_output_map; - common::Deserialize(&deserialized_output_map, serialized_data); - ASSERT_EQ(output_map.size(), deserialized_output_map.size()); - for (auto& p : output_map) { - auto it = deserialized_output_map.find(p.first); - ASSERT_NE(it, deserialized_output_map.end()); - auto lhs = it->second; - auto rhs = p.second; - ASSERT_EQ(std::get<0>(lhs), std::get<0>(rhs)); - ASSERT_EQ(std::get<1>(lhs), std::get<1>(rhs)); - ASSERT_EQ(std::get<2>(lhs), std::get<2>(rhs)); - ASSERT_EQ(std::get<3>(lhs), std::get<3>(rhs)); - } -} - diff --git a/tests/python/tensorrt/lenet5_train.py b/tests/python/tensorrt/lenet5_train.py index 8edd9abf70e7..e679c05894a3 100644 --- a/tests/python/tensorrt/lenet5_train.py +++ b/tests/python/tensorrt/lenet5_train.py @@ -24,6 +24,7 @@ def lenet5(): """LeNet-5 Symbol""" #pylint: disable=no-member data = mx.sym.Variable('data') + data = mx.sym.Cast(data, 'float16') conv1 = mx.sym.Convolution(data=data, kernel=(5, 5), num_filter=20) tanh1 = mx.sym.Activation(data=conv1, act_type="tanh") pool1 = mx.sym.Pooling(data=tanh1, pool_type="max", @@ -39,6 +40,7 @@ def lenet5(): tanh3 = mx.sym.Activation(data=fc1, act_type="tanh") # second fullc fc2 = mx.sym.FullyConnected(data=tanh3, num_hidden=10) + fc2 = mx.sym.Cast(fc2, 'float32') # loss lenet = mx.sym.SoftmaxOutput(data=fc2, name='softmax') #pylint: enable=no-member diff --git a/tests/python/tensorrt/test_cvnets.py b/tests/python/tensorrt/test_cvnets.py index 4fdd522341bc..9282bc6f0de6 100644 --- a/tests/python/tensorrt/test_cvnets.py +++ b/tests/python/tensorrt/test_cvnets.py @@ -27,28 +27,22 @@ def get_classif_model(model_name, use_tensorrt, ctx=mx.gpu(0), batch_size=128): - mx.contrib.tensorrt.set_use_tensorrt(use_tensorrt) + mx.contrib.tensorrt.set_use_fp16(False) h, w = 32, 32 net = gluoncv.model_zoo.get_model(model_name, pretrained=True) - data = mx.sym.var('data') - + net.hybridize() + net.forward(mx.nd.zeros((batch_size, 3, h, w))) + net.export(model_name) + _sym, arg_params, aux_params = mx.model.load_checkpoint(model_name, 0) if use_tensorrt: - out = net(data) - softmax = mx.sym.SoftmaxOutput(out, name='softmax') - all_params = dict([(k, v.data()) for k, v in net.collect_params().items()]) - executor = mx.contrib.tensorrt.tensorrt_bind(softmax, ctx=ctx, all_params=all_params, - data=(batch_size,3, h, w), - softmax_label=(batch_size,), grad_req='null', - force_rebind=True) + sym = _sym.get_backend_symbol('TensorRT') + mx.contrib.tensorrt.init_tensorrt_params(sym, arg_params, aux_params) else: - # Convert gluon model to Symbolic - net.hybridize() - net.forward(mx.ndarray.zeros((batch_size, 3, h, w))) - net.export(model_name) - symbol, arg_params, aux_params = mx.model.load_checkpoint(model_name, 0) - executor = symbol.simple_bind(ctx=ctx, data=(batch_size, 3, h, w), - softmax_label=(batch_size,)) - executor.copy_params_from(arg_params, aux_params) + sym = _sym + executor = sym.simple_bind(ctx=ctx, data=(batch_size, 3, h, w), + softmax_label=(batch_size,), + grad_req='null', force_rebind=True) + executor.copy_params_from(arg_params, aux_params) return executor @@ -126,7 +120,7 @@ def run_experiment_for(model_name, batch_size, num_workers): def test_tensorrt_on_cifar_resnets(batch_size=32, tolerance=0.1, num_workers=1): - original_try_value = mx.contrib.tensorrt.get_use_tensorrt() + original_use_fp16 = mx.contrib.tensorrt.get_use_fp16() try: models = [ 'cifar_resnet20_v1', @@ -170,7 +164,7 @@ def test_tensorrt_on_cifar_resnets(batch_size=32, tolerance=0.1, num_workers=1): print("Test duration: %.2f seconds" % test_duration) finally: - mx.contrib.tensorrt.set_use_tensorrt(original_try_value) + mx.contrib.tensorrt.set_use_fp16(original_use_fp16) if __name__ == '__main__': diff --git a/tests/python/tensorrt/test_cycle.py b/tests/python/tensorrt/test_cycle.py deleted file mode 100644 index 25f515a106a6..000000000000 --- a/tests/python/tensorrt/test_cycle.py +++ /dev/null @@ -1,69 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import mxnet as mx -from common import * - - -def detect_cycle_from(sym, visited, stack): - visited.add(sym.handle.value) - stack.add(sym.handle.value) - for s in sym.get_children(): - if s.handle.value not in visited: - if detect_cycle_from(sym, visited, stack): - return True - elif s.handle.value in stack: - return True - stack.remove(sym.handle.value) - return False - - -def has_no_cycle(sym): - visited = set() - stack = set() - all_nodes = sym.get_internals() - for s in all_nodes: - if s.handle.value in visited: - if detect_cycle_from(s, visited, stack): - return False - return True - - -def test_simple_cycle(): - inp = mx.sym.Variable('input', shape=[1,10]) - A = mx.sym.FullyConnected(data=inp, num_hidden=10, no_bias=False, name='A') - B = mx.sym.FullyConnected(data=A, num_hidden=10, no_bias=False, name='B') - D = mx.sym.sin(data=A, name='D') - C = mx.sym.elemwise_add(lhs=B, rhs=D, name='C') - arg_params = { - 'I_weight': mx.nd.zeros([10,10]), - 'I_bias': mx.nd.zeros([10]), - 'A_weight': mx.nd.zeros([10,10]), - 'A_bias': mx.nd.zeros([10]), - 'B_weight': mx.nd.zeros([10,10]), - 'B_bias': mx.nd.zeros([10]), - } - - executor = C.simple_bind(ctx=mx.gpu(0), data=(1,10), softmax_label=(1,), - shared_buffer=arg_params, grad_req='null', force_rebind=True) - optimized_graph = mx.contrib.tensorrt.get_optimized_symbol(executor) - assert has_no_cycle(optimized_graph), "The graph optimized by TRT contains a cycle" - - -if __name__ == '__main__': - import nose - nose.runmodule() diff --git a/tests/python/tensorrt/test_resnet18.py b/tests/python/tensorrt/test_resnet18.py index fff3ac5dd768..36a7f33fe6a0 100644 --- a/tests/python/tensorrt/test_resnet18.py +++ b/tests/python/tensorrt/test_resnet18.py @@ -25,7 +25,6 @@ url = '/~https://github.com/dmlc/web-data/blob/master/mxnet/doc/tutorials/python/predict_image/cat.jpg?raw=true' model_file_name = 'resnet18_v2_trt_test' - def get_image(image_url): fname = mx.test_utils.download(image_url, fname=image_url.split('/')[-1].split('?')[0]) img = mx.image.imread(fname) @@ -33,8 +32,7 @@ def get_image(image_url): img = img.transpose((2, 0, 1)) # Channel first img = img.expand_dims(axis=0) # Batchify img = mx.nd.cast(img, dtype=np.float32) - return img/255.0 - + return img / 255.0 def test_tensorrt_resnet18_feature_vect(): print("downloading sample input") @@ -45,24 +43,32 @@ def test_tensorrt_resnet18_feature_vect(): gluon_resnet18.export(model_file_name) sym, arg_params, aux_params = mx.model.load_checkpoint(model_file_name, 0) - os.environ['MXNET_USE_TENSORRT'] = '0' - executor = sym.simple_bind(ctx=mx.gpu(), data=batch_shape, grad_req='null', force_rebind=True) + executor = sym.simple_bind(ctx=mx.gpu(), data=batch_shape, + grad_req='null', force_rebind=True) executor.copy_params_from(arg_params, aux_params) y = executor.forward(is_train=False, data=input_data) - - os.environ['MXNET_USE_TENSORRT'] = '1' - all_params = arg_params - all_params.update(aux_params) - executor = mx.contrib.tensorrt.tensorrt_bind(sym, ctx=mx.gpu(), all_params=all_params, data=batch_shape, - grad_req='null', force_rebind=True) - y_trt = executor.forward(is_train=False, data=input_data) - - no_trt_output = y[0].asnumpy()[0] - trt_output = y_trt[0].asnumpy()[0] - assert_almost_equal(no_trt_output, trt_output, 1e-4, 1e-4) - + trt_sym = sym.get_backend_symbol('TensorRT') + mx.contrib.tensorrt.init_tensorrt_params(trt_sym, arg_params, aux_params) + original_precision_value = mx.contrib.tensorrt.get_use_fp16() + try: + mx.contrib.tensorrt.set_use_fp16(True) + executor = trt_sym.simple_bind(ctx=mx.gpu(), data=batch_shape, + grad_req='null', force_rebind=True) + executor.copy_params_from(arg_params, aux_params) + y_trt = executor.forward(is_train=False, data=input_data) + mx.contrib.tensorrt.set_use_fp16(False) + executor = trt_sym.simple_bind(ctx=mx.gpu(), data=batch_shape, + grad_req='null', force_rebind=True) + executor.copy_params_from(arg_params, aux_params) + y_trt_fp32 = executor.forward(is_train=False, data=input_data) + no_trt_output = y[0].asnumpy()[0] + trt_output = y_trt[0].asnumpy()[0] + trt_fp32_output = y_trt_fp32[0].asnumpy()[0] + assert_almost_equal(no_trt_output, trt_output, 1e-1, 1e-2) + assert_almost_equal(no_trt_output, trt_fp32_output, 1e-4, 1e-4) + finally: + mx.contrib.tensorrt.set_use_fp16(original_precision_value) if __name__ == '__main__': import nose - nose.runmodule() diff --git a/tests/python/tensorrt/test_tensorrt_lenet5.py b/tests/python/tensorrt/test_tensorrt_lenet5.py index 258686428a45..bdc306c0b297 100644 --- a/tests/python/tensorrt/test_tensorrt_lenet5.py +++ b/tests/python/tensorrt/test_tensorrt_lenet5.py @@ -24,24 +24,25 @@ def run_inference(sym, arg_params, aux_params, mnist, all_test_labels, batch_size, use_tensorrt): """Run inference with either MXNet or TensorRT""" - mx.contrib.tensorrt.set_use_tensorrt(use_tensorrt) data_size = (batch_size,) + mnist['test_data'].shape[1:] + type_dict = {'data': 'float32', 'softmax_label': 'float32'} if use_tensorrt: - all_params = merge_dicts(arg_params, aux_params) - executor = mx.contrib.tensorrt.tensorrt_bind(sym, ctx=mx.gpu(0), all_params=all_params, - data=data_size, - softmax_label=(batch_size,), - grad_req='null', - force_rebind=True) + _sym = sym.get_backend_symbol('TensorRT') + mx.contrib.tensorrt.init_tensorrt_params(_sym, arg_params, aux_params) else: - executor = sym.simple_bind(ctx=mx.gpu(0), - data=data_size, - softmax_label=(batch_size,), - grad_req='null', - force_rebind=True) - executor.copy_params_from(arg_params, aux_params) - + _sym = sym + for k, v in arg_params.items(): + type_dict[k] = v.dtype + for k, v in aux_params.items(): + type_dict[k] = v.dtype + executor = _sym.simple_bind(ctx=mx.gpu(0), + type_dict=type_dict, + data=data_size, + softmax_label=(batch_size,), + grad_req='null', + force_rebind=True) + executor.copy_params_from(arg_params, aux_params) # Get this value from all_test_labels # Also get classes from the dataset num_ex = 10000 @@ -68,39 +69,35 @@ def run_inference(sym, arg_params, aux_params, mnist, all_test_labels, batch_siz def test_tensorrt_inference(): """Run LeNet-5 inference comparison between MXNet and TensorRT.""" - original_try_value = mx.contrib.tensorrt.get_use_tensorrt() - try: - check_tensorrt_installation() - mnist = mx.test_utils.get_mnist() - num_epochs = 10 - batch_size = 128 - model_name = 'lenet5' - model_dir = os.getenv("LENET_MODEL_DIR", "/tmp") - model_file = '%s/%s-symbol.json' % (model_dir, model_name) - params_file = '%s/%s-%04d.params' % (model_dir, model_name, num_epochs) - - _, _, _, all_test_labels = get_iters(mnist, batch_size) - - # Load serialized MXNet model (model-symbol.json + model-epoch.params) - sym, arg_params, aux_params = mx.model.load_checkpoint(model_name, num_epochs) - - print("LeNet-5 test") - print("Running inference in MXNet") - mx_pct = run_inference(sym, arg_params, aux_params, mnist, all_test_labels, - batch_size=batch_size, use_tensorrt=False) - - print("Running inference in MXNet-TensorRT") - trt_pct = run_inference(sym, arg_params, aux_params, mnist, all_test_labels, - batch_size=batch_size, use_tensorrt=True) - - print("MXNet accuracy: %f" % mx_pct) - print("MXNet-TensorRT accuracy: %f" % trt_pct) - - assert abs(mx_pct - trt_pct) < 1e-2, \ - """Diff. between MXNet & TensorRT accuracy too high: - MXNet = %f, TensorRT = %f""" % (mx_pct, trt_pct) - finally: - mx.contrib.tensorrt.set_use_tensorrt(original_try_value) + check_tensorrt_installation() + mnist = mx.test_utils.get_mnist() + num_epochs = 10 + batch_size = 128 + model_name = 'lenet5' + model_dir = os.getenv("LENET_MODEL_DIR", "/tmp") + model_file = '%s/%s-symbol.json' % (model_dir, model_name) + params_file = '%s/%s-%04d.params' % (model_dir, model_name, num_epochs) + + _, _, _, all_test_labels = get_iters(mnist, batch_size) + + # Load serialized MXNet model (model-symbol.json + model-epoch.params) + sym, arg_params, aux_params = mx.model.load_checkpoint(model_name, num_epochs) + + print("LeNet-5 test") + print("Running inference in MXNet") + mx_pct = run_inference(sym, arg_params, aux_params, mnist, all_test_labels, + batch_size=batch_size, use_tensorrt=False) + + print("Running inference in MXNet-TensorRT") + trt_pct = run_inference(sym, arg_params, aux_params, mnist, all_test_labels, + batch_size=batch_size, use_tensorrt=True) + + print("MXNet accuracy: %f" % mx_pct) + print("MXNet-TensorRT accuracy: %f" % trt_pct) + + assert abs(mx_pct - trt_pct) < 1e-2, \ + """Diff. between MXNet & TensorRT accuracy too high: + MXNet = %f, TensorRT = %f""" % (mx_pct, trt_pct) if __name__ == '__main__': diff --git a/tests/python/tensorrt/test_training_warning.py b/tests/python/tensorrt/test_training_warning.py deleted file mode 100644 index fdac859aef6f..000000000000 --- a/tests/python/tensorrt/test_training_warning.py +++ /dev/null @@ -1,70 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import gluoncv -import mxnet as mx - -from tests.python.unittest.common import assertRaises - - -def test_training_without_trt(): - run_resnet(is_train=True, use_tensorrt=False) - - -def test_inference_without_trt(): - run_resnet(is_train=False, use_tensorrt=False) - - -def test_training_with_trt(): - assertRaises(RuntimeError, run_resnet, is_train=True, use_tensorrt=True) - - -def test_inference_with_trt(): - run_resnet(is_train=False, use_tensorrt=True) - - -def run_resnet(is_train, use_tensorrt): - original_trt_value = mx.contrib.tensorrt.get_use_tensorrt() - try: - mx.contrib.tensorrt.set_use_tensorrt(use_tensorrt) - ctx = mx.gpu(0) - batch_size = 1 - h = 32 - w = 32 - model_name = 'cifar_resnet20_v1' - resnet = gluoncv.model_zoo.get_model(model_name, pretrained=True) - data = mx.sym.var('data') - out = resnet(data) - softmax = mx.sym.SoftmaxOutput(out, name='softmax') - if is_train: - grad_req = 'write' - else: - grad_req = 'null' - if use_tensorrt: - all_params = dict([(k, v.data()) for k, v in resnet.collect_params().items()]) - mx.contrib.tensorrt.tensorrt_bind(softmax, ctx=ctx, all_params=all_params, - data=(batch_size, 3, h, w), softmax_label=(batch_size,), - force_rebind=True, grad_req=grad_req) - else: - softmax.simple_bind(ctx=ctx, data=(batch_size, 3, h, w), softmax_label=(batch_size,), - force_rebind=True, grad_req=grad_req) - finally: - mx.contrib.tensorrt.set_use_tensorrt(original_trt_value) - - -if __name__ == '__main__': - import nose - nose.runmodule()