From 25cdb1c2324a054f991a6aa3a13439090adcddf5 Mon Sep 17 00:00:00 2001 From: Zhennan Qin Date: Sun, 31 Mar 2019 08:04:44 +0800 Subject: [PATCH] Enhance subgraph API (#14113) * Enhance subgraph API * Fix lint * Trigger CI * Fix test * split into another PR * Rename partition_graph to build_graph * Fix lint * Fix merge * run CI * run CI * fix quantize script * fix ssd script * Address reminisce comment --- include/mxnet/c_api_test.h | 2 +- src/c_api/c_api_symbolic.cc | 2 +- src/c_api/c_api_test.cc | 4 +- src/executor/graph_executor.cc | 53 ++- .../{partition_graph.cc => build_subgraph.cc} | 383 ++++++++---------- .../subgraph/default_subgraph_property_v2.cc | 84 ++++ ...kldnn_post_quantize_align_scale_property.h | 164 ++++++++ .../mkldnn/mkldnn_subgraph_property.cc | 2 + src/operator/subgraph/subgraph_property.h | 176 +++++++- tests/python/mkl/test_subgraph.py | 44 +- tests/python/unittest/test_subgraph_op.py | 89 ++-- 11 files changed, 709 insertions(+), 294 deletions(-) rename src/operator/subgraph/{partition_graph.cc => build_subgraph.cc} (70%) create mode 100644 src/operator/subgraph/default_subgraph_property_v2.cc create mode 100644 src/operator/subgraph/mkldnn/mkldnn_post_quantize_align_scale_property.h diff --git a/include/mxnet/c_api_test.h b/include/mxnet/c_api_test.h index fe6fc7fe9cc4..ce2670e9a6f6 100644 --- a/include/mxnet/c_api_test.h +++ b/include/mxnet/c_api_test.h @@ -38,7 +38,7 @@ extern "C" { * to the input graph for partitioning. This function should be * used only for the testing purpose. */ -MXNET_DLL int MXPartitionGraphByOpNames(SymbolHandle sym_handle, +MXNET_DLL int MXBuildSubgraphByOpNames(SymbolHandle sym_handle, const char* prop_name, const mx_uint num_ops, const char** op_names, diff --git a/src/c_api/c_api_symbolic.cc b/src/c_api/c_api_symbolic.cc index a3a0b0ca16f9..545e95f04b79 100644 --- a/src/c_api/c_api_symbolic.cc +++ b/src/c_api/c_api_symbolic.cc @@ -728,7 +728,7 @@ int MXGenBackendSubgraph(SymbolHandle sym_handle, const char *backend, nnvm::Graph g = Symbol2Graph(*s); property->SetAttr("graph", g); g.attrs["subgraph_property"] = std::make_shared(std::move(property)); - g = nnvm::ApplyPass(std::move(g), "PartitionGraph"); + g = ApplyPass(std::move(g), "BuildSubgraph"); s->outputs = g.outputs; } *ret_sym_handle = s; diff --git a/src/c_api/c_api_test.cc b/src/c_api/c_api_test.cc index 70829db3d4a5..ae36b7af2829 100644 --- a/src/c_api/c_api_test.cc +++ b/src/c_api/c_api_test.cc @@ -27,7 +27,7 @@ #include "./c_api_common.h" #include "../operator/subgraph/subgraph_property.h" -int MXPartitionGraphByOpNames(SymbolHandle sym_handle, +int MXBuildSubgraphByOpNames(SymbolHandle sym_handle, const char* prop_name, const mx_uint num_ops, const char** op_names, @@ -49,7 +49,7 @@ int MXPartitionGraphByOpNames(SymbolHandle sym_handle, property->SetAttr("graph", g); property->SetAttr("op_names", op_name_set); g.attrs["subgraph_property"] = std::make_shared(std::move(property)); - g = nnvm::ApplyPass(std::move(g), "PartitionGraph"); + g = nnvm::ApplyPass(std::move(g), "BuildSubgraph"); s->outputs = g.outputs; } } diff --git a/src/executor/graph_executor.cc b/src/executor/graph_executor.cc index 28862a49fae1..460cec371bd4 100644 --- a/src/executor/graph_executor.cc +++ b/src/executor/graph_executor.cc @@ -1442,15 +1442,14 @@ static nnvm::Graph InferForwardAttrs(nnvm::Graph g, // Given input attr arrays, partition the graph using the backend name equal to prop_name. // This is a common function for bind and simple_bind flows. 
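The C API hunks above rename MXPartitionGraphByOpNames to MXBuildSubgraphByOpNames and apply the pass under its new name "BuildSubgraph". As a minimal sketch of driving the renamed test API from Python, mirroring the updated tests/python/unittest/test_subgraph_op.py further down in this patch (the symbol and op list here are illustrative only):

```python
# Illustrative sketch: partition a small symbol with the renamed C API.
import ctypes
import mxnet as mx
from mxnet.base import SymbolHandle, check_call, _LIB, mx_uint, c_str, c_str_array
from mxnet.symbol import Symbol

sym = mx.sym.cos(mx.sym.exp(mx.sym.var('data', shape=(2, 3))))
op_names = ['exp', 'cos']
out = SymbolHandle()
# 'default' selects the existing V1 property; 'default_v2' exercises the new
# SubgraphSelectorV2-based property added by this patch.
check_call(_LIB.MXBuildSubgraphByOpNames(sym.handle, c_str('default'),
                                         mx_uint(len(op_names)),
                                         c_str_array(op_names),
                                         ctypes.byref(out)))
partitioned_sym = Symbol(out)  # same outputs, with selected ops grouped into subgraph nodes
```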
-static nnvm::Symbol PartitionGraph(const nnvm::Symbol& src, - mxnet::op::SubgraphPropertyPtr subgraph_prop, - const mxnet::ShapeVector& arg_shapes, - const nnvm::DTypeVector& arg_dtypes, - const StorageTypeVector& arg_stypes, - const Context& default_ctx, - const std::map& ctx_map, - const std::vector& in_arg_ctxes, - const std::vector& aux_state_ctxes) { +static nnvm::Symbol BuildSubgraph(const nnvm::Symbol& src, + mxnet::op::SubgraphPropertyPtr subgraph_prop, + const mxnet::ShapeVector& arg_shapes, + const nnvm::DTypeVector& arg_dtypes, + const StorageTypeVector& arg_stypes, const Context& default_ctx, + const std::map& ctx_map, + const std::vector& in_arg_ctxes, + const std::vector& aux_state_ctxes) { nnvm::Symbol ret = src.Copy(); nnvm::Graph g; g.outputs = ret.outputs; @@ -1458,14 +1457,14 @@ static nnvm::Symbol PartitionGraph(const nnvm::Symbol& src, aux_state_ctxes); subgraph_prop->SetAttr("graph", g); g.attrs["subgraph_property"] = std::make_shared(std::move(subgraph_prop)); - g = ApplyPass(std::move(g), "PartitionGraph"); + g = ApplyPass(std::move(g), "BuildSubgraph"); ret.outputs = g.outputs; return ret; } // Given input attr dicts, partition the graph using the backend name equal to prop_name. // This is for simple_bind flow. -static nnvm::Symbol PartitionGraph(const nnvm::Symbol& src, +static nnvm::Symbol BuildSubgraph(const nnvm::Symbol& src, const std::string& prop_name, const std::unordered_map & arg_shape_map, @@ -1547,7 +1546,7 @@ static nnvm::Symbol PartitionGraph(const nnvm::Symbol& src, arg_stypes[i] = it3->second; } } - ret = PartitionGraph(ret, subgraph_prop, arg_shapes, arg_dtypes, arg_stypes, default_ctx, + ret = BuildSubgraph(ret, subgraph_prop, arg_shapes, arg_dtypes, arg_stypes, default_ctx, ctx_map, *in_arg_ctxes, *aux_state_ctxes); // Reorder in_arg_ctxes, arg_grad_ctxes, aux_state_ctxes and grad_req_types according to // partitioned symbol input sequence @@ -1573,13 +1572,13 @@ static nnvm::Symbol PartitionGraph(const nnvm::Symbol& src, // Given input ndarrays, partition the graph using the backend name equal to prop_name. // This is for bind flow. 
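These BuildSubgraph overloads are reached when an executor is created while a subgraph backend is set, i.e. when MXNET_SUBGRAPH_BACKEND is non-empty at bind/simple_bind time. A minimal sketch of that flow, mirroring _check_subgraph_exe2/_check_subgraph_exe4 in the updated unit tests (the symbol is illustrative):

```python
# Illustrative sketch: trigger graph partitioning in simple_bind via the env var.
import os
import mxnet as mx

data = mx.sym.var('data', shape=(2, 3, 10, 10))
ret = mx.sym.exp(data)
sym = mx.sym.cos(ret) + mx.sym.sin(ret)

os.environ['MXNET_SUBGRAPH_BACKEND'] = 'default_v2'   # or 'default' for the V1 property
exe = sym.simple_bind(ctx=mx.current_context(), grad_req='write')
del os.environ['MXNET_SUBGRAPH_BACKEND']

# Fill the inputs and run the partitioned executor.
for name in exe.arg_dict:
    exe.arg_dict[name][:] = mx.nd.random.uniform(shape=exe.arg_dict[name].shape)
outputs = exe.forward()
```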
-static nnvm::Symbol PartitionGraph(const nnvm::Symbol& src, const std::string& prop_name, - const Context& default_ctx, - const std::map& ctx_map, - std::vector* in_args, - std::vector* arg_grad_store, - std::vector* grad_req_type, - std::vector* aux_states) { +static nnvm::Symbol BuildSubgraph(const nnvm::Symbol& src, const std::string& prop_name, + const Context& default_ctx, + const std::map& ctx_map, + std::vector* in_args, + std::vector* arg_grad_store, + std::vector* grad_req_type, + std::vector* aux_states) { // setup map for in_args, arg_grad_store, grad_req_type and aux_states std::unordered_map in_args_map; std::unordered_map arg_grad_store_map; @@ -1664,8 +1663,8 @@ static nnvm::Symbol PartitionGraph(const nnvm::Symbol& src, const std::string& p } } - ret = PartitionGraph(ret, subgraph_prop, arg_shapes, arg_dtypes, arg_stypes, default_ctx, - ctx_map, in_arg_ctxes, aux_state_ctxes); + ret = BuildSubgraph(ret, subgraph_prop, arg_shapes, arg_dtypes, arg_stypes, default_ctx, + ctx_map, in_arg_ctxes, aux_state_ctxes); } // Reorder in_args, arg_grad_store, grad_req_type and aux_states according to partitioned symbol // input sequence @@ -1713,9 +1712,9 @@ Executor *Executor::SimpleBind(nnvm::Symbol symbol, std::vector tmp_aux_state_ctxes = aux_state_ctxes; std::vector tmp_grad_req_types = grad_req_types; if (!exec->subgraph_property().empty()) { - symbol = exec::PartitionGraph(symbol, exec->subgraph_property(), arg_shape_map, arg_dtype_map, - arg_stype_map, default_ctx, group2ctx, &tmp_in_arg_ctxes, - &tmp_arg_grad_ctxes, &tmp_grad_req_types, &tmp_aux_state_ctxes); + symbol = exec::BuildSubgraph(symbol, exec->subgraph_property(), arg_shape_map, arg_dtype_map, + arg_stype_map, default_ctx, group2ctx, &tmp_in_arg_ctxes, + &tmp_arg_grad_ctxes, &tmp_grad_req_types, &tmp_aux_state_ctxes); } exec->Init(symbol, default_ctx, group2ctx, tmp_in_arg_ctxes, tmp_arg_grad_ctxes, tmp_aux_state_ctxes, arg_shape_map, arg_dtype_map, arg_stype_map, tmp_grad_req_types, @@ -1738,9 +1737,9 @@ Executor *Executor::Bind(nnvm::Symbol symbol, std::vector tmp_aux_states = aux_states; if (!exec->subgraph_property().empty()) { - symbol = exec::PartitionGraph(symbol, exec->subgraph_property(), default_ctx, group2ctx, - &tmp_in_args, &tmp_arg_grad_store, &tmp_grad_req_type, - &tmp_aux_states); + symbol = + exec::BuildSubgraph(symbol, exec->subgraph_property(), default_ctx, group2ctx, &tmp_in_args, + &tmp_arg_grad_store, &tmp_grad_req_type, &tmp_aux_states); } exec->Init(symbol, default_ctx, group2ctx, tmp_in_args, tmp_arg_grad_store, tmp_grad_req_type, tmp_aux_states, reinterpret_cast(shared_exec)); diff --git a/src/operator/subgraph/partition_graph.cc b/src/operator/subgraph/build_subgraph.cc similarity index 70% rename from src/operator/subgraph/partition_graph.cc rename to src/operator/subgraph/build_subgraph.cc index 4c7552ec9bfe..32ea341d0834 100644 --- a/src/operator/subgraph/partition_graph.cc +++ b/src/operator/subgraph/build_subgraph.cc @@ -19,7 +19,7 @@ /*! * Copyright (c) 2018 by Contributors - * \file partition_graph.cc + * \file build_subgraph.cc * \brief */ #include @@ -31,51 +31,18 @@ #include "./subgraph_property.h" +#define DEBUG_SUBGRAPH 0 + namespace nnvm { NodePtr CreateVariableNode(const std::string& name); } namespace mxnet { - namespace op { - -using nnvm::Symbol; -using nnvm::Node; -using nnvm::NodePtr; -using nnvm::NodeEntry; -using nnvm::Graph; - -#define DEBUG_SUBGRAPH 0 - namespace sg { // sg stands for subgraph -struct SimpleNode; -using SimpleNodePtr = std::shared_ptr; - -/*! 
- * \brief Node of the undirected graph which replicates the network structures - * of the computational graph. It is used to ease the graph traversal for finding - * subgraphs. - */ -struct SimpleNode { - static SimpleNodePtr Create() { - return std::make_shared(); - } - SimpleNode() : label(-1), node(nullptr) {} - /*! subgraph label */ - int label; - /*! the original node in the computational graph it references*/ - nnvm::Node* node; - /*! - * \brief output nodes of the current node - * key is node ptr and value is an array of indices standing for the entry indices - * in key->inputs whose source is the current node. - */ - std::unordered_map> outputs; -}; // struct SimpleNode - #if DEBUG_SUBGRAPH -void PrintSubgraph(const std::vector& simple_nodes) { +void PrintSubgraph(const std::vector& simple_nodes) { std::string op_names = ""; for (size_t i = 0; i < simple_nodes.size(); ++i) { op_names += simple_nodes[i]->node->attrs.name + ' '; @@ -101,12 +68,12 @@ void PrintNodeEntries(const std::vector& entries) { * \param g the MXNet computational graph * \param simple_nodes the nodes of undirected graph in top sorted order */ -void CreateSimpleGraph(const Graph& g, - std::vector* simple_nodes) { +void CreateSimpleGraph(const nnvm::Graph& g, + std::vector* simple_nodes) { const auto& indexed_graph = g.indexed_graph(); simple_nodes->reserve(indexed_graph.num_nodes()); - DFSVisit(g.outputs, [&](const NodePtr& node) { - SimpleNodePtr sn = SimpleNode::Create(); + DFSVisit(g.outputs, [&](const nnvm::NodePtr& node) { + BiDirectedNodePtr sn = BiDirectedNode::Create(); sn->node = node.get(); for (size_t i = 0; i < sn->node->inputs.size(); ++i) { const auto& e = sn->node->inputs[i]; @@ -129,10 +96,10 @@ void CreateSimpleGraph(const Graph& g, * and clear the vector of subgraph nodes. */ void ResetNodeLabels(const nnvm::Graph& g, - const std::vector& simple_nodes, - std::vector* subgraph_nodes) { + const std::vector& simple_nodes, + std::vector* subgraph_nodes) { for (auto n : *subgraph_nodes) { - const auto nid = g.indexed_graph().node_id(n); + const auto nid = g.indexed_graph().node_id(n->node); simple_nodes[nid]->label = -1; } subgraph_nodes->clear(); @@ -153,20 +120,15 @@ void ResetNodeLabels(const nnvm::Graph& g, * \subgraph_nodes all the nodes belonging to the same subgraph of seed node * \excluded_nodes set of nodes that should be excluded from the current subgraph */ -bool LabelSubgraph(const Graph& g, - SubgraphSelectorPtr subgraph_selector, - const int label, - const size_t snid, // simple node id, this is a seed - const std::vector& simple_nodes, - std::vector* subgraph_nodes, - std::unordered_set* excluded_nodes = nullptr) { +bool LabelSubgraph(const nnvm::Graph& g, SubgraphSelectorV2Ptr subgraph_selector, const int label, + const size_t snid, const std::vector& simple_nodes, + std::vector* subgraph_nodes, + std::unordered_set* excluded_nodes) { const auto& indexed_graph = g.indexed_graph(); - std::queue node_queue; - if (!excluded_nodes || !excluded_nodes->count(simple_nodes[snid]->node)) { - CHECK_EQ(simple_nodes[snid]->label, -1); - simple_nodes[snid]->label = label; - node_queue.push(simple_nodes[snid].get()); - } + std::queue node_queue; + CHECK_EQ(simple_nodes[snid]->label, -1); + simple_nodes[snid]->label = label; + node_queue.push(simple_nodes[snid].get()); // key: nodes that serve as input/output nodes to the subgraph // value: pair of vectors of nodes in the subgraph. 
The first vector contains the // output nodes of the key in the subgraph, and the second vector contains the @@ -180,41 +142,40 @@ bool LabelSubgraph(const Graph& g, std::pair, std::vector>> non_subgraph_node_map; while (!node_queue.empty()) { - SimpleNode* cur_node = node_queue.front(); + BiDirectedNode* cur_node = node_queue.front(); node_queue.pop(); - subgraph_nodes->push_back(cur_node->node); + subgraph_nodes->push_back(cur_node); // get qualified adjacent input nodes for (auto& e : cur_node->node->inputs) { - const bool select_input = (!excluded_nodes || !excluded_nodes->count(e.node.get())) - && subgraph_selector->SelectInput(*cur_node->node, *e.node); + const auto node = e.node.get(); + const auto nid = indexed_graph.node_id(node); + auto snode = simple_nodes[nid].get(); + CHECK_LT(nid, simple_nodes.size()); + const bool select_input = + (snode->label == -1) && (!excluded_nodes || !excluded_nodes->count(snode)) && + subgraph_selector->SelectInput(*cur_node, *snode); if (select_input) { // e.node is a subgraph node - const auto nid = indexed_graph.node_id(e.node.get()); - CHECK_LT(nid, simple_nodes.size()); - // this node has not been visited yet - if (simple_nodes[nid]->label == -1) { - simple_nodes[nid]->label = label; - node_queue.push(simple_nodes[nid].get()); - } - } else { + snode->label = label; + node_queue.push(snode); + } else if (snode->label == -1) { // e.node is an input node of the subgraph non_subgraph_node_map[e.node.get()].first.push_back(cur_node->node); } } // get qualified output nodes for (auto it = cur_node->outputs.begin(); it != cur_node->outputs.end(); ++it) { - const bool select_output = (!excluded_nodes || !excluded_nodes->count(it->first)) - && subgraph_selector->SelectOutput(*cur_node->node, *it->first); + const auto nid = indexed_graph.node_id(it->first); + auto snode = simple_nodes[nid].get(); + CHECK_LT(nid, simple_nodes.size()); + const bool select_output = + (snode->label == -1) && (!excluded_nodes || !excluded_nodes->count(snode)) && + subgraph_selector->SelectOutput(*cur_node, *snode); if (select_output) { // it->first is a subgraph node - const auto nid = indexed_graph.node_id(it->first); - CHECK_LT(nid, simple_nodes.size()); - // this node has not been visited yet - if (simple_nodes[nid]->label == -1) { - simple_nodes[nid]->label = label; - node_queue.push(simple_nodes[nid].get()); - } - } else { + snode->label = label; + node_queue.push(snode); + } else if (snode->label == -1) { // it->first is an output node of the subgraph non_subgraph_node_map[it->first].second.push_back(cur_node->node); } @@ -235,8 +196,12 @@ bool LabelSubgraph(const Graph& g, } // check whether there is a cycle between the subgraph and its input/output nodes auto is_ancestor = [&](const nnvm::Node* ancestor, const nnvm::Node* descendant, - const std::vector& snodes) { + const std::vector& snodes) { if (ancestor == descendant) return true; + std::unordered_set snode_set; + for (const auto& sn : snodes) { + snode_set.insert(sn->node); + } std::stack s; s.push(descendant); size_t count = 0; @@ -251,8 +216,7 @@ bool LabelSubgraph(const Graph& g, } for (const auto& entry : top->inputs) { // when searching for the ancestor, the path cannot cross any subgraph node - auto it = std::find(snodes.begin(), snodes.end(), entry.node.get()); - if (it == snodes.end()) { + if (!snode_set.count(entry.node.get())) { s.push(entry.node.get()); } } @@ -295,11 +259,14 @@ bool LabelSubgraph(const Graph& g, << "A cycle is found in the computational graph between nodes " << 
simple_nodes[excluded_node_id]->node->attrs.name << " and " << simple_nodes[snid]->node->attrs.name; - excluded_nodes->insert(simple_nodes[excluded_node_id]->node); + excluded_nodes->insert(simple_nodes[excluded_node_id].get()); ResetNodeLabels(g, simple_nodes, subgraph_nodes); return false; } - std::sort(subgraph_nodes->begin(), subgraph_nodes->end(), node_cmp); + auto sim_node_cmp = [&] (const BiDirectedNode* node1, const BiDirectedNode* node2) { + return indexed_graph.node_id(node1->node) < indexed_graph.node_id(node2->node); + }; + std::sort(subgraph_nodes->begin(), subgraph_nodes->end(), sim_node_cmp); return true; } @@ -313,24 +280,22 @@ bool LabelSubgraph(const Graph& g, * \subgraph_nodes all the nodes belonging to the same subgraph of seed node * \return Subgraph node candidates sorted in the topological order */ -void PreSelectSubgraphNodes(const Graph& g, - SubgraphSelectorPtr subgraph_selector, - const int label, - const size_t snid, - const std::vector& simple_nodes, - std::vector* subgraph_nodes) { - std::unordered_set excluded_nodes; +void PreSelectSubgraphNodes(const nnvm::Graph& g, SubgraphSelectorV2Ptr subgraph_selector, + const int label, const size_t snid, + const std::vector& simple_nodes, + std::vector* subgraph_nodes) { + std::unordered_set excluded_nodes; const size_t max_num_retry = simple_nodes.size() * simple_nodes.size(); size_t count = 0; bool success = false; while (!success && count < max_num_retry) { - success = LabelSubgraph(g, subgraph_selector, label, snid, simple_nodes, - subgraph_nodes, &excluded_nodes); + success = LabelSubgraph(g, subgraph_selector, label, snid, simple_nodes, subgraph_nodes, + &excluded_nodes); if (!success) { CHECK(!excluded_nodes.empty()); std::string excluded_node_names; for (auto node : excluded_nodes) { - excluded_node_names += node->attrs.name + ", "; + excluded_node_names += node->node->attrs.name + ", "; } LOG(INFO) << "Found a cycle when BFS from node " << simple_nodes[snid]->node->attrs.name << ". Excluding nodes " << excluded_node_names << "and retrying"; @@ -339,126 +304,81 @@ void PreSelectSubgraphNodes(const Graph& g, } if (!success) { LOG(INFO) << "Tried " << count << " times of finding subgraphs starting from node " - << simple_nodes[snid]->node->attrs.name << " without success because a loop " - "is always found between the subgraph and some other nodes. Will treat " - "seed node " << simple_nodes[snid]->node->attrs.name - << "as a subgraph with one node"; + << simple_nodes[snid]->node->attrs.name + << " without success because a loop " + "is always found between the subgraph and some other nodes. Will treat " + "seed node " + << simple_nodes[snid]->node->attrs.name << "as a subgraph with one node"; CHECK(subgraph_nodes->empty()); simple_nodes[snid]->label = label; - subgraph_nodes->push_back(simple_nodes[snid]->node); + subgraph_nodes->push_back(simple_nodes[snid].get()); } } -/*! - * \brief Given a vector of nodes, group them into individual subgraphs - * based upon their connectivity. 
- */ -void PostProcessNodeCandidates(const nnvm::Graph& g, - const std::vector& nodes, - const std::vector& simple_nodes, - std::vector>* subgraphs, - size_t* subgraph_id) { - const auto& indexed_graph = g.indexed_graph(); - std::unordered_set node_set(nodes.begin(), nodes.end()); - auto simple_node_cmp = [&] (const SimpleNode* node1, const SimpleNode* node2) { +void SelectSubgraphNodes(nnvm::Graph* g, SubgraphSelectorV2Ptr subgraph_selector, + const std::vector& simple_nodes, + std::vector>* subgraph_nodes, + std::vector* subgraph_selectors, + const BiDirectedNode* node, const size_t snid, size_t* subgraph_id) { + const auto& indexed_graph = g->indexed_graph(); + auto node_cmp = [&] (const BiDirectedNode* node1, const BiDirectedNode* node2) { return indexed_graph.node_id(node1->node) < indexed_graph.node_id(node2->node); }; - for (auto node : nodes) { - if (!node_set.count(node)) { - // The node has been included in a subgraph - continue; - } - std::queue q; - q.push(node); - CHECK_EQ(node_set.erase(node), 1U); - subgraphs->emplace_back(); - const auto nid = indexed_graph.node_id(node); - simple_nodes[nid]->label = *subgraph_id; - subgraphs->back().push_back(simple_nodes[nid].get()); - while (!q.empty()) { - nnvm::Node* cur_node = q.front(); - q.pop(); - for (auto& e : cur_node->inputs) { - auto in_it = node_set.find(e.node.get()); - if (in_it != node_set.end()) { - q.push(*in_it); - const auto in_nid = indexed_graph.node_id(*in_it); - simple_nodes[in_nid]->label = *subgraph_id; - subgraphs->back().push_back(simple_nodes[in_nid].get()); - node_set.erase(in_it); - } + if (simple_nodes[snid]->label == -1 && subgraph_selector->Select(*node)) { + // pre-select nodes that can be grouped in a subgraph + std::vector preselected_nodes; + PreSelectSubgraphNodes(*g, subgraph_selector, *subgraph_id, snid, simple_nodes, + &preselected_nodes); + + // filter out unqualified pre-selected nodes + std::vector filtered_nodes = subgraph_selector->Filter(preselected_nodes); + + // reset node labels that are not in filtered nodes + for (const auto n : preselected_nodes) { + const auto nit = std::find(filtered_nodes.begin(), filtered_nodes.end(), n); + if (nit == filtered_nodes.end()) { + n->label = -1; } - const auto cur_nid = indexed_graph.node_id(cur_node); - const SimpleNode* cur_snode = simple_nodes[cur_nid].get(); - for (const auto& kv : cur_snode->outputs) { - const auto out_it = node_set.find(kv.first); - if (out_it != node_set.end()) { - q.push(*out_it); - const auto out_nid = indexed_graph.node_id(*out_it); - simple_nodes[out_nid]->label = *subgraph_id; - subgraphs->back().push_back(simple_nodes[out_nid].get()); - node_set.erase(out_it); - } + } + + if (filtered_nodes.size()) { + // make sure filtered_nodes is a subset of preselected_nodes + for (const auto n : filtered_nodes) { + const auto nit = std::find(preselected_nodes.begin(), preselected_nodes.end(), n); + CHECK(nit != preselected_nodes.end()) + << "Node " << n->node->attrs.name + << " is not found in the pre-selected subgraph nodes." + " Please make sure that no new nodes were added in your subgraph" + " selector's Filter function"; } + + // make sure nodes are sorted + std::sort(filtered_nodes.begin(), filtered_nodes.end(), node_cmp); + subgraph_nodes->push_back(filtered_nodes); + subgraph_selectors->push_back(subgraph_selector); + (*subgraph_id)++; } - ++(*subgraph_id); - std::sort(subgraphs->back().begin(), subgraphs->back().end(), simple_node_cmp); } - CHECK(node_set.empty()); } /*! 
* \brief Finds subgraphs with all nodes that meet certain criteria. * All nodes in a subgraph are marked with the same label. */ -void FindSubgraphs(Graph* g, +void FindSubgraphs(nnvm::Graph* g, const SubgraphProperty &subg_prop, - const std::vector& simple_nodes, - std::vector>* subgraph_nodes) { + const std::vector& simple_nodes, + std::vector>* subgraph_nodes, + std::vector* subgraph_selectors) { const auto& indexed_graph = g->indexed_graph(); CHECK_EQ(indexed_graph.num_nodes(), simple_nodes.size()); - auto node_cmp = [&] (const nnvm::Node* node1, const nnvm::Node* node2) { - return indexed_graph.node_id(node1) < indexed_graph.node_id(node2); - }; + size_t subgraph_id = 0; for (size_t i = 0; i < simple_nodes.size(); ++i) { - nnvm::Node* node = simple_nodes[i]->node; - auto subgraph_selector = subg_prop.CreateSubgraphSelector(); - if (subgraph_selector->Select(*node) && simple_nodes[i]->label == -1) { - // pre-select nodes that can be grouped in a subgraph - std::vector preselected_nodes; - PreSelectSubgraphNodes(*g, subgraph_selector, subgraph_id, i, simple_nodes, - &preselected_nodes); - - // filter out unqualified pre-selected nodes - std::vector filtered_nodes = subgraph_selector->Filter(preselected_nodes); - - // make sure filtered_nodes is a subset of preselected_nodes - for (const auto n : filtered_nodes) { - const auto nit = std::find(preselected_nodes.begin(), preselected_nodes.end(), n); - CHECK(nit != preselected_nodes.end()) - << "Node " << n->attrs.name << " is not found in the pre-selected subgraph nodes." - " Please make sure that no new nodes were added in your subgraph" - " selector's Filter function"; - } - - // make sure nodes are sorted - std::sort(filtered_nodes.begin(), filtered_nodes.end(), node_cmp); - - // reset node labels that are not in filtered nodes - for (const auto n : preselected_nodes) { - const auto nit = std::find(filtered_nodes.begin(), filtered_nodes.end(), n); - if (nit == filtered_nodes.end()) { - simple_nodes[indexed_graph.node_id(n)]->label = -1; - } - } - // find out subgraphs from the filtered nodes - std::vector> subgraphs; - PostProcessNodeCandidates(*g, filtered_nodes, simple_nodes, &subgraphs, &subgraph_id); - if (!subgraphs.empty()) { - subgraph_nodes->insert(subgraph_nodes->end(), subgraphs.begin(), subgraphs.end()); - } - } + const auto snode = simple_nodes[i]; + SubgraphSelectorV2Ptr subgraph_selector = subg_prop.CreateSubgraphSelectorV2(); + SelectSubgraphNodes(g, subgraph_selector, simple_nodes, subgraph_nodes, subgraph_selectors, + snode.get(), i, &subgraph_id); } } @@ -488,9 +408,9 @@ void SortEntries(const std::unordered_map& entry * \param entry_top_order_map mapping entry pointer to its top sorted position * \param input_entries input entries of the subgraph */ -void FindInputEntries(const Graph& g, - const std::vector& simple_nodes, - const std::vector& subgraph_nodes, +void FindInputEntries(const nnvm::Graph& g, + const std::vector& simple_nodes, + const std::vector& subgraph_nodes, const std::unordered_map& entry_top_order_map, std::vector* input_entries) { const auto& indexed_graph = g.indexed_graph(); @@ -528,9 +448,9 @@ void FindInputEntries(const Graph& g, * \param entry_top_order_map mapping entry pointer to its top sorted position * \param output_entries output entries of the subgraph */ -void FindOutputEntries(Graph* g, - const std::vector& simple_nodes, - const std::vector& subgraph_nodes, +void FindOutputEntries(nnvm::Graph* g, + const std::vector& simple_nodes, + const std::vector& subgraph_nodes, const 
std::unordered_map& entry_top_order_map, std::vector* output_entries) { @@ -618,12 +538,12 @@ void CutGraphInputs(const std::vector &input_entries, /*! * \brief Replace a set of nodes belonging to the same subgraph with a subgrpah node - * and keep the subgraph in the subgraph node. The input entries and output entries - * of the subgraph node are kept in the same order as the subgraph's. + * and keep the subgraph in the subgraph node. */ -void CreateSubgraphNode(Graph* g, - const std::vector& simple_nodes, - const std::vector& subgraph_nodes, +void CreateSubgraphNode(nnvm::Graph* g, + const std::vector& simple_nodes, + const std::vector& subgraph_nodes, + const SubgraphSelectorV2Ptr& subgraph_selector, const size_t subgraph_id, std::unordered_map* entry_top_order_map) { #if DEBUG_SUBGRAPH @@ -647,7 +567,7 @@ void CreateSubgraphNode(Graph* g, sym.outputs[i] = *output_entries[i]; } const SubgraphPropertyPtr& subg_prop = g->GetAttr("subgraph_property"); - nnvm::NodePtr n = subg_prop->CreateSubgraphNode(sym, subgraph_id); + nnvm::NodePtr n = subg_prop->CreateSubgraphNode(sym, subgraph_selector, subgraph_id); // Connect the external nodes to the subgraph node. subg_prop->ConnectSubgraphOutputs(n, &output_entries); @@ -664,8 +584,8 @@ void CreateSubgraphNode(Graph* g, nnvm::Node* node = e.node.get(); if (indexed_graph.exist(node)) { const auto nid = indexed_graph.node_id(node); - SimpleNode* sn = simple_nodes[nid].get(); - for (SimpleNode* dest_node : subgraph_nodes) { + BiDirectedNode* sn = simple_nodes[nid].get(); + for (BiDirectedNode* dest_node : subgraph_nodes) { sn->outputs.erase(dest_node->node); } sn->outputs[n.get()].push_back(i); @@ -676,6 +596,25 @@ void CreateSubgraphNode(Graph* g, #endif } +/*! + * \brief Adjust a set of nodes belonging to the same subgraph. No new node is created, but + * adjust selected nodes' attributes. + * This can be used to implement peephole optimization. For example, adjust calibration information + * of quantized nodes. + */ +void AdjustSubgraphNode(nnvm::Graph* g, + const std::vector& subgraph_nodes, + const SubgraphSelectorV2Ptr& subgraph_selector, + const size_t subgraph_id) { + std::vector node_list; + for (auto node : subgraph_nodes) { + node_list.push_back(node->node); + } + + const SubgraphPropertyPtr& subg_prop = g->GetAttr("subgraph_property"); + subg_prop->AdjustSubgraphNode(node_list, subgraph_selector, subgraph_id); +} + } // namespace sg /*! @@ -683,7 +622,7 @@ void CreateSubgraphNode(Graph* g, * This is going to be used to sort input/output entries of subgraphs to keep * the topological order unchanged. */ -void TopSortEntries(const Graph& g, +void TopSortEntries(const nnvm::Graph& g, std::unordered_map* entry_top_order_map) { CHECK(entry_top_order_map != nullptr); std::unordered_set visited; @@ -732,7 +671,7 @@ void TopSortEntries(const Graph& g, } } -Graph PartitionGraph(Graph&& g) { +nnvm::Graph BuildSubgraph(nnvm::Graph&& g) { if (!g.HasAttr("subgraph_property")) { // treat the whole graph as a subgraph LOG(INFO) << "The graph has no attribute of subgraph_property attached. 
" "The original graph is returned."; @@ -748,27 +687,37 @@ Graph PartitionGraph(Graph&& g) { std::unordered_map entry_top_order_map; TopSortEntries(g, &entry_top_order_map); - // Create undirected graph for ease of finding subgraphs - std::vector simple_nodes; + // Create double directional graph for ease of finding subgraphs + std::vector simple_nodes; CreateSimpleGraph(g, &simple_nodes); - std::vector> subgraph_nodes; - FindSubgraphs(&g, *subg_prop, simple_nodes, &subgraph_nodes); + std::vector> subgraph_nodes; + std::vector subgraph_selectors; + FindSubgraphs(&g, *subg_prop, simple_nodes, &subgraph_nodes, &subgraph_selectors); + CHECK_EQ(subgraph_nodes.size(), subgraph_selectors.size()); for (size_t i = 0; i < subgraph_nodes.size(); ++i) { #if DEBUG_SUBGRAPH - std::set simple_node_set(subgraph_nodes[i].begin(), subgraph_nodes[i].end()); + std::set simple_node_set(subgraph_nodes[i].begin(), subgraph_nodes[i].end()); CHECK_EQ(simple_node_set.size(), subgraph_nodes[i].size()); PrintSubgraph(subgraph_nodes[i]); #endif - CreateSubgraphNode(&g, simple_nodes, subgraph_nodes[i], i, &entry_top_order_map); + auto ptype = subg_prop->GetPropertyType(); + if (ptype == SubgraphProperty::SgPropertyType::kCreate) { + CreateSubgraphNode(&g, simple_nodes, subgraph_nodes[i], subgraph_selectors[i], i, + &entry_top_order_map); + } else { + CHECK_EQ(ptype, SubgraphProperty::SgPropertyType::kAdjust); + AdjustSubgraphNode(&g, subgraph_nodes[i], subgraph_selectors[i], i); + } } return g; } -NNVM_REGISTER_PASS(PartitionGraph) -.describe("Partition a graph according to the user defined rules " +NNVM_REGISTER_PASS(BuildSubgraph) +.describe("Apply a subgraph pass according to the user defined rules " "in a derived class of SubgraphProperty") -.set_body(PartitionGraph) +.set_body(BuildSubgraph) .set_change_graph(true); + } // namespace op } // namespace mxnet diff --git a/src/operator/subgraph/default_subgraph_property_v2.cc b/src/operator/subgraph/default_subgraph_property_v2.cc new file mode 100644 index 000000000000..bf8ccfe5ba6f --- /dev/null +++ b/src/operator/subgraph/default_subgraph_property_v2.cc @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + + +#include "./common.h" +#include "./subgraph_property.h" +#include "../../imperative/cached_op.h" + +namespace mxnet { +namespace op { + +/* + * This selects nodes for a subgraph that only contains operators + * in a given set and it visits nodes via both input and output links. 
+ */ +class ContainOpSelectorV2: public SubgraphSelectorV2 { + public: + explicit ContainOpSelectorV2(const std::unordered_set& op_names) + : op_names_(op_names) {} + + bool Select(const BiDirectedNode &sn) override { + const auto &seed_node = *sn.node; + return !seed_node.is_variable() && op_names_.count(seed_node.op()->name); + } + + bool SelectInput(const BiDirectedNode &sn, const BiDirectedNode &snew_node) override { + const auto &input_node = *snew_node.node; + return !input_node.is_variable() && op_names_.count(input_node.op()->name); + } + + bool SelectOutput(const BiDirectedNode &sn, const BiDirectedNode &snew_node) override { + const auto &output_node = *snew_node.node; + return !output_node.is_variable() && op_names_.count(output_node.op()->name); + } + private: + const std::unordered_set& op_names_; +}; + +/* + * This subgraph property finds a subgraph whose nodes have only operators + * within a set. The operators in the subgraph will be executed by _CachedOp. + */ +class DefaultSubgraphProperty: public SubgraphProperty { + public: + static SubgraphPropertyPtr Create() { return std::make_shared(); } + nnvm::NodePtr CreateSubgraphNode(const nnvm::Symbol &sym, + const SubgraphSelectorPtr& subgraph_selector, + const int subgraph_id = 0) const override { + nnvm::NodePtr n = nnvm::Node::Create(); + n->attrs.op = Op::Get("_CachedOp"); + n->attrs.name = "_CachedOp" + std::to_string(subgraph_id); + n->attrs.subgraphs.push_back(std::make_shared(sym)); + + std::vector > flags{{"static_alloc", "true"}}; + n->attrs.parsed = CachedOpPtr(new CachedOp(sym, flags)); + + return n; + } + SubgraphSelectorV2Ptr CreateSubgraphSelectorV2() const override { + return std::make_shared( + this->GetAttr>("op_names")); + } +}; + +MXNET_REGISTER_SUBGRAPH_PROPERTY(default_v2, DefaultSubgraphProperty); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/subgraph/mkldnn/mkldnn_post_quantize_align_scale_property.h b/src/operator/subgraph/mkldnn/mkldnn_post_quantize_align_scale_property.h new file mode 100644 index 000000000000..f8c47f0ce036 --- /dev/null +++ b/src/operator/subgraph/mkldnn/mkldnn_post_quantize_align_scale_property.h @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#ifndef MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_POST_QUANTIZE_ALIGN_SCALE_PROPERTY_H_ +#define MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_POST_QUANTIZE_ALIGN_SCALE_PROPERTY_H_ +#if MXNET_USE_MKLDNN == 1 + +#include +#include +#include "../common.h" +#include "../subgraph_property.h" + +namespace mxnet { +namespace op { + +class SgMKLDNNConcatPostQuantizeSelector : public SubgraphSelectorV2 { + public: + bool Select(const BiDirectedNode &sn) override { + const auto &n = *sn.node; + if (n.op() == Op::Get("_contrib_quantized_concat")) { + matched_list_.clear(); + visit_list_.clear(); + visit_list_.insert(&n); + select_output_ = (sn.outputs.size() > 1) ? false : true; + return true; + } + return false; + } + + bool SelectInput(const BiDirectedNode &sn, const BiDirectedNode &snew_node) override { + const auto &n = *sn.node; + const auto &new_node = *snew_node.node; + if (new_node.is_variable()) return false; + if (visit_list_.count(&n) == 0) return false; + bool multiple_outputs = false; + for (auto i : snew_node.outputs) { + if (visit_list_.count(i.first) == 0) { + multiple_outputs = true; + break; + } + } + if (multiple_outputs) return false; + if (new_node.attrs.dict.count("min_calib_range") != 0 && + new_node.attrs.dict.count("max_calib_range") != 0) { + matched_list_.push_back(&snew_node); + return true; + } else if (new_node.op() == Op::Get("_contrib_quantized_concat") || + new_node.op() == Op::Get("_contrib_quantized_pooling")) { + visit_list_.insert(&new_node); + return true; + } + return false; + } + + bool SelectOutput(const BiDirectedNode &sn, const BiDirectedNode &snew_node) override { + if (!select_output_) return false; + const auto &n = *sn.node; + const auto &new_node = *snew_node.node; + if (new_node.is_variable()) return false; + if (visit_list_.count(&n) == 0) { + return false; + } + if (new_node.op() == Op::Get("_contrib_quantized_concat") || + new_node.op() == Op::Get("_contrib_quantized_pooling")) { + visit_list_.insert(&new_node); + return true; + } + return false; + } + + virtual std::vector Filter( + const std::vector &candidates) { + if (matched_list_.size() < 2) { + return std::vector(0); + } else { + std::vector ret; + for (auto i : matched_list_) { + ret.push_back(const_cast(i)); + } + return ret; + } + } + + private: + bool select_output_; + std::vector matched_list_; + std::unordered_set visit_list_; +}; + +class SgMKLDNNPostQuantizeAlignScaleProperty : public SubgraphProperty { + public: + SgMKLDNNPostQuantizeAlignScaleProperty() : SubgraphProperty(kAdjust) {} + + static SubgraphPropertyPtr Create() { + static const std::string &name = "MKLDNN post-quantization scale alignment optimization pass"; + auto property = std::make_shared(); + property->SetAttr("property_name", name); + property->SetAttr("inference_only", true); + return property; + } + +/*! + * \brief Adjust selected nodes calibration range with maximum calib range. 
+ * For example, + * conv1 = mx.symbol.Convolution(data=data, weight=weight, name='conv1', num_filter=64, + * kernel=(3, 3), stride=(1, 1), no_bias=True) + * conv2 = mx.symbol.Convolution(data=data, weight=weight * 2, name='conv2', num_filter=64, + * kernel=(3, 3), stride=(1, 1), no_bias=True) + * conv3 = mx.symbol.Convolution(data=data, weight=weight * 3, name='conv3', num_filter=64, + * kernel=(3, 3), stride=(1, 1), no_bias=True) + * conv4 = mx.symbol.Convolution(data=data, weight=weight * 4, name='conv4', num_filter=64, + * kernel=(3, 3), stride=(1, 1), no_bias=True) + * concat = mx.symbol.Concat(*[conv1, conv2, conv3, conv4], name="concat", dim=1) + * + * This pass will collect the maximum calib range from conv1 to conv4, and apply it to all + * conv1 to conv4. Then concat don't need extra scale alignment operation. Performance and + * accuracy are both improved. + */ + void AdjustSubgraphNode(const std::vector &subgraph_nodes, + const SubgraphSelectorV2Ptr &subgraph_selector, + const int subgraph_id = 0) const override { + float min_calib = 0.0f; + float max_calib = 0.0f; + for (size_t i = 0; i < subgraph_nodes.size(); ++i) { + auto this_min_calib = std::stof(subgraph_nodes[i]->attrs.dict["min_calib_range"]); + auto this_max_calib = std::stof(subgraph_nodes[i]->attrs.dict["max_calib_range"]); + if (min_calib > this_min_calib) min_calib = this_min_calib; + if (max_calib < this_max_calib) max_calib = this_max_calib; + } + for (size_t i = 0; i < subgraph_nodes.size(); ++i) { + auto &n = *subgraph_nodes[i]; + n.attrs.dict["min_calib_range"] = std::to_string(min_calib); + n.attrs.dict["max_calib_range"] = std::to_string(max_calib); + if (n.op()->attr_parser) n.op()->attr_parser(&(n.attrs)); + } + } + + SubgraphSelectorV2Ptr CreateSubgraphSelectorV2() const override { + auto selector = std::make_shared(); + return selector; + } +}; + +} // namespace op +} // namespace mxnet + +#endif // if MXNET_USE_MKLDNN == 1 +#endif // MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_POST_QUANTIZE_ALIGN_SCALE_PROPERTY_H_ diff --git a/src/operator/subgraph/mkldnn/mkldnn_subgraph_property.cc b/src/operator/subgraph/mkldnn/mkldnn_subgraph_property.cc index 4d54f02a5844..26aa3b5b8e9a 100644 --- a/src/operator/subgraph/mkldnn/mkldnn_subgraph_property.cc +++ b/src/operator/subgraph/mkldnn/mkldnn_subgraph_property.cc @@ -23,6 +23,7 @@ #include "mkldnn_fc_property.h" #include "mkldnn_conv_post_quantize_property.h" #include "mkldnn_fc_post_quantize_property.h" +#include "mkldnn_post_quantize_align_scale_property.h" namespace mxnet { namespace op { @@ -31,6 +32,7 @@ MXNET_REGISTER_SUBGRAPH_PROPERTY(MKLDNN, SgMKLDNNConvProperty); MXNET_REGISTER_SUBGRAPH_PROPERTY(MKLDNN, SgMKLDNNFCProperty); MXNET_REGISTER_SUBGRAPH_PROPERTY(MKLDNN_POST_QUANTIZE, SgMKLDNNConvPostQuantizeProperty); MXNET_REGISTER_SUBGRAPH_PROPERTY(MKLDNN_POST_QUANTIZE, SgMKLDNNFCPostQuantizeProperty); +MXNET_REGISTER_SUBGRAPH_PROPERTY(MKLDNN_POST_QUANTIZE, SgMKLDNNPostQuantizeAlignScaleProperty); } // namespace op } // namespace mxnet diff --git a/src/operator/subgraph/subgraph_property.h b/src/operator/subgraph/subgraph_property.h index 4c05c089426f..aac3b3f2d0fc 100644 --- a/src/operator/subgraph/subgraph_property.h +++ b/src/operator/subgraph/subgraph_property.h @@ -30,6 +30,29 @@ namespace mxnet { namespace op { +struct BiDirectedNode; +using BiDirectedNodePtr = std::shared_ptr; + +/*! + * \brief Node of the undirected graph which replicates the network structures + * of the computational graph. 
It is used to ease the graph traversal for finding + * subgraphs. + */ +struct BiDirectedNode { + static BiDirectedNodePtr Create() { return std::make_shared(); } + BiDirectedNode() : label(-1), node(nullptr) {} + /*! subgraph label */ + int label; + /*! the original node in the computational graph it references*/ + nnvm::Node* node; + /*! + * \brief output nodes of the current node + * key is node ptr and value is an array of indices standing for the entry indices + * in key->inputs whose source is the current node. + */ + std::unordered_map> outputs; +}; // struct BiDirectedNode + /* * This provides criteria for the graph partitioning algorithm to select * nodes to subgraphs. @@ -85,6 +108,83 @@ class SubgraphSelector { using SubgraphSelectorPtr = std::shared_ptr; +class SubgraphSelectorV2 { + public: + virtual ~SubgraphSelectorV2() {} + /*! + * \brief Determines if to search for other nodes to form a subgraph from the seed_node. + */ + virtual bool Select(const BiDirectedNode& seed_node) = 0; + /*! + * \brief Determines if to select input_node when traverse to the cur_node. + * \param cur_node the node for determining whether its input_node should be selected + * \param input_node the input node of the cur_node + * \return true if input_node is selected + */ + virtual bool SelectInput(const BiDirectedNode& cur_node, + const BiDirectedNode& input_node) = 0; + /*! + * \brief Determines if to select output_node when traverse to the cur_node. + * \param cur_node the node for determining whether its output_node should be selected + * \param output_node the output node of the cur_node + * \return true if output_node is selected + */ + virtual bool SelectOutput(const BiDirectedNode& cur_node, + const BiDirectedNode& output_node) = 0; + /*! + * \brief Post processes pre-selected subgraph nodes. Return a list of nodes that + * users want to keep in subgraph(s). + * \param candidates re-selected subgraph nodes to filt + * \return a list of nodes to keep + */ + virtual std::vector Filter( + const std::vector& candidates) { + return candidates; + } +}; + +using SubgraphSelectorV2Ptr = std::shared_ptr; + +class SubgraphSelectorV2Bridge : public SubgraphSelectorV2 { + public: + explicit SubgraphSelectorV2Bridge(SubgraphSelectorPtr ptr) : ss_ptr_(ptr) {} + + virtual ~SubgraphSelectorV2Bridge() {} + + bool Select(const BiDirectedNode& seed_node) override { + return ss_ptr_->Select(*seed_node.node); + } + + bool SelectInput(const BiDirectedNode& cur_node, + const BiDirectedNode& input_node) override { + return ss_ptr_->SelectInput(*cur_node.node, *input_node.node); + } + + bool SelectOutput(const BiDirectedNode& cur_node, + const BiDirectedNode& output_node) override { + return ss_ptr_->SelectOutput(*cur_node.node, *output_node.node); + } + + std::vector Filter( + const std::vector& candidates) override { + std::unordered_map node_2_snode_map; + std::vector n_candidates; + for (auto i : candidates) { + node_2_snode_map[i->node] = i; + n_candidates.push_back(i->node); + } + auto n_ret = ss_ptr_->Filter(n_candidates); + std::vector ret; + for (auto i : n_ret) ret.push_back(node_2_snode_map[i]); + return ret; + } + + const SubgraphSelectorPtr& GetV1ptr() const { return ss_ptr_; } + + private: + SubgraphSelectorPtr ss_ptr_; +}; + /*! * \brief This provides a set of properties for partitioning a graph into subgraphs, * reconstructing a new graph from the subgraphs and creating a subgraph @@ -92,18 +192,83 @@ using SubgraphSelectorPtr = std::shared_ptr; */ class SubgraphProperty { public: + /*! 
\brief Property type */ + enum SgPropertyType { + kCreate, + kAdjust, + }; + + explicit SubgraphProperty(SgPropertyType type = kCreate) : type_(type) {} + /*! * \brief The criteria of selecting the subgraph nodes. */ - virtual SubgraphSelectorPtr CreateSubgraphSelector() const = 0; + virtual SubgraphSelectorPtr CreateSubgraphSelector() const { + LOG(FATAL) << "No CreateSubgraphSelector is implemented for this SubgraphProperty."; + return nullptr; + } + + virtual SubgraphSelectorV2Ptr CreateSubgraphSelectorV2() const { + auto v1_ptr = CreateSubgraphSelector(); + return std::make_shared(v1_ptr); + } + + /*! + * \brief Create an nnvm node for a given subgraph. Here users can customize how to + * execute the operators in the subgraph. + * \param sym the symbol to create subgraph node + * \param subgraph_id subgraph id + */ + virtual nnvm::NodePtr CreateSubgraphNode(const nnvm::Symbol& sym, + const int subgraph_id = 0) const { + CHECK_EQ(GetPropertyType(), kCreate); + LOG(FATAL) << "Not implement CreateSubgraphNode() for this subgraph property."; + return nullptr; + } + + /*! + * \brief Create an nnvm node for a given subgraph. Here users can customize how to + * execute the operators in the subgraph. + * \param sym the symbol to create subgraph node + * \param subgraph_selector the selector used for creating this subgraph + * \param subgraph_id subgraph id + */ + virtual nnvm::NodePtr CreateSubgraphNode(const nnvm::Symbol& sym, + const SubgraphSelectorPtr& subgraph_selector, + const int subgraph_id = 0) const { + return CreateSubgraphNode(sym, subgraph_id); + } + /*! * \brief Create an nnvm node for a given subgraph. Here users can customize how to * execute the operators in the subgraph. * \param sym the symbol to create subgraph node + * \param subgraph_selector The selector used for selecting this node set + * \param subgraph_id subgraph id + */ + virtual nnvm::NodePtr CreateSubgraphNode(const nnvm::Symbol& sym, + const SubgraphSelectorV2Ptr& subgraph_selector, + const int subgraph_id = 0) const { + CHECK_EQ(GetPropertyType(), kCreate); + const auto bridge = static_cast(subgraph_selector.get()); + return CreateSubgraphNode(sym, bridge->GetV1ptr(), subgraph_id); + } + + /*! + * \brief Adjust nnvm nodes from a given subgraph. No new node is created, but adjust + * selected nodes' attributes. This can be used to implement peephole optimization. + * Here users can customize how to adjust the operators in the subgraph. + * \param subgraph_nodes the subgraph nodes to adjust + * \param subgraph_selector The selector used for selecting this node set. * \param subgraph_id subgraph id */ - virtual nnvm::NodePtr CreateSubgraphNode(const nnvm::Symbol &sym, - const int subgraph_id = 0) const = 0; + virtual void AdjustSubgraphNode(const std::vector& subgraph_nodes, + const SubgraphSelectorV2Ptr &subgraph_selector, + const int subgraph_id = 0) const { + CHECK_EQ(GetPropertyType(), kAdjust); + LOG(FATAL) << "Not implement AdjustSubgraphNode() for this subgraph property."; + } + /*! * \brief Connect subgraph internal output with external output entries. * By default, each output entry will connect to an unique internal output. @@ -152,8 +317,13 @@ class SubgraphProperty { auto it = attrs_.find(name); return it != attrs_.end(); } + /*! + * \brief Get the property type. 
+ */ + SgPropertyType GetPropertyType() const { return type_; } protected: + SgPropertyType type_; std::unordered_map> attrs_; }; diff --git a/tests/python/mkl/test_subgraph.py b/tests/python/mkl/test_subgraph.py index 3213fb13a218..c8cf79e399fd 100644 --- a/tests/python/mkl/test_subgraph.py +++ b/tests/python/mkl/test_subgraph.py @@ -67,6 +67,23 @@ def check_qsym_calibrated(qsym, out_type, name='conv'): assert 'min_calib_range' in v assert 'max_calib_range' in v +def check_qsym_scale_align(qsym): + assert ''.join(qsym.attr_dict().keys()).find('quantized_sg_mkldnn_conv') != -1 + init = False + for k, v in qsym.attr_dict().items(): + if k.find('quantized_sg_mkldnn_conv') != -1: + assert 'min_calib_range' in v + assert 'max_calib_range' in v + if not init: + min_calib_range = v['min_calib_range'] + max_calib_range = v['max_calib_range'] + init = True + else: + assert min_calib_range == v['min_calib_range'] + assert max_calib_range == v['max_calib_range'] + + + def check_qsym_forward(qsym, qarg_params, qaux_params, batch, data_shape, label_shape): mod = mx.mod.Module(symbol=qsym, context=mx.current_context()) mod.bind(for_training=False, @@ -105,7 +122,7 @@ def check_qsym_gluon_forward(qsym, qarg_params, qaux_params, data_shape): net(data) def check_quantize(sym, data_shape, out_type, name='conv', - check_calibration=True, gluon_forward=False): + check_calibration=True, gluon_forward=False, check_scale_align=False): sg_pass_name = config[name][SG_PASS_NAME] post_sg_pass_name = config[name][POST_SG_PASS_NAME] @@ -157,6 +174,8 @@ def check_quantize(sym, data_shape, out_type, name='conv', qsym = qsym.get_backend_symbol(post_sg_pass_name) if check_calibration: check_qsym_calibrated(qsym, out_type, name=name) + if check_scale_align: + check_qsym_scale_align(qsym) if gluon_forward == True: check_qsym_gluon_forward(qsym, qarg_params, qaux_params, data_shape) else: @@ -306,6 +325,20 @@ def single_concat(data_shape, input_num, dim): concat = mx.symbol.Concat(*inputs, name="concat", dim=dim) return concat +# concat scale alignment case +def concat_scale_align(data_shape): + data, weight = head_symbol(data_shape) + conv1 = mx.symbol.Convolution(data=data, weight=weight, name='conv1', num_filter=64, + kernel=(3, 3), stride=(1, 1), no_bias=True) + conv2 = mx.symbol.Convolution(data=data, weight=weight * 2, name='conv2', num_filter=64, + kernel=(3, 3), stride=(1, 1), no_bias=True) + conv3 = mx.symbol.Convolution(data=data, weight=weight * 3, name='conv3', num_filter=64, + kernel=(3, 3), stride=(1, 1), no_bias=True) + conv4 = mx.symbol.Convolution(data=data, weight=weight * 4, name='conv4', num_filter=64, + kernel=(3, 3), stride=(1, 1), no_bias=True) + concat = mx.symbol.Concat(*[conv1, conv2, conv3, conv4], name="concat", dim=1) + return concat + def tail_neg_symbol(sym1, sym2): fc1 = mx.sym.FullyConnected(data=sym1, num_hidden=10, flatten=True, name='fc1') fc2 = mx.sym.FullyConnected(data=sym2, num_hidden=10, flatten=True, name='fc2') @@ -579,6 +612,7 @@ def test_pos_conv_bn_sum_relu(): net, attrs = conv_bn_sum_relu(True, data_shape) check_fusion(net, data_shape, attrs) +@with_seed() def test_pos_single_concat(): for data_shape in DATA_SHAPE: for out_type in ('uint8', 'int8', 'auto'): @@ -592,6 +626,14 @@ def test_pos_single_concat(): check_quantize(net, data_shape, out_type, name='conv', check_calibration=False) check_quantize(net, data_shape, out_type, name='conv', check_calibration=False, gluon_forward=True) +@with_seed() +def test_pos_concat_scale_align(): + for data_shape in DATA_SHAPE: + for 
out_type in ('uint8', 'int8', 'auto'): + net = concat_scale_align(data_shape) + check_quantize(net, data_shape, out_type, check_calibration=True, check_scale_align=True) + check_quantize(net, data_shape, out_type, check_calibration=True, check_scale_align=True, gluon_forward=True) + @with_seed() def test_neg_conv_bn(): for data_shape in DATA_SHAPE: diff --git a/tests/python/unittest/test_subgraph_op.py b/tests/python/unittest/test_subgraph_op.py index 40d609ad3541..f3c5dfd4e091 100644 --- a/tests/python/unittest/test_subgraph_op.py +++ b/tests/python/unittest/test_subgraph_op.py @@ -24,12 +24,12 @@ from mxnet.test_utils import assert_almost_equal -def test_subgraph_exe(): - def _check_subgraph_exe1(sym, op_names): +def _test_subgraph_exe(subgraph_backend): + def _check_subgraph_exe1(sym, subgraph_backend, op_names): """Use the partitioned sym to simple_bind an executor and compare the outputs with those of the original executor""" out = SymbolHandle() - check_call(_LIB.MXPartitionGraphByOpNames(sym.handle, c_str('default'), mx_uint(len(op_names)), + check_call(_LIB.MXBuildSubgraphByOpNames(sym.handle, c_str(subgraph_backend), mx_uint(len(op_names)), c_str_array(op_names), ctypes.byref(out))) partitioned_sym = Symbol(out) @@ -54,7 +54,7 @@ def _check_subgraph_exe1(sym, op_names): assert_almost_equal((exe.outputs[i] - partitioned_exe.outputs[i]).abs().sum().asnumpy(), np.zeros(shape=(1,))) - def _check_subgraph_exe2(sym, op_names): + def _check_subgraph_exe2(sym, subgraph_backend, op_names): """Use env var MXNET_SUBGRAPH_BACKEND=default to trigger graph partitioning in simple_bind and compare results of the partitioned sym and the original sym.""" def get_executor(sym, subgraph_backend=None, op_names=None, original_exec=None): @@ -79,18 +79,18 @@ def get_executor(sym, subgraph_backend=None, op_names=None, original_exec=None): return exe original_exec = get_executor(sym) - partitioned_exec = get_executor(sym, 'default', op_names, original_exec) + partitioned_exec = get_executor(sym, subgraph_backend, op_names, original_exec) outputs1 = original_exec.outputs outputs2 = partitioned_exec.outputs assert len(outputs1) == len(outputs2) for i in range(len(outputs1)): assert_almost_equal((outputs1[i] - outputs2[i]).abs().sum().asnumpy(), np.zeros(shape=(1,))) - def _check_subgraph_exe3(sym, op_names): + def _check_subgraph_exe3(sym, subgraph_backend, op_names): """Use the partitioned sym to bind an executor and compare the outputs with those of the original executor""" out = SymbolHandle() - check_call(_LIB.MXPartitionGraphByOpNames(sym.handle, c_str('default'), mx_uint(len(op_names)), + check_call(_LIB.MXBuildSubgraphByOpNames(sym.handle, c_str(subgraph_backend), mx_uint(len(op_names)), c_str_array(op_names), ctypes.byref(out))) partitioned_sym = Symbol(out) @@ -113,7 +113,7 @@ def _check_subgraph_exe3(sym, op_names): assert_almost_equal((exe.outputs[i] - partitioned_exe.outputs[i]).abs().sum().asnumpy(), np.zeros(shape=(1,))) - def _check_subgraph_exe4(sym, op_names): + def _check_subgraph_exe4(sym, subgraph_backend, op_names): """Use env var MXNET_SUBGRAPH_BACKEND=default to trigger graph partitioning in bind and compare results of the partitioned sym and the original sym.""" def get_executor(sym, subgraph_backend=None, op_names=None, original_exec=None): @@ -139,38 +139,38 @@ def get_executor(sym, subgraph_backend=None, op_names=None, original_exec=None): return exe original_exec = get_executor(sym) - partitioned_exec = get_executor(sym, 'default', op_names, original_exec) + partitioned_exec = 
get_executor(sym, subgraph_backend, op_names, original_exec) outputs1 = original_exec.outputs outputs2 = partitioned_exec.outputs assert len(outputs1) == len(outputs2) for i in range(len(outputs1)): assert_almost_equal((outputs1[i] - outputs2[i]).abs().sum().asnumpy(), np.zeros(shape=(1,))) - def check_subgraph_exe(sym, op_names): - _check_subgraph_exe1(sym, op_names) - _check_subgraph_exe2(sym, op_names) - _check_subgraph_exe3(sym, op_names) - _check_subgraph_exe4(sym, op_names) + def check_subgraph_exe(sym, subgraph_backend, op_names): + _check_subgraph_exe1(sym, subgraph_backend, op_names) + _check_subgraph_exe2(sym, subgraph_backend, op_names) + _check_subgraph_exe3(sym, subgraph_backend, op_names) + _check_subgraph_exe4(sym, subgraph_backend, op_names) - def test_network_structure_1(): + def test_network_structure_1(subgraph_backend): data1 = mx.sym.var('data1', shape=(2, 3, 10, 10)) data2 = mx.sym.var('data2') conv1 = mx.sym.Convolution(data=data1, weight=data2, no_bias=True, kernel=(2, 2), num_filter=1) conv2 = mx.sym.Convolution(data=data2, no_bias=True, kernel=(1, 1), num_filter=1) out = mx.sym.Group([conv1, conv2]) - check_subgraph_exe(out, ['Convolution']) + check_subgraph_exe(out, subgraph_backend, ['Convolution']) - def test_network_structure_2(): + def test_network_structure_2(subgraph_backend): # this tests whether the partitioning algorithm can deal with cycles data = mx.sym.var('data', shape=(2, 3, 10, 10)) ret = mx.sym.exp(data) ret1 = mx.sym.cos(ret) ret2 = mx.sym.sin(ret) ret = ret1 + ret2 - check_subgraph_exe(ret, ['exp', 'sin', '_Plus', 'elemwise_add', '_plus']) - check_subgraph_exe(ret, ['exp', 'cos', '_Plus', 'elemwise_add', '_plus']) + check_subgraph_exe(ret, subgraph_backend, ['exp', 'sin', '_Plus', 'elemwise_add', '_plus']) + check_subgraph_exe(ret, subgraph_backend, ['exp', 'cos', '_Plus', 'elemwise_add', '_plus']) - def test_network_structure_3(): + def test_network_structure_3(subgraph_backend): # this tests whether the partitioned sym can distinguish in_args and aux_states data = mx.sym.var('data', shape=(2, 3, 10, 10)) ret = mx.sym.exp(data) @@ -179,27 +179,27 @@ def test_network_structure_3(): ret = ret1 + ret2 ret = mx.sym.BatchNorm(ret) ret = mx.sym.BatchNorm(ret) - check_subgraph_exe(ret, ['exp', 'sin', '_Plus', 'elemwise_add', '_plus']) - check_subgraph_exe(ret, ['exp', 'cos', '_Plus', 'elemwise_add', '_plus']) - check_subgraph_exe(ret, ['exp', 'sin', '_Plus', 'elemwise_add', '_plus', 'BatchNorm']) - check_subgraph_exe(ret, ['exp', 'cos', '_Plus', 'elemwise_add', '_plus', 'BatchNorm']) - check_subgraph_exe(ret, ['exp', 'BatchNorm']) - check_subgraph_exe(ret, ['BatchNorm']) - - def test_network_structure_4(): + check_subgraph_exe(ret, subgraph_backend, ['exp', 'sin', '_Plus', 'elemwise_add', '_plus']) + check_subgraph_exe(ret, subgraph_backend, ['exp', 'cos', '_Plus', 'elemwise_add', '_plus']) + check_subgraph_exe(ret, subgraph_backend, ['exp', 'sin', '_Plus', 'elemwise_add', '_plus', 'BatchNorm']) + check_subgraph_exe(ret, subgraph_backend, ['exp', 'cos', '_Plus', 'elemwise_add', '_plus', 'BatchNorm']) + check_subgraph_exe(ret, subgraph_backend, ['exp', 'BatchNorm']) + check_subgraph_exe(ret, subgraph_backend, ['BatchNorm']) + + def test_network_structure_4(subgraph_backend): # the last op has multiple duplicate outputs data = mx.sym.var('data', shape=(2, 3, 10, 10)) ret = mx.sym.exp(data) ret = mx.sym.Group([ret, ret, ret]) - check_subgraph_exe(ret, ['exp']) + check_subgraph_exe(ret, subgraph_backend, ['exp']) - def test_network_structure_5(): + def 
test_network_structure_5(subgraph_backend): # the subgraph has two duplicate input entries data = mx.sym.var('data', shape=(2, 3, 10, 10)) ret = data + data - check_subgraph_exe(ret, ['_plus', '_Plus', 'elemwise_add']) + check_subgraph_exe(ret, subgraph_backend, ['_plus', '_Plus', 'elemwise_add']) - def test_network_structure_6(): + def test_network_structure_6(subgraph_backend): def get_graph(): data1 = mx.sym.Variable('data1', shape=(3, 3, 10, 10), dtype=np.float32) data2 = mx.sym.Variable('data2', shape=(1, 0, 2, 2)) @@ -212,9 +212,9 @@ def get_graph(): return rets for sym, op_names in get_graph(): - check_subgraph_exe(sym, op_names) + check_subgraph_exe(sym, subgraph_backend, op_names) - def test_network_structure_7(): + def test_network_structure_7(subgraph_backend): # in this graph, the subgraph node and the other two external nodes form a cycle data = mx.sym.Variable('data', shape=(1,)) ret1 = mx.sym.sin(data) @@ -222,16 +222,21 @@ def test_network_structure_7(): for _ in range(5): ret2 = mx.sym.cos(ret2) ret = ret1 + ret2 - check_subgraph_exe(ret, ['sin', 'elemwise_add', '_plus', '_Plus']) + check_subgraph_exe(ret, subgraph_backend, ['sin', 'elemwise_add', '_plus', '_Plus']) + + test_network_structure_1(subgraph_backend) + test_network_structure_2(subgraph_backend) + test_network_structure_3(subgraph_backend) + test_network_structure_4(subgraph_backend) + test_network_structure_5(subgraph_backend) + test_network_structure_6(subgraph_backend) + test_network_structure_7(subgraph_backend) - test_network_structure_1() - test_network_structure_2() - test_network_structure_3() - test_network_structure_4() - test_network_structure_5() - test_network_structure_6() - test_network_structure_7() +def test_subgraph_exe(): + _test_subgraph_exe('default') +def test_subgraph_v2_exe(): + _test_subgraph_exe('default_v2') if __name__ == '__main__': import nose
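For reference, the scale-alignment rule applied by the new kAdjust-type property (SgMKLDNNPostQuantizeAlignScaleProperty) and verified by test_pos_concat_scale_align above reduces to giving every selected node the widest calibration range found in the group. A plain-Python restatement of that C++ loop (the helper name is illustrative, not part of the patch):

```python
def align_calib_ranges(node_attrs):
    """Restates AdjustSubgraphNode: node_attrs is a list of attribute dicts holding
    'min_calib_range'/'max_calib_range' as strings, the way nnvm stores them."""
    min_calib, max_calib = 0.0, 0.0
    # Collect the widest range over all selected nodes.
    for attrs in node_attrs:
        min_calib = min(min_calib, float(attrs['min_calib_range']))
        max_calib = max(max_calib, float(attrs['max_calib_range']))
    # Write the shared range back so quantized concat inputs use one scale.
    for attrs in node_attrs:
        attrs['min_calib_range'] = str(min_calib)
        attrs['max_calib_range'] = str(max_calib)
```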