-
Notifications
You must be signed in to change notification settings - Fork 6.8k
[performance] Avoid uneccesary vector copies in imperative_utils.cc #14665
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,62 +20,61 @@ | |
#include "./imperative_utils.h" | ||
#include "./cached_op.h" | ||
|
||
namespace mxnet { | ||
namespace imperative { | ||
namespace { | ||
|
||
inline std::vector<NDArray*> NodeInputs(const nnvm::IndexedGraph& idx, | ||
const int node_idx, | ||
const std::vector<NDArray*> arrays) { | ||
std::vector<NDArray*> NodeInputs(const nnvm::IndexedGraph& idx, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we pass in the returned vector There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This would be unnecessary and non idiomatic. Return value optimization (RVO) will kick in and eliminate any extra copies on return values. |
||
const int node_idx, | ||
const std::vector<NDArray*>& arrays) { | ||
const nnvm::IndexedGraph::Node& node = idx[node_idx]; | ||
const size_t num_inputs = node.inputs.size(); | ||
std::vector<NDArray*> ndinputs; | ||
ndinputs.reserve(num_inputs); | ||
for (const auto& j : node.inputs) { | ||
size_t eid = idx.entry_id(j); | ||
const size_t eid = idx.entry_id(j); | ||
ndinputs.emplace_back(arrays[eid]); | ||
} | ||
return ndinputs; | ||
} | ||
|
||
inline std::vector<NDArray*> NodeOutputs(const nnvm::IndexedGraph& idx, | ||
const int node_idx, | ||
const std::vector<NDArray*> arrays) { | ||
std::vector<NDArray*> NodeOutputs(const nnvm::IndexedGraph& idx, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we pass in the returned vector There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think there will be not any another copy because of the right-value reference of std::vector. |
||
const int node_idx, | ||
const std::vector<NDArray*>& arrays) { | ||
const nnvm::IndexedGraph::Node& node = idx[node_idx]; | ||
const size_t num_outputs = node.source->num_outputs(); | ||
std::vector<NDArray*> ndoutputs; | ||
ndoutputs.reserve(num_outputs); | ||
for (size_t j = 0; j < num_outputs; ++j) { | ||
size_t eid = idx.entry_id(node_idx, j); | ||
const size_t eid = idx.entry_id(node_idx, j); | ||
ndoutputs.emplace_back(arrays[eid]); | ||
} | ||
return ndoutputs; | ||
} | ||
|
||
inline std::vector<OpReqType> NodeReq(const nnvm::IndexedGraph& idx, | ||
const int node_idx, | ||
const std::vector<OpReqType> array_reqs) { | ||
std::vector<OpReqType> NodeReq(const nnvm::IndexedGraph& idx, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we pass in the returned vector |
||
const int node_idx, | ||
const std::vector<OpReqType>& array_reqs) { | ||
const nnvm::IndexedGraph::Node& node = idx[node_idx]; | ||
const size_t num_outputs = node.source->num_outputs(); | ||
std::vector<OpReqType> req; | ||
req.reserve(num_outputs); | ||
for (size_t j = 0; j < num_outputs; ++j) { | ||
size_t eid = idx.entry_id(node_idx, j); | ||
const size_t eid = idx.entry_id(node_idx, j); | ||
req.push_back(array_reqs[eid]); | ||
} | ||
return req; | ||
} | ||
|
||
inline void InvokeOperator(const nnvm::IndexedGraph& idx, | ||
const int node_idx, | ||
const bool retain_graph, | ||
const std::vector<NDArray*> arrays, | ||
Context ctx, | ||
std::vector<OpStatePtr>* p_states, | ||
std::vector<NDArray*> ndinputs, | ||
std::vector<NDArray*> ndoutputs, | ||
std::vector<OpReqType> *p_req, | ||
std::vector<uint32_t> *p_ref_count, | ||
std::function<void(const OpStatePtr &state)> invoke) { | ||
void InvokeOperator(const nnvm::IndexedGraph& idx, | ||
const int node_idx, | ||
const bool retain_graph, | ||
const std::vector<NDArray*>& arrays, | ||
Context ctx, | ||
std::vector<OpStatePtr>* p_states, | ||
const std::vector<NDArray*>& ndinputs, | ||
const std::vector<NDArray*>& ndoutputs, | ||
std::vector<OpReqType> *p_req, | ||
std::vector<uint32_t> *p_ref_count, | ||
std::function<void(const OpStatePtr &state)> invoke) { | ||
static const auto bwd_cached_op = Op::Get("_backward_CachedOp"); | ||
static auto& createop = nnvm::Op::GetAttr<FCreateOpState>("FCreateOpState"); | ||
static auto& is_layer_backward = Op::GetAttr<bool>("TIsLayerOpBackward"); | ||
|
@@ -122,10 +121,15 @@ inline void InvokeOperator(const nnvm::IndexedGraph& idx, | |
} | ||
} | ||
|
||
} // namespace | ||
|
||
namespace mxnet { | ||
namespace imperative { | ||
|
||
void RunGraph( | ||
const bool retain_graph, | ||
const nnvm::IndexedGraph& idx, | ||
const std::vector<NDArray*> arrays, | ||
const std::vector<NDArray*>& arrays, | ||
size_t node_start, size_t node_end, | ||
std::vector<OpReqType>&& array_reqs, | ||
std::vector<uint32_t>&& ref_count, | ||
|
@@ -161,7 +165,7 @@ void NaiveRunGraph( | |
const bool retain_graph, | ||
const Context& default_ctx, | ||
const nnvm::IndexedGraph& idx, | ||
const std::vector<NDArray*> arrays, | ||
const std::vector<NDArray*>& arrays, | ||
size_t node_start, size_t node_end, | ||
std::vector<OpReqType>&& array_reqs, | ||
std::vector<uint32_t>&& ref_count, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why remove inline?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What's the point of inline here? These are private functions so they are moved to anonymous namespace. Let the compiler do the inline as the compiler is better at this than us. We are abusing inline in the codebase.
https://stackoverflow.com/questions/1932311/when-to-use-inline-function-and-when-not-to-use-it
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we measure the impact here? Because this logic is part of core engine and gets exposed to every job, I was wondering if it would be good to measure the performance impact.