Refactor cast storage #109
Changes from 4 commits (of 6: ae11e6f, 430864d, cab5a8c, c6c3952, 0bd6493, 8a722e0)
src/operator/nn/cast_storage-inl.h

@@ -6,8 +6,11 @@
 #ifndef MXNET_OPERATOR_NN_CAST_STORAGE_INL_H_
 #define MXNET_OPERATOR_NN_CAST_STORAGE_INL_H_
 
+#include <numeric>
+#include <dmlc/timer.h>
 #include <mxnet/ndarray.h>
+#include <vector>
 #include "../mxnet_op.h"
 #include "../operator_common.h"
 #ifdef __CUDACC__
 #include "./cast_storage-inl.cuh"
 #endif  // __CUDACC__
@@ -54,23 +57,41 @@ inline void CastStorageDnsRspImpl(mshadow::Stream<cpu>* s, const TBlob& dns, NDA
   rsp->CheckAndAllocAuxData(rowsparse::kIdx, mshadow::Shape1(num_rows));
   TBlob row_idx_blob = rsp->aux_data(rowsparse::kIdx);
   RType* row_idx = row_idx_blob.dptr<RType>();
+
+  double start = dmlc::GetTime();
   mxnet_op::Kernel<MarkRspRowIdx, cpu>::Launch(s, num_rows, row_idx,
                                                dns.dptr<DType>(), num_cols);
+  double elapsed1 = dmlc::GetTime() - start;
+
   index_t nnr = 0;
-  nnr = std::accumulate(row_idx, row_idx+num_rows, nnr);
+  start = dmlc::GetTime();
+  nnr = mxnet::common::ParallelAccumulate(row_idx, num_rows, nnr);
+  // nnr = std::accumulate(row_idx, row_idx+num_rows, nnr);
+  double elapsed2 = dmlc::GetTime() - start;
+

Review comment: remove it?
Reply: Yes, I will push another commit to clean up everything. This is kept here for convenience for now.

   rsp->set_aux_shape(rowsparse::kIdx, mshadow::Shape1(nnr));
   if (0 == nnr) return;
   rsp->CheckAndAllocData(mshadow::Shape2(nnr, num_cols));
   mshadow::Tensor<cpu, 2, DType> dns_data = dns.FlatTo2D<cpu, DType>(s);
   mshadow::Tensor<cpu, 2, DType> rsp_data = rsp->data().FlatTo2D<cpu, DType>(s);
   size_t idx = 0;
+
+  start = dmlc::GetTime();
   for (index_t i = 0; i < num_rows; ++i) {
     if (row_idx[i] > 0) {
       row_idx[idx] = i;
       mshadow::Copy(rsp_data[idx], dns_data[i], s);
       ++idx;
     }
   }
+  double elapsed3 = dmlc::GetTime() - start;
+
+  double total = elapsed1 + elapsed2 + elapsed3;
+  LOG(INFO) << "shape = " << rsp->shape();
+  LOG(INFO) << "nnr = " << nnr;
+  LOG(INFO) << "MarkRspRowIdx cost " << elapsed1 * 1000 << " ms, " << static_cast<int>(elapsed1/total*100) << '%';
+  LOG(INFO) << "ParallelAccumulate cost " << elapsed2 * 1000 << " ms, " << static_cast<int>(elapsed2/total*100) << '%';
+  LOG(INFO) << "Copy rows cost " << elapsed3 * 1000 << " ms, " << static_cast<int>(elapsed3/total*100) << '%';
   });

Review comment: We're not committing these profiling logs, are we?
Reply: No, we are not. I will push another commit to clean up everything later.

   });
 }
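Taken together, the hunk above casts dense to row-sparse in three steps: mark every row that contains a nonzero, sum the marks to get the number of non-zero rows (nnr), then compact the marked rows and their indices into the output. Below is a minimal standalone sketch of the same three steps; all names are hypothetical, and plain std::vector stands in for the mshadow tensors:

#include <cstdint>
#include <numeric>
#include <vector>

// Hypothetical row-sparse container: row_idx holds the indices of the
// retained rows; data holds nnr x num_cols values, row-major.
struct RowSparse {
  std::vector<int64_t> row_idx;
  std::vector<float> data;
};

RowSparse CastDnsToRsp(const std::vector<float>& dns,
                       int64_t num_rows, int64_t num_cols) {
  // Step 1: mark each row that has at least one nonzero entry
  // (the job of the MarkRspRowIdx kernel above).
  std::vector<int64_t> mark(num_rows, 0);
  for (int64_t i = 0; i < num_rows; ++i) {
    for (int64_t j = 0; j < num_cols; ++j) {
      if (dns[i * num_cols + j] != 0.0f) { mark[i] = 1; break; }
    }
  }
  // Step 2: nnr is the sum of the marks (ParallelAccumulate above).
  const int64_t nnr = std::accumulate(mark.begin(), mark.end(), int64_t{0});
  // Step 3: compact the marked rows and their indices into the output
  // (the final copy loop above).
  RowSparse rsp;
  rsp.row_idx.reserve(nnr);
  rsp.data.reserve(nnr * num_cols);
  for (int64_t i = 0; i < num_rows; ++i) {
    if (mark[i]) {
      rsp.row_idx.push_back(i);
      rsp.data.insert(rsp.data.end(), dns.begin() + i * num_cols,
                      dns.begin() + (i + 1) * num_cols);
    }
  }
  return rsp;
}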
@@ -287,6 +308,45 @@ void CastStorageComputeImpl(mshadow::Stream<xpu>* s,
   }
 }
 
+struct CastStorageParam : public dmlc::Parameter<CastStorageParam> {
+  int storage_type;
+  DMLC_DECLARE_PARAMETER(CastStorageParam) {
+    DMLC_DECLARE_FIELD(storage_type)
+      .add_enum("default", kDefaultStorage)
+      .add_enum("row_sparse", kRowSparseStorage)
+      .add_enum("csr", kCSRStorage)
+      .describe("Output storage type.");
+  }
+};
+
+inline bool CastStorageInferStorageType(const nnvm::NodeAttrs& attrs,
+                                        std::vector<int> *in_attrs,
+                                        std::vector<int> *out_attrs) {
+  CHECK_EQ(in_attrs->size(), 1U);
+  CHECK_EQ(out_attrs->size(), 1U);
+  CHECK_NE(in_attrs->at(0), kUndefinedStorage)
+    << "src ndarray's storage type must be specified";
+  const CastStorageParam& param = nnvm::get<CastStorageParam>(attrs.parsed);
+  CHECK_NE(param.storage_type, kUndefinedStorage)
+    << "dst ndarray's storage type must be specified";
+  TYPE_ASSIGN_CHECK(*out_attrs, 0, param.storage_type);
+  return true;
+}
+
+template<typename xpu>
+void CastStorageComputeEx(const nnvm::NodeAttrs& attrs,
+                          const OpContext& ctx,
+                          const std::vector<NDArray>& inputs,
+                          const std::vector<OpReqType>& req,
+                          const std::vector<NDArray>& outputs) {
+  using namespace mshadow;
+  using namespace mshadow::expr;
+  Stream<xpu> *s = ctx.get_stream<xpu>();
+  CHECK_EQ(inputs.size(), 1);
+  CHECK_EQ(outputs.size(), 1);
+  CastStorageComputeImpl<xpu>(s, inputs[0], outputs[0]);
+}
+
 }  // namespace op
 }  // namespace mxnet
 #endif  // MXNET_OPERATOR_NN_CAST_STORAGE_INL_H_
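CastStorageInferStorageType above pins the single output slot to the storage type requested by the parameter, while requiring the input's type to already be known. Here is a small self-contained sketch of that assign-or-verify contract, assuming TYPE_ASSIGN_CHECK fills an undefined slot and otherwise requires a match (the harness and enum values are illustrative):

#include <cassert>
#include <vector>

enum StorageType {
  kUndefinedStorage = -1,
  kDefaultStorage,
  kRowSparseStorage,
  kCSRStorage
};

// Assign-or-verify, as TYPE_ASSIGN_CHECK is assumed to behave:
// fill an undefined slot, otherwise demand agreement.
bool AssignStorageType(std::vector<int>* attrs, size_t idx, int stype) {
  if ((*attrs)[idx] == kUndefinedStorage) {
    (*attrs)[idx] = stype;
    return true;
  }
  return (*attrs)[idx] == stype;
}

int main() {
  std::vector<int> out_attrs{kUndefinedStorage};  // dst starts undefined
  // storage_type parsed from "row_sparse":
  assert(AssignStorageType(&out_attrs, 0, kRowSparseStorage));
  assert(out_attrs[0] == kRowSparseStorage);
  // A later, conflicting request must fail rather than overwrite.
  assert(!AssignStorageType(&out_attrs, 0, kCSRStorage));
  return 0;
}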
src/operator/nn/cast_storage.cc (new file)

@@ -0,0 +1,31 @@
/*!
 * Copyright (c) 2017 by Contributors
 * \file cast_storage.cc
 * \brief CPU Implementation of cast_storage operator.
 */
#include "./cast_storage-inl.h"
#include "../elemwise_op_common.h"
#include "../tensor/elemwise_unary_op.h"

namespace mxnet {
namespace op {

// TODO(haibin) declare backward op for cast storage
DMLC_REGISTER_PARAMETER(CastStorageParam);

Review comment: Remove

NNVM_REGISTER_OP(cast_storage)
.describe(R"code(Casts tensor storage type to the new type.
)code" ADD_FILELINE)
.set_num_inputs(1)
.set_num_outputs(1)
.set_attr_parser(ParamParser<CastStorageParam>)
.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
.set_attr<nnvm::FInferStorageType>("FInferStorageType", CastStorageInferStorageType)
.set_attr<FCompute>("FCompute<cpu>", IdentityCompute<cpu>)
.set_attr<FComputeEx>("FComputeEx<cpu>", CastStorageComputeEx<cpu>)

Review comment: remove

.add_argument("data", "NDArray-or-Symbol", "The input.")
.add_arguments(CastStorageParam::__FIELDS__());

}  // namespace op
}  // namespace mxnet
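Note the dual registration above: FCompute<cpu> covers the case where input and output both use default (dense) storage, so an identity copy suffices, while FComputeEx<cpu> performs the actual conversion for sparse storage types. A hedged sketch of that dispatch decision (illustrative only, not the executor's real logic):

// Hypothetical predicate: dense-to-dense casts are identity copies;
// anything involving a sparse storage type needs the FComputeEx path.
inline bool NeedsStorageCast(int src_stype, int dst_stype) {
  return src_stype != kDefaultStorage || dst_stype != kDefaultStorage;
}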
src/operator/nn/cast_storage.cu (new file)

@@ -0,0 +1,17 @@
/*!
 * Copyright (c) 2017 by Contributors
 * \file cast_storage.cu
 * \brief GPU Implementation of cast_storage operator.
 */
#include "./cast_storage-inl.h"
#include "../tensor/elemwise_unary_op.h"

namespace mxnet {
namespace op {

NNVM_REGISTER_OP(cast_storage)
.set_attr<FCompute>("FCompute<gpu>", IdentityCompute<gpu>)
.set_attr<FComputeEx>("FComputeEx<gpu>", CastStorageComputeEx<gpu>);

}  // namespace op
}  // namespace mxnet
Review comment: why are there two types T and V?
Reply: T is for the element type of the array, and V is for the type of the accumulated sum.
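For context, a hedged sketch of what such a two-type helper might look like; the actual mxnet::common::ParallelAccumulate signature and implementation may differ:

#include <cstddef>

// T is the element type of the array, V the type of the running sum,
// so narrow elements (e.g. int32 row marks) can accumulate into a
// wider type without overflow. Sketch only; the real helper may differ.
template <typename T, typename V>
V ParallelAccumulate(const T* data, const size_t size, V init) {
  V sum = init;
  #pragma omp parallel for reduction(+:sum)
  for (int i = 0; i < static_cast<int>(size); ++i) {
    sum += data[i];
  }
  return sum;
}

// Usage mirroring the cast above: sum integer row marks into an
// index-typed accumulator.
//   index_t nnr = ParallelAccumulate(row_idx, num_rows, index_t(0));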