Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
Fix a race condition in converting data layouts in MKLDNN. (#9862)
Browse files Browse the repository at this point in the history
* Fix a race condition in converting data layouts.

* Avoid calling data() in elemwise sum.

* Fix a compilation error.

* Address comments.

* avoid data layout conversion inside ndarray.

* Fix a compilation error.

* address comments.

* Reorder weight arrays in convolution async.

* Fix async data reordering in NDArray.

* Fix race condition in deconv.

* Update ndarray.cc

* Check more in NDArray.

* Fix a bug in MKLDNNDataReorder.

* Fix a bug in NDArray.

* Simplify weight reorder in (de-)conv.
  • Loading branch information
zheng-da authored and marcoabreu committed Mar 1, 2018
1 parent e29b2f5 commit f9c2689
Show file tree
Hide file tree
Showing 9 changed files with 188 additions and 85 deletions.
23 changes: 21 additions & 2 deletions include/mxnet/ndarray.h
Original file line number Diff line number Diff line change
Expand Up @@ -622,12 +622,29 @@ class NDArray {
/*
* Reorder the memory to the specified layout.
*/
void MKLDNNDataReorder(const mkldnn::memory::primitive_desc &desc);
void MKLDNNDataReorder(const mkldnn::memory::primitive_desc &desc) {
  // In-place layout conversion is only defined for dense (default-storage)
  // arrays; sparse storage types never carry an MKLDNN layout.
  CHECK_EQ(storage_type(), kDefaultStorage);
  // Delegate to the Chunk, which owns the underlying memory handle.
  ptr_->MKLDNNDataReorder(desc);
}
void Reorder2Default() {
  // Converting back to the default layout is likewise only meaningful for
  // dense storage.
  CHECK_EQ(storage_type(), kDefaultStorage);
  // The Chunk rewrites its data into the default format in place.
  ptr_->Reorder2Default();
}

/*
* These are the async versions of the methods above.
* They change the layout of this NDArray, but the change happens only after
* all pending accesses to the array are complete.
*/
void Reorder2DefaultAsync();
void MKLDNNDataReorderAsync(const mkldnn::memory::primitive_desc &desc);

/*
* This creates a new NDArray with the reordered data.
* It doesn't affect the data of the original NDArray.
*/
NDArray Reorder2Default() const;

void InvalidateMKLDNNData() {
// Removing mkl_mem_ means the NDArray will store data in the default format.
ptr_->mkl_mem_ = nullptr;
Expand Down Expand Up @@ -880,9 +897,11 @@ class NDArray {
// Have MKL memory reference to the data in the default storage
// or create memory for MKLDNN.
void SetMKLMem(const TShape &shape, int dtype);
// In the data is stored in MKLDNN layout, we reorder data in mkl_mem_ and
// If the data is stored in MKLDNN layout, we reorder data in mkl_mem_ and
// save the result in shandle.
void Reorder2Default();
// Reorder data to a specified layout.
void MKLDNNDataReorder(const mkldnn::memory::primitive_desc &desc);
bool IsMKLDNN() const;
bool IsDefault() const;
#endif
Expand Down
149 changes: 99 additions & 50 deletions src/ndarray/ndarray.cc
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,45 @@ void NDArray::Chunk::Reorder2Default() {
CheckAndAlloc(def_pd.get_size());
// TODO(zhengda) We need to avoid memory copy here.
memcpy(shandle.dptr, def_mem->get_data_handle(), def_pd.get_size());
mkl_mem_.reset(new mkldnn::memory(def_pd, shandle.dptr));
mkl_mem_ = nullptr;
}

// Reorder this chunk's data in place into the MKLDNN layout described by
// `pd`, writing the converted bytes back into shandle and re-pointing
// mkl_mem_ at them. No-ops when the data is already in the requested layout.
// NOTE(review): this mutates the chunk's memory; callers are expected to have
// serialized access through the engine (see the *Async wrappers) — confirm.
void NDArray::Chunk::MKLDNNDataReorder(const mkldnn::memory::primitive_desc &pd) {
  // If the memory already uses the specified layout, don't do anything.
  if (mkl_mem_ != nullptr && mkl_mem_->get_primitive_desc() == pd)
    return;
  // Copy pd because desc() is non-const in this MKLDNN API version.
  auto _pd = pd;
  auto _desc = _pd.desc();
  auto def_format = GetDefaultFormat(_desc);
  // If the memory is default, don't do anything.
  if (def_format == _desc.data.format && IsDefault())
    return;
  // If the specified layout is default, we should use Reorder2Default.
  if (def_format == _desc.data.format) {
    Reorder2Default();
    return;
  }

  // Destination buffer in the requested layout; the source is either the
  // existing MKLDNN memory or a default-layout view over shandle.
  std::shared_ptr<mkldnn::memory> new_mem(new mkldnn::memory(pd));
  std::shared_ptr<mkldnn::memory> old_mem;
  if (IsDefault()) {
    auto def_pd = GetPrimitiveDesc(pd, def_format);
    old_mem.reset(new mkldnn::memory(def_pd, shandle.dptr));
  } else {
    old_mem = this->mkl_mem_;
  }
  // Reorder between layouts with different ranks is not supported here.
  CHECK(old_mem->get_primitive_desc().desc().data.ndims == _desc.data.ndims);

  // This may be called in MKLDNN operators. We can't use MKLDNNStream here.
  std::vector<mkldnn::primitive> net;
  net.push_back(mkldnn::reorder(*old_mem, *new_mem));
  mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait();

  // The converted data must fit in the existing handle before copying back.
  CHECK(shandle.size >= pd.get_size());
  CheckAndAlloc(pd.get_size());
  // TODO(zhengda) We need to avoid memory copy here.
  memcpy(shandle.dptr, new_mem->get_data_handle(), pd.get_size());
  mkl_mem_.reset(new mkldnn::memory(pd, shandle.dptr));
}

void NDArray::Chunk::SetMKLMem(const TShape &shape, int dtype) {
Expand Down Expand Up @@ -495,12 +533,56 @@ const mkldnn::memory *NDArray::GetMKLDNNDataReorder(
}
}

// Return a copy of this array whose data is laid out in the default format.
// The original array is left untouched; if it is already in the default
// layout (or has no MKLDNN memory at all), the array itself is returned.
NDArray NDArray::Reorder2Default() const {
  CHECK(storage_type() == kDefaultStorage);

  if (ptr_->mkl_mem_ == nullptr)
    return *this;
  const auto cur_pd = ptr_->mkl_mem_->get_primitive_desc();
  const auto def_fmt = GetDefaultFormat(cur_pd.desc());
  if (def_fmt == cur_pd.desc().data.format)
    return *this;

  // Allocate a fresh array and reorder the MKLDNN-layout data into it.
  NDArray converted(shape(), ctx(), false, dtype());
  auto target_pd = GetPrimitiveDesc(cur_pd, def_fmt);
  CHECK(converted.ptr_->shandle.size >= target_pd.get_size());
  mkldnn::memory dst_mem(target_pd, converted.ptr_->shandle.dptr);
  // This may be called in MKLDNN operators. We can't use MKLDNNStream here.
  std::vector<mkldnn::primitive> prims;
  prims.emplace_back(mkldnn::reorder(*ptr_->mkl_mem_, dst_mem));
  mkldnn::stream(mkldnn::stream::kind::eager).submit(prims).wait();
  return converted;
}

void NDArray::Reorder2DefaultAsync() {
std::vector<Engine::VarHandle> const_vars;
std::vector<Engine::VarHandle> mutable_vars(1, this->var());
NDArray tmp = *this;
Engine::Get()->PushAsync(
[tmp](RunContext ctx, Engine::CallbackOnComplete on_complete) {
tmp.ptr_->Reorder2Default();
on_complete();
}, ctx(), const_vars, mutable_vars,
FnProperty::kNormal, 0, PROFILER_MESSAGE("Reorder2Default"));
}

// Schedule an in-place conversion of this array to the layout described by
// `desc`. As with Reorder2DefaultAsync, the engine defers the conversion
// until all pending accesses to the array have finished.
void NDArray::MKLDNNDataReorderAsync(const mkldnn::memory::primitive_desc &desc) {
  // Only this array's variable is mutated by the conversion.
  std::vector<Engine::VarHandle> reads;
  std::vector<Engine::VarHandle> writes{this->var()};
  NDArray self_copy = *this;
  Engine::Get()->PushAsync(
      [self_copy, desc](RunContext ctx, Engine::CallbackOnComplete on_complete) {
        self_copy.ptr_->MKLDNNDataReorder(desc);
        on_complete();
      }, ctx(), reads, writes,
      FnProperty::kNormal, 0, PROFILER_MESSAGE("Reorder"));
}

const mkldnn::memory *NDArray::GetMKLDNNData() const {
CHECK(storage_type() == kDefaultStorage);
// If this array uses MKLDNN layout and it's a view, we have to change its
// layout to the default layout.
if (IsMKLDNNData() && IsView())
ptr_->Reorder2Default();
// If this array uses MKLDNN layout, we have to make sure it's not a view.
// Otherwise, we'll have to change the layout inside the array.
if (IsMKLDNNData())
CHECK(!IsView());
ptr_->SetMKLMem(IsView() ? ptr_->storage_shape : shape_, dtype_);
// If shandle has data, the data in shandle and mkl_mem_ should match.
if (ptr_->shandle.dptr)
Expand Down Expand Up @@ -534,45 +616,6 @@ const mkldnn::memory *NDArray::GetMKLDNNData() const {
}
}

// Reorder this array's data in place into the MKLDNN layout described by
// `pd`. The converted bytes are written back into the chunk's shandle and
// mkl_mem_ is re-pointed at them.
// NOTE(review): this mutates the array's memory directly on the calling
// thread, outside the engine's dependency tracking — concurrent readers of
// the old layout could observe the data mid-conversion; confirm callers
// serialize access (the commit introducing *Async variants suggests not).
void NDArray::MKLDNNDataReorder(const mkldnn::memory::primitive_desc &pd) {
  CHECK_EQ(storage_type(), kDefaultStorage);
  // If the memory already uses the specified layout, don't do anything.
  if (ptr_->mkl_mem_ != nullptr && ptr_->mkl_mem_->get_primitive_desc() == pd)
    return;
  // Copy pd because desc() is non-const in this MKLDNN API version.
  auto _pd = pd;
  auto _desc = _pd.desc();
  auto def_format = GetDefaultFormat(_desc);
  // If the memory is default, don't do anything.
  if (def_format == _desc.data.format && ptr_->IsDefault())
    return;
  // If the specified layout is default, we should use Reorder2Default.
  if (def_format == _desc.data.format) {
    ptr_->Reorder2Default();
    return;
  }

  // Destination memory in the requested layout; source is the array's
  // current MKLDNN memory (created on demand by SetMKLMem).
  std::shared_ptr<mkldnn::memory> new_mem(new mkldnn::memory(pd));
  ptr_->SetMKLMem(shape_, dtype_);
  auto old_mem = ptr_->mkl_mem_;
  // It's possible that the specified layout has a different number of dimensions.
  if (old_mem->get_primitive_desc().desc().data.ndims != _desc.data.ndims) {
    // For now, we only support reorder from the default layout.
    CHECK(ptr_->IsDefault());
    auto def_pd = GetPrimitiveDesc(pd, def_format);
    old_mem.reset(new mkldnn::memory(def_pd, old_mem->get_data_handle()));
  }
  // This may be called in MKLDNN operators. We can't use MKLDNNStream here.
  std::vector<mkldnn::primitive> net;
  net.push_back(mkldnn::reorder(*old_mem, *new_mem));
  mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait();

  // The converted data must fit in the existing handle before copying back.
  CHECK(ptr_->shandle.size >= pd.get_size());
  ptr_->CheckAndAlloc(pd.get_size());
  // TODO(zhengda) We need to avoid memory copy here.
  memcpy(ptr_->shandle.dptr, new_mem->get_data_handle(), pd.get_size());
  ptr_->mkl_mem_.reset(new mkldnn::memory(pd, ptr_->shandle.dptr));
}

void NDArray::CopyFrom(const mkldnn::memory &mem) {
CHECK(ptr_ != nullptr) << "The NDArray hasn't been initialized";
if (ptr_->mkl_mem_.get() == &mem)
Expand All @@ -581,10 +624,10 @@ void NDArray::CopyFrom(const mkldnn::memory &mem) {
CHECK(mem.get_primitive_desc().get_size() == shape().Size() * GetTypeSize(dtype_))
<< "The size of NDArray doesn't match the requested MKLDNN memory desc";
MKLDNNStream *stream = MKLDNNStream::Get();
// If this array uses MKLDNN layout and it's a view, we have to change its
// layout to the default layout.
if (IsMKLDNNData() && IsView())
ptr_->Reorder2Default();
// If this array uses MKLDNN layout, we have to make sure it's not a view.
// Otherwise, we'll have to change the layout inside the array.
if (IsMKLDNNData())
CHECK(!IsView());
ptr_->SetMKLMem(IsView() ? ptr_->storage_shape : shape_,
dtype_);
stream->RegisterMem(ptr_->mkl_mem_);
Expand Down Expand Up @@ -1017,6 +1060,7 @@ inline void CopyFromToDnsImpl(const NDArray& from, const NDArray& to, RunContext
// with Copy().
NDArray tmp_from = from;
if (tmp_from.IsMKLDNNData()) {
// TODO(zhengda) tmp_from should be cached.
tmp_from = NDArray(from.shape(), from.ctx(), false, from.dtype());
auto tmp_mem = from.GetMKLDNNData();
tmp_from.CopyFrom(*tmp_mem);
Expand All @@ -1025,7 +1069,7 @@ inline void CopyFromToDnsImpl(const NDArray& from, const NDArray& to, RunContext
CHECK(tmp_from.IsDefaultData());
CHECK(to.IsDefaultData());
TBlob tmp = to.data();
ndarray::Copy<from_xpu, to_xpu>(from.data(), &tmp,
ndarray::Copy<from_xpu, to_xpu>(tmp_from.data(), &tmp,
from.ctx(), to.ctx(), ctx);
}
#endif
Expand Down Expand Up @@ -1849,7 +1893,12 @@ void NDArray::SyncCopyToCPU(void *data, size_t size) const {
if (this->ctx().dev_mask() == cpu::kDevMask) {
this->WaitToRead();
RunContext rctx{this->ctx(), nullptr};
ndarray::Copy<cpu, cpu>(this->data(), &dst,
NDArray src = *this;
#if MXNET_USE_MKLDNN == 1
if (src.IsMKLDNNData())
src = this->Reorder2Default();
#endif
ndarray::Copy<cpu, cpu>(src.data(), &dst,
Context::CPU(), Context::CPU(), rctx);
} else {
#if MXNET_USE_CUDA
Expand Down
17 changes: 17 additions & 0 deletions src/operator/nn/mkldnn/mkldnn_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -270,9 +270,26 @@ void FallBackCompute(FCompute fn, const nnvm::NodeAttrs &attrs,
const std::vector<OpReqType> &req,
const std::vector<NDArray> &outputs) {
std::vector<TBlob> in_blobs(inputs.size());
std::vector<NDArray> in_bufs;
for (size_t i = 0; i < in_blobs.size(); i++) {
// If the input data isn't stored in the default format, we shouldn't
// call data() directly, which will change the layout of the NDArray.
// Instead, we should save the converted data in another NDArray.
// TODO(zhengda) we should use temp space to save the converted data.
if (inputs[i].IsDefaultData()) {
in_blobs[i] = inputs[i].data();
} else {
if (in_bufs.empty())
in_bufs.reserve(inputs.size());
in_bufs.emplace_back(inputs[i].shape(), inputs[i].ctx(),
false, inputs[i].dtype());
const mkldnn::memory *mem = inputs[i].GetMKLDNNData();
in_bufs.back().CopyFrom(*mem);
in_blobs[i] = in_bufs.back().data();
}
}
MKLDNNStream::Get()->Submit();

std::vector<TBlob> out_blobs(outputs.size());
for (size_t i = 0; i < out_blobs.size(); i++) {
if (req[i] == kWriteTo)
Expand Down
25 changes: 16 additions & 9 deletions src/operator/nn/mkldnn/mkldnn_convolution.cc
Original file line number Diff line number Diff line change
Expand Up @@ -262,25 +262,32 @@ void MKLDNNConvolutionForward(const nnvm::NodeAttrs& attrs, const OpContext &ctx
const std::vector<NDArray> &out_data) {
TmpMemMgr::Get()->Init(ctx.requested[conv::kTempSpace]);
const ConvolutionParam& param = nnvm::get<ConvolutionParam>(attrs.parsed);
MKLDNNConvForward &fwd = GetConvFwd(attrs,
ctx.is_train, in_data[conv::kData], in_data[conv::kWeight],
NDArray weight = in_data[conv::kWeight];
MKLDNNConvForward &fwd = GetConvFwd(attrs, ctx.is_train, in_data[conv::kData], weight,
param.no_bias ? nullptr : &in_data[conv::kBias], out_data[conv::kOut]);

auto data_mem = in_data[conv::kData].GetMKLDNNDataReorder(fwd.fwd_pd.src_primitive_desc());
const mkldnn::memory *weight_mem;
if (ctx.is_train) {
// TODO(zhengda) kvstore doesn't handle MKLDNN correctly. Let's reorder it
// to the default format for now.
if (in_data[conv::kWeight].IsMKLDNNData())
const_cast<NDArray &>(in_data[conv::kWeight]).Reorder2Default();
weight_mem = GetWeights(in_data[conv::kWeight], fwd.fwd_pd.weights_primitive_desc(),
param.num_group);
if (weight.IsMKLDNNData())
// This asks the engine to change the layout of the weight array after
// it's used.
weight.Reorder2DefaultAsync();
weight_mem = GetWeights(weight, fwd.fwd_pd.weights_primitive_desc(), param.num_group);
} else {
// For inference, we want to reorder the weight array so we don't need to
// reorder data every time.
const_cast<NDArray &>(in_data[conv::kWeight]).MKLDNNDataReorder(
fwd.fwd_pd.weights_primitive_desc());
weight_mem = in_data[conv::kWeight].GetMKLDNNData();
if (weight.IsDefaultData()) {
weight_mem = GetWeights(weight, fwd.fwd_pd.weights_primitive_desc(), param.num_group);
// We also need to modify the layout on the original weight array. The
// data conversion happens after the weight array is used.
weight.MKLDNNDataReorderAsync(fwd.fwd_pd.weights_primitive_desc());
} else {
weight_mem = weight.GetMKLDNNData();
CHECK(weight_mem->get_primitive_desc() == fwd.fwd_pd.weights_primitive_desc());
}
}
auto out_mem = CreateMKLDNNMem(out_data[conv::kOut], fwd.fwd_pd.dst_primitive_desc(),
req[conv::kOut]);
Expand Down
22 changes: 14 additions & 8 deletions src/operator/nn/mkldnn/mkldnn_deconvolution.cc
Original file line number Diff line number Diff line change
Expand Up @@ -234,21 +234,27 @@ void MKLDNNDeconvForward::SetDataHandle(const DeconvolutionParam& param,
const std::vector<NDArray> &out_data) {
auto data_mem = in_data[deconv::kData].GetMKLDNNDataReorder(
fwd_pd.diff_dst_primitive_desc());
NDArray weight = in_data[deconv::kWeight];
const mkldnn::memory *weight_mem;
if (ctx.is_train) {
// TODO(zhengda) kvstore doesn't handle MKLDNN correctly. Let's reorder it
// to the default format for now.
if (in_data[deconv::kWeight].IsMKLDNNData())
const_cast<NDArray &>(in_data[deconv::kWeight]).Reorder2Default();
weight_mem = GetWeights(in_data[deconv::kWeight],
fwd_pd.weights_primitive_desc(),
param.num_group);
if (weight.IsMKLDNNData())
// This asks the engine to reorder data after the weight array is used.
weight.Reorder2DefaultAsync();
weight_mem = GetWeights(weight, fwd_pd.weights_primitive_desc(), param.num_group);
} else {
// For inference, we want to reorder the weight array so we don't need to
// reorder data every time.
const_cast<NDArray &>(in_data[deconv::kWeight]).MKLDNNDataReorder(
fwd_pd.weights_primitive_desc());
weight_mem = in_data[deconv::kWeight].GetMKLDNNData();
if (weight.IsDefaultData()) {
weight_mem = GetWeights(weight, fwd_pd.weights_primitive_desc(), param.num_group);
// We also need to modify the layout on the original weight array. The
// data conversion happens after the weight array is used.
weight.MKLDNNDataReorderAsync(fwd_pd.weights_primitive_desc());
} else {
weight_mem = weight.GetMKLDNNData();
CHECK(weight_mem->get_primitive_desc() == fwd_pd.weights_primitive_desc());
}
}
auto out_mem = CreateMKLDNNMem(out_data[deconv::kOut],
fwd_pd.diff_src_primitive_desc(), req[deconv::kOut]);
Expand Down
5 changes: 5 additions & 0 deletions src/operator/nn/mkldnn/mkldnn_fully_connected.cc
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,11 @@ void MKLDNNFCForward(const nnvm::NodeAttrs& attrs, const OpContext &ctx,
const TShape& oshape = out_data[fullc::kOut].shape();
NDArray weight = in_data[fullc::kWeight];
NDArray data = in_data[fullc::kData];
// If the input data is a view of an MKLDNN array, we should create a new
// NDArray with reordered data.
if (data.IsMKLDNNData() && data.IsView())
data = in_data[fullc::kData].Reorder2Default();

auto out_md = GetMemDesc(out_data[fullc::kOut]);
if (data.shape().ndim() != 2 && !param.flatten) {
data = data.MKLDNNDataReshape(Shape2(ishape.ProdShape(0, ishape.ndim()-1),
Expand Down
7 changes: 6 additions & 1 deletion src/operator/tensor/cast_storage-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,12 @@ void CastStorageComputeImpl(const OpContext& ctx,
CHECK_EQ(output.ctx().dev_type, input.ctx().dev_type);
// If one of them uses the MKLDNN layout.
if (input.IsMKLDNNData() || output.IsMKLDNNData()) {
auto in_mem = input.GetMKLDNNData();
NDArray tmp_input = input;
// If the input data is MKLDNN and is a view, we need to reorder the input
// data first.
if (input.IsMKLDNNData() && input.IsView())
tmp_input = input.Reorder2Default();
const mkldnn::memory *in_mem = tmp_input.GetMKLDNNData();
const_cast<NDArray &>(output).CopyFrom(*in_mem);
MKLDNNStream::Get()->Submit();
} else {
Expand Down
Loading

0 comments on commit f9c2689

Please sign in to comment.