-
Notifications
You must be signed in to change notification settings - Fork 6.8k
Fix a bug in getting MKLDNN memory #10731
Changes from 29 commits
081de9e
42d64b4
3ff7475
b746362
60e9c6d
c1c68a1
244149b
8de717e
735f1d3
03a742f
a21af4e
cd1a074
1987430
ff8bd94
8835d77
150d38a
afdae0a
cfc38e6
beb7c27
859005c
65917bc
45683e2
69b06d3
b9b360a
a737d4d
e8b604a
faafd08
4d441e6
01b37ce
1c5e15e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -313,6 +313,8 @@ build_ubuntu_amalgamation_min() { | |
build_ubuntu_gpu_cmake_mkldnn() { | ||
set -ex | ||
cd /work/build | ||
# We need to use generic archtecture. Otherwise, MKLDNN compiled in one | ||
# CPU architecture (e.g., C5) can't run on another architecture (e.g., g3). | ||
cmake \ | ||
-DUSE_CUDA=1 \ | ||
-DUSE_CUDNN=1 \ | ||
|
@@ -323,6 +325,9 @@ build_ubuntu_gpu_cmake_mkldnn() { | |
/work/mxnet | ||
|
||
ninja -v | ||
# libmkldnn.so.0 is a link file. We need an actual binary file named libmkldnn.so.0. | ||
cp 3rdparty/mkldnn/src/libmkldnn.so.0 3rdparty/mkldnn/src/libmkldnn.so.0.tmp | ||
mv 3rdparty/mkldnn/src/libmkldnn.so.0.tmp 3rdparty/mkldnn/src/libmkldnn.so.0 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a no-op. Please elaborate There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This isn't no-op. I think my comment explain this. libmkldnn.so.0 is a link file. We need an actual binary file named libmkldnn.so.0. Jenkins can't pack a link file. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see, so you're making use of the fact that cp automatically resolves the symlink? I think a cleaner way would be to resolve the symlink explicitely and then override the symlink file with the original file rather than relying on implicit actions. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If you know a cleaner way, I'm happy to do so. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What we need here is to turn a link file to a regular file, instead of finding the path to the regular file. It's a different thing. Jenkins can't wrapper a link file and mxnet wants a library file named libmkldnn.so.0, instead of libmkldnn.so.0.13 (which is what libmkldnn.so.0 points to). If what you find can turn libmkldnn.so.0 (a link file) to a regular file with the same name with a single command, can you please provide the command? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. so the code will be something like this.
do you think this is a preferred way or less confusing way? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To be honest, I actually prefer the existing solution over the one-liner. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Alright, no strong feelings from my side |
||
} | ||
|
||
build_ubuntu_gpu_cmake() { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -485,8 +485,8 @@ const mkldnn::memory *NDArray::GetMKLDNNData( | |
} | ||
|
||
const mkldnn::memory *NDArray::GetMKLDNNDataReorder( | ||
const mkldnn::memory::primitive_desc &desc) const { | ||
if (desc.get_size() != shape().Size() * GetTypeSize(dtype_)) { | ||
const mkldnn::memory::primitive_desc &new_pd) const { | ||
if (new_pd.get_size() != shape().Size() * GetTypeSize(dtype_)) { | ||
LOG(FATAL) << "The size of NDArray doesn't match the requested MKLDNN memory desc"; | ||
return nullptr; | ||
} | ||
|
@@ -495,24 +495,41 @@ const mkldnn::memory *NDArray::GetMKLDNNDataReorder( | |
const mkldnn::memory *mem = GetMKLDNNData(); | ||
// If the memory descriptor matches, it's easy. | ||
MKLDNNStream *stream = MKLDNNStream::Get(); | ||
if (mem->get_primitive_desc() == desc) { | ||
return GetMKLDNNExact(mem, desc); | ||
if (mem->get_primitive_desc() == new_pd) { | ||
return GetMKLDNNExact(mem, new_pd); | ||
} | ||
|
||
mkldnn::memory::primitive_desc _desc = desc; | ||
mkldnn::memory::primitive_desc _pd = new_pd; | ||
mkldnn::memory::desc desc1 = mem->get_primitive_desc().desc(); | ||
mkldnn::memory::desc desc2 = _pd.desc(); | ||
// Now we need to determine if we should reorder the memory. | ||
// If both use the default formats, we think we don't need to reorder. | ||
mkldnn::memory::desc desc1 = mem->get_primitive_desc().desc(); | ||
mkldnn::memory::desc desc2 = _desc.desc(); | ||
if (desc1.data.format == GetDefaultFormat(desc1) && | ||
desc2.data.format == GetDefaultFormat(desc2)) { | ||
mkldnn_mem_ptr ret(new mkldnn::memory(desc, mem->get_data_handle())); | ||
mkldnn_mem_ptr ret(new mkldnn::memory(new_pd, mem->get_data_handle())); | ||
stream->RegisterMem(ret); | ||
return ret.get(); | ||
} else { | ||
mkldnn::memory *ret = TmpMemMgr::Get()->Alloc(desc); | ||
} else if (same_shape(desc1, desc2)) { | ||
// If they have the same shape, we can reorder data directly. | ||
mkldnn::memory *ret = TmpMemMgr::Get()->Alloc(new_pd); | ||
stream->RegisterPrim(mkldnn::reorder(*mem, *ret)); | ||
return ret; | ||
} else { | ||
// If they have different shapes, we need to reshape the array first. | ||
// Since this method will only be used inside an operator, we can call | ||
// MKLDNNDataReshape to reshape an array. | ||
TShape required_shape(desc2.data.ndims); | ||
for (int i = 0; i < desc2.data.ndims; i++) | ||
required_shape[i] = desc2.data.dims[i]; | ||
NDArray reshaped = MKLDNNDataReshape(required_shape); | ||
const mkldnn::memory *ret = reshaped.GetMKLDNNData(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Where is ret being destroyed? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The memory is managed by MKLDNNStream. You can take a look at the implementation of GetMKLDNNData |
||
if (ret->get_primitive_desc() == new_pd) { | ||
return GetMKLDNNExact(ret, new_pd); | ||
} else { | ||
mkldnn::memory *ret2 = TmpMemMgr::Get()->Alloc(new_pd); | ||
stream->RegisterPrim(mkldnn::reorder(*ret, *ret2)); | ||
return ret2; | ||
} | ||
} | ||
} | ||
|
||
|
@@ -566,10 +583,15 @@ void NDArray::MKLDNNDataReorderAsync(const mkldnn::memory::primitive_desc &desc) | |
|
||
const mkldnn::memory *NDArray::GetMKLDNNData() const { | ||
CHECK(storage_type() == kDefaultStorage); | ||
// If this array uses MKLDNN layout, we have to make sure it's not a view. | ||
// Otherwise, we'll have to change the layout inside the array. | ||
if (IsMKLDNNData()) | ||
if (IsMKLDNNData()) { | ||
// If this array uses MKLDNN layout, we have to make sure it's not a view. | ||
// Otherwise, we'll have to change the layout inside the array. | ||
CHECK(!IsView()); | ||
MKLDNNStream::Get()->RegisterMem(ptr_->mkl_mem_->GetMem()); | ||
// If this array uses MKLDNN format, we should return now. Otherwise, | ||
// SetMKLMem may mess up mkl_mem_. | ||
return ptr_->mkl_mem_->GetRaw(); | ||
} | ||
ptr_->SetMKLMem(IsView() ? ptr_->storage_shape : shape_, dtype_); | ||
MKLDNNStream::Get()->RegisterMem(ptr_->mkl_mem_->GetMem()); | ||
if (IsView()) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -272,12 +272,11 @@ class MKLDNNStream { | |
std::vector<std::shared_ptr<const mkldnn::memory> > mem_holder; | ||
|
||
public: | ||
static MKLDNNStream *Get() { | ||
static thread_local MKLDNNStream stream; | ||
return &stream; | ||
} | ||
static MKLDNNStream *Get(); | ||
|
||
void RegisterPrim(const mkldnn::primitive &prim) { net.push_back(prim); } | ||
void RegisterPrim(const mkldnn::primitive &prim) { | ||
net.push_back(prim); | ||
} | ||
|
||
void RegisterMem(std::shared_ptr<const mkldnn::memory> mem) { | ||
mem_holder.push_back(mem); | ||
|
@@ -287,10 +286,21 @@ class MKLDNNStream { | |
return !net.empty(); | ||
} | ||
|
||
void Submit() { | ||
if (!net.empty()) | ||
/* | ||
* After submitting mkldnn operations for execution, we need to | ||
* clean up memory held by the stream. However, sometimes users | ||
* might want to separate mkldnn execution and memory cleanup. | ||
*/ | ||
void Submit(bool cleanup = true) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please describe this argument |
||
if (!net.empty()) { | ||
mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait(); | ||
net.clear(); | ||
net.clear(); | ||
} | ||
if (cleanup) | ||
Cleanup(); | ||
} | ||
|
||
void Cleanup() { | ||
mem_holder.clear(); | ||
TmpMemMgr::Get()->Reset(); | ||
} | ||
|
@@ -348,6 +358,16 @@ inline bool same_shape(const TShape &shape, const mkldnn_dims_t dims, int ndims) | |
return true; | ||
} | ||
|
||
inline bool same_shape(const mkldnn::memory::desc &desc1, | ||
const mkldnn::memory::desc &desc2) { | ||
if (desc1.data.ndims != desc2.data.ndims) | ||
return false; | ||
for (int i = 0; i < desc1.data.ndims; i++) | ||
if (desc1.data.dims[i] != desc2.data.dims[i]) | ||
return false; | ||
return true; | ||
} | ||
|
||
inline bool same_shape(const TShape &shape, int dtype, | ||
const mkldnn::memory::desc &desc) { | ||
return same_shape(shape, desc.data.dims, desc.data.ndims) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What do these comments apply to here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I need to remove this. Originally, I set ARCH_OPT_FLAGS here. but I moved it to CMakeLists.txt.
/~https://github.com/apache/incubator-mxnet/pull/10731/files/01b37cecf0a57ea9d943aae0c079305d4de8452d#diff-af3b638bc2a3e6c650974192a53c7291R162
I need to move the comments as well.