This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Fix a bug in getting MKLDNN memory #10731

Merged · 30 commits · May 3, 2018
Commits (30)
081de9e  test inference multiple times. (zheng-da, Apr 18, 2018)
42d64b4  Fix a bug in GetMKLDNNData(). (zheng-da, Apr 20, 2018)
3ff7475  Update comments. (zheng-da, Apr 20, 2018)
b746362  Handle all cases for GetMKLDNNDataReorder. (zheng-da, Apr 20, 2018)
60e9c6d  avoid unnecessary message. (zheng-da, Apr 20, 2018)
c1c68a1  Add C++ unit test for NDArray. (zheng-da, Apr 21, 2018)
244149b  Fix a minor bug. (zheng-da, Apr 21, 2018)
8de717e  Unit tests on GetMKLDNNDataReorder. (zheng-da, Apr 21, 2018)
735f1d3  Fix lint error. (zheng-da, Apr 21, 2018)
03a742f  Add more test cases. (zheng-da, Apr 22, 2018)
a21af4e  add comments for the test code. (zheng-da, Apr 22, 2018)
cd1a074  Reorganize test code. (zheng-da, Apr 22, 2018)
1987430  Fix cpp tests. (zheng-da, Apr 27, 2018)
ff8bd94  test. (zheng-da, Apr 27, 2018)
8835d77  Add a new Jenkins compile task. (zheng-da, Apr 27, 2018)
150d38a  Update jenkins. (zheng-da, Apr 28, 2018)
afdae0a  update jenkins. (zheng-da, Apr 28, 2018)
cfc38e6  Fix a Jenkins. (zheng-da, Apr 28, 2018)
beb7c27  Fix jenkins. (zheng-da, Apr 28, 2018)
859005c  Fix jenkins. (zheng-da, Apr 28, 2018)
65917bc  Fix CMake for MKLDNN. (zheng-da, Apr 28, 2018)
45683e2  Fix jenkins. (zheng-da, Apr 28, 2018)
69b06d3  update jenkins. (zheng-da, Apr 28, 2018)
b9b360a  update CMake. (zheng-da, Apr 28, 2018)
a737d4d  Fix cmake. (zheng-da, Apr 28, 2018)
e8b604a  update CI. (zheng-da, Apr 28, 2018)
faafd08  add comment. (zheng-da, Apr 28, 2018)
4d441e6  add comments. (zheng-da, Apr 30, 2018)
01b37ce  cmake builds mkldnn with -mtune=generic by default. (zheng-da, Apr 30, 2018)
1c5e15e  adjust comments. (zheng-da, May 2, 2018)
8 changes: 4 additions & 4 deletions CMakeLists.txt
@@ -159,8 +159,12 @@ endif()

if(USE_MKL_IF_AVAILABLE)
if(USE_MKLDNN)
+# We need to use a generic architecture. Otherwise, MKLDNN compiled on one
+# CPU architecture (e.g., C5) can't run on another architecture (e.g., G3).
+set(ARCH_OPT_FLAGS "-mtune=generic")
add_subdirectory(3rdparty/mkldnn)
include_directories(3rdparty/mkldnn/include)
+add_definitions(-DMXNET_USE_MKLDNN=1)
list(APPEND mxnet_LINKER_LIBS mkldnn)
endif()
find_package(MKL)
@@ -169,10 +173,6 @@ if(USE_MKL_IF_AVAILABLE)
include_directories(${MKL_INCLUDE_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src/operator/mkl)

-if(USE_MKLDNN)
-  add_definitions(-DMXNET_USE_MKLDNN=1)
-endif()
-
add_definitions(-DUSE_MKL=1)
add_definitions(-DCUB_MKL=1)
list(APPEND mxnet_LINKER_LIBS ${MKL_LIBRARIES})
13 changes: 12 additions & 1 deletion Jenkinsfile
@@ -26,7 +26,7 @@
mx_dist_lib = 'lib/libmxnet.so, lib/libmxnet.a, 3rdparty/dmlc-core/libdmlc.a, 3rdparty/nnvm/lib/libnnvm.a, 3rdparty/ps-lite/build/libps.a, deps/lib/libprotobuf-lite.a, deps/lib/libzmq.a'
// mxnet cmake libraries, in cmake builds we do not produce a libnvvm static library by default.
mx_cmake_lib = 'build/libmxnet.so, build/libmxnet.a, build/3rdparty/dmlc-core/libdmlc.a, build/tests/mxnet_unit_tests, build/3rdparty/openmp/runtime/src/libomp.so'
-mx_cmake_mkldnn_lib = 'build/libmxnet.so, build/libmxnet.a, build/3rdparty/dmlc-core/libdmlc.a, build/tests/mxnet_unit_tests, build/3rdparty/openmp/runtime/src/libomp.so, build/3rdparty/mkldnn/src/libmkldnn.so, build/3rdparty/mkldnn/src/libmkldnn.so.0'
+mx_cmake_mkldnn_lib = 'build/libmxnet.so, build/libmxnet.a, build/3rdparty/dmlc-core/libdmlc.a, build/tests/mxnet_unit_tests, build/3rdparty/openmp/runtime/src/libomp.so, build/3rdparty/mkldnn/src/libmkldnn.so.0'
mx_mkldnn_lib = 'lib/libmxnet.so, lib/libmxnet.a, lib/libiomp5.so, lib/libmkldnn.so.0, lib/libmklml_intel.so, 3rdparty/dmlc-core/libdmlc.a, 3rdparty/nnvm/lib/libnnvm.a'
// command to start a docker container
docker_run = 'tests/ci_build/ci_build.sh'
@@ -574,6 +574,17 @@ try {
}
}
},
+'Cpp: MKLDNN+GPU': {
+  node('mxnetlinux-gpu') {
+    ws('workspace/ut-cpp-mkldnn-gpu') {
+      timeout(time: max_time, unit: 'MINUTES') {
+        init_git()
+        unpack_lib('cmake_mkldnn_gpu', mx_cmake_mkldnn_lib)
+        sh "ci/build.py --nvidiadocker --platform ubuntu_gpu /work/runtime_functions.sh unittest_ubuntu_gpu_cpp"
+      }
+    }
+  }
+},
'R: CPU': {
node('mxnetlinux-cpu') {
ws('workspace/ut-r-cpu') {
Expand Down
3 changes: 3 additions & 0 deletions ci/docker/runtime_functions.sh
@@ -323,6 +323,9 @@ build_ubuntu_gpu_cmake_mkldnn() {
/work/mxnet

ninja -v
+# libmkldnn.so.0 is a symlink. We need an actual binary file named libmkldnn.so.0.
+cp 3rdparty/mkldnn/src/libmkldnn.so.0 3rdparty/mkldnn/src/libmkldnn.so.0.tmp
+mv 3rdparty/mkldnn/src/libmkldnn.so.0.tmp 3rdparty/mkldnn/src/libmkldnn.so.0
[Review discussion on the cp/mv lines]

Contributor: This is a no-op. Please elaborate.

Contributor Author (zheng-da): This isn't a no-op; I think the comment explains it. libmkldnn.so.0 is a symlink, and we need an actual binary file named libmkldnn.so.0. Jenkins can't pack a symlink.

Contributor: I see, so you're making use of the fact that cp automatically resolves the symlink? I think a cleaner way would be to resolve the symlink explicitly and then overwrite the symlink with the original file, rather than relying on implicit actions.

Contributor Author (zheng-da): If you know a cleaner way, I'm happy to do so.

Contributor Author (zheng-da, May 1, 2018): What we need here is to turn a link file into a regular file, not to find the path to the regular file; those are different things. Jenkins can't pack a link file, and mxnet wants a library file named libmkldnn.so.0 rather than libmkldnn.so.0.13 (which is what libmkldnn.so.0 points to). If what you found can turn libmkldnn.so.0 (a link file) into a regular file with the same name in a single command, can you please provide the command?

Contributor Author (zheng-da, May 2, 2018): So the code would be something like this:

cp --remove-destination 3rdparty/mkldnn/src/`readlink 3rdparty/mkldnn/src/libmkldnn.so.0` 3rdparty/mkldnn/src/libmkldnn.so.0

Do you think this is preferable or less confusing?

Member: To be honest, I actually prefer the existing solution over the one-liner.

Contributor: Alright, no strong feelings from my side.
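
For illustration, the explicit symlink resolution discussed above could also be written with C++17's std::filesystem. This is a hypothetical sketch, not part of the PR; MaterializeSymlink is an invented name:

```cpp
#include <filesystem>

namespace fs = std::filesystem;

// Replace a symlink with a regular copy of its target, mirroring what the
// cp/mv pair in the CI script achieves for libmkldnn.so.0.
void MaterializeSymlink(const fs::path &link) {
  // read_symlink returns the target, possibly relative to the link's
  // directory; operator/ keeps absolute targets intact.
  fs::path target = link.parent_path() / fs::read_symlink(link);
  fs::remove(link);             // drop the symlink itself
  fs::copy_file(target, link);  // put a regular file in its place
}
```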

}

build_ubuntu_gpu_cmake() {
48 changes: 35 additions & 13 deletions src/ndarray/ndarray.cc
@@ -485,8 +485,8 @@ const mkldnn::memory *NDArray::GetMKLDNNData(
}

const mkldnn::memory *NDArray::GetMKLDNNDataReorder(
-    const mkldnn::memory::primitive_desc &desc) const {
-  if (desc.get_size() != shape().Size() * GetTypeSize(dtype_)) {
+    const mkldnn::memory::primitive_desc &new_pd) const {
+  if (new_pd.get_size() != shape().Size() * GetTypeSize(dtype_)) {
LOG(FATAL) << "The size of NDArray doesn't match the requested MKLDNN memory desc";
return nullptr;
}
@@ -495,24 +495,41 @@ const mkldnn::memory *NDArray::GetMKLDNNDataReorder(
const mkldnn::memory *mem = GetMKLDNNData();
// If the memory descriptor matches, it's easy.
MKLDNNStream *stream = MKLDNNStream::Get();
-  if (mem->get_primitive_desc() == desc) {
-    return GetMKLDNNExact(mem, desc);
+  if (mem->get_primitive_desc() == new_pd) {
+    return GetMKLDNNExact(mem, new_pd);
}

-  mkldnn::memory::primitive_desc _desc = desc;
+  mkldnn::memory::primitive_desc _pd = new_pd;
+  mkldnn::memory::desc desc1 = mem->get_primitive_desc().desc();
+  mkldnn::memory::desc desc2 = _pd.desc();
// Now we need to determine if we should reorder the memory.
// If both use the default formats, we think we don't need to reorder.
-  mkldnn::memory::desc desc1 = mem->get_primitive_desc().desc();
-  mkldnn::memory::desc desc2 = _desc.desc();
if (desc1.data.format == GetDefaultFormat(desc1) &&
desc2.data.format == GetDefaultFormat(desc2)) {
-    mkldnn_mem_ptr ret(new mkldnn::memory(desc, mem->get_data_handle()));
+    mkldnn_mem_ptr ret(new mkldnn::memory(new_pd, mem->get_data_handle()));
stream->RegisterMem(ret);
return ret.get();
-  } else {
-    mkldnn::memory *ret = TmpMemMgr::Get()->Alloc(desc);
+  } else if (same_shape(desc1, desc2)) {
+    // If they have the same shape, we can reorder data directly.
+    mkldnn::memory *ret = TmpMemMgr::Get()->Alloc(new_pd);
stream->RegisterPrim(mkldnn::reorder(*mem, *ret));
return ret;
+  } else {
+    // If they have different shapes, we need to reshape the array first.
+    // Since this method will only be used inside an operator, we can call
+    // MKLDNNDataReshape to reshape an array.
+    TShape required_shape(desc2.data.ndims);
+    for (int i = 0; i < desc2.data.ndims; i++)
+      required_shape[i] = desc2.data.dims[i];
+    NDArray reshaped = MKLDNNDataReshape(required_shape);
+    const mkldnn::memory *ret = reshaped.GetMKLDNNData();
[Review discussion]

Contributor: Where is ret being destroyed?

Contributor Author (zheng-da): The memory is managed by MKLDNNStream. You can take a look at the implementation of GetMKLDNNData.
+    if (ret->get_primitive_desc() == new_pd) {
+      return GetMKLDNNExact(ret, new_pd);
+    } else {
+      mkldnn::memory *ret2 = TmpMemMgr::Get()->Alloc(new_pd);
+      stream->RegisterPrim(mkldnn::reorder(*ret, *ret2));
+      return ret2;
+    }
+  }
}
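
As a hypothetical usage sketch (the shape, dtype, and helper name are invented; it assumes MXNet's CpuEngine helper from mkldnn_base-inl.h), requesting an array's data in a blocked layout of the same shape exercises the reorder branch above:

```cpp
// Sketch: request an NDArray's data in MKLDNN's blocked nChw8c layout.
// Same shape but a different format, so GetMKLDNNDataReorder queues a reorder.
const mkldnn::memory *GetBlockedData(const mxnet::NDArray &arr) {
  mkldnn::memory::desc md({1, 32, 28, 28}, mkldnn::memory::data_type::f32,
                          mkldnn::memory::format::nChw8c);
  mkldnn::memory::primitive_desc pd(md, mxnet::CpuEngine::Get()->get_engine());
  const mkldnn::memory *mem = arr.GetMKLDNNDataReorder(pd);
  mxnet::MKLDNNStream::Get()->Submit();  // execute the queued reorder primitive
  return mem;
}
```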

@@ -566,10 +583,15 @@ void NDArray::MKLDNNDataReorderAsync(const mkldnn::memory::primitive_desc &desc)

const mkldnn::memory *NDArray::GetMKLDNNData() const {
CHECK(storage_type() == kDefaultStorage);
-  // If this array uses MKLDNN layout, we have to make sure it's not a view.
-  // Otherwise, we'll have to change the layout inside the array.
-  if (IsMKLDNNData())
+  if (IsMKLDNNData()) {
+    // If this array uses MKLDNN layout, we have to make sure it's not a view.
+    // Otherwise, we'll have to change the layout inside the array.
CHECK(!IsView());
+    MKLDNNStream::Get()->RegisterMem(ptr_->mkl_mem_->GetMem());
+    // If this array uses MKLDNN format, we should return now. Otherwise,
+    // SetMKLMem may mess up mkl_mem_.
+    return ptr_->mkl_mem_->GetRaw();
+  }
ptr_->SetMKLMem(IsView() ? ptr_->storage_shape : shape_, dtype_);
MKLDNNStream::Get()->RegisterMem(ptr_->mkl_mem_->GetMem());
if (IsView()) {
36 changes: 28 additions & 8 deletions src/operator/nn/mkldnn/mkldnn_base-inl.h
@@ -272,12 +272,11 @@ class MKLDNNStream {
std::vector<std::shared_ptr<const mkldnn::memory> > mem_holder;

public:
-  static MKLDNNStream *Get() {
-    static thread_local MKLDNNStream stream;
-    return &stream;
-  }
+  static MKLDNNStream *Get();

-  void RegisterPrim(const mkldnn::primitive &prim) { net.push_back(prim); }
+  void RegisterPrim(const mkldnn::primitive &prim) {
+    net.push_back(prim);
+  }

void RegisterMem(std::shared_ptr<const mkldnn::memory> mem) {
mem_holder.push_back(mem);
@@ -287,10 +286,21 @@
return !net.empty();
}

-  void Submit() {
-    if (!net.empty())
+  /*
+   * After submitting mkldnn operations for execution, we need to
+   * clean up memory held by the stream. However, sometimes users
+   * might want to separate mkldnn execution and memory cleanup.
+   */
+  void Submit(bool cleanup = true) {
+    if (!net.empty()) {
       mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait();
-    net.clear();
+      net.clear();
+    }
+    if (cleanup)
+      Cleanup();
+  }
+
+  void Cleanup() {
     mem_holder.clear();
     TmpMemMgr::Get()->Reset();
   }
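
A minimal usage sketch (hypothetical, not from this PR) of why Submit now takes a cleanup flag: a caller can execute the queued primitives first and release stream-held memory only after consuming the result:

```cpp
// Sketch: run queued primitives now, keep stream-held temporary memory
// alive until the result has been read, then clean up explicitly.
void ReorderThenCleanup(const mkldnn::memory &src, const mkldnn::memory &dst) {
  mxnet::MKLDNNStream *stream = mxnet::MKLDNNStream::Get();
  stream->RegisterPrim(mkldnn::reorder(src, dst));
  stream->Submit(false);  // execute the net, but skip memory cleanup
  // ... read dst while temporaries are still alive ...
  stream->Cleanup();      // release mem_holder and reset TmpMemMgr
}
```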
@@ -348,6 +358,16 @@ inline bool same_shape(const TShape &shape, const mkldnn_dims_t dims, int ndims)
return true;
}

+inline bool same_shape(const mkldnn::memory::desc &desc1,
+                       const mkldnn::memory::desc &desc2) {
+  if (desc1.data.ndims != desc2.data.ndims)
+    return false;
+  for (int i = 0; i < desc1.data.ndims; i++)
+    if (desc1.data.dims[i] != desc2.data.dims[i])
+      return false;
+  return true;
+}

inline bool same_shape(const TShape &shape, int dtype,
const mkldnn::memory::desc &desc) {
return same_shape(shape, desc.data.dims, desc.data.ndims)
12 changes: 10 additions & 2 deletions src/operator/nn/mkldnn/mkldnn_base.cc
@@ -25,6 +25,11 @@

namespace mxnet {

+MKLDNNStream *MKLDNNStream::Get() {
+  static thread_local MKLDNNStream stream;
+  return &stream;
+}
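
Defining Get() here means the thread_local instance lives in exactly one translation unit. A minimal sketch (hypothetical, outside the PR) of the per-thread behavior this provides:

```cpp
#include <thread>

// Assumes mxnet's mkldnn_base-inl.h is included. Each thread that calls
// MKLDNNStream::Get() receives its own stream, so primitives registered on
// one thread never run on another thread's stream.
void Worker() {
  mxnet::MKLDNNStream *s = mxnet::MKLDNNStream::Get();  // per-thread instance
  // ... s->RegisterPrim(...); s->Submit();
}

int main() {
  std::thread t1(Worker), t2(Worker);
  t1.join();
  t2.join();
  return 0;
}
```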

void *AlignMem(void *mem, size_t size, size_t alignment, size_t *space) {
if (size > *space)
return nullptr;
@@ -57,8 +62,11 @@ mkldnn::memory *TmpMemMgr::Alloc(const mkldnn::memory::primitive_desc &pd) {
this->curr_mem = static_cast<char *>(mem) + pd.get_size();
return ret.get();
} else {
-    LOG(WARNING) << "Allocate " << pd.get_size()
-                 << " bytes with malloc directly";
+    // If curr_mem has been initialized and we still reach here, it means
+    // the currently allocated memory isn't enough.
+    if (this->curr_mem)
+      LOG(WARNING) << "Allocate " << pd.get_size()
+                   << " bytes with malloc directly";
mkldnn_mem_ptr ret(new mkldnn::memory(pd));
MKLDNNStream::Get()->RegisterMem(ret);
return ret.get();
10 changes: 5 additions & 5 deletions tests/cpp/include/test_core_op.h
@@ -410,7 +410,7 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer<DType>
if (bwd_node_ptr) {
CHECK_EQ(bwd_node_ptr->inputs.size(), num_inputs);
input_types.resize(bwd_node_ptr->inputs.size(), -1);
-      for (size_t i = 0; i < num_inputs; ++i) {
+      for (int i = 0; i < num_inputs; ++i) {
const int map_key = bwd_node_ptr->inputs[i].index;
CHECK(index2array.find(map_key) != index2array.end());
const int dtype = index2array[map_key]->dtype();
@@ -421,7 +421,7 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer<DType>
output_types.emplace_back(dtype);
}
} else {
-      for (size_t x = 0; x < num_inputs; ++x) {
+      for (int x = 0; x < num_inputs; ++x) {
input_types.emplace_back(default_dtype());
}
for (const auto &fwd_inp : backward_for_op->inputs()) {
@@ -431,10 +431,10 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer<DType>
}
} else {
CHECK(false); // above always true?
-      for (size_t x = 0; x < num_inputs; ++x) {
+      for (int x = 0; x < num_inputs; ++x) {
input_types.emplace_back(default_dtype());
}
-      for (size_t x = 0; x < inferred_num_outputs; ++x) {
+      for (int x = 0; x < inferred_num_outputs; ++x) {
output_types.emplace_back(default_dtype());
}
}
@@ -455,7 +455,7 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer<DType>
if (bwd_node_ptr) {
input_shapes.clear();
CHECK_EQ(bwd_node_ptr->inputs.size(), num_inputs);
-      for (size_t i = 0; i < num_inputs; ++i) {
+      for (int i = 0; i < num_inputs; ++i) {
const int map_key = bwd_node_ptr->inputs[i].index;
CHECK(index2array.find(map_key) != index2array.end());
const nnvm::TShape &shp = index2array[map_key]->shape();