Skip to content

Commit

Permalink
Fix a bug in getting MKLDNN memory (apache#10731)
Browse files Browse the repository at this point in the history
* test inference multiple times.

* Fix a bug in GetMKLDNNData().

* Update comments.

* Handle all cases for GetMKLDNNDataReorder

* avoid unnecessary message.

* Add C++ unit test for NDArray.

* Fix a minor bug.

* Unit tests on GetMKLDNNDataReorder.

* Fix lint error.

* Add more test cases.

* add comments for the test code.

* Reorganize test code.

* Fix cpp tests.

* test.

* Add a new Jenkins compile task.

* Update jenkins.

* update jenkins.

* Fix a Jenkins.

* Fix jenkins.

* Fix jenkins.

* Fix CMake for MKLDNN.

* Fix jenkins.

* update jenkins.

* update CMake.

* Fix cmake.

* update CI.

* add comment.

* add comments.

* cmake builds mkldnn with -mtune=generic by default.

* adjust comments.
  • Loading branch information
zheng-da committed Jun 8, 2018
1 parent efb15fd commit d0a80c9
Show file tree
Hide file tree
Showing 9 changed files with 355 additions and 42 deletions.
8 changes: 4 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,12 @@ endif()

if(USE_MKL_IF_AVAILABLE)
if(USE_MKLDNN)
# We need to use generic archtecture. Otherwise, MKLDNN compiled in one
# CPU architecture (e.g., C5) can't run on another architecture (e.g., g3).
set(ARCH_OPT_FLAGS "-mtune=generic")
add_subdirectory(3rdparty/mkldnn)
include_directories(3rdparty/mkldnn/include)
add_definitions(-DMXNET_USE_MKLDNN=1)
list(APPEND mxnet_LINKER_LIBS mkldnn)
endif()
find_package(MKL)
Expand All @@ -197,10 +201,6 @@ if(USE_MKL_IF_AVAILABLE)
include_directories(${MKL_INCLUDE_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src/operator/mkl)

if(USE_MKLDNN)
add_definitions(-DMXNET_USE_MKLDNN=1)
endif()

add_definitions(-DUSE_MKL=1)
add_definitions(-DCUB_MKL=1)
list(APPEND mxnet_LINKER_LIBS ${MKL_LIBRARIES})
Expand Down
13 changes: 12 additions & 1 deletion Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ mx_lib = 'lib/libmxnet.so, lib/libmxnet.a, 3rdparty/dmlc-core/libdmlc.a, 3rdpart
mx_dist_lib = 'lib/libmxnet.so, lib/libmxnet.a, 3rdparty/dmlc-core/libdmlc.a, 3rdparty/nnvm/lib/libnnvm.a, 3rdparty/ps-lite/build/libps.a, deps/lib/libprotobuf-lite.a, deps/lib/libzmq.a'
// mxnet cmake libraries, in cmake builds we do not produce a libnvvm static library by default.
mx_cmake_lib = 'build/libmxnet.so, build/libmxnet.a, build/3rdparty/dmlc-core/libdmlc.a, build/tests/mxnet_unit_tests, build/3rdparty/openmp/runtime/src/libomp.so'
mx_cmake_mkldnn_lib = 'build/libmxnet.so, build/libmxnet.a, build/3rdparty/dmlc-core/libdmlc.a, build/tests/mxnet_unit_tests, build/3rdparty/openmp/runtime/src/libomp.so, build/3rdparty/mkldnn/src/libmkldnn.so, build/3rdparty/mkldnn/src/libmkldnn.so.0'
mx_cmake_mkldnn_lib = 'build/libmxnet.so, build/libmxnet.a, build/3rdparty/dmlc-core/libdmlc.a, build/tests/mxnet_unit_tests, build/3rdparty/openmp/runtime/src/libomp.so, build/3rdparty/mkldnn/src/libmkldnn.so.0'
mx_mkldnn_lib = 'lib/libmxnet.so, lib/libmxnet.a, lib/libiomp5.so, lib/libmkldnn.so.0, lib/libmklml_intel.so, 3rdparty/dmlc-core/libdmlc.a, 3rdparty/nnvm/lib/libnnvm.a'
// command to start a docker container
docker_run = 'tests/ci_build/ci_build.sh'
Expand Down Expand Up @@ -534,6 +534,17 @@ try {
}
}
},
'Cpp: MKLDNN+GPU': {
node('mxnetlinux-gpu') {
ws('workspace/ut-cpp-mkldnn-gpu') {
timeout(time: max_time, unit: 'MINUTES') {
init_git()
unpack_lib('cmake_mkldnn_gpu', mx_cmake_mkldnn_lib)
sh "ci/build.py --nvidiadocker --platform ubuntu_gpu /work/runtime_functions.sh unittest_ubuntu_gpu_cpp"
}
}
}
},
'R: CPU': {
node('mxnetlinux-cpu') {
ws('workspace/ut-r-cpu') {
Expand Down
3 changes: 3 additions & 0 deletions ci/docker/runtime_functions.sh
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,9 @@ build_ubuntu_gpu_cmake_mkldnn() {
/work/mxnet

ninja -v
# libmkldnn.so.0 is a link file. We need an actual binary file named libmkldnn.so.0.
cp 3rdparty/mkldnn/src/libmkldnn.so.0 3rdparty/mkldnn/src/libmkldnn.so.0.tmp
mv 3rdparty/mkldnn/src/libmkldnn.so.0.tmp 3rdparty/mkldnn/src/libmkldnn.so.0
}

build_ubuntu_gpu_cmake() {
Expand Down
48 changes: 35 additions & 13 deletions src/ndarray/ndarray.cc
Original file line number Diff line number Diff line change
Expand Up @@ -485,8 +485,8 @@ const mkldnn::memory *NDArray::GetMKLDNNData(
}

const mkldnn::memory *NDArray::GetMKLDNNDataReorder(
const mkldnn::memory::primitive_desc &desc) const {
if (desc.get_size() != shape().Size() * GetTypeSize(dtype_)) {
const mkldnn::memory::primitive_desc &new_pd) const {
if (new_pd.get_size() != shape().Size() * GetTypeSize(dtype_)) {
LOG(FATAL) << "The size of NDArray doesn't match the requested MKLDNN memory desc";
return nullptr;
}
Expand All @@ -495,24 +495,41 @@ const mkldnn::memory *NDArray::GetMKLDNNDataReorder(
const mkldnn::memory *mem = GetMKLDNNData();
// If the memory descriptor matches, it's easy.
MKLDNNStream *stream = MKLDNNStream::Get();
if (mem->get_primitive_desc() == desc) {
return GetMKLDNNExact(mem, desc);
if (mem->get_primitive_desc() == new_pd) {
return GetMKLDNNExact(mem, new_pd);
}

mkldnn::memory::primitive_desc _desc = desc;
mkldnn::memory::primitive_desc _pd = new_pd;
mkldnn::memory::desc desc1 = mem->get_primitive_desc().desc();
mkldnn::memory::desc desc2 = _pd.desc();
// Now we need to determine if we should reorder the memory.
// If both use the default formats, we think we don't need to reorder.
mkldnn::memory::desc desc1 = mem->get_primitive_desc().desc();
mkldnn::memory::desc desc2 = _desc.desc();
if (desc1.data.format == GetDefaultFormat(desc1) &&
desc2.data.format == GetDefaultFormat(desc2)) {
mkldnn_mem_ptr ret(new mkldnn::memory(desc, mem->get_data_handle()));
mkldnn_mem_ptr ret(new mkldnn::memory(new_pd, mem->get_data_handle()));
stream->RegisterMem(ret);
return ret.get();
} else {
mkldnn::memory *ret = TmpMemMgr::Get()->Alloc(desc);
} else if (same_shape(desc1, desc2)) {
// If they have the same shape, we can reorder data directly.
mkldnn::memory *ret = TmpMemMgr::Get()->Alloc(new_pd);
stream->RegisterPrim(mkldnn::reorder(*mem, *ret));
return ret;
} else {
// If they have different shapes, we need to reshape the array first.
// Since this method will only be used inside an operator, we can call
// MKLDNNDataReshape to reshape an array.
TShape required_shape(desc2.data.ndims);
for (int i = 0; i < desc2.data.ndims; i++)
required_shape[i] = desc2.data.dims[i];
NDArray reshaped = MKLDNNDataReshape(required_shape);
const mkldnn::memory *ret = reshaped.GetMKLDNNData();
if (ret->get_primitive_desc() == new_pd) {
return GetMKLDNNExact(ret, new_pd);
} else {
mkldnn::memory *ret2 = TmpMemMgr::Get()->Alloc(new_pd);
stream->RegisterPrim(mkldnn::reorder(*ret, *ret2));
return ret2;
}
}
}

Expand Down Expand Up @@ -559,10 +576,15 @@ void NDArray::MKLDNNDataReorderAsync(const mkldnn::memory::primitive_desc &desc)

const mkldnn::memory *NDArray::GetMKLDNNData() const {
CHECK(storage_type() == kDefaultStorage);
// If this array uses MKLDNN layout, we have to make sure it's not a view.
// Otherwise, we'll have to change the layout inside the array.
if (IsMKLDNNData())
if (IsMKLDNNData()) {
// If this array uses MKLDNN layout, we have to make sure it's not a view.
// Otherwise, we'll have to change the layout inside the array.
CHECK(!IsView());
MKLDNNStream::Get()->RegisterMem(ptr_->mkl_mem_->GetMem());
// If this array uses MKLDNN format, we should return now. Otherwise,
// SetMKLMem may mess up mkl_mem_.
return ptr_->mkl_mem_->GetRaw();
}
ptr_->SetMKLMem(IsView() ? ptr_->storage_shape : shape_, dtype_);
MKLDNNStream::Get()->RegisterMem(ptr_->mkl_mem_->GetMem());
if (IsView()) {
Expand Down
36 changes: 28 additions & 8 deletions src/operator/nn/mkldnn/mkldnn_base-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -273,12 +273,11 @@ class MKLDNNStream {
std::vector<std::shared_ptr<const mkldnn::memory> > mem_holder;

public:
static MKLDNNStream *Get() {
static thread_local MKLDNNStream stream;
return &stream;
}
static MKLDNNStream *Get();

void RegisterPrim(const mkldnn::primitive &prim) { net.push_back(prim); }
void RegisterPrim(const mkldnn::primitive &prim) {
net.push_back(prim);
}

void RegisterMem(std::shared_ptr<const mkldnn::memory> mem) {
mem_holder.push_back(mem);
Expand All @@ -288,10 +287,21 @@ class MKLDNNStream {
return !net.empty();
}

void Submit() {
if (!net.empty())
/*
* After submitting mkldnn operations for execution, we need to
* clean up memory held by the stream. However, sometimes users
* might want to separate mkldnn execution and memory cleanup.
*/
void Submit(bool cleanup = true) {
if (!net.empty()) {
mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait();
net.clear();
net.clear();
}
if (cleanup)
Cleanup();
}

void Cleanup() {
mem_holder.clear();
TmpMemMgr::Get()->Reset();
}
Expand Down Expand Up @@ -349,6 +359,16 @@ inline bool same_shape(const TShape &shape, const mkldnn_dims_t dims, int ndims)
return true;
}

inline bool same_shape(const mkldnn::memory::desc &desc1,
const mkldnn::memory::desc &desc2) {
if (desc1.data.ndims != desc2.data.ndims)
return false;
for (int i = 0; i < desc1.data.ndims; i++)
if (desc1.data.dims[i] != desc2.data.dims[i])
return false;
return true;
}

inline bool same_shape(const TShape &shape, int dtype,
const mkldnn::memory::desc &desc) {
return same_shape(shape, desc.data.dims, desc.data.ndims)
Expand Down
12 changes: 10 additions & 2 deletions src/operator/nn/mkldnn/mkldnn_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@

namespace mxnet {

MKLDNNStream *MKLDNNStream::Get() {
static thread_local MKLDNNStream stream;
return &stream;
}

void *AlignMem(void *mem, size_t size, size_t alignment, size_t *space) {
if (size > *space)
return nullptr;
Expand Down Expand Up @@ -57,8 +62,11 @@ mkldnn::memory *TmpMemMgr::Alloc(const mkldnn::memory::primitive_desc &pd) {
this->curr_mem = static_cast<char *>(mem) + pd.get_size();
return ret.get();
} else {
LOG(WARNING) << "Allocate " << pd.get_size()
<< " bytes with malloc directly";
// If curr_mem has been initialized and we still reach here. It means
// the current allocated memory isn't enough.
if (this->curr_mem)
LOG(WARNING) << "Allocate " << pd.get_size()
<< " bytes with malloc directly";
mkldnn_mem_ptr ret(new mkldnn::memory(pd));
MKLDNNStream::Get()->RegisterMem(ret);
return ret.get();
Expand Down
10 changes: 5 additions & 5 deletions tests/cpp/include/test_core_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,7 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer<DType>
if (bwd_node_ptr) {
CHECK_EQ(bwd_node_ptr->inputs.size(), num_inputs);
input_types.resize(bwd_node_ptr->inputs.size(), -1);
for (size_t i = 0; i < num_inputs; ++i) {
for (int i = 0; i < num_inputs; ++i) {
const int map_key = bwd_node_ptr->inputs[i].index;
CHECK(index2array.find(map_key) != index2array.end());
const int dtype = index2array[map_key]->dtype();
Expand All @@ -421,7 +421,7 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer<DType>
output_types.emplace_back(dtype);
}
} else {
for (size_t x = 0; x < num_inputs; ++x) {
for (int x = 0; x < num_inputs; ++x) {
input_types.emplace_back(default_dtype());
}
for (const auto &fwd_inp : backward_for_op->inputs()) {
Expand All @@ -431,10 +431,10 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer<DType>
}
} else {
CHECK(false); // above always true?
for (size_t x = 0; x < num_inputs; ++x) {
for (int x = 0; x < num_inputs; ++x) {
input_types.emplace_back(default_dtype());
}
for (size_t x = 0; x < inferred_num_outputs; ++x) {
for (int x = 0; x < inferred_num_outputs; ++x) {
output_types.emplace_back(default_dtype());
}
}
Expand All @@ -455,7 +455,7 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer<DType>
if (bwd_node_ptr) {
input_shapes.clear();
CHECK_EQ(bwd_node_ptr->inputs.size(), num_inputs);
for (size_t i = 0; i < num_inputs; ++i) {
for (int i = 0; i < num_inputs; ++i) {
const int map_key = bwd_node_ptr->inputs[i].index;
CHECK(index2array.find(map_key) != index2array.end());
const nnvm::TShape &shp = index2array[map_key]->shape();
Expand Down
Loading

0 comments on commit d0a80c9

Please sign in to comment.