From a8c2ff316f21d3defd211386c2034a241debed96 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Sat, 16 Jun 2018 12:58:36 +0800 Subject: [PATCH 1/3] refine the initial cpu memory flag for mkldnn --- paddle/fluid/platform/cpu_info.cc | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/paddle/fluid/platform/cpu_info.cc b/paddle/fluid/platform/cpu_info.cc index 40dc7c9a0b6a40..c708337f8f4119 100644 --- a/paddle/fluid/platform/cpu_info.cc +++ b/paddle/fluid/platform/cpu_info.cc @@ -28,9 +28,13 @@ DEFINE_double(fraction_of_cpu_memory_to_use, 1, "Default use 100% of CPU memory for PaddlePaddle," "reserve the rest for page tables, etc"); -DEFINE_uint64( - initial_cpu_memory_in_mb, 500, - "Default initial 500MB of CPU memory for PaddlePaddle, in MD unit."); +DEFINE_uint64(initial_cpu_memory_in_mb, +#ifdef PADDLE_WITH_MKLDNN + 1000, +#else + 500, +#endif + "Initial CPU memory for PaddlePaddle, in MD unit."); DEFINE_double( fraction_of_cuda_pinned_memory_to_use, 0.5, @@ -59,10 +63,7 @@ inline size_t CpuTotalPhysicalMemory() { size_t CpuMaxAllocSize() { // For distributed systems, it requires configuring and limiting // the fraction of memory to use. - return std::min( - static_cast(FLAGS_fraction_of_cpu_memory_to_use * - CpuTotalPhysicalMemory()), - static_cast(FLAGS_initial_cpu_memory_in_mb * 1 << 20)); + return FLAGS_fraction_of_cpu_memory_to_use * CpuTotalPhysicalMemory(); } size_t CpuMinChunkSize() { @@ -71,8 +72,11 @@ size_t CpuMinChunkSize() { } size_t CpuMaxChunkSize() { - // Allow to allocate the maximum chunk size is roughly 3% of CPU memory. - return CpuMaxAllocSize() / 32; + // Allow to allocate the maximum chunk size is roughly 3% of CPU memory, + // or the initial_cpu_memory_in_mb. + return std::min( + static_cast(CpuMaxAllocSize() / 32), + static_cast(FLAGS_initial_cpu_memory_in_mb * 1 << 20)); } size_t CUDAPinnedMaxAllocSize() { From a0c5fd83b26a2603e46011d9e6a1e6b1e850e323 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Sat, 16 Jun 2018 13:11:55 +0800 Subject: [PATCH 2/3] enable setting initial memory from env --- paddle/testing/paddle_gtest_main.cc | 5 +++-- python/paddle/fluid/__init__.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/paddle/testing/paddle_gtest_main.cc b/paddle/testing/paddle_gtest_main.cc index 7772dc97f5c1a9..555be3d00e2dc4 100644 --- a/paddle/testing/paddle_gtest_main.cc +++ b/paddle/testing/paddle_gtest_main.cc @@ -30,8 +30,9 @@ int main(int argc, char** argv) { new_argv.push_back( strdup("--tryfromenv=fraction_of_gpu_memory_to_use,use_pinned_memory")); #else - new_argv.push_back(strdup("--tryfromenv=use_pinned_memory,use_mkldnn")); - new_argv.push_back(strdup("--undefok=use_mkldnn")); + new_argv.push_back(strdup( + "--tryfromenv=use_pinned_memory,use_mkldnn,initial_cpu_memory_in_mb")); + new_argv.push_back(strdup("--undefok=use_mkldnn,initial_cpu_memory_in_mb")); #endif int new_argc = static_cast(new_argv.size()); char** new_argv_address = new_argv.data(); diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index bd985ad733aa8e..5af5bc9c473131 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -117,7 +117,7 @@ def __bootstrap__(): read_env_flags = [ 'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir', - 'eager_delete_scope', 'use_mkldnn' + 'eager_delete_scope', 'use_mkldnn', 'initial_cpu_memory_in_mb' ] if core.is_compiled_with_cuda(): read_env_flags += [ From 9a25f2895cf1b9e65542442a5f3fed666b52b37a Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Tue, 19 Jun 2018 11:40:32 +0800 Subject: [PATCH 3/3] update the default cpu memory with MKLDNN --- paddle/fluid/platform/cpu_info.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/platform/cpu_info.cc b/paddle/fluid/platform/cpu_info.cc index c708337f8f4119..f832d72b53e8d0 100644 --- a/paddle/fluid/platform/cpu_info.cc +++ b/paddle/fluid/platform/cpu_info.cc @@ -30,7 +30,9 @@ DEFINE_double(fraction_of_cpu_memory_to_use, 1, DEFINE_uint64(initial_cpu_memory_in_mb, #ifdef PADDLE_WITH_MKLDNN - 1000, + /* Aligned with mozga-intel, MKLDNN need at least 5000 MB + * to obtain the best performance*/ + 5000, #else 500, #endif