From b72472b9911866955767d492b477df2082787bfe Mon Sep 17 00:00:00 2001 From: vlado Date: Mon, 25 Feb 2019 10:25:54 -0700 Subject: [PATCH 1/9] Allow releasing all gpu memory --- include/mxnet/c_api.h | 6 ++++++ include/mxnet/storage.h | 8 ++++++++ python/mxnet/context.py | 5 +++++ src/c_api/c_api.cc | 7 +++++++ src/storage/pooled_storage_manager.h | 6 ++++-- src/storage/storage.cc | 12 ++++++++++++ src/storage/storage_manager.h | 8 ++++++++ 7 files changed, 50 insertions(+), 2 deletions(-) diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 0acfde0686d4..8aa51febeaae 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -2736,6 +2736,12 @@ MXNET_DLL int MXNDArrayGetSharedMemHandle(NDArrayHandle handle, int* shared_pid, MXNET_DLL int MXNDArrayCreateFromSharedMem(int shared_pid, int shared_id, const mx_uint *shape, mx_uint ndim, int dtype, NDArrayHandle *out); +/*! + * \brief Release all pooled memory from the devices storage manager + * \param dev_type device type, specify device we want to take + * \param dev_id the device id of the specific device + */ +MXNET_DLL int MXStorageReleaseAll(int dev_type, int dev_id); /*! * \brief Reconstruct NDArray from shared memory handle diff --git a/include/mxnet/storage.h b/include/mxnet/storage.h index a8481c1d36ef..4d1fc3d2c6da 100644 --- a/include/mxnet/storage.h +++ b/include/mxnet/storage.h @@ -95,6 +95,14 @@ class Storage { * \param handle Handle struct. */ virtual void DirectFree(Handle handle) = 0; + /*! + * \brief Release all memory from device if using a pooled storage manager + * + * This releases all memory from pool storage managers such as + * GPUPooledStorageManager and GPUPooledRoundedStorageManager. + * For non-pool memory managers this has no effect. + */ + virtual void ReleaseAll(Context ctx) = 0; /*! * \brief Destructor. 
*/ diff --git a/python/mxnet/context.py b/python/mxnet/context.py index 15ea9905de03..ce48b9e6305f 100644 --- a/python/mxnet/context.py +++ b/python/mxnet/context.py @@ -144,6 +144,11 @@ def default_ctx(cls, val): DeprecationWarning) cls._default_ctx.value = val #pylint: enable=no-self-argument + + def release_all(self): + dev_type = ctypes.c_int(self.device_typeid) + dev_id = ctypes.c_int(self.device_id) + check_call(_LIB.MXStorageReleaseAll(dev_type, dev_id)) # initialize the default context in Context Context._default_ctx.value = Context('cpu', 0) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index f549ddd13994..caa5f1c445b5 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1526,3 +1526,10 @@ int MXEnginePushSync(EngineSyncFunc sync_func, void* func_param, API_END(); } + +int MXStorageReleaseAll(int dev_type, int dev_id) { + API_BEGIN(); + Context ctx = Context::Create(static_cast(dev_type), dev_id); + Storage::Get()->ReleaseAll(ctx); + API_END(); +} \ No newline at end of file diff --git a/src/storage/pooled_storage_manager.h b/src/storage/pooled_storage_manager.h index 7726bc6f9273..91eb536ec7bd 100644 --- a/src/storage/pooled_storage_manager.h +++ b/src/storage/pooled_storage_manager.h @@ -85,6 +85,8 @@ class GPUPooledStorageManager final : public StorageManager { DirectFreeNoLock(handle); } + void ReleaseAll() override; + private: void DirectFreeNoLock(Storage::Handle handle) { mxnet::common::cuda::DeviceStore device_store(handle.ctx.real_dev_id(), true); @@ -115,7 +117,6 @@ class GPUPooledStorageManager final : public StorageManager { } private: - void ReleaseAll(); // used memory size_t used_memory_ = 0; // page size @@ -250,6 +251,8 @@ class GPUPooledRoundedStorageManager final : public StorageManager { DirectFreeNoLock(handle); } + void ReleaseAll() override; + private: inline int div_pow2_round_up(size_t s, int divisor_log2) { // (1025, 10) -> 2 @@ -284,7 +287,6 @@ class GPUPooledRoundedStorageManager final : public StorageManager { } 
private: - void ReleaseAll(); // number of devices const int NDEV = 32; // log2 of maximum page size. 16GB diff --git a/src/storage/storage.cc b/src/storage/storage.cc index 4f15351a594a..35c54d6ade88 100644 --- a/src/storage/storage.cc +++ b/src/storage/storage.cc @@ -26,6 +26,7 @@ #include "./pooled_storage_manager.h" #include "./cpu_shared_storage_manager.h" #include "./cpu_device_storage.h" +#include "./gpu_device_storage.h" #include "./pinned_memory_storage.h" #include "../common/lazy_alloc_array.h" #include "../profiler/storage_profiler.h" @@ -38,6 +39,7 @@ class StorageImpl : public Storage { void Alloc(Handle* handle) override; void Free(Handle handle) override; void DirectFree(Handle handle) override; + void ReleaseAll(Context ctx) override; void SharedIncrementRefCount(Handle handle) override; StorageImpl() {} virtual ~StorageImpl() = default; @@ -160,6 +162,16 @@ void StorageImpl::DirectFree(Storage::Handle handle) { profiler_.OnFree(handle); } +void StorageImpl::ReleaseAll(Context ctx) { + auto&& device = storage_managers_.at(ctx.dev_type); + std::shared_ptr manager = device.Get( + ctx.real_dev_id(), []() { + LOG(FATAL) << "Cannot Free space to a device you have not allocated"; + return nullptr; + }); + manager->ReleaseAll(); +} + void StorageImpl::SharedIncrementRefCount(Storage::Handle handle) { CHECK_EQ(handle.ctx.dev_type, Context::kCPUShared); auto&& device = storage_managers_.at(Context::kCPUShared); diff --git a/src/storage/storage_manager.h b/src/storage/storage_manager.h index d17dc91dc2fc..13be16ebe70f 100644 --- a/src/storage/storage_manager.h +++ b/src/storage/storage_manager.h @@ -52,6 +52,14 @@ class StorageManager { * \param handle Handle struct. */ virtual void DirectFree(Storage::Handle handle) = 0; + /*! + * \brief Release all memory if using a pool storage manager + * + * This releases all memory from pool storage managers such as + * GPUPooledStorageManager and GPUPooledRoundedStorageManager. 
+ * For non-pool memory managers this has no effect. + */ + virtual void ReleaseAll() {} /*! * \brief Destructor. */ From 629a5aa6fffca7d3b57c2a88db5a9a1b767a341d Mon Sep 17 00:00:00 2001 From: vlado Date: Tue, 16 Apr 2019 22:59:46 -0600 Subject: [PATCH 2/9] fix white space --- src/c_api/c_api.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index caa5f1c445b5..73d6472d2702 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1532,4 +1532,4 @@ int MXStorageReleaseAll(int dev_type, int dev_id) { Context ctx = Context::Create(static_cast(dev_type), dev_id); Storage::Get()->ReleaseAll(ctx); API_END(); -} \ No newline at end of file +} From 0bc0653f377202a098543ccc922e8ec870c5fb4c Mon Sep 17 00:00:00 2001 From: vlado Date: Mon, 29 Apr 2019 10:31:47 -0600 Subject: [PATCH 3/9] stuck ci checks From f63d1d1a044cc1c9670724d1dc37b5b84583496f Mon Sep 17 00:00:00 2001 From: vlado Date: Mon, 29 Apr 2019 15:32:37 -0600 Subject: [PATCH 4/9] Fix whitespace --- python/mxnet/context.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/mxnet/context.py b/python/mxnet/context.py index ce48b9e6305f..1ca105146d01 100644 --- a/python/mxnet/context.py +++ b/python/mxnet/context.py @@ -144,7 +144,7 @@ def default_ctx(cls, val): DeprecationWarning) cls._default_ctx.value = val #pylint: enable=no-self-argument - + def release_all(self): dev_type = ctypes.c_int(self.device_typeid) dev_id = ctypes.c_int(self.device_id) From 98cbc92ec54a3bc92785e135a2e8ca36ac9bad45 Mon Sep 17 00:00:00 2001 From: vlado Date: Tue, 7 May 2019 12:33:45 -0600 Subject: [PATCH 5/9] Rename release_all -> empty_cache and provide documentation --- python/mxnet/context.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/python/mxnet/context.py b/python/mxnet/context.py index 1ca105146d01..4bdc1f648a23 100644 --- a/python/mxnet/context.py +++ b/python/mxnet/context.py @@ -145,7 +145,20 @@ def 
default_ctx(cls, val): cls._default_ctx.value = val #pylint: enable=no-self-argument - def release_all(self): + def empty_cache(self): + """Empties the memory cache for the current contexts device. + + MXNet utilizes a memory pool to avoid excessive allocations. + Calling empty_cache will empty the memory pool of the contexts + device. This will only free the memory of unreferenced data. + + Examples + ------- + >>> ctx = mx.gpu(0) + >>> arr = mx.nd.ones((200,200), ctx=ctx) + >>> del arr + >>> ctx.empty_cache() # forces release of memory allocated for arr + """ dev_type = ctypes.c_int(self.device_typeid) dev_id = ctypes.c_int(self.device_id) check_call(_LIB.MXStorageReleaseAll(dev_type, dev_id)) From 6f24ece87d2f85415a1b2e58d08c9c452d2ebc03 Mon Sep 17 00:00:00 2001 From: vlado Date: Tue, 7 May 2019 16:47:33 -0600 Subject: [PATCH 6/9] fix indentation --- python/mxnet/context.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/mxnet/context.py b/python/mxnet/context.py index 4bdc1f648a23..3cf25b28f852 100644 --- a/python/mxnet/context.py +++ b/python/mxnet/context.py @@ -148,13 +148,13 @@ def default_ctx(cls, val): def empty_cache(self): """Empties the memory cache for the current contexts device. - MXNet utilizes a memory pool to avoid excessive allocations. - Calling empty_cache will empty the memory pool of the contexts - device. This will only free the memory of unreferenced data. + MXNet utilizes a memory pool to avoid excessive allocations. + Calling empty_cache will empty the memory pool of the contexts + device. This will only free the memory of unreferenced data. 
Examples ------- - >>> ctx = mx.gpu(0) + >>> ctx = mx.gpu(0) >>> arr = mx.nd.ones((200,200), ctx=ctx) >>> del arr >>> ctx.empty_cache() # forces release of memory allocated for arr From 504f309cb0b488a0d07a7d5591aac50ef92fb0ea Mon Sep 17 00:00:00 2001 From: vlado Date: Wed, 8 May 2019 10:00:06 -0600 Subject: [PATCH 7/9] Rename c_api's MXStorageReleaseAll -> MXStorageEmptyCache and clarify documentation --- include/mxnet/c_api.h | 4 ++-- python/mxnet/context.py | 2 +- src/c_api/c_api.cc | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 8aa51febeaae..63a38e6368dc 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -2737,11 +2737,11 @@ MXNET_DLL int MXNDArrayCreateFromSharedMem(int shared_pid, int shared_id, const mx_uint ndim, int dtype, NDArrayHandle *out); /*! - * \brief Release all pooled memory from the devices storage manager + * \brief Release all unreferenced memory from the device's storage manager's memory pool * \param dev_type device type, specify device we want to take * \param dev_id the device id of the specific device */ -MXNET_DLL int MXStorageReleaseAll(int dev_type, int dev_id); +MXNET_DLL int MXStorageEmptyCache(int dev_type, int dev_id); /*! 
* \brief Reconstruct NDArray from shared memory handle diff --git a/python/mxnet/context.py b/python/mxnet/context.py index 3cf25b28f852..1e4e712b2d4a 100644 --- a/python/mxnet/context.py +++ b/python/mxnet/context.py @@ -161,7 +161,7 @@ def empty_cache(self): """ dev_type = ctypes.c_int(self.device_typeid) dev_id = ctypes.c_int(self.device_id) - check_call(_LIB.MXStorageReleaseAll(dev_type, dev_id)) + check_call(_LIB.MXStorageEmptyCache(dev_type, dev_id)) # initialize the default context in Context Context._default_ctx.value = Context('cpu', 0) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 73d6472d2702..c306c832a447 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1527,7 +1527,7 @@ int MXEnginePushSync(EngineSyncFunc sync_func, void* func_param, API_END(); } -int MXStorageReleaseAll(int dev_type, int dev_id) { +int MXStorageEmptyCache(int dev_type, int dev_id) { API_BEGIN(); Context ctx = Context::Create(static_cast(dev_type), dev_id); Storage::Get()->ReleaseAll(ctx); From 96a7b2be6e36025273f52234c9674ef2c3a04614 Mon Sep 17 00:00:00 2001 From: vlado Date: Fri, 10 May 2019 11:07:41 -0600 Subject: [PATCH 8/9] nudge ci From 9eb83708712c13e81c0a809d36443b1a226b5125 Mon Sep 17 00:00:00 2001 From: Sheng Zha Date: Wed, 22 May 2019 00:43:07 -0700 Subject: [PATCH 9/9] Update context.py --- python/mxnet/context.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/mxnet/context.py b/python/mxnet/context.py index 1e4e712b2d4a..f284e00127b4 100644 --- a/python/mxnet/context.py +++ b/python/mxnet/context.py @@ -150,7 +150,7 @@ def empty_cache(self): MXNet utilizes a memory pool to avoid excessive allocations. Calling empty_cache will empty the memory pool of the contexts - device. This will only free the memory of unreferenced data. + device. This will only free the memory of the unreferenced data. Examples -------