Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Do not touch GPU 0 during ReleaseAll #14550

Merged
merged 4 commits into from
Mar 31, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions src/storage/pooled_storage_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,11 @@ class GPUPooledStorageManager final : public StorageManager {
/*!
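* \brief Constructor.
* \param dev_id id of the GPU device this manager is created for; it is stored
*        and used to set the context of the handles freed in ReleaseAll().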
*/
GPUPooledStorageManager() {
GPUPooledStorageManager(int dev_id) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe pass the context instead? And call it creation_ or initial_context? And maybe add a short explanation to the parameters documentation?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

reserve_ = dmlc::GetEnv("MXNET_GPU_MEM_POOL_RESERVE", 5);
page_size_ = dmlc::GetEnv("MXNET_GPU_MEM_POOL_PAGE_SIZE", 4096);
large_alloc_round_size_ = dmlc::GetEnv("MXNET_GPU_MEM_LARGE_ALLOC_ROUND_SIZE", 2 * 1024 * 1024);
dev_id_ = dev_id;
if (large_alloc_round_size_ <= 0) {
LOG(FATAL) << "MXNET_GPU_MEM_LARGE_ALLOC_ROUND_SIZE cannot be set to a value <= 0, found: "
<< large_alloc_round_size_;
Expand Down Expand Up @@ -123,6 +124,8 @@ class GPUPooledStorageManager final : public StorageManager {
int reserve_;
// number of devices
const size_t NDEV = 32;
// device id
int dev_id_;
// memory pool
std::unordered_map<size_t, std::vector<void*>> memory_pool_;
DISALLOW_COPY_AND_ASSIGN(GPUPooledStorageManager);
Expand Down Expand Up @@ -177,6 +180,7 @@ void GPUPooledStorageManager::ReleaseAll() {
Storage::Handle handle;
handle.dptr = j;
handle.size = i.first;
handle.ctx = Context::GPU(dev_id_);
DirectFreeNoLock(handle);
}
}
Expand All @@ -202,10 +206,11 @@ class GPUPooledRoundedStorageManager final : public StorageManager {
/*!
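* \brief Constructor.
* \param dev_id id of the GPU device this manager is created for; it is stored
*        and used to set the context of the handles freed in ReleaseAll().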
*/
GPUPooledRoundedStorageManager() {
GPUPooledRoundedStorageManager(int dev_id) {
reserve_ = dmlc::GetEnv("MXNET_GPU_MEM_POOL_RESERVE", 5);
page_size_ = dmlc::GetEnv("MXNET_GPU_MEM_POOL_PAGE_SIZE", 4096);
cut_off_ = dmlc::GetEnv("MXNET_GPU_MEM_POOL_ROUND_LINEAR_CUTOFF", 24);
dev_id_ = dev_id;
if (page_size_ < 32) {
LOG(FATAL) << "MXNET_GPU_MEM_POOL_PAGE_SIZE cannot be set to a value smaller than 32. " \
<< "Got: " << page_size_ << ".";
Expand Down Expand Up @@ -290,6 +295,8 @@ class GPUPooledRoundedStorageManager final : public StorageManager {
size_t cut_off_;
// percentage of reserved memory
int reserve_;
// device id
int dev_id_;
// memory pool
std::vector<std::vector<void*>> memory_pool_;
DISALLOW_COPY_AND_ASSIGN(GPUPooledRoundedStorageManager);
Expand Down Expand Up @@ -345,6 +352,7 @@ void GPUPooledRoundedStorageManager::ReleaseAll() {
Storage::Handle handle;
handle.size = size;
handle.dptr = j;
handle.ctx = Context::GPU(dev_id_);
DirectFreeNoLock(handle);
}
memory_pool_[i].clear();
Expand Down
4 changes: 2 additions & 2 deletions src/storage/storage.cc
Original file line number Diff line number Diff line change
Expand Up @@ -104,13 +104,13 @@ void StorageImpl::Alloc(Storage::Handle* handle) {
std::string strategy = type;

if (strategy == "Round") {
ptr = new storage::GPUPooledRoundedStorageManager();
ptr = new storage::GPUPooledRoundedStorageManager(handle->ctx.real_dev_id());
LOG(INFO) << "Using GPUPooledRoundedStorageManager.";
} else {
if (strategy != "Naive") {
LOG(FATAL) << "Unknown memory pool strategy specified: " << strategy << ".";
}
ptr = new storage::GPUPooledStorageManager();
ptr = new storage::GPUPooledStorageManager(handle->ctx.real_dev_id());
}
#else
LOG(FATAL) << "Compile with USE_CUDA=1 to enable GPU usage";
Expand Down