Skip to content

Commit

Permalink
Fixes apache#14199: use the proper API to get the number of GPUs.
Browse files Browse the repository at this point in the history
1. Added get_gpu_count() and get_gpu_memory() APIs to the Python binding.
2. Updated the example scripts to use the proper API for getting the number of GPUs.
  • Loading branch information
frankfliu committed Feb 25, 2019
1 parent 5f32f32 commit 10466ff
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 26 deletions.
7 changes: 1 addition & 6 deletions benchmark/python/control_flow/rnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,12 +79,7 @@ def _array(shape, ctx):


def _get_gpus():
try:
re = subprocess.check_output(["nvidia-smi", "-L"], universal_newlines=True)
except OSError:
return []
return range(len([i for i in re.split('\n') if 'GPU' in i]))

return range(mx.util.get_gpu_count())

def run_benchmark(cell_type, ctx, seq_len, batch_size, hidden_dim):
obj = {"foreach": ForeachRNN, "while_loop": WhileRNN}[args.benchmark]
Expand Down
8 changes: 3 additions & 5 deletions example/image-classification/common/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
import os
import errno

import mxnet as mx

def download_file(url, local_fname=None, force_write=False):
# requests is not default installed
import requests
Expand Down Expand Up @@ -49,8 +51,4 @@ def get_gpus():
"""
return a list of GPUs
"""
try:
re = subprocess.check_output(["nvidia-smi", "-L"], universal_newlines=True)
except OSError:
return []
return range(len([i for i in re.split('\n') if 'GPU' in i]))
return range(mx.util.get_gpu_count())
10 changes: 1 addition & 9 deletions python/mxnet/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import struct
import traceback
import numbers
import subprocess
import sys
import os
import errno
Expand Down Expand Up @@ -1391,14 +1390,7 @@ def list_gpus():
If there are n GPUs, then return a list [0,1,...,n-1]. Otherwise returns
[].
"""
re = ''
nvidia_smi = ['nvidia-smi', '/usr/bin/nvidia-smi', '/usr/local/nvidia/bin/nvidia-smi']
for cmd in nvidia_smi:
try:
re = subprocess.check_output([cmd, "-L"], universal_newlines=True)
except (subprocess.CalledProcessError, OSError):
pass
return range(len([i for i in re.split('\n') if 'GPU' in i]))
return range(mx.util.get_gpu_count())

def download(url, fname=None, dirname=None, overwrite=False, retries=5):
"""Download an given URL
Expand Down
16 changes: 16 additions & 0 deletions python/mxnet/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,12 @@
# under the License.
"""general utility functions"""

import ctypes
import os
import sys

from .base import _LIB, check_call


def makedirs(d):
"""Create directories recursively if they don't exist. os.makedirs(exist_ok=True) is not
Expand All @@ -28,3 +31,16 @@ def makedirs(d):
mkpath(d)
else:
os.makedirs(d, exist_ok=True) # pylint: disable=unexpected-keyword-arg


def get_gpu_count():
    """Return the number of GPUs reported by the backend library.

    Returns
    -------
    int
        The value that ``MXGetGPUCount`` writes into its output argument.
    """
    count = ctypes.c_int()
    # The C API returns a status code and writes the result through the pointer.
    status = _LIB.MXGetGPUCount(ctypes.byref(count))
    # NOTE(review): check_call presumably raises on a failing status -- confirm in .base.
    check_call(status)
    return count.value


def get_gpu_memory(gpu_dev_id):
    """Query the free and total memory of one GPU device.

    Parameters
    ----------
    gpu_dev_id : int
        Device id passed straight through to ``MXGetGPUMemoryInformation64``.

    Returns
    -------
    tuple of (int, int)
        ``(free, total)`` as written by the C API into two 64-bit unsigned
        outputs. NOTE(review): units are presumably bytes -- confirm against
        the C API documentation.
    """
    free, total = ctypes.c_uint64(0), ctypes.c_uint64(0)
    # Both results come back through output pointers; the call itself returns
    # a status code that is handed to check_call.
    status = _LIB.MXGetGPUMemoryInformation64(
        gpu_dev_id, ctypes.byref(free), ctypes.byref(total))
    check_call(status)
    return free.value, total.value
10 changes: 4 additions & 6 deletions tools/bandwidth/test_measure.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,11 @@
from measure import run
import subprocess
import logging

import mxnet as mx

def get_gpus():
try:
re = subprocess.check_output(["nvidia-smi", "-L"], universal_newlines=True)
except OSError:
return ''
gpus = [i for i in re.split('\n') if 'GPU' in i]
return ','.join([str(i) for i in range(len(gpus))])
return ','.join([str(i) for i in range(mx.util.get_gpu_count())])

def test_measure(**kwargs):
logging.info(kwargs)
Expand Down

0 comments on commit 10466ff

Please sign in to comment.