Description
I want to reduce GPU memory cost when using Gluon. I tried MXNet memonger but it did not work for me. After that I set os.environ['MXNET_BACKWARD_DO_MIRROR'] = '1', but that did not work for me either.
Environment info (Required)
----------Python Info----------
('Version :', '2.7.5')
('Compiler :', 'GCC 4.8.5 20150623 (Red Hat 4.8.5-11)')
('Build :', ('default', 'Nov 6 2016 00:28:07'))
('Arch :', ('64bit', 'ELF'))
------------Pip Info-----------
('Version :', '9.0.1')
('Directory :', '/usr/lib/python2.7/site-packages/pip')
----------MXNet Info-----------
('Version :', '1.2.0')
('Directory :', '/home/yinghuang/incubator-mxnet-newest/python/mxnet')
Hashtag not found. Not installed from pre-built package.
----------System Info----------
('Platform :', 'Linux-3.10.0-327.22.2.el7.x86_64-x86_64-with-centos-7.2.1511-Core')
('system :', 'Linux')
('node :', 'gz-open-gpu-c117')
('release :', '3.10.0-327.22.2.el7.x86_64')
('version :', '#1 SMP Thu Jun 23 17:05:11 UTC 2016')
----------Hardware Info----------
('machine :', 'x86_64')
('processor :', 'x86_64')
Architecture: x86_64
CPU op-mode(s): 32-bit, 64-bit
Byte Order: Little Endian
CPU(s): 32
On-line CPU(s) list: 0-31
Thread(s) per core: 2
Core(s) per socket: 8
Socket(s): 2
NUMA node(s): 2
Vendor ID: GenuineIntel
CPU family: 6
Model: 62
Model name: Genuine Intel(R) CPU @ 2.80GHz
Stepping: 2
CPU MHz: 1706.250
BogoMIPS: 5617.25
Virtualization: VT-x
L1d cache: 32K
L1i cache: 32K
L2 cache: 256K
L3 cache: 25600K
NUMA node0 CPU(s): 0-7,16-23
NUMA node1 CPU(s): 8-15,24-31
----------Network Test----------
Setting timeout: 10
Timing for MXNet: /~https://github.com/apache/incubator-mxnet, DNS: 0.0234 sec, LOAD: 1.4669 sec.
Timing for PYPI: https://pypi.python.org/pypi/pip, DNS: 0.0253 sec, LOAD: 0.4839 sec.
Timing for FashionMNIST: https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/fashion-mnist/train-labels-idx1-ubyte.gz, DNS: 0.3580 sec, LOAD: 2.5292 sec.
Timing for Conda: https://repo.continuum.io/pkgs/free/, DNS: 0.0229 sec, LOAD: 0.8054 sec.
Timing for Gluon Tutorial(en): http://gluon.mxnet.io, DNS: 0.7395 sec, LOAD: 1.3768 sec.
Timing for Gluon Tutorial(cn): https://zh.gluon.ai, DNS: 0.0230 sec, LOAD: 3.4856 sec.
Question definition
I am using Python 2 and the newest MXNet Gluon with 3D convolutions to do video action recognition. I want GPU memory to be used efficiently, so 1) I tried [memonger] to optimize the symbol generated by the HybridBlock, but it had no effect despite a lot of effort. 2) Then I tried to save memory by only setting the environment variable MXNET_BACKWARD_DO_MIRROR to '1', but the memory was not reduced either. Can someone show me what I have missed for memory saving?
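For completeness, this is roughly how the mirroring flag is set in my training script (a minimal sketch with a small stand-in network; exporting the variable before importing mxnet is an assumption on my part about when the backend reads it):

import os
# Assumption: the flag is read by the backend when executors are created,
# so it is set before mxnet is imported.
os.environ['MXNET_BACKWARD_DO_MIRROR'] = '1'

import mxnet as mx
from mxnet import autograd
from mxnet.gluon import nn

# Small stand-in network; the real script builds the 3D-conv DenseNet shown below.
net = nn.HybridSequential()
net.add(nn.Dense(128, activation='relu'))
net.add(nn.Dense(101))
net.initialize(ctx=mx.cpu())
net.hybridize()

x = mx.nd.ones((32, 512))
with autograd.record():
    out = net(x)
out.backward()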
Steps to reproduce
Here is the code of my network:
import sys
import os
mxnet_path = os.path.expanduser('~') + '/incubator-mxnet-newest'
sys.path.insert(0, os.path.abspath(os.path.join(mxnet_path, "python")))
import mxnet as mx
from mxnet.gluon import nn
from mxnet import nd
from mxnet.gluon.block import _flatten,_regroup
import memonger
def bn_relu_conv(ks, nout, stride, pad, name=None):
    layer = nn.HybridSequential()
    layer.add(nn.BatchNorm())
    layer.add(nn.Activation('relu'))
    layer.add(nn.Conv3D(channels=nout, kernel_size=ks, padding=pad, strides=stride))
    return layer

def bn_relu_block(growth_rate):
    layer = nn.HybridSequential()
    layer.add(bn_relu_conv(1, nout=growth_rate, stride=1, pad=0))
    layer.add(bn_relu_conv(3, nout=growth_rate, stride=1, pad=1))
    return layer

def conv_act_layer(channels, kernel=(1, 1, 1), pad=(0, 0, 0), stride=(1, 1, 1), act_type="relu", use_batchnorm=False):
    layer = nn.HybridSequential()
    layer.add(nn.Conv3D(channels=channels, kernel_size=kernel, padding=pad, strides=stride))
    layer.add(nn.BatchNorm())
    layer.add(nn.Activation(act_type))
    return layer

def transition(channels):
    transition_layer = nn.HybridSequential()
    transition_layer.add(bn_relu_conv(ks=1, nout=channels, stride=1, pad=0))
    transition_layer.add(nn.MaxPool3D(pool_size=2, strides=2))
    return transition_layer

def transition_w_o_pooling(channels):
    layer = bn_relu_conv(ks=1, nout=channels, stride=1, pad=0)
    return layer
class DsodBlock(nn.HybridBlock):
    def __init__(self, layers, growth_rate, use_memonger=False, **kwargs):
        super(DsodBlock, self).__init__(**kwargs)
        self.use_memonger = use_memonger
        self.net = nn.HybridSequential()
        for i in range(layers):
            lay = bn_relu_block(growth_rate)
            self.net.add(lay)

    def hybrid_forward(self, F, x):
        for idx, layer in enumerate(self.net):
            out = layer(x)
            x = F.concat(x, out, dim=1)
            if self.use_memonger and (idx % 2 == 0):
                # print("use memonger true")
                x._set_attr(mirror_stage='True')
        return x
class DenseNet(nn.HybridBlock):
    def __init__(self, net_def, num_classes, growth_rate, use_memonger=False, batch_size=32, input_depth=16, input_size=112, **kwargs):
        super(DenseNet, self).__init__(**kwargs)
        channels = 128
        self.use_memonger = use_memonger
        self.batch_size = batch_size
        self.input_depth = input_depth
        self.input_size = input_size
        # assert self.use_memonger
        with self.name_scope():
            self.features = nn.HybridSequential(prefix='')
            self.features.add(conv_act_layer(64, kernel=3, pad=1, stride=2, act_type="relu", use_batchnorm=True))
            self.features.add(conv_act_layer(64, kernel=3, pad=1, stride=1, act_type="relu", use_batchnorm=True))
            self.features.add(conv_act_layer(128, kernel=3, pad=1, stride=1, act_type="relu", use_batchnorm=True))
            self.features.add(nn.MaxPool3D(pool_size=2, strides=2, padding=1))
            for i, (dense_layers, transition_fun) in enumerate(net_def):
                self.features.add(DsodBlock(layers=dense_layers, growth_rate=growth_rate, use_memonger=use_memonger))
                channels += growth_rate * dense_layers
                self.features.add(transition_fun(channels))
            self.features.add(nn.BatchNorm())
            self.features.add(nn.Activation('relu'))
            self.features.add(nn.GlobalAvgPool3D())
            self.features.add(nn.Flatten())
            self.output = nn.Dense(num_classes, in_units=channels)

    def _get_graph(self, *args):
        # assert False
        if not self._cached_graph:
            args, self._in_format = _flatten(args)
            if len(args) > 1:
                inputs = [mx.symbol.var('data%d' % i) for i in range(len(args))]
            else:
                inputs = [mx.symbol.var('data')]
            grouped_inputs = _regroup(inputs, self._in_format)[0]
            params = {i: j.var() for i, j in self._reg_params.items()}
            with self.name_scope():
                out = self.hybrid_forward(mx.symbol, *grouped_inputs, **params)  # pylint: disable=no-value-for-parameter
            out, self._out_format = _flatten(out)
            assert len(out) == 1
            if self.use_memonger:
                assert len(inputs) == 1
                out = memonger.search_plan(out[0], data=(self.batch_size, 3, self.input_depth, self.input_size, self.input_size))
                out = [out]
            self._cached_graph = inputs, out[0]  # mx.symbol.Group(out)
        return self._cached_graph

    def hybrid_forward(self, F, x):
        x = self.features(x)
        x = self.output(x)
        return x
def dsod_net(net_def, num_classes, growth_rate=64):
    growth_rate = growth_rate
    channels = 128
    net = nn.HybridSequential()
    with net.name_scope():
        ## dsod backbone
        net.add(conv_act_layer(64, kernel=3, pad=1, stride=2, act_type="relu", use_batchnorm=True))
        net.add(conv_act_layer(64, kernel=3, pad=1, stride=1, act_type="relu", use_batchnorm=True))
        net.add(conv_act_layer(128, kernel=3, pad=1, stride=1, act_type="relu", use_batchnorm=True))
        net.add(nn.MaxPool3D(pool_size=2, strides=2, padding=1))
        for i, (dense_layers, transition_fun) in enumerate(net_def):
            net.add(DsodBlock(layers=dense_layers, growth_rate=growth_rate))
            channels += growth_rate * dense_layers
            net.add(transition_fun(channels))
        classifier = nn.HybridSequential()
        classifier.add(nn.BatchNorm())
        classifier.add(nn.Activation('relu'))
        classifier.add(nn.GlobalAvgPool3D())
        classifier.add(nn.Flatten())
        classifier.add(nn.Dense(num_classes))
        net.add(classifier)
    return net

def dsod_net_v2(net_def, num_classes, growth_rate=64, use_memonger=False, **kwargs):
    net = DenseNet(net_def, num_classes, growth_rate, use_memonger, **kwargs)
    return net

def get_net(net_depth, num_classes, hybridize=True, growth_rate=64, **kwargs):
    densenet_spec = {30: [(6, transition), (8, transition), (8, transition_w_o_pooling), (8, transition_w_o_pooling)],
                     22: [(6, transition), (8, transition), (8, transition_w_o_pooling)]}
    net_def = densenet_spec[net_depth]
    # net = dsod_net(net_def, num_classes, growth_rate)
    net = dsod_net_v2(net_def, num_classes, growth_rate, False, **kwargs)
    if hybridize:
        net.hybridize()
    return net

if __name__ == '__main__':
    dsod = get_net(22, 101, True, 32)
    # print dsod
    dsod.initialize(ctx=mx.gpu(4))
    x = mx.nd.ones((32, 3, 16, 112, 112), ctx=mx.gpu(4))
    res = dsod(x)
    # print res.shape
    # print res
@xinedison - Memonger in Gluon is not supported yet, as far as I know.
There is a new environment variable, MXNET_GPU_MEM_POOL_TYPE, introduced for the memory pool strategy in #11041, which significantly reduces memory usage. You might want to check it out.
Created a feature tracking issue for evaluating the use of Memonger in Gluon - #12226
Resolving it here. Please reopen if closed in error.
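For anyone who finds this later, a minimal sketch of trying the memory-pool variable mentioned above (the value 'Round' is an assumption based on the description of #11041; please check the environment-variable documentation for the exact accepted values):

import os
# Assumption: 'Round' selects the rounded memory pool strategy from #11041;
# verify the accepted values in the MXNet environment-variable docs.
os.environ['MXNET_GPU_MEM_POOL_TYPE'] = 'Round'

import mxnet as mx

# Allocate something on the GPU so the memory pool is actually exercised.
x = mx.nd.ones((32, 3, 16, 112, 112), ctx=mx.gpu(0))
print(x.shape)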