Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Use env var to enforce safe accumulation in ReduceAxesCompute #14830

Merged
merged 8 commits into from
May 17, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions ci/windows/test_py2_cpu.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,7 @@ C:\Python27\python.exe -m nose -v --with-timer --timer-ok 1 --timer-warning 15 -
if (! $?) { Throw ("Error running unittest") }
C:\Python27\python.exe -m nose -v --with-timer --timer-ok 1 --timer-warning 15 --timer-filter warning,error --with-xunit --xunit-file nosetests_train.xml tests\python\train
if (! $?) { Throw ("Error running train tests") }
# Adding this extra test since it's not possible to set env var on the fly in Windows.
$env:MXNET_SAFE_ACCUMULATION=1
haojin2 marked this conversation as resolved.
Show resolved Hide resolved
C:\Python27\python.exe -m nose -v --with-timer --timer-ok 1 --timer-warning 15 --timer-filter warning,error --with-xunit --xunit-file nosetests_unittest.xml tests\python\unittest\test_operator.py:test_norm
if (! $?) { Throw ("Error running unittest") }
4 changes: 4 additions & 0 deletions ci/windows/test_py2_gpu.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,7 @@ C:\Python27\python.exe -m nose -v --with-timer --timer-ok 1 --timer-warning 15 -
if (! $?) { Throw ("Error running tests") }
C:\Python27\python.exe -m nose -v --with-timer --timer-ok 1 --timer-warning 15 --timer-filter warning,error tests\python\train
if (! $?) { Throw ("Error running tests") }
# Adding this extra test since it's not possible to set env var on the fly in Windows.
$env:MXNET_SAFE_ACCUMULATION=1
C:\Python27\python.exe -m nose -v --with-timer --timer-ok 1 --timer-warning 15 --timer-filter warning,error --with-xunit --xunit-file nosetests_operator.xml tests\python\gpu\test_operator_gpu.py:test_norm
if (! $?) { Throw ("Error running tests") }
4 changes: 4 additions & 0 deletions ci/windows/test_py3_cpu.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,7 @@ C:\Python37\python.exe -m nose -v --with-timer --timer-ok 1 --timer-warning 15 -
if (! $?) { Throw ("Error running unittest") }
C:\Python37\python.exe -m nose -v --with-timer --timer-ok 1 --timer-warning 15 --timer-filter warning,error --with-xunit --xunit-file nosetests_train.xml tests\python\train
if (! $?) { Throw ("Error running train tests") }
# Adding this extra test since it's not possible to set env var on the fly in Windows.
$env:MXNET_SAFE_ACCUMULATION=1
C:\Python37\python.exe -m nose -v --with-timer --timer-ok 1 --timer-warning 15 --timer-filter warning,error --with-xunit --xunit-file nosetests_unittest.xml tests\python\unittest\test_operator.py:test_norm
if (! $?) { Throw ("Error running unittest") }
4 changes: 4 additions & 0 deletions ci/windows/test_py3_gpu.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,7 @@ C:\Python37\python.exe -m nose -v --with-timer --timer-ok 1 --timer-warning 15 -
if (! $?) { Throw ("Error running tests") }
C:\Python37\python.exe -m nose -v --with-timer --timer-ok 1 --timer-warning 15 --timer-filter warning,error --with-xunit --xunit-file nosetests_train.xml tests\python\train
if (! $?) { Throw ("Error running tests") }
# Adding this extra test since it's not possible to set env var on the fly in Windows.
$env:MXNET_SAFE_ACCUMULATION=1
C:\Python37\python.exe -m nose -v --with-timer --timer-ok 1 --timer-warning 15 --timer-filter warning,error --with-xunit --xunit-file nosetests_operator.xml tests\python\gpu\test_operator_gpu.py:test_norm
if (! $?) { Throw ("Error running tests") }
8 changes: 8 additions & 0 deletions docs/faq/env_var.md
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,14 @@ When USE_PROFILER is enabled in Makefile or CMake, the following environments ca
- Values: Int ```(default=4)```
- This variable controls how many CuDNN dropout state resources to create for each GPU context for use in operator.

* MXNET_SAFE_ACCUMULATION
- Values: Values: 0(false) or 1(true) ```(default=0)```
- If this variable is set, the accumulation will enter the safe mode, meaning accumulation is done in a data type of higher precision than
the input data type, leading to more accurate accumulation results with a possible performance loss and backward compatibility loss.
For example, when the variable is set to 1(true), if the input data type is float16, then the accumulation will be done
with float32.
- Model accuracies do not necessarily improve with this environment variable turned on.

Settings for Minimum Memory Usage
---------------------------------
- Make sure ```min(MXNET_EXEC_NUM_TEMP, MXNET_GPU_WORKER_NTHREADS) = 1```
Expand Down
15 changes: 13 additions & 2 deletions src/operator/tensor/broadcast_reduce_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -1183,12 +1183,23 @@ void LpNormCompute(const nnvm::NodeAttrs& attrs,
} else {
small = ReduceAxesShapeImpl(inputs[0].shape_, param.axis, true, false);
}

if (param.ord == 1) {
ReduceAxesComputeImpl<xpu, mshadow_op::sum, true, false, mshadow_op::abs>(
if (dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false)) {
ReduceAxesComputeImpl<xpu, mshadow_op::sum, true, false, mshadow_op::abs>(
ctx, inputs, req, outputs, small);
} else {
ReduceAxesComputeImpl<xpu, mshadow_op::sum, false, false, mshadow_op::abs>(
ctx, inputs, req, outputs, small);
}
} else if (param.ord == 2) {
ReduceAxesComputeImpl<xpu, mshadow_op::nrm2, true, false, mshadow_op::identity>(
if (dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false)) {
ReduceAxesComputeImpl<xpu, mshadow_op::nrm2, true, false, mshadow_op::identity>(
ctx, inputs, req, outputs, small);
} else {
ReduceAxesComputeImpl<xpu, mshadow_op::nrm2, false, false, mshadow_op::identity>(
ctx, inputs, req, outputs, small);
}
}
}

Expand Down
86 changes: 48 additions & 38 deletions tests/python/unittest/test_operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3482,51 +3482,61 @@ def l2norm(input_data, axis=0, keepdims=True):
epsilon = 1e-3
acc_type = {np.float16: np.float32, np.float32: np.float32, np.float64: np.float64,
np.int32: np.int32, np.int64: np.int64}
dtype_to_str = {np.float16: 'float16', np.float32: 'float32', np.float64: 'float64',
np.int32: 'int32', np.int64: 'int64'}
is_windows = sys.platform.startswith('win')
for order in [1, 2]:
for dtype in [np.float16, np.float32, np.float64, np.int32, np.int64]:
for i in range(in_data_dim):
for out_dtype in ['float32', 'float64', 'int32', 'int64']:
if (dtype == np.int32 or dtype == np.int64) and ('int' not in out_dtype or is_windows):
continue
if dtype != np.int32 and dtype != np.int64 and 'int' in out_dtype:
continue
backward_dtype = np.float32 if out_dtype == 'float32' else np.float64
skip_backward = 'int' in out_dtype
print(order, dtype, i, out_dtype, in_shape)
in_data = np.random.uniform(-1, 1, in_shape).astype(acc_type[dtype])
in_data[abs(in_data) < epsilon] = 2 * epsilon
norm_sym = mx.symbol.norm(data=data, ord=order, axis=i, out_dtype=out_dtype, keepdims=True)
npy_out = l1norm(in_data, i) if order is 1 else l2norm(in_data, i)
npy_out_backward = np.sign(in_data) if order is 1 else in_data/npy_out
check_symbolic_forward(norm_sym, [in_data.astype(dtype)], [npy_out.astype(out_dtype)],
rtol=1e-3, atol=1e-5, ctx=ctx)
if not skip_backward:
check_symbolic_backward(norm_sym, [in_data.astype(dtype)],
[np.ones(npy_out.shape).astype(out_dtype)],
[npy_out_backward], rtol=1e-3, atol=1e-5, ctx=ctx,
dtype=backward_dtype)
# Disable numeric gradient /~https://github.com/apache/incubator-mxnet/issues/11509
# check gradient
if dtype is not np.float16 and not skip_backward:
check_numeric_gradient(norm_sym, [in_data], numeric_eps=epsilon,
rtol=1e-1, atol=1e-3, dtype=backward_dtype)
if i < in_data_dim-1:
norm_sym = mx.symbol.norm(data=data, ord=order, axis=(i, i+1), keepdims=True)
npy_out = l1norm(in_data, (i, i+1)) if order is 1 else l2norm(in_data, (i, i+1))
for enforce_safe_acc in ["1", "0"]:
if is_windows:
if enforce_safe_acc == "0":
break
enforce_safe_acc = "0" if "MXNET_SAFE_ACCUMULATION" not in os.environ else os.environ["MXNET_SAFE_ACCUMULATION"]
else:
os.environ["MXNET_SAFE_ACCUMULATION"] = enforce_safe_acc
for order in [1, 2]:
for dtype in [np.float16, np.float32, np.float64]:
for i in range(in_data_dim):
for out_dtype in ['float32', 'float64']:
backward_dtype = np.float32 if out_dtype == 'float32' else np.float64
accumulation_type = acc_type[dtype]
if enforce_safe_acc == "0":
backward_dtype = dtype
out_dtype = dtype_to_str[dtype]
accumulation_type = dtype
skip_backward = 'int' in out_dtype
in_data = np.random.uniform(-1, 1, in_shape).astype(accumulation_type)
in_data[abs(in_data) < epsilon] = 2 * epsilon
norm_sym = mx.symbol.norm(data=data, ord=order, axis=i, out_dtype=out_dtype, keepdims=True)
npy_out = l1norm(in_data, i) if order is 1 else l2norm(in_data, i)
npy_out_backward = np.sign(in_data) if order is 1 else in_data/npy_out
check_symbolic_forward(norm_sym, [in_data], [npy_out.astype(dtype)],
rtol=1e-3 if dtype is np.float16 else 1e-3,
atol=1e-5 if dtype is np.float16 else 1e-5, ctx=ctx)
if not skip_backward:
check_symbolic_backward(norm_sym, [in_data],
check_symbolic_forward(norm_sym, [in_data.astype(dtype)], [npy_out.astype(out_dtype)],
rtol=1e-2 if dtype == np.float16 else 1e-3,
atol=1e-4 if dtype == np.float16 else 1e-5, ctx=ctx, dtype=dtype)
if dtype is not np.float16 and not skip_backward:
check_symbolic_backward(norm_sym, [in_data.astype(dtype)],
[np.ones(npy_out.shape).astype(out_dtype)],
[npy_out_backward.astype(out_dtype)],
rtol=1e-3, atol=1e-5, ctx=ctx, dtype=backward_dtype)
[npy_out_backward], rtol=1e-3, atol=1e-5, ctx=ctx,
dtype=backward_dtype)
# Disable numeric gradient /~https://github.com/apache/incubator-mxnet/issues/11509
# check gradient
if dtype is not np.float16 and not skip_backward:
check_numeric_gradient(norm_sym, [in_data], numeric_eps=epsilon,
rtol=1e-1, atol=1e-3, dtype=backward_dtype)
if i < in_data_dim-1:
norm_sym = mx.symbol.norm(data=data, ord=order, axis=(i, i+1), keepdims=True)
npy_out = l1norm(in_data, (i, i+1)) if order is 1 else l2norm(in_data, (i, i+1))
npy_out_backward = np.sign(in_data) if order is 1 else in_data/npy_out
check_symbolic_forward(norm_sym, [in_data], [npy_out.astype(dtype)],
rtol=1e-2 if dtype is np.float16 else 1e-3,
atol=1e-4 if dtype is np.float16 else 1e-5, ctx=ctx)
if dtype is not np.float16 and not skip_backward:
check_symbolic_backward(norm_sym, [in_data],
[np.ones(npy_out.shape).astype(out_dtype)],
[npy_out_backward.astype(out_dtype)],
rtol=1e-3, atol=1e-5, ctx=ctx, dtype=backward_dtype)
# check gradient
if dtype is not np.float16 and not skip_backward:
check_numeric_gradient(norm_sym, [in_data], numeric_eps=epsilon,
rtol=1e-1, atol=1e-3, dtype=backward_dtype)


def test_layer_norm():
Expand Down