From b0d97e3ca2e3b95ec4e801f57f8ac6ba1256eb22 Mon Sep 17 00:00:00 2001
From: JackieWu
Date: Tue, 5 Mar 2019 16:14:58 +0800
Subject: [PATCH] fix SoftmaxOutput resource bug (#14302)

* fix SoftmaxOutput resource

* remove BackwardResource since SoftmaxOutput is not a legacy op

* add test_SoftmaxOutput_normalization

* igore_label=-1 when use_ignore=False

* retrigger CI

* add multi_output test for SoftmaxOutput

* rename test_SoftmaxOutput_normalization to test_softmax_output_normalization

* retrigger CI

* retrigger CI

* fix test bug
---
 src/operator/softmax_output-inl.h      |  5 --
 src/operator/softmax_output.cc         |  3 ++
 tests/python/unittest/test_operator.py | 71 ++++++++++++++++++++++++++
 3 files changed, 74 insertions(+), 5 deletions(-)

diff --git a/src/operator/softmax_output-inl.h b/src/operator/softmax_output-inl.h
index c5ad90ab95bc..5dca8bac14a3 100644
--- a/src/operator/softmax_output-inl.h
+++ b/src/operator/softmax_output-inl.h
@@ -426,11 +426,6 @@ class SoftmaxOutputProp : public OperatorProperty {
     return {{in_data[softmaxout_enum::kData], out_data[softmaxout_enum::kOut]}};
   }
 
-  std::vector<ResourceRequest> BackwardResource(
-      const mxnet::ShapeVector &in_shape) const override {
-    return {ResourceRequest::kTempSpace};
-  }
-
   Operator* CreateOperator(Context ctx) const override {
     LOG(FATAL) << "Not Implemented.";
     return NULL;
diff --git a/src/operator/softmax_output.cc b/src/operator/softmax_output.cc
index c34e9095f4c1..b17ef3527297 100644
--- a/src/operator/softmax_output.cc
+++ b/src/operator/softmax_output.cc
@@ -262,6 +262,9 @@ NNVM_REGISTER_OP(_backward_SoftmaxOutput)
 .set_attr<nnvm::FInplaceOption>("FInplaceOption", [](const NodeAttrs& attrs){
   return std::vector<std::pair<int, int> >{{0, 0}};
 })
+.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& n){
+  return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
+})
 .set_attr_parser(ParamParser<SoftmaxOutputParam>)
 .set_attr<FCompute>("FCompute", SoftmaxOutputGradCompute<cpu>);
 }  // namespace op
diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index ae7dc86d566c..500d2f99f4d9 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -6496,6 +6496,75 @@ def test_softmax():
     check_smoothed_softmax_grad(default_context())
 
 
+@with_seed()
+def test_softmax_output_normalization():
+    def _softmaxoutput_normalization(multi_output, use_ignore, normalization):
+        grad_scale = np.random.random()
+        batch_size = 8
+        num_labels = 6
+        H, W = 3, 3
+        ignore_label = np.random.randint(0, num_labels) if use_ignore else -1
+
+        if multi_output:
+            data_shape = (batch_size, num_labels, H, W)
+            label_shape = (batch_size, H, W)
+        else:
+            data_shape = (batch_size, num_labels)
+            label_shape = (batch_size, )
+
+        data = mx.nd.random.uniform(-1, 1, shape=data_shape)
+        label = mx.nd.random.randint(
+            0, num_labels, shape=label_shape).astype('float32')
+        data.attach_grad()
+
+        kwargs = dict(grad_scale=grad_scale,
+                      normalization=normalization, multi_output=multi_output)
+        if use_ignore:
+            kwargs.update(use_ignore=True, ignore_label=ignore_label)
+
+        with mx.autograd.record():
+            out = mx.nd.SoftmaxOutput(data=data, label=label, **kwargs)
+            out.backward(mx.nd.ones_like(data))
+
+        exp_data = mx.nd.exp(data)
+        softmax_data = exp_data / exp_data.sum(1, keepdims=True)
+        argmax_data = mx.nd.argmax(data, axis=1)
+
+        assert_almost_equal(out.asnumpy(), softmax_data.asnumpy())
+        one_hot_label = mx.nd.one_hot(label, num_labels)
+        if multi_output:
+            one_hot_label = one_hot_label.transpose((0, 3, 1, 2))
+        data_grad = softmax_data - one_hot_label
+
+        if use_ignore:
+            if multi_output:
+                data_grad *= (label !=
+                              ignore_label).reshape((batch_size, 1, H, W))
+            else:
+                data_grad *= (label != ignore_label).reshape((batch_size, 1))
+
+        valid_cnt = 1
+        if normalization == 'batch':
+            valid_cnt = batch_size
+        elif normalization == 'valid':
+            valid_cnt = mx.nd.maximum(1, (label != ignore_label).sum())
+        scale = grad_scale / valid_cnt
+
+        if multi_output:
+            if normalization != 'valid':
+                scale /= H * W
+
+        data_grad *= scale
+
+        assert_almost_equal(data.grad.asnumpy(), data_grad.asnumpy())
+
+    for multi_output in [False, True]:
+        for use_ignore in [False, True]:
+            for normalization in ['null', 'batch', 'valid']:
+                _softmaxoutput_normalization(
+                    multi_output, use_ignore, normalization)
+
+
 @with_seed()
 def test_slice():
     def test_slice_forward_backward(a, index):
@@ -6533,6 +6602,7 @@ def test_slice_forward_backward(a, index):
     slice_sym = mx.sym.slice(data, begin=[0, None], end=[1, None], step=[2, -1])
     check_numeric_gradient(slice_sym, [in_data])
 
+
 def test_slice_partial_infer():
     def check_slice_partial_infer(data, begin, end, step, expected_out_shape):
         out = mx.sym.slice(data, begin=begin, end=end, step=step)
@@ -6555,6 +6625,7 @@ def check_slice_axis_partial_infer(data, axis, begin, end, expected_out_shape):
     check_slice_axis_partial_infer(var1, 0, 0, 5, (5, 0))
     check_slice_axis_partial_infer(var1, 1, 0, 5, (10, 0))
 
+
 @with_seed()
 def test_float16_min_max():
     """Test for issue: /~https://github.com/apache/incubator-mxnet/issues/9007"""
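
Note (not part of the patch): the FResourceRequest registration added above matters because the
backward kernel draws temporary workspace (kTempSpace) from the requested resources, presumably
for things like counting non-ignored labels when normalization='valid'. Below is a minimal
standalone sketch, distilled from the new test, of the call pattern that exercises
_backward_SoftmaxOutput and its resource request; the shapes, label values, and ignore_label=0
are illustrative only and do not appear in the patch.

    import mxnet as mx

    # Single-output case: data is (batch, num_classes), label is (batch,)
    data = mx.nd.random.uniform(-1, 1, shape=(8, 6))
    label = mx.nd.array([0, 1, 2, 3, 4, 5, 0, 1])
    data.attach_grad()

    with mx.autograd.record():
        out = mx.nd.SoftmaxOutput(data=data, label=label,
                                  use_ignore=True, ignore_label=0,
                                  normalization='valid')
        # The backward pass runs _backward_SoftmaxOutput, which is where the
        # temp-space resource registered by this patch is consumed.
        out.backward(mx.nd.ones_like(data))

    print(data.grad.asnumpy())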