diff --git a/src/operator/nn/fully_connected-inl.h b/src/operator/nn/fully_connected-inl.h index 44af375486fb..e4bb11f6bc56 100644 --- a/src/operator/nn/fully_connected-inl.h +++ b/src/operator/nn/fully_connected-inl.h @@ -36,7 +36,6 @@ #include "../elemwise_op_common.h" #include "../linalg.h" #include "../../common/utils.h" -#include "../tensor/broadcast_reduce_op.h" namespace mxnet { namespace op { @@ -170,18 +169,7 @@ void FCBackward(const OpContext &ctx, const FullyConnectedParam &param, // gradient of bias if (!param.no_bias) { Tensor gbias = in_grad[fullc::kBias].get(s); - TBlob grad_blob = TBlob(grad); - TBlob gbias_blob = TBlob(gbias); - mxnet::TShape x(1, 0); - mxnet::TShape small; - if (shape_assign(&gbias_blob.shape_, Shape2(param.num_hidden, 1))) { - small = gbias_blob.shape_; - } else { - small = ReduceAxesShapeImpl(grad_blob.shape_, dmlc::optional(x), true, false); - } - ReduceAxesComputeImpl(ctx, {grad_blob}, {req[fullc::kBias]}, - {in_grad[fullc::kBias]}, small); + Assign(gbias, req[fullc::kBias], sum_rows(grad)); } // gradient of data // Legacy approach shown here for comparison: diff --git a/src/operator/nn/fully_connected.cc b/src/operator/nn/fully_connected.cc index 27f6595aee9e..a097357ef5a3 100644 --- a/src/operator/nn/fully_connected.cc +++ b/src/operator/nn/fully_connected.cc @@ -316,9 +316,11 @@ NNVM_REGISTER_OP(_backward_FullyConnected) const FullyConnectedParam& params = nnvm::get(attrs.parsed); return params.no_bias ? 
2 : 3; }) +#if MXNET_USE_MKLDNN == 1 .set_attr("FResourceRequest", [](const NodeAttrs& n) { return std::vector{ResourceRequest::kTempSpace}; }) +#endif .set_attr("TIsBackward", true) .set_attr("FInplaceOption", [](const NodeAttrs& attrs){ return std::vector >{{1, 0}}; diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index 2dd5fe3bcc9a..52fe69bbd434 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -696,27 +696,6 @@ def test_symbol_pow(): check_symbolic_backward(test, [data_tmp, exp_tmp], [np.ones(shape)], [data_dir, exp_dir]) -@with_seed() -def test_fully_connected(): - data = mx.sym.var("data") - fc_weight = mx.sym.var("weight") - fc_bias = mx.sym.var("bias") - fc = mx.sym.FullyConnected(data=data, weight=fc_weight, bias=fc_bias, num_hidden=10, no_bias=False, name='fc') - data = mx.nd.random.uniform(shape=(5, 5, 5, 13), dtype=np.float32) - fc_weight = mx.nd.random.uniform(shape=(10, 325), dtype=np.float32) - fc_bias = mx.nd.random.uniform(shape=(10), dtype=np.float32) - fc_bias2 = mx.nd.random.uniform(shape=(10, 1), dtype=np.float32) - data_np = data.asnumpy().reshape(5, 325) - fc_weight_np = np.transpose(fc_weight.asnumpy()) - fc_bias_np = fc_bias.asnumpy() - res = np.dot(data_np, fc_weight_np) + fc_bias.asnumpy() - check_symbolic_forward(fc, {'data': data_np, 'weight': fc_weight.asnumpy(), 'bias': fc_bias_np}, {'fc_output': res}) - check_numeric_gradient(fc, {'data': data_np, 'weight': fc_weight.asnumpy(), 'bias': fc_bias_np}, - numeric_eps=1e-2, rtol=1e-4, atol=1e-2) - # TODO: Fix Bug #15032 when bias has ndim > 1 - #check_symbolic_forward(fc, {'data': data_np, 'weight': fc_weight.asnumpy(), 'bias': fc_bias2.asnumpy()}, {'fc_output': res}) - - @with_seed() def test_pow_fn(): shape = (3, 4)