From 2df5756cc22846a8ff56b5ed52877c5519bf0773 Mon Sep 17 00:00:00 2001 From: Wang Jiajun Date: Mon, 11 Mar 2019 15:30:03 +0800 Subject: [PATCH] add backgroud class in box_nms (#14058) * add backgroud class in box_nms * add unittest * trigger CI --- src/operator/contrib/bounding_box-common.h | 24 ++++++++- src/operator/contrib/bounding_box-inl.cuh | 26 ++++------ src/operator/contrib/bounding_box-inl.h | 49 ++++++++++++------- src/operator/contrib/bounding_box.cc | 8 ++- .../python/unittest/test_contrib_operator.py | 41 ++++++++++++---- 5 files changed, 102 insertions(+), 46 deletions(-) diff --git a/src/operator/contrib/bounding_box-common.h b/src/operator/contrib/bounding_box-common.h index 4c9b1b86d10c..69a96c60569a 100644 --- a/src/operator/contrib/bounding_box-common.h +++ b/src/operator/contrib/bounding_box-common.h @@ -114,12 +114,32 @@ struct nms_impl { namespace mshadow_op { struct less_than : public mxnet_op::tunable { - // a is x, b is sigma template MSHADOW_XINLINE static DType Map(DType a, DType b) { return static_cast(a < b); } -}; // struct equal_to +}; + +struct greater_than : public mxnet_op::tunable { + template + MSHADOW_XINLINE static DType Map(DType a, DType b) { + return static_cast(a > b); + } +}; + +struct not_equal : public mxnet_op::tunable { + template + MSHADOW_XINLINE static DType Map(DType a, DType b) { + return static_cast(a != b); + } +}; + +struct bool_and : public mxnet_op::tunable { + template + MSHADOW_XINLINE static DType Map(DType a, DType b) { + return static_cast(a && b); + } +}; } // namespace mshadow_op } // namespace op diff --git a/src/operator/contrib/bounding_box-inl.cuh b/src/operator/contrib/bounding_box-inl.cuh index e7f5567f469a..de215ce35a98 100644 --- a/src/operator/contrib/bounding_box-inl.cuh +++ b/src/operator/contrib/bounding_box-inl.cuh @@ -38,26 +38,20 @@ namespace mxnet { namespace op { template -struct valid_score { - DType thresh; - explicit valid_score(DType _thresh) : thresh(_thresh) {} +struct valid_value { __host__ __device__ bool operator()(const DType x) { - return x > thresh; + return static_cast(x); } }; -template -int FilterScores(mshadow::Tensor out_scores, - mshadow::Tensor out_sorted_index, - mshadow::Tensor scores, - mshadow::Tensor sorted_index, - float valid_thresh) { - valid_score pred(static_cast(valid_thresh)); - DType * end_scores = thrust::copy_if(thrust::device, scores.dptr_, scores.dptr_ + scores.MSize(), - out_scores.dptr_, pred); - thrust::copy_if(thrust::device, sorted_index.dptr_, sorted_index.dptr_ + sorted_index.MSize(), - scores.dptr_, out_sorted_index.dptr_, pred); - return end_scores - out_scores.dptr_; +template +int CopyIf(mshadow::Tensor out, + mshadow::Tensor value, + mshadow::Tensor flag) { + valid_value pred; + DType *end_out = thrust::copy_if(thrust::device, value.dptr_, value.dptr_ + value.MSize(), + flag.dptr_, out.dptr_, pred); + return end_out - out.dptr_; } // compute line intersect along either height or width diff --git a/src/operator/contrib/bounding_box-inl.h b/src/operator/contrib/bounding_box-inl.h index 8610dcca8e10..37c4297ff49d 100644 --- a/src/operator/contrib/bounding_box-inl.h +++ b/src/operator/contrib/bounding_box-inl.h @@ -51,6 +51,7 @@ struct BoxNMSParam : public dmlc::Parameter { int coord_start; int score_index; int id_index; + int background_id; bool force_suppress; int in_format; int out_format; @@ -67,6 +68,8 @@ struct BoxNMSParam : public dmlc::Parameter { .describe("Index of the scores/confidence of boxes."); DMLC_DECLARE_FIELD(id_index).set_default(-1) .describe("Optional, index of the class categories, -1 to disable."); + DMLC_DECLARE_FIELD(background_id).set_default(-1) + .describe("Optional, id of the background class which will be ignored in nms."); DMLC_DECLARE_FIELD(force_suppress).set_default(false) .describe("Optional, if set false and id_index is provided, nms will only apply" " to boxes belongs to the same category"); @@ -103,7 +106,7 @@ inline bool BoxNMSShape(const nnvm::NodeAttrs& attrs, << ishape << " provided"; int width_elem = ishape[indim - 1]; int expected = 5; - if (param.id_index > 0) { + if (param.id_index >= 0) { expected += 1; } CHECK_GE(width_elem, expected) @@ -145,24 +148,20 @@ inline uint32_t BoxNMSNumVisibleOutputs(const NodeAttrs& attrs) { return static_cast(1); } -template -int FilterScores(mshadow::Tensor out_scores, - mshadow::Tensor out_sorted_index, - mshadow::Tensor scores, - mshadow::Tensor sorted_index, - float valid_thresh) { +template +int CopyIf(mshadow::Tensor out, + mshadow::Tensor value, + mshadow::Tensor flag) { index_t j = 0; - for (index_t i = 0; i < scores.size(0); i++) { - if (scores[i] > valid_thresh) { - out_scores[j] = scores[i]; - out_sorted_index[j] = sorted_index[i]; + for (index_t i = 0; i < flag.size(0); i++) { + if (static_cast(flag[i])) { + out[j] = value[i]; j++; } } return j; } - struct corner_to_center { template MSHADOW_XINLINE static void Map(int i, DType *data, int stride) { @@ -351,6 +350,7 @@ void BoxNMSForward(const nnvm::NodeAttrs& attrs, int num_batch = indim <= 2? 1 : in_shape.ProdShape(0, indim - 2); int num_elem = in_shape[indim - 2]; int width_elem = in_shape[indim - 1]; + bool class_exist = param.id_index >= 0; MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, DType, { Tensor data = inputs[box_nms_enum::kData] .get_with_shape(Shape3(num_batch, num_elem, width_elem), s); @@ -366,7 +366,7 @@ void BoxNMSForward(const nnvm::NodeAttrs& attrs, // index index_t int32_size = sort_index_shape.Size() * 3 + batch_start_shape.Size(); - index_t dtype_size = sort_index_shape.Size() * 2; + index_t dtype_size = sort_index_shape.Size() * 3; if (req[0] == kWriteInplace) { dtype_size += buffer_shape.Size(); } @@ -385,6 +385,7 @@ void BoxNMSForward(const nnvm::NodeAttrs& attrs, Tensor scores(workspace.dptr_ + int32_offset, sort_index_shape, s); Tensor areas(scores.dptr_ + scores.MSize(), sort_index_shape, s); + Tensor classes(areas.dptr_ + areas.MSize(), sort_index_shape, s); Tensor buffer = data; if (req[0] == kWriteInplace) { // make copy @@ -405,16 +406,30 @@ void BoxNMSForward(const nnvm::NodeAttrs& attrs, return; } - // use batch_id and areas as temporary storage + // use classes, areas and scores as temporary storage Tensor all_scores = areas; - // Tensor all_sorted_index = areas; all_scores = reshape(slice<2>(buffer, score_index, score_index + 1), all_scores.shape_); all_sorted_index = range(0, num_batch * num_elem); + Tensor all_classes = classes; + if (class_exist) { + all_classes = reshape(slice<2>(buffer, id_index, id_index + 1), classes.shape_); + } // filter scores but keep original sorted_index value // move valid score and index to the front, return valid size - int num_valid = mxnet::op::FilterScores(scores, sorted_index, all_scores, all_sorted_index, - param.valid_thresh); + Tensor valid_box = scores; + if (class_exist) { + valid_box = F( + F(all_scores, ScalarExp(param.valid_thresh)), + F(all_classes, ScalarExp(param.background_id))); + } else { + valid_box = F(all_scores, ScalarExp(param.valid_thresh)); + } + classes = F(valid_box); + valid_box = classes; + int num_valid = mxnet::op::CopyIf(scores, all_scores, valid_box); + mxnet::op::CopyIf(sorted_index, all_sorted_index, valid_box); + // if everything is filtered, output -1 if (num_valid == 0) { record = -1; diff --git a/src/operator/contrib/bounding_box.cc b/src/operator/contrib/bounding_box.cc index d73f99245118..d682fafec092 100644 --- a/src/operator/contrib/bounding_box.cc +++ b/src/operator/contrib/bounding_box.cc @@ -38,8 +38,9 @@ NNVM_REGISTER_OP(_contrib_box_nms) .describe(R"code(Apply non-maximum suppression to input. The output will be sorted in descending order according to `score`. Boxes with -overlaps larger than `overlap_thresh` and smaller scores will be removed and -filled with -1, the corresponding position will be recorded for backward propogation. +overlaps larger than `overlap_thresh`, smaller scores and background boxes +will be removed and filled with -1, the corresponding position will be recorded +for backward propogation. During back-propagation, the gradient will be copied to the original position according to the input index. For positions that have been suppressed, @@ -60,6 +61,9 @@ additional elements are allowed. - `id_index`: optional, use -1 to ignore, useful if `force_suppress=False`, which means we will skip highly overlapped boxes if one is `apple` while the other is `car`. +- `background_id`: optional, default=-1, class id for background boxes, useful + when `id_index >= 0` which means boxes with background id will be filtered before nms. + - `coord_start`: required, default=2, the starting index of the 4 coordinates. Two formats are supported: diff --git a/tests/python/unittest/test_contrib_operator.py b/tests/python/unittest/test_contrib_operator.py index aac807660af1..38aeb99c2d89 100644 --- a/tests/python/unittest/test_contrib_operator.py +++ b/tests/python/unittest/test_contrib_operator.py @@ -26,23 +26,23 @@ import unittest def test_box_nms_op(): - def test_box_nms_forward(data, expected, thresh=0.5, valid=0, topk=-1, coord=2, score=1, cid=0, - force=False, in_format='corner', out_format='corner'): + def test_box_nms_forward(data, expected, thresh=0.5, valid=0, topk=-1, coord=2, score=1, cid=0, bid=-1, + force=False, in_format='corner', out_format='corner'): for dtype in ['float16', 'float32', 'float64']: data = mx.nd.array(data, dtype=dtype) out = mx.contrib.nd.box_nms(data, overlap_thresh=thresh, valid_thresh=valid, topk=topk, - coord_start=coord, score_index=score, id_index=cid, - force_suppress=force, in_format=in_format, out_format=out_format) + coord_start=coord, score_index=score, id_index=cid, background_id=bid, + force_suppress=force, in_format=in_format, out_format=out_format) assert_almost_equal(out.asnumpy(), expected.astype(dtype), rtol=1e-3, atol=1e-3) def test_box_nms_backward(data, grad, expected, thresh=0.5, valid=0, topk=-1, coord=2, score=1, - cid=0, force=False, in_format='corner', out_format='corner'): + cid=0, bid=-1, force=False, in_format='corner', out_format='corner'): in_var = mx.sym.Variable('data') arr_data = mx.nd.array(data) arr_grad = mx.nd.empty(arr_data.shape) op = mx.contrib.sym.box_nms(in_var, overlap_thresh=thresh, valid_thresh=valid, topk=topk, - coord_start=coord, score_index=score, id_index=cid, - force_suppress=force, in_format=in_format, out_format=out_format) + coord_start=coord, score_index=score, id_index=cid, background_id=bid, + force_suppress=force, in_format=in_format, out_format=out_format) exe = op.bind(ctx=default_context(), args=[arr_data], args_grad=[arr_grad]) exe.forward(is_train=True) exe.backward(mx.nd.array(grad)) @@ -91,8 +91,8 @@ def swap_position(data, expected, coord=2, score=1, cid=0, new_col=0): [0, 0.3, 0.1, 0.1, 0.14, 0.14], [2, 0.6, 0.5, 0.5, 0.7, 0.8]] # case1 - force=True - thresh=0.5 + force = True + thresh = 0.5 expected = [[2, 0.6, 0.5, 0.5, 0.7, 0.8], [0, 0.5, 0.1, 0.1, 0.2, 0.2], [0, 0.3, 0.1, 0.1, 0.14, 0.14], [-1, -1, -1, -1, -1, -1]] grad = np.random.rand(4, 6) @@ -176,6 +176,29 @@ def swap_position(data, expected, coord=2, score=1, cid=0, new_col=0): test_box_nms_forward(np.array(boxes8), np.array(expected8), force=force, thresh=thresh, valid=valid, topk=topk) test_box_nms_backward(np.array(boxes8), grad8, expected_in_grad8, force=force, thresh=thresh, valid=valid, topk=topk) + # case9: background id filter out + # default background id -1 + boxes9 = [[0, 0.5, 0.1, 0.1, 0.2, 0.2], [0, 0.4, 0.1, 0.1, 0.2, 0.2], + [1, 0.3, 0.1, 0.1, 0.14, 0.14], [-1, 0.6, 0.5, 0.5, 0.7, 0.8]] + expected9 = [[0, 0.5, 0.1, 0.1, 0.2, 0.2], [1, 0.3, 0.1, 0.1, 0.14, 0.14], + [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1]] + force = True + thresh = 0.5 + grad9 = np.random.rand(4, 6) + expected_in_grad9 = grad9[(0, 2, 1, 3), :] + expected_in_grad9[(1, 3), :] = 0 + test_box_nms_forward(np.array(boxes9), np.array(expected9), force=force, thresh=thresh) + test_box_nms_backward(np.array(boxes9), grad9, expected_in_grad9, force=force, thresh=thresh) + # set background id + background_id = 0 + expected9 = [[-1, 0.6, 0.5, 0.5, 0.7, 0.8], [1, 0.3, 0.1, 0.1, 0.14, 0.14], + [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1]] + grad9 = np.random.rand(4, 6) + expected_in_grad9 = grad9[(2, 3, 1, 0), :] + expected_in_grad9[(0, 1), :] = 0 + test_box_nms_forward(np.array(boxes9), np.array(expected9), force=force, thresh=thresh, bid=background_id) + test_box_nms_backward(np.array(boxes9), grad9, expected_in_grad9, force=force, thresh=thresh, bid=background_id) + def test_box_iou_op(): def numpy_box_iou(a, b, fmt='corner'): def area(left, top, right, bottom):