From c2a9f1d0d031db9ece3ed00b1086b369488d08f3 Mon Sep 17 00:00:00 2001 From: Hao Jin Date: Tue, 29 May 2018 23:49:21 +0000 Subject: [PATCH] address code reviews and code re-design --- python/mxnet/ndarray/ndarray.py | 16 +++-- src/operator/tensor/histogram-inl.h | 87 ++++++++++++------------- src/operator/tensor/histogram.cc | 90 +++++++++++++++----------- src/operator/tensor/histogram.cu | 63 +++++++++--------- tests/python/unittest/test_operator.py | 33 +++++----- 5 files changed, 152 insertions(+), 137 deletions(-) diff --git a/python/mxnet/ndarray/ndarray.py b/python/mxnet/ndarray/ndarray.py index ebb0369d4ebe..efbdb84f15a9 100644 --- a/python/mxnet/ndarray/ndarray.py +++ b/python/mxnet/ndarray/ndarray.py @@ -3742,7 +3742,8 @@ def empty(shape, ctx=None, dtype=None): return NDArray(handle=_new_alloc_handle(shape, ctx, False, dtype)) -def histogram(a, bins=10, range_=None): +# pylint: disable= redefined-builtin +def histogram(a, bins=10, range=None): """Compute the histogram of the input data. Parameters @@ -3763,9 +3764,12 @@ def histogram(a, bins=10, range_=None): # pylint: disable= no-member, protected-access if isinstance(bins, NDArray): return _internal._histogram(data=a, bins=bins) - elif isinstance(bins, int): - if range_ is None: - range_ = (float(a.min().asnumpy()[0]), float(a.max().asnumpy()[0])) - return _internal._histogram(data=a, bins=array([]), bin_cnt=bins, range=range_) + elif isinstance(bins, integer_types): + if range is None: + warnings.warn("range_ is not specified, using numpy's result " + "to ensure consistency with numpy") + res, bin_bounds = np.histogram(a.asnumpy(), bins=bins) + return array(res), array(bin_bounds) + return _internal._histogram(data=a, bin_cnt=bins, range=range) return None - # pylint: enable= no-member, protected-access + # pylint: enable= no-member, protected-access, redefined-builtin diff --git a/src/operator/tensor/histogram-inl.h b/src/operator/tensor/histogram-inl.h index d91c98546335..18ae86249d9e 100644 --- a/src/operator/tensor/histogram-inl.h +++ b/src/operator/tensor/histogram-inl.h @@ -42,13 +42,13 @@ namespace op { struct HistogramParam : public dmlc::Parameter { dmlc::optional bin_cnt; - dmlc::optional> range; + dmlc::optional> range; DMLC_DECLARE_PARAMETER(HistogramParam) { DMLC_DECLARE_FIELD(bin_cnt) .set_default(dmlc::optional()) .describe("Number of bins for uniform case"); DMLC_DECLARE_FIELD(range) - .set_default(dmlc::optional>()) + .set_default(dmlc::optional>()) .describe("The lower and upper range of the bins. if not provided, " "range is simply (a.min(), a.max()). values outside the " "range are ignored. the first element of the range must be " @@ -61,9 +61,9 @@ struct HistogramParam : public dmlc::Parameter { struct FillBinBoundsKernel { template - static MSHADOW_XINLINE void Map(int i, DType* bin_bounds, int bin_cnt, float min, float max) { + static MSHADOW_XINLINE void Map(int i, DType* bin_bounds, int bin_cnt, double min, double max) { if (i <= bin_cnt) { - bin_bounds[i] = DType((i * max + (bin_cnt - i) * min) / bin_cnt); + bin_bounds[i] = DType((max * i + (bin_cnt - i) * min) / bin_cnt); } } }; @@ -71,13 +71,14 @@ struct FillBinBoundsKernel { inline bool HistogramOpShape(const nnvm::NodeAttrs& attrs, std::vector* in_attrs, std::vector* out_attrs) { - CHECK_EQ(in_attrs->size(), 2U); - CHECK_EQ(out_attrs->size(), 2U); HistogramParam param = nnvm::get(attrs.parsed); const bool has_cnt = param.bin_cnt.has_value(); const bool has_range = param.range.has_value(); const bool legal_param = (has_cnt && has_range) || (!has_cnt && !has_range); + CHECK_EQ(in_attrs->size(), has_cnt ? 1U : 2U); + CHECK_EQ(out_attrs->size(), 2U); CHECK(legal_param) << "cnt and range should both or neither specified"; + if (has_cnt) { // if cnt is specified, the output histogram has shape (cnt,) // while output bins has shape (cnt+1,) @@ -96,21 +97,18 @@ inline bool HistogramOpShape(const nnvm::NodeAttrs& attrs, SHAPE_ASSIGN_CHECK(*out_attrs, 1, in_attrs->at(1)); } - return out_attrs->at(0).ndim() == 1U && out_attrs->at(0).Size() != 0U && - out_attrs->at(1).ndim() == 1U && out_attrs->at(1).Size() != 0U && + return !shape_is_none(out_attrs->at(0)) && !shape_is_none(out_attrs->at(1)) && out_attrs->at(0).Size() == out_attrs->at(1).Size() - 1; } inline bool HistogramOpType(const nnvm::NodeAttrs& attrs, std::vector* in_attrs, std::vector* out_attrs) { - CHECK_EQ(in_attrs->size(), 2U); - CHECK_EQ(in_attrs->at(0), in_attrs->at(1)); CHECK_EQ(out_attrs->size(), 2U); TYPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::kInt64); TYPE_ASSIGN_CHECK(*out_attrs, 1, in_attrs->at(0)); - return out_attrs->at(0) != -1 && out_attrs->at(1) != -1; + return !type_is_none(out_attrs->at(0)) && !type_is_none(out_attrs->at(1)); } template @@ -122,54 +120,55 @@ void HistogramForwardImpl(mshadow::Stream* s, const TBlob& out_data, const TBlob& out_bins); +template +void HistogramForwardImpl(mshadow::Stream* s, + const OpContext& ctx, + const nnvm::NodeAttrs& attrs, + const TBlob& in_data, + const TBlob& out_data, + const TBlob& out_bins, + const int bin_cnt, + const double min, + const double max); + template void HistogramOpForward(const nnvm::NodeAttrs& attrs, const OpContext& ctx, const std::vector& inputs, const std::vector& req, const std::vector& outputs) { - CHECK_EQ(inputs.size(), 2U); - CHECK_EQ(outputs.size(), 2U); CHECK_EQ(req.size(), 2U); CHECK_EQ(req[0], kWriteTo); CHECK_EQ(req[1], kWriteTo); + const HistogramParam& param = nnvm::get(attrs.parsed); + const bool has_cnt = param.bin_cnt.has_value(); + const bool has_range = param.range.has_value(); + const bool legal_params = (has_cnt && has_range) || (!has_cnt && !has_range); + CHECK(legal_params) << "width and range should both or neither be specified"; mshadow::Stream *s = ctx.get_stream(); const TBlob& in_data = inputs[0]; - const TBlob& bin_bounds = inputs[1]; const TBlob& out_data = outputs[0]; const TBlob& out_bins = outputs[1]; - HistogramForwardImpl(s, ctx, attrs, in_data, bin_bounds, out_data, out_bins); -} - -template -void HistogramBackwardImpl(const OpContext& ctx, - const nnvm::NodeAttrs& attrs, - const TBlob& out_grad, - const TBlob& in_data, - const TBlob& bin_bounds, - const TBlob& out_data, - const TBlob& in_grad); - -template -void HistogramOpBackward(const nnvm::NodeAttrs& attrs, - const OpContext& ctx, - const std::vector& inputs, - const std::vector& req, - const std::vector& outputs) { - CHECK_EQ(inputs.size(), 4U); - CHECK_EQ(outputs.size(), 1U); - CHECK_EQ(req.size(), 1U); - CHECK_EQ(req[0], kWriteTo); - - const TBlob& out_grad = inputs[0]; - const TBlob& in_data = inputs[1]; - const TBlob& bin_bounds = inputs[2]; - const TBlob& out_data = inputs[3]; - const TBlob& in_grad = outputs[0]; - - HistogramBackwardImpl(ctx, attrs, out_grad, in_data, bin_bounds, out_data, in_grad); + if (has_cnt) { + CHECK((param.range.value().ndim() == 2U)) << "range should be a tuple with only 2 elements"; + CHECK(param.range.value()[0] <= param.range.value()[1]) + << "left hand side of range(" << param.range.value()[0] + << ")should be less than or equal to right hand side(" << param.range.value()[1] << ")"; + double max = param.range.value()[1]; + double min = param.range.value()[0]; + const int bin_cnt = param.bin_cnt.value(); + if (min == max) { + min -= 0.5f; + max += 0.5f; + LOG(INFO) << min << " " << max; + } + HistogramForwardImpl(s, ctx, attrs, in_data, out_data, out_bins, bin_cnt, min, max); + } else { + const TBlob& bin_bounds = inputs[1]; + HistogramForwardImpl(s, ctx, attrs, in_data, bin_bounds, out_data, out_bins); + } } } // namespace op diff --git a/src/operator/tensor/histogram.cc b/src/operator/tensor/histogram.cc index 12c3efe3a00c..e3bda5926c31 100644 --- a/src/operator/tensor/histogram.cc +++ b/src/operator/tensor/histogram.cc @@ -24,30 +24,32 @@ namespace op { struct ComputeBinKernel { template - MSHADOW_XINLINE static void Map(int i, const DType* in_data, int* bin_indices, - int bin_cnt, float width, float min, float max) { + MSHADOW_XINLINE static void Map(int i, const DType* in_data, const DType* bin_bounds, + int* bin_indices, int bin_cnt, double min, double max) { DType data = in_data[i]; + int target = -1; if (data >= min && data <= max) { - bin_indices[i] = mshadow_op::floor::Map((in_data[i] - min) / width); - bin_indices[i] = mshadow_op::minimum::Map(bin_cnt - 1, bin_indices[i]); - } else { - bin_indices[i] = -1; + target = (data - min) * bin_cnt / (max - min); + target = mshadow_op::minimum::Map(bin_cnt - 1, target); + target -= (data < bin_bounds[target]) ? 1 : 0; + target += ((data >= bin_bounds[target + 1]) && (target != bin_cnt - 1)) ? 1 : 0; } + bin_indices[i] = target; } template MSHADOW_XINLINE static void Map(int i, const DType* in_data, int* bin_indices, const DType* bin_bounds, int num_bins) { DType data = in_data[i]; - int target_idx = -1; + int target = -1; if (data >= bin_bounds[0] && data <= bin_bounds[num_bins]) { - target_idx = 0; - while ((data - bin_bounds[target_idx]) >= 0) { - target_idx += 1; + target = 0; + while ((data - bin_bounds[target]) >= 0) { + target += 1; } - target_idx = mshadow_op::minimum::Map(target_idx - 1, num_bins - 1); + target = mshadow_op::minimum::Map(target - 1, num_bins - 1); } - bin_indices[i] = target_idx; + bin_indices[i] = target; } }; @@ -71,37 +73,44 @@ void HistogramForwardImpl(mshadow::Stream* s, const TBlob& out_bins) { using namespace mshadow; using namespace mxnet_op; - HistogramParam param = nnvm::get(attrs.parsed); - const bool has_cnt = param.bin_cnt.has_value(); - const bool has_range = param.range.has_value(); - const bool legal_param = (has_cnt && has_range) || (!has_cnt && !has_range); - CHECK(legal_param) << "width and range should both or neither be specified"; - CHECK(!has_range || (has_range && (param.range.value().ndim() == 2U))) - << "range should be a tuple with only 2 elements"; - CHECK(!has_range || (has_range && (param.range.value()[0] < param.range.value()[1]))) - << "left hand side of range should be less than or equal to right hand side"; - Tensor bin_indices = ctx.requested[0].get_space_typed(Shape1(in_data.Size()), s); const int bin_cnt = out_data.Size(); MSHADOW_TYPE_SWITCH(in_data.type_flag_, DType, { - if (has_cnt) { - float max = param.range.value()[1]; - float min = param.range.value()[0]; - float width = (max - min) / bin_cnt; - Kernel::Launch( - s, in_data.Size(), in_data.dptr(), bin_indices.dptr_, - bin_cnt, width, min, max); - Kernel::Launch( - s, bin_cnt+1, out_bins.dptr(), bin_cnt, min, max); - } else { - Kernel::Launch( - s, in_data.Size(), in_data.dptr(), bin_indices.dptr_, bin_bounds.dptr(), - bin_cnt); - Kernel, cpu>::Launch( - s, bin_bounds.Size(), out_bins.dptr(), bin_bounds.dptr()); - } + Kernel::Launch( + s, in_data.Size(), in_data.dptr(), bin_indices.dptr_, bin_bounds.dptr(), + bin_cnt); + Kernel, cpu>::Launch( + s, bin_bounds.Size(), out_bins.dptr(), bin_bounds.dptr()); + }); + MSHADOW_TYPE_SWITCH(out_data.type_flag_, CType, { + Kernel::Launch(s, bin_cnt, out_data.dptr()); + ComputeHistogram(bin_indices.dptr_, out_data.dptr(), in_data.Size()); + }); +} + +template +void HistogramForwardImpl(mshadow::Stream* s, + const OpContext& ctx, + const nnvm::NodeAttrs& attrs, + const TBlob& in_data, + const TBlob& out_data, + const TBlob& out_bins, + const int bin_cnt, + const double min, + const double max) { + using namespace mshadow; + using namespace mxnet_op; + Tensor bin_indices = + ctx.requested[0].get_space_typed(Shape1(in_data.Size()), s); + + MSHADOW_TYPE_SWITCH(in_data.type_flag_, DType, { + Kernel::Launch( + s, bin_cnt+1, out_bins.dptr(), bin_cnt, min, max); + Kernel::Launch( + s, in_data.Size(), in_data.dptr(), out_bins.dptr(), bin_indices.dptr_, + bin_cnt, min, max); }); MSHADOW_TYPE_SWITCH(out_data.type_flag_, CType, { Kernel::Launch(s, bin_cnt, out_data.dptr()); @@ -124,7 +133,10 @@ Example:: )code" ADD_FILELINE) .set_attr_parser(ParamParser) -.set_num_inputs(2) +.set_num_inputs([](const NodeAttrs& attrs) { + const HistogramParam& params = nnvm::get(attrs.parsed); + return params.bin_cnt.has_value() ? 1 : 2; +}) .set_num_outputs(2) .set_attr("FListInputNames", [](const NodeAttrs& attrs) { diff --git a/src/operator/tensor/histogram.cu b/src/operator/tensor/histogram.cu index c7c583bb733b..58174227875a 100644 --- a/src/operator/tensor/histogram.cu +++ b/src/operator/tensor/histogram.cu @@ -23,17 +23,17 @@ namespace mxnet { namespace op { - struct HistogramFusedKernel { template - static MSHADOW_XINLINE void Map(int i, const DType* in_data, CType* bins, - const int bin_cnt, const float width, - const float min, const float max) { + static MSHADOW_XINLINE void Map(int i, const DType* in_data, const DType* bin_bounds, CType* bins, + const int bin_cnt, const double min, const double max) { DType data = in_data[i]; int target = -1; if (data >= min && data <= max) { target = mshadow_op::floor::Map((data - min) * bin_cnt / (max - min)); target = mshadow_op::minimum::Map(bin_cnt - 1, target); + target -= (data < bin_bounds[target]) ? 1 : 0; + target += ((data >= bin_bounds[target + 1]) && (target != bin_cnt - 1)) ? 1 : 0; } if (target >= 0) { atomicAdd(&bins[target], CType(1)); @@ -68,36 +68,39 @@ void HistogramForwardImpl(mshadow::Stream* s, const TBlob& out_bins) { using namespace mshadow; using namespace mxnet_op; - HistogramParam param = nnvm::get(attrs.parsed); - const bool has_cnt = param.bin_cnt.has_value(); - const bool has_range = param.range.has_value(); - const bool legal_param = (has_cnt && has_range) || (!has_cnt && !has_range); - CHECK(legal_param) << "width and range should both or neither be specified"; - - CHECK(!has_range || (has_range && (param.range.value().ndim() == 2U))); - CHECK(!has_range || (has_range && (param.range.value()[0] < param.range.value()[1]))); - MSHADOW_TYPE_SWITCH(in_data.type_flag_, DType, { MSHADOW_IDX_TYPE_SWITCH(out_data.type_flag_, CType, { int bin_cnt = out_bins.Size() - 1; Kernel::Launch(s, bin_cnt, out_data.dptr()); - if (has_cnt) { - bin_cnt = param.bin_cnt.value(); - const float max = param.range.value()[1]; - const float min = param.range.value()[0]; - const float width = (max - min) / bin_cnt; - Kernel::Launch( - s, in_data.Size(), in_data.dptr(), out_data.dptr(), - bin_cnt, width, min, max); - Kernel::Launch( - s, bin_cnt+1, out_bins.dptr(), bin_cnt, min, max); - } else { - Kernel::Launch( - s, in_data.Size(), in_data.dptr(), bin_bounds.dptr(), - out_data.dptr(), bin_cnt); - Kernel, gpu>::Launch( - s, bin_bounds.Size(), out_bins.dptr(), bin_bounds.dptr()); - } + Kernel::Launch( + s, in_data.Size(), in_data.dptr(), bin_bounds.dptr(), + out_data.dptr(), bin_cnt); + Kernel, gpu>::Launch( + s, bin_bounds.Size(), out_bins.dptr(), bin_bounds.dptr()); + }); + }); +} + +template +void HistogramForwardImpl(mshadow::Stream* s, + const OpContext& ctx, + const nnvm::NodeAttrs& attrs, + const TBlob& in_data, + const TBlob& out_data, + const TBlob& out_bins, + const int bin_cnt, + const double min, + const double max) { + using namespace mshadow; + using namespace mxnet_op; + MSHADOW_TYPE_SWITCH(in_data.type_flag_, DType, { + MSHADOW_IDX_TYPE_SWITCH(out_data.type_flag_, CType, { + Kernel::Launch(s, bin_cnt, out_data.dptr()); + Kernel::Launch( + s, bin_cnt+1, out_bins.dptr(), bin_cnt, min, max); + Kernel::Launch( + s, in_data.Size(), in_data.dptr(), out_bins.dptr(), out_data.dptr(), + bin_cnt, min, max); }); }); } diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index 29604fd8d1b8..960ce981ccac 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -5955,24 +5955,21 @@ def test_histogram(): def f(x, bins=10, range=None): return np.histogram(x, bins, range=range) - shape = (3, 3, 3) - x = rand_ndarray(shape, stype='default') - mx_bins = mx.nd.array([-1.0, 0.5, 2.0, 4.5, 50.0]) - np_bins = mx_bins.asnumpy() - bin_cnt = 5 - bin_range = (-2.5, 2.5) - mx_histo1, mx_bins1 = mx.nd.histogram(x, bins=bin_cnt, range_=bin_range) - np_histo1, np_bins1 = f(x.asnumpy(), bins=bin_cnt, range=bin_range) - assert_almost_equal(mx_bins1.asnumpy(), np_bins1) - assert_almost_equal(mx_histo1.asnumpy(), np_histo1, rtol=1e-3, atol=1e-5) - mx_histo2, mx_bins2 = mx.nd.histogram(x, bins=mx_bins) - np_histo2, np_bins2 = f(x.asnumpy(), bins=np_bins) - assert_almost_equal(mx_histo2.asnumpy(), np_histo2, rtol=1e-3, atol=1e-5) - assert_almost_equal(mx_bins2.asnumpy(), np_bins2, rtol=1e-3, atol=1e-5) - mx_histo3, mx_bins3 = mx.nd.histogram(x, bins=bin_cnt) - np_histo3, np_bins3 = f(x.asnumpy(), bins=bin_cnt) - assert_almost_equal(mx_bins3.asnumpy(), np_bins3, atol=1e-5) - assert_almost_equal(mx_histo3.asnumpy(), np_histo3, rtol=1e-3, atol=1e-5) + for ndim in range(1, 6): + shape = rand_shape_nd(ndim) + x = rand_ndarray(shape, stype='default', dtype=np.float64) + mx_bins = mx.nd.array([-1.0, 0.5, 2.0, 4.5, 50.0], dtype=np.float64) + np_bins = mx_bins.asnumpy() + bin_cnt = random.randint(2, 10) + bin_range = (-2.5, 2.5) + mx_histo1, mx_bins1 = mx.nd.histogram(x, bins=bin_cnt, range=bin_range) + np_histo1, np_bins1 = f(x.asnumpy(), bins=bin_cnt, range=bin_range) + assert_almost_equal(mx_bins1.asnumpy(), np_bins1) + assert_almost_equal(mx_histo1.asnumpy(), np_histo1, rtol=1e-3, atol=1e-5) + mx_histo2, mx_bins2 = mx.nd.histogram(x, bins=mx_bins) + np_histo2, np_bins2 = f(x.asnumpy(), bins=np_bins) + assert_almost_equal(mx_histo2.asnumpy(), np_histo2, rtol=1e-3, atol=1e-5) + assert_almost_equal(mx_bins2.asnumpy(), np_bins2, rtol=1e-3, atol=1e-5) def test_op_output_names_monitor():