Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
[MXNET-349] Histogram Operator (#10931)
Browse files Browse the repository at this point in the history
* implementation of histogram operator

* address code reviews and code re-design

* add exception for invalid inputs

* address code reviews

* add symbol and symbolic forward check for histogram
  • Loading branch information
haojin2 authored and piiswrong committed Jun 25, 2018
1 parent 21ff36b commit ed7e360
Show file tree
Hide file tree
Showing 8 changed files with 571 additions and 4 deletions.
35 changes: 34 additions & 1 deletion python/mxnet/ndarray/ndarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
"ones", "add", "arange", "eye", "divide", "equal", "full", "greater", "greater_equal",
"imdecode", "lesser", "lesser_equal", "logical_and", "logical_or", "logical_xor",
"maximum", "minimum", "moveaxis", "modulo", "multiply", "not_equal", "onehot_encode",
"power", "subtract", "true_divide", "waitall", "_new_empty_handle"]
"power", "subtract", "true_divide", "waitall", "_new_empty_handle", "histogram"]

_STORAGE_TYPE_UNDEFINED = -1
_STORAGE_TYPE_DEFAULT = 0
Expand Down Expand Up @@ -3740,3 +3740,36 @@ def empty(shape, ctx=None, dtype=None):
if dtype is None:
dtype = mx_real_t
return NDArray(handle=_new_alloc_handle(shape, ctx, False, dtype))


# pylint: disable= redefined-builtin
def histogram(a, bins=10, range=None):
"""Compute the histogram of the input data.
Parameters
----------
a : NDArray
Input data. The histogram is computed over the flattened array.
bins : int or sequence of scalars
If bins is an int, it defines the number of equal-width bins in the
given range (10, by default). If bins is a sequence, it defines the bin edges,
including the rightmost edge, allowing for non-uniform bin widths.
range : (float, float), optional
The lower and upper range of the bins. If not provided, range is simply (a.min(), a.max()).
Values outside the range are ignored. The first element of the range must be less than or
equal to the second. range affects the automatic bin computation as well, the range will
be equally divided by the number of bins.
"""

# pylint: disable= no-member, protected-access
if isinstance(bins, NDArray):
return _internal._histogram(data=a, bins=bins)
elif isinstance(bins, integer_types):
if range is None:
warnings.warn("range is not specified, using numpy's result "
"to ensure consistency with numpy")
res, bin_bounds = np.histogram(a.asnumpy(), bins=bins)
return array(res), array(bin_bounds)
return _internal._histogram(data=a, bin_cnt=bins, range=range)
raise ValueError("bins argument should be either an integer or an NDArray")
# pylint: enable= no-member, protected-access, redefined-builtin
30 changes: 28 additions & 2 deletions python/mxnet/symbol/symbol.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

from ..attribute import AttrScope
from ..base import _LIB, numeric_types, c_array, c_array_buf, c_str, c_str_array, c_handle_array
from ..base import mx_uint, py_str, string_types
from ..base import mx_uint, py_str, string_types, integer_types
from ..base import NDArrayHandle, ExecutorHandle, SymbolHandle
from ..base import check_call, MXNetError, NotImplementedForSymbol
from ..context import Context, current_context
Expand All @@ -47,7 +47,8 @@
from ._internal import SymbolBase, _set_symbol_class

__all__ = ["Symbol", "var", "Variable", "Group", "load", "load_json",
"pow", "maximum", "minimum", "hypot", "eye", "zeros", "ones", "full", "arange"]
"pow", "maximum", "minimum", "hypot", "eye", "zeros", "ones", "full", "arange",
"histogram"]


class Symbol(SymbolBase):
Expand Down Expand Up @@ -2864,4 +2865,29 @@ def arange(start, stop=None, step=1.0, repeat=1, name=None, dtype=None):
return _internal._arange(start=start, stop=stop, step=step, repeat=repeat,
name=name, dtype=dtype)

def histogram(a, bins=10, range=None, **kwargs):
"""Compute the histogram of the input data.
Parameters
----------
a : NDArray
Input data. The histogram is computed over the flattened array.
bins : int or sequence of scalars
If bins is an int, it defines the number of equal-width bins in the
given range (10, by default). If bins is a sequence, it defines the bin edges,
including the rightmost edge, allowing for non-uniform bin widths.
range : (float, float), required if bins is an integer
The lower and upper range of the bins. If not provided, range is simply (a.min(), a.max()).
Values outside the range are ignored. The first element of the range must be less than or
equal to the second. range affects the automatic bin computation as well, the range will
be equally divided by the number of bins.
"""
if isinstance(bins, Symbol):
return _internal._histogram(data=a, bins=bins, **kwargs)
elif isinstance(bins, integer_types):
if range is None:
raise ValueError("null range is not supported in symbol mode")
return _internal._histogram(data=a, bin_cnt=bins, range=range, **kwargs)
raise ValueError("bins argument should be either an integer or an NDArray")

_set_symbol_class(Symbol)
30 changes: 30 additions & 0 deletions src/common/cuda_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,36 @@ static inline __device__ void atomicAdd(mshadow::half::half_t *address,
} while (assumed != old);
}

static inline __device__ void atomicAdd(uint8_t *address, uint8_t val) {
unsigned int * address_as_ui = (unsigned int *) (address - ((size_t)address & 0x3));
unsigned int old = *address_as_ui;
unsigned int shift = (((size_t)address & 0x3) << 3);
unsigned int sum;
unsigned int assumed;

do {
assumed = old;
sum = val + static_cast<uint8_t>((old >> shift) & 0xff);
old = (old & ~(0x000000ff << shift)) | (sum << shift);
old = atomicCAS(address_as_ui, assumed, old);
} while (assumed != old);
}

static inline __device__ void atomicAdd(int8_t *address, int8_t val) {
unsigned int * address_as_ui = (unsigned int *) (address - ((size_t)address & 0x3));
unsigned int old = *address_as_ui;
unsigned int shift = (((size_t)address & 0x3) << 3);
unsigned int sum;
unsigned int assumed;

do {
assumed = old;
sum = val + static_cast<int8_t>((old >> shift) & 0xff);
old = (old & ~(0x000000ff << shift)) | (sum << shift);
old = atomicCAS(address_as_ui, assumed, old);
} while (assumed != old);
}

// Overload atomicAdd to work for signed int64 on all architectures
static inline __device__ void atomicAdd(int64_t *address, int64_t val) {
atomicAdd(reinterpret_cast<unsigned long long*>(address), static_cast<unsigned long long>(val)); // NOLINT
Expand Down
172 changes: 172 additions & 0 deletions src/operator/tensor/histogram-inl.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#ifndef MXNET_OPERATOR_TENSOR_HISTOGRAM_INL_H_
#define MXNET_OPERATOR_TENSOR_HISTOGRAM_INL_H_

#include <dmlc/logging.h>
#include <dmlc/parameter.h>
#include <mxnet/operator.h>
#include <mxnet/operator_util.h>
#include <dmlc/optional.h>
#include <mshadow/tensor.h>
#include <nnvm/op.h>
#include <nnvm/node.h>
#include <nnvm/op_attr_types.h>
#include <vector>
#include <type_traits>
#include "./util/tensor_util-inl.h"
#include "../elemwise_op_common.h"
#include "../mshadow_op.h"
#include "../mxnet_op.h"
#include "../operator_common.h"

namespace mxnet {
namespace op {

struct HistogramParam : public dmlc::Parameter<HistogramParam> {
dmlc::optional<int> bin_cnt;
dmlc::optional<nnvm::Tuple<double>> range;
DMLC_DECLARE_PARAMETER(HistogramParam) {
DMLC_DECLARE_FIELD(bin_cnt)
.set_default(dmlc::optional<int>())
.describe("Number of bins for uniform case");
DMLC_DECLARE_FIELD(range)
.set_default(dmlc::optional<nnvm::Tuple<double>>())
.describe("The lower and upper range of the bins. if not provided, "
"range is simply (a.min(), a.max()). values outside the "
"range are ignored. the first element of the range must be "
"less than or equal to the second. range affects the automatic "
"bin computation as well. while bin width is computed to be "
"optimal based on the actual data within range, the bin count "
"will fill the entire range including portions containing no data.");
}
};

struct FillBinBoundsKernel {
template<typename DType>
static MSHADOW_XINLINE void Map(int i, DType* bin_bounds, int bin_cnt, double min, double max) {
if (i <= bin_cnt) {
bin_bounds[i] = DType((max * i + (bin_cnt - i) * min) / bin_cnt);
}
}
};

inline bool HistogramOpShape(const nnvm::NodeAttrs& attrs,
std::vector<TShape>* in_attrs,
std::vector<TShape>* out_attrs) {
HistogramParam param = nnvm::get<HistogramParam>(attrs.parsed);
const bool has_cnt = param.bin_cnt.has_value();
const bool has_range = param.range.has_value();
const bool legal_param = (has_cnt && has_range) || (!has_cnt && !has_range);
CHECK_EQ(in_attrs->size(), has_cnt ? 1U : 2U);
CHECK_EQ(out_attrs->size(), 2U);
CHECK(legal_param) << "cnt and range should both or neither specified";

if (has_cnt) {
// if cnt is specified, the output histogram has shape (cnt,)
// while output bins has shape (cnt+1,)
const int bin_cnt = param.bin_cnt.value();
SHAPE_ASSIGN_CHECK(*out_attrs, 0, TShape({bin_cnt}));
SHAPE_ASSIGN_CHECK(*out_attrs, 1, TShape({bin_cnt + 1}));
} else {
// if cnt is not specified, the output histogram has shape (bins.Size() - 1)
// while output bins has same shape as input bins
TShape oshape = (*in_attrs)[1];

CHECK_EQ(oshape.ndim(), 1U) << "bins argument should be an 1D vector";
CHECK_GE(oshape.Size(), 2U) << "number of bounds should be >= 2";

SHAPE_ASSIGN_CHECK(*out_attrs, 0, TShape({(oshape[0] - 1)}));
SHAPE_ASSIGN_CHECK(*out_attrs, 1, in_attrs->at(1));
}

return !shape_is_none(out_attrs->at(0)) && !shape_is_none(out_attrs->at(1)) &&
out_attrs->at(0).Size() == out_attrs->at(1).Size() - 1;
}

inline bool HistogramOpType(const nnvm::NodeAttrs& attrs,
std::vector<int>* in_attrs,
std::vector<int>* out_attrs) {
CHECK_EQ(out_attrs->size(), 2U);

TYPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::kInt64);
TYPE_ASSIGN_CHECK(*out_attrs, 1, in_attrs->at(0));
return !type_is_none(out_attrs->at(0)) && !type_is_none(out_attrs->at(1));
}

template<typename xpu>
void HistogramForwardImpl(const OpContext& ctx,
const TBlob& in_data,
const TBlob& bin_bounds,
const TBlob& out_data,
const TBlob& out_bins);

template<typename xpu>
void HistogramForwardImpl(const OpContext& ctx,
const TBlob& in_data,
const TBlob& out_data,
const TBlob& out_bins,
const int bin_cnt,
const double min,
const double max);

template<typename xpu>
void HistogramOpForward(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
CHECK_EQ(req.size(), 2U);
CHECK_EQ(req[0], kWriteTo);
CHECK_EQ(req[1], kWriteTo);
const HistogramParam& param = nnvm::get<HistogramParam>(attrs.parsed);
const bool has_cnt = param.bin_cnt.has_value();
const bool has_range = param.range.has_value();
const bool legal_params = (has_cnt && has_range) || (!has_cnt && !has_range);
CHECK(legal_params) << "width and range should both or neither be specified";

const TBlob& in_data = inputs[0];
const TBlob& out_data = outputs[0];
const TBlob& out_bins = outputs[1];

if (has_cnt) {
CHECK((param.range.value().ndim() == 2U)) << "range should be a tuple with only 2 elements";
CHECK(param.range.value()[0] <= param.range.value()[1])
<< "left hand side of range(" << param.range.value()[0]
<< ")should be less than or equal to right hand side(" << param.range.value()[1] << ")";
double max = param.range.value()[1];
double min = param.range.value()[0];
const int bin_cnt = param.bin_cnt.value();
if (min == max) {
min -= 0.5f;
max += 0.5f;
LOG(INFO) << min << " " << max;
}
HistogramForwardImpl<xpu>(ctx, in_data, out_data, out_bins, bin_cnt, min, max);
} else {
const TBlob& bin_bounds = inputs[1];
HistogramForwardImpl<xpu>(ctx, in_data, bin_bounds, out_data, out_bins);
}
}

} // namespace op
} // namespace mxnet

#endif // MXNET_OPERATOR_TENSOR_HISTOGRAM_INL_H_
Loading

0 comments on commit ed7e360

Please sign in to comment.