forked from apache/mxnet
[MKLDNN] add quantized sum (apache#14614)
* add quantized sum
* fix gpu compiler error and cpu testcase fail
* add default forward function for quantized_sum
* skip quantized_sum for gpu ctx
* fix comments
* fix indetation and comments
* retrigger CI
* alloc memeory through TmpMemMgr
* fix comments Apr.12
* change sum to elemwise_add
* change Sum to ElemwiseAdd
* fix indents
* retrigger CI
* trigger CI
* fix indentation and typo
* trigger CI
* fix typo
* fix typo
* remove USE_MKLDNN macro for requantize params
* rename param same as its op
* trigger CI
* trigger CI
* trigger CI
Showing 8 changed files with 518 additions and 28 deletions.
src/operator/quantization/mkldnn/mkldnn_quantized_elemwise_add.cc (206 additions, 0 deletions)
@@ -0,0 +1,206 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*!
 * Copyright (c) 2019 by Contributors
 * \file mkldnn_quantized_elemwise_add.cc
 * \brief
 */

#if MXNET_USE_MKLDNN == 1
#include "../quantized_elemwise_add-inl.h"
#include "../../nn/mkldnn/mkldnn_ops-inl.h"
#include "../../nn/mkldnn/mkldnn_base-inl.h"
#include "../quantization_utils.h"

namespace mxnet {
namespace op {

DMLC_REGISTER_PARAMETER(QuantizeElemwiseAddParam);

static inline float GetScale(const NDArray& data, float min, float max) {
  auto data_range = (data.dtype() == mshadow::kInt8) ? kInt8Range : kUint8Range;
  return data_range / MaxAbs(min, max);
}

static void MKLDNNQuantizedElemwiseAddForward(const nnvm::NodeAttrs& attrs, const OpContext& ctx,
                                              const std::vector<NDArray>& in_data,
                                              const std::vector<OpReqType>& req,
                                              const std::vector<NDArray>& out_data) {
  const QuantizeElemwiseAddParam& params = nnvm::get<QuantizeElemwiseAddParam>(attrs.parsed);
  // A, B, A_min, A_max, B_min, B_max
  CHECK_EQ(in_data.size(), 6U) << "should be A, B, A_min, A_max, B_min, B_max";
  // C, C_min, C_max
  CHECK_EQ(out_data.size(), 3U) << "should be C, C_min, C_max";
  // Collect data min, max, absmax
  const float dataA_min = in_data[quantized_elemwise_add_enum::kAMin].data().dptr<float>()[0];
  const float dataB_min = in_data[quantized_elemwise_add_enum::kBMin].data().dptr<float>()[0];
  const float dataA_max = in_data[quantized_elemwise_add_enum::kAMax].data().dptr<float>()[0];
  const float dataB_max = in_data[quantized_elemwise_add_enum::kBMax].data().dptr<float>()[0];
  const float dataA_absmax = MaxAbs(dataA_min, dataA_max);
  const float dataB_absmax = MaxAbs(dataB_min, dataB_max);

  auto dataA_mem = in_data[quantized_elemwise_add_enum::kDataA].GetMKLDNNData();
  auto dataB_mem = in_data[quantized_elemwise_add_enum::kDataB].GetMKLDNNData();
  const bool is_dataA_int8 = (in_data[quantized_elemwise_add_enum::kDataA].dtype()
                              == mshadow::kInt8);
  const size_t dataA_range = is_dataA_int8 ? kInt8Range : kUint8Range;

  const float A_scale = GetScale(in_data[quantized_elemwise_add_enum::kDataA],
                                 dataA_min,
                                 dataA_max);
  const float B_scale = GetScale(in_data[quantized_elemwise_add_enum::kDataB],
                                 dataB_min,
                                 dataB_max);
  // rescaled_mem is for reorder mkldnn memory
  mkldnn::memory *rescaled_mem;

  // output default set as int32
  size_t output_data_range = kInt32Range;
  auto output_data_type = mkldnn::memory::s32;
  // dataA && dataB are uint8
  if (out_data[quantized_elemwise_add_enum::kOut].dtype() == mshadow::kInt8) {
    output_data_range = kInt8Range;
    output_data_type = mkldnn::memory::s8;
  } else if (out_data[quantized_elemwise_add_enum::kOut].dtype() == mshadow::kUint8) {
    output_data_range = kUint8Range;
    output_data_type = mkldnn::memory::u8;
  } else {
    output_data_range = kInt32Range;
    output_data_type = mkldnn::memory::s32;
  }

  float output_min = 0;
  float output_max = 0;
  float out_data_scale = 0;
  if (params.max_calib_range.has_value() && params.min_calib_range.has_value()) {
    output_min = params.min_calib_range.value();
    output_max = params.max_calib_range.value();
    out_data_scale = output_data_range / MaxAbs(output_min, output_max);
  } else {
    output_max = dataA_absmax + dataB_absmax;
    output_min = -output_max;
  }
  // 2: scale 0 for dataA, scale 1 for dataB
  const int scales_num = 2;
  std::vector<float> scales(scales_num, 1);
  if (in_data[quantized_elemwise_add_enum::kDataA].dtype()
      != in_data[quantized_elemwise_add_enum::kDataB].dtype()) {
    auto s8_pd = (is_dataA_int8 == true)
                 ? dataA_mem->get_primitive_desc()
                 : dataB_mem->get_primitive_desc();
    rescaled_mem = TmpMemMgr::Get()->Alloc(s8_pd);
    float u8_reorder_scale = 0;
    if (params.max_calib_range.has_value() && params.min_calib_range.has_value()) {
      if (is_dataA_int8 == true) {
        u8_reorder_scale = out_data_scale / B_scale;
        scales[0] = out_data_scale / A_scale;
      } else {
        u8_reorder_scale = out_data_scale / A_scale;
        scales[1] = out_data_scale / B_scale;
      }
    } else {
      // x*dataA_absmax/dataA_range = y*(dataA_absmax+dataB_absmax)/output_range
      if (is_dataA_int8 == true) {
        u8_reorder_scale = dataB_absmax * output_data_range
                           / ((dataA_absmax + dataB_absmax) * kUint8Range);
        scales[0] = dataA_absmax * output_data_range
                    / ((dataA_absmax + dataB_absmax) * dataA_range);
      } else {
        u8_reorder_scale = dataA_absmax * output_data_range
                           / ((dataA_absmax + dataB_absmax) * dataA_range);
        scales[1] = dataB_absmax * output_data_range
                    / ((dataA_absmax + dataB_absmax) * kInt8Range);
      }
    }
    std::vector<float> reorder_scale = {u8_reorder_scale};
    primitive_attr reorder_attr;
    reorder_attr.set_int_output_round_mode(round_mode::round_nearest);
    reorder_attr.set_output_scales(0, reorder_scale);
    auto u8_mem = (is_dataA_int8 == true) ? dataB_mem : dataA_mem;
    const auto reorder_pd = mkldnn::reorder::primitive_desc(u8_mem->get_primitive_desc(),
                                                            s8_pd,
                                                            reorder_attr);
    MKLDNNStream::Get()->RegisterPrim(mkldnn::reorder(reorder_pd, *u8_mem, *rescaled_mem));

    if (is_dataA_int8 == true) {
      dataB_mem = rescaled_mem;
    } else {
      dataA_mem = rescaled_mem;
    }
  } else {
    // same data type and has same data range
    if (params.max_calib_range.has_value() && params.min_calib_range.has_value()) {
      scales[0] = out_data_scale / A_scale;
      scales[1] = out_data_scale / B_scale;
    } else {
      scales[0] = dataA_absmax * output_data_range / ((dataA_absmax + dataB_absmax) * dataA_range);
      scales[1] = dataB_absmax * output_data_range / ((dataA_absmax + dataB_absmax) * dataA_range);
    }
  }

  std::vector<mkldnn::primitive::at> in_prims;
  std::vector<mkldnn::memory::primitive_desc> in_pds;
  in_prims.push_back(*dataA_mem);
  in_prims.push_back(*dataB_mem);
  in_pds.push_back(dataA_mem->get_primitive_desc());
  in_pds.push_back(dataB_mem->get_primitive_desc());
  size_t i_ndim = in_data[quantized_elemwise_add_enum::kDataA].shape().ndim();
  mkldnn::memory::dims i_dims = mkldnn::memory::dims(i_ndim);
  for (size_t i = 0; i < i_ndim; i++) {
    i_dims[i] = static_cast<int>(in_data[quantized_elemwise_add_enum::kDataA].shape()[i]);
  }
  mkldnn::memory::format i_fmt = static_cast<mkldnn::memory::format>(
      in_pds[quantized_elemwise_add_enum::kDataA].desc().data.format);
  auto output_desc = mkldnn::memory::desc(i_dims, output_data_type, i_fmt);
  mkldnn::sum::primitive_desc pdesc(output_desc, scales, in_pds);
  auto mem = CreateMKLDNNMem(out_data[quantized_elemwise_add_enum::kOut],
                             pdesc.dst_primitive_desc(),
                             req[0],
                             &in_data[0]);
  MKLDNNStream *stream = MKLDNNStream::Get();
  stream->RegisterPrim(mkldnn::sum(pdesc, in_prims, *mem.second));
  CommitOutput(out_data[quantized_elemwise_add_enum::kOut], mem);
  stream->Submit();

  out_data[quantized_elemwise_add_enum::kMin].data().dptr<float>()[0] = output_min;
  out_data[quantized_elemwise_add_enum::kMax].data().dptr<float>()[0] = output_max;
}

inline static bool ElemwiseAddStorageType(const nnvm::NodeAttrs& attrs, const int dev_mask,
                                          DispatchMode* dispatch_mode, std::vector<int>* in_attrs,
                                          std::vector<int>* out_attrs) {
  // Check num of inputs: A, B, A_min, A_max, B_min, B_max
  CHECK_EQ(in_attrs->size(), 6U);
  // Check num of outputs: C, C_min, C_max
  CHECK_EQ(out_attrs->size(), 3U);

  return MKLDNNStorageType(attrs, dev_mask, true, dispatch_mode, in_attrs, out_attrs);
}

NNVM_REGISTER_OP(_contrib_quantized_elemwise_add)
.set_attr<FInferStorageType>("FInferStorageType", ElemwiseAddStorageType)
.set_attr<FComputeEx>("FComputeEx<cpu>", MKLDNNQuantizedElemwiseAddForward)
.set_attr<bool>("TIsMKLDNN", true)
.set_attr_parser(ParamParser<QuantizeElemwiseAddParam>)
.add_arguments(QuantizeElemwiseAddParam::__FIELDS__());
}  // namespace op
}  // namespace mxnet

#endif  // MXNET_USE_MKLDNN == 1
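
For reference, the scale selection in MKLDNNQuantizedElemwiseAddForward can be reproduced with plain arithmetic. The sketch below is not part of the commit; it walks the calibration-free path for two int8 inputs and an int32 output, with 127 and 2147483647 used as stand-ins for the kInt8Range and kInt32Range constants from quantization_utils.h (illustrative assumptions, as are the example absmax values).

// Standalone sketch (not part of the commit): the no-calibration scale math
// from MKLDNNQuantizedElemwiseAddForward for two int8 inputs and an int32 output.
// Range constants and absmax values below are illustrative assumptions.
#include <cstdio>

int main() {
  const float int8_range  = 127.0f;         // stand-in for kInt8Range
  const float int32_range = 2147483647.0f;  // stand-in for kInt32Range (default output dtype)

  // Without calibration, the float output range is the sum of the input absmax values.
  const float dataA_absmax = 0.8f;
  const float dataB_absmax = 0.2f;
  const float output_max = dataA_absmax + dataB_absmax;  // 1.0
  const float output_min = -output_max;

  // Each input is rescaled onto the output grid:
  //   scale_X = X_absmax * output_range / ((A_absmax + B_absmax) * input_range)
  const float scale_a = dataA_absmax * int32_range / ((dataA_absmax + dataB_absmax) * int8_range);
  const float scale_b = dataB_absmax * int32_range / ((dataA_absmax + dataB_absmax) * int8_range);

  std::printf("output range [%g, %g], sum scales A=%g B=%g\n",
              output_min, output_max, scale_a, scale_b);
  return 0;
}

When the inputs have different dtypes, the same idea applies, except that the uint8 operand is first reordered onto the int8 operand's primitive descriptor using u8_reorder_scale, so both inputs feed the mkldnn::sum primitive on a common grid.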
src/operator/quantization/quantized_elemwise_add-inl.h (58 additions, 0 deletions)
@@ -0,0 +1,58 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
/*!
 * \file quantized_elemwise_add-inl.h
 * \brief
 * \author Rong Zhang
 */

#ifndef MXNET_OPERATOR_QUANTIZATION_QUANTIZED_ELEMWISE_ADD_INL_H_
#define MXNET_OPERATOR_QUANTIZATION_QUANTIZED_ELEMWISE_ADD_INL_H_

#include "../tensor/elemwise_unary_op.h"

namespace mxnet {
namespace op {
/* These structure is used for requantization only when fusion */
struct QuantizeElemwiseAddParam : public dmlc::Parameter<QuantizeElemwiseAddParam> {
  dmlc::optional<float> min_calib_range;
  dmlc::optional<float> max_calib_range;
  DMLC_DECLARE_PARAMETER(QuantizeElemwiseAddParam) {
    DMLC_DECLARE_FIELD(min_calib_range)
    .set_default(dmlc::optional<float>())
    .describe("The minimum scalar value in the form of float32 obtained "
              "through calibration. If present, it will be used to requantize the "
              "int8 output data.");
    DMLC_DECLARE_FIELD(max_calib_range)
    .set_default(dmlc::optional<float>())
    .describe("The maximum scalar value in the form of float32 obtained "
              "through calibration. If present, it will be used to requantize the "
              "int8 output data.");
  }
};

namespace quantized_elemwise_add_enum {
enum QuantizedElemwiseAddOutputs { kOut, kMin, kMax };
enum QuantizedElemwiseAddInputs { kDataA, kDataB, kAMin, kAMax, kBMin, kBMax };
}

}  // namespace op
}  // namespace mxnet

#endif  // MXNET_OPERATOR_QUANTIZATION_QUANTIZED_ELEMWISE_ADD_INL_H_
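
The two optional fields above are what let a calibrated (fused) graph requantize the elemwise-add output directly to int8 instead of emitting int32. Below is a minimal sketch of that branch, not part of the commit, assuming dmlc/optional.h is available and using illustrative range values.

// Standalone sketch (not part of the commit): how min_calib_range/max_calib_range
// switch the output handling in the forward function above. Values are illustrative.
#include <dmlc/optional.h>
#include <algorithm>
#include <cmath>
#include <cstdio>

int main() {
  dmlc::optional<float> min_calib_range;
  dmlc::optional<float> max_calib_range;
  // A calibrated graph would populate both fields through the op's attributes.
  min_calib_range = -1.0f;
  max_calib_range = 1.0f;

  const float int8_range = 127.0f;  // stand-in for kInt8Range
  const float dataA_absmax = 0.8f, dataB_absmax = 0.2f;

  float output_min, output_max, out_data_scale = 0.0f;
  if (min_calib_range.has_value() && max_calib_range.has_value()) {
    // Calibrated: requantize straight to int8 using the calibrated range.
    output_min = min_calib_range.value();
    output_max = max_calib_range.value();
    out_data_scale = int8_range / std::max(std::fabs(output_min), std::fabs(output_max));
  } else {
    // Uncalibrated: widen the range and emit int32, as in the .cc file above.
    output_max = dataA_absmax + dataB_absmax;
    output_min = -output_max;
  }
  std::printf("output range [%g, %g], out_data_scale %g\n", output_min, output_max, out_data_scale);
  return 0;
}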