Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[QualcommQnn] add tanh, unsqueeze, equal, slice, cast #9296

Merged
merged 1 commit into from
Jul 29, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class NCHW2NHWCDataLayoutConverter {
void ConvertBatchNormalization(core::Operation* operation);
void ConvertCast(core::Operation* operation);
void ConvertClip(core::Operation* operation);
void ConvertComparisons(core::Operation* operation);
void ConvertConv2DTranspose(core::Operation* operation);
void ConvertCumSum(core::Operation* operation);
void ConvertElementwise(core::Operation* operation);
Expand All @@ -59,12 +60,14 @@ class NCHW2NHWCDataLayoutConverter {
void ConvertResizeNearest(core::Operation* operation);
void ConvertResizeLinear(core::Operation* operation);
void ConvertShape(core::Operation* operation);
void ConvertSlice(core::Operation* operation);
void ConvertSoftmax(core::Operation* operation);
void ConvertSplit(core::Operation* operation);
void ConvertSqueeze(core::Operation* operation);
void ConvertStack(core::Operation* operation);
void ConvertTranspose(core::Operation* operation);
void ConvertMatMul(core::Operation* operation);
void ConvertUnsqueeze(core::Operation* operation);

private:
core::Model* model_{nullptr};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,81 @@ void NCHW2NHWCDataLayoutConverter::ConvertConv2DTranspose(
SetOperationLayout(operation, 3);
}

// Converts binary comparison ops (EQUAL/GREATER/LESS/... ) for the
// NCHW->NHWC pass: aligns both inputs to a common dimorder (permutation),
// inserting a transpose for non-constant inputs and transposing constant
// inputs in place, then propagates that permutation to the output.
void NCHW2NHWCDataLayoutConverter::ConvertComparisons(
    core::Operation* operation) {
  auto& input_operands = operation->input_operands;
  auto& output_operands = operation->output_operands;
  auto input_count = input_operands.size();
  auto output_count = output_operands.size();
  NNADAPTER_CHECK_EQ(input_count, 2);
  NNADAPTER_CHECK_EQ(output_count, 1);
  auto output_operand = output_operands[0];
  auto output_dimensions_count = output_operand->type.dimensions.count;
  // Force to align the dimorder vector of all of input operands.
  // The non-constant input with the longest permutation is the reference.
  std::vector<int32_t> reference_permutation;
  core::Operand* reference_operand = nullptr;
  for (size_t i = 0; i < input_count; i++) {
    auto input_operand = input_operands[i];
    if (!IsConstantOperand(input_operand)) {
      auto input_permutation = GetPermutation(input_operand);
      if (input_permutation.size() > reference_permutation.size()) {
        reference_permutation = input_permutation;
        reference_operand = input_operand;
      }
    }
  }
  if (reference_permutation.empty()) {
    // All of input operands are constant: nothing to align, the output keeps
    // the identity layout.
    SetPermutation(output_operand,
                   IdentityPermutation(output_dimensions_count));
  } else {
    auto reference_dimensions_count = reference_operand->type.dimensions.count;
    for (size_t i = 0; i < input_count; i++) {
      auto input_operand = input_operands[i];
      auto input_dimensions_count = input_operand->type.dimensions.count;
      if (!IsConstantOperand(input_operand)) {
        // Insert a transpose so this input's layout matches the reference.
        auto input_permutation = GetPermutation(input_operand);
        auto transpose_input_permutation = MultiplyPermutation(
            InversePermutation(input_permutation), reference_permutation);
        if (!IsIdentityPermutation(transpose_input_permutation)) {
          auto transpose_input_operand = AppendTransposeOperation(
              model_, input_operand, transpose_input_permutation);
          UpdateOperationInputOperands(
              {operation}, input_operand, transpose_input_operand);
          SetPermutation(transpose_input_operand, reference_permutation);
        }
      } else {
        if (IsIdentityPermutation(reference_permutation)) {
          // Ignore
        } else if (input_dimensions_count == reference_permutation.size()) {
          // Constant input with matching rank: transpose its data in place.
          TransposeOperand(input_operand, reference_permutation);
        } else {
          // Expand shape with 1: align the lower-rank constant against the
          // reference operand's original (pre-permutation) dimensions.
          std::vector<int32_t> origin_reference_dimensions(
              reference_dimensions_count);
          TransposeDimensions(reference_operand->type.dimensions.data,
                              InversePermutation(reference_permutation),
                              &origin_reference_dimensions[0]);
          std::vector<int32_t> expanded_input_dimensions;
          for (uint32_t j = 0, k = 0; j < reference_dimensions_count; j++) {
            // Check the bound on `k` BEFORE indexing dimensions.data[k],
            // otherwise the last mismatching dimension reads out of bounds.
            if (k < input_dimensions_count &&
                origin_reference_dimensions[j] ==
                    input_operand->type.dimensions.data[k]) {
              expanded_input_dimensions.push_back(
                  input_operand->type.dimensions.data[k]);
              ++k;
            } else {
              expanded_input_dimensions.push_back(1);
            }
          }
          // NOTE(review): expanded_input_dimensions is computed but never
          // written back to the operand — TODO confirm whether the expanded
          // shape should be applied (e.g. via a reshape) before transposing.
        }
      }
    }
    TransposeOperand(output_operand, reference_permutation);
    SetPermutation(output_operand, reference_permutation);
  }
}

void NCHW2NHWCDataLayoutConverter::ConvertCumSum(core::Operation* operation) {
auto& input_operands = operation->input_operands;
auto& output_operands = operation->output_operands;
Expand Down Expand Up @@ -679,6 +754,26 @@ void NCHW2NHWCDataLayoutConverter::ConvertShape(core::Operation* operation) {
SetPermutation(output_operand, IdentityPermutation(output_dimensions_count));
}

// Converts a SLICE operation for the NCHW->NHWC pass: rewrites the `axes`
// input in place so each axis refers to the transposed layout of the input
// operand, then propagates the input's dimorder to the output operand.
void NCHW2NHWCDataLayoutConverter::ConvertSlice(core::Operation* operation) {
  auto& input_operands = operation->input_operands;
  auto& output_operands = operation->output_operands;
  NNADAPTER_CHECK_EQ(input_operands.size(), 5);
  NNADAPTER_CHECK_EQ(output_operands.size(), 1);
  auto permutation = GetPermutation(input_operands[0]);
  // Recalculate every slice axis according to the dimorder vector of the
  // input operand.
  auto axes_operand = input_operands[1];
  auto* axes_data = reinterpret_cast<int32_t*>(axes_operand->buffer);
  const int num_axes = axes_operand->length / sizeof(int32_t);
  for (int idx = 0; idx < num_axes; ++idx) {
    axes_data[idx] = TransposeAxis(axes_data[idx], permutation);
  }
  // The output inherits the input operand's data layout.
  auto output_operand = output_operands[0];
  TransposeOperand(output_operand, permutation);
  SetPermutation(output_operand, permutation);
}

void NCHW2NHWCDataLayoutConverter::ConvertFill(core::Operation* operation) {
auto& input_operands = operation->input_operands;
auto& output_operands = operation->output_operands;
Expand Down Expand Up @@ -937,6 +1032,32 @@ void NCHW2NHWCDataLayoutConverter::ConvertTranspose(
SetPermutation(output_operand, input_permutation);
}

// Converts an UNSQUEEZE operation for the NCHW->NHWC pass: restores the
// input operand to its original (identity) dimorder — inserting a transpose
// if needed — so the unsqueeze axes keep their original meaning, then marks
// the output with an identity permutation.
// (Restored: GitHub review-page text was embedded mid-function in the
// scraped source and has been removed.)
void NCHW2NHWCDataLayoutConverter::ConvertUnsqueeze(
    core::Operation* operation) {
  auto& input_operands = operation->input_operands;
  auto& output_operands = operation->output_operands;
  auto input_count = input_operands.size();
  auto output_count = output_operands.size();
  NNADAPTER_CHECK_EQ(input_count, 2);
  NNADAPTER_CHECK_EQ(output_count, 1);
  auto input_operand = input_operands[0];
  int input_dimensions_count = input_operand->type.dimensions.count;
  auto output_operand = output_operands[0];
  auto output_dimensions_count = output_operand->type.dimensions.count;
  // Force to restore the dimorder vector of the input operand
  auto input_permutation = GetPermutation(input_operand);
  auto transpose_input_permutation = InversePermutation(input_permutation);
  if (!IsIdentityPermutation(transpose_input_permutation)) {
    // TODO(reviewer note, translated): keep this simple approach for now and
    // optimize later — ideally handle it like squeeze to avoid the
    // performance cost of inserting an extra transpose operation.
    auto transpose_input_operand = AppendTransposeOperation(
        model_, input_operand, transpose_input_permutation);
    UpdateOperationInputOperands(
        {operation}, input_operand, transpose_input_operand);
    SetPermutation(transpose_input_operand,
                   IdentityPermutation(input_dimensions_count));
  }
  SetPermutation(output_operand, IdentityPermutation(output_dimensions_count));
}

void NCHW2NHWCDataLayoutConverter::Apply(core::Model* model) {
model_ = model;
// Initialize the permutation of model input operands
Expand Down Expand Up @@ -987,6 +1108,14 @@ void NCHW2NHWCDataLayoutConverter::Apply(core::Model* model) {
case NNADAPTER_CUM_SUM:
ConvertCumSum(operation);
break;
case NNADAPTER_EQUAL:
case NNADAPTER_GREATER:
case NNADAPTER_GREATER_EQUAL:
case NNADAPTER_LESS:
case NNADAPTER_LESS_EQUAL:
case NNADAPTER_NOT_EQUAL:
ConvertComparisons(operation);
break;
case NNADAPTER_FILL:
ConvertFill(operation);
break;
Expand Down Expand Up @@ -1045,6 +1174,9 @@ void NCHW2NHWCDataLayoutConverter::Apply(core::Model* model) {
case NNADAPTER_SHAPE:
ConvertShape(operation);
break;
case NNADAPTER_SLICE:
ConvertSlice(operation);
break;
case NNADAPTER_SOFTMAX:
ConvertSoftmax(operation);
break;
Expand All @@ -1060,6 +1192,9 @@ void NCHW2NHWCDataLayoutConverter::Apply(core::Model* model) {
case NNADAPTER_TRANSPOSE:
ConvertTranspose(operation);
break;
case NNADAPTER_UNSQUEEZE:
ConvertUnsqueeze(operation);
break;
default:
NNADAPTER_LOG(FATAL)
<< "Missing the processing of "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,12 +82,18 @@ NNADAPTER_EXPORT void ConvertQuantizationSymmToAsymm(core::Model* model) {
switch (operation->type) {
case NNADAPTER_ADD:
case NNADAPTER_DIV:
case NNADAPTER_EQUAL:
case NNADAPTER_FULLY_CONNECTED:
case NNADAPTER_GATHER:
case NNADAPTER_GREATER:
case NNADAPTER_GREATER_EQUAL:
case NNADAPTER_LESS:
case NNADAPTER_LESS_EQUAL:
case NNADAPTER_MAT_MUL:
case NNADAPTER_MAX:
case NNADAPTER_MIN:
case NNADAPTER_MUL:
case NNADAPTER_NOT_EQUAL:
case NNADAPTER_POW:
case NNADAPTER_SUB: {
ConvertOperandSymmToAsymm(input_operands[0], 128);
Expand All @@ -96,25 +102,27 @@ NNADAPTER_EXPORT void ConvertQuantizationSymmToAsymm(core::Model* model) {
} break;
case NNADAPTER_AVERAGE_POOL_2D:
case NNADAPTER_BATCH_NORMALIZATION:
case NNADAPTER_CAST:
case NNADAPTER_CHANNEL_SHUFFLE:
case NNADAPTER_CLIP:
case NNADAPTER_CUM_SUM:
case NNADAPTER_FILL_LIKE:
case NNADAPTER_FLATTEN:
case NNADAPTER_HARD_SIGMOID:
case NNADAPTER_HARD_SWISH:
case NNADAPTER_LEAKY_RELU:
case NNADAPTER_MAX_POOL_2D:
case NNADAPTER_RELU:
case NNADAPTER_RELU6:
case NNADAPTER_RESHAPE:
case NNADAPTER_RESIZE_NEAREST:
case NNADAPTER_RESIZE_LINEAR:
case NNADAPTER_SLICE:
case NNADAPTER_SQUEEZE:
case NNADAPTER_SWISH:
case NNADAPTER_TANH:
case NNADAPTER_FLATTEN:
case NNADAPTER_TRANSPOSE:
case NNADAPTER_HARD_SIGMOID:
case NNADAPTER_HARD_SWISH:
case NNADAPTER_LEAKY_RELU:
case NNADAPTER_SQUEEZE:
case NNADAPTER_CLIP:
case NNADAPTER_CHANNEL_SHUFFLE:
case NNADAPTER_SLICE:
case NNADAPTER_FILL_LIKE: {
case NNADAPTER_UNSQUEEZE: {
ConvertOperandSymmToAsymm(input_operands[0], 128);
ConvertOperandSymmToAsymm(output_operands[0], 128);
PropagateAsymmZeroPoint(input_operands[0], output_operands[0]);
Expand Down
49 changes: 25 additions & 24 deletions lite/kernels/nnadapter/converter/all.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ REGISTER_CONVERTER(bmm, ConvertBmm, "intel_openvino");
REGISTER_CONVERTER(cast,
ConvertCast,
"huawei_ascend_npu,cambricon_mlu,huawei_kirin_npu,nvidia_"
"tensorrt,intel_openvino");
"tensorrt,intel_openvino,qualcomm_qnn");
REGISTER_CONVERTER(clip,
ConvertClip,
"huawei_ascend_npu,cambricon_mlu,verisilicon_timvx,huawei_"
Expand Down Expand Up @@ -92,10 +92,10 @@ REGISTER_CONVERTER(reshape2,
"npu,amlogic_npu,imagination_nna,verisilicon_timvx,"
"kunlunxin_xtcl,cambricon_mlu,android_nnapi,nvidia_tensorrt,"
"intel_openvino,qualcomm_qnn,google_xnnpack");
REGISTER_CONVERTER(
unsqueeze,
ConvertUnsqueeze,
"huawei_ascend_npu,cambricon_mlu,nvidia_tensorrt,intel_openvino");
REGISTER_CONVERTER(unsqueeze,
ConvertUnsqueeze,
"huawei_ascend_npu,cambricon_mlu,nvidia_tensorrt,intel_"
"openvino,qualcomm_qnn");
REGISTER_CONVERTER(
unsqueeze2,
ConvertUnsqueeze,
Expand Down Expand Up @@ -215,7 +215,7 @@ REGISTER_CONVERTER(tanh,
"rockchip_npu,mediatek_apu,huawei_kirin_npu,huawei_ascend_"
"npu,amlogic_npu,cambricon_mlu,verisilicon_timvx,kunlunxin_"
"xtcl,android_nnapi,intel_openvino,nvidia_tensorrt,"
"eeasytech_npu");
"eeasytech_npu,qualcomm_qnn");
REGISTER_CONVERTER(abs,
ConvertUnaryActivations,
"huawei_ascend_npu,huawei_kirin_npu,intel_openvino");
Expand Down Expand Up @@ -262,29 +262,30 @@ REGISTER_CONVERTER(
REGISTER_CONVERTER(equal,
ConvertComparisons,
"huawei_ascend_npu,huawei_kirin_npu,cambricon_mlu,intel_"
"openvino,nvidia_tensorrt");
"openvino,nvidia_tensorrt,qualcomm_qnn");
REGISTER_CONVERTER(expand_v2,
ConvertExpandV2,
"huawei_ascend_npu,cambricon_mlu,intel_openvino");
REGISTER_CONVERTER(not_equal,
ConvertComparisons,
"huawei_ascend_npu,huawei_kirin_npu,cambricon_mlu");
REGISTER_CONVERTER(greater_than,
ConvertComparisons,
"huawei_ascend_npu,huawei_kirin_npu,cambricon_mlu");
REGISTER_CONVERTER(
greater_equal,
not_equal,
ConvertComparisons,
"huawei_ascend_npu,huawei_kirin_npu,cambricon_mlu,intel_openvino");
REGISTER_CONVERTER(less_than,
ConvertComparisons,
"huawei_ascend_npu,huawei_kirin_npu,cambricon_mlu");
REGISTER_CONVERTER(less_equal,
ConvertComparisons,
"huawei_ascend_npu,huawei_kirin_npu,cambricon_mlu");
REGISTER_CONVERTER(less_than,
"huawei_ascend_npu,huawei_kirin_npu,cambricon_mlu,qualcomm_qnn");
REGISTER_CONVERTER(
greater_than,
ConvertComparisons,
"huawei_ascend_npu,huawei_kirin_npu,cambricon_mlu,qualcomm_qnn");
REGISTER_CONVERTER(greater_equal,
ConvertComparisons,
"huawei_ascend_npu,huawei_kirin_npu,cambricon_mlu");
"huawei_ascend_npu,huawei_kirin_npu,cambricon_mlu,intel_"
"openvino,qualcomm_qnn");
REGISTER_CONVERTER(
less_than,
ConvertComparisons,
"huawei_ascend_npu,huawei_kirin_npu,cambricon_mlu,qualcomm_qnn");
REGISTER_CONVERTER(
less_equal,
ConvertComparisons,
"huawei_ascend_npu,huawei_kirin_npu,cambricon_mlu,qualcomm_qnn");
REGISTER_CONVERTER(
reduce_mean,
ConvertReduce,
Expand Down Expand Up @@ -321,7 +322,7 @@ REGISTER_CONVERTER(
REGISTER_CONVERTER(slice,
ConvertSlice,
"huawei_ascend_npu,verisilicon_timvx,cambricon_mlu,nvidia_"
"tensorrt,intel_openvino");
"tensorrt,intel_openvino,qualcomm_qnn");
REGISTER_CONVERTER(strided_slice,
ConvertStridedSlice,
"huawei_ascend_npu,huawei_kirin_npu,nvidia_tensorrt");
Expand Down
2 changes: 2 additions & 0 deletions lite/tests/kernels/activation_compute_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -697,6 +697,8 @@ TEST(Activation_tanh, precision) {
abs_error = 2e-5;
#elif defined(NNADAPTER_WITH_NVIDIA_TENSORRT)
abs_error = 2e-5;
#elif defined(NNADAPTER_WITH_QUALCOMM_QNN)
abs_error = 2e-5;
#elif defined(NNADAPTER_WITH_INTEL_OPENVINO)
abs_error = 1e-5;
for (auto& dims : test_dims) {
Expand Down
Loading