Skip to content

Commit

Permalink
Compute output shapes for some kernels (#2356)
Browse files Browse the repository at this point in the history
@tensorflow/micro

Update the output tensor shape during the prepare phase when the computed shape does not match the shape in the flatbuffer.

Kernels:
- BATCH_TO_SPACE_ND
- SPACE_TO_BATCH_ND
- CONV
- RESHAPE
- EXPAND_DIMS
- DEPTHWISE_CONV

Update CMSIS_NN and ARC_MLI optimized kernels.
Add additional tests from TfLite for BATCH_TO_SPACE_ND and SPACE_TO_BATCH_ND.
Update existing tests.
Add tests for Keras model using convolution with dilation > 1.

Update memory_arena_threshold_test to increase total, tail, and persistent allocation sizes:
- Add 20 bytes for CONV output shape
- Add 15 bytes for arena allocation alignment
- x2 convolution layers

Update the micro_speech_test arena size as per the description in the C++ code.

See #2319 for additional details.

Resolves [b/317362237](https://issuetracker.google.com/317362237)

bug=fixes #2368 #1646 #1629 #1231 #2338 #2319
  • Loading branch information
ddavis-2015 authored Jan 2, 2024
1 parent 17d0e7f commit bb54330
Show file tree
Hide file tree
Showing 27 changed files with 1,763 additions and 480 deletions.
12 changes: 3 additions & 9 deletions tensorflow/lite/micro/examples/micro_speech/micro_speech_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,19 +32,13 @@ limitations under the License.
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
#include "tensorflow/lite/micro/testing/micro_test.h"

#define TF_LITE_MICRO_CHECK_FAIL() \
do { \
if (micro_test::did_test_fail) { \
return kTfLiteError; \
} \
} while (false)

namespace {

// Arena size is a guesstimate, followed by use of
// MicroInterpreter::arena_used_bytes() on both the AudioPreprocessor and
// MicroSpeech models and using the larger of the two results.
constexpr size_t kArenaSize = 28584; // xtensa p6
// MicroSpeech models and using the larger of the two results plus the
// arena alignment size (16).
constexpr size_t kArenaSize = 28664; // xtensa p6
alignas(16) uint8_t g_arena[kArenaSize];

using Features = int8_t[kFeatureCount][kFeatureSize];
Expand Down
29 changes: 23 additions & 6 deletions tensorflow/lite/micro/kernels/arc_mli/conv.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -29,6 +29,7 @@ limitations under the License.
#include "tensorflow/lite/micro/kernels/arc_mli/mli_tf_utils.h"
#include "tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.h"
#include "tensorflow/lite/micro/kernels/arc_mli/scratch_buffers.h"
#include "tensorflow/lite/micro/kernels/conv.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_log.h"

Expand Down Expand Up @@ -122,7 +123,7 @@ bool IsMliApplicable(TfLiteContext* context, const TfLiteTensor* input,
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
const TfLiteConvParams* params, int width,
int height, int filter_width, int filter_height,
int out_width, int out_height,
int* out_width, int* out_height,
const TfLiteType data_type, OpData* data) {
bool has_bias = node->inputs->size == 3;
// Check number of inputs/outputs
Expand All @@ -134,7 +135,7 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
data->padding = ComputePaddingHeightWidth(
params->stride_height, params->stride_width,
params->dilation_height_factor, params->dilation_width_factor, height,
width, filter_height, filter_width, padding, &out_height, &out_width);
width, filter_height, filter_width, padding, out_height, out_width);
// Note that quantized inference requires that all tensors have their
// parameters set. This is usually done during quantized training.
#if !defined(TF_LITE_STRIP_REFERENCE_IMPL)
Expand Down Expand Up @@ -167,6 +168,7 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
#endif
return kTfLiteOk;
}

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
Expand All @@ -190,6 +192,17 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TfLiteTensor* bias =
micro_context->AllocateTempInputTensor(context, node, kBiasTensor);

// Check dimensionality of input, filter, output
TF_LITE_ENSURE_EQ(context, input->dims->size, 4);
TF_LITE_ENSURE_EQ(context, filter->dims->size, 4);
TF_LITE_ENSURE_EQ(context, output->dims->size, 4);

// Check input channels matching filter
const int input_channels = input->dims->data[3];
const int filter_input_channels = filter->dims->data[3];
TF_LITE_ENSURE(context, filter_input_channels > 0);
TF_LITE_ENSURE_EQ(context, input_channels % filter_input_channels, 0);

int input_width = input->dims->data[2];
int input_height = input->dims->data[1];
#if defined(MLI_2_0) && !defined(MLI_2_0_KRNL_TEST)
Expand All @@ -199,8 +212,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
int filter_width = filter->dims->data[2];
int filter_height = filter->dims->data[1];
#endif
int output_width = output->dims->data[2];
int output_height = output->dims->data[1];
int output_width = 0;
int output_height = 0;

// Dynamically allocate per-channel quantization parameters.
const int num_channels = filter->dims->data[kConvQuantizedDimension];
Expand Down Expand Up @@ -235,7 +248,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {

TF_LITE_ENSURE_STATUS(CalculateOpData(
context, node, params, input_width, input_height, filter_width,
filter_height, output_width, output_height, input->type, data));
filter_height, &output_width, &output_height, input->type, data));

// compute output tensor shape and relocate shape data
TF_LITE_ENSURE_STATUS(ConvReshapeOutputTensor(
context, node, input, filter, output, output_height, output_width));

data->input_zero_point = input->params.zero_point;
data->filter_zero_point = filter->params.zero_point;
Expand Down
37 changes: 30 additions & 7 deletions tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -30,6 +30,7 @@ limitations under the License.
#include "tensorflow/lite/micro/kernels/arc_mli/mli_tf_utils.h"
#include "tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.h"
#include "tensorflow/lite/micro/kernels/arc_mli/scratch_buffers.h"
#include "tensorflow/lite/micro/kernels/depthwise_conv.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_log.h"

Expand Down Expand Up @@ -118,17 +119,16 @@ bool IsMliApplicable(TfLiteContext* context, const TfLiteTensor* input,
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
TfLiteDepthwiseConvParams* params, int width,
int height, int filter_width, int filter_height,
int* out_width, int* out_height,
const TfLiteType data_type, OpData* data) {
bool has_bias = node->inputs->size == 3;
// Check number of inputs/outputs
TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2);
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);

int unused_output_height, unused_output_width;
data->padding = ComputePaddingHeightWidth(
params->stride_height, params->stride_width, 1, 1, height, width,
filter_height, filter_width, params->padding, &unused_output_height,
&unused_output_width);
filter_height, filter_width, params->padding, out_height, out_width);

// Note that quantized inference requires that all tensors have their
// parameters set. This is usually done during quantized training.
Expand Down Expand Up @@ -182,6 +182,25 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* bias =
AllocateTempInputTensor(context, node, kBiasTensor);

// Check dimensionality of input, filter, output
TF_LITE_ENSURE_EQ(context, input->dims->size, 4);
TF_LITE_ENSURE_EQ(context, filter->dims->size, 4);
TF_LITE_ENSURE_EQ(context, output->dims->size, 4);
TF_LITE_ENSURE(context, params.dilation_height_factor > 0);
TF_LITE_ENSURE(context, params.dilation_width_factor > 0);

// Filter in DepthwiseConv is expected to be [1, height, width, channels].
TF_LITE_ENSURE_EQ(context, filter->dims->data[0], 1);

// Check input channels matching filter
const int num_filter_channels = filter->dims->data[3];
const int num_input_channels = input->dims->data[3];
TF_LITE_ENSURE(context, num_input_channels != 0);
TF_LITE_ENSURE_EQ(context, num_filter_channels % num_input_channels, 0);

int output_width = 0;
int output_height = 0;

const TfLiteType data_type = input->type;
int width = SizeOfDimension(input, 2);
int height = SizeOfDimension(input, 1);
Expand Down Expand Up @@ -227,9 +246,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
affine_quantization->zero_point->size);
}

TF_LITE_ENSURE_STATUS(CalculateOpData(context, node, params, width, height,
filter_width, filter_height, data_type,
data));
TF_LITE_ENSURE_STATUS(CalculateOpData(
context, node, params, width, height, filter_width, filter_height,
&output_width, &output_height, data_type, data));

// compute output tensor shape and relocate shape data
TF_LITE_ENSURE_STATUS(DepthwiseConvReshapeOutputTensor(
context, node, input, filter, output, output_height, output_width));

data->input_zero_point = input->params.zero_point;
data->filter_zero_point = filter->params.zero_point;
Expand Down
91 changes: 88 additions & 3 deletions tensorflow/lite/micro/kernels/batch_to_space_nd.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand All @@ -15,7 +15,10 @@ limitations under the License.

#include "tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h"

#include <algorithm>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/runtime_shape.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
Expand All @@ -38,6 +41,68 @@ constexpr int kOutputTensor = 0;
const int kInputOutputMinDimensionNum = 3;
const int kInputOutputMaxDimensionNum = 4;

// Computes the BATCH_TO_SPACE_ND output shape from the input dims, the
// block_shape tensor and the crops tensor, and rewrites the output tensor's
// dims when the computed shape differs from the shape the tensor currently
// holds.
//
// Parameters:
//   context     - passed to the TF_LITE_ENSURE* checks and tflite::micro helpers.
//   node        - used to look up the eval tensor for output kOutputTensor.
//   input       - its dims seed the computed output shape.
//   block_shape - read as int32_t data, one block size per spatial dimension.
//   crops       - read as int32_t data, two crop amounts per spatial dimension.
//   output      - tensor whose dims are compared against, and possibly
//                 replaced by, the computed shape.
//
// Returns kTfLiteOk when the output dims already match or were updated.
// NOTE(review): a failed TF_LITE_ENSURE* check is expected to return an error
// status to the caller; the macro definitions are not visible here.
TfLiteStatus ReshapeOutputTensor(TfLiteContext* context, const TfLiteNode* node,
const TfLiteTensor* input,
const TfLiteTensor* block_shape,
const TfLiteTensor* crops,
TfLiteTensor* output) {
// The block_shape and crops data are read directly below, so both tensors
// must be constant or persistent.
TF_LITE_ENSURE(context, IsConstantOrPersistentTensor(block_shape));
TF_LITE_ENSURE(context, IsConstantOrPersistentTensor(crops));
const int32_t* block_shape_data = GetTensorData<int32_t>(block_shape);
const int32_t* crops_data = GetTensorData<int32_t>(crops);

TfLiteIntArray* input_dims = input->dims;
// Every dimension except the first (batch) and the last counts as spatial.
int spatial_dims_num = input_dims->size - 2;
// Block_shape should be a 1D tensor with dimension [spatial_dims_num].
TF_LITE_ENSURE_EQ(context, NumDimensions(block_shape), 1);
TF_LITE_ENSURE_EQ(context, block_shape->dims->data[0], spatial_dims_num);
// Crops should be a 2D tensor with dimension [spatial_dims_num, 2].
TF_LITE_ENSURE_EQ(context, NumDimensions(crops), 2);
TF_LITE_ENSURE_EQ(context, crops->dims->data[0], spatial_dims_num);
TF_LITE_ENSURE_EQ(context, crops->dims->data[1], 2);

// Every crop amount must be non-negative.
for (int i = 0; i < spatial_dims_num * 2; ++i) {
TF_LITE_ENSURE(context, crops_data[i] >= 0);
}

// copy from input tensor as per TfLite code
TF_LITE_ENSURE_EQ(context, input_dims->size, output->dims->size);
RuntimeShape output_shape = GetTensorShape(input);
// keep a copy of the output tensor shape for later comparison
RuntimeShape old_output_shape = GetTensorShape(output);

// The batch dimension shrinks by each block size in turn.
int output_batch_size = input_dims->data[0];
for (int dim = 0; dim < spatial_dims_num; ++dim) {
// Number of batch must be multiple of (block_shape[dim]).
TF_LITE_ENSURE(context, block_shape_data[dim] != 0);
TF_LITE_ENSURE_EQ(context, output_batch_size % block_shape_data[dim], 0);
output_batch_size = output_batch_size / block_shape_data[dim];
// Spatial dimension (dim + 1) is the input extent multiplied by the block
// size, minus the two crop amounts stored for that dimension.
output_shape.SetDim(dim + 1,
input_dims->data[dim + 1] * block_shape_data[dim] -
crops_data[dim * 2] - crops_data[dim * 2 + 1]);
}
output_shape.SetDim(0, output_batch_size);
// The last dimension is carried over from the input unchanged.
output_shape.SetDim(input_dims->size - 1,
input_dims->data[input_dims->size - 1]);

// check if need to relocate output tensor dims
if (output_shape == old_output_shape) {
return kTfLiteOk;
}
// The computed shape may not hold more elements than the existing one.
// NOTE(review): presumably because the output buffer was sized from the
// existing shape -- confirm.
TF_LITE_ENSURE(context,
output_shape.FlatSize() <= old_output_shape.FlatSize());

// set the output tensor dims from output_shape
// NOTE(review): CreateWritableTensorDimsWithCopy presumably gives the tensor
// a writable dims array before it is overwritten below -- confirm against its
// declaration.
TfLiteEvalTensor* output_eval =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_STATUS(tflite::micro::CreateWritableTensorDimsWithCopy(
context, output, output_eval));
std::copy_n(output_shape.DimsData(), output_shape.DimensionsCount(),
output->dims->data);

return kTfLiteOk;
}

TfLiteStatus BatchToSpaceNDPrepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 3);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
Expand All @@ -46,20 +111,40 @@ TfLiteStatus BatchToSpaceNDPrepare(TfLiteContext* context, TfLiteNode* node) {

TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* block_shape =
micro_context->AllocateTempInputTensor(node, kBlockShapeTensor);
TF_LITE_ENSURE(context, block_shape != nullptr);
TfLiteTensor* crops =
micro_context->AllocateTempInputTensor(node, kCropsTensor);
TF_LITE_ENSURE(context, crops != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, input != nullptr && output != nullptr);
TF_LITE_ENSURE(context, output != nullptr);

TF_LITE_ENSURE(context, NumDimensions(input) >= kInputOutputMinDimensionNum);
TF_LITE_ENSURE(context, NumDimensions(output) >= kInputOutputMinDimensionNum);
TF_LITE_ENSURE(context, NumDimensions(input) <= kInputOutputMaxDimensionNum);
TF_LITE_ENSURE(context, NumDimensions(output) <= kInputOutputMaxDimensionNum);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
TF_LITE_ENSURE(context,
input->type == kTfLiteFloat32 || input->type == kTfLiteInt8);

if (input->type == kTfLiteInt8) {
TF_LITE_ENSURE(context, input->params.scale == output->params.scale);
TF_LITE_ENSURE(context,
input->params.zero_point == output->params.zero_point);
}

TfLiteStatus status =
ReshapeOutputTensor(context, node, input, block_shape, crops, output);

micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(block_shape);
micro_context->DeallocateTempTfLiteTensor(crops);
micro_context->DeallocateTempTfLiteTensor(output);

return kTfLiteOk;
return status;
}

TfLiteStatus BatchToSpaceNDEval(TfLiteContext* context, TfLiteNode* node) {
Expand Down
Loading

0 comments on commit bb54330

Please sign in to comment.