From 61d1bb6c4428ad730e525d0164e12df12b789985 Mon Sep 17 00:00:00 2001
From: felix-johnny <48442848+felix-johnny@users.noreply.github.com>
Date: Mon, 8 May 2023 12:47:54 +0200
Subject: [PATCH] svdf s8: Fix state data overwrite (#59)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds unit test to verify fix as well.

Co-authored-by: Måns Nilsson <mans.nilsson@arm.com>
---
 .../arm_nn_vec_mat_mult_t_s8.c                |  20 +-
 Tests/UnitTest/CMakeLists.txt                 |   1 +
 .../Common/svdf_s8_weights_template.json      | 174 ++++++++++++++++++
 .../TestData/svdf_int8/biases_data.h          |   6 +
 .../TestData/svdf_int8/config_data.h          |  19 ++
 .../TestData/svdf_int8/input_sequence_data.h  |   8 +
 .../TestCases/TestData/svdf_int8/state_data.h |   6 +
 .../TestCases/TestData/svdf_int8/test_data.h  |   8 +
 .../TestData/svdf_int8/weights_feature_data.h |  18 ++
 .../TestData/svdf_int8/weights_time_data.h    |   7 +
 .../TestCases/test_arm_svdf_s8/CMakeLists.txt |  23 +++
 .../Unity/unity_test_arm_svdf_s8.c            |  47 +++++
 .../test_arm_svdf_s8/test_arm_svdf_s8.c       | 118 ++++++++++++
 Tests/UnitTest/generate_test_data.py          |  55 +++++-
 14 files changed, 494 insertions(+), 16 deletions(-)
 create mode 100644 Tests/UnitTest/TestCases/Common/svdf_s8_weights_template.json
 create mode 100644 Tests/UnitTest/TestCases/TestData/svdf_int8/biases_data.h
 create mode 100644 Tests/UnitTest/TestCases/TestData/svdf_int8/config_data.h
 create mode 100644 Tests/UnitTest/TestCases/TestData/svdf_int8/input_sequence_data.h
 create mode 100644 Tests/UnitTest/TestCases/TestData/svdf_int8/state_data.h
 create mode 100644 Tests/UnitTest/TestCases/TestData/svdf_int8/test_data.h
 create mode 100644 Tests/UnitTest/TestCases/TestData/svdf_int8/weights_feature_data.h
 create mode 100644 Tests/UnitTest/TestCases/TestData/svdf_int8/weights_time_data.h
 create mode 100644 Tests/UnitTest/TestCases/test_arm_svdf_s8/CMakeLists.txt
 create mode 100644 Tests/UnitTest/TestCases/test_arm_svdf_s8/Unity/unity_test_arm_svdf_s8.c
 create mode 100644 Tests/UnitTest/TestCases/test_arm_svdf_s8/test_arm_svdf_s8.c

diff --git a/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c b/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c
index 4d2d04b4..8568676c 100644
--- a/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c
+++ b/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c
@@ -21,8 +21,8 @@
  * Title:        arm_nn_vec_mat_mult_t_s8
  * Description:  s8 vector by matrix (transposed) multiplication
  *
- * $Date:        27 March 2023
- * $Revision:    V.5.4.0
+ * $Date:        5 May 2023
+ * $Revision:    V.5.4.1
  *
  * Target :  Arm(R) M-Profile Architecture
  *
@@ -89,6 +89,12 @@ arm_cmsis_nn_status arm_nn_vec_mat_mult_t_s8(const int8_t *lhs,
         int32_t rhs_sum_0 = 0;
         int32_t rhs_sum_1 = 0;
         int32_t rhs_sum_2 = 0;
+        if (bias)
+        {
+            acc_0 = *bias++;
+            acc_1 = *bias++;
+            acc_2 = *bias++;
+        }
 
         uint32_t col_cnt = (uint32_t)rhs_cols;
 
@@ -119,13 +125,6 @@ arm_cmsis_nn_status arm_nn_vec_mat_mult_t_s8(const int8_t *lhs,
         rhs += 3 * rhs_cols;
 
         int32x4_t acc = {acc_0, acc_1, acc_2, 0};
-        mve_pred16_t p = vctp32q(3);
-        if (bias)
-        {
-            int32x4_t b = vldrwq_z_s32(bias, p);
-            acc = vaddq_x_s32(acc, b, p);
-            bias += 3;
-        }
         const int32x4_t rhs_sum = {rhs_sum_0, rhs_sum_1, rhs_sum_2, 0};
         acc += vdupq_n_s32(lhs_offset) * rhs_sum;
 
@@ -134,9 +133,10 @@ arm_cmsis_nn_status arm_nn_vec_mat_mult_t_s8(const int8_t *lhs,
         acc = vmaxq_s32(acc, vdupq_n_s32(activation_min));
         acc = vminq_s32(acc, vdupq_n_s32(activation_max));
 
+        const mve_pred16_t p = vctp32q(3);
         if (address_offset > 1L)
         {
-            vstrbq_scatter_offset_s32(dst, address_offset_array, acc);
+            vstrbq_scatter_offset_p_s32(dst, address_offset_array, acc, p);
         }
         else
         {
diff --git a/Tests/UnitTest/CMakeLists.txt b/Tests/UnitTest/CMakeLists.txt
index 09922e85..6b4469b3 100644
--- a/Tests/UnitTest/CMakeLists.txt
+++ b/Tests/UnitTest/CMakeLists.txt
@@ -94,6 +94,7 @@ add_subdirectory(TestCases/test_arm_max_pool_s8)
 add_subdirectory(TestCases/test_arm_softmax_s16)
 add_subdirectory(TestCases/test_arm_softmax_s8)
 add_subdirectory(TestCases/test_arm_softmax_s8_s16)
+add_subdirectory(TestCases/test_arm_svdf_s8)
 add_subdirectory(TestCases/test_arm_svdf_state_s16_s8)
 add_subdirectory(TestCases/test_arm_ds_cnn_l_s8)
 add_subdirectory(TestCases/test_arm_ds_cnn_s_s8)
diff --git a/Tests/UnitTest/TestCases/Common/svdf_s8_weights_template.json b/Tests/UnitTest/TestCases/Common/svdf_s8_weights_template.json
new file mode 100644
index 00000000..5f846641
--- /dev/null
+++ b/Tests/UnitTest/TestCases/Common/svdf_s8_weights_template.json
@@ -0,0 +1,174 @@
+{
+  "version": 3,
+  "operator_codes": [
+    {
+      "deprecated_builtin_code": 27,
+      "version": 1
+    }
+  ],
+  "subgraphs": [
+    {
+      "tensors": [
+        {
+          "shape": [
+            batches,
+            input_size
+          ],
+          "type": "INT8",
+          "buffer": 0,
+          "name": "tensor_input",
+          "quantization": {
+            "scale": [
+              input_scale
+            ],
+            "zero_point": [
+              input_zp
+            ],
+            "quantized_dimension": 0
+          },
+          "is_variable": false
+        },
+        {
+          "shape": [
+            number_filters,
+            input_size
+          ],
+          "type": "INT8",
+          "buffer": 1,
+          "name": "tensor_weight_1",
+          "quantization": {
+            "scale": [
+              w_1_scale
+            ],
+            "zero_point": [
+              w_1_zp
+            ],
+            "quantized_dimension": 0
+          },
+          "is_variable": false
+        },
+        {
+          "shape": [
+            number_filters,
+            memory_size
+          ],
+          "type": "INT8",
+          "buffer": 2,
+          "name": "tensor_weight_2",
+          "quantization": {
+            "scale": [
+              w_2_scale
+            ],
+            "zero_point": [
+              w_2_zp
+            ],
+            "quantized_dimension": 0
+          },
+          "is_variable": false
+        },
+        {
+          "shape": [
+              number_units
+          ],
+          "type": "INT32",
+          "buffer": 3,
+          "name": "tensor_bias",
+          "quantization": {
+            "scale": [
+              bias_scale
+            ],
+            "zero_point": [
+              bias_zp
+            ],
+            "quantized_dimension": 0
+          },
+          "is_variable": false
+        },
+        {
+          "shape": [
+            batches,
+            memory_sizeXnumber_filters
+          ],
+          "type": "INT8",
+          "buffer": 4,
+          "name": "tensor_state",
+          "quantization": {
+            "scale": [
+              state_scale
+            ],
+            "zero_point": [
+              state_zp
+            ],
+            "quantized_dimension": 0
+          },
+          "is_variable": true
+        },
+        {
+          "shape": [
+            batches,
+            number_units
+          ],
+          "type": "INT8",
+          "buffer": 5,
+          "name": "tensor_output",
+          "quantization": {
+            "scale": [
+              output_scale
+            ],
+            "zero_point": [
+              output_zp
+            ],
+            "quantized_dimension": 0
+          },
+          "is_variable": false
+        }
+      ],
+      "inputs": [
+        0
+      ],
+      "outputs": [
+        5
+      ],
+      "operators": [
+        {
+          "opcode_index": 0,
+          "inputs": [
+            0,
+            1,
+            2,
+            3,
+            4
+          ],
+          "outputs": [
+            5
+          ],
+          "builtin_options_type": "SVDFOptions",
+          "builtin_options": {
+            "rank": rank_value,
+            "fused_activation_function": "RELU"
+          },
+          "custom_options_format": "FLEXBUFFERS"
+        }
+      ]
+    }
+  ],
+  "description": "CMSIS-NN unit test model",
+  "buffers": [
+    {},
+    {
+      "data": []
+    },
+    {
+      "data": []
+    },
+    {
+      "data": []
+    },
+    {
+      "data": []
+    },
+    {
+      "data": []
+    }
+  ]
+}
diff --git a/Tests/UnitTest/TestCases/TestData/svdf_int8/biases_data.h b/Tests/UnitTest/TestCases/TestData/svdf_int8/biases_data.h
new file mode 100644
index 00000000..d0f83880
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/svdf_int8/biases_data.h
@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.10.0 (Keras version 2.10.0).
+// Interpreter from tensorflow version 2.10.0 and revision v2.10.0-rc3-6-g359c3cdfc5f.
+#pragma once
+#include <stdint.h>
+
+const int32_t svdf_int8_biases[12] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
diff --git a/Tests/UnitTest/TestCases/TestData/svdf_int8/config_data.h b/Tests/UnitTest/TestCases/TestData/svdf_int8/config_data.h
new file mode 100644
index 00000000..90ae6e60
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/svdf_int8/config_data.h
@@ -0,0 +1,19 @@
+// Generated by generate_test_data.py using tensorflow version 2.10.0 (Keras version 2.10.0).
+// Interpreter from tensorflow version 2.10.0 and revision v2.10.0-rc3-6-g359c3cdfc5f.
+#pragma once
+#define SVDF_INT8_MULTIPLIER_IN 1717987072
+#define SVDF_INT8_MULTIPLIER_OUT 1099511552
+#define SVDF_INT8_SHIFT_1 -3
+#define SVDF_INT8_SHIFT_2 -11
+#define SVDF_INT8_IN_ACTIVATION_MIN -32768
+#define SVDF_INT8_IN_ACTIVATION_MAX 32767
+#define SVDF_INT8_RANK 1
+#define SVDF_INT8_FEATURE_BATCHES 12
+#define SVDF_INT8_TIME_BATCHES 2
+#define SVDF_INT8_INPUT_SIZE 20
+#define SVDF_INT8_DST_SIZE 12
+#define SVDF_INT8_OUT_ACTIVATION_MIN -128
+#define SVDF_INT8_OUT_ACTIVATION_MAX 127
+#define SVDF_INT8_INPUT_BATCHES 1
+#define SVDF_INT8_INPUT_OFFSET 0
+#define SVDF_INT8_OUTPUT_OFFSET 0
diff --git a/Tests/UnitTest/TestCases/TestData/svdf_int8/input_sequence_data.h b/Tests/UnitTest/TestCases/TestData/svdf_int8/input_sequence_data.h
new file mode 100644
index 00000000..e446aff2
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/svdf_int8/input_sequence_data.h
@@ -0,0 +1,8 @@
+// Generated by generate_test_data.py using tensorflow version 2.10.0 (Keras version 2.10.0).
+// Interpreter from tensorflow version 2.10.0 and revision v2.10.0-rc3-6-g359c3cdfc5f.
+#pragma once
+#include <stdint.h>
+
+const int8_t svdf_int8_input_sequence[40] = {-23,  123,  104, 88,  -47, 92,  -14, -14, -90,  -113, -94, -46, 121, -125,
+                                             -100, -59,  69,  19,  -80, 24,  86,  37,  8,    116,  113, -45, 116, 41,
+                                             -1,   -103, 37,  103, 88,  -86, 28,  30,  -121, 4,    -83, -109};
diff --git a/Tests/UnitTest/TestCases/TestData/svdf_int8/state_data.h b/Tests/UnitTest/TestCases/TestData/svdf_int8/state_data.h
new file mode 100644
index 00000000..f7825195
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/svdf_int8/state_data.h
@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.10.0 (Keras version 2.10.0).
+// Interpreter from tensorflow version 2.10.0 and revision v2.10.0-rc3-6-g359c3cdfc5f.
+#pragma once
+#include <stdint.h>
+
+const int8_t svdf_int8_state[24] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
diff --git a/Tests/UnitTest/TestCases/TestData/svdf_int8/test_data.h b/Tests/UnitTest/TestCases/TestData/svdf_int8/test_data.h
new file mode 100644
index 00000000..e3dcde09
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/svdf_int8/test_data.h
@@ -0,0 +1,8 @@
+// Generated by generate_test_data.py using tensorflow version 2.10.0 (Keras version 2.10.0).
+// Interpreter from tensorflow version 2.10.0 and revision v2.10.0-rc3-6-g359c3cdfc5f.
+#include "biases_data.h"
+#include "config_data.h"
+#include "input_sequence_data.h"
+#include "state_data.h"
+#include "weights_feature_data.h"
+#include "weights_time_data.h"
diff --git a/Tests/UnitTest/TestCases/TestData/svdf_int8/weights_feature_data.h b/Tests/UnitTest/TestCases/TestData/svdf_int8/weights_feature_data.h
new file mode 100644
index 00000000..e3ef9fbe
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/svdf_int8/weights_feature_data.h
@@ -0,0 +1,18 @@
+// Generated by generate_test_data.py using tensorflow version 2.10.0 (Keras version 2.10.0).
+// Interpreter from tensorflow version 2.10.0 and revision v2.10.0-rc3-6-g359c3cdfc5f.
+#pragma once
+#include <stdint.h>
+
+const int8_t svdf_int8_weights_feature[240] = {
+    55,   27,  100,  94,   12,  -10, 99,  108,  30,   -39,  5,    56,  13,   -100, -15,  -81, 71,   -26,  9,    56,
+    -125, -19, -102, -2,   -88, -78, -37, -4,   -127, -39,  -104, 45,  84,   97,   -30,  109, 26,   -122, 56,   46,
+    -122, 3,   80,   -53,  -66, -78, 22,  -81,  40,   8,    15,   -44, -72,  -128, -55,  34,  -95,  21,   -65,  80,
+    94,   -38, 33,   58,   -62, 106, 111, 0,    75,   -122, -116, 77,  56,   72,   113,  37,  -88,  13,   -6,   10,
+    -48,  29,  -62,  -98,  13,  78,  -86, 6,    49,   35,   -70,  -6,  102,  55,   31,   -90, 75,   -98,  104,  25,
+    61,   -95, 2,    -74,  32,  -93, -47, 102,  -82,  -48,  -81,  55,  -121, -63,  39,   -50, 64,   46,   123,  -126,
+    50,   28,  -9,   -31,  -87, 47,  -8,  -65,  98,   103,  -41,  22,  14,   75,   -36,  -36, 28,   -89,  -110, 107,
+    93,   -71, 5,    -54,  77,  105, 102, -115, -79,  -31,  -114, -47, -65,  44,   -55,  61,  56,   -35,  -97,  -12,
+    116,  30,  -28,  -80,  -49, 93,  108, -23,  57,   125,  56,   61,  -108, 109,  2,    -80, -94,  -95,  -67,  -4,
+    71,   -64, 122,  -127, 123, 86,  -12, 27,   109,  93,   -2,   -30, -107, 117,  -120, 41,  -33,  -7,   -57,  78,
+    6,    -31, 108,  79,   86,  -16, 70,  -93,  -47,  93,   -54,  60,  -85,  116,  -69,  -28, -120, -29,  7,    -79,
+    -89,  -68, 25,   48,   19,  89,  50,  31,   -105, -69,  -26,  -2,  116,  -76,  62,   -26, -9,   10,   102,  24};
diff --git a/Tests/UnitTest/TestCases/TestData/svdf_int8/weights_time_data.h b/Tests/UnitTest/TestCases/TestData/svdf_int8/weights_time_data.h
new file mode 100644
index 00000000..569928ba
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/svdf_int8/weights_time_data.h
@@ -0,0 +1,7 @@
+// Generated by generate_test_data.py using tensorflow version 2.10.0 (Keras version 2.10.0).
+// Interpreter from tensorflow version 2.10.0 and revision v2.10.0-rc3-6-g359c3cdfc5f.
+#pragma once
+#include <stdint.h>
+
+const int8_t svdf_int8_weights_time[24] = {30,  32,  -33, 59,  -53, -32,  -45, -12, -119, 87,  34,  -114,
+                                           -92, -45, -90, 114, -68, -116, -8,  -10, 18,   -55, 100, 1};
diff --git a/Tests/UnitTest/TestCases/test_arm_svdf_s8/CMakeLists.txt b/Tests/UnitTest/TestCases/test_arm_svdf_s8/CMakeLists.txt
new file mode 100644
index 00000000..8682f235
--- /dev/null
+++ b/Tests/UnitTest/TestCases/test_arm_svdf_s8/CMakeLists.txt
@@ -0,0 +1,23 @@
+#
+# Copyright (C) 2023 Arm Limited or its affiliates.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+add_cmsis_nn_unit_test_executable(test_arm_svdf_s8)
+
+target_sources(test_arm_svdf_s8 PRIVATE
+    Unity/unity_test_arm_svdf_s8.c
+    Unity/TestRunner/unity_test_arm_svdf_s8_runner.c)
diff --git a/Tests/UnitTest/TestCases/test_arm_svdf_s8/Unity/unity_test_arm_svdf_s8.c b/Tests/UnitTest/TestCases/test_arm_svdf_s8/Unity/unity_test_arm_svdf_s8.c
new file mode 100644
index 00000000..ff493a4f
--- /dev/null
+++ b/Tests/UnitTest/TestCases/test_arm_svdf_s8/Unity/unity_test_arm_svdf_s8.c
@@ -0,0 +1,47 @@
+/*
+ * SPDX-FileCopyrightText: Copyright 2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../test_arm_svdf_s8.c"
+#include "unity.h"
+
+#ifdef USING_FVP_CORSTONE_300
+extern void uart_init(void);
+#endif
+
+/* This function is called from the autogenerated file.
+ * The name must be exactly like this
+ */
+void setUp(void)
+{ /* This is run before EACH TEST */
+#ifdef USING_FVP_CORSTONE_300
+    uart_init();
+#endif
+}
+
+/* This function is called from the autogenerated file.
+ * The name must be exactly like this
+ */
+void tearDown(void) {}
+
+void test_svdf_int8_arm_s8(void) { svdf_int8_arm_svdf_s8(); }
diff --git a/Tests/UnitTest/TestCases/test_arm_svdf_s8/test_arm_svdf_s8.c b/Tests/UnitTest/TestCases/test_arm_svdf_s8/test_arm_svdf_s8.c
new file mode 100644
index 00000000..a18d046f
--- /dev/null
+++ b/Tests/UnitTest/TestCases/test_arm_svdf_s8/test_arm_svdf_s8.c
@@ -0,0 +1,118 @@
+/*
+ * SPDX-FileCopyrightText: Copyright 2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "unity.h"
+#include <arm_nnfunctions.h>
+
+#include "../TestData/svdf_int8/test_data.h"
+#include "../Utils/validate.h"
+
+#define REPEAT_NUM (1)
+
+void svdf_int8_arm_svdf_s8(void)
+{
+    const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS;
+    cmsis_nn_context input_ctx;
+    cmsis_nn_context output_ctx;
+    cmsis_nn_svdf_params svdf_int8_params;
+    cmsis_nn_dims input_dims;
+    cmsis_nn_dims weights_feature_dims;
+    cmsis_nn_dims weights_time_dims;
+    cmsis_nn_dims state_dims;
+    cmsis_nn_dims output_dims;
+    cmsis_nn_dims bias_dims;
+    cmsis_nn_per_tensor_quant_params input_quant_params;
+    cmsis_nn_per_tensor_quant_params output_quant_params;
+    int8_t output_data[SVDF_INT8_DST_SIZE] = {1};
+    const int8_t *weights_feature_data = svdf_int8_weights_feature;
+    const int8_t *weights_time_data = svdf_int8_weights_time;
+
+    input_dims.n = SVDF_INT8_INPUT_BATCHES;
+    input_dims.h = SVDF_INT8_INPUT_SIZE;
+    weights_feature_dims.n = SVDF_INT8_FEATURE_BATCHES;
+    weights_time_dims.h = SVDF_INT8_TIME_BATCHES;
+
+    input_quant_params.multiplier = SVDF_INT8_MULTIPLIER_IN;
+    input_quant_params.shift = SVDF_INT8_SHIFT_1;
+    output_quant_params.multiplier = SVDF_INT8_MULTIPLIER_OUT;
+    output_quant_params.shift = SVDF_INT8_SHIFT_2;
+
+    svdf_int8_params.input_activation.min = SVDF_INT8_IN_ACTIVATION_MIN;
+    svdf_int8_params.input_activation.max = SVDF_INT8_IN_ACTIVATION_MAX;
+    svdf_int8_params.output_activation.min = SVDF_INT8_OUT_ACTIVATION_MIN;
+    svdf_int8_params.output_activation.max = SVDF_INT8_OUT_ACTIVATION_MAX;
+    svdf_int8_params.input_offset = SVDF_INT8_INPUT_OFFSET;
+    svdf_int8_params.output_offset = SVDF_INT8_OUTPUT_OFFSET;
+    svdf_int8_params.rank = SVDF_INT8_RANK;
+
+    const int input_round_size = SVDF_INT8_INPUT_BATCHES * SVDF_INT8_INPUT_SIZE;
+    const int number_inputs = sizeof(svdf_int8_input_sequence) / input_round_size;
+    const int32_t number_units = SVDF_INT8_FEATURE_BATCHES / SVDF_INT8_RANK;
+    const int scratch_size = SVDF_INT8_INPUT_BATCHES * SVDF_INT8_FEATURE_BATCHES * sizeof(int32_t);
+    const int scratch_size_out = SVDF_INT8_INPUT_BATCHES * number_units * sizeof(int32_t);
+
+    // Append SVDF_INT8_TIME_BATCHES guard bytes after the state buffer to detect out-of-bounds writes
+    const int state_data_size = sizeof(svdf_int8_state) + SVDF_INT8_TIME_BATCHES;
+    const int8_t initial_data = 66;
+
+    input_ctx.buf = malloc(scratch_size);
+    output_ctx.buf = malloc(scratch_size_out);
+
+    int8_t *input_data = malloc(input_round_size);
+    int8_t *state_data = malloc(state_data_size);
+
+    memset(state_data, initial_data, state_data_size);
+
+    for (int i = 0; i < REPEAT_NUM; i++)
+    {
+        memcpy(state_data, svdf_int8_state, sizeof(svdf_int8_state));
+        for (int j = 0; j < number_inputs; j++)
+        {
+            memcpy(input_data, svdf_int8_input_sequence + j * input_round_size, input_round_size);
+            arm_cmsis_nn_status result = arm_svdf_s8(&input_ctx,
+                                                     &output_ctx,
+                                                     &svdf_int8_params,
+                                                     &input_quant_params,
+                                                     &output_quant_params,
+                                                     &input_dims,
+                                                     input_data,
+                                                     &state_dims,
+                                                     state_data,
+                                                     &weights_feature_dims,
+                                                     weights_feature_data,
+                                                     &weights_time_dims,
+                                                     weights_time_data,
+                                                     &bias_dims,
+                                                     svdf_int8_biases,
+                                                     &output_dims,
+                                                     output_data);
+            TEST_ASSERT_EQUAL(expected, result);
+        }
+    }
+
+    // Make sure state data is not written outside boundary
+    for (int i = sizeof(svdf_int8_state); i < state_data_size; i++)
+    {
+        TEST_ASSERT_EQUAL(state_data[i], initial_data);
+    }
+
+    free(state_data);
+    free(input_data);
+    free(input_ctx.buf);
+    free(output_ctx.buf);
+}
diff --git a/Tests/UnitTest/generate_test_data.py b/Tests/UnitTest/generate_test_data.py
index 01a3f990..da4e08df 100755
--- a/Tests/UnitTest/generate_test_data.py
+++ b/Tests/UnitTest/generate_test_data.py
@@ -497,7 +497,11 @@ def convert_and_interpret(self, model, inttype, input_data=None, dataset_shape=N
 
         return interpreter
 
-    def generate_json_from_template(self, weights_feature_data=None, weights_time_data=None, bias_data=None):
+    def generate_json_from_template(self,
+                                    weights_feature_data=None,
+                                    weights_time_data=None,
+                                    bias_data=None,
+                                    int8_time_weights=False):
         """
         Takes a json template and parameters as input and creates a new json file.
         """
@@ -517,7 +521,10 @@ def generate_json_from_template(self, weights_feature_data=None, weights_time_da
                 data["buffers"][w_1_buffer_index]["data"] = self.to_bytes(weights_feature_data.numpy().ravel(), 1)
             if weights_time_data is not None:
                 w_2_buffer_index = 2
-                data["buffers"][w_2_buffer_index]["data"] = self.to_bytes(weights_time_data.numpy().ravel(), 2)
+                if int8_time_weights:
+                    data["buffers"][w_2_buffer_index]["data"] = self.to_bytes(weights_time_data.numpy().ravel(), 1)
+                else:
+                    data["buffers"][w_2_buffer_index]["data"] = self.to_bytes(weights_time_data.numpy().ravel(), 2)
             if bias_data is not None:
                 bias_buffer_index = 3
                 data["buffers"][bias_buffer_index]["data"] = self.to_bytes(bias_data.numpy().ravel(), 4)
@@ -1158,6 +1165,7 @@ def __init__(self,
                  input_size=3,
                  number_units=4,
                  generate_bias=True,
+                 int8_time_weights=False,
                  input_scale=0.1,
                  input_zp=0,
                  w_1_scale=0.005,
@@ -1202,7 +1210,13 @@ def __init__(self,
         self.in_activation_max = INT16_MAX
         self.in_activation_min = INT16_MIN
 
-        self.json_template = "TestCases/Common/svdf_template.json"
+        self.int8_time_weights = int8_time_weights
+
+        if self.int8_time_weights:
+            self.json_template = "TestCases/Common/svdf_s8_weights_template.json"
+        else:
+            self.json_template = "TestCases/Common/svdf_template.json"
+
         self.json_replacements = {
             "memory_sizeXnumber_filters": self.memory_size * self.number_filters,
             "batches": self.batches,
@@ -1289,7 +1303,10 @@ def generate_data(self, input_data=None, weights=None, biases=None, time_data=No
                                               regenerate=self.regenerate_new_weights)
 
         # Generate tflite model
-        generated_json = self.generate_json_from_template(weights_feature_data, weights_time_data, biases)
+        generated_json = self.generate_json_from_template(weights_feature_data,
+                                                          weights_time_data,
+                                                          biases,
+                                                          self.int8_time_weights)
         self.flatc_generate_tflite(generated_json, self.schema_file)
 
         # Run TFL interpreter
@@ -1316,9 +1333,20 @@ def generate_data(self, input_data=None, weights=None, biases=None, time_data=No
 
         # Generate unit test C headers
         self.generate_c_array("weights_feature", interpreter.get_tensor(weights_1_layer['index']))
-        self.generate_c_array("weights_time", interpreter.get_tensor(weights_2_layer['index']), datatype='int16_t')
         self.generate_c_array(self.bias_data_file_prefix, interpreter.get_tensor(bias_layer['index']), "int32_t")
-        self.generate_c_array("state", interpreter.get_tensor(state_layer['index']), "int16_t")
+
+        if self.int8_time_weights:
+            self.generate_c_array("weights_time", interpreter.get_tensor(weights_2_layer['index']), datatype='int8_t')
+            self.generate_c_array("state", interpreter.get_tensor(state_layer['index']), "int8_t")
+        else:
+            self.generate_c_array("weights_time", interpreter.get_tensor(weights_2_layer['index']), datatype='int16_t')
+            self.generate_c_array("state", interpreter.get_tensor(state_layer['index']), "int16_t")
+
+        # TODO: generate output reference with int8 time weights.
+        if self.int8_time_weights:
+            self.write_c_config_header()
+            self.write_c_header_wrapper()
+            return
 
         # Generate reference output
         svdf_ref = None
@@ -3429,6 +3457,21 @@ def load_testdata_sets() -> dict:
                                           input_size=20,
                                           number_units=12,
                                           generate_bias=False)
+    dataset = 'svdf_int8'
+    testdata_sets[dataset] = SVDFSettings(dataset,
+                                          type_of_test,
+                                          regenerate_weights,
+                                          regenerate_input,
+                                          regenerate_biases,
+                                          schema_file,
+                                          batches=1,
+                                          number_inputs=2,
+                                          rank=1,
+                                          memory_size=2,
+                                          input_size=20,
+                                          number_units=12,
+                                          generate_bias=False,
+                                          int8_time_weights=True)
 
     type_of_test = 'add'
     dataset = 'add'