rolling 20210708

jomjol committed 2021-08-07 15:25:27 +02:00
parent 6f06af1d5f
commit 32f15fc557
138 changed files with 8048 additions and 2292 deletions

View File

@@ -32,14 +32,18 @@ AllOpsResolver::AllOpsResolver() {
AddConcatenation();
AddConv2D();
AddCos();
AddCumSum();
AddDepthToSpace();
AddDepthwiseConv2D();
AddDequantize();
AddDetectionPostprocess();
AddDiv();
AddElu();
AddEqual();
AddEthosU();
AddExpandDims();
AddFloor();
AddFloorDiv();
AddFloorMod();
AddFullyConnected();
AddGreater();
AddGreaterEqual();
@@ -70,6 +74,7 @@ AllOpsResolver::AllOpsResolver() {
AddRelu();
AddRelu6();
AddReshape();
AddResizeBilinear();
AddResizeNearestNeighbor();
AddRound();
AddRsqrt();
@@ -77,6 +82,7 @@ AllOpsResolver::AllOpsResolver() {
AddSin();
AddSoftmax();
AddSpaceToBatchNd();
AddSpaceToDepth();
AddSplit();
AddSplitV();
AddSqrt();
@@ -87,6 +93,7 @@ AllOpsResolver::AllOpsResolver() {
AddSvdf();
AddTanh();
AddTransposeConv();
AddTranspose();
AddUnpack();
}

View File

@@ -0,0 +1,64 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/flatbuffer_utils.h"
namespace tflite {
FlexbufferWrapper::FlexbufferWrapper(const uint8_t* buffer, size_t size)
: flexbuffers::Vector(flexbuffers::GetRoot(buffer, size).AsVector()) {}
int64_t FlexbufferWrapper::ElementAsInt64(size_t i) const {
const uint8_t* elem = data_ + i * byte_width_;
return ::flexbuffers::ReadInt64(elem, byte_width_);
}
uint64_t FlexbufferWrapper::ElementAsUInt64(size_t i) const {
const uint8_t* elem = data_ + i * byte_width_;
return ::flexbuffers::ReadUInt64(elem, byte_width_);
}
int32_t FlexbufferWrapper::ElementAsInt32(size_t i) const {
return static_cast<int32_t>(ElementAsInt64(i));
}
bool FlexbufferWrapper::ElementAsBool(size_t i) const {
return static_cast<bool>(ElementAsUInt64(i));
}
double FlexbufferWrapper::ElementAsDouble(size_t i) const {
const uint8_t* elem = data_ + i * byte_width_;
return ::flexbuffers::ReadDouble(elem, byte_width_);
}
float FlexbufferWrapper::ElementAsFloat(size_t i) const {
return static_cast<float>(FlexbufferWrapper::ElementAsDouble(i));
}
// TODO(b/192589496): Ops must always be there. Remove this function when fixed
uint32_t NumSubgraphOperators(const SubGraph* subgraph) {
if (subgraph->operators() != nullptr) {
return subgraph->operators()->size();
} else {
return 0;
}
}
// TODO(b/192589496): Ops must always be there. Remove this function when fixed
uint32_t NumSubgraphOperators(const Model* model, int subgraph_idx) {
const SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx);
return NumSubgraphOperators(subgraph);
}
} // namespace tflite

View File

@@ -0,0 +1,56 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef THIRD_PARTY_TFLITE_MICRO_TENSORFLOW_LITE_MICRO_FLATBUFFER_UTILS_H_
#define THIRD_PARTY_TFLITE_MICRO_TENSORFLOW_LITE_MICRO_FLATBUFFER_UTILS_H_
#include "flatbuffers/flatbuffers.h"
#include "flatbuffers/flexbuffers.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
// Kernels use flexbuffers::Map to pack their init parameters in a tflite file,
// with the parameter names as map keys and the parameter values as the
// corresponding map values.
// Accessing the map values using the flexbuffers::Map class is inline heavy,
// which can cause the code size to bloat beyond what's reasonable for a micro
// application. Use this class instead, when possible.
// FlexbufferWrapper takes advantage of the following properties of
// flexbuffers::Map:
// 1. It can be viewed as a flexbuffers::Vector of the values.
// 2. The values in the vector are ordered alphabetically by their keys.
// 3. All integer and Boolean values are stored as 64-bit numbers.
// 4. All floating point values are stored as double precision numbers.
// The properties are mentioned in the flexbuffers docs, but we rely on
// a unit test to catch design changes.
class FlexbufferWrapper : public flexbuffers::Vector {
public:
// Construct with a serialized flexbuffer 'buffer' of 'size' bytes
explicit FlexbufferWrapper(const uint8_t* buffer, size_t size);
int64_t ElementAsInt64(size_t i) const;
uint64_t ElementAsUInt64(size_t i) const;
int32_t ElementAsInt32(size_t i) const;
bool ElementAsBool(size_t i) const;
double ElementAsDouble(size_t i) const;
float ElementAsFloat(size_t i) const;
};
// Return the number of operators in a tflite subgraph
uint32_t NumSubgraphOperators(const SubGraph* subgraph);
uint32_t NumSubgraphOperators(const Model* model, int subgraph_idx);
} // namespace tflite
#endif // THIRD_PARTY_TFLITE_MICRO_TENSORFLOW_LITE_MICRO_FLATBUFFER_UTILS_H_
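As a minimal sketch of the intended usage (mirroring the cycles_max custom op migrated later in this commit; the parameter name and index here are illustrative, not canonical):

// Sketch of a custom-op Init() reading its flexbuffer-packed parameters
// through FlexbufferWrapper instead of flexbuffers::Map.
#include <cstdint>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/flatbuffer_utils.h"

namespace {

struct OpData {
  int32_t cycles_max;
};

// Values in the flexbuffer vector are ordered alphabetically by key, so
// 'cycles_max' (the only parameter in this sketch) sits at index 0.
constexpr int kCyclesMaxIndex = 0;

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  OpData* op_data = static_cast<OpData*>(
      context->AllocatePersistentBuffer(context, sizeof(OpData)));
  if (buffer != nullptr && length > 0) {
    const uint8_t* buffer_t = reinterpret_cast<const uint8_t*>(buffer);
    tflite::FlexbufferWrapper wrapper(buffer_t, length);
    op_data->cycles_max = wrapper.ElementAsInt32(kCyclesMaxIndex);
  } else {
    op_data->cycles_max = 0;
  }
  return op_data;
}

}  // namespace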

View File

@@ -1,4 +1,4 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/kernels/activations.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
@@ -25,141 +27,21 @@ limitations under the License.
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace ops {
namespace micro {
namespace activations {
namespace {
struct ReluOpData {
ReluParams params;
};
struct Relu6OpData {
int8_t six_int8;
int8_t zero_int8;
uint8_t six_uint8;
uint8_t zero_uint8;
};
} // namespace
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
template <typename T>
inline void ReluQuantized(const ReluOpData& data,
const RuntimeShape& input_shape,
const RuntimeShape& output_shape, const T* input_data,
T* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const int32_t val = static_cast<int32_t>(input_data[i]);
int32_t clamped =
data.params.output_offset +
MultiplyByQuantizedMultiplier(val - data.params.input_offset,
data.params.output_multiplier,
data.params.output_shift);
clamped = std::max(data.params.quantized_activation_min, clamped);
clamped = std::min(data.params.quantized_activation_max, clamped);
output_data[i] = static_cast<T>(clamped);
}
}
template <typename T>
inline void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
ReluOpData* data) {
float act_min = 0.0;
float act_max = std::numeric_limits<float>::infinity();
double real_multiplier =
static_cast<double>(input->params.scale / output->params.scale);
const RuntimeShape input_shape = GetTensorShape(input);
const RuntimeShape output_shape = GetTensorShape(output);
QuantizeMultiplier(real_multiplier, &data->params.output_multiplier,
&data->params.output_shift);
data->params.quantized_activation_min = std::max(
static_cast<int32_t>(std::numeric_limits<T>::min()),
output->params.zero_point +
static_cast<int32_t>(roundf(act_min / output->params.scale)));
data->params.quantized_activation_max =
act_max == std::numeric_limits<float>::infinity()
? static_cast<int32_t>(std::numeric_limits<T>::max())
: std::min(static_cast<int32_t>(std::numeric_limits<T>::max()),
output->params.zero_point +
static_cast<int32_t>(
roundf(act_max / output->params.scale)));
data->params.input_offset = input->params.zero_point;
data->params.output_offset = output->params.zero_point;
}
inline void ReluFloat(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const float val = input_data[i];
const float lower = 0.0f;
const float clamped = val < lower ? lower : val;
output_data[i] = clamped;
}
}
inline void Relu6Float(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const float val = input_data[i];
const float upper = 6.0f;
const float lower = 0.0f;
const float clamped = val > upper ? upper : val < lower ? lower : val;
output_data[i] = clamped;
}
}
template <typename Q>
inline void Relu6Quantized(Q lower, Q upper, const RuntimeShape& input_shape,
const Q* input_data,
const RuntimeShape& output_shape, Q* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const Q val = input_data[i];
const Q clamped = val > upper ? upper : val < lower ? lower : val;
output_data[i] = clamped;
}
}
void* ReluInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(ReluOpData));
}
TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
ReluOpData* data = static_cast<ReluOpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
if (input->type == kTfLiteInt8) {
CalculateReluOpData<int8_t>(input, output, data);
} else if (input->type == kTfLiteUInt8) {
CalculateReluOpData<uint8_t>(input, output, data);
}
return kTfLiteOk;
}
TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
const ReluOpData& data = *(static_cast<const ReluOpData*>(node->user_data));
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
tflite::micro::GetEvalInput(context, node, kActivationsInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
tflite::micro::GetEvalOutput(context, node, kActivationsOutputTensor);
switch (input->type) {
case kTfLiteFloat32: {
@@ -171,19 +53,12 @@ TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteOk;
}
case kTfLiteInt8: {
ReluQuantized<int8_t>(data, tflite::micro::GetTensorShape(input),
tflite::ReluQuantized(data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
}
case kTfLiteUInt8: {
ReluQuantized<uint8_t>(data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorData<uint8_t>(output));
return kTfLiteOk;
}
default: {
TF_LITE_KERNEL_LOG(context, "Only float32 is supported currently, got %s",
TfLiteTypeGetName(input->type));
@@ -197,34 +72,14 @@ void* Relu6Init(TfLiteContext* context, const char* buffer, size_t length) {
return context->AllocatePersistentBuffer(context, sizeof(Relu6OpData));
}
TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
Relu6OpData* data = static_cast<Relu6OpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
if (input->type == kTfLiteInt8) {
data->six_int8 = FloatToQuantizedType<int8_t>(6.0f, input->params.scale,
input->params.zero_point);
data->zero_int8 = input->params.zero_point;
} else if (input->type == kTfLiteUInt8) {
data->six_uint8 = FloatToQuantizedType<uint8_t>(6.0f, input->params.scale,
input->params.zero_point);
data->zero_uint8 = input->params.zero_point;
}
return kTfLiteOk;
}
TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
const Relu6OpData& data = *(static_cast<const Relu6OpData*>(node->user_data));
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
tflite::micro::GetEvalInput(context, node, kActivationsInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
tflite::micro::GetEvalOutput(context, node, kActivationsOutputTensor);
switch (input->type) {
case kTfLiteFloat32: {
@@ -236,19 +91,11 @@ TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteOk;
}
case kTfLiteInt8: {
Relu6Quantized<int8_t>(data.zero_int8, data.six_int8,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
}
case kTfLiteUInt8: {
Relu6Quantized<uint8_t>(data.zero_uint8, data.six_uint8,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
Relu6Quantized(data.zero_int8, data.six_int8,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
}
default: {
@@ -259,13 +106,13 @@ TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
}
}
} // namespace activations
} // namespace
TfLiteRegistration Register_RELU() {
return {/*init=*/activations::ReluInit,
return {/*init=*/ReluInit,
/*free=*/nullptr,
/*prepare=*/activations::ReluPrepare,
/*invoke=*/activations::ReluEval,
/*prepare=*/ReluPrepare,
/*invoke=*/ReluEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
@@ -273,16 +120,14 @@ TfLiteRegistration Register_RELU() {
}
TfLiteRegistration Register_RELU6() {
return {/*init=*/activations::Relu6Init,
return {/*init=*/Relu6Init,
/*free=*/nullptr,
/*prepare=*/activations::Relu6Prepare,
/*invoke=*/activations::Relu6Eval,
/*prepare=*/Relu6Prepare,
/*invoke=*/Relu6Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,63 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATIONS_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATIONS_H_
#include <cstdint>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
extern const int kActivationsInputTensor;
extern const int kActivationsOutputTensor;
struct ReluOpData {
ReluParams params;
};
struct Relu6OpData {
int8_t six_int8;
int8_t zero_int8;
};
void ReluQuantized(const ReluOpData& data, const RuntimeShape& input_shape,
const RuntimeShape& output_shape, const int8_t* input_data,
int8_t* output_data);
template <typename T>
void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
ReluOpData* data);
void ReluFloat(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data);
void Relu6Float(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data);
void Relu6Quantized(int8_t lower, int8_t upper, const RuntimeShape& input_shape,
const int8_t* input_data, const RuntimeShape& output_shape,
int8_t* output_data);
TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node);
TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATIONS_H_
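A minimal standalone sketch exercising the now-shared Relu6Quantized() helper declared above; the scale and zero point are assumed example values, only the clamp semantics come from this diff:

#include <cstdint>
#include <cstdio>

#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/micro/kernels/activations.h"

int main() {
  // Assume scale = 0.1, zero_point = -128: six quantizes to
  // round(6.0 / 0.1) + zero_point = -68.
  const int8_t zero_int8 = -128;
  const int8_t six_int8 = -68;

  const int32_t dims[4] = {1, 1, 1, 4};
  const tflite::RuntimeShape shape(4, dims);

  const int8_t input[4] = {-128, -100, -68, 10};
  int8_t output[4];
  tflite::Relu6Quantized(zero_int8, six_int8, shape, input, shape, output);

  for (int i = 0; i < 4; ++i) {
    // Each value is clamped into [zero_int8, six_int8] = [-128, -68].
    printf("%d -> %d\n", input[i], output[i]);
  }
  return 0;
}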

View File

@@ -0,0 +1,148 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <algorithm>
#include <cstdint>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/activations.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
const int kActivationsInputTensor = 0;
const int kActivationsOutputTensor = 0;
void ReluQuantized(const ReluOpData& data, const RuntimeShape& input_shape,
const RuntimeShape& output_shape, const int8_t* input_data,
int8_t* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const int32_t val = static_cast<int32_t>(input_data[i]);
int32_t clamped =
data.params.output_offset +
MultiplyByQuantizedMultiplier(val - data.params.input_offset,
data.params.output_multiplier,
data.params.output_shift);
clamped = std::max(data.params.quantized_activation_min, clamped);
clamped = std::min(data.params.quantized_activation_max, clamped);
output_data[i] = static_cast<int8_t>(clamped);
}
}
template <typename T>
void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
ReluOpData* data) {
float act_min = 0.0;
float act_max = std::numeric_limits<float>::infinity();
double real_multiplier =
static_cast<double>(input->params.scale / output->params.scale);
const RuntimeShape input_shape = GetTensorShape(input);
const RuntimeShape output_shape = GetTensorShape(output);
QuantizeMultiplier(real_multiplier, &data->params.output_multiplier,
&data->params.output_shift);
data->params.quantized_activation_min = std::max(
static_cast<int32_t>(std::numeric_limits<T>::min()),
output->params.zero_point +
static_cast<int32_t>(roundf(act_min / output->params.scale)));
data->params.quantized_activation_max =
act_max == std::numeric_limits<float>::infinity()
? static_cast<int32_t>(std::numeric_limits<T>::max())
: std::min(static_cast<int32_t>(std::numeric_limits<T>::max()),
output->params.zero_point +
static_cast<int32_t>(
roundf(act_max / output->params.scale)));
data->params.input_offset = input->params.zero_point;
data->params.output_offset = output->params.zero_point;
}
void ReluFloat(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const float val = input_data[i];
const float lower = 0.0f;
const float clamped = val < lower ? lower : val;
output_data[i] = clamped;
}
}
void Relu6Float(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const float val = input_data[i];
const float upper = 6.0f;
const float lower = 0.0f;
const float clamped = val > upper ? upper : val < lower ? lower : val;
output_data[i] = clamped;
}
}
void Relu6Quantized(int8_t lower, int8_t upper, const RuntimeShape& input_shape,
const int8_t* input_data, const RuntimeShape& output_shape,
int8_t* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const int8_t val = input_data[i];
const int8_t clamped = val > upper ? upper : val < lower ? lower : val;
output_data[i] = clamped;
}
}
TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
ReluOpData* data = static_cast<ReluOpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kActivationsInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kActivationsOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
if (input->type == kTfLiteInt8) {
CalculateReluOpData<int8_t>(input, output, data);
}
return kTfLiteOk;
}
TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
Relu6OpData* data = static_cast<Relu6OpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kActivationsInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
if (input->type == kTfLiteInt8) {
data->six_int8 = FloatToQuantizedType<int8_t>(6.0f, input->params.scale,
input->params.zero_point);
data->zero_int8 = input->params.zero_point;
}
return kTfLiteOk;
}
} // namespace tflite

View File

@@ -66,12 +66,12 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params,
OpData* data) {
data->requires_broadcast = !HaveSameShapes(input1, input2);
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
// 8bit -> 8bit general quantized path, with general rescalings
data->input1_offset = -input1->params.zero_point;
data->input2_offset = -input2->params.zero_point;
data->output_offset = output->params.zero_point;
data->left_shift = 20;
data->left_shift = (output->type == kTfLiteInt16) ? 15 : 20;
const double twice_max_input_scale =
2 * static_cast<double>(
std::max(input1->params.scale, input2->params.scale));
@@ -133,24 +133,25 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
tflite::ArithmeticParams op_params;
op_params.left_shift = data->left_shift;
op_params.input1_offset = data->input1_offset;
op_params.input1_multiplier = data->input1_multiplier;
op_params.input1_shift = data->input1_shift;
op_params.input2_offset = data->input2_offset;
op_params.input2_multiplier = data->input2_multiplier;
op_params.input2_shift = data->input2_shift;
op_params.output_offset = data->output_offset;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
SetActivationParams(data->output_activation_min,
data->output_activation_max, &op_params);
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
if (output->type == kTfLiteInt8) {
tflite::ArithmeticParams op_params;
op_params.left_shift = data->left_shift;
op_params.input1_offset = data->input1_offset;
op_params.input1_multiplier = data->input1_multiplier;
op_params.input1_shift = data->input1_shift;
op_params.input2_offset = data->input2_offset;
op_params.input2_multiplier = data->input2_multiplier;
op_params.input2_shift = data->input2_shift;
op_params.output_offset = data->output_offset;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
SetActivationParams(data->output_activation_min, data->output_activation_max,
&op_params);
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
switch (output->type) {
case kTfLiteInt8: {
if (need_broadcast) {
reference_integer_ops::BroadcastAdd4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
@@ -168,24 +169,32 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
} else {
break;
}
case kTfLiteInt16: {
if (need_broadcast) {
reference_ops::BroadcastAdd4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorData<int16_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorData<int16_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
tflite::micro::GetTensorData<int16_t>(output));
} else {
reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorData<int16_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorData<int16_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
tflite::micro::GetTensorData<int16_t>(output),
false);
}
break;
}
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(output->type), output->type);
return kTfLiteError;
}
return kTfLiteOk;
@@ -231,7 +240,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
if (output->type == kTfLiteFloat32) {
EvalAdd(context, node, params, data, input1, input2, output);
} else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
} else if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, data,
input1, input2, output));
} else {

View File

@@ -18,6 +18,7 @@ limitations under the License.
#include <cstdint>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
@@ -28,6 +29,22 @@ namespace {
constexpr int kInputTensor0 = 0;
constexpr int kOutputTensor = 0;
constexpr int kAddNIntegerShift = 20;
// only used with INT8 tensors
struct OpData {
int32_t output_activation_min;
int32_t output_activation_max;
int32_t input_offset;
int32_t output_offset;
int32_t input_multiplier;
int32_t output_multiplier;
int input_shift;
int output_shift;
int left_shift;
int scratch_index;
};
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
int num_inputs = NumInputs(node);
TF_LITE_ENSURE(context, num_inputs >= 2);
@@ -47,19 +64,61 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, i, &input));
TF_LITE_ENSURE(context, HaveSameShapes(input_tensor_first, input));
TF_LITE_ENSURE_TYPES_EQ(context, input_tensor_first->type, input->type);
// Check that all INT8 input tensors have the same zero-point and scale.
if (input_tensor_first->type == kTfLiteInt8) {
TF_LITE_ENSURE(context, input_tensor_first->params.zero_point ==
input->params.zero_point);
TF_LITE_ENSURE(context,
input_tensor_first->params.scale == input->params.scale);
}
}
// Allocate scratch buffer space for pointer to each tensor's data
// and store the scratch buffer index in the node's user_data
if (output->type == kTfLiteFloat32) {
// Allocate scratch buffer space for pointer to each tensor's data
// and store the scratch buffer index in the node's user_data
int scratch_index;
size_t scratch_size = sizeof(float*) * num_inputs;
TF_LITE_ENSURE_OK(context, context->RequestScratchBufferInArena(
context, scratch_size, &scratch_index));
node->user_data =
reinterpret_cast<decltype(node->user_data)>(scratch_index);
} else if (output->type == kTfLiteInt8) {
node->user_data =
context->AllocatePersistentBuffer(context, sizeof(OpData));
OpData* data = static_cast<OpData*>(node->user_data);
// Allocate scratch buffer space for pointer to each tensor's data
// and store the scratch buffer index in OpData
size_t scratch_size = sizeof(int8_t*) * num_inputs;
TF_LITE_ENSURE_OK(
context, context->RequestScratchBufferInArena(context, scratch_size,
&data->scratch_index));
// 8bit -> 8bit general quantized path, with general rescalings
data->input_offset = -input_tensor_first->params.zero_point;
data->output_offset = output->params.zero_point;
data->left_shift = kAddNIntegerShift;
const double twice_max_input_scale =
2 * static_cast<double>(input_tensor_first->params.scale);
const double real_input_multiplier =
static_cast<double>(input_tensor_first->params.scale) /
twice_max_input_scale;
const double real_output_multiplier =
twice_max_input_scale /
((1 << data->left_shift) * static_cast<double>(output->params.scale));
QuantizeMultiplierSmallerThanOneExp(
real_input_multiplier, &data->input_multiplier, &data->input_shift);
QuantizeMultiplierSmallerThanOneExp(
real_output_multiplier, &data->output_multiplier, &data->output_shift);
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, kTfLiteActNone, output, &data->output_activation_min,
&data->output_activation_max));
} else {
TF_LITE_KERNEL_LOG(context, "ADD_N only supports FLOAT32, got %s.",
TF_LITE_KERNEL_LOG(context, "ADD_N only supports FLOAT32 and INT8, got %s.",
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
@@ -72,12 +131,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
}
template <typename T>
void EvalAddN(TfLiteContext* context, TfLiteNode* node,
TfLiteEvalTensor* output) {
inline const T** CopyInputsToScratchBuffer(TfLiteContext* context,
TfLiteNode* node,
const int scratch_index) {
int num_inputs = NumInputs(node);
int scratch_index =
static_cast<int>(reinterpret_cast<intptr_t>(node->user_data));
void* scratch_buffer = context->GetScratchBuffer(context, scratch_index);
const T** all_inputs = static_cast<decltype(all_inputs)>(scratch_buffer);
for (int i = 0; i < num_inputs; i++) {
@@ -86,17 +143,56 @@ void EvalAddN(TfLiteContext* context, TfLiteNode* node,
all_inputs[i] = tflite::micro::GetTensorData<T>(next_input);
}
return all_inputs;
}
template <typename T>
void EvalAddN(TfLiteContext* context, TfLiteNode* node,
TfLiteEvalTensor* output) {
int num_inputs = NumInputs(node);
int scratch_index =
static_cast<int>(reinterpret_cast<intptr_t>(node->user_data));
const T** all_inputs =
CopyInputsToScratchBuffer<T>(context, node, scratch_index);
reference_ops::AddN<T>(tflite::micro::GetTensorShape(output), num_inputs,
all_inputs, tflite::micro::GetTensorData<T>(output));
}
template <typename T>
void EvalAddNQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteEvalTensor* output) {
int num_inputs = NumInputs(node);
OpData* data = static_cast<OpData*>(node->user_data);
const T** all_inputs =
CopyInputsToScratchBuffer<T>(context, node, data->scratch_index);
ArithmeticParams params;
params.left_shift = data->left_shift;
params.input1_offset = data->input_offset;
params.input1_multiplier = data->input_multiplier;
params.input1_shift = data->input_shift;
params.output_offset = data->output_offset;
params.output_multiplier = data->output_multiplier;
params.output_shift = data->output_shift;
SetActivationParams(data->output_activation_min, data->output_activation_max,
&params);
reference_ops::AddN(params, tflite::micro::GetTensorShape(output), num_inputs,
all_inputs, tflite::micro::GetTensorData<T>(output));
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
if (output->type == kTfLiteFloat32) {
EvalAddN<float>(context, node, output);
} else if (output->type == kTfLiteInt8) {
EvalAddNQuantized<int8_t>(context, node, output);
} else {
TF_LITE_KERNEL_LOG(context, "ADD_N only supports FLOAT32, got %s.",
TF_LITE_KERNEL_LOG(context, "ADD_N only supports FLOAT32 and INT8, got %s.",
TfLiteTypeGetName(output->type));
return kTfLiteError;
}

View File

@@ -13,8 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#define FLATBUFFERS_LOCALE_INDEPENDENT 0
#include "flatbuffers/flexbuffers.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
@@ -22,6 +20,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/flatbuffer_utils.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
/*
@@ -56,6 +55,11 @@ namespace {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
// Indices into the init flexbuffer's vector.
// The parameter's name is in the comment that follows.
// Elements in the vectors are ordered alphabetically by parameter name.
constexpr int kCyclesMaxIndex = 0; // 'cycles_max'
// TODO(b/149795762): Add this to TfLiteStatus enum.
constexpr TfLiteStatus kTfLiteAbort = static_cast<TfLiteStatus>(-9);
@@ -76,8 +80,8 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) {
if (buffer != nullptr && length > 0) {
const uint8_t* buffer_t = reinterpret_cast<const uint8_t*>(buffer);
const flexbuffers::Map& m = flexbuffers::GetRoot(buffer_t, length).AsMap();
op_data->cycles_max = m["cycles_max"].AsInt32();
tflite::FlexbufferWrapper wrapper(buffer_t, length);
op_data->cycles_max = wrapper.ElementAsInt32(kCyclesMaxIndex);
} else {
op_data->cycles_max = 0;
}
@@ -118,6 +122,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
// https://docs.google.com/document/d/1lc_G2ZFhjiKFo02UHjBaljye1xsL0EkfybkaVELEE3Q/edit?usp=sharing
// https://docs.google.com/document/d/1pGc42PuWyrk-Jy1-9qeqtggvsmHr1ifz8Lmqfpr2rKA/edit?usp=sharing
if (output->dims->data[1] == 5 || output->dims->data[1] == 13 ||
output->dims->data[1] == 25 ||
(cb_prepare_count == 5 && output->dims->data[2] == 2 &&
output->dims->data[3] == 96)) {
op_data->cycles_max = 1;

View File

@@ -147,8 +147,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone);
TF_LITE_ENSURE(context,
input_type == kTfLiteFloat32 || input_type == kTfLiteUInt8 ||
input_type == kTfLiteInt8 || input_type == kTfLiteInt32 ||
input_type == kTfLiteInt64);
input_type == kTfLiteInt8 || input_type == kTfLiteInt16 ||
input_type == kTfLiteInt32 || input_type == kTfLiteInt64);
// Output type must match input type
TF_LITE_ENSURE_EQ(context, output_type, input_type);
@@ -182,6 +182,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
switch (output_type) { // Already know in/out types are same.
case kTfLiteFloat32:
case kTfLiteInt16:
case kTfLiteInt32:
case kTfLiteInt64: {
data->params.axis = CalculatePositiveAxis(params->axis, output);
@@ -247,6 +248,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
case kTfLiteInt64:
EvalUnquantized<int64_t>(context, node);
break;
case kTfLiteInt16:
EvalUnquantized<int16_t>(context, node);
break;
default:
TF_LITE_KERNEL_LOG(

View File

@@ -53,8 +53,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const auto& data = *(static_cast<const OpDataConv*>(node->user_data));
TF_LITE_ENSURE_EQ(context, input->type, output->type);
TF_LITE_ENSURE_MSG(context, input->type == filter->type,
"Hybrid models are not supported on TFLite Micro.");
TF_LITE_ENSURE_MSG(
context,
input->type == filter->type ||
(input->type == kTfLiteInt16 && filter->type == kTfLiteInt8),
"Hybrid models are not supported on TFLite Micro.");
switch (input->type) { // Already know in/out types are same.
case kTfLiteFloat32: {
@@ -70,6 +73,19 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
tflite::micro::GetTensorShape(nullptr), nullptr);
break;
}
case kTfLiteInt16: {
reference_integer_ops::ConvPerChannel(
ConvParamsQuantized(params, data), data.per_channel_output_multiplier,
data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int16_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<std::int64_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
break;
}
case kTfLiteInt8: {
reference_integer_ops::ConvPerChannel(
ConvParamsQuantized(params, data), data.per_channel_output_multiplier,

View File

@@ -72,6 +72,21 @@ TfLiteStatus CalculateOpDataConv(TfLiteContext* context, TfLiteNode* node,
TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node);
// This is the most generic TfLiteRegistration. The actual supported types may
// still be target dependent. The only requirement is that every implementation
// (reference or optimized) must define this function.
TfLiteRegistration Register_CONV_2D();
#if defined(XTENSA)
// Returns a TfLiteRegistration struct for kernel variant that only supports
// int8 inputs and outputs.
TfLiteRegistration Register_CONV_2D_INT8REF();
#else
inline TfLiteRegistration Register_CONV_2D_INT8REF() {
return Register_CONV_2D();
}
#endif
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
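For context, application code typically reaches these registrations through an op resolver; a minimal sketch (the resolver size and use of the default registration are illustrative assumptions, not part of this diff):

#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

void RegisterConv(tflite::MicroMutableOpResolver<1>& resolver) {
  // AddConv2D() wires up the generic Register_CONV_2D() declared in conv.h.
  // Target-specific builds (e.g. the XTENSA int8-only path guarded above)
  // expose additional Register_CONV_2D_* entry points for their variants.
  resolver.AddConv2D();
}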

View File

@@ -111,8 +111,7 @@ TfLiteStatus CalculateOpDataConv(TfLiteContext* context, TfLiteNode* node,
context, input, filter, bias, output, params.activation,
&data->output_multiplier, &data->output_shift,
&data->output_activation_min, &data->output_activation_max,
data->per_channel_output_multiplier,
reinterpret_cast<int*>(data->per_channel_output_shift),
data->per_channel_output_multiplier, data->per_channel_output_shift,
output_channels));
}
@@ -155,7 +154,7 @@ TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node) {
context, num_channels * sizeof(int32_t)));
// All per-channel quantized tensors need valid zero point and scale arrays.
if (input->type == kTfLiteInt8) {
if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) {
TF_LITE_ENSURE_EQ(context, filter->quantization.type,
kTfLiteAffineQuantization);

View File

@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_CONV_TEST_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_CONV_TEST_H_
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
@@ -59,36 +59,45 @@ TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size,
TfLiteRegistration registration,
uint8_t* output_data, float tolerance = 1e-5);
TfLiteStatus TestConvFloat(const int* input_dims_data, const float* input_data,
const int* filter_dims_data,
const float* filter_data, const int* bias_dims_data,
const float* bias_data, const int* output_dims_data,
TfLiteStatus TestConvFloat(int* input_dims_data, const float* input_data,
int* filter_dims_data, const float* filter_data,
int* bias_dims_data, const float* bias_data,
int* output_dims_data,
const float* expected_output_data,
TfLiteConvParams* conv_params,
TfLiteRegistration registration, float* output_data);
TfLiteStatus TestConvQuantizedPerLayer(
const int* input_dims_data, const float* input_data,
uint8_t* input_quantized, float input_scale, const int* filter_dims_data,
const float* filter_data, uint8_t* filter_quantized, float filter_scale,
const int* bias_dims_data, const float* bias_data, int32_t* bias_quantized,
const int* output_dims_data, const float* expected_output_data,
uint8_t* expected_output_quantized, float output_scale,
TfLiteConvParams* conv_params, TfLiteRegistration registration,
uint8_t* output_data);
int* input_dims_data, const float* input_data, uint8_t* input_quantized,
float input_scale, int* filter_dims_data, const float* filter_data,
uint8_t* filter_quantized, float filter_scale, int* bias_dims_data,
const float* bias_data, int32_t* bias_quantized, int* output_dims_data,
const float* expected_output_data, uint8_t* expected_output_quantized,
float output_scale, TfLiteConvParams* conv_params,
TfLiteRegistration registration, uint8_t* output_data);
TfLiteStatus TestConvQuantizedPerChannel(
const int* input_dims_data, const float* input_data,
int8_t* input_quantized, float input_scale, int input_zero_point,
const int* filter_dims_data, const float* filter_data,
int8_t* filter_data_quantized, const int* bias_dims_data,
const float* bias_data, int32_t* bias_data_quantized, float* bias_scales,
int* bias_zero_points, const int* output_dims_data,
int* input_dims_data, const float* input_data, int8_t* input_quantized,
float input_scale, int input_zero_point, int* filter_dims_data,
const float* filter_data, int8_t* filter_data_quantized,
int* bias_dims_data, const float* bias_data, int32_t* bias_data_quantized,
float* bias_scales, int* bias_zero_points, int* output_dims_data,
const float* expected_output_data, int8_t* expected_output_data_quantized,
float output_scale, int output_zero_point, TfLiteConvParams* conv_params,
TfLiteRegistration registration, int8_t* output_data);
TfLiteStatus TestConvQuantizedPerChannel(
int* input_dims_data, const float* input_data, int16_t* input_quantized,
float input_scale, int input_zero_point, int* filter_dims_data,
const float* filter_data, int8_t* filter_data_quantized,
int* bias_dims_data, const float* bias_data,
std::int64_t* bias_data_quantized, float* bias_scales,
int* bias_zero_points, int* output_dims_data,
const float* expected_output_data, int16_t* expected_output_data_quantized,
float output_scale, int output_zero_point, TfLiteConvParams* conv_params,
TfLiteRegistration registration, int16_t* output_data);
} // namespace testing
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
#endif // TENSORFLOW_LITE_MICRO_KERNELS_CONV_TEST_H_

View File

@@ -0,0 +1,173 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/cumsum.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace {
constexpr int kInputTensor = 0;
constexpr int kAxisTensor = 1;
constexpr int kOutputTensor = 0;
constexpr int kCumSumIntegerShift = 20;
// only used with INT8 tensors
struct OpData {
int32_t output_activation_min;
int32_t output_activation_max;
int32_t input_offset;
int32_t output_offset;
int32_t input_multiplier;
int32_t output_multiplier;
int input_shift;
int output_shift;
int left_shift;
};
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* axis = GetInput(context, node, kAxisTensor);
TF_LITE_ENSURE(context,
input->type == kTfLiteFloat32 || input->type == kTfLiteInt8);
TF_LITE_ENSURE_EQ(context, axis->type, kTfLiteInt32);
TF_LITE_ENSURE_EQ(context, NumElements(axis), 1);
TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_EQ(context, input->type, output->type);
TF_LITE_ENSURE(context, HaveSameShapes(input, output));
if (output->type == kTfLiteInt8) {
node->user_data =
context->AllocatePersistentBuffer(context, sizeof(OpData));
OpData* data = static_cast<OpData*>(node->user_data);
// 8bit -> 8bit general quantized path, with general rescalings
data->input_offset = -input->params.zero_point;
data->output_offset = output->params.zero_point;
data->left_shift = kCumSumIntegerShift;
const double twice_max_input_scale =
2 * static_cast<double>(input->params.scale);
const double real_input_multiplier =
static_cast<double>(input->params.scale) / twice_max_input_scale;
const double real_output_multiplier =
twice_max_input_scale /
((1 << data->left_shift) * static_cast<double>(output->params.scale));
QuantizeMultiplierSmallerThanOneExp(
real_input_multiplier, &data->input_multiplier, &data->input_shift);
QuantizeMultiplierSmallerThanOneExp(
real_output_multiplier, &data->output_multiplier, &data->output_shift);
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, kTfLiteActNone, output, &data->output_activation_min,
&data->output_activation_max));
}
return kTfLiteOk;
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
return CalculateOpData(context, node);
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
const TfLiteEvalTensor* axis_tensor =
tflite::micro::GetEvalInput(context, node, kAxisTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
auto* cs_params = static_cast<TfLiteCumsumParams*>(node->builtin_data);
auto input_shape = tflite::micro::GetTensorShape(input);
int32_t axis = *tflite::micro::GetTensorData<int32_t>(axis_tensor);
if (axis < 0) axis += input_shape.DimensionsCount();
if (axis < 0 || axis >= input_shape.DimensionsCount()) {
TF_LITE_KERNEL_LOG(context, "CUMSUM Invalid axis: %d", axis);
return kTfLiteError;
}
switch (input->type) {
case kTfLiteFloat32: {
reference_ops::CumSum(tflite::micro::GetTensorData<float>(input),
input_shape, axis, cs_params->exclusive,
cs_params->reverse,
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
} break;
case kTfLiteInt8: {
auto* data = static_cast<OpData*>(node->user_data);
ArithmeticParams params;
params.left_shift = data->left_shift;
params.input1_offset = data->input_offset;
params.input1_multiplier = data->input_multiplier;
params.input1_shift = data->input_shift;
params.output_offset = data->output_offset;
params.output_multiplier = data->output_multiplier;
params.output_shift = data->output_shift;
SetActivationParams(data->output_activation_min,
data->output_activation_max, &params);
reference_ops::CumSum(params, tflite::micro::GetTensorData<int8_t>(input),
input_shape, axis, cs_params->exclusive,
cs_params->reverse,
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
} break;
default: {
TF_LITE_KERNEL_LOG(context,
"CUMSUM only supports FLOAT32 and INT8, got %s.",
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
}
return kTfLiteError;
}
} // namespace
TfLiteRegistration Register_CUMSUM() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
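The int8 path above derives its fixed-point parameters with the standard add-style rescaling; a standalone sketch with assumed example scales (only the formulas and the left shift of 20 come from this diff):

#include <cstdint>
#include <cstdio>

#include "tensorflow/lite/kernels/internal/quantization_util.h"

int main() {
  const double input_scale = 0.05;   // assumed example value
  const double output_scale = 0.1;   // assumed example value
  const int left_shift = 20;         // kCumSumIntegerShift / kAddNIntegerShift

  const double twice_max_input_scale = 2 * input_scale;
  // Inputs are rescaled by 0.5 into a higher-precision intermediate domain
  // (left_shift = 20), accumulated there, then scaled back to the int8 output.
  const double real_input_multiplier = input_scale / twice_max_input_scale;
  const double real_output_multiplier =
      twice_max_input_scale / ((1 << left_shift) * output_scale);

  int32_t input_multiplier = 0, output_multiplier = 0;
  int input_shift = 0, output_shift = 0;
  tflite::QuantizeMultiplierSmallerThanOneExp(real_input_multiplier,
                                              &input_multiplier, &input_shift);
  tflite::QuantizeMultiplierSmallerThanOneExp(
      real_output_multiplier, &output_multiplier, &output_shift);

  printf("input:  %ld * 2^%d\n", static_cast<long>(input_multiplier),
         input_shift);
  printf("output: %ld * 2^%d\n", static_cast<long>(output_multiplier),
         output_shift);
  return 0;
}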

View File

@@ -0,0 +1,143 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/depth_to_space.h"
#include <stdint.h>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
// input/output tensor shape rank associations
constexpr int kBatchRank = 0;
constexpr int kHeightRank = 1;
constexpr int kWidthRank = 2;
constexpr int kDepthRank = 3;
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
auto* params =
reinterpret_cast<TfLiteDepthToSpaceParams*>(node->builtin_data);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
auto data_type = output->type;
TF_LITE_ENSURE(context,
data_type == kTfLiteFloat32 || data_type == kTfLiteInt8);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
const int block_size = params->block_size;
TF_LITE_ENSURE(context, block_size > 0);
const int input_height = input->dims->data[kHeightRank];
const int input_width = input->dims->data[kWidthRank];
const int input_channels = input->dims->data[kDepthRank];
int output_height = input_height * block_size;
int output_width = input_width * block_size;
int output_channels = input_channels / block_size / block_size;
TF_LITE_ENSURE_EQ(context, input_height, output_height / block_size);
TF_LITE_ENSURE_EQ(context, input_width, output_width / block_size);
TF_LITE_ENSURE_EQ(context, input_channels,
output_channels * block_size * block_size);
// We must update the output tensor dimensions.
// The dims storage is expected to be the same area in memory
// for both TfLiteTensor and TfLiteEvalTensor. This is important
// because TfLiteTensor in the MicroInterpreter is a temporary
// allocation. For the KernelRunner interpreter, TfLiteEvalTensor
// is a temporary allocation. We must therefore relocate the dims
// from the FlatBuffer to the persistent storage arena.
TfLiteEvalTensor* output_eval =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_OK(context, tflite::micro::CreateWritableTensorDimsWithCopy(
context, output, output_eval));
output->dims->data[kBatchRank] = input->dims->data[kBatchRank];
output->dims->data[kHeightRank] = output_height;
output->dims->data[kWidthRank] = output_width;
output->dims->data[kDepthRank] = output_channels;
return kTfLiteOk;
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
return CalculateOpData(context, node);
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params =
reinterpret_cast<TfLiteDepthToSpaceParams*>(node->builtin_data);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
tflite::DepthToSpaceParams op_params;
op_params.block_size = static_cast<int32_t>(params->block_size);
switch (input->type) { // Already know in/out types are same.
case kTfLiteFloat32:
reference_ops::DepthToSpace(op_params,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
break;
case kTfLiteInt8:
reference_ops::DepthToSpace(op_params,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
break;
default:
TF_LITE_KERNEL_LOG(
context, "DEPTH_TO_SPACE only supports FLOAT32 and INT8, got %s.",
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_DEPTH_TO_SPACE() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
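The shape checks in CalculateOpData above reduce to a small amount of arithmetic; a tiny sketch with a hypothetical 1x2x2x8 input and block_size = 2:

#include <cstdio>

int main() {
  const int block_size = 2;
  const int input_dims[4] = {1, 2, 2, 8};  // N, H, W, C

  const int output_height = input_dims[1] * block_size;                   // 4
  const int output_width = input_dims[2] * block_size;                    // 4
  const int output_channels = input_dims[3] / (block_size * block_size);  // 2

  printf("output dims: %d x %d x %d x %d\n", input_dims[0], output_height,
         output_width, output_channels);
  return 0;
}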

View File

@@ -20,7 +20,6 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"

View File

@@ -18,7 +18,6 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
@@ -113,8 +112,7 @@ TfLiteStatus CalculateOpDataDepthwiseConv(
context, input, filter, bias, output, params.activation,
&data->output_multiplier, &data->output_shift,
&data->output_activation_min, &data->output_activation_max,
data->per_channel_output_multiplier,
reinterpret_cast<int*>(data->per_channel_output_shift),
data->per_channel_output_multiplier, data->per_channel_output_shift,
output_channels));
}

View File

@@ -15,7 +15,6 @@ limitations under the License.
#include <numeric>
#define FLATBUFFERS_LOCALE_INDEPENDENT 0
#include "flatbuffers/flexbuffers.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
@@ -117,12 +116,11 @@ struct OpData {
};
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
OpData* op_data = nullptr;
const uint8_t* buffer_t = reinterpret_cast<const uint8_t*>(buffer);
const flexbuffers::Map& m = flexbuffers::GetRoot(buffer_t, length).AsMap();
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
op_data = reinterpret_cast<OpData*>(
context->AllocatePersistentBuffer(context, sizeof(OpData)));

View File

@@ -1,206 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/div.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace {
constexpr int kInputTensor1 = 0;
constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;
struct OpData {
// Parameters used in the quantized paths where the output is 8bit
int32_t input1_zero_point;
int32_t input2_zero_point;
int32_t output_zero_point;
int32_t output_activation_min;
int32_t output_activation_max;
// Parameters used in all quantized paths
int32_t output_multiplier;
int output_shift;
};
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
TfLiteDivParams* params, OpData* data) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input1;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputTensor1, &input1));
const TfLiteTensor* input2;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputTensor2, &input2));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, output->type);
if (output->type == kTfLiteInt8) {
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, params->activation, output, &data->output_activation_min,
&data->output_activation_max));
const double real_multiplier = static_cast<double>(
input1->params.scale / (input2->params.scale * output->params.scale));
QuantizeMultiplier(real_multiplier, &data->output_multiplier,
&data->output_shift);
data->input1_zero_point = input1->params.zero_point;
data->input2_zero_point = input2->params.zero_point;
data->output_zero_point = output->params.zero_point;
}
return kTfLiteOk;
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
auto* params = static_cast<TfLiteDivParams*>(node->builtin_data);
auto* data = static_cast<OpData*>(node->user_data);
return CalculateOpData(context, node, params, data);
}
void EvalDiv(TfLiteContext* context, TfLiteNode* node, TfLiteDivParams* params,
const OpData* data, const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
tflite::ArithmeticParams op_params = {};
#define TF_LITE_DIV(type, opname, data_type) \
data_type output_activation_min, output_activation_max; \
CalculateActivationRange(params->activation, &output_activation_min, \
&output_activation_max); \
SetActivationParams(output_activation_min, output_activation_max, \
&op_params); \
type::opname(op_params, tflite::micro::GetTensorShape(input1), \
tflite::micro::GetTensorData<data_type>(input1), \
tflite::micro::GetTensorShape(input2), \
tflite::micro::GetTensorData<data_type>(input2), \
tflite::micro::GetTensorShape(output), \
tflite::micro::GetTensorData<data_type>(output))
bool requires_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
if (requires_broadcast) {
TF_LITE_DIV(reference_ops, BroadcastDivSlow, float);
} else {
TF_LITE_DIV(reference_ops, Div, float);
}
#undef TF_LITE_DIV
}
TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteDivParams* params, const OpData* data,
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
tflite::ArithmeticParams op_params = {};
#define TF_LITE_DIV(type, opname, dtype) \
type::opname(op_params, tflite::micro::GetTensorShape(input1), \
tflite::micro::GetTensorData<dtype>(input1), \
tflite::micro::GetTensorShape(input2), \
tflite::micro::GetTensorData<dtype>(input2), \
tflite::micro::GetTensorShape(output), \
tflite::micro::GetTensorData<dtype>(output))
if (input1->type == kTfLiteInt8 && input2->type == kTfLiteInt8 &&
output->type == kTfLiteInt8) {
SetActivationParams(data->output_activation_min,
data->output_activation_max, &op_params);
op_params.input1_offset = -data->input1_zero_point;
op_params.input2_offset = -data->input2_zero_point;
op_params.output_offset = data->output_zero_point;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
bool requires_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
if (requires_broadcast) {
TF_LITE_DIV(reference_ops, BroadcastDivSlow, int8_t);
} else {
TF_LITE_DIV(reference_ops, Div, int8_t);
}
#undef TF_LITE_DIV
} else {
TF_LITE_KERNEL_LOG(
context, "Unsupported combination of input and output types in DIV.");
return kTfLiteError;
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = static_cast<TfLiteDivParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
auto* data = static_cast<OpData*>(node->user_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
if (output->type == kTfLiteFloat32) {
EvalDiv(context, node, params, data, input1, input2, output);
} else if (output->type == kTfLiteInt8) {
TF_LITE_ENSURE_OK(context, EvalQuantized(context, node, params, data,
input1, input2, output));
} else {
TF_LITE_KERNEL_LOG(context,
"DIV only supports FLOAT32, quantized INT8 "
"now, got type %s (%d).",
TfLiteTypeGetName(output->type), output->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_DIV() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
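
For reference, a minimal sketch of the real-space arithmetic that the quantized path of this (now removed) DIV kernel folds into output_multiplier/output_shift. The function name and values below are invented for illustration; with scales s1, s2, s_out and zero points z1, z2, z_out, the rescaling factor is s1 / (s2 * s_out), which is why CalculateOpData computes real_multiplier that way.
#include <algorithm>
#include <cmath>
#include <cstdint>
// Hypothetical float reference for the int8 DIV rescaling; q1/q2 are the
// quantized inputs, s*/z* the scales and zero points of the three tensors.
int8_t QuantizedDivReference(int8_t q1, int8_t q2, float s1, float s2,
                             float s_out, int32_t z1, int32_t z2,
                             int32_t z_out) {
  const float real1 = s1 * (q1 - z1);    // dequantize input 1
  const float real2 = s2 * (q2 - z2);    // dequantize input 2
  const float real_out = real1 / real2;  // the op in real space
  const int32_t q_out =
      z_out + static_cast<int32_t>(std::round(real_out / s_out));
  return static_cast<int8_t>(
      std::min<int32_t>(127, std::max<int32_t>(-128, q_out)));
}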

View File

@@ -25,6 +25,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
namespace tflite {
namespace {
@@ -45,7 +46,10 @@ using TransformFunc = float (*)(float);
template <typename T>
void PopulateLookupTable(const TfLiteTensor* input, const TfLiteTensor* output,
const TransformFunc transform, OpData* data) {
if (sizeof(T) != 1) TF_LITE_FATAL("Lookup table valid only for 8bit");
if (sizeof(T) != 1) {
MicroPrintf("Lookup table valid only for 8bit");
TFLITE_ABORT;
}
const float inverse_scale = 1 / output->params.scale;
int32_t maxval = std::numeric_limits<T>::max();

View File

@@ -0,0 +1,130 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/floor_div.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace {
// Input/output tensor index.
constexpr int kInputTensor1 = 0;
constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input1;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputTensor1, &input1));
const TfLiteTensor* input2;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputTensor2, &input2));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, output->type);
return kTfLiteOk;
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
return nullptr;
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
return CalculateOpData(context, node);
}
template <typename T>
TfLiteStatus EvalFloorDiv(TfLiteContext* context,
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
const T* denominator_data = tflite::micro::GetTensorData<T>(input2);
// Validate the denominator.
for (int i = 0; i < tflite::ElementCount(*input2->dims); ++i) {
if (std::equal_to<T>()(denominator_data[i], 0)) {
TF_LITE_KERNEL_LOG(context, "Division by 0");
return kTfLiteError;
}
}
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
if (requires_broadcast) {
reference_ops::BroadcastBinaryFunction4DSlow<T, T, T>(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<T>(input1),
tflite::micro::GetTensorShape(input2), denominator_data,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<T>(output), reference_ops::FloorDiv<T>);
} else {
reference_ops::BinaryFunction<T, T, T>(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<T>(input1),
tflite::micro::GetTensorShape(input2), denominator_data,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<T>(output), reference_ops::FloorDiv<T>);
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (input1->type) {
case kTfLiteFloat32: {
return EvalFloorDiv<float>(context, input1, input2, output);
}
default: {
TF_LITE_KERNEL_LOG(context, "Type '%s' is not supported by FLOOR_DIV.",
TfLiteTypeGetName(input1->type));
return kTfLiteError;
}
}
}
} // namespace
TfLiteRegistration Register_FLOOR_DIV() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
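
A small standalone sketch of the element-wise function the kernel above applies, assuming the reference FloorDiv(x, y) is floor(x / y); the point is that results round toward negative infinity rather than toward zero. The file below is illustrative only.
#include <cmath>
#include <cstdio>
int main() {
  std::printf("%g\n", std::floor(7.0f / 2.0f));   //  3: matches truncation
  std::printf("%g\n", std::floor(-7.0f / 2.0f));  // -4: truncation would give -3
  return 0;
}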

View File

@@ -0,0 +1,128 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/floor_mod.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
// OLD-TODO(b/117523611): We should factor out a binary_op and put binary ops
// there.
namespace tflite {
namespace {
// Input/output tensor index.
constexpr int kInputTensor1 = 0;
constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;
// OLD-TODO(b/117912880): Support quantization.
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input1;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputTensor1, &input1));
const TfLiteTensor* input2;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputTensor2, &input2));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, output->type);
return kTfLiteOk;
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
return nullptr;
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
return CalculateOpData(context, node);
}
template <typename T>
TfLiteStatus EvalFloorMod(TfLiteContext* context, bool requires_broadcast,
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
const T* denominator_data = tflite::micro::GetTensorData<T>(input2);
if (requires_broadcast) {
reference_ops::BroadcastBinaryFunction4DSlow<T, T, T>(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<T>(input1),
tflite::micro::GetTensorShape(input2), denominator_data,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<T>(output), reference_ops::FloorMod<T>);
} else {
reference_ops::BinaryFunction<T, T, T>(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<T>(input1),
tflite::micro::GetTensorShape(input2), denominator_data,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<T>(output), reference_ops::FloorMod<T>);
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
switch (input1->type) {
case kTfLiteFloat32: {
return EvalFloorMod<float>(context, requires_broadcast, input1, input2,
output);
}
default: {
TF_LITE_KERNEL_LOG(context, "Type '%s' is not supported by FLOOR_MOD.",
TfLiteTypeGetName(input1->type));
return kTfLiteError;
}
}
}
} // namespace
TfLiteRegistration Register_FLOOR_MOD() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
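
Similarly, a standalone sketch of the element-wise function applied above, assuming the reference FloorMod(x, y) is x - y * floor(x / y); unlike std::fmod, the result takes the sign of the divisor. FloorModSketch is an invented name.
#include <cmath>
#include <cstdio>
float FloorModSketch(float x, float y) { return x - y * std::floor(x / y); }
int main() {
  std::printf("%g\n", FloorModSketch(7.0f, 3.0f));   //  1
  std::printf("%g\n", FloorModSketch(7.0f, -3.0f));  // -2 (std::fmod would give 1)
  return 0;
}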

View File

@@ -109,19 +109,6 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
break;
}
case kTfLiteUInt8: {
tflite::reference_ops::FullyConnected(
FullyConnectedParamsQuantized(data),
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<uint8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
break;
}
default: {
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);

View File

@@ -65,7 +65,7 @@ TfLiteStatus CalculateOpDataFullyConnected(
// (reference or optimized) must define this function.
TfLiteRegistration Register_FULLY_CONNECTED();
#if defined(CMSIS_NN) || defined(ARDUINO)
#if defined(CMSIS_NN)
// The Arduino is a special case where we use the CMSIS kernels, but because of
// the current approach to building for Arduino, we do not support -DCMSIS_NN as
// part of the build. As a result, we use defined(ARDUINO) as proxy for the

View File

@@ -65,6 +65,11 @@ TfLiteStatus CalculateOpDataFullyConnected(
&data->output_shift);
data->input_zero_point = input->params.zero_point;
// Filter weights will always be symmetric quantized since we only support
// int8 quantization. See
// https://github.com/tensorflow/tensorflow/issues/44912 for additional
// context.
TFLITE_DCHECK(filter->params.zero_point == 0);
data->filter_zero_point = filter->params.zero_point;
data->output_zero_point = output->params.zero_point;
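
As an aside, a minimal sketch of why the zero filter zero-point (symmetric int8 weights) asserted above matters: sum((x - zx) * w) expands to sum(x * w) - zx * sum(w), so the weight-offset correction collapses into a single constant that can be folded ahead of time. The values below are made up for illustration.
#include <cstdint>
#include <cstdio>
int main() {
  const int8_t x[3] = {10, -20, 30};  // quantized activations
  const int8_t w[3] = {1, 2, 3};      // symmetric weights (zero point == 0)
  const int32_t zx = 5;               // activation zero point
  int32_t direct = 0, unoffset = 0, w_sum = 0;
  for (int i = 0; i < 3; ++i) {
    direct += (x[i] - zx) * w[i];
    unoffset += x[i] * w[i];
    w_sum += w[i];
  }
  // Both forms agree; the correction zx * sum(w) only needs computing once.
  std::printf("%d == %d\n", direct, unoffset - zx * w_sum);
  return 0;
}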

View File

@@ -0,0 +1,222 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace {
constexpr int kInputTensor = 0;
constexpr int kInputPositions = 1;
constexpr int kOutputTensor = 0;
template <typename InputT, typename CoordsT = int32_t>
TfLiteStatus Gather(const TfLiteGatherParams* params,
const TfLiteEvalTensor* input,
const TfLiteEvalTensor* coords, TfLiteEvalTensor* output) {
const InputT* input_data = tflite::micro::GetTensorData<InputT>(input);
const CoordsT* coords_data = tflite::micro::GetTensorData<CoordsT>(coords);
InputT* output_data = tflite::micro::GetTensorData<InputT>(output);
const TfLiteIntArray* input_dims = input->dims;
const int input_dims_size = input_dims->size;
int axis = params->axis;
if (axis < 0) {
axis += input_dims_size;
}
TFLITE_DCHECK_GE(axis, 0);
TFLITE_DCHECK_LT(axis, input_dims_size);
int batch_dims = params->batch_dims;
// batch_dims should be in range: [-rank(coords), rank(coords)].
  // A negative batch_dims is offset by adding the rank of coords.
const TfLiteIntArray* coords_dims = coords->dims;
const int coords_dims_size = coords_dims->size;
if (batch_dims < 0) {
batch_dims += coords_dims_size;
}
TFLITE_DCHECK_GE(batch_dims, 0);
TFLITE_DCHECK_LT(batch_dims, input_dims_size);
TFLITE_DCHECK_LE(batch_dims, coords_dims_size);
TFLITE_DCHECK_GE(axis, batch_dims);
for (int i = 0; i < batch_dims; ++i) {
TFLITE_DCHECK_EQ(input_dims->data[i], coords_dims->data[i]);
}
const int axis_size = input_dims->data[axis];
int batch_size = 1;
for (int i = 0; i < batch_dims; ++i) {
batch_size *= input_dims->data[i];
}
int outer_size = 1;
for (int i = batch_dims; i < axis; ++i) {
outer_size *= input_dims->data[i];
}
int inner_size = 1;
for (int i = axis + 1; i < input_dims_size; ++i) {
inner_size *= input_dims->data[i];
}
int coord_size = 1;
for (int i = batch_dims; i < coords_dims_size; ++i) {
coord_size *= coords_dims->data[i];
}
for (int batch = 0; batch < batch_size; ++batch) {
for (int outer = 0; outer < outer_size; ++outer) {
for (int coord = 0; coord < coord_size; ++coord) {
TFLITE_DCHECK_GE(coords_data[coord], 0);
TFLITE_DCHECK_LT(coords_data[coord], axis_size);
std::memcpy(output_data +
(((batch * outer_size) + outer) * coord_size + coord) *
inner_size,
input_data + (((batch * outer_size) + outer) * axis_size +
coords_data[batch * coord_size + coord]) *
inner_size,
sizeof(InputT) * inner_size);
}
}
}
return kTfLiteOk;
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const auto* params =
reinterpret_cast<const TfLiteGatherParams*>(node->builtin_data);
const TfLiteTensor* input;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
const TfLiteTensor* coords;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputPositions, &coords));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
switch (coords->type) {
case kTfLiteInt32:
break;
default:
TF_LITE_KERNEL_LOG(context,
"Positions of type '%s' are not supported by gather.",
TfLiteTypeGetName(coords->type));
return kTfLiteError;
break;
}
// Assign to output the input type.
output->type = input->type;
// Check conditions for different types.
switch (input->type) {
case kTfLiteFloat32:
case kTfLiteInt8:
break;
default:
TF_LITE_KERNEL_LOG(context, "Type '%s' is not supported by gather.",
TfLiteTypeGetName(input->type));
return kTfLiteError;
break;
}
int axis = params->axis;
if (axis < 0) {
axis += NumDimensions(input);
}
TF_LITE_ENSURE(context, 0 <= axis && axis < NumDimensions(input));
int batch_dims = params->batch_dims;
// batch_dims should be in range: [-rank(coords), rank(coords)].
  // A negative batch_dims is offset by adding the rank of coords.
if (batch_dims < 0) {
batch_dims += NumDimensions(coords);
}
TF_LITE_ENSURE(context, batch_dims <= axis);
TF_LITE_ENSURE(context, 0 <= batch_dims && batch_dims < NumDimensions(input));
TF_LITE_ENSURE(context, batch_dims <= NumDimensions(coords));
for (int i = 0; i < batch_dims; ++i) {
TF_LITE_ENSURE_EQ(context, input->dims->data[i], coords->dims->data[i]);
}
// GATHER updates the output tensor dimensions, but TfLiteTensor in the
// MicroInterpreter is a temporary allocation. We must therefore relocate the
  // dims from the FlatBuffer to the persistent storage arena.
TfLiteEvalTensor* output_eval =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_OK(context, tflite::micro::CreateWritableTensorDimsWithCopy(
context, output, output_eval));
TfLiteIntArray* output_shape = output->dims;
output_shape->size =
NumDimensions(input) + NumDimensions(coords) - 1 - batch_dims;
int output_index = 0;
for (int i = 0; i < axis; ++i) {
output_shape->data[output_index++] = input->dims->data[i];
}
for (int i = batch_dims; i < coords->dims->size; ++i) {
output_shape->data[output_index++] = coords->dims->data[i];
}
for (int i = axis + 1; i < input->dims->size; ++i) {
output_shape->data[output_index++] = input->dims->data[i];
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const auto* params =
reinterpret_cast<const TfLiteGatherParams*>(node->builtin_data);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
const TfLiteEvalTensor* coords =
tflite::micro::GetEvalInput(context, node, kInputPositions);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
if (coords->type == kTfLiteInt32) {
switch (input->type) {
case kTfLiteFloat32:
return Gather<float, int32_t>(params, input, coords, output);
break;
case kTfLiteInt8:
return Gather<int8_t, int32_t>(params, input, coords, output);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type '%s' is not supported by gather.",
TfLiteTypeGetName(input->type));
return kTfLiteError;
break;
}
}
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_GATHER() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
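
A worked shape example for the Prepare() logic above (a standalone sketch, not kernel code; GatherOutputShape is an invented helper): the output shape is input.shape[:axis] + coords.shape[batch_dims:] + input.shape[axis+1:].
#include <cstdio>
#include <vector>
std::vector<int> GatherOutputShape(const std::vector<int>& input,
                                   const std::vector<int>& coords, int axis,
                                   int batch_dims) {
  std::vector<int> out;
  for (int i = 0; i < axis; ++i) out.push_back(input[i]);
  for (int i = batch_dims; i < static_cast<int>(coords.size()); ++i)
    out.push_back(coords[i]);
  for (int i = axis + 1; i < static_cast<int>(input.size()); ++i)
    out.push_back(input[i]);
  return out;
}
int main() {
  // input {2, 3, 4}, coords {2, 5}, axis = 1, batch_dims = 1  ->  2 5 4
  for (int d : GatherOutputShape({2, 3, 4}, {2, 5}, 1, 1)) std::printf("%d ", d);
  std::printf("\n");
  return 0;
}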

View File

@@ -0,0 +1,201 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace {
constexpr int kParams = 0;
constexpr int kIndices = 1;
constexpr int kOutputTensor = 0;
constexpr int MAX_INDICES_ND = 5;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* params;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kParams, &params));
const TfLiteTensor* indices;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kIndices, &indices));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
switch (params->type) {
case kTfLiteFloat32:
case kTfLiteInt8:
break;
default:
TF_LITE_KERNEL_LOG(context,
"Params of type '%s' are not supported by gather_nd.",
TfLiteTypeGetName(params->type));
return kTfLiteError;
break;
}
switch (indices->type) {
case kTfLiteInt32:
break;
default:
TF_LITE_KERNEL_LOG(context,
"Indices of type '%s' are not supported by gather_nd.",
TfLiteTypeGetName(indices->type));
return kTfLiteError;
}
const int params_rank = NumDimensions(params);
const int indices_rank = NumDimensions(indices);
const int indices_nd = SizeOfDimension(indices, indices_rank - 1);
if (params_rank < 1) {
TF_LITE_KERNEL_LOG(context, "Params must be at least a vector.");
return kTfLiteError;
}
if (indices_rank < 1) {
TF_LITE_KERNEL_LOG(context, "Indices must be at least a vector.");
return kTfLiteError;
}
if (indices_nd > params_rank) {
TF_LITE_KERNEL_LOG(
context, "Index innermost dimension length must be <= params rank.");
return kTfLiteError;
}
if (indices_nd > MAX_INDICES_ND) {
TF_LITE_KERNEL_LOG(context,
"Index innermost dimension length must not exceed %d.",
MAX_INDICES_ND);
return kTfLiteError;
}
// Assign to output the input type.
output->type = params->type;
// TFLM gather_nd does not create the output tensor, but it needs to ensure
// that the output shape is correct. The result shape is
// indices.shape[:-1] + params.shape[indices.shape[-1]:]
TfLiteIntArray* output_shape = output->dims;
int output_index = 0;
for (int i = 0; i < indices_rank - 1; ++i) {
output_shape->data[output_index++] = indices->dims->data[i];
}
for (int i = indices_nd; i < params_rank; ++i) {
output_shape->data[output_index++] = params->dims->data[i];
}
output_shape->size = output_index;
return kTfLiteOk;
}
template <typename ParamsT, typename IndicesT>
TfLiteStatus GatherNd(const TfLiteEvalTensor* params,
const TfLiteEvalTensor* indices,
TfLiteEvalTensor* output) {
const int indices_dims = indices->dims->size;
const int indices_nd = indices->dims->data[indices_dims - 1];
const int params_dims = params->dims->size;
const IndicesT* index_data = tflite::micro::GetTensorData<IndicesT>(indices);
const ParamsT* param_data = tflite::micro::GetTensorData<ParamsT>(params);
ParamsT* output_data = tflite::micro::GetTensorData<ParamsT>(output);
int n_slices = 1;
for (int i = 0; i < indices_dims - 1; ++i) {
n_slices *= indices->dims->data[i];
}
// If indices[-1] == params.rank, fetch single elements.
// If indices[-1] < params.rank, fetch slices.
int slice_size = 1;
for (int i = indices_nd; i < params_dims; ++i) {
slice_size *= params->dims->data[i];
}
int remain_flat_size = ElementCount(*params->dims);
// Number of elements per dimension
int dims_to_count[MAX_INDICES_ND];
for (int i = 0; i < indices_nd; ++i) {
dims_to_count[i] = remain_flat_size / params->dims->data[i];
remain_flat_size = dims_to_count[i];
}
for (int i = 0; i < n_slices; ++i) {
int from_pos = 0;
for (int j = 0; j < indices_nd; ++j) {
int offset = i * indices_nd + j;
IndicesT index = index_data[offset];
from_pos += index * dims_to_count[j];
}
std::memcpy(output_data + i * slice_size, param_data + from_pos,
sizeof(ParamsT) * slice_size);
}
return kTfLiteOk;
}
template <typename IndicesT>
TfLiteStatus EvalGatherNd(TfLiteContext* context,
const TfLiteEvalTensor* params,
const TfLiteEvalTensor* indices,
TfLiteEvalTensor* output) {
switch (params->type) {
case kTfLiteFloat32:
return GatherNd<float, IndicesT>(params, indices, output);
break;
case kTfLiteInt8:
return GatherNd<int8_t, IndicesT>(params, indices, output);
break;
default:
TF_LITE_KERNEL_LOG(context,
"Params type '%s' are not supported by gather_nd.",
TfLiteTypeGetName(params->type));
return kTfLiteError;
}
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* params =
tflite::micro::GetEvalInput(context, node, kParams);
const TfLiteEvalTensor* indices =
tflite::micro::GetEvalInput(context, node, kIndices);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (indices->type) {
case kTfLiteInt32:
return EvalGatherNd<int32_t>(context, params, indices, output);
break;
default:
TF_LITE_KERNEL_LOG(context,
"Indices of type '%s' are not supported by gather_nd.",
TfLiteTypeGetName(indices->type));
return kTfLiteError;
}
}
} // namespace
TfLiteRegistration Register_GATHER_ND() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
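
A worked example of the flat-offset arithmetic in GatherNd() above, with made-up shapes: for params of shape {3, 4, 5} and indices of shape {2, 2}, indices_nd is 2, so the result shape is indices.shape[:-1] + params.shape[2:] = {2, 5} and each index pair selects a contiguous slice of 5 elements.
#include <cstdio>
int main() {
  const int params_dims[3] = {3, 4, 5};
  const int indices_nd = 2;
  int dims_to_count[2];
  int remain = 3 * 4 * 5;
  for (int i = 0; i < indices_nd; ++i) {
    dims_to_count[i] = remain / params_dims[i];  // 20, then 5
    remain = dims_to_count[i];
  }
  // The index pair {1, 2} addresses params[1][2][:]:
  const int from_pos = 1 * dims_to_count[0] + 2 * dims_to_count[1];
  std::printf("slice of 5 elements at flat offset %d\n", from_pos);  // 30
  return 0;
}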

View File

@@ -1,4 +1,4 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -23,72 +23,23 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/hard_swish.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace ops {
namespace micro {
namespace hard_swish {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
namespace {
void* HardSwishInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(HardSwishParams));
}
TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
HardSwishParams* params = static_cast<HardSwishParams*>(node->user_data);
params->input_zero_point = input->params.zero_point;
params->output_zero_point = output->params.zero_point;
const float input_scale = input->params.scale;
const float hires_input_scale = (1.0f / 128.0f) * input_scale;
const float reluish_scale = 3.0f / 32768.0f;
const float output_scale = output->params.scale;
const double output_multiplier =
static_cast<double>(hires_input_scale / output_scale);
int32_t output_multiplier_fixedpoint_int32;
QuantizeMultiplier(output_multiplier, &output_multiplier_fixedpoint_int32,
&params->output_multiplier_exponent);
DownScaleInt32ToInt16Multiplier(
output_multiplier_fixedpoint_int32,
&params->output_multiplier_fixedpoint_int16);
TF_LITE_ENSURE(context, params->output_multiplier_exponent <= 0);
const double reluish_multiplier =
static_cast<double>(hires_input_scale / reluish_scale);
int32_t reluish_multiplier_fixedpoint_int32;
QuantizeMultiplier(reluish_multiplier, &reluish_multiplier_fixedpoint_int32,
&params->reluish_multiplier_exponent);
DownScaleInt32ToInt16Multiplier(
reluish_multiplier_fixedpoint_int32,
&params->reluish_multiplier_fixedpoint_int16);
}
return kTfLiteOk;
}
TfLiteStatus HardSwishEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
tflite::micro::GetEvalInput(context, node, kHardSwishInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
tflite::micro::GetEvalOutput(context, node, kHardSwishOutputTensor);
HardSwishParams* params = static_cast<HardSwishParams*>(node->user_data);
switch (input->type) {
@@ -99,13 +50,6 @@ TfLiteStatus HardSwishEval(TfLiteContext* context, TfLiteNode* node) {
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
} break;
case kTfLiteUInt8: {
tflite::reference_ops::HardSwish<uint8_t>(
*params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} break;
case kTfLiteInt8: {
tflite::reference_ops::HardSwish<int8_t>(
*params, tflite::micro::GetTensorShape(input),
@@ -114,29 +58,24 @@ TfLiteStatus HardSwishEval(TfLiteContext* context, TfLiteNode* node) {
tflite::micro::GetTensorData<int8_t>(output));
} break;
default: {
TF_LITE_KERNEL_LOG(
context,
"Only float32/int8_t/uint8_t are supported currently, got %s",
TfLiteTypeGetName(input->type));
MicroPrintf("Unsupported type %s", TfLiteTypeGetName(input->type));
return kTfLiteError;
}
}
return kTfLiteOk;
}
} // namespace hard_swish
} // namespace
TfLiteRegistration Register_HARD_SWISH() {
return {/*init=*/hard_swish::HardSwishInit,
return {/*init=*/HardSwishInit,
/*free=*/nullptr,
/*prepare=*/hard_swish::HardSwishPrepare,
/*invoke=*/hard_swish::HardSwishEval,
/*prepare=*/tflite::HardSwishPrepare,
/*invoke=*/HardSwishEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,30 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_HARD_SWISH_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_HARD_SWISH_H_
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
namespace tflite {
extern const int kHardSwishInputTensor;
extern const int kHardSwishOutputTensor;
TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_HARD_SWISH_H_

View File

@@ -0,0 +1,79 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/hard_swish.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/hard_swish.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
const int kHardSwishInputTensor = 0;
const int kHardSwishOutputTensor = 0;
TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kHardSwishInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kHardSwishOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
if (input->type == kTfLiteInt8) {
HardSwishParams* params = static_cast<HardSwishParams*>(node->user_data);
params->input_zero_point = input->params.zero_point;
params->output_zero_point = output->params.zero_point;
const float input_scale = input->params.scale;
const float hires_input_scale = (1.0f / 128.0f) * input_scale;
const float reluish_scale = 3.0f / 32768.0f;
const float output_scale = output->params.scale;
const double output_multiplier =
static_cast<double>(hires_input_scale / output_scale);
int32_t output_multiplier_fixedpoint_int32;
QuantizeMultiplier(output_multiplier, &output_multiplier_fixedpoint_int32,
&params->output_multiplier_exponent);
DownScaleInt32ToInt16Multiplier(
output_multiplier_fixedpoint_int32,
&params->output_multiplier_fixedpoint_int16);
TF_LITE_ENSURE(context, params->output_multiplier_exponent <= 0);
const double reluish_multiplier =
static_cast<double>(hires_input_scale / reluish_scale);
int32_t reluish_multiplier_fixedpoint_int32;
QuantizeMultiplier(reluish_multiplier, &reluish_multiplier_fixedpoint_int32,
&params->reluish_multiplier_exponent);
DownScaleInt32ToInt16Multiplier(
reluish_multiplier_fixedpoint_int32,
&params->reluish_multiplier_fixedpoint_int16);
}
return kTfLiteOk;
}
} // namespace tflite
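
For orientation, a float sketch of the function whose quantized parameters HardSwishPrepare() derives above, assuming the usual definition hard_swish(x) = x * relu6(x + 3) / 6; the (1.0f / 128.0f) and 3.0f / 32768.0f constants are simply the scales chosen for the higher-resolution intermediate and the relu6-like term. HardSwishFloatSketch is an invented name.
#include <algorithm>
float HardSwishFloatSketch(float x) {
  // e.g. x = -3 -> 0, x = 1 -> 0.666..., x = 3 -> 3
  return x * std::min(6.0f, std::max(0.0f, x + 3.0f)) / 6.0f;
}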

View File

@@ -0,0 +1,166 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <stddef.h>
#include <cstring>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_graph.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
namespace {
struct OpData {
int then_subgraph_index;
int else_subgraph_index;
};
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
const auto* params =
reinterpret_cast<const TfLiteIfParams*>(node->builtin_data);
op_data->then_subgraph_index = params->then_subgraph_index;
op_data->else_subgraph_index = params->else_subgraph_index;
TF_LITE_ENSURE(context, node->inputs->size > 0);
// The first input is the condition.
const TfLiteTensor* cond;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &cond));
TF_LITE_ENSURE_EQ(context, cond->type, kTfLiteBool);
TF_LITE_ENSURE_EQ(context, NumElements(cond), 1);
// The first input of the node is the condition. The rest of inputs are
// passed to the branch subgraphs. Therefore, the number of subgraph inputs
// will be the number of node inputs - 1.
size_t num_inputs = node->inputs->size - 1;
size_t num_outputs = node->outputs->size;
  // Casting to TfLiteIntArray is required since we are re-using
// GetExecutionPlan from TfLiteContext. On TFLM this method returns a
// MicroGraph.
// TODO(b/188226309): Design a cleaner way to get a graph from kernel context.
MicroGraph* graph_info;
context->GetExecutionPlan(context,
reinterpret_cast<TfLiteIntArray**>(&graph_info));
TF_LITE_ENSURE(context,
op_data->then_subgraph_index < graph_info->NumSubgraphs());
TF_LITE_ENSURE(context,
op_data->else_subgraph_index < graph_info->NumSubgraphs());
TF_LITE_ENSURE_EQ(
context, num_inputs,
graph_info->NumSubgraphInputs(op_data->then_subgraph_index));
TF_LITE_ENSURE_EQ(
context, num_outputs,
graph_info->NumSubgraphOutputs(op_data->then_subgraph_index));
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
const TfLiteTensor* cond;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &cond));
bool cond_value = cond->data.b[0];
  // Casting to TfLiteIntArray is required since we are re-using
// GetExecutionPlan from TfLiteContext. On TFLM this method returns a
// MicroGraph.
// TODO(b/188226309): Design a cleaner way to get a graph from kernel context.
MicroGraph* graph_info;
context->GetExecutionPlan(context,
reinterpret_cast<TfLiteIntArray**>(&graph_info));
// Currently we copy the input / output between the subgraphs. This isn't
// optimized yet.
int active_branch_subgraph_index =
cond_value ? op_data->then_subgraph_index : op_data->else_subgraph_index;
for (size_t i = 0;
i < graph_info->NumSubgraphInputs(active_branch_subgraph_index); ++i) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, i + 1);
TfLiteEvalTensor* subgraph_input =
graph_info->GetSubgraphInput(active_branch_subgraph_index, i);
// These checks must occur in Eval since TfLiteEvalTensors are not available
// during Prepare.
size_t input_bytes;
size_t subgraph_input_bytes;
TF_LITE_ENSURE_OK(context, TfLiteEvalTensorByteLength(input, &input_bytes));
TF_LITE_ENSURE_OK(context, TfLiteEvalTensorByteLength(
subgraph_input, &subgraph_input_bytes));
TF_LITE_ENSURE_TYPES_EQ(context, input->type, subgraph_input->type);
TF_LITE_ENSURE_EQ(context, input_bytes, subgraph_input_bytes);
memcpy(subgraph_input->data.raw, input->data.raw, input_bytes);
}
TF_LITE_ENSURE_OK(context,
graph_info->InvokeSubgraph(active_branch_subgraph_index));
for (size_t i = 0;
i < graph_info->NumSubgraphOutputs(active_branch_subgraph_index); ++i) {
const TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, i);
TfLiteEvalTensor* subgraph_output =
graph_info->GetSubgraphOutput(active_branch_subgraph_index, i);
// These checks must occur in Eval since TfLiteEvalTensors are not available
// during Prepare.
size_t output_bytes;
size_t subgraph_output_bytes;
TF_LITE_ENSURE_OK(context,
TfLiteEvalTensorByteLength(output, &output_bytes));
TF_LITE_ENSURE_OK(context, TfLiteEvalTensorByteLength(
subgraph_output, &subgraph_output_bytes));
TF_LITE_ENSURE_TYPES_EQ(context, output->type, subgraph_output->type);
TF_LITE_ENSURE_EQ(context, output_bytes, subgraph_output_bytes);
memcpy(output->data.raw, subgraph_output->data.raw, output_bytes);
}
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_IF() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite

View File

@@ -16,6 +16,8 @@ limitations under the License.
#include "tensorflow/lite/micro/kernels/kernel_runner.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"
#include "tensorflow/lite/micro/test_helpers.h"
namespace tflite {
namespace micro {
@@ -37,7 +39,8 @@ KernelRunner::KernelRunner(const TfLiteRegistration& registration,
kKernelRunnerBuffer_,
kKernelRunnerBufferSize_)),
registration_(registration),
tensors_(tensors) {
tensors_(tensors),
mock_micro_graph_(allocator_) {
// Prepare TfLiteContext:
context_.impl_ = static_cast<void*>(this);
context_.ReportError = ReportOpError;
@@ -47,6 +50,8 @@ KernelRunner::KernelRunner(const TfLiteRegistration& registration,
context_.AllocatePersistentBuffer = AllocatePersistentBuffer;
context_.RequestScratchBufferInArena = RequestScratchBufferInArena;
context_.GetScratchBuffer = GetScratchBuffer;
context_.GetExecutionPlan = GetGraph;
context_.recommended_num_threads = 0;
// Prepare TfLiteNode:
node_.inputs = inputs;
@@ -157,5 +162,15 @@ void KernelRunner::ReportOpError(struct TfLiteContext* context,
va_end(args);
}
TfLiteStatus KernelRunner::GetGraph(struct TfLiteContext* context,
TfLiteIntArray** args) {
TFLITE_DCHECK(context != nullptr);
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
TFLITE_DCHECK(runner != nullptr);
// TODO(b/188226309): Design a cleaner way to get a graph from kernel context.
*args = reinterpret_cast<TfLiteIntArray*>(runner->GetMockGraph());
return kTfLiteOk;
}
} // namespace micro
} // namespace tflite

View File

@@ -18,6 +18,7 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/mock_micro_graph.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"
namespace tflite {
@@ -45,6 +46,10 @@ class KernelRunner {
// passed into the constructor of this class.
TfLiteStatus Invoke();
// Returns a pointer to the internal MockMicroGraph which KernelRunner uses
// to stub out MicroGraph methods and track invocations on each subgraph.
MockMicroGraph* GetMockGraph() { return &mock_micro_graph_; }
protected:
static TfLiteTensor* GetTensor(const struct TfLiteContext* context,
int tensor_index);
@@ -57,6 +62,11 @@ class KernelRunner {
static void* GetScratchBuffer(TfLiteContext* context, int buffer_index);
static void ReportOpError(struct TfLiteContext* context, const char* format,
...);
// This method matches GetExecutionPlan from TfLiteContext since TFLM reuses
// this method to get the MicroGraph from an operator context.
// TODO(b/188226309): Design a cleaner way to get a graph from kernel context.
static TfLiteStatus GetGraph(struct TfLiteContext* context,
TfLiteIntArray** args);
private:
static constexpr int kNumScratchBuffers_ = 12;
@@ -67,6 +77,7 @@ class KernelRunner {
SimpleMemoryAllocator* allocator_ = nullptr;
const TfLiteRegistration& registration_;
TfLiteTensor* tensors_ = nullptr;
MockMicroGraph mock_micro_graph_;
TfLiteContext context_ = {};
TfLiteNode node_ = {};

View File

@@ -49,5 +49,30 @@ PaddingType RuntimePaddingType(TfLitePadding padding) {
}
}
// Relocate tensor dims from FlatBuffer to the persistent storage arena.
// The old dims data is copied to the new storage area.
// The tensor and eval_tensor must be the same tensor.
// Only use during Prepare phase.
TfLiteStatus CreateWritableTensorDimsWithCopy(TfLiteContext* context,
TfLiteTensor* tensor,
TfLiteEvalTensor* eval_tensor) {
TF_LITE_ENSURE(context, tensor != nullptr);
TF_LITE_ENSURE(context, eval_tensor != nullptr);
TF_LITE_ENSURE(context, context->AllocatePersistentBuffer != nullptr);
int ranks = tensor->dims->size;
size_t alloc_size = TfLiteIntArrayGetSizeInBytes(ranks);
TfLiteIntArray* new_dims = static_cast<TfLiteIntArray*>(
context->AllocatePersistentBuffer(context, alloc_size));
TfLiteIntArray* old_dims = tensor->dims;
new_dims->size = ranks;
tensor->dims = new_dims;
eval_tensor->dims = new_dims;
for (int i = 0; i < ranks; i++) {
new_dims->data[i] = old_dims->data[i];
}
return kTfLiteOk;
}
} // namespace micro
} // namespace tflite

View File

@@ -72,6 +72,14 @@ bool HaveSameShapes(const TfLiteEvalTensor* input1,
PaddingType RuntimePaddingType(TfLitePadding padding);
// Relocate tensor dims from FlatBuffer to the persistent storage arena.
// The old dims data is copied to the new storage area.
// The tensor and eval_tensor must be the same tensor.
// Only use during Prepare phase.
TfLiteStatus CreateWritableTensorDimsWithCopy(TfLiteContext* context,
TfLiteTensor* tensor,
TfLiteEvalTensor* eval_tensor);
} // namespace micro
} // namespace tflite

View File

@@ -70,7 +70,13 @@ TfLiteStatus L2Prepare(TfLiteContext* context, TfLiteNode* node) {
// The dims storage is expected to be the same area in memory
// for both TfLiteTensor and TfLiteEvalTensor. This is important
// because TfLiteTensor in the MicroInterpreter is a temporary
// allocation.
// allocation. For the KernelRunner interpreter, TfLiteEvalTensor
// is a temporary allocation. We must therefore relocate the dims
// from the FlatBuffer to the persistent storage arena.
TfLiteEvalTensor* output_eval =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_OK(context, tflite::micro::CreateWritableTensorDimsWithCopy(
context, output, output_eval));
output->dims->data[kBatchRank] = batches;
output->dims->data[kHeightRank] = out_height;
output->dims->data[kWidthRank] = out_width;

View File

@@ -67,8 +67,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
data->input_zero_point = 0;
}
// TODO(ahentz): For some reason our implementations don't support
// activations.
// Our implementations don't currently support activations.
TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone);
return kTfLiteOk;

View File

@@ -68,7 +68,7 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
GetOutputSafe(context, node, kOutputTensor, &output));
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
if (output->type == kTfLiteInt8) {
if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
LeakyReluOpData* data = static_cast<LeakyReluOpData*>(node->user_data);
const auto* params =
static_cast<TfLiteLeakyReluParams*>(node->builtin_data);
@@ -127,6 +127,10 @@ TfLiteStatus LeakyReluEval(TfLiteContext* context, TfLiteNode* node) {
QuantizeLeakyRelu<int8_t>(data, input, output);
return kTfLiteOk;
} break;
case kTfLiteInt16: {
QuantizeLeakyRelu<int16_t>(data, input, output);
return kTfLiteOk;
} break;
default:
TF_LITE_KERNEL_LOG(
context, "Only float32, int8 are supported by LEAKY_RELU, got %s.",

View File

@@ -0,0 +1,150 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/log_softmax.h"
#include <cstddef>
#include <cstdint>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace {
// used only with quantized data
struct LogSoftmaxOpData {
int32_t input_multiplier;
int32_t input_left_shift;
int32_t reverse_scaling_divisor;
int32_t reverse_scaling_right_shift;
int diff_min;
size_t outer_size; // number of tensor elements skipping computation axis
size_t depth; // number of tensor elements on computation axis
};
// input/output tensor index
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
TF_LITE_ENSURE(context, HaveSameShapes(input, output));
if (input->type == kTfLiteInt8) {
node->user_data =
context->AllocatePersistentBuffer(context, sizeof(LogSoftmaxOpData));
auto data = static_cast<LogSoftmaxOpData*>(node->user_data);
// quantization datum
constexpr int32_t kOutputZeroPoint = 127;
constexpr float kOutputScale = 16.0 / 256;
constexpr double kBeta = 1.0;
constexpr int kScaledDiffIntegerBits = 5;
TF_LITE_ENSURE(context, output->params.scale == kOutputScale);
TF_LITE_ENSURE(context, output->params.zero_point == kOutputZeroPoint);
int input_left_shift;
int reverse_scaling_right_shift;
tflite::PreprocessLogSoftmaxScalingExp(
kBeta, static_cast<double>(input->params.scale), kScaledDiffIntegerBits,
&data->input_multiplier, &input_left_shift,
&data->reverse_scaling_divisor, &reverse_scaling_right_shift);
data->input_left_shift = static_cast<int32_t>(input_left_shift);
data->reverse_scaling_right_shift =
static_cast<int32_t>(-reverse_scaling_right_shift);
// diff_min has a negative value, and is used to limit the maximum magnitude
// of the diffs, which are <= 0.
data->diff_min =
-tflite::CalculateInputRadius(kScaledDiffIntegerBits, input_left_shift);
RuntimeShape input_shape = GetTensorShape(input);
const int trailing_dim = input_shape.DimensionsCount() - 1;
data->outer_size =
static_cast<size_t>(FlatSizeSkipDim(input_shape, trailing_dim));
data->depth = static_cast<size_t>(input_shape.Dims(trailing_dim));
}
return kTfLiteOk;
}
TfLiteStatus LogSoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
return CalculateOpData(context, node);
}
TfLiteStatus LogSoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
const LogSoftmaxOpData* data =
static_cast<LogSoftmaxOpData*>(node->user_data);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (input->type) {
case kTfLiteFloat32: {
SoftmaxParams op_params = {};
reference_ops::LogSoftmax(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
}
case kTfLiteInt8: {
SoftmaxParams op_params = {};
op_params.input_multiplier = data->input_multiplier;
op_params.input_left_shift = data->input_left_shift;
op_params.reverse_scaling_divisor = data->reverse_scaling_divisor;
op_params.reverse_scaling_right_shift = data->reverse_scaling_right_shift;
op_params.diff_min = data->diff_min;
reference_ops::LogSoftmax(op_params, data->outer_size, data->depth,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
}
default:
TF_LITE_KERNEL_LOG(context,
"LOG_SOFTMAX only supports float32, int8, got %s.",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
}
} // namespace
TfLiteRegistration Register_LOG_SOFTMAX() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/LogSoftmaxPrepare,
/*invoke=*/LogSoftmaxEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
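
A quick check on the fixed int8 quantization enforced above (a sketch, not kernel code): with scale 16/256 and zero point 127, the dequantized output (q - 127) * 0.0625 covers [-15.9375, 0], which matches log-softmax values always being <= 0.
#include <cstdio>
int main() {
  const float scale = 16.0f / 256.0f;  // 0.0625
  const int zero_point = 127;
  std::printf("min: %g\n", (-128 - zero_point) * scale);  // -15.9375
  std::printf("max: %g\n", (127 - zero_point) * scale);   // 0
  return 0;
}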

View File

@@ -1,4 +1,4 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/kernels/logical.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
@@ -19,60 +21,17 @@ limitations under the License.
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace logical {
namespace {
// Input/output tensor index.
constexpr int kInputTensor1 = 0;
constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;
TfLiteStatus LogicalImpl(TfLiteContext* context, TfLiteNode* node,
bool (*func)(bool, bool)) {
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
if (tflite::micro::HaveSameShapes(input1, input2)) {
reference_ops::BinaryFunction<bool, bool, bool>(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<bool>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<bool>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<bool>(output), func);
} else {
reference_ops::BroadcastBinaryFunction4DSlow<bool, bool, bool>(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<bool>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<bool>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<bool>(output), func);
}
return kTfLiteOk;
}
bool LogicalOr(bool x, bool y) { return x || y; }
TfLiteStatus LogicalOrEval(TfLiteContext* context, TfLiteNode* node) {
return LogicalImpl(context, node, LogicalOr);
}
bool LogicalAnd(bool x, bool y) { return x && y; }
TfLiteStatus LogicalAndEval(TfLiteContext* context, TfLiteNode* node) {
return LogicalImpl(context, node, LogicalAnd);
}
} // namespace
} // namespace logical
TfLiteRegistration Register_LOGICAL_OR() {
// Init, Free, Prepare, Eval are satisfying the Interface required by
@@ -80,7 +39,7 @@ TfLiteRegistration Register_LOGICAL_OR() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/logical::LogicalOrEval,
/*invoke=*/LogicalOrEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
@@ -93,13 +52,11 @@ TfLiteRegistration Register_LOGICAL_AND() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/logical::LogicalAndEval,
/*invoke=*/LogicalAndEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,35 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LOGICAL_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_LOGICAL_H_
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
namespace tflite {
// Input/output tensor index.
extern const int kLogicalInputTensor1;
extern const int kLogicalInputTensor2;
extern const int kLogicalOutputTensor;
TfLiteStatus LogicalImpl(TfLiteContext* context, TfLiteNode* node,
bool (*func)(bool, bool));
bool LogicalOr(bool x, bool y);
bool LogicalAnd(bool x, bool y);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_LOGICAL_H_

View File

@@ -0,0 +1,63 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/logical.h"
namespace tflite {
// Input/output tensor index.
const int kLogicalInputTensor1 = 0;
const int kLogicalInputTensor2 = 1;
const int kLogicalOutputTensor = 0;
TfLiteStatus LogicalImpl(TfLiteContext* context, TfLiteNode* node,
bool (*func)(bool, bool)) {
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kLogicalInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kLogicalInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kLogicalOutputTensor);
if (tflite::micro::HaveSameShapes(input1, input2)) {
reference_ops::BinaryFunction<bool, bool, bool>(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<bool>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<bool>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<bool>(output), func);
} else {
reference_ops::BroadcastBinaryFunction4DSlow<bool, bool, bool>(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<bool>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<bool>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<bool>(output), func);
}
return kTfLiteOk;
}
bool LogicalOr(bool x, bool y) { return x || y; }
bool LogicalAnd(bool x, bool y) { return x && y; }
} // namespace tflite
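The point of splitting LogicalImpl, LogicalOr and LogicalAnd into logical.h / logical_common.cc is that the element-wise machinery, including the broadcast fallback, becomes reusable by other kernels. A minimal hypothetical sketch assuming only what the new header declares; the XOR predicate and eval below are made up for illustration and are not part of the tree:

#include "tensorflow/lite/micro/kernels/logical.h"

namespace tflite {
namespace {

// Hypothetical predicate: any bool(bool, bool) function can be plugged in.
bool LogicalXor(bool x, bool y) { return x != y; }

// Hypothetical eval for a LOGICAL_XOR-style kernel; LogicalImpl handles both
// the same-shape fast path and the 4D broadcast case.
TfLiteStatus LogicalXorEval(TfLiteContext* context, TfLiteNode* node) {
  return LogicalImpl(context, node, LogicalXor);
}

}  // namespace
}  // namespace tflite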

View File

@@ -1,4 +1,4 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -24,71 +24,24 @@ limitations under the License.
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/logistic.h"
namespace tflite {
namespace ops {
namespace micro {
namespace activations {
namespace {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
struct OpData {
int32_t input_zero_point;
int32_t input_range_radius;
int32_t input_multiplier;
int input_left_shift;
};
TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
OpData* data) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
if (input->type == kTfLiteInt8) {
TF_LITE_ENSURE_EQ(context, output->params.zero_point,
std::numeric_limits<int8_t>::min());
static constexpr int kInputIntegerBits = 4;
const double input_real_multiplier =
static_cast<double>(input->params.scale) *
static_cast<double>(1 << (31 - kInputIntegerBits));
data->input_zero_point = input->params.zero_point;
const double q = std::frexp(input_real_multiplier, &data->input_left_shift);
data->input_multiplier = static_cast<int32_t>(TfLiteRound(q * (1ll << 31)));
data->input_range_radius =
CalculateInputRadius(kInputIntegerBits, data->input_left_shift, 31);
}
return kTfLiteOk;
}
} // namespace
void* LogisticInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus LogisticPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
return CalculateArithmeticOpData(context, node, data);
return context->AllocatePersistentBuffer(context, sizeof(OpDataLogistic));
}
TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
tflite::micro::GetEvalInput(context, node, kLogisticInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
tflite::micro::GetEvalOutput(context, node, kLogisticOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
OpDataLogistic* data = static_cast<OpDataLogistic*>(node->user_data);
if (input->type == kTfLiteFloat32) {
switch (output->type) {
@@ -133,18 +86,16 @@ TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteOk;
}
} // namespace activations
} // namespace
TfLiteRegistration Register_LOGISTIC() {
return {/*init=*/activations::LogisticInit,
return {/*init=*/LogisticInit,
/*free=*/nullptr,
/*prepare=*/activations::LogisticPrepare,
/*invoke=*/activations::LogisticEval,
/*prepare=*/LogisticPrepare,
/*invoke=*/LogisticEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,42 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LOGISTIC_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_LOGISTIC_H_
#include <cstdint>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
namespace tflite {
extern const int kLogisticInputTensor;
extern const int kLogisticOutputTensor;
struct OpDataLogistic {
int32_t input_zero_point;
int32_t input_range_radius;
int32_t input_multiplier;
int input_left_shift;
};
TfLiteStatus CalculateArithmeticOpDataLogistic(TfLiteContext* context,
TfLiteNode* node,
OpDataLogistic* data);
TfLiteStatus LogisticPrepare(TfLiteContext* context, TfLiteNode* node);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_LOGISTIC_H_

View File

@@ -0,0 +1,68 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h"
#include "tensorflow/lite/kernels/internal/reference/logistic.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/logistic.h"
namespace tflite {
const int kLogisticInputTensor = 0;
const int kLogisticOutputTensor = 0;
TfLiteStatus CalculateArithmeticOpDataLogistic(TfLiteContext* context,
TfLiteNode* node,
OpDataLogistic* data) {
const TfLiteTensor* input = GetInput(context, node, kLogisticInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kLogisticOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
if (input->type == kTfLiteInt8) {
TF_LITE_ENSURE_EQ(context, output->params.zero_point,
std::numeric_limits<int8_t>::min());
static constexpr int kInputIntegerBits = 4;
const double input_real_multiplier =
static_cast<double>(input->params.scale) *
static_cast<double>(1 << (31 - kInputIntegerBits));
data->input_zero_point = input->params.zero_point;
const double q = std::frexp(input_real_multiplier, &data->input_left_shift);
data->input_multiplier = static_cast<int32_t>(TfLiteRound(q * (1ll << 31)));
data->input_range_radius =
CalculateInputRadius(kInputIntegerBits, data->input_left_shift, 31);
}
return kTfLiteOk;
}
TfLiteStatus LogisticPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpDataLogistic* data = static_cast<OpDataLogistic*>(node->user_data);
return CalculateArithmeticOpDataLogistic(context, node, data);
}
} // namespace tflite
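A worked pass through CalculateArithmeticOpDataLogistic for a concrete scale (editorial example, not from the diff): suppose the int8 input scale is 1/128. With kInputIntegerBits = 4, input_real_multiplier = (1/128) * 2^(31 - 4) = 2^20. std::frexp(2^20) returns the fraction 0.5 with exponent 21, so input_left_shift becomes 21 and input_multiplier = round(0.5 * 2^31) = 2^30 = 1073741824. The quantized logistic kernel then uses this multiplier/shift pair to rescale inputs into the fixed-point format the reference implementation expects.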

View File

@@ -1,4 +1,4 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -32,23 +32,40 @@ namespace tflite {
// have their Register function declarations in the tflite namespace.
TfLiteRegistration Register_ADD_N();
TfLiteRegistration Register_AVERAGE_POOL_2D();
TfLiteRegistration Register_BATCH_TO_SPACE_ND();
TfLiteRegistration Register_CAST();
TfLiteRegistration Register_CONV_2D();
TfLiteRegistration Register_CUMSUM();
TfLiteRegistration Register_DEPTH_TO_SPACE();
TfLiteRegistration Register_DEPTHWISE_CONV_2D();
TfLiteRegistration Register_DIV();
TfLiteRegistration Register_ELU();
TfLiteRegistration Register_EXP();
TfLiteRegistration Register_EXPAND_DIMS();
TfLiteRegistration Register_FILL();
TfLiteRegistration Register_FLOOR_DIV();
TfLiteRegistration Register_FLOOR_MOD();
TfLiteRegistration Register_GATHER();
TfLiteRegistration Register_GATHER_ND();
TfLiteRegistration Register_HARD_SWISH();
TfLiteRegistration Register_IF();
TfLiteRegistration Register_L2_POOL_2D();
TfLiteRegistration Register_LEAKY_RELU();
TfLiteRegistration Register_LOG_SOFTMAX();
TfLiteRegistration Register_LOGICAL_AND();
TfLiteRegistration Register_LOGICAL_OR();
TfLiteRegistration Register_LOGISTIC();
TfLiteRegistration Register_MAX_POOL_2D();
TfLiteRegistration Register_QUANTIZE();
TfLiteRegistration Register_RELU();
TfLiteRegistration Register_RELU6();
TfLiteRegistration Register_RESIZE_BILINEAR();
TfLiteRegistration Register_SHAPE();
TfLiteRegistration Register_SOFTMAX();
TfLiteRegistration Register_SPACE_TO_BATCH_ND();
TfLiteRegistration Register_SPACE_TO_DEPTH();
TfLiteRegistration Register_SQUEEZE();
TfLiteRegistration Register_SVDF();
TfLiteRegistration Register_TRANSPOSE();
TfLiteRegistration Register_TRANSPOSE_CONV();
TfLiteRegistration Register_ZEROS_LIKE();
@@ -59,7 +76,6 @@ TfLiteRegistration Register_ABS();
TfLiteRegistration Register_ADD();
TfLiteRegistration Register_ARG_MAX();
TfLiteRegistration Register_ARG_MIN();
TfLiteRegistration Register_AVERAGE_POOL_2D();
TfLiteRegistration Register_CEIL();
// TODO(b/160234179): Change custom OPs to also return by value.
TfLiteRegistration* Register_CIRCULAR_BUFFER();
@@ -70,16 +86,11 @@ TfLiteRegistration Register_EQUAL();
TfLiteRegistration Register_FLOOR();
TfLiteRegistration Register_GREATER();
TfLiteRegistration Register_GREATER_EQUAL();
TfLiteRegistration Register_HARD_SWISH();
TfLiteRegistration Register_LESS();
TfLiteRegistration Register_LESS_EQUAL();
TfLiteRegistration Register_LOG();
TfLiteRegistration Register_LOGICAL_AND();
TfLiteRegistration Register_LOGICAL_NOT();
TfLiteRegistration Register_LOGICAL_OR();
TfLiteRegistration Register_LOGISTIC();
TfLiteRegistration Register_MAXIMUM();
TfLiteRegistration Register_MAX_POOL_2D();
TfLiteRegistration Register_MEAN();
TfLiteRegistration Register_MINIMUM();
TfLiteRegistration Register_MUL();
@@ -90,8 +101,6 @@ TfLiteRegistration Register_PAD();
TfLiteRegistration Register_PADV2();
TfLiteRegistration Register_PRELU();
TfLiteRegistration Register_REDUCE_MAX();
TfLiteRegistration Register_RELU();
TfLiteRegistration Register_RELU6();
TfLiteRegistration Register_RESHAPE();
TfLiteRegistration Register_RESIZE_NEAREST_NEIGHBOR();
TfLiteRegistration Register_ROUND();

View File

@@ -62,7 +62,7 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
if (output->type == kTfLiteInt8) {
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, params->activation, output, &data->output_activation_min,
&data->output_activation_max));
@@ -104,42 +104,21 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node, const OpData* data,
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
if (output->type == kTfLiteInt8) {
if (need_broadcast) {
reference_integer_ops::BroadcastMul4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
} else {
reference_integer_ops::Mul(op_params,
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
} else if (output->type == kTfLiteUInt8) {
if (need_broadcast) {
reference_integer_ops::BroadcastMul4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else {
reference_integer_ops::Mul(op_params,
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
}
if (need_broadcast) {
reference_integer_ops::BroadcastMul4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
} else {
reference_integer_ops::Mul(op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
}
@@ -203,7 +182,6 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (input1->type) {
case kTfLiteUInt8:
case kTfLiteInt8:
EvalQuantized(context, node, data, input1, input2, output);
break;

View File

@@ -1,4 +1,4 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -15,163 +15,34 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/reference/pooling.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/pooling.h"
namespace tflite {
namespace ops {
namespace micro {
namespace pooling {
namespace {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
struct OpData {
TfLitePaddingValues padding;
int32_t activation_min;
int32_t activation_max;
float activation_min_f32;
float activation_max_f32;
};
TfLiteStatus CalculateOpData(const TfLiteContext* context,
const TfLitePoolParams* params,
const TfLiteTensor* input,
const TfLiteTensor* output, OpData* data) {
// input: batch, height, width, channel
int height = SizeOfDimension(input, 1);
int width = SizeOfDimension(input, 2);
int out_height, out_width;
data->padding = ComputePaddingHeightWidth(
params->stride_height, params->stride_width,
/*dilation_rate_height=*/1,
/*dilation_rate_width=*/1, height, width, params->filter_height,
params->filter_width, params->padding, &out_height, &out_width);
return kTfLiteOk;
}
void AverageEvalFloat(const TfLiteContext* context, const TfLiteNode* node,
const TfLitePoolParams* params, const OpData* data,
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.float_activation_min = data->activation_min_f32;
op_params.float_activation_max = data->activation_max_f32;
reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
const TfLitePoolParams* params, const OpData* data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output) {
TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8);
PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.quantized_activation_min = data->activation_min;
op_params.quantized_activation_max = data->activation_max;
if (input->type == kTfLiteUInt8) {
reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else {
reference_integer_ops::AveragePool(
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
}
void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLitePoolParams* params, const OpData* data,
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
tflite::PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.float_activation_min = data->activation_min_f32;
op_params.float_activation_max = data->activation_max_f32;
reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLitePoolParams* params, const OpData* data,
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
tflite::PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.quantized_activation_min = data->activation_min;
op_params.quantized_activation_max = data->activation_max;
if (input->type == kTfLiteUInt8) {
reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else {
reference_integer_ops::MaxPool(
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
}
} // namespace
TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
const OpDataPooling* data =
static_cast<const OpDataPooling*>(node->user_data);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
micro::GetEvalInput(context, node, kPoolingInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
micro::GetEvalOutput(context, node, kPoolingOutputTensor);
// Inputs and outputs share the same type, guaranteed by the converter.
switch (input->type) {
case kTfLiteFloat32:
AverageEvalFloat(context, node, params, data, input, output);
AveragePoolingEvalFloat(context, node, params, data, input, output);
break;
case kTfLiteUInt8:
case kTfLiteInt8:
AverageEvalQuantized(context, node, params, data, input, output);
AveragePoolingEvalQuantized(context, node, params, data, input, output);
break;
default:
TF_LITE_KERNEL_LOG(context, "Input type %s is not currently supported",
@@ -186,20 +57,20 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
const OpDataPooling* data =
static_cast<const OpDataPooling*>(node->user_data);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
micro::GetEvalInput(context, node, kPoolingInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
micro::GetEvalOutput(context, node, kPoolingOutputTensor);
switch (input->type) {
case kTfLiteFloat32:
MaxEvalFloat(context, node, params, data, input, output);
MaxPoolingEvalFloat(context, node, params, data, input, output);
break;
case kTfLiteUInt8:
case kTfLiteInt8:
MaxEvalQuantized(context, node, params, data, input, output);
MaxPoolingEvalQuantized(context, node, params, data, input, output);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.",
@@ -211,42 +82,16 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
return context->AllocatePersistentBuffer(context, sizeof(OpDataPooling));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, data));
if (input->type == kTfLiteFloat32) {
CalculateActivationRange(params->activation, &data->activation_min_f32,
&data->activation_max_f32);
} else if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) {
CalculateActivationRangeQuantized(context, params->activation, output,
&data->activation_min,
&data->activation_max);
}
return kTfLiteOk;
}
} // namespace pooling
} // namespace
TfLiteRegistration Register_AVERAGE_POOL_2D() {
return {/*init=*/pooling::Init,
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/pooling::Prepare,
/*invoke=*/pooling::AverageEval,
/*prepare=*/PoolingPrepare,
/*invoke=*/AverageEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
@@ -254,16 +99,14 @@ TfLiteRegistration Register_AVERAGE_POOL_2D() {
}
TfLiteRegistration Register_MAX_POOL_2D() {
return {/*init=*/pooling::Init,
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/pooling::Prepare,
/*invoke=*/pooling::MaxEval,
/*prepare=*/PoolingPrepare,
/*invoke=*/MaxEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,71 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_POOLING_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_POOLING_H_
#include <cstdint>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
namespace tflite {
extern const int kPoolingInputTensor;
extern const int kPoolingOutputTensor;
struct OpDataPooling {
TfLitePaddingValues padding;
int32_t activation_min;
int32_t activation_max;
float activation_min_f32;
float activation_max_f32;
};
TfLiteStatus CalculateOpDataPooling(const TfLiteContext* context,
const TfLitePoolParams* params,
const TfLiteTensor* input,
const TfLiteTensor* output,
OpDataPooling* data);
TfLiteStatus PoolingPrepare(TfLiteContext* context, TfLiteNode* node);
void AveragePoolingEvalFloat(const TfLiteContext* context,
const TfLiteNode* node,
const TfLitePoolParams* params,
const OpDataPooling* data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output);
void AveragePoolingEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
const TfLitePoolParams* params,
const OpDataPooling* data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output);
void MaxPoolingEvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLitePoolParams* params, const OpDataPooling* data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output);
void MaxPoolingEvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLitePoolParams* params,
const OpDataPooling* data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_POOLING_H_
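With the shared declarations above, a port can mix its own fast paths with the reference ones. A rough, hypothetical sketch of what that reuse is expected to look like, assuming only the names declared in this header plus the micro kernel_util.h helpers:

#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/pooling.h"

namespace tflite {
namespace {

// Hypothetical port-specific AverageEval: float stays on the shared reference
// path, while the int8 branch is where an optimized routine would slot in.
TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
  auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
  const OpDataPooling* data =
      static_cast<const OpDataPooling*>(node->user_data);
  const TfLiteEvalTensor* input =
      micro::GetEvalInput(context, node, kPoolingInputTensor);
  TfLiteEvalTensor* output =
      micro::GetEvalOutput(context, node, kPoolingOutputTensor);
  if (input->type == kTfLiteFloat32) {
    AveragePoolingEvalFloat(context, node, params, data, input, output);
  } else {
    // An optimized int8 routine would go here; the shared reference
    // quantized path is used as a stand-in.
    AveragePoolingEvalQuantized(context, node, params, data, input, output);
  }
  return kTfLiteOk;
}

}  // namespace
}  // namespace tflite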

View File

@@ -0,0 +1,163 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h"
#include "tensorflow/lite/kernels/internal/reference/pooling.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/pooling.h"
namespace tflite {
const int kPoolingInputTensor = 0;
const int kPoolingOutputTensor = 0;
TfLiteStatus CalculateOpDataPooling(const TfLiteContext* context,
const TfLitePoolParams* params,
const TfLiteTensor* input,
const TfLiteTensor* output,
OpDataPooling* data) {
// input: batch, height, width, channel
int height = SizeOfDimension(input, 1);
int width = SizeOfDimension(input, 2);
int out_height, out_width;
data->padding = ComputePaddingHeightWidth(
params->stride_height, params->stride_width,
/*dilation_rate_height=*/1,
/*dilation_rate_width=*/1, height, width, params->filter_height,
params->filter_width, params->padding, &out_height, &out_width);
return kTfLiteOk;
}
TfLiteStatus PoolingPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
OpDataPooling* data = static_cast<OpDataPooling*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kPoolingInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kPoolingOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_STATUS(
CalculateOpDataPooling(context, params, input, output, data));
if (input->type == kTfLiteFloat32) {
CalculateActivationRange(params->activation, &data->activation_min_f32,
&data->activation_max_f32);
} else if (input->type == kTfLiteInt8) {
CalculateActivationRangeQuantized(context, params->activation, output,
&data->activation_min,
&data->activation_max);
}
return kTfLiteOk;
}
void AveragePoolingEvalFloat(const TfLiteContext* context,
const TfLiteNode* node,
const TfLitePoolParams* params,
const OpDataPooling* data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output) {
PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.float_activation_min = data->activation_min_f32;
op_params.float_activation_max = data->activation_max_f32;
reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
void AveragePoolingEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
const TfLitePoolParams* params,
const OpDataPooling* data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output) {
TFLITE_DCHECK(input->type == kTfLiteInt8);
PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.quantized_activation_min = data->activation_min;
op_params.quantized_activation_max = data->activation_max;
reference_integer_ops::AveragePool(
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
void MaxPoolingEvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLitePoolParams* params, const OpDataPooling* data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output) {
tflite::PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.float_activation_min = data->activation_min_f32;
op_params.float_activation_max = data->activation_max_f32;
reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
void MaxPoolingEvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLitePoolParams* params,
const OpDataPooling* data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output) {
tflite::PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.quantized_activation_min = data->activation_min;
op_params.quantized_activation_max = data->activation_max;
reference_integer_ops::MaxPool(op_params,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
} // namespace tflite

View File

@@ -57,6 +57,7 @@ TfLiteStatus PrepareQuantizeReference(TfLiteContext* context,
if ((input->type == kTfLiteInt16 && output->type == kTfLiteInt8) ||
(input->type == kTfLiteInt8 && output->type == kTfLiteInt8) ||
(input->type == kTfLiteInt8 && output->type == kTfLiteInt16) ||
(input->type == kTfLiteInt8 && output->type == kTfLiteInt32) ||
(input->type == kTfLiteInt16 && output->type == kTfLiteInt16) ||
(input->type == kTfLiteInt16 && output->type == kTfLiteInt32)) {
@@ -145,6 +146,13 @@ TfLiteStatus EvalQuantizeReference(TfLiteContext* context, TfLiteNode* node) {
data->input_zero_point, data->quantization_params.zero_point,
tflite::micro::GetTensorData<int8_t>(output));
break;
case kTfLiteInt16:
reference_ops::Requantize(
tflite::micro::GetTensorData<int8_t>(input), size,
data->requantize_output_multiplier, data->requantize_output_shift,
data->input_zero_point, data->quantization_params.zero_point,
tflite::micro::GetTensorData<int16_t>(output));
break;
case kTfLiteInt32:
reference_ops::Requantize(
tflite::micro::GetTensorData<int8_t>(input), size,

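For context on the new Requantize cases (standard requantization math, stated here rather than taken from the diff): the op computes q_out = round((q_in - input_zero_point) * input_scale / output_scale) + output_zero_point and clamps the result to the output type's range, with the scale ratio pre-computed into requantize_output_multiplier and requantize_output_shift at prepare time. The added int8 -> int16 and int8 -> int32 branches reuse exactly that arithmetic and only change the destination type the result is clamped and stored to.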
View File

@@ -103,14 +103,15 @@ TfLiteStatus PrepareMeanOrSum(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, 0);
OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
const TfLiteTensor* output = GetOutput(context, node, 0);
if (input->type == kTfLiteInt8) {
if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) {
const double real_multiplier = static_cast<double>(input->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(real_multiplier, &op_data->multiplier, &op_data->shift);
}
int output_size = NumElements(output);
if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) {
if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8 ||
input->type == kTfLiteInt16) {
context->RequestScratchBufferInArena(context, output_size * sizeof(int32_t),
&op_data->temp_buffer_idx);
op_data->input_zp = input->params.zero_point;
@@ -213,6 +214,43 @@ TfLiteStatus EvalMean(TfLiteContext* context, TfLiteNode* node) {
temp_buffer, false));
}
} break;
case kTfLiteInt16: {
// Defer to specialized implementation for 4D Mean across axes 1 & 2.
if (params->keep_dims && special_case_4d_axes_1_and_2) {
reference_integer_ops::Mean(
op_params, op_data->multiplier, op_data->shift,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int16_t>(input), op_data->input_zp,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output), op_data->output_zp);
} else if (op_data->input_zp == op_data->output_zp &&
op_data->input_scale == op_data->output_scale) {
int32_t* temp_buffer = static_cast<int32_t*>(
context->GetScratchBuffer(context, op_data->temp_buffer_idx));
TF_LITE_ENSURE(
context,
reference_ops::Mean(tflite::micro::GetTensorData<int16_t>(input),
input->dims->data, input->dims->size,
tflite::micro::GetTensorData<int16_t>(output),
output->dims->data, output->dims->size,
tflite::micro::GetTensorData<int>(axis),
num_axis, params->keep_dims, temp_index,
resolved_axis, temp_buffer));
} else {
int32_t* temp_buffer = static_cast<int32_t*>(
context->GetScratchBuffer(context, op_data->temp_buffer_idx));
TF_LITE_ENSURE(
context,
reference_ops::QuantizedMeanOrSum(
tflite::micro::GetTensorData<int16_t>(input), op_data->input_zp,
op_data->input_scale, input->dims->data, input->dims->size,
tflite::micro::GetTensorData<int16_t>(output),
op_data->output_zp, op_data->output_scale, output->dims->data,
output->dims->size, tflite::micro::GetTensorData<int>(axis),
num_axis, params->keep_dims, temp_index, resolved_axis,
temp_buffer, false));
}
} break;
case kTfLiteUInt8: {
// Defer to specialized implementation for 4D Mean across axes 1 & 2.
if (params->keep_dims && special_case_4d_axes_1_and_2) {

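Two worked notes on the new int16 path (editorial, not part of the diff): the rescaling factor in PrepareMeanOrSum is simply input_scale / output_scale, so for example an input scale of 0.5 and an output scale of 0.25 give a real multiplier of 2.0, which QuantizeMultiplier stores as the Q31 fraction 2^30 (i.e. 0.5) together with a left shift of 2. The scratch buffer is requested as output_size * sizeof(int32_t) because the per-output partial sums do not fit in 16 bits: reducing even a 256-element axis of int16 values can reach about 256 * 32767 ~= 8.4e6, so accumulation runs in int32 before the final rescale back to int16.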
View File

@@ -0,0 +1,116 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/resize_bilinear.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace {
constexpr int kInputTensor = 0;
constexpr int kSizeTensor = 1;
constexpr int kOutputTensor = 0;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* size = GetInput(context, node, kSizeTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
TF_LITE_ENSURE_EQ(context, NumDimensions(size), 1);
TF_LITE_ENSURE_EQ(context, size->type, kTfLiteInt32);
output->type = input->type;
TF_LITE_ENSURE_MSG(context, IsConstantTensor(size),
"Non constant size tensor not supported");
// Ensure params are valid.
auto* params =
reinterpret_cast<TfLiteResizeBilinearParams*>(node->builtin_data);
if (params->half_pixel_centers && params->align_corners) {
TF_LITE_KERNEL_LOG(
context, "If half_pixel_centers is True, align_corners must be False.");
return kTfLiteError;
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params =
reinterpret_cast<TfLiteResizeBilinearParams*>(node->builtin_data);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
const TfLiteEvalTensor* size =
tflite::micro::GetEvalInput(context, node, kSizeTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
if (output->type == kTfLiteFloat32) {
tflite::ResizeBilinearParams op_params;
op_params.align_corners = params->align_corners;
op_params.half_pixel_centers = params->half_pixel_centers;
reference_ops::ResizeBilinear(op_params,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(size),
tflite::micro::GetTensorData<int32_t>(size),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
} else if (output->type == kTfLiteInt8) {
tflite::ResizeBilinearParams op_params;
op_params.align_corners = params->align_corners;
op_params.half_pixel_centers = params->half_pixel_centers;
reference_ops::ResizeBilinearInteger(
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(size),
tflite::micro::GetTensorData<int32_t>(size),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
} else {
TF_LITE_KERNEL_LOG(context, "Output type is %d, requires float or int8.",
output->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_RESIZE_BILINEAR() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
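For context, a minimal sketch of the coordinate mapping the two flags select, assuming the usual TFLite conventions; the helper name below is made up and is not part of the kernel:

// Hypothetical helper: the source coordinate each output pixel samples from.
float SourceCoord(int out_coord, int in_size, int out_size, bool align_corners,
                  bool half_pixel_centers) {
  float scale = static_cast<float>(in_size) / out_size;
  if (align_corners && out_size > 1) {
    scale = static_cast<float>(in_size - 1) / (out_size - 1);
  }
  if (half_pixel_centers) {
    return (out_coord + 0.5f) * scale - 0.5f;
  }
  return out_coord * scale;
}

Each flag encodes its own alignment convention, which is why Prepare rejects models that set both at once.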

View File

@@ -25,6 +25,21 @@ void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length);
TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node);
// This is the most generic TfLiteRegistration. The actual supported types may
// still be target dependent. The only requirement is that every implementation
// (reference or optimized) must define this function.
TfLiteRegistration Register_SOFTMAX();
#if defined(XTENSA)
// Returns a TfLiteRegistration struct for kernel variant that only supports
// int8 input and int16 output.
TfLiteRegistration Register_SOFTMAX_INT8_INT16();
#else
inline TfLiteRegistration Register_SOFTMAX_INT8_INT16() {
return Register_SOFTMAX();
}
#endif
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_SOFTMAX_H_

View File

@@ -125,10 +125,12 @@ TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
// The exp LUT is only used on negative input values;
// we consider exp(-10.0) insignificant to the accumulation.
gen_lut([](float value) { return std::exp(value); }, -10.0f, 0.0f,
op_data->exp_lut, kInt16LUTArraySize);
gen_lut([](float value) { return 1.0f / (1.0f + value); }, 0.0f, 1.0f,
op_data->one_over_one_plus_x_lut, kInt16LUTArraySize);
gen_lut<float, int16_t, int16_t>(
[](float value) { return std::exp(value); }, -10.0f, 0.0f, -1.0f, 1.0f,
op_data->exp_lut);
gen_lut<float, int16_t, int16_t>(
[](float value) { return 1.0f / (1.0f + value); }, 0.0f, 1.0f, -1.0f,
1.0f, op_data->one_over_one_plus_x_lut);
op_data->zero_point = output->params.zero_point;
op_data->scale = output->params.scale;
}
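A quick number behind the cutoff comment above (editorial, not from the diff): exp(-10) ~= 4.5e-5. With the exp LUT output range now explicitly pinned to [-1.0, 1.0] and stored as int16, one quantization step is roughly 2 / 65536 ~= 3.1e-5, so inputs below the -10.0 cutoff contribute at most one or two LSBs to the accumulation, which is what makes truncating the LUT domain there acceptable.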

View File

@@ -0,0 +1,128 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/space_to_depth.h"
#include <stdint.h>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
constexpr int kBatchRank = 0;
constexpr int kHeightRank = 1;
constexpr int kWidthRank = 2;
constexpr int kDepthRank = 3;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
auto* params =
reinterpret_cast<TfLiteSpaceToDepthParams*>(node->builtin_data);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
auto data_type = output->type;
TF_LITE_ENSURE(context,
data_type == kTfLiteFloat32 || data_type == kTfLiteInt8);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
const int block_size = params->block_size;
const int input_height = input->dims->data[kHeightRank];
const int input_width = input->dims->data[kWidthRank];
int output_height = input_height / block_size;
int output_width = input_width / block_size;
TF_LITE_ENSURE_EQ(context, input_height, output_height * block_size);
TF_LITE_ENSURE_EQ(context, input_width, output_width * block_size);
// Relocate dims to the persistent storage arena before changing them,
// otherwise we'd be modifying temporary copies made by the interpreters each
// time they process the layer.
TfLiteEvalTensor* output_eval =
micro::GetEvalOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_OK(context, micro::CreateWritableTensorDimsWithCopy(
context, output, output_eval));
output->dims->data[kBatchRank] = input->dims->data[kBatchRank];
output->dims->data[kHeightRank] = output_height;
output->dims->data[kWidthRank] = output_width;
output->dims->data[kDepthRank] =
input->dims->data[kDepthRank] * block_size * block_size;
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params =
reinterpret_cast<TfLiteSpaceToDepthParams*>(node->builtin_data);
const TfLiteEvalTensor* input =
micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output = micro::GetEvalOutput(context, node, kOutputTensor);
SpaceToDepthParams op_params;
op_params.block_size = params->block_size;
switch (input->type) { // Already know in/out types are same.
case kTfLiteFloat32:
reference_ops::SpaceToDepth(op_params, micro::GetTensorShape(input),
micro::GetTensorData<float>(input),
micro::GetTensorShape(output),
micro::GetTensorData<float>(output));
break;
case kTfLiteInt8:
reference_ops::SpaceToDepth(op_params, micro::GetTensorShape(input),
micro::GetTensorData<int8_t>(input),
micro::GetTensorShape(output),
micro::GetTensorData<int8_t>(output));
break;
default:
TF_LITE_KERNEL_LOG(
context, "SPACE_TO_DEPTH only supports FLOAT32 and INT8, got %s.",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_SPACE_TO_DEPTH() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
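A worked example of the shape logic in Prepare (editorial, not part of the diff): with block_size = 2 and a 1x4x6x3 input, output_height = 4 / 2 = 2, output_width = 6 / 2 = 3, and the output depth becomes 3 * 2 * 2 = 12, so the output dims are rewritten to 1x2x3x12. The two TF_LITE_ENSURE_EQ checks on height and width are what reject inputs that are not exact multiples of block_size.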

View File

@@ -167,6 +167,13 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
break;
case kTfLiteInt16:
reference_ops::StridedSlice(
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int16_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);

View File

@@ -19,6 +19,7 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/add.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
@@ -62,12 +63,17 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteSubParams* params,
OpData* data) {
data->requires_broadcast = !HaveSameShapes(input1, input2);
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
output->type == kTfLiteInt16) {
// 8bit -> 8bit general quantized path, with general rescalings
data->input1_offset = -input1->params.zero_point;
data->input2_offset = -input2->params.zero_point;
data->output_offset = output->params.zero_point;
data->left_shift = 20;
// The shift is set to 15 for the 16-bit case and to 20 for the 8-bit case.
// For 16-bit, 65535 << 15 is still less than 1 << 31, so the addition fits
// in a 32-bit accumulator.
data->left_shift = output->type == kTfLiteInt16 ? 15 : 20;
const float twice_max_input_scale =
2 * std::max(input1->params.scale, input2->params.scale);
const double real_input1_multiplier =
@@ -84,6 +90,9 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteSubParams* params,
QuantizeMultiplierSmallerThanOneExp(
real_input2_multiplier, &data->input2_multiplier, &data->input2_shift);
// Use add kernel for 16-bit sub, since it supports output requantization.
// This matches behavior in TFLite.
data->input2_multiplier *= (output->type == kTfLiteInt16) ? -1 : 1;
QuantizeMultiplierSmallerThanOneExp(
real_output_multiplier, &data->output_multiplier, &data->output_shift);
@@ -151,25 +160,25 @@ TfLiteStatus EvalSubQuantized(TfLiteContext* context, TfLiteNode* node,
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
tflite::ArithmeticParams op_params;
op_params.left_shift = data->left_shift;
op_params.input1_offset = data->input1_offset;
op_params.input1_multiplier = data->input1_multiplier;
op_params.input1_shift = data->input1_shift;
op_params.input2_offset = data->input2_offset;
op_params.input2_multiplier = data->input2_multiplier;
op_params.input2_shift = data->input2_shift;
op_params.output_offset = data->output_offset;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
SetActivationParams(data->output_activation_min,
data->output_activation_max, &op_params);
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
tflite::ArithmeticParams op_params;
op_params.left_shift = data->left_shift;
op_params.input1_offset = data->input1_offset;
op_params.input1_multiplier = data->input1_multiplier;
op_params.input1_shift = data->input1_shift;
op_params.input2_offset = data->input2_offset;
op_params.input2_multiplier = data->input2_multiplier;
op_params.input2_shift = data->input2_shift;
op_params.output_offset = data->output_offset;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
SetActivationParams(data->output_activation_min, data->output_activation_max,
&op_params);
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
if (output->type == kTfLiteInt8) {
switch (output->type) {
case kTfLiteInt8: {
if (need_broadcast) {
tflite::reference_ops::BroadcastSubSlow(
op_params, tflite::micro::GetTensorShape(input1),
@@ -187,27 +196,53 @@ TfLiteStatus EvalSubQuantized(TfLiteContext* context, TfLiteNode* node,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
} else {
if (need_broadcast) {
tflite::reference_ops::BroadcastSubSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else {
tflite::reference_ops::Sub(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
}
break;
}
case kTfLiteInt16: {
if (need_broadcast) {
tflite::reference_ops::BroadcastAdd4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int16_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int16_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
} else {
tflite::reference_ops::Add(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int16_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int16_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output), false);
}
break;
}
case kTfLiteUInt8: {
if (need_broadcast) {
tflite::reference_ops::BroadcastSubSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else {
tflite::reference_ops::Sub(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
}
break;
}
default:
TF_LITE_KERNEL_LOG(context, "Quantized type %s not currently supported.",
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
return kTfLiteOk;
}
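The kTfLiteInt16 case above calls the Add kernels and relies on the sign flip applied to input2_multiplier in CalculateOpData. A simplified, non-fixed-point sketch of why that reuse is sound (illustrative only, not the real kernel math):

#include <cstdint>

// Scaled-arithmetic view, with the fixed-point multipliers and shifts of the
// real kernels omitted:
//   Add: out = q1 * m1 + q2 * m2
//   Sub: out = q1 * m1 - q2 * m2 = q1 * m1 + q2 * (-m2)
// so subtraction can reuse the Add kernel once m2 has been negated.
inline int32_t ScaledAdd(int32_t q1, int32_t m1, int32_t q2, int32_t m2) {
  return q1 * m1 + q2 * m2;
}
inline int32_t ScaledSub(int32_t q1, int32_t m1, int32_t q2, int32_t m2) {
  return ScaledAdd(q1, m1, q2, -m2);  // Sub expressed through Add.
}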
@@ -226,7 +261,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
if (output->type == kTfLiteFloat32) {
EvalSub(context, node, params, &data, input1, input2, output);
} else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
} else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
output->type == kTfLiteInt16) {
TF_LITE_ENSURE_OK(context, EvalSubQuantized(context, node, params, &data,
input1, input2, output));
} else {

View File

@@ -0,0 +1,112 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/transpose.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
namespace tflite {
namespace {
struct TransposeContext {
TransposeContext(TfLiteContext* context, TfLiteNode* node) {
input = GetInput(context, node, 0);
perm = GetInput(context, node, 1);
output = GetOutput(context, node, 0);
}
const TfLiteTensor* input;
const TfLiteTensor* perm;
TfLiteTensor* output;
};
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
TransposeContext op_context(context, node);
// Ensure validity of input tensor.
TF_LITE_ENSURE_MSG(context, NumDimensions(op_context.input) <= 5,
"Transpose op only supports 1D-5D input arrays.");
TF_LITE_ENSURE_TYPES_EQ(context, op_context.input->type,
op_context.output->type);
int dims = NumDimensions(op_context.input);
const int32_t* perm_data = GetTensorData<int32_t>(op_context.perm);
// Ensure validity of the permutations tensor as a 1D tensor.
TF_LITE_ENSURE_EQ(context, NumDimensions(op_context.perm), 1);
TF_LITE_ENSURE_EQ(context, op_context.perm->dims->data[0], dims);
for (int idx = 0; idx < dims; ++idx) {
TF_LITE_ENSURE_MSG(context, (perm_data[idx] >= 0 && perm_data[idx] < dims),
"Transpose op permutations array is out of bounds.");
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TransposeContext op_context(context, node);
const int32_t* perm_data = GetTensorData<int32_t>(op_context.perm);
const int size = op_context.perm->dims->data[0];
TransposeParams params;
params.perm_count = size;
for (int i = 0; i < size; ++i) {
params.perm[i] = perm_data[i];
}
// The Transpose kernel only rearranges values; it performs no numeric
// evaluation on each cell. It is therefore safe to implement it per size of
// scalar type, which keeps the total code size in a reasonable range.
switch (op_context.input->type) {
case kTfLiteFloat32:
reference_ops::Transpose(params, GetTensorShape(op_context.input),
GetTensorData<float>(op_context.input),
GetTensorShape(op_context.output),
GetTensorData<float>(op_context.output));
break;
case kTfLiteInt8:
reference_ops::Transpose(params, GetTensorShape(op_context.input),
GetTensorData<int8_t>(op_context.input),
GetTensorShape(op_context.output),
GetTensorData<int8_t>(op_context.output));
break;
default:
TF_LITE_KERNEL_LOG(context,
"Type %s is currently not supported by Transpose. "
"Only float32 and int8 are supported",
TfLiteTypeGetName(op_context.input->type));
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_TRANSPOSE() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
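For intuition about the perm tensor validated in Prepare, a self-contained sketch with assumed shapes and values (not taken from this diff): a 2x3 input with perm = {1, 0} becomes a 3x2 output where output[d0][d1] = input[d1][d0].

#include <cstdio>

int main() {
  const int input[2][3] = {{1, 2, 3}, {4, 5, 6}};
  int output[3][2];
  // perm = {1, 0}: output dimension 0 reads input dimension 1 and vice versa.
  for (int r = 0; r < 3; ++r) {
    for (int c = 0; c < 2; ++c) {
      output[r][c] = input[c][r];
    }
  }
  printf("%d %d\n", output[0][1], output[2][0]);  // Prints "4 3".
  return 0;
}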

View File

@@ -47,6 +47,10 @@ struct OpData {
// A scratch buffer is required for quantized implementations.
int scratch_buffer_index;
// TODO(b/192090531): Remove this once all 8x16 transpose conv models use
// 64-bit biases.
int bias_converted_buffer_index;
// Multiplier and shift arrays are required for the int8 implementation.
int32_t* per_channel_output_multiplier;
int32_t* per_channel_output_shift;
@@ -103,9 +107,21 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
&data->params.output_multiplier, &data->params.output_shift,
&data->params.quantized_activation_min,
&data->params.quantized_activation_max,
data->per_channel_output_multiplier,
reinterpret_cast<int*>(data->per_channel_output_shift),
data->per_channel_output_multiplier, data->per_channel_output_shift,
output_channels));
// TODO(b/192090531): Remove this once all 8x16 transpose conv models use
// 64-bit biases.
if (input->type == kTfLiteInt16) {
TFLITE_DCHECK(filter->type == kTfLiteInt8);
TFLITE_DCHECK(output->type == kTfLiteInt16);
if (bias->type == kTfLiteInt16) {
TFLITE_DCHECK(
context->RequestScratchBufferInArena(
context, GetTensorShape(bias).FlatSize() * sizeof(std::int64_t),
&(data->bias_converted_buffer_index)) == kTfLiteOk);
}
}
}
return kTfLiteOk;
}
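The bias-conversion buffer requested above follows the usual TFLM scratch-buffer pattern: reserve an index while preparing, resolve it to a pointer at eval time. A generic fragment of that pattern (assumes a TfLiteContext* context inside a kernel's Prepare/Eval; size and names are placeholders, not from this diff):

// In Prepare(): reserve arena space and remember the returned index.
int scratch_index = -1;
if (context->RequestScratchBufferInArena(context, /*bytes=*/1024,
                                         &scratch_index) != kTfLiteOk) {
  return kTfLiteError;
}

// In Eval(): the stored index is resolved to a live pointer.
int64_t* scratch =
    static_cast<int64_t*>(context->GetScratchBuffer(context, scratch_index));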
@@ -154,8 +170,17 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
&(data->scratch_buffer_index)) == kTfLiteOk);
}
// Quantized 16x8 kernels use an int64 scratch buffer.
if (input->type == kTfLiteInt16) {
TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
TFLITE_DCHECK(context->RequestScratchBufferInArena(
context,
GetTensorShape(output).FlatSize() * sizeof(std::int64_t),
&(data->scratch_buffer_index)) == kTfLiteOk);
}
// All per-channel quantized tensors need valid zero point and scale arrays.
if (input->type == kTfLiteInt8) {
if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) {
TF_LITE_ENSURE_EQ(context, filter->quantization.type,
kTfLiteAffineQuantization);
@@ -212,8 +237,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const OpData& data = *(static_cast<const OpData*>(node->user_data));
TF_LITE_ENSURE_EQ(context, input->type, output->type);
TF_LITE_ENSURE_MSG(context, input->type == filter->type,
"Hybrid models are not supported on TFLite Micro.");
TF_LITE_ENSURE_MSG(
context,
input->type == filter->type ||
(input->type == kTfLiteInt16 && filter->type == kTfLiteInt8),
"Hybrid models are not supported on TFLite Micro.");
switch (input->type) { // Already know in/out types are same.
case kTfLiteFloat32: {
@@ -245,6 +273,44 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
tflite::micro::GetTensorShape(nullptr), nullptr, scratch_buffer);
break;
}
case kTfLiteInt16: {
std::int64_t* scratch_buffer = static_cast<int64_t*>(
context->GetScratchBuffer(context, data.scratch_buffer_index));
// TODO(b/192090531): Remove this once all 8x16 transpose conv models use
// 64-bit biases.
if (bias->type == kTfLiteInt16) {
std::int64_t* bias_converted_buffer =
static_cast<int64_t*>(context->GetScratchBuffer(
context, data.bias_converted_buffer_index));
for (int i = 0; i < tflite::micro::GetTensorShape(bias).FlatSize();
i++) {
bias_converted_buffer[i] = bias->data.i16[i];
}
reference_integer_ops::TransposeConv(
data.params, data.per_channel_output_multiplier,
data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int16_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias), bias_converted_buffer,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output),
tflite::micro::GetTensorShape(nullptr), nullptr, scratch_buffer);
} else {
reference_integer_ops::TransposeConv(
data.params, data.per_channel_output_multiplier,
data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int16_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<std::int64_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output),
tflite::micro::GetTensorShape(nullptr), nullptr, scratch_buffer);
}
break;
}
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);

View File

@@ -15,8 +15,28 @@ limitations under the License.
#include "tensorflow/lite/micro/memory_planner/greedy_memory_planner.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_string.h"
namespace tflite {
namespace {
// Returns a character representing a numbered buffer
// for GreedyMemoryPlanner::PrintMemoryPlan()
char GetOrdinalCharacter(int i) {
if (i < 10) {
return '0' + i;
} else if (i < 36) {
return 'a' + (i - 10);
} else if (i < 62) {
return 'A' + (i - 36);
}
return '*';
}
} // namespace
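GetOrdinalCharacter() labels buffers with digits, then lowercase, then uppercase letters before falling back to '*'. A small self-contained check of the boundary values (the helper is restated locally because the original lives in an anonymous namespace):

#include <cassert>

// Local restatement of the mapping, used only to check the boundaries.
char OrdinalForTest(int i) {
  if (i < 10) return '0' + i;
  if (i < 36) return 'a' + (i - 10);
  if (i < 62) return 'A' + (i - 36);
  return '*';
}

int main() {
  assert(OrdinalForTest(0) == '0' && OrdinalForTest(9) == '9');
  assert(OrdinalForTest(10) == 'a' && OrdinalForTest(35) == 'z');
  assert(OrdinalForTest(36) == 'A' && OrdinalForTest(61) == 'Z');
  assert(OrdinalForTest(62) == '*');
  return 0;
}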
// Simple stable in-place sort function. Not time-efficient for large arrays.
// Would normally be in an anonymous namespace to keep it private, but we want
// to be able to test it externally.
@@ -297,8 +317,6 @@ size_t GreedyMemoryPlanner::GetMaximumMemorySize() {
while (entry) {
BufferRequirements* requirements =
&requirements_[entry->requirements_index];
// TODO(b/148246793): Update all size and offset variables types from
// int to size_t
const size_t current_size = entry->offset + requirements->size;
if (current_size > max_size) {
max_size = current_size;
@@ -311,17 +329,14 @@ size_t GreedyMemoryPlanner::GetMaximumMemorySize() {
return max_size;
}
void GreedyMemoryPlanner::PrintMemoryPlan(ErrorReporter* error_reporter) {
void GreedyMemoryPlanner::PrintMemoryPlan() {
CalculateOffsetsIfNeeded();
for (int i = 0; i < buffer_count_; ++i) {
TF_LITE_REPORT_ERROR(
error_reporter,
"Planner buffer ID: %d, calculated offset: %d, size required: %d, "
"first_time_created: %d, "
"last_time_used: %d",
i, buffer_offsets_[i], requirements_[i].size,
requirements_[i].first_time_used, requirements_[i].last_time_used);
MicroPrintf("%c (id=%d): size=%d, offset=%d, first_used=%d last_used=%d",
GetOrdinalCharacter(i), i, requirements_[i].size,
buffer_offsets_[i], requirements_[i].first_time_used,
requirements_[i].last_time_used);
}
constexpr int kLineWidth = 80;
@@ -345,6 +360,7 @@ void GreedyMemoryPlanner::PrintMemoryPlan(ErrorReporter* error_reporter) {
for (int c = 0; c < kLineWidth; ++c) {
line[c] = '.';
}
int memory_use = 0;
for (int i = 0; i < buffer_count_; ++i) {
BufferRequirements* requirements = &requirements_[i];
if ((t < requirements->first_time_used) ||
@@ -356,28 +372,21 @@ void GreedyMemoryPlanner::PrintMemoryPlan(ErrorReporter* error_reporter) {
continue;
}
const int size = requirements->size;
memory_use += size;
const int line_start = (offset * kLineWidth) / max_size;
const int line_end = ((offset + size) * kLineWidth) / max_size;
for (int n = line_start; n < line_end; ++n) {
if (line[n] == '.') {
char display;
if (i < 10) {
display = '0' + i;
} else if (i < 36) {
display = 'a' + (i - 10);
} else if (i < 62) {
display = 'A' + (i - 36);
} else {
display = '*';
}
line[n] = display;
line[n] = GetOrdinalCharacter(i);
} else {
line[n] = '!';
}
}
}
line[kLineWidth] = 0;
TF_LITE_REPORT_ERROR(error_reporter, "%s", (const char*)line);
MicroPrintf("%s%d: %s (%dk)", t < 10 ? " " : "", t, (const char*)line,
(memory_use + 1023) / 1024);
}
}
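The column math in the rendering loop is a simple proportional projection of each buffer onto an 80-character line. A worked example with assumed numbers (not from a real plan):

#include <cstdio>

int main() {
  const int kLineWidth = 80;
  const int max_size = 4096;  // assumed arena high-water mark
  const int offset = 1024;
  const int size = 512;
  const int line_start = (offset * kLineWidth) / max_size;         // 20
  const int line_end = ((offset + size) * kLineWidth) / max_size;  // 30
  // The buffer spans 10 of 80 columns, i.e. 512/4096 = 1/8 of the arena.
  printf("columns %d..%d\n", line_start, line_end - 1);
  return 0;
}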

View File

@@ -81,7 +81,7 @@ class GreedyMemoryPlanner : public MemoryPlanner {
int buffer_index, int* offset) override;
// Prints an ascii-art diagram of the buffer layout plan.
void PrintMemoryPlan(ErrorReporter* error_reporter);
void PrintMemoryPlan();
// Debug method to check whether any buffer allocations are overlapping. This
// is an O(N^2) complexity operation, so only use for testing.

View File

@@ -29,7 +29,7 @@ limitations under the License.
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/memory_planner/greedy_memory_planner.h"
#include "tensorflow/lite/micro/memory_planner/memory_planner.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/schema/schema_utils.h"
@@ -211,6 +211,8 @@ TfLiteStatus AllocationInfoBuilder::AddTensors(const SubGraph* subgraph,
}
}
uint32_t operators_size = NumSubgraphOperators(subgraph);
for (size_t i = 0; i < subgraph->inputs()->size(); ++i) {
const int tensor_index = subgraph->inputs()->Get(i);
AllocationInfo* current = &info_[tensor_index];
@@ -221,11 +223,11 @@ TfLiteStatus AllocationInfoBuilder::AddTensors(const SubGraph* subgraph,
for (size_t i = 0; i < subgraph->outputs()->size(); ++i) {
const int tensor_index = subgraph->outputs()->Get(i);
AllocationInfo* current = &info_[tensor_index];
current->last_used = subgraph->operators()->size() - 1;
current->last_used = operators_size - 1;
}
// Figure out when the first and last use of each tensor is.
for (int i = (subgraph->operators()->size() - 1); i >= 0; --i) {
for (int i = (operators_size - 1); i >= 0; --i) {
const auto* op = subgraph->operators()->Get(i);
for (size_t n = 0; n < op->inputs()->size(); ++n) {
const int tensor_index = op->inputs()->Get(n);
@@ -242,47 +244,11 @@ TfLiteStatus AllocationInfoBuilder::AddTensors(const SubGraph* subgraph,
}
}
}
// Sanity check for valid tensor lifetime.
for (size_t i = 0; i < tensor_count_; ++i) {
AllocationInfo* current = &info_[i];
// Even though the tensor appears to be read-only, it may still need to be
// allocated.
const bool appears_read_only =
(current->first_created == -1) && (current->last_used != -1);
const bool has_partial_lifetime =
!appears_read_only &&
((current->first_created == -1) || (current->last_used == -1));
if (has_partial_lifetime && current->needs_allocating) {
TF_LITE_REPORT_ERROR(
reporter_,
"Logic error in memory planner, tensor %d has an invalid lifetime: "
"first_created: %d, last_used: %d",
i, current->first_created, current->last_used);
return kTfLiteError;
}
}
return kTfLiteOk;
}
// The tensor offsets will be encoded in the metadata:[Metadata] field of the
// Model. The following encoding applies:
//
// | Metadata component | Value |
// | name:string | “OfflineMemoryAllocation” |
// | buffer:unit | Index of buffer containing memory allocation data |
//
// The buffer contents for the memory allocation is a list of 32-bit integers.
// The number of tensors, n, must be equal to the number of tensors defined in
// the model. The following encoding applies:
//
// | Offset | Value |
// | 0 | Offline allocation format version set to 0 |
// | 1 | Subgraph index to which this allocation applies |
// | 2 | Number offsets following: n |
// | 3 | Arena byte offset of tensor #0 or -1 to allocate at runtime |
// | 4 | Arena byte offset of tensor #1 or -1 to allocate at runtime |
// | 3+(n-1) | Arena byte offset of tensor #(n-1) or -1 to allocate at runtime |
// Get the offline tensor allocation plan. See
// micro/docs/memory_management.md for more info.
TfLiteStatus AllocationInfoBuilder::GetOfflinePlannedOffsets(
const Model* model, const int32_t** offline_planner_offsets) {
if (model->metadata()) {
@@ -404,18 +370,18 @@ TfLiteStatus FlatBufferVectorToTfLiteTypeArray(
// Big-endian architecture can not use the same memory layout as
// flatbuffers::Vector<kFlatBufferVectorType>. Allocate from the tail and
// copy values from the flatbuffer into the newly allocated chunk.
kTfLiteArrayType* array =
reinterpret_cast<kTfLiteArrayType*>(allocator->AllocateFromTail(
TfLiteIntArrayGetSizeInBytes(flatbuffer_array->Length()),
kTfLiteArrayType* array = reinterpret_cast<kTfLiteArrayType*>(
allocator->SimpleMemoryAllocator::AllocateFromTail(
TfLiteIntArrayGetSizeInBytes(flatbuffer_array->size()),
alignof(kTfLiteArrayType)));
if (array == nullptr) {
TF_LITE_REPORT_ERROR(
error_reporter,
"Failed to allocate %d bytes of memory to copy an array.",
TfLiteIntArrayGetSizeInBytes(flatbuffer_array->Length()));
TfLiteIntArrayGetSizeInBytes(flatbuffer_array->size()));
return kTfLiteError;
}
array->size = flatbuffer_array->Length();
array->size = flatbuffer_array->size();
for (int i = 0; i < array->size; ++i) {
array->data[i] = flatbuffer_array->Get(i);
}
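The element-by-element copy above is needed because FlatBuffers serializes scalars little-endian, so a big-endian host cannot alias the vector's bytes directly. For illustration only, the alternative would be a per-element byte swap (this is not what the code does; it copies through the accessor instead):

#include <cstdint>

// Swaps the byte order of one 32-bit value.
inline uint32_t ByteSwap32(uint32_t u) {
  return (u >> 24) | ((u >> 8) & 0x0000FF00u) | ((u << 8) & 0x00FF0000u) |
         (u << 24);
}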
@@ -624,33 +590,46 @@ MicroAllocator* MicroAllocator::Create(SimpleMemoryAllocator* memory_allocator,
return allocator;
}
TfLiteStatus MicroAllocator::StartModelAllocation(
const Model* model, const MicroOpResolver& op_resolver,
NodeAndRegistration** node_and_registrations,
TfLiteEvalTensor** eval_tensors) {
SubgraphAllocations* MicroAllocator::StartModelAllocation(const Model* model) {
TFLITE_DCHECK(model != nullptr);
if (model_is_allocating_) {
TF_LITE_REPORT_ERROR(error_reporter_,
"MicroAllocator: Model allocation started before "
"finishing previously allocated model");
return kTfLiteError;
return nullptr;
}
model_is_allocating_ = true;
TF_LITE_ENSURE_STATUS(InitScratchBufferData());
TF_LITE_ENSURE_STATUS(AllocateTfLiteEvalTensors(model, eval_tensors));
TF_LITE_ENSURE_STATUS(
AllocateNodeAndRegistrations(model, node_and_registrations));
TF_LITE_ENSURE_STATUS(PrepareNodeAndRegistrationDataFromFlatbuffer(
model, op_resolver, *node_and_registrations));
uint8_t* data_allocator_buffer = memory_allocator_->AllocateFromTail(
sizeof(MicroBuiltinDataAllocator), alignof(MicroBuiltinDataAllocator));
builtin_data_allocator_ =
new (data_allocator_buffer) MicroBuiltinDataAllocator(memory_allocator_);
return kTfLiteOk;
if (InitScratchBufferData() != kTfLiteOk) {
return nullptr;
}
// Allocate struct to store eval tensors, nodes and registrations.
SubgraphAllocations* output = reinterpret_cast<SubgraphAllocations*>(
memory_allocator_->AllocateFromTail(
sizeof(SubgraphAllocations) * model->subgraphs()->size(),
alignof(SubgraphAllocations)));
if (output == nullptr) {
MicroPrintf("Failed to allocate memory for model metadata.");
return nullptr;
}
if (AllocateTfLiteEvalTensors(model, output) != kTfLiteOk ||
AllocateNodeAndRegistrations(model, output) != kTfLiteOk) {
return nullptr;
}
return output;
}
TfLiteStatus MicroAllocator::FinishModelAllocation(
const Model* model, TfLiteEvalTensor* eval_tensors,
const Model* model, SubgraphAllocations* subgraph_allocations,
ScratchBufferHandle** scratch_buffer_handles) {
if (!model_is_allocating_) {
TF_LITE_REPORT_ERROR(error_reporter_,
@@ -659,15 +638,20 @@ TfLiteStatus MicroAllocator::FinishModelAllocation(
return kTfLiteError;
}
const SubGraph* subgraph = GetSubGraphFromModel(model);
TFLITE_DCHECK(subgraph != nullptr);
TF_LITE_ENSURE_STATUS(AllocateScratchBufferHandles(
scratch_buffer_handles, scratch_buffer_request_count_));
TF_LITE_ENSURE_STATUS(CommitStaticMemoryPlan(model, subgraph, eval_tensors,
*scratch_buffer_handles));
TF_LITE_ENSURE_STATUS(AllocateVariables(subgraph, eval_tensors));
// TODO(b/187993197): Track scratch buffers for each subgraph.
for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs()->size();
subgraph_idx++) {
const SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx);
TFLITE_DCHECK(subgraph != nullptr);
TF_LITE_ENSURE_STATUS(AllocateScratchBufferHandles(
scratch_buffer_handles, scratch_buffer_request_count_));
TF_LITE_ENSURE_STATUS(CommitStaticMemoryPlan(
model, subgraph_allocations[subgraph_idx].tensors,
*scratch_buffer_handles, subgraph_idx));
TF_LITE_ENSURE_STATUS(AllocateVariables(
subgraph, subgraph_allocations[subgraph_idx].tensors));
}
model_is_allocating_ = false;
return kTfLiteOk;
}
@@ -677,6 +661,7 @@ void* MicroAllocator::AllocatePersistentBuffer(size_t bytes) {
}
TfLiteStatus MicroAllocator::RequestScratchBufferInArena(size_t bytes,
int subgraph_idx,
int* buffer_idx) {
// All scratch buffer requests are stored in the head section of the arena
// when a model is in the prepare phase. First align a scratch buffer request
@@ -751,153 +736,72 @@ size_t MicroAllocator::used_bytes() const {
}
TfLiteStatus MicroAllocator::AllocateNodeAndRegistrations(
const Model* model, NodeAndRegistration** node_and_registrations) {
TFLITE_DCHECK(node_and_registrations);
const Model* model, SubgraphAllocations* subgraph_allocations) {
TFLITE_DCHECK(subgraph_allocations != nullptr);
const SubGraph* subgraph = GetSubGraphFromModel(model);
TFLITE_DCHECK(subgraph != nullptr);
for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs()->size();
subgraph_idx++) {
const SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx);
TFLITE_DCHECK(subgraph != nullptr);
NodeAndRegistration* output = reinterpret_cast<NodeAndRegistration*>(
memory_allocator_->AllocateFromTail(
sizeof(NodeAndRegistration) * subgraph->operators()->size(),
alignof(NodeAndRegistration)));
if (output == nullptr) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Failed to allocate memory for node_and_registrations.");
return kTfLiteError;
}
*node_and_registrations = output;
return kTfLiteOk;
}
uint32_t operators_size = NumSubgraphOperators(subgraph);
TfLiteStatus MicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer(
const Model* model, const MicroOpResolver& op_resolver,
NodeAndRegistration* node_and_registrations) {
TFLITE_DCHECK(model != nullptr);
TFLITE_DCHECK(node_and_registrations != nullptr);
const SubGraph* subgraph = GetSubGraphFromModel(model);
TFLITE_DCHECK(subgraph != nullptr);
TfLiteStatus status = kTfLiteOk;
auto* opcodes = model->operator_codes();
MicroBuiltinDataAllocator builtin_data_allocator(memory_allocator_);
for (size_t i = 0; i < subgraph->operators()->size(); ++i) {
const auto* op = subgraph->operators()->Get(i);
const size_t index = op->opcode_index();
if (index >= opcodes->size()) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Missing registration for opcode_index %d\n", index);
// Initialize NodeAndRegistrations for the subgraph.
NodeAndRegistration* output = reinterpret_cast<NodeAndRegistration*>(
memory_allocator_->AllocateFromTail(
sizeof(NodeAndRegistration) * operators_size,
alignof(NodeAndRegistration)));
if (output == nullptr) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Failed to allocate memory for node_and_registrations.");
return kTfLiteError;
}
auto* opcode = (*opcodes)[index];
status =
GetRegistrationFromOpCode(opcode, op_resolver, error_reporter_,
&(node_and_registrations[i].registration));
if (status != kTfLiteOk) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Failed to get registration from op code %s\n ",
EnumNameBuiltinOperator(GetBuiltinCode(opcode)));
return status;
}
const auto* registration = node_and_registrations[i].registration;
if (registration == nullptr) {
TF_LITE_REPORT_ERROR(error_reporter_, "Skipping op for opcode_index %d\n",
index);
return kTfLiteError;
}
BuiltinOperator op_type =
static_cast<BuiltinOperator>(registration->builtin_code);
const char* custom_data = nullptr;
size_t custom_data_size = 0;
unsigned char* builtin_data = nullptr;
if (op_type == BuiltinOperator_CUSTOM) {
// Custom Ops may or may not have a non-null custom_options field.
if (op->custom_options() != nullptr) {
custom_data =
reinterpret_cast<const char*>(op->custom_options()->data());
custom_data_size = op->custom_options()->size();
}
} else {
if (op->custom_options() != nullptr) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Unsupported behavior: found builtin operator %s with custom "
"options.\n",
EnumNameBuiltinOperator(op_type));
return kTfLiteError;
}
MicroOpResolver::BuiltinParseFunction parser =
op_resolver.GetOpDataParser(op_type);
if (parser == nullptr) {
TF_LITE_REPORT_ERROR(error_reporter_, "Did not find a parser for %s",
EnumNameBuiltinOperator(op_type));
return kTfLiteError;
}
TF_LITE_ENSURE_STATUS(parser(op, error_reporter_, &builtin_data_allocator,
(void**)(&builtin_data)));
}
TfLiteIntArray* inputs_array;
TF_LITE_ENSURE_STATUS(internal::FlatBufferVectorToTfLiteTypeArray(
memory_allocator_, error_reporter_, op->inputs(), &inputs_array));
TfLiteIntArray* outputs_array;
TF_LITE_ENSURE_STATUS(internal::FlatBufferVectorToTfLiteTypeArray(
memory_allocator_, error_reporter_, op->outputs(), &outputs_array));
TfLiteNode* node = &(node_and_registrations[i].node);
*node = {};
node->inputs = inputs_array;
node->outputs = outputs_array;
node->builtin_data = reinterpret_cast<void*>(builtin_data);
node->custom_initial_data = custom_data;
node->custom_initial_data_size = custom_data_size;
subgraph_allocations[subgraph_idx].node_and_registrations = output;
}
return kTfLiteOk;
}
TfLiteTensor* MicroAllocator::AllocatePersistentTfLiteTensor(
const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index) {
const SubGraph* subgraph = GetSubGraphFromModel(model);
const Model* model, const SubgraphAllocations* subgraph_allocations,
int tensor_index, int subgraph_index) {
const SubGraph* subgraph = model->subgraphs()->Get(subgraph_index);
TFLITE_DCHECK(subgraph != nullptr);
// This value is allocated from persistent arena space. It is guaranteed to be
// around for the lifetime of the application.
TfLiteTensor* tensor =
AllocatePersistentTfLiteTensorInternal(model, eval_tensors, tensor_index);
TfLiteTensor* tensor = AllocatePersistentTfLiteTensorInternal();
// Populate any fields from the flatbuffer, since this TfLiteTensor struct is
// allocated in the persistent section of the arena, ensure that additional
// allocations also take place in that section of the arena.
if (PopulateTfLiteTensorFromFlatbuffer(model, subgraph, tensor, tensor_index,
/*allocate_temp=*/false) !=
kTfLiteOk) {
if (PopulateTfLiteTensorFromFlatbuffer(
model, tensor, tensor_index, subgraph_index,
/*allocate_temp=*/false) != kTfLiteOk) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Failed to populate a persistent TfLiteTensor struct "
"from flatbuffer data!");
return nullptr;
}
if (eval_tensors != nullptr) {
if (subgraph_allocations != nullptr) {
// Tensor buffers that are allocated at runtime (e.g. non-weight buffers)
// and not located in the flatbuffer are stored on the pre-allocated list of
// TfLiteEvalTensor structs. These structs are the source of truth; simply
// point the corresponding buffer to the new TfLiteTensor data value.
tensor->data.data = eval_tensors[tensor_index].data.data;
tensor->data.data =
subgraph_allocations[subgraph_index].tensors[tensor_index].data.data;
// TfLiteEvalTensor structs must also be the source of truth for the
// TfLiteTensor dims.
tensor->dims =
subgraph_allocations[subgraph_index].tensors[tensor_index].dims;
}
return tensor;
}
TfLiteTensor* MicroAllocator::AllocateTempTfLiteTensor(
const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index) {
const SubGraph* subgraph = GetSubGraphFromModel(model);
const Model* model, const SubgraphAllocations* subgraph_allocations,
int tensor_index, int subgraph_index) {
const SubGraph* subgraph = model->subgraphs()->Get(subgraph_index);
TFLITE_DCHECK(subgraph != nullptr);
// This value is allocated from temporary arena space. It is guaranteed to be
@@ -910,7 +814,8 @@ TfLiteTensor* MicroAllocator::AllocateTempTfLiteTensor(
// Populate any fields from the flatbuffer, since this TfLiteTensor struct is
// allocated in the temp section of the arena, ensure that additional
// allocations also take place in that section of the arena.
if (PopulateTfLiteTensorFromFlatbuffer(model, subgraph, tensor, tensor_index,
if (PopulateTfLiteTensorFromFlatbuffer(model, tensor, tensor_index,
subgraph_index,
/*allocate_temp=*/true) != kTfLiteOk) {
TF_LITE_REPORT_ERROR(
error_reporter_,
@@ -918,12 +823,17 @@ TfLiteTensor* MicroAllocator::AllocateTempTfLiteTensor(
return nullptr;
}
if (eval_tensors != nullptr) {
if (subgraph_allocations != nullptr) {
// Tensor buffers that are allocated at runtime (e.g. non-weight buffers)
// and not located in the flatbuffer are stored on the pre-allocated list of
// TfLiteEvalTensor structs. These structs are the source of truth; simply
// point the corresponding buffer to the new TfLiteTensor data value.
tensor->data.data = eval_tensors[tensor_index].data.data;
tensor->data.data =
subgraph_allocations[subgraph_index].tensors[tensor_index].data.data;
// TfLiteEvalTensor structs must also be the source of truth for the
// TfLiteTensor dims.
tensor->dims =
subgraph_allocations[subgraph_index].tensors[tensor_index].dims;
}
return tensor;
}
@@ -933,38 +843,41 @@ void MicroAllocator::ResetTempAllocations() {
}
TfLiteStatus MicroAllocator::AllocateTfLiteEvalTensors(
const Model* model, TfLiteEvalTensor** eval_tensors) {
TFLITE_DCHECK(eval_tensors != nullptr);
const Model* model, SubgraphAllocations* subgraph_allocations) {
TFLITE_DCHECK(subgraph_allocations != nullptr);
const SubGraph* subgraph = GetSubGraphFromModel(model);
TFLITE_DCHECK(subgraph != nullptr);
for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs()->size();
subgraph_idx++) {
const SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx);
TFLITE_DCHECK(subgraph != nullptr);
size_t alloc_count = subgraph->tensors()->size();
TfLiteEvalTensor* tensors =
reinterpret_cast<TfLiteEvalTensor*>(memory_allocator_->AllocateFromTail(
sizeof(TfLiteEvalTensor) * alloc_count, alignof(TfLiteEvalTensor)));
if (tensors == nullptr) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Failed to allocate memory for context->eval_tensors, "
"%d bytes required",
sizeof(TfLiteEvalTensor) * alloc_count);
return kTfLiteError;
}
for (size_t i = 0; i < alloc_count; ++i) {
TfLiteStatus status = internal::InitializeTfLiteEvalTensorFromFlatbuffer(
memory_allocator_, *subgraph->tensors()->Get(i), model->buffers(),
error_reporter_, &tensors[i]);
if (status != kTfLiteOk) {
TF_LITE_REPORT_ERROR(error_reporter_, "Failed to initialize tensor %d",
i);
size_t alloc_count = subgraph->tensors()->size();
TfLiteEvalTensor* tensors =
reinterpret_cast<TfLiteEvalTensor*>(memory_allocator_->AllocateFromTail(
sizeof(TfLiteEvalTensor) * alloc_count, alignof(TfLiteEvalTensor)));
if (tensors == nullptr) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Failed to allocate memory for context->eval_tensors, "
"%d bytes required",
sizeof(TfLiteEvalTensor) * alloc_count);
return kTfLiteError;
}
for (size_t i = 0; i < alloc_count; ++i) {
TfLiteStatus status = internal::InitializeTfLiteEvalTensorFromFlatbuffer(
memory_allocator_, *subgraph->tensors()->Get(i), model->buffers(),
error_reporter_, &tensors[i]);
if (status != kTfLiteOk) {
TF_LITE_REPORT_ERROR(error_reporter_, "Failed to initialize tensor %d",
i);
return kTfLiteError;
}
}
subgraph_allocations[subgraph_idx].tensors = tensors;
}
*eval_tensors = tensors;
return kTfLiteOk;
}
TfLiteStatus MicroAllocator::AllocateVariables(const SubGraph* subgraph,
TfLiteEvalTensor* eval_tensors) {
for (size_t i = 0; i < subgraph->tensors()->size(); ++i) {
@@ -988,20 +901,20 @@ TfLiteStatus MicroAllocator::AllocateVariables(const SubGraph* subgraph,
return kTfLiteOk;
}
TfLiteTensor* MicroAllocator::AllocatePersistentTfLiteTensorInternal(
const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index) {
TfLiteTensor* MicroAllocator::AllocatePersistentTfLiteTensorInternal() {
return reinterpret_cast<TfLiteTensor*>(memory_allocator_->AllocateFromTail(
sizeof(TfLiteTensor), alignof(TfLiteTensor)));
}
TfLiteStatus MicroAllocator::PopulateTfLiteTensorFromFlatbuffer(
const Model* model, const SubGraph* subgraph, TfLiteTensor* tensor,
int tensor_index, bool allocate_temp) {
const Model* model, TfLiteTensor* tensor, int tensor_index,
int subgraph_idx, bool allocate_temp) {
// TODO(b/162311891): This method serves as a stub to ensure quantized
// allocations in the tail can be recorded. Once the interpreter has APIs for
// accessing buffers on TfLiteEvalTensor this method can be dropped.
return internal::InitializeTfLiteTensorFromFlatbuffer(
memory_allocator_, allocate_temp, *subgraph->tensors()->Get(tensor_index),
memory_allocator_, allocate_temp,
*model->subgraphs()->Get(subgraph_idx)->tensors()->Get(tensor_index),
model->buffers(), error_reporter_, tensor);
}
@@ -1009,20 +922,9 @@ ErrorReporter* MicroAllocator::error_reporter() const {
return error_reporter_;
}
const SubGraph* MicroAllocator::GetSubGraphFromModel(const Model* model) {
auto* subgraphs = model->subgraphs();
if (subgraphs->size() != 1) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Only 1 subgraph is currently supported.\n");
return nullptr;
}
return (*subgraphs)[0];
}
TfLiteStatus MicroAllocator::CommitStaticMemoryPlan(
const Model* model, const SubGraph* subgraph,
TfLiteEvalTensor* eval_tensors,
ScratchBufferHandle* scratch_buffer_handles) {
const Model* model, TfLiteEvalTensor* eval_tensors,
ScratchBufferHandle* scratch_buffer_handles, int subgraph_idx) {
size_t head_usage = 0;
// Create static memory plan
// 1. Calculate AllocationInfo to know the lifetime of each tensor/buffer.
@@ -1034,6 +936,7 @@ TfLiteStatus MicroAllocator::CommitStaticMemoryPlan(
// allocated from the temp section and cleaned up at the bottom of this
// function.
const SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx);
size_t allocation_info_count =
subgraph->tensors()->size() + scratch_buffer_request_count_;
size_t bytes = sizeof(AllocationInfo) * allocation_info_count;
@@ -1096,6 +999,9 @@ TfLiteStatus MicroAllocator::CommitStaticMemoryPlan(
TF_LITE_ENSURE_STATUS(CommitPlan(error_reporter_, &planner,
memory_allocator_->GetHeadBuffer(),
allocation_info, allocation_info_count));
#ifdef TF_LITE_SHOW_MEMORY_USE
planner.PrintMemoryPlan();
#endif
head_usage = planner.GetMaximumMemorySize();
// The head is used to store memory plans for one model at a time during the
@@ -1155,4 +1061,15 @@ internal::ScratchBufferRequest* MicroAllocator::GetScratchBufferRequests() {
alignof(internal::ScratchBufferRequest)));
}
TfLiteStatus MicroAllocator::FlatBufferVectorToTfLiteTypeArray(
const flatbuffers::Vector<int32_t>* flatbuffer_array,
TfLiteIntArray** result) {
return internal::FlatBufferVectorToTfLiteTypeArray(
memory_allocator_, error_reporter_, flatbuffer_array, result);
}
BuiltinDataAllocator* MicroAllocator::GetBuiltinDataAllocator() {
return builtin_data_allocator_;
}
} // namespace tflite

View File

@@ -18,11 +18,11 @@ limitations under the License.
#include <cstddef>
#include <cstdint>
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/micro/flatbuffer_utils.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"
#include "tensorflow/lite/schema/schema_generated.h"
@@ -75,6 +75,13 @@ typedef struct {
uint8_t* data;
} ScratchBufferHandle;
// Stores all per-subgraph allocations. This includes the node and registration
// array, tensor list and scratch buffer handles for each subgraph.
typedef struct {
NodeAndRegistration* node_and_registrations;
TfLiteEvalTensor* tensors;
} SubgraphAllocations;
// Allocator responsible for allocating memory for all intermediate tensors
// necessary to invoke a model.
//
@@ -114,28 +121,31 @@ class MicroAllocator {
static MicroAllocator* Create(SimpleMemoryAllocator* memory_allocator,
ErrorReporter* error_reporter);
// Begin allocating internal resources required for model inference.
// Allocates internal resources required for model inference for each subgraph
// from the arena.
//
// This method will run through the flatbuffer data supplied in the model to
// properly allocate tensor, node, and op registration data. This method is
// expected to be followed with a call to FinishModelAllocation() before
// resuming allocation with another model. All persistent tensor buffers are
// stored in the out-param eval_tensors. This value is allocated from the
// persistent memory arena and will be used to host runtime tensor buffers.
TfLiteStatus StartModelAllocation(
const Model* model, const MicroOpResolver& op_resolver,
NodeAndRegistration** node_and_registrations,
TfLiteEvalTensor** eval_tensors);
// expected to be followed with a call to FinishModelAllocation(). Returns a
// pointer to an array of SubgraphAllocations (also stored in the tail of the
// arena) where each index corresponds to a different subgraph in the model.
// Return value is nullptr if the allocations failed.
SubgraphAllocations* StartModelAllocation(const Model* model);
// Finish allocating internal resources required for model inference.
// This method will plan non-persistent buffers and commit a memory plan to
// the 'head' section of the memory arena. All variable tensor data will also
// be allocated. This method should be called after assigning model resources
// in StartModelAllocation(). The eval_tensors pointer should be the value
// passed into this class during StartModelAllocation(). Scratch buffer
// handles are stored in the out-param `scratch_buffer_handles`. This value
// will be used in `GetScratchBuffer` call to retrieve scratch buffers.
//
// -Plan the memory for activation tensors and scratch buffers.
// -Update eval tensors for each subgraph based on planned offsets.
// -Allocate scratch buffer handles array and update based on planned offsets.
//
// This method should be called after assigning model resources
// in StartModelAllocation(). The subgraph_allocations pointer should be the
// value passed into this class during StartModelAllocation(). Scratch buffer
// handles are stored in the out-param `scratch_buffer_handles` array which is
// allocated in this method. This value will be used in `GetScratchBuffer`
// call to retrieve scratch buffers.
TfLiteStatus FinishModelAllocation(
const Model* model, TfLiteEvalTensor* eval_tensors,
const Model* model, SubgraphAllocations* subgraph_allocations,
ScratchBufferHandle** scratch_buffer_handles);
// Allocates a TfLiteTensor struct and populates the returned value with
@@ -145,17 +155,19 @@ class MicroAllocator {
// class during StartModelAllocation() and contains the source-of-truth for
// buffers.
virtual TfLiteTensor* AllocatePersistentTfLiteTensor(
const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index);
const Model* model, const SubgraphAllocations* subgraph_allocations,
int tensor_index, int subgraph_index);
// Allocates a TfLiteTensor struct and populates the returned value with
// properties from the model flatbuffer. This struct is allocated from
// temporary arena memory and is only guaranteed until a call is made to
// ResetTempAllocations(). The eval_tensors pointer should be the value passed
// into this class during StartModelAllocation() and contains the
// source-of-truth for buffers.
virtual TfLiteTensor* AllocateTempTfLiteTensor(const Model* model,
TfLiteEvalTensor* eval_tensors,
int tensor_index);
// ResetTempAllocations(). subgraph_allocations contains the array of
// TfLiteEvalTensors. If the newly allocated temp at the specified subgraph
// and tensor index is already present in the TfLiteEvalTensor array, its
// data buffer will be re-used.
virtual TfLiteTensor* AllocateTempTfLiteTensor(
const Model* model, const SubgraphAllocations* subgraph_allocations,
int tensor_index, int subgraph_index);
// Resets all temporary allocations. This method should be called after a
// chain of temp allocations (e.g. chain of TfLiteTensor objects via
@@ -171,7 +183,8 @@ class MicroAllocator {
// This method only requests a buffer with a given size to be used after a
// model has finished allocation via FinishModelAllocation(). All requested
// buffers will be accessible by the out-param in that method.
TfLiteStatus RequestScratchBufferInArena(size_t bytes, int* buffer_idx);
TfLiteStatus RequestScratchBufferInArena(size_t bytes, int subgraph_idx,
int* buffer_idx);
// Finish allocating a specific NodeAndRegistration prepare block (kernel
// entry for a model) with a given node ID. This call ensures that any scratch
@@ -183,6 +196,14 @@ class MicroAllocator {
// `FinishModelAllocation`. Otherwise, it will return 0.
size_t used_bytes() const;
// Converts a flatbuffer int32_t array to a TfLiteIntArray, accounting for
// endianness.
TfLiteStatus FlatBufferVectorToTfLiteTypeArray(
const flatbuffers::Vector<int32_t>* flatbuffer_array,
TfLiteIntArray** result);
BuiltinDataAllocator* GetBuiltinDataAllocator();
protected:
MicroAllocator(SimpleMemoryAllocator* memory_allocator,
ErrorReporter* error_reporter);
@@ -192,23 +213,13 @@ class MicroAllocator {
// registration pointers required to represent the inference graph of the
// model.
virtual TfLiteStatus AllocateNodeAndRegistrations(
const Model* model, NodeAndRegistration** node_and_registrations);
// Populates node and registration pointers representing the inference graph
// of the model from values inside the flatbuffer (loaded from the TfLiteModel
// instance). Persistent data (e.g. operator data) is allocated from the
// arena.
virtual TfLiteStatus PrepareNodeAndRegistrationDataFromFlatbuffer(
const Model* model, const MicroOpResolver& op_resolver,
NodeAndRegistration* node_and_registrations);
const Model* model, SubgraphAllocations* subgraph_allocations);
// Allocates the list of persistent TfLiteEvalTensors that are used for the
// "eval" phase of model inference. These structs will be the source of truth
// for all tensor buffers. Allocation results are stored in the out-param
// eval_tensors.
// for all tensor buffers.
virtual TfLiteStatus AllocateTfLiteEvalTensors(
const Model* model, TfLiteEvalTensor** eval_tensors);
const Model* model, SubgraphAllocations* subgraph_allocations);
// Allocates persistent tensor buffers for variable tensors in the subgraph.
virtual TfLiteStatus AllocateVariables(const SubGraph* subgraph,
TfLiteEvalTensor* eval_tensors);
@@ -216,21 +227,19 @@ class MicroAllocator {
// Allocate and return a persistent TfLiteTensor.
// TODO(b/162311891): Drop this method when the interpreter has an API for
// accessing TfLiteEvalTensor structs.
virtual TfLiteTensor* AllocatePersistentTfLiteTensorInternal(
const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index);
virtual TfLiteTensor* AllocatePersistentTfLiteTensorInternal();
// Populates a TfLiteTensor struct with data from the model flatbuffer. Any
// quantization data is allocated from either the tail (persistent) or temp
// sections of the arena based on the allocation flag.
virtual TfLiteStatus PopulateTfLiteTensorFromFlatbuffer(
const Model* model, const SubGraph* subgraph, TfLiteTensor* tensor,
int tensor_index, bool allocate_temp);
virtual TfLiteStatus PopulateTfLiteTensorFromFlatbuffer(const Model* model,
TfLiteTensor* tensor,
int tensor_index,
int subgraph_idx,
bool allocate_temp);
ErrorReporter* error_reporter() const;
// Returns the first subgraph from the model.
const SubGraph* GetSubGraphFromModel(const Model* model);
private:
// Commits a memory plan for all non-persistent buffer allocations in the
// 'head' section of the memory arena. The eval_tensors pointer is the list of
@@ -240,9 +249,8 @@ class MicroAllocator {
// ScratchBufferHandle structs that will point to allocated buffers also in
// the head section.
virtual TfLiteStatus CommitStaticMemoryPlan(
const Model* model, const SubGraph* subgraph,
TfLiteEvalTensor* eval_tensors,
ScratchBufferHandle* scratch_buffer_handles);
const Model* model, TfLiteEvalTensor* eval_tensors,
ScratchBufferHandle* scratch_buffer_handles, int subgraph_idx);
// Allocates an array of ScratchBufferHandle structs in the tail section for a
// given number of handles.
@@ -261,6 +269,9 @@ class MicroAllocator {
// A simple memory allocator that always allocate from the arena tail or head.
SimpleMemoryAllocator* memory_allocator_;
// Allocator used to allocate persistent builtin data.
BuiltinDataAllocator* builtin_data_allocator_;
ErrorReporter* error_reporter_;
bool model_is_allocating_;

View File

@@ -0,0 +1,245 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/micro_graph.h"
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/flatbuffer_utils.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_profiler.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
namespace {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
const char* OpNameFromRegistration(const TfLiteRegistration* registration) {
if (registration->builtin_code == BuiltinOperator_CUSTOM) {
return registration->custom_name;
} else {
return EnumNameBuiltinOperator(BuiltinOperator(registration->builtin_code));
}
}
#endif // !defined(TF_LITE_STRIP_ERROR_STRINGS)
} // namespace
MicroGraph::MicroGraph(TfLiteContext* context, const Model* model,
MicroAllocator* allocator)
: context_(context),
model_(model),
allocator_(allocator),
current_subgraph_index_(0) {
if (model != nullptr) {
subgraphs_ = model->subgraphs();
}
}
MicroGraph::~MicroGraph() {}
TfLiteStatus MicroGraph::InitSubgraphs() {
int previous_subgraph_idx = current_subgraph_index_;
for (size_t subgraph_idx = 0; subgraph_idx < subgraphs_->size();
subgraph_idx++) {
current_subgraph_index_ = subgraph_idx;
uint32_t operators_size = NumSubgraphOperators(model_, subgraph_idx);
for (size_t i = 0; i < operators_size; ++i) {
TfLiteNode* node =
&(subgraph_allocations_[subgraph_idx].node_and_registrations[i].node);
const TfLiteRegistration* registration =
subgraph_allocations_[subgraph_idx]
.node_and_registrations[i]
.registration;
size_t init_data_size;
const char* init_data;
if (registration->builtin_code == BuiltinOperator_CUSTOM) {
init_data = reinterpret_cast<const char*>(node->custom_initial_data);
init_data_size = node->custom_initial_data_size;
} else {
init_data = reinterpret_cast<const char*>(node->builtin_data);
init_data_size = 0;
}
if (registration->init) {
node->user_data =
registration->init(context_, init_data, init_data_size);
}
}
}
current_subgraph_index_ = previous_subgraph_idx;
return kTfLiteOk;
}
TfLiteStatus MicroGraph::PrepareSubgraphs() {
int previous_subgraph_idx = current_subgraph_index_;
for (size_t subgraph_idx = 0; subgraph_idx < subgraphs_->size();
subgraph_idx++) {
current_subgraph_index_ = subgraph_idx;
uint32_t operators_size = NumSubgraphOperators(model_, subgraph_idx);
for (size_t i = 0; i < operators_size; ++i) {
TfLiteNode* node =
&(subgraph_allocations_[subgraph_idx].node_and_registrations[i].node);
const TfLiteRegistration* registration =
subgraph_allocations_[subgraph_idx]
.node_and_registrations[i]
.registration;
if (registration->prepare != nullptr) {
TfLiteStatus prepare_status = registration->prepare(context_, node);
if (prepare_status != kTfLiteOk) {
MicroPrintf("Node %s (number %d) failed to prepare with status %d",
OpNameFromRegistration(registration), i, prepare_status);
return kTfLiteError;
}
}
allocator_->FinishPrepareNodeAllocations(/*node_id=*/i);
}
}
current_subgraph_index_ = previous_subgraph_idx;
return kTfLiteOk;
}
TfLiteStatus MicroGraph::FreeSubgraphs() {
int previous_subgraph_idx = current_subgraph_index_;
for (size_t subgraph_idx = 0; subgraph_idx < subgraphs_->size();
subgraph_idx++) {
current_subgraph_index_ = subgraph_idx;
uint32_t operators_size = NumSubgraphOperators(model_, subgraph_idx);
for (size_t i = 0; i < operators_size; ++i) {
TfLiteNode* node =
&(subgraph_allocations_[subgraph_idx].node_and_registrations[i].node);
const TfLiteRegistration* registration =
subgraph_allocations_[subgraph_idx]
.node_and_registrations[i]
.registration;
// registration is allocated outside the interpreter, so double check to
// make sure it's not nullptr.
if (registration != nullptr && registration->free != nullptr) {
registration->free(context_, node->user_data);
}
}
}
current_subgraph_index_ = previous_subgraph_idx;
return kTfLiteOk;
}
TfLiteStatus MicroGraph::InvokeSubgraph(int subgraph_idx) {
int previous_subgraph_idx = current_subgraph_index_;
current_subgraph_index_ = subgraph_idx;
if (static_cast<size_t>(subgraph_idx) >= subgraphs_->size()) {
MicroPrintf("Accessing subgraph %d but only %d subgraphs found",
subgraph_idx, subgraphs_->size());
return kTfLiteError;
}
uint32_t operators_size = NumSubgraphOperators(model_, subgraph_idx);
for (size_t i = 0; i < operators_size; ++i) {
TfLiteNode* node =
&(subgraph_allocations_[subgraph_idx].node_and_registrations[i].node);
const TfLiteRegistration* registration = subgraph_allocations_[subgraph_idx]
.node_and_registrations[i]
.registration;
// This ifdef is needed (even though ScopedMicroProfiler itself is a no-op with
// -DTF_LITE_STRIP_ERROR_STRINGS) because the function OpNameFromRegistration is
// only defined for builds with the error strings.
#if !defined(TF_LITE_STRIP_ERROR_STRINGS)
ScopedMicroProfiler scoped_profiler(
OpNameFromRegistration(registration),
reinterpret_cast<MicroProfiler*>(context_->profiler));
#endif
TFLITE_DCHECK(registration->invoke);
TfLiteStatus invoke_status = registration->invoke(context_, node);
// All TfLiteTensor structs used in the kernel are allocated from temp
// memory in the allocator. This creates a chain of allocations in the
// temp section. The call below resets the chain of allocations to
// prepare for the next call.
allocator_->ResetTempAllocations();
if (invoke_status == kTfLiteError) {
MicroPrintf("Node %s (number %d) failed to invoke with status %d",
OpNameFromRegistration(registration), i, invoke_status);
return kTfLiteError;
} else if (invoke_status != kTfLiteOk) {
return invoke_status;
}
}
current_subgraph_index_ = previous_subgraph_idx;
return kTfLiteOk;
}
TfLiteStatus MicroGraph::ResetVariableTensors() {
for (size_t subgraph_idx = 0; subgraph_idx < subgraphs_->size();
subgraph_idx++) {
const SubGraph* subgraph = (*subgraphs_)[subgraph_idx];
for (size_t i = 0; i < subgraph->tensors()->size(); ++i) {
auto* tensor = subgraph->tensors()->Get(i);
if (tensor->is_variable()) {
size_t buffer_size;
TF_LITE_ENSURE_STATUS(TfLiteEvalTensorByteLength(
&subgraph_allocations_[subgraph_idx].tensors[i], &buffer_size));
int value = 0;
if (tensor->type() == tflite::TensorType_INT8) {
value = tensor->quantization()->zero_point()->Get(0);
}
memset(subgraph_allocations_[subgraph_idx].tensors[i].data.raw, value,
buffer_size);
}
}
}
return kTfLiteOk;
}
int MicroGraph::NumSubgraphs() { return model_->subgraphs()->size(); }
void MicroGraph::SetSubgraphAllocations(
SubgraphAllocations* subgraph_allocations) {
subgraph_allocations_ = subgraph_allocations;
}
size_t MicroGraph::NumSubgraphInputs(int subgraph_idx) {
return model_->subgraphs()->Get(subgraph_idx)->inputs()->size();
}
TfLiteEvalTensor* MicroGraph::GetSubgraphInput(int subgraph_idx,
int input_idx) {
int tensor_idx =
model_->subgraphs()->Get(subgraph_idx)->inputs()->Get(input_idx);
return &subgraph_allocations_[subgraph_idx].tensors[tensor_idx];
}
size_t MicroGraph::NumSubgraphOutputs(int subgraph_idx) {
return model_->subgraphs()->Get(subgraph_idx)->outputs()->size();
}
TfLiteEvalTensor* MicroGraph::GetSubgraphOutput(int subgraph_idx,
int output_idx) {
int tensor_idx =
model_->subgraphs()->Get(subgraph_idx)->outputs()->Get(output_idx);
return &subgraph_allocations_[subgraph_idx].tensors[tensor_idx];
}
} // namespace tflite
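ResetVariableTensors() above fills int8 variable tensors with their zero point rather than with literal zeros; since real_value = scale * (quantized - zero_point), writing zero_point everywhere represents a real value of 0.0. A small illustrative check with assumed quantization parameters:

#include <cassert>

int main() {
  const float scale = 0.5f;   // assumed
  const int zero_point = -3;  // assumed
  const int q = zero_point;   // what the reset writes into the buffer
  const float real = scale * (q - zero_point);
  assert(real == 0.0f);
  return 0;
}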

View File

@@ -0,0 +1,97 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_GRAPH_H_
#define TENSORFLOW_LITE_MICRO_MICRO_GRAPH_H_
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
// Abstracts the details of interacting with the tflite::Model.
//
// Provides methods to access, initialize, prepare, invoke and free any
// subgraph in the tflite::Model.
class MicroGraph {
public:
// The lifetime of the context, model, and allocator must be at least as long
// as that of the graph object, since this class may need to access them
// at any time.
MicroGraph(TfLiteContext* context, const Model* model,
MicroAllocator* allocator);
virtual ~MicroGraph();
// Sets up builtin data and calls TfLiteRegistration->Init for every operator
// in every subgraph in the model.
virtual TfLiteStatus InitSubgraphs();
// Calls TfLiteRegistration->Prepare for every operator in every subgraph in
// the model.
virtual TfLiteStatus PrepareSubgraphs();
// Calls TfLiteRegistration->Free for every operator in every subgraph in the
// model.
virtual TfLiteStatus FreeSubgraphs();
// Calls TfLiteRegistration->Invoke for every operator in a single subgraph in
// the model.
virtual TfLiteStatus InvokeSubgraph(int subgraph_idx);
// Zeros out all variable tensors in all subgraphs in the model.
virtual TfLiteStatus ResetVariableTensors();
// Number of tensor inputs to a specified subgraph in the model.
virtual size_t NumSubgraphInputs(int subgraph_idx);
// Get the specified input tensor of a specified subgraph in the model.
virtual TfLiteEvalTensor* GetSubgraphInput(int subgraph_idx, int input_idx);
// Number of tensor outputs from a specified subgraph in the model.
virtual size_t NumSubgraphOutputs(int subgraph_idx);
// Get the specified output tensor of a specified subgraph in the model.
virtual TfLiteEvalTensor* GetSubgraphOutput(int subgraph_idx, int output_idx);
// Number of subgraphs in the model.
virtual int NumSubgraphs();
// Hook to pass in subgraph allocations tracked within the interpreter,
// allowing MicroGraph to init / prepare / invoke subgraphs in the model.
void SetSubgraphAllocations(SubgraphAllocations* subgraph_allocations);
// Get the current subgraph index. Within an operator, this is guaranteed
// to be the subgraph of that operator.
int GetCurrentSubgraphIndex() { return current_subgraph_index_; }
// Gets the list of allocations for each subgraph. This is the source of truth
// for all per-subgraph allocation data.
SubgraphAllocations* GetAllocations() { return subgraph_allocations_; }
private:
TfLiteContext* context_;
const Model* model_;
MicroAllocator* allocator_;
SubgraphAllocations* subgraph_allocations_ = nullptr;
int current_subgraph_index_;
const flatbuffers::Vector<flatbuffers::Offset<SubGraph>>* subgraphs_;
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MICRO_GRAPH_H_
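
For orientation, a condensed sketch of the order in which the interpreter in this commit drives a MicroGraph; error handling is elided and context, model, and allocator are assumed to be set up as in the MicroInterpreter constructor:

  tflite::MicroGraph graph(&context, model, allocator);
  tflite::SubgraphAllocations* allocations =
      allocator->StartModelAllocation(model);
  graph.SetSubgraphAllocations(allocations);
  graph.InitSubgraphs();     // TfLiteRegistration->init for every operator
  graph.PrepareSubgraphs();  // TfLiteRegistration->prepare for every operator
  graph.InvokeSubgraph(0);   // run the primary subgraph
  graph.FreeSubgraphs();     // TfLiteRegistration->free at teardown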

View File

@@ -22,87 +22,16 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/tensor_utils.h"
#include "tensorflow/lite/micro/flatbuffer_utils.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/micro/micro_profiler.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/schema/schema_utils.h"
namespace tflite {
namespace {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
const char* OpNameFromRegistration(const TfLiteRegistration* registration) {
if (registration->builtin_code == BuiltinOperator_CUSTOM) {
return registration->custom_name;
} else {
return EnumNameBuiltinOperator(BuiltinOperator(registration->builtin_code));
}
}
#endif // !defined(TF_LITE_STRIP_ERROR_STRINGS)
} // namespace
namespace internal {
ContextHelper::ContextHelper(ErrorReporter* error_reporter,
MicroAllocator* allocator, const Model* model)
: allocator_(allocator), error_reporter_(error_reporter), model_(model) {}
void* ContextHelper::AllocatePersistentBuffer(TfLiteContext* ctx,
size_t bytes) {
return reinterpret_cast<ContextHelper*>(ctx->impl_)
->allocator_->AllocatePersistentBuffer(bytes);
}
TfLiteStatus ContextHelper::RequestScratchBufferInArena(TfLiteContext* ctx,
size_t bytes,
int* buffer_idx) {
ContextHelper* helper = reinterpret_cast<ContextHelper*>(ctx->impl_);
return helper->allocator_->RequestScratchBufferInArena(bytes, buffer_idx);
}
void* ContextHelper::GetScratchBuffer(TfLiteContext* ctx, int buffer_idx) {
ContextHelper* helper = reinterpret_cast<ContextHelper*>(ctx->impl_);
ScratchBufferHandle* handle = helper->scratch_buffer_handles_ + buffer_idx;
return handle->data;
}
void ContextHelper::ReportOpError(struct TfLiteContext* context,
const char* format, ...) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
ContextHelper* helper = static_cast<ContextHelper*>(context->impl_);
va_list args;
va_start(args, format);
TF_LITE_REPORT_ERROR(helper->error_reporter_, format, args);
va_end(args);
#endif
}
TfLiteTensor* ContextHelper::GetTensor(const struct TfLiteContext* context,
int tensor_idx) {
ContextHelper* helper = static_cast<ContextHelper*>(context->impl_);
return helper->allocator_->AllocateTempTfLiteTensor(
helper->model_, helper->eval_tensors_, tensor_idx);
}
TfLiteEvalTensor* ContextHelper::GetEvalTensor(
const struct TfLiteContext* context, int tensor_idx) {
ContextHelper* helper = reinterpret_cast<ContextHelper*>(context->impl_);
return &helper->eval_tensors_[tensor_idx];
}
void ContextHelper::SetTfLiteEvalTensors(TfLiteEvalTensor* eval_tensors) {
eval_tensors_ = eval_tensors;
}
void ContextHelper::SetScratchBufferHandles(
ScratchBufferHandle* scratch_buffer_handles) {
scratch_buffer_handles_ = scratch_buffer_handles;
}
} // namespace internal
MicroInterpreter::MicroInterpreter(const Model* model,
const MicroOpResolver& op_resolver,
@@ -115,10 +44,10 @@ MicroInterpreter::MicroInterpreter(const Model* model,
error_reporter_(error_reporter),
allocator_(*MicroAllocator::Create(tensor_arena, tensor_arena_size,
error_reporter)),
graph_(&context_, model, &allocator_),
tensors_allocated_(false),
initialization_status_(kTfLiteError),
eval_tensors_(nullptr),
context_helper_(error_reporter_, &allocator_, model),
input_tensors_(nullptr),
output_tensors_(nullptr) {
Init(profiler);
@@ -133,122 +62,159 @@ MicroInterpreter::MicroInterpreter(const Model* model,
op_resolver_(op_resolver),
error_reporter_(error_reporter),
allocator_(*allocator),
graph_(&context_, model, allocator),
tensors_allocated_(false),
initialization_status_(kTfLiteError),
eval_tensors_(nullptr),
context_helper_(error_reporter_, &allocator_, model),
input_tensors_(nullptr),
output_tensors_(nullptr) {
Init(profiler);
}
MicroInterpreter::~MicroInterpreter() {
if (node_and_registrations_ != nullptr) {
for (size_t i = 0; i < subgraph_->operators()->size(); ++i) {
TfLiteNode* node = &(node_and_registrations_[i].node);
const TfLiteRegistration* registration =
node_and_registrations_[i].registration;
// registration is allocated outside the interpreter, so double check to
// make sure it's not nullptr;
if (registration != nullptr && registration->free != nullptr) {
registration->free(&context_, node->user_data);
}
}
if (graph_.GetAllocations() != nullptr) {
graph_.FreeSubgraphs();
}
}
void MicroInterpreter::Init(MicroProfiler* profiler) {
const flatbuffers::Vector<flatbuffers::Offset<SubGraph>>* subgraphs =
model_->subgraphs();
if (subgraphs->size() != 1) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Only 1 subgraph is currently supported.\n");
initialization_status_ = kTfLiteError;
return;
}
subgraph_ = (*subgraphs)[0];
context_.impl_ = static_cast<void*>(&context_helper_);
context_.ReportError = context_helper_.ReportOpError;
context_.GetTensor = context_helper_.GetTensor;
context_.GetEvalTensor = context_helper_.GetEvalTensor;
context_.recommended_num_threads = 1;
context_.impl_ = static_cast<void*>(this);
context_.ReportError = ReportOpError;
context_.GetTensor = GetTensor;
context_.GetEvalTensor = GetEvalTensor;
context_.profiler = profiler;
initialization_status_ = kTfLiteOk;
}
TfLiteStatus MicroInterpreter::PrepareNodeAndRegistrationDataFromFlatbuffer() {
for (int subgraph_idx = 0; subgraph_idx < graph_.NumSubgraphs();
subgraph_idx++) {
const SubGraph* subgraph = model_->subgraphs()->Get(subgraph_idx);
TFLITE_DCHECK(subgraph != nullptr);
auto* opcodes = model_->operator_codes();
BuiltinDataAllocator* builtin_data_allocator =
allocator_.GetBuiltinDataAllocator();
uint32_t operators_size = NumSubgraphOperators(subgraph);
for (size_t i = 0; i < operators_size; ++i) {
const auto* op = subgraph->operators()->Get(i);
const size_t index = op->opcode_index();
if (index >= opcodes->size()) {
MicroPrintf("Missing registration for opcode_index %d\n", index);
return kTfLiteError;
}
const auto* opcode = opcodes->Get(index);
TfLiteStatus status =
GetRegistrationFromOpCode(opcode, op_resolver_, error_reporter_,
&(graph_.GetAllocations()[subgraph_idx]
.node_and_registrations[i]
.registration));
if (status != kTfLiteOk) {
MicroPrintf("Failed to get registration from op code %s\n ",
EnumNameBuiltinOperator(GetBuiltinCode(opcode)));
return status;
}
const auto* registration = graph_.GetAllocations()[subgraph_idx]
.node_and_registrations[i]
.registration;
if (registration == nullptr) {
MicroPrintf("Skipping op for opcode_index %d\n", index);
return kTfLiteError;
}
BuiltinOperator op_type =
static_cast<BuiltinOperator>(registration->builtin_code);
const char* custom_data = nullptr;
size_t custom_data_size = 0;
unsigned char* builtin_data = nullptr;
if (op_type == BuiltinOperator_CUSTOM) {
// Custom Ops may or may not have a non-null custom_options field.
if (op->custom_options() != nullptr) {
custom_data =
reinterpret_cast<const char*>(op->custom_options()->data());
custom_data_size = op->custom_options()->size();
}
} else {
if (op->custom_options() != nullptr) {
MicroPrintf(
"Unsupported behavior: found builtin operator %s with custom "
"options.\n",
EnumNameBuiltinOperator(op_type));
return kTfLiteError;
}
MicroOpResolver::BuiltinParseFunction parser =
op_resolver_.GetOpDataParser(op_type);
if (parser == nullptr) {
MicroPrintf("Did not find a parser for %s",
EnumNameBuiltinOperator(op_type));
return kTfLiteError;
}
TF_LITE_ENSURE_STATUS(parser(op, error_reporter_,
builtin_data_allocator,
(void**)(&builtin_data)));
}
TfLiteIntArray* inputs_array;
TF_LITE_ENSURE_STATUS(allocator_.FlatBufferVectorToTfLiteTypeArray(
op->inputs(), &inputs_array));
TfLiteIntArray* outputs_array;
TF_LITE_ENSURE_STATUS(allocator_.FlatBufferVectorToTfLiteTypeArray(
op->outputs(), &outputs_array));
TfLiteNode* node = &(
graph_.GetAllocations()[subgraph_idx].node_and_registrations[i].node);
*node = {};
node->inputs = inputs_array;
node->outputs = outputs_array;
node->builtin_data = reinterpret_cast<void*>(builtin_data);
node->custom_initial_data = custom_data;
node->custom_initial_data_size = custom_data_size;
}
}
return kTfLiteOk;
}
TfLiteStatus MicroInterpreter::AllocateTensors() {
if (allocator_.StartModelAllocation(model_, op_resolver_,
&node_and_registrations_,
&eval_tensors_) != kTfLiteOk) {
SubgraphAllocations* allocations = allocator_.StartModelAllocation(model_);
if (allocations == nullptr) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Failed starting model allocation.\n");
initialization_status_ = kTfLiteError;
return kTfLiteError;
}
// Update the pointer now that TfLiteEvalTensor allocation has completed on
// the context helper.
// TODO(b/16157777): This call would not be needed if ContextHelper rolled
// into the interpreter.
context_helper_.SetTfLiteEvalTensors(eval_tensors_);
context_.tensors_size = subgraph_->tensors()->size();
graph_.SetSubgraphAllocations(allocations);
TF_LITE_ENSURE_STATUS(PrepareNodeAndRegistrationDataFromFlatbuffer());
// Only allow AllocatePersistentBuffer in Init stage.
context_.AllocatePersistentBuffer = context_helper_.AllocatePersistentBuffer;
context_.AllocatePersistentBuffer = AllocatePersistentBuffer;
context_.RequestScratchBufferInArena = nullptr;
context_.GetScratchBuffer = nullptr;
for (size_t i = 0; i < subgraph_->operators()->size(); ++i) {
auto* node = &(node_and_registrations_[i].node);
auto* registration = node_and_registrations_[i].registration;
size_t init_data_size;
const char* init_data;
if (registration->builtin_code == BuiltinOperator_CUSTOM) {
init_data = reinterpret_cast<const char*>(node->custom_initial_data);
init_data_size = node->custom_initial_data_size;
} else {
init_data = reinterpret_cast<const char*>(node->builtin_data);
init_data_size = 0;
}
if (registration->init) {
node->user_data =
registration->init(&context_, init_data, init_data_size);
}
}
context_.GetExecutionPlan = GetGraph;
graph_.InitSubgraphs();
// Both AllocatePersistentBuffer and RequestScratchBufferInArena is
// available in Prepare stage.
context_.RequestScratchBufferInArena =
context_helper_.RequestScratchBufferInArena;
for (size_t i = 0; i < subgraph_->operators()->size(); ++i) {
auto* node = &(node_and_registrations_[i].node);
auto* registration = node_and_registrations_[i].registration;
if (registration->prepare) {
TfLiteStatus prepare_status = registration->prepare(&context_, node);
if (prepare_status != kTfLiteOk) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Node %s (number %df) failed to prepare with status %d",
OpNameFromRegistration(registration), i, prepare_status);
return kTfLiteError;
}
}
allocator_.FinishPrepareNodeAllocations(/*node_id=*/i);
}
context_.RequestScratchBufferInArena = RequestScratchBufferInArena;
graph_.PrepareSubgraphs();
// Prepare is done, we're ready for Invoke. Memory allocation is no longer
// allowed. Kernels can only fetch scratch buffers via GetScratchBuffer.
context_.AllocatePersistentBuffer = nullptr;
context_.RequestScratchBufferInArena = nullptr;
context_.GetScratchBuffer = context_helper_.GetScratchBuffer;
context_.GetScratchBuffer = GetScratchBuffer;
TF_LITE_ENSURE_OK(&context_,
allocator_.FinishModelAllocation(model_, eval_tensors_,
&scratch_buffer_handles_));
// TODO(b/16157777): Remove this when ContextHelper is rolled into this class.
context_helper_.SetScratchBufferHandles(scratch_buffer_handles_);
TF_LITE_ENSURE_OK(&context_, allocator_.FinishModelAllocation(
model_, graph_.GetAllocations(),
&scratch_buffer_handles_));
// TODO(b/162311891): Drop these allocations when the interpreter supports
// handling buffers from TfLiteEvalTensor.
@@ -266,7 +232,7 @@ TfLiteStatus MicroInterpreter::AllocateTensors() {
for (size_t i = 0; i < inputs_size(); ++i) {
input_tensors_[i] = allocator_.AllocatePersistentTfLiteTensor(
model_, eval_tensors_, inputs().Get(i));
model_, graph_.GetAllocations(), inputs().Get(i), 0);
if (input_tensors_[i] == nullptr) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Failed to initialize input tensor %d", i);
@@ -290,7 +256,7 @@ TfLiteStatus MicroInterpreter::AllocateTensors() {
for (size_t i = 0; i < outputs_size(); ++i) {
output_tensors_[i] = allocator_.AllocatePersistentTfLiteTensor(
model_, eval_tensors_, outputs().Get(i));
model_, graph_.GetAllocations(), outputs().Get(i), 0);
if (output_tensors_[i] == nullptr) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Failed to initialize output tensor %d", i);
@@ -316,41 +282,7 @@ TfLiteStatus MicroInterpreter::Invoke() {
if (!tensors_allocated_) {
TF_LITE_ENSURE_OK(&context_, AllocateTensors());
}
for (size_t i = 0; i < subgraph_->operators()->size(); ++i) {
auto* node = &(node_and_registrations_[i].node);
auto* registration = node_and_registrations_[i].registration;
// This ifdef is needed (even though ScopedMicroProfiler itself is a no-op with
// -DTF_LITE_STRIP_ERROR_STRINGS) because the function OpNameFromRegistration is
// only defined for builds with the error strings.
#if !defined(TF_LITE_STRIP_ERROR_STRINGS)
ScopedMicroProfiler scoped_profiler(
OpNameFromRegistration(registration),
reinterpret_cast<MicroProfiler*>(context_.profiler));
#endif
TFLITE_DCHECK(registration->invoke);
TfLiteStatus invoke_status = registration->invoke(&context_, node);
// All TfLiteTensor structs used in the kernel are allocated from temp
// memory in the allocator. This creates a chain of allocations in the
// temp section. The call below resets the chain of allocations to
// prepare for the next call.
allocator_.ResetTempAllocations();
if (invoke_status == kTfLiteError) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Node %s (number %d) failed to invoke with status %d",
OpNameFromRegistration(registration), i, invoke_status);
return kTfLiteError;
} else if (invoke_status != kTfLiteOk) {
return invoke_status;
}
}
return kTfLiteOk;
return graph_.InvokeSubgraph(0);
}
TfLiteTensor* MicroInterpreter::input(size_t index) {
@@ -375,34 +307,68 @@ TfLiteTensor* MicroInterpreter::output(size_t index) {
return output_tensors_[index];
}
TfLiteTensor* MicroInterpreter::tensor(size_t index) {
const size_t length = tensors_size();
if (index >= length) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Tensor index %d out of range (length is %d)", index,
length);
return nullptr;
}
return allocator_.AllocatePersistentTfLiteTensor(model_, eval_tensors_,
index);
TfLiteStatus MicroInterpreter::ResetVariableTensors() {
return graph_.ResetVariableTensors();
}
TfLiteStatus MicroInterpreter::ResetVariableTensors() {
for (size_t i = 0; i < subgraph_->tensors()->size(); ++i) {
auto* tensor = subgraph_->tensors()->Get(i);
if (tensor->is_variable()) {
size_t buffer_size;
TF_LITE_ENSURE_STATUS(
TfLiteEvalTensorByteLength(&eval_tensors_[i], &buffer_size));
void* MicroInterpreter::AllocatePersistentBuffer(TfLiteContext* ctx,
size_t bytes) {
return reinterpret_cast<MicroInterpreter*>(ctx->impl_)
->allocator_.AllocatePersistentBuffer(bytes);
}
int value = 0;
if (tensor->type() == tflite::TensorType_INT8) {
value = tensor->quantization()->zero_point()->Get(0);
}
memset(eval_tensors_[i].data.raw, value, buffer_size);
}
}
TfLiteStatus MicroInterpreter::RequestScratchBufferInArena(TfLiteContext* ctx,
size_t bytes,
int* buffer_idx) {
MicroInterpreter* interpreter =
reinterpret_cast<MicroInterpreter*>(ctx->impl_);
return interpreter->allocator_.RequestScratchBufferInArena(
bytes, interpreter->graph_.GetCurrentSubgraphIndex(), buffer_idx);
}
void* MicroInterpreter::GetScratchBuffer(TfLiteContext* ctx, int buffer_idx) {
MicroInterpreter* interpreter =
reinterpret_cast<MicroInterpreter*>(ctx->impl_);
ScratchBufferHandle* handle =
interpreter->scratch_buffer_handles_ + buffer_idx;
return handle->data;
}
void MicroInterpreter::ReportOpError(struct TfLiteContext* context,
const char* format, ...) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
MicroInterpreter* interpreter =
static_cast<MicroInterpreter*>(context->impl_);
va_list args;
va_start(args, format);
TF_LITE_REPORT_ERROR(interpreter->error_reporter_, format, args);
va_end(args);
#endif
}
TfLiteTensor* MicroInterpreter::GetTensor(const struct TfLiteContext* context,
int tensor_idx) {
MicroInterpreter* interpreter =
static_cast<MicroInterpreter*>(context->impl_);
return interpreter->allocator_.AllocateTempTfLiteTensor(
interpreter->model_, interpreter->graph_.GetAllocations(), tensor_idx,
interpreter->get_subgraph_index());
}
TfLiteEvalTensor* MicroInterpreter::GetEvalTensor(
const struct TfLiteContext* context, int tensor_idx) {
MicroInterpreter* interpreter =
reinterpret_cast<MicroInterpreter*>(context->impl_);
return &interpreter->graph_
.GetAllocations()[interpreter->get_subgraph_index()]
.tensors[tensor_idx];
}
TfLiteStatus MicroInterpreter::GetGraph(struct TfLiteContext* context,
TfLiteIntArray** args) {
MicroInterpreter* interpreter =
reinterpret_cast<MicroInterpreter*>(context->impl_);
*args = reinterpret_cast<TfLiteIntArray*>(&interpreter->graph_);
return kTfLiteOk;
}
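
The rebinding above keeps the usual three-phase contract for kernels. A hypothetical kernel sketch (OpData and the 256-byte scratch size are illustrative, not part of this commit) showing which context hooks are live in each phase:

  struct OpData {
    int scratch_index;
  };

  void* ExampleInit(TfLiteContext* context, const char* buffer, size_t length) {
    // Init: only AllocatePersistentBuffer is bound.
    return context->AllocatePersistentBuffer(context, sizeof(OpData));
  }

  TfLiteStatus ExamplePrepare(TfLiteContext* context, TfLiteNode* node) {
    // Prepare: RequestScratchBufferInArena becomes available.
    OpData* data = static_cast<OpData*>(node->user_data);
    return context->RequestScratchBufferInArena(context, /*bytes=*/256,
                                                &data->scratch_index);
  }

  TfLiteStatus ExampleEval(TfLiteContext* context, TfLiteNode* node) {
    // Invoke: only GetScratchBuffer remains bound.
    OpData* data = static_cast<OpData*>(node->user_data);
    void* scratch = context->GetScratchBuffer(context, data->scratch_index);
    return scratch != nullptr ? kTfLiteOk : kTfLiteError;
  }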

View File

@@ -23,6 +23,7 @@ limitations under the License.
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/micro_graph.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/micro/micro_profiler.h"
#include "tensorflow/lite/portable_type_to_tflitetype.h"
@@ -34,46 +35,6 @@ limitations under the License.
namespace tflite {
namespace internal {
// A helper class to encapsulate the implementation of APIs in Context.
// context->impl_ points to an instance of this class.
// Check tensorflow/lite/c/common.h for detailed descriptions.
// TODO(b/16157777): Consider rolling this class into MicroInterpreter.
class ContextHelper {
public:
explicit ContextHelper(ErrorReporter* error_reporter,
MicroAllocator* allocator, const Model* model);
// Functions that will be assigned to function pointers on TfLiteContext:
static void* AllocatePersistentBuffer(TfLiteContext* ctx, size_t bytes);
static TfLiteStatus RequestScratchBufferInArena(TfLiteContext* ctx,
size_t bytes,
int* buffer_idx);
static void* GetScratchBuffer(TfLiteContext* ctx, int buffer_idx);
static void ReportOpError(struct TfLiteContext* context, const char* format,
...);
static TfLiteTensor* GetTensor(const struct TfLiteContext* context,
int tensor_idx);
static TfLiteEvalTensor* GetEvalTensor(const struct TfLiteContext* context,
int tensor_idx);
// Sets the pointer to a list of TfLiteEvalTensor instances.
void SetTfLiteEvalTensors(TfLiteEvalTensor* eval_tensors);
// Sets the pointer to a list of ScratchBufferHandle instances.
void SetScratchBufferHandles(ScratchBufferHandle* scratch_buffer_handles);
private:
MicroAllocator* allocator_ = nullptr;
ErrorReporter* error_reporter_ = nullptr;
const Model* model_ = nullptr;
TfLiteEvalTensor* eval_tensors_ = nullptr;
ScratchBufferHandle* scratch_buffer_handles_ = nullptr;
};
} // namespace internal
class MicroInterpreter {
public:
// The lifetime of the model, op resolver, tensor arena, error reporter and
@@ -108,22 +69,12 @@ class MicroInterpreter {
// TODO(b/149795762): Add this to the TfLiteStatus enum.
TfLiteStatus Invoke();
size_t tensors_size() const { return context_.tensors_size; }
TfLiteTensor* tensor(size_t tensor_index);
template <class T>
T* typed_tensor(int tensor_index) {
if (TfLiteTensor* tensor_ptr = tensor(tensor_index)) {
if (tensor_ptr->type == typeToTfLiteType<T>()) {
return GetTensorData<T>(tensor_ptr);
}
}
return nullptr;
}
TfLiteTensor* input(size_t index);
size_t inputs_size() const { return subgraph_->inputs()->Length(); }
size_t inputs_size() const {
return model_->subgraphs()->Get(0)->inputs()->size();
}
const flatbuffers::Vector<int32_t>& inputs() const {
return *subgraph_->inputs();
return *model_->subgraphs()->Get(0)->inputs();
}
TfLiteTensor* input_tensor(size_t index) { return input(index); }
template <class T>
@@ -137,9 +88,11 @@ class MicroInterpreter {
}
TfLiteTensor* output(size_t index);
size_t outputs_size() const { return subgraph_->outputs()->Length(); }
size_t outputs_size() const {
return model_->subgraphs()->Get(0)->outputs()->size();
}
const flatbuffers::Vector<int32_t>& outputs() const {
return *subgraph_->outputs();
return *model_->subgraphs()->Get(0)->outputs();
}
TfLiteTensor* output_tensor(size_t index) { return output(index); }
template <class T>
@@ -157,12 +110,11 @@ class MicroInterpreter {
TfLiteStatus initialization_status() const { return initialization_status_; }
size_t operators_size() const { return subgraph_->operators()->size(); }
// For debugging only.
const NodeAndRegistration node_and_registration(int node_index) const {
return node_and_registrations_[node_index];
}
// Populates node and registration pointers representing the inference graph
// of the model from values inside the flatbuffer (loaded from the TfLiteModel
// instance). Persistent data (e.g. operator data) is allocated from the
// arena.
TfLiteStatus PrepareNodeAndRegistrationDataFromFlatbuffer();
// For debugging only.
// Returns the actual used arena in bytes. This method gives the optimal arena
@@ -181,24 +133,36 @@ class MicroInterpreter {
// error reporting during initialization.
void Init(MicroProfiler* profiler);
NodeAndRegistration* node_and_registrations_ = nullptr;
// Gets the current subgraph index used from within context methods.
int get_subgraph_index() { return graph_.GetCurrentSubgraphIndex(); }
// Static functions that are bound to the TfLiteContext instance:
static void* AllocatePersistentBuffer(TfLiteContext* ctx, size_t bytes);
static TfLiteStatus RequestScratchBufferInArena(TfLiteContext* ctx,
size_t bytes,
int* buffer_idx);
static void* GetScratchBuffer(TfLiteContext* ctx, int buffer_idx);
static void ReportOpError(struct TfLiteContext* context, const char* format,
...);
static TfLiteTensor* GetTensor(const struct TfLiteContext* context,
int tensor_idx);
static TfLiteEvalTensor* GetEvalTensor(const struct TfLiteContext* context,
int tensor_idx);
static TfLiteStatus GetGraph(struct TfLiteContext* context,
TfLiteIntArray** args);
const Model* model_;
const MicroOpResolver& op_resolver_;
ErrorReporter* error_reporter_;
TfLiteContext context_ = {};
MicroAllocator& allocator_;
MicroGraph graph_;
bool tensors_allocated_;
TfLiteStatus initialization_status_;
const SubGraph* subgraph_ = nullptr;
TfLiteEvalTensor* eval_tensors_ = nullptr;
ScratchBufferHandle* scratch_buffer_handles_ = nullptr;
// TODO(b/16157777): Drop this reference:
internal::ContextHelper context_helper_;
// TODO(b/162311891): Clean these pointers up when this class supports buffers
// from TfLiteEvalTensor.
TfLiteTensor** input_tensors_;

View File

@@ -1,4 +1,4 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -24,9 +24,11 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/kernels/conv.h"
#include "tensorflow/lite/micro/kernels/ethosu.h"
#include "tensorflow/lite/micro/kernels/fully_connected.h"
#include "tensorflow/lite/micro/kernels/micro_ops.h"
#include "tensorflow/lite/micro/kernels/softmax.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/schema/schema_generated.h"
@@ -139,8 +141,7 @@ class MicroMutableOpResolver : public MicroOpResolver {
TfLiteStatus AddAveragePool2D() {
return AddBuiltin(BuiltinOperator_AVERAGE_POOL_2D,
tflite::ops::micro::Register_AVERAGE_POOL_2D(),
ParsePool);
tflite::Register_AVERAGE_POOL_2D(), ParsePool);
}
TfLiteStatus AddBatchToSpaceNd() {
@@ -168,8 +169,9 @@ class MicroMutableOpResolver : public MicroOpResolver {
ParseConcatenation);
}
TfLiteStatus AddConv2D() {
return AddBuiltin(BuiltinOperator_CONV_2D, Register_CONV_2D(), ParseConv2D);
TfLiteStatus AddConv2D(
const TfLiteRegistration& registration = Register_CONV_2D()) {
return AddBuiltin(BuiltinOperator_CONV_2D, registration, ParseConv2D);
}
TfLiteStatus AddCos() {
@@ -177,6 +179,16 @@ class MicroMutableOpResolver : public MicroOpResolver {
ParseCos);
}
TfLiteStatus AddCumSum() {
return AddBuiltin(BuiltinOperator_CUMSUM, tflite::Register_CUMSUM(),
ParseCumsum);
}
TfLiteStatus AddDepthToSpace() {
return AddBuiltin(BuiltinOperator_DEPTH_TO_SPACE,
tflite::Register_DEPTH_TO_SPACE(), ParseDepthToSpace);
}
TfLiteStatus AddDepthwiseConv2D() {
return AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D,
Register_DEPTHWISE_CONV_2D(), ParseDepthwiseConv2D);
@@ -193,10 +205,6 @@ class MicroMutableOpResolver : public MicroOpResolver {
tflite::Register_DETECTION_POSTPROCESS());
}
TfLiteStatus AddDiv() {
return AddBuiltin(BuiltinOperator_DIV, tflite::Register_DIV(), ParseDiv);
}
TfLiteStatus AddElu() {
return AddBuiltin(BuiltinOperator_ELU, tflite::Register_ELU(), ParseElu);
}
@@ -223,17 +231,41 @@ class MicroMutableOpResolver : public MicroOpResolver {
ParseExpandDims);
}
TfLiteStatus AddFill() {
return AddBuiltin(BuiltinOperator_FILL, tflite::Register_FILL(), ParseFill);
}
TfLiteStatus AddFloor() {
return AddBuiltin(BuiltinOperator_FLOOR,
tflite::ops::micro::Register_FLOOR(), ParseFloor);
}
TfLiteStatus AddFloorDiv() {
return AddBuiltin(BuiltinOperator_FLOOR_DIV, tflite::Register_FLOOR_DIV(),
ParseFloorDiv);
}
TfLiteStatus AddFloorMod() {
return AddBuiltin(BuiltinOperator_FLOOR_MOD, tflite::Register_FLOOR_MOD(),
ParseFloorMod);
}
TfLiteStatus AddFullyConnected(
const TfLiteRegistration& registration = Register_FULLY_CONNECTED()) {
return AddBuiltin(BuiltinOperator_FULLY_CONNECTED, registration,
ParseFullyConnected);
}
TfLiteStatus AddGather() {
return AddBuiltin(BuiltinOperator_GATHER, tflite::Register_GATHER(),
ParseGather);
}
TfLiteStatus AddGatherNd() {
return AddBuiltin(BuiltinOperator_GATHER_ND, tflite::Register_GATHER_ND(),
ParseGatherNd);
}
TfLiteStatus AddGreater() {
return AddBuiltin(BuiltinOperator_GREATER,
tflite::ops::micro::Register_GREATER(), ParseGreater);
@@ -246,11 +278,14 @@ class MicroMutableOpResolver : public MicroOpResolver {
}
TfLiteStatus AddHardSwish() {
return AddBuiltin(BuiltinOperator_HARD_SWISH,
tflite::ops::micro::Register_HARD_SWISH(),
return AddBuiltin(BuiltinOperator_HARD_SWISH, tflite::Register_HARD_SWISH(),
ParseHardSwish);
}
TfLiteStatus AddIf() {
return AddBuiltin(BuiltinOperator_IF, tflite::Register_IF(), ParseIf);
}
TfLiteStatus AddL2Normalization() {
return AddBuiltin(BuiltinOperator_L2_NORMALIZATION,
tflite::ops::micro::Register_L2_NORMALIZATION(),
@@ -285,8 +320,7 @@ class MicroMutableOpResolver : public MicroOpResolver {
TfLiteStatus AddLogicalAnd() {
return AddBuiltin(BuiltinOperator_LOGICAL_AND,
tflite::ops::micro::Register_LOGICAL_AND(),
ParseLogicalAnd);
tflite::Register_LOGICAL_AND(), ParseLogicalAnd);
}
TfLiteStatus AddLogicalNot() {
@@ -296,14 +330,13 @@ class MicroMutableOpResolver : public MicroOpResolver {
}
TfLiteStatus AddLogicalOr() {
return AddBuiltin(BuiltinOperator_LOGICAL_OR,
tflite::ops::micro::Register_LOGICAL_OR(),
return AddBuiltin(BuiltinOperator_LOGICAL_OR, tflite::Register_LOGICAL_OR(),
ParseLogicalOr);
}
TfLiteStatus AddLogistic() {
return AddBuiltin(BuiltinOperator_LOGISTIC,
tflite::ops::micro::Register_LOGISTIC(), ParseLogistic);
return AddBuiltin(BuiltinOperator_LOGISTIC, tflite::Register_LOGISTIC(),
ParseLogistic);
}
TfLiteStatus AddMaximum() {
@@ -313,7 +346,7 @@ class MicroMutableOpResolver : public MicroOpResolver {
TfLiteStatus AddMaxPool2D() {
return AddBuiltin(BuiltinOperator_MAX_POOL_2D,
tflite::ops::micro::Register_MAX_POOL_2D(), ParsePool);
tflite::Register_MAX_POOL_2D(), ParsePool);
}
TfLiteStatus AddMean() {
@@ -372,13 +405,12 @@ class MicroMutableOpResolver : public MicroOpResolver {
}
TfLiteStatus AddRelu() {
return AddBuiltin(BuiltinOperator_RELU, tflite::ops::micro::Register_RELU(),
ParseRelu);
return AddBuiltin(BuiltinOperator_RELU, tflite::Register_RELU(), ParseRelu);
}
TfLiteStatus AddRelu6() {
return AddBuiltin(BuiltinOperator_RELU6,
tflite::ops::micro::Register_RELU6(), ParseRelu6);
return AddBuiltin(BuiltinOperator_RELU6, tflite::Register_RELU6(),
ParseRelu6);
}
TfLiteStatus AddReshape() {
@@ -386,6 +418,11 @@ class MicroMutableOpResolver : public MicroOpResolver {
tflite::ops::micro::Register_RESHAPE(), ParseReshape);
}
TfLiteStatus AddResizeBilinear() {
return AddBuiltin(BuiltinOperator_RESIZE_BILINEAR,
Register_RESIZE_BILINEAR(), ParseResizeBilinear);
}
TfLiteStatus AddResizeNearestNeighbor() {
return AddBuiltin(BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
tflite::ops::micro::Register_RESIZE_NEAREST_NEIGHBOR(),
@@ -411,9 +448,9 @@ class MicroMutableOpResolver : public MicroOpResolver {
ParseSin);
}
TfLiteStatus AddSoftmax() {
return AddBuiltin(BuiltinOperator_SOFTMAX, Register_SOFTMAX(),
ParseSoftmax);
TfLiteStatus AddSoftmax(
const TfLiteRegistration& registration = Register_SOFTMAX()) {
return AddBuiltin(BuiltinOperator_SOFTMAX, registration, ParseSoftmax);
}
TfLiteStatus AddSpaceToBatchNd() {
@@ -421,6 +458,11 @@ class MicroMutableOpResolver : public MicroOpResolver {
Register_SPACE_TO_BATCH_ND(), ParseSpaceToBatchNd);
}
TfLiteStatus AddSpaceToDepth() {
return AddBuiltin(BuiltinOperator_SPACE_TO_DEPTH, Register_SPACE_TO_DEPTH(),
ParseSpaceToDepth);
}
TfLiteStatus AddSplit() {
return AddBuiltin(BuiltinOperator_SPLIT,
tflite::ops::micro::Register_SPLIT(), ParseSplit);
@@ -471,6 +513,11 @@ class MicroMutableOpResolver : public MicroOpResolver {
tflite::Register_TRANSPOSE_CONV(), ParseTransposeConv);
}
TfLiteStatus AddTranspose() {
return AddBuiltin(BuiltinOperator_TRANSPOSE, Register_TRANSPOSE(),
ParseTranspose);
}
TfLiteStatus AddUnpack() {
return AddBuiltin(BuiltinOperator_UNPACK,
tflite::ops::micro::Register_UNPACK(), ParseUnpack);
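
The new default-argument overloads above (AddConv2D, AddFullyConnected, AddSoftmax) let a caller keep the reference kernel or swap in an alternative registration. A minimal sketch; MyOptimizedConv2D is a hypothetical registration used only for illustration:

  TfLiteRegistration MyOptimizedConv2D();   // hypothetical specialized kernel

  tflite::MicroMutableOpResolver<2> resolver;
  resolver.AddFullyConnected();             // keeps Register_FULLY_CONNECTED()
  resolver.AddConv2D(MyOptimizedConv2D());  // swaps in the alternative kernel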

View File

@@ -55,4 +55,14 @@ void MicroProfiler::Log() const {
#endif
}
void MicroProfiler::LogCsv() const {
#if !defined(TF_LITE_STRIP_ERROR_STRINGS)
MicroPrintf("\"Event\",\"Tag\",\"Ticks\"");
for (int i = 0; i < num_events_; ++i) {
int32_t ticks = end_ticks_[i] - start_ticks_[i];
MicroPrintf("%d,%s,%d", i, tags_[i], ticks);
}
#endif
}
} // namespace tflite
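
A short usage sketch of the new CSV output; the interpreter being timed is assumed to exist, and each row follows the "Event","Tag","Ticks" header printed above:

  tflite::MicroProfiler profiler;
  uint32_t e0 = profiler.BeginEvent("AllocateTensors");
  interpreter.AllocateTensors();
  profiler.EndEvent(e0);
  uint32_t e1 = profiler.BeginEvent("Invoke");
  interpreter.Invoke();
  profiler.EndEvent(e1);
  profiler.LogCsv();  // e.g. 0,AllocateTensors,1234 and 1,Invoke,5678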

View File

@@ -1,4 +1,4 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -53,14 +53,19 @@ class MicroProfiler {
// event[i] <= start time of event[i+1]).
int32_t GetTotalTicks() const;
// Prints the profiling information of each of the events.
// Prints the profiling information of each of the events in human readable
// form.
void Log() const;
// Prints the profiling information of each of the events in CSV (Comma
// Separated Value) form.
void LogCsv() const;
private:
// Maximum number of events that this class can keep track of. If AddEvent is
// called more than kMaxEvents times, then the oldest event's
// profiling information will be overwritten.
static constexpr int kMaxEvents = 50;
static constexpr int kMaxEvents = 1024;
const char* tags_[kMaxEvents];
int32_t start_ticks_[kMaxEvents];
@@ -70,7 +75,7 @@ class MicroProfiler {
TF_LITE_REMOVE_VIRTUAL_DELETE;
};
#if defined(NDEBUG)
#if defined(TF_LITE_STRIP_ERROR_STRINGS)
// For release builds, the ScopedMicroProfiler is a no-op.
//
// This is done because the ScopedMicroProfiler is used as part of the
@@ -111,7 +116,7 @@ class ScopedMicroProfiler {
uint32_t event_handle_ = 0;
MicroProfiler* profiler_ = nullptr;
};
#endif // !defined(NDEBUG)
#endif // !defined(TF_LITE_STRIP_ERROR_STRINGS)
} // namespace tflite
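
A sketch of the RAII pattern the gate above controls; under TF_LITE_STRIP_ERROR_STRINGS the object compiles to a no-op, otherwise the event spans the enclosing scope (the tag string is illustrative):

  {
    tflite::ScopedMicroProfiler scoped_profiler(
        "MY_KERNEL",
        reinterpret_cast<tflite::MicroProfiler*>(context->profiler));
    // ... work to be timed ...
  }  // the event ends here when profiling is enabled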

View File

@@ -283,6 +283,14 @@ extern "C" int MicroVsnprintf(char* output, int len, const char* format,
case '%':
output[output_index++] = *current++;
break;
case 'c':
if (usable_length - output_index < 1) {
output[output_index++] = '\0';
return output_index;
}
output[output_index++] = va_arg(args, int32_t);
current++;
break;
case 's':
char* string = va_arg(args, char*);
int string_idx = 0;

View File

@@ -21,6 +21,7 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
namespace tflite {
@@ -50,7 +51,8 @@ void SignedSymmetricPerChannelQuantize(const float* values,
stride = channel_count;
channel_stride = 1;
} else {
TF_LITE_FATAL("quantized dimension must be 0 or 3");
MicroPrintf("quantized dimension must be 0 or 3");
TFLITE_ABORT;
}
// Calculate scales for each channel.

View File

@@ -19,6 +19,7 @@ limitations under the License.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
#include "tensorflow/lite/c/common.h"
@@ -43,11 +44,13 @@ T FloatToQuantizedType(const float value, const float scale, int zero_point) {
template <typename T>
T FloatToSymmetricQuantizedType(const float value, const float scale) {
int32_t result = round(value / scale);
result =
std::max(static_cast<int32_t>(std::numeric_limits<T>::min() + 1), result);
result =
std::min(static_cast<int32_t>(std::numeric_limits<T>::max()), result);
// 64-bit values are required since 8x16 conv accumulates to int64, meaning
// an int64 bias is required.
std::int64_t result = round(value / scale);
result = std::max(
static_cast<std::int64_t>(std::numeric_limits<T>::min() + 1), result);
result = std::min(static_cast<std::int64_t>(std::numeric_limits<T>::max()),
result);
return result;
}
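
A worked example of why the widening matters, assuming the tflite namespace qualification as above: with a very small bias scale the quantized value exceeds INT32_MAX and only fits once the intermediate and the clamp bounds are 64-bit.

  // round(30.0f / 1e-9f) is roughly 3.0e10, well above INT32_MAX (~2.1e9).
  std::int64_t q =
      tflite::FloatToSymmetricQuantizedType<std::int64_t>(30.0f, 1e-9f);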

View File

@@ -0,0 +1,66 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/mock_micro_graph.h"
#include "tensorflow/lite/micro/test_helpers.h"
namespace tflite {
MockMicroGraph::MockMicroGraph(SimpleMemoryAllocator* allocator)
: MicroGraph(nullptr, nullptr, nullptr),
allocator_(allocator),
init_count_(0),
prepare_count_(0),
free_count_(0) {
memset(invoke_counts_, 0, sizeof(invoke_counts_));
mock_tensor_ =
reinterpret_cast<TfLiteEvalTensor*>(allocator_->AllocateFromTail(
sizeof(TfLiteEvalTensor), alignof(TfLiteEvalTensor)));
int* dims_array = reinterpret_cast<int*>(
allocator_->AllocateFromTail(3 * sizeof(int), alignof(int)));
float* data_array = reinterpret_cast<float*>(
allocator_->AllocateFromTail(2 * sizeof(float), alignof(float)));
int dims[] = {2, 1, 2};
memcpy(dims_array, dims, 3 * sizeof(int));
mock_tensor_->dims = testing::IntArrayFromInts(dims_array);
mock_tensor_->data.f = data_array;
mock_tensor_->type = kTfLiteFloat32;
}
TfLiteStatus MockMicroGraph::InvokeSubgraph(int subgraph_idx) {
invoke_counts_[subgraph_idx]++;
return kTfLiteOk;
}
TfLiteStatus MockMicroGraph::ResetVariableTensors() { return kTfLiteOk; }
size_t MockMicroGraph::NumSubgraphInputs(int subgraph_idx) { return 1; }
TfLiteEvalTensor* MockMicroGraph::GetSubgraphInput(int subgraph_idx,
int tensor_idx) {
return mock_tensor_;
}
size_t MockMicroGraph::NumSubgraphOutputs(int subgraph_idx) { return 1; }
TfLiteEvalTensor* MockMicroGraph::GetSubgraphOutput(int subgraph_idx,
int tensor_idx) {
return mock_tensor_;
}
int MockMicroGraph::NumSubgraphs() { return kMaxSubgraphs; }
} // namespace tflite

View File

@@ -0,0 +1,60 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MOCK_MICRO_GRAPH_H_
#define TENSORFLOW_LITE_MICRO_MOCK_MICRO_GRAPH_H_
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/micro_graph.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
// MockMicroGraph stubs out all MicroGraph methods used during invoke. A count
// of the number of calls to invoke for each subgraph is maintained for
// validation of control flow operators.
class MockMicroGraph : public MicroGraph {
public:
explicit MockMicroGraph(SimpleMemoryAllocator* allocator);
TfLiteStatus InvokeSubgraph(int subgraph_idx) override;
TfLiteStatus ResetVariableTensors() override;
size_t NumSubgraphInputs(int subgraph_idx) override;
TfLiteEvalTensor* GetSubgraphInput(int subgraph_idx, int tensor_idx) override;
size_t NumSubgraphOutputs(int subgraph_idx) override;
TfLiteEvalTensor* GetSubgraphOutput(int subgraph_idx,
int tensor_idx) override;
int NumSubgraphs() override;
int get_init_count() const { return init_count_; }
int get_prepare_count() const { return prepare_count_; }
int get_free_count() const { return free_count_; }
int get_invoke_count(int subgraph_idx) const {
return invoke_counts_[subgraph_idx];
}
private:
static constexpr int kMaxSubgraphs = 10;
SimpleMemoryAllocator* allocator_;
TfLiteEvalTensor* mock_tensor_;
int init_count_;
int prepare_count_;
int free_count_;
int invoke_counts_[kMaxSubgraphs];
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MOCK_MICRO_GRAPH_H_
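
A sketch of how a control-flow kernel test might lean on this mock; the arena size and error-reporter wiring are assumptions rather than code from this commit:

  uint8_t arena[1024];
  tflite::SimpleMemoryAllocator* allocator =
      tflite::SimpleMemoryAllocator::Create(tflite::GetMicroErrorReporter(),
                                            arena, sizeof(arena));
  tflite::MockMicroGraph mock_graph(allocator);
  mock_graph.InvokeSubgraph(1);                     // e.g. the "then" branch
  int then_count = mock_graph.get_invoke_count(1);  // == 1
  int else_count = mock_graph.get_invoke_count(2);  // == 0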

View File

@@ -130,58 +130,48 @@ void RecordingMicroAllocator::PrintRecordedAllocation(
}
TfLiteStatus RecordingMicroAllocator::AllocateNodeAndRegistrations(
const Model* model, NodeAndRegistration** node_and_registrations) {
RecordedAllocation allocations = SnapshotAllocationUsage();
TfLiteStatus status = MicroAllocator::AllocateNodeAndRegistrations(
model, node_and_registrations);
RecordAllocationUsage(allocations,
recorded_node_and_registration_array_data_);
// The allocation count in SimpleMemoryAllocator will only be 1. To provide
// better logging, decrement by 1 and add in the actual number of operators
// used in the graph:
// The allocation for this recording will always be 1. This is because the
// parent class mallocs one large allocation for the number of nodes in the
// graph (e.g. sizeof(NodeAndRegistration) * num_nodes).
// To prevent extra overhead and potential for fragmentation, manually adjust
// the accounting by decrementing by 1 and adding the actual number of nodes
// used in the graph:
recorded_node_and_registration_array_data_.count +=
GetSubGraphFromModel(model)->operators()->size() - 1;
return status;
}
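
A worked example of the accounting above, with assumed numbers: a 5-operator subgraph is served by a single arena allocation, and the recorded count is then rewritten so the log reports one entry per operator.

  constexpr size_t kNumOperators = 5;
  size_t bytes_requested =
      kNumOperators * sizeof(tflite::NodeAndRegistration);  // one malloc
  size_t recorded_count = 1;            // one block handed out by the arena
  recorded_count += kNumOperators - 1;  // logged as 5 NodeAndRegistration slots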
TfLiteStatus
RecordingMicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer(
const Model* model, const MicroOpResolver& op_resolver,
NodeAndRegistration* node_and_registrations) {
const Model* model, SubgraphAllocations* subgraph_allocations) {
RecordedAllocation allocations = SnapshotAllocationUsage();
TfLiteStatus status =
MicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer(
model, op_resolver, node_and_registrations);
RecordAllocationUsage(allocations, recorded_op_data_);
MicroAllocator::AllocateNodeAndRegistrations(model, subgraph_allocations);
for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs()->size();
subgraph_idx++) {
RecordAllocationUsage(allocations,
recorded_node_and_registration_array_data_);
// The allocation count in SimpleMemoryAllocator will only be 1. To provide
// better logging, decrement by 1 and add in the actual number of operators
// used in the graph:
// The allocation for this recording will always be 1. This is because the
// parent class mallocs one large allocation for the number of nodes in the
// graph (e.g. sizeof(NodeAndRegistration) * num_nodes).
// To prevent extra overhead and potential for fragmentation, manually
// adjust the accounting by decrementing by 1 and adding the actual number
// of nodes used in the graph:
recorded_node_and_registration_array_data_.count +=
model->subgraphs()->Get(subgraph_idx)->operators()->size() - 1;
}
return status;
}
TfLiteStatus RecordingMicroAllocator::AllocateTfLiteEvalTensors(
const Model* model, TfLiteEvalTensor** eval_tensors) {
const Model* model, SubgraphAllocations* subgraph_allocations) {
RecordedAllocation allocations = SnapshotAllocationUsage();
TfLiteStatus status =
MicroAllocator::AllocateTfLiteEvalTensors(model, eval_tensors);
RecordAllocationUsage(allocations, recorded_tflite_eval_tensor_data_);
// The allocation for this recording will always be 1. This is because the
// parent class mallocs one large allocation for the number of tensors in the
// graph (e.g. sizeof(TfLiteEvalTensor) * num_tensors).
// To prevent extra overhead and potential for fragmentation, manually adjust
// the accounting by decrementing by 1 and adding the actual number of tensors
// used in the graph:
recorded_tflite_eval_tensor_data_.count +=
GetSubGraphFromModel(model)->tensors()->size() - 1;
MicroAllocator::AllocateTfLiteEvalTensors(model, subgraph_allocations);
for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs()->size();
subgraph_idx++) {
RecordAllocationUsage(allocations, recorded_tflite_eval_tensor_data_);
// The allocation for this recording will always be 1. This is because the
// parent class mallocs one large allocation for the number of tensors in
// the graph (e.g. sizeof(TfLiteEvalTensor) * num_tensors). To prevent extra
// overhead and potential for fragmentation, manually adjust the accounting
// by decrementing by 1 and adding the actual number of tensors used in the
// graph:
recorded_tflite_eval_tensor_data_.count +=
model->subgraphs()->Get(subgraph_idx)->tensors()->size() - 1;
}
return status;
}
@@ -197,24 +187,24 @@ TfLiteStatus RecordingMicroAllocator::AllocateVariables(
return status;
}
TfLiteTensor* RecordingMicroAllocator::AllocatePersistentTfLiteTensorInternal(
const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index) {
TfLiteTensor*
RecordingMicroAllocator::AllocatePersistentTfLiteTensorInternal() {
RecordedAllocation allocations = SnapshotAllocationUsage();
TfLiteTensor* result = MicroAllocator::AllocatePersistentTfLiteTensorInternal(
model, eval_tensors, tensor_index);
TfLiteTensor* result =
MicroAllocator::AllocatePersistentTfLiteTensorInternal();
RecordAllocationUsage(allocations, recorded_persistent_tflite_tensor_data_);
return result;
}
TfLiteStatus RecordingMicroAllocator::PopulateTfLiteTensorFromFlatbuffer(
const Model* model, const SubGraph* subgraph, TfLiteTensor* tensor,
int tensor_index, bool allocate_temp) {
const Model* model, TfLiteTensor* tensor, int tensor_index,
int subgraph_index, bool allocate_temp) {
RecordedAllocation allocations = SnapshotAllocationUsage();
TfLiteStatus status = MicroAllocator::PopulateTfLiteTensorFromFlatbuffer(
model, subgraph, tensor, tensor_index, allocate_temp);
model, tensor, tensor_index, subgraph_index, allocate_temp);
RecordAllocationUsage(allocations,
recorded_persistent_tflite_tensor_quantization_data_);

View File

@@ -72,27 +72,22 @@ class RecordingMicroAllocator : public MicroAllocator {
protected:
TfLiteStatus AllocateNodeAndRegistrations(
const Model* model,
NodeAndRegistration** node_and_registrations) override;
TfLiteStatus PrepareNodeAndRegistrationDataFromFlatbuffer(
const Model* model, const MicroOpResolver& op_resolver,
NodeAndRegistration* node_and_registrations) override;
const Model* model, SubgraphAllocations* subgraph_allocations) override;
TfLiteStatus AllocateTfLiteEvalTensors(
const Model* model, TfLiteEvalTensor** eval_tensors) override;
const Model* model, SubgraphAllocations* subgraph_allocations) override;
TfLiteStatus AllocateVariables(const SubGraph* subgraph,
TfLiteEvalTensor* eval_tensors) override;
// TODO(b/162311891): Once all kernels have been updated to the new API drop
// this method. It is only used to record TfLiteTensor persistent allocations.
TfLiteTensor* AllocatePersistentTfLiteTensorInternal(
const Model* model, TfLiteEvalTensor* eval_tensors,
int tensor_index) override;
TfLiteTensor* AllocatePersistentTfLiteTensorInternal() override;
// TODO(b/162311891): Once all kernels have been updated to the new API drop
// this function since all allocations for quantized data will take place in
// the temp section.
TfLiteStatus PopulateTfLiteTensorFromFlatbuffer(const Model* model,
const SubGraph* subgraph,
TfLiteTensor* tensor,
int tensor_index,
int subgraph_index,
bool allocate_temp) override;
private:
@@ -115,6 +110,8 @@ class RecordingMicroAllocator : public MicroAllocator {
RecordedAllocation recorded_persistent_buffer_data_ = {};
RecordedAllocation recorded_tflite_tensor_variable_buffer_data_ = {};
RecordedAllocation recorded_node_and_registration_array_data_ = {};
// TODO(b/187993291): Re-enable OpData allocating tracking.
RecordedAllocation recorded_op_data_ = {};
TF_LITE_REMOVE_VIRTUAL_DELETE

View File

@@ -37,11 +37,12 @@ class RecordingMicroInterpreter : public MicroInterpreter {
RecordingMicroInterpreter(const Model* model,
const MicroOpResolver& op_resolver,
uint8_t* tensor_arena, size_t tensor_arena_size,
ErrorReporter* error_reporter)
ErrorReporter* error_reporter,
MicroProfiler* profiler = nullptr)
: MicroInterpreter(model, op_resolver,
RecordingMicroAllocator::Create(
tensor_arena, tensor_arena_size, error_reporter),
error_reporter),
error_reporter, profiler),
recording_micro_allocator_(
static_cast<const RecordingMicroAllocator&>(allocator())) {}

View File

@@ -195,7 +195,7 @@ const Model* ModelBuilder::BuildModel(
buffers[i] = metadata_buffers_[i - 1];
}
// TFLM only supports single subgraph.
// Default to single subgraph model.
constexpr size_t subgraphs_size = 1;
// Find out number of subgraph inputs.
@@ -341,6 +341,72 @@ const Model* BuildModelWithOfflinePlanning(int number_of_tensors,
node_conn[0].input, node_conn[num_conns - 1].output, num_subgraph_inputs);
}
const Model* BuildModelWithUnusedInputs() {
using flatbuffers::Offset;
flatbuffers::FlatBufferBuilder* builder = BuilderInstance();
constexpr size_t buffers_size = 1;
const Offset<Buffer> buffers[buffers_size] = {CreateBuffer(*builder)};
constexpr size_t tensor_shape_size = 2;
const int32_t tensor_shape[tensor_shape_size] = {1, 64};
constexpr size_t tensors_size = 4;
const Offset<Tensor> tensors[tensors_size] = {
CreateTensor(*builder,
builder->CreateVector(tensor_shape, tensor_shape_size),
TensorType_INT8, 0,
builder->CreateString("test_input_tensor"), 0, false),
CreateTensor(*builder,
builder->CreateVector(tensor_shape, tensor_shape_size),
TensorType_INT8, 0,
builder->CreateString("test_unused_input_tensor"), 0, false),
CreateTensor(*builder,
builder->CreateVector(tensor_shape, tensor_shape_size),
TensorType_INT8, 0,
builder->CreateString("test_output_tensor"), 0, false),
CreateTensor(*builder,
builder->CreateVector(tensor_shape, tensor_shape_size),
TensorType_INT8, 0,
builder->CreateString("test_unused_tensor"), 0, false),
};
constexpr size_t inputs_size = 2;
const int32_t inputs[inputs_size] = {0, 1};
constexpr size_t outputs_size = 1;
const int32_t outputs[outputs_size] = {2};
constexpr size_t operator_inputs_size = 1;
const int32_t operator_inputs[operator_inputs_size] = {0};
constexpr size_t operator_outputs_size = 1;
const int32_t operator_outputs[operator_outputs_size] = {2};
constexpr size_t operators_size = 1;
const Offset<Operator> operators[operators_size] = {
CreateOperator(
*builder, 0,
builder->CreateVector(operator_inputs, operator_inputs_size),
builder->CreateVector(operator_outputs, operator_outputs_size),
BuiltinOptions_NONE),
};
constexpr size_t subgraphs_size = 1;
const Offset<SubGraph> subgraphs[subgraphs_size] = {
CreateSubGraph(*builder, builder->CreateVector(tensors, tensors_size),
builder->CreateVector(inputs, inputs_size),
builder->CreateVector(outputs, outputs_size),
builder->CreateVector(operators, operators_size),
builder->CreateString("test_subgraph"))};
constexpr size_t operator_codes_size = 1;
const Offset<OperatorCode> operator_codes[operator_codes_size] = {
CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0,
"mock_custom",
/*version=*/0, BuiltinOperator_CUSTOM)};
const Offset<Model> model_offset = CreateModel(
*builder, 0, builder->CreateVector(operator_codes, operator_codes_size),
builder->CreateVector(subgraphs, subgraphs_size),
builder->CreateString("test_model"),
builder->CreateVector(buffers, buffers_size));
FinishModelBuffer(*builder, model_offset);
void* model_pointer = builder->GetBufferPointer();
const Model* model = flatbuffers::GetRoot<Model>(model_pointer);
return model;
}
const Model* BuildSimpleMockModel() {
using flatbuffers::Offset;
flatbuffers::FlatBufferBuilder* builder = BuilderInstance();
@@ -638,6 +704,125 @@ const Model* BuildSimpleMultipleInputsModel() {
return model;
}
const Model* BuildSimpleModelWithSubgraphsAndIf() {
using flatbuffers::Offset;
flatbuffers::FlatBufferBuilder* builder = BuilderInstance();
constexpr size_t buffers_size = 1;
const Offset<Buffer> buffers[buffers_size] = {
CreateBuffer(*builder),
};
const int32_t condition_tensor_shape[] = {1};
const int32_t data_tensor_shape[] = {1, 2};
constexpr size_t tensors_size = 4;
const Offset<Tensor> subgraph1_tensors[tensors_size] = {
CreateTensor(*builder, builder->CreateVector(condition_tensor_shape, 1),
TensorType_BOOL, 0,
builder->CreateString("condition tensor"), 0, false),
CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
TensorType_FLOAT32, 0,
builder->CreateString("input_tensor1"), 0, false),
CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
TensorType_FLOAT32, 0,
builder->CreateString("input_tensor2"), 0, false),
CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
TensorType_FLOAT32, 0,
builder->CreateString("output_tensor"), 0, false),
};
const Offset<Tensor> subgraph2_tensors[tensors_size] = {
CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
TensorType_FLOAT32, 0,
builder->CreateString("input_tensor1"), 0, false),
CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
TensorType_FLOAT32, 0,
builder->CreateString("input_tensor2"), 0, false),
CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
TensorType_FLOAT32, 0,
builder->CreateString("output_tensor"), 0, false),
};
const Offset<Tensor> subgraph3_tensors[tensors_size] = {
CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
TensorType_FLOAT32, 0,
builder->CreateString("input_tensor1"), 0, false),
CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
TensorType_FLOAT32, 0,
builder->CreateString("input_tensor2"), 0, false),
CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
TensorType_FLOAT32, 0,
builder->CreateString("output_tensor"), 0, false),
};
constexpr size_t if_inputs_size = 3;
const int32_t if_inputs[if_inputs_size] = {0, 1, 2};
constexpr size_t outputs_size = 1;
const int32_t if_outputs[outputs_size] = {3};
constexpr size_t operator_inputs_size = 2;
const int32_t operator_inputs[operator_inputs_size] = {0, 1};
const int32_t operator_outputs[outputs_size] = {2};
constexpr size_t operators_size = 1;
const Offset<Operator> subgraph1_operators[operators_size] = {
CreateOperator(
*builder, 0, builder->CreateVector(if_inputs, if_inputs_size),
builder->CreateVector(if_outputs, outputs_size),
BuiltinOptions_IfOptions, CreateIfOptions(*builder, 1, 2).Union()),
};
const Offset<Operator> subgraph2_operators[operators_size] = {
CreateOperator(
*builder, 1,
builder->CreateVector(operator_inputs, operator_inputs_size),
builder->CreateVector(operator_outputs, outputs_size),
BuiltinOptions_NONE),
};
const Offset<Operator> subgraph3_operators[operators_size] = {
CreateOperator(
*builder, 2,
builder->CreateVector(operator_inputs, operator_inputs_size),
builder->CreateVector(operator_outputs, outputs_size),
BuiltinOptions_NONE),
};
constexpr size_t subgraphs_size = 3;
const Offset<SubGraph> subgraphs[subgraphs_size] = {
CreateSubGraph(*builder, builder->CreateVector(subgraph1_tensors, 4),
builder->CreateVector(if_inputs, if_inputs_size),
builder->CreateVector(if_outputs, outputs_size),
builder->CreateVector(subgraph1_operators, operators_size),
builder->CreateString("if_subgraph")),
CreateSubGraph(
*builder, builder->CreateVector(subgraph2_tensors, 3),
builder->CreateVector(operator_inputs, operator_inputs_size),
builder->CreateVector(operator_outputs, outputs_size),
builder->CreateVector(subgraph2_operators, operators_size),
builder->CreateString("then_subgraph")),
CreateSubGraph(
*builder, builder->CreateVector(subgraph3_tensors, 3),
builder->CreateVector(operator_inputs, operator_inputs_size),
builder->CreateVector(operator_outputs, outputs_size),
builder->CreateVector(subgraph3_operators, operators_size),
builder->CreateString("else_subgraph")),
};
constexpr size_t operator_codes_size = 3;
const Offset<OperatorCode> operator_codes[operator_codes_size] = {
CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0,
"multiple_inputs_op",
/*version=*/0, BuiltinOperator_IF),
CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0,
"multiple_inputs_op",
/*version=*/0, BuiltinOperator_ADD),
CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0,
"multiple_inputs_op",
/*version=*/0, BuiltinOperator_MUL),
};
const Offset<Model> model_offset = CreateModel(
*builder, 0, builder->CreateVector(operator_codes, operator_codes_size),
builder->CreateVector(subgraphs, subgraphs_size),
builder->CreateString("test_model"),
builder->CreateVector(buffers, buffers_size));
FinishModelBuffer(*builder, model_offset);
void* model_pointer = builder->GetBufferPointer();
const Model* model = flatbuffers::GetRoot<Model>(model_pointer);
return model;
}
} // namespace
const TfLiteRegistration* SimpleStatefulOp::getRegistration() {
@@ -834,6 +1019,13 @@ AllOpsResolver GetOpResolver() {
MultipleInputs::GetMutableRegistration());
return op_resolver;
}
const Model* GetModelWithUnusedInputs() {
static Model* model = nullptr;
if (!model) {
model = const_cast<Model*>(BuildModelWithUnusedInputs());
}
return model;
}
const Model* GetSimpleMockModel() {
static Model* model = nullptr;
@@ -851,6 +1043,14 @@ const Model* GetSimpleMultipleInputsModel() {
return model;
}
const Model* GetSimpleModelWithSubgraphsAndIf() {
static Model* model = nullptr;
if (!model) {
model = const_cast<Model*>(BuildSimpleModelWithSubgraphsAndIf());
}
return model;
}
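
A sketch of exercising this model end to end; the resolver contents, arena size, and input wiring are assumptions rather than code from this commit:

  const tflite::Model* model =
      tflite::testing::GetSimpleModelWithSubgraphsAndIf();
  tflite::MicroMutableOpResolver<3> resolver;
  resolver.AddIf();
  resolver.AddAdd();
  resolver.AddMul();
  uint8_t arena[4096];
  tflite::MicroInterpreter interpreter(model, resolver, arena, sizeof(arena),
                                       tflite::GetMicroErrorReporter());
  interpreter.AllocateTensors();
  interpreter.input(0)->data.b[0] = true;  // condition: run the "then" subgraph
  interpreter.Invoke();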
const Model* GetComplexMockModel() {
static Model* model = nullptr;
if (!model) {
@@ -984,9 +1184,8 @@ void ReportOpError(struct TfLiteContext* context, const char* format, ...) {
// Create a TfLiteIntArray from an array of ints. The first element in the
// supplied array must be the size of the array expressed as an int.
TfLiteIntArray* IntArrayFromInts(const int* int_array) {
return const_cast<TfLiteIntArray*>(
reinterpret_cast<const TfLiteIntArray*>(int_array));
TfLiteIntArray* IntArrayFromInts(int* int_array) {
return reinterpret_cast<TfLiteIntArray*>(int_array);
}
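
A small example of the size-prefixed layout this helper expects; the first element is the length, not data:

  int dims_data[] = {3, 1, 2, 3};  // describes the 3-element array {1, 2, 3}
  TfLiteIntArray* dims = tflite::testing::IntArrayFromInts(dims_data);
  // dims->size == 3 and dims->data[0] == 1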
// Create a TfLiteFloatArray from an array of floats. The first element in the
@@ -999,6 +1198,20 @@ TfLiteFloatArray* FloatArrayFromFloats(const float* floats) {
return reinterpret_cast<TfLiteFloatArray*>(const_cast<float*>(floats));
}
TfLiteTensor CreateQuantizedBiasTensor(const float* data, int16_t* quantized,
TfLiteIntArray* dims, float input_scale,
float weights_scale, bool is_variable) {
float bias_scale = input_scale * weights_scale;
tflite::SymmetricQuantize(data, quantized, ElementCount(*dims), bias_scale);
// Quantized int16_t tensors always have a zero point of 0, since the range of
// int16_t values is large, and because zero point costs extra cycles during
// processing.
TfLiteTensor result =
CreateQuantizedTensor(quantized, dims, bias_scale, 0, is_variable);
return result;
}
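A hedged usage sketch for the new int16_t overload; the scales and shape are arbitrary illustration values, not taken from this commit.
  const float bias_values[] = {1.0f, -2.0f};
  int16_t bias_quantized[2];
  int bias_dims_data[] = {1, 2};  // length-prefixed: rank 1, 2 elements
  TfLiteIntArray* bias_dims = tflite::testing::IntArrayFromInts(bias_dims_data);
  // The bias scale is derived internally as input_scale * weights_scale.
  TfLiteTensor bias_tensor = tflite::testing::CreateQuantizedBiasTensor(
      bias_values, bias_quantized, bias_dims, /*input_scale=*/0.5f,
      /*weights_scale=*/0.25f);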
TfLiteTensor CreateQuantizedBiasTensor(const float* data, int32_t* quantized,
TfLiteIntArray* dims, float input_scale,
float weights_scale, bool is_variable) {
@@ -1013,11 +1226,27 @@ TfLiteTensor CreateQuantizedBiasTensor(const float* data, int32_t* quantized,
return result;
}
TfLiteTensor CreateQuantizedBiasTensor(const float* data,
std::int64_t* quantized,
TfLiteIntArray* dims, float input_scale,
float weights_scale, bool is_variable) {
float bias_scale = input_scale * weights_scale;
tflite::SymmetricQuantize(data, quantized, ElementCount(*dims), bias_scale);
  // Quantized int64_t tensors always have a zero point of 0, since the range of
  // int64_t values is large, and because zero point costs extra cycles during
  // processing.
TfLiteTensor result =
CreateQuantizedTensor(quantized, dims, bias_scale, 0, is_variable);
return result;
}
// Quantizes a bias tensor (the element type is given by the template parameter
// T, e.g. int32_t or int64_t) with per-channel scales determined by the input
// scale multiplied by the weight scale for each channel.
template <typename T>
TfLiteTensor CreatePerChannelQuantizedBiasTensor(
const float* input, int32_t* quantized, TfLiteIntArray* dims,
float input_scale, float* weight_scales, float* scales, int* zero_points,
const float* input, T* quantized, TfLiteIntArray* dims, float input_scale,
float* weight_scales, float* scales, int* zero_points,
TfLiteAffineQuantization* affine_quant, int quantized_dimension,
bool is_variable) {
int input_size = ElementCount(*dims);
@@ -1031,8 +1260,8 @@ TfLiteTensor CreatePerChannelQuantizedBiasTensor(
zero_points[i + 1] = 0;
}
SymmetricPerChannelQuantize<int32_t>(input, quantized, input_size,
num_channels, scales_array);
SymmetricPerChannelQuantize<T>(input, quantized, input_size, num_channels,
scales_array);
affine_quant->scale = FloatArrayFromFloats(scales);
affine_quant->zero_point = IntArrayFromInts(zero_points);
@@ -1043,6 +1272,26 @@ TfLiteTensor CreatePerChannelQuantizedBiasTensor(
return result;
}
TfLiteTensor CreatePerChannelQuantizedBiasTensor(
const float* input, int32_t* quantized, TfLiteIntArray* dims,
float input_scale, float* weight_scales, float* scales, int* zero_points,
TfLiteAffineQuantization* affine_quant, int quantized_dimension,
bool is_variable) {
return CreatePerChannelQuantizedBiasTensor<int32_t>(
input, quantized, dims, input_scale, weight_scales, scales, zero_points,
affine_quant, quantized_dimension, is_variable);
}
TfLiteTensor CreatePerChannelQuantizedBiasTensor(
const float* input, std::int64_t* quantized, TfLiteIntArray* dims,
float input_scale, float* weight_scales, float* scales, int* zero_points,
TfLiteAffineQuantization* affine_quant, int quantized_dimension,
bool is_variable) {
return CreatePerChannelQuantizedBiasTensor<std::int64_t>(
input, quantized, dims, input_scale, weight_scales, scales, zero_points,
affine_quant, quantized_dimension, is_variable);
}
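Sketch of how a kernel test might call the new int64_t per-channel wrapper (not part of this commit). The channel count, the scale values, and the explicit length prefixes written into the scratch arrays are assumptions for illustration.
  const float bias_values[] = {0.5f, -0.5f};
  std::int64_t bias_quantized[2];
  int bias_dims_data[] = {1, 2};  // rank 1, 2 output channels
  TfLiteIntArray* bias_dims = tflite::testing::IntArrayFromInts(bias_dims_data);
  float weight_scales[] = {0.1f, 0.2f};  // one scale per output channel
  float scales[3] = {2.0f};      // length prefix + per-channel slots (filled by helper)
  int zero_points[3] = {2};      // length prefix + per-channel slots (filled by helper)
  TfLiteAffineQuantization affine_quant;
  TfLiteTensor bias = tflite::testing::CreatePerChannelQuantizedBiasTensor(
      bias_values, bias_quantized, bias_dims, /*input_scale=*/0.5f,
      weight_scales, scales, zero_points, &affine_quant,
      /*quantized_dimension=*/0);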
TfLiteTensor CreateSymmetricPerChannelQuantizedTensor(
const float* input, int8_t* quantized, TfLiteIntArray* dims, float* scales,
int* zero_points, TfLiteAffineQuantization* affine_quant,

View File

@@ -16,15 +16,13 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_MICRO_TEST_HELPERS_H_
#define TENSORFLOW_LITE_MICRO_TEST_HELPERS_H_
// Useful functions for writing tests.
#include <cstdint>
#include <limits>
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite//kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/micro_utils.h"
#include "tensorflow/lite/portable_type_to_tflitetype.h"
@@ -126,9 +124,16 @@ const Model* GetModelWithOfflinePlanning(int num_tensors,
int num_conns,
int num_subgraph_inputs = 0);
// Returns a flatbuffer model with a single operator, two inputs (one of them
// unused), and one output.
const Model* GetModelWithUnusedInputs();
// Returns a flatbuffer model with `simple_stateful_op`
const Model* GetSimpleStatefulModel();
// Returns a flatbuffer model with an IF operator whose main subgraph selects
// between two branch subgraphs ("then" and "else").
const Model* GetSimpleModelWithSubgraphsAndIf();
// Builds a one-dimensional flatbuffer tensor of the given size.
const Tensor* Create1dFlatbufferTensor(int size, bool is_variable = false);
@@ -154,7 +159,7 @@ void PopulateContext(TfLiteTensor* tensors, int tensors_size,
// Create a TfLiteIntArray from an array of ints. The first element in the
// supplied array must be the size of the array expressed as an int.
TfLiteIntArray* IntArrayFromInts(const int* int_array);
TfLiteIntArray* IntArrayFromInts(int* int_array);
// Create a TfLiteFloatArray from an array of floats. The first element in the
// supplied array must be the size of the array expressed as a float.
@@ -199,11 +204,22 @@ TfLiteTensor CreateQuantizedTensor(const float* input, T* quantized,
return CreateQuantizedTensor(quantized, dims, scale, zero_point, is_variable);
}
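For context, a sketch of how the surrounding templated helper (which quantizes a float array into T and wraps it in a TfLiteTensor) is typically used; the values and the int8_t instantiation are assumptions for illustration, not part of this commit.
  const float values[] = {0.5f, -1.0f, 1.5f};
  int8_t quantized[3];
  int dims_data[] = {1, 3};  // rank 1, 3 elements
  TfLiteIntArray* dims = tflite::testing::IntArrayFromInts(dims_data);
  TfLiteTensor t = tflite::testing::CreateQuantizedTensor(
      values, quantized, dims, /*scale=*/0.05f, /*zero_point=*/0);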
TfLiteTensor CreateQuantizedBiasTensor(const float* data, int16_t* quantized,
TfLiteIntArray* dims, float input_scale,
float weights_scale,
bool is_variable = false);
TfLiteTensor CreateQuantizedBiasTensor(const float* data, int32_t* quantized,
TfLiteIntArray* dims, float input_scale,
float weights_scale,
bool is_variable = false);
TfLiteTensor CreateQuantizedBiasTensor(const float* data,
std::int64_t* quantized,
TfLiteIntArray* dims, float input_scale,
float weights_scale,
bool is_variable = false);
// Quantizes an int32_t bias tensor with per-channel scales determined by the
// input scale multiplied by the weight scale for each channel.
TfLiteTensor CreatePerChannelQuantizedBiasTensor(
@@ -212,6 +228,14 @@ TfLiteTensor CreatePerChannelQuantizedBiasTensor(
TfLiteAffineQuantization* affine_quant, int quantized_dimension,
bool is_variable = false);
// Quantizes an int64_t bias tensor with per-channel scales determined by the
// input scale multiplied by the weight scale for each channel.
TfLiteTensor CreatePerChannelQuantizedBiasTensor(
const float* input, std::int64_t* quantized, TfLiteIntArray* dims,
float input_scale, float* weight_scales, float* scales, int* zero_points,
TfLiteAffineQuantization* affine_quant, int quantized_dimension,
bool is_variable = false);
TfLiteTensor CreateSymmetricPerChannelQuantizedTensor(
const float* input, int8_t* quantized, TfLiteIntArray* dims, float* scales,
int* zero_points, TfLiteAffineQuantization* affine_quant,