rolling 20210708

jomjol committed 2021-08-07 15:25:27 +02:00
parent 6f06af1d5f
commit 32f15fc557
138 changed files with 8048 additions and 2292 deletions

View File

@@ -32,14 +32,18 @@ AllOpsResolver::AllOpsResolver() {
AddConcatenation();
AddConv2D();
AddCos();
AddCumSum();
AddDepthToSpace();
AddDepthwiseConv2D();
AddDequantize();
AddDetectionPostprocess();
AddDiv();
AddElu();
AddEqual();
AddEthosU();
AddExpandDims();
AddFloor();
AddFloorDiv();
AddFloorMod();
AddFullyConnected();
AddGreater();
AddGreaterEqual();
@@ -70,6 +74,7 @@ AllOpsResolver::AllOpsResolver() {
AddRelu();
AddRelu6();
AddReshape();
AddResizeBilinear();
AddResizeNearestNeighbor();
AddRound();
AddRsqrt();
@@ -77,6 +82,7 @@ AllOpsResolver::AllOpsResolver() {
AddSin();
AddSoftmax();
AddSpaceToBatchNd();
AddSpaceToDepth();
AddSplit();
AddSplitV();
AddSqrt();
@@ -87,6 +93,7 @@ AllOpsResolver::AllOpsResolver() {
AddSvdf();
AddTanh();
AddTransposeConv();
AddTranspose();
AddUnpack();
}

View File

@@ -0,0 +1,64 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/flatbuffer_utils.h"
namespace tflite {
FlexbufferWrapper::FlexbufferWrapper(const uint8_t* buffer, size_t size)
: flexbuffers::Vector(flexbuffers::GetRoot(buffer, size).AsVector()) {}
int64_t FlexbufferWrapper::ElementAsInt64(size_t i) const {
const uint8_t* elem = data_ + i * byte_width_;
return ::flexbuffers::ReadInt64(elem, byte_width_);
}
uint64_t FlexbufferWrapper::ElementAsUInt64(size_t i) const {
const uint8_t* elem = data_ + i * byte_width_;
return ::flexbuffers::ReadUInt64(elem, byte_width_);
}
int32_t FlexbufferWrapper::ElementAsInt32(size_t i) const {
return static_cast<int32_t>(ElementAsInt64(i));
}
bool FlexbufferWrapper::ElementAsBool(size_t i) const {
return static_cast<bool>(ElementAsUInt64(i));
}
double FlexbufferWrapper::ElementAsDouble(size_t i) const {
const uint8_t* elem = data_ + i * byte_width_;
return ::flexbuffers::ReadDouble(elem, byte_width_);
}
float FlexbufferWrapper::ElementAsFloat(size_t i) const {
return static_cast<float>(FlexbufferWrapper::ElementAsDouble(i));
}
// TODO(b/192589496): Ops must always be there. Remove this function when fixed
uint32_t NumSubgraphOperators(const SubGraph* subgraph) {
if (subgraph->operators() != nullptr) {
return subgraph->operators()->size();
} else {
return 0;
}
}
// TODO(b/192589496): Ops must always be there. Remove this function when fixed
uint32_t NumSubgraphOperators(const Model* model, int subgraph_idx) {
const SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx);
return NumSubgraphOperators(subgraph);
}
} // namespace tflite

View File

@@ -0,0 +1,56 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef THIRD_PARTY_TFLITE_MICRO_TENSORFLOW_LITE_MICRO_FLATBUFFER_UTILS_H_
#define THIRD_PARTY_TFLITE_MICRO_TENSORFLOW_LITE_MICRO_FLATBUFFER_UTILS_H_
#include "flatbuffers/flatbuffers.h"
#include "flatbuffers/flexbuffers.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
// Kernels use flexbuffers::Map to pack their init parameters in a tflite file,
// with the parameter names as map keys and the parameter values as the
// corresponding map values.
// Accessing the map values using the flexbuffers::Map class is inline heavy,
// which can cause the code size to bloat beyond what's reasonable for a micro
// application. Use this class instead, when possible.
// FlexbufferWrapper takes advantage of the following properties of
// flexbuffers::Map:
// 1. It can be viewed as a flexbuffers::Vector of the values.
// 2. The values in the vector are ordered alphabetically by their keys.
// 3. All integer and Boolean values are stored as 64-bit numbers.
// 4. All floating point values are stored as double precision numbers.
// The properties are mentioned in the flexbuffers docs, but we rely on
// a unit test to catch design changes.
class FlexbufferWrapper : public flexbuffers::Vector {
public:
// Construct with a serialized flexbuffer 'buffer' of 'size' bytes
explicit FlexbufferWrapper(const uint8_t* buffer, size_t size);
int64_t ElementAsInt64(size_t i) const;
uint64_t ElementAsUInt64(size_t i) const;
int32_t ElementAsInt32(size_t i) const;
bool ElementAsBool(size_t i) const;
double ElementAsDouble(size_t i) const;
float ElementAsFloat(size_t i) const;
};
// Return the number of operators in a tflite subgraph
uint32_t NumSubgraphOperators(const SubGraph* subgraph);
uint32_t NumSubgraphOperators(const Model* model, int subgraph_idx);
} // namespace tflite
#endif // THIRD_PARTY_TFLITE_MICRO_TENSORFLOW_LITE_MICRO_FLATBUFFER_UTILS_H_
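As a minimal sketch of the intended usage (mirroring the cycles_max custom op migrated later in this commit; the parameter name and index here are illustrative, not canonical):

// Sketch of a custom-op Init() reading its flexbuffer-packed parameters
// through FlexbufferWrapper instead of flexbuffers::Map.
#include <cstdint>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/flatbuffer_utils.h"

namespace {

struct OpData {
  int32_t cycles_max;
};

// Values in the flexbuffer vector are ordered alphabetically by key, so
// 'cycles_max' (the only parameter in this sketch) sits at index 0.
constexpr int kCyclesMaxIndex = 0;

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  OpData* op_data = static_cast<OpData*>(
      context->AllocatePersistentBuffer(context, sizeof(OpData)));
  if (buffer != nullptr && length > 0) {
    const uint8_t* buffer_t = reinterpret_cast<const uint8_t*>(buffer);
    tflite::FlexbufferWrapper wrapper(buffer_t, length);
    op_data->cycles_max = wrapper.ElementAsInt32(kCyclesMaxIndex);
  } else {
    op_data->cycles_max = 0;
  }
  return op_data;
}

}  // namespace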

View File

@@ -1,4 +1,4 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/kernels/activations.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
@@ -25,141 +27,21 @@ limitations under the License.
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace ops {
namespace micro {
namespace activations {
namespace {
struct ReluOpData {
ReluParams params;
};
struct Relu6OpData {
int8_t six_int8;
int8_t zero_int8;
uint8_t six_uint8;
uint8_t zero_uint8;
};
} // namespace
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
template <typename T>
inline void ReluQuantized(const ReluOpData& data,
const RuntimeShape& input_shape,
const RuntimeShape& output_shape, const T* input_data,
T* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const int32_t val = static_cast<int32_t>(input_data[i]);
int32_t clamped =
data.params.output_offset +
MultiplyByQuantizedMultiplier(val - data.params.input_offset,
data.params.output_multiplier,
data.params.output_shift);
clamped = std::max(data.params.quantized_activation_min, clamped);
clamped = std::min(data.params.quantized_activation_max, clamped);
output_data[i] = static_cast<T>(clamped);
}
}
template <typename T>
inline void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
ReluOpData* data) {
float act_min = 0.0;
float act_max = std::numeric_limits<float>::infinity();
double real_multiplier =
static_cast<double>(input->params.scale / output->params.scale);
const RuntimeShape input_shape = GetTensorShape(input);
const RuntimeShape output_shape = GetTensorShape(output);
QuantizeMultiplier(real_multiplier, &data->params.output_multiplier,
&data->params.output_shift);
data->params.quantized_activation_min = std::max(
static_cast<int32_t>(std::numeric_limits<T>::min()),
output->params.zero_point +
static_cast<int32_t>(roundf(act_min / output->params.scale)));
data->params.quantized_activation_max =
act_max == std::numeric_limits<float>::infinity()
? static_cast<int32_t>(std::numeric_limits<T>::max())
: std::min(static_cast<int32_t>(std::numeric_limits<T>::max()),
output->params.zero_point +
static_cast<int32_t>(
roundf(act_max / output->params.scale)));
data->params.input_offset = input->params.zero_point;
data->params.output_offset = output->params.zero_point;
}
inline void ReluFloat(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const float val = input_data[i];
const float lower = 0.0f;
const float clamped = val < lower ? lower : val;
output_data[i] = clamped;
}
}
inline void Relu6Float(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const float val = input_data[i];
const float upper = 6.0f;
const float lower = 0.0f;
const float clamped = val > upper ? upper : val < lower ? lower : val;
output_data[i] = clamped;
}
}
template <typename Q>
inline void Relu6Quantized(Q lower, Q upper, const RuntimeShape& input_shape,
const Q* input_data,
const RuntimeShape& output_shape, Q* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const Q val = input_data[i];
const Q clamped = val > upper ? upper : val < lower ? lower : val;
output_data[i] = clamped;
}
}
void* ReluInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(ReluOpData));
}
TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
ReluOpData* data = static_cast<ReluOpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
if (input->type == kTfLiteInt8) {
CalculateReluOpData<int8_t>(input, output, data);
} else if (input->type == kTfLiteUInt8) {
CalculateReluOpData<uint8_t>(input, output, data);
}
return kTfLiteOk;
}
TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
const ReluOpData& data = *(static_cast<const ReluOpData*>(node->user_data));
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
tflite::micro::GetEvalInput(context, node, kActivationsInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
tflite::micro::GetEvalOutput(context, node, kActivationsOutputTensor);
switch (input->type) {
case kTfLiteFloat32: {
@@ -171,19 +53,12 @@ TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteOk;
}
case kTfLiteInt8: {
ReluQuantized<int8_t>(data, tflite::micro::GetTensorShape(input),
tflite::ReluQuantized(data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
}
case kTfLiteUInt8: {
ReluQuantized<uint8_t>(data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorData<uint8_t>(output));
return kTfLiteOk;
}
default: {
TF_LITE_KERNEL_LOG(context, "Only float32 is supported currently, got %s",
TfLiteTypeGetName(input->type));
@@ -197,34 +72,14 @@ void* Relu6Init(TfLiteContext* context, const char* buffer, size_t length) {
return context->AllocatePersistentBuffer(context, sizeof(Relu6OpData));
}
TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
Relu6OpData* data = static_cast<Relu6OpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
if (input->type == kTfLiteInt8) {
data->six_int8 = FloatToQuantizedType<int8_t>(6.0f, input->params.scale,
input->params.zero_point);
data->zero_int8 = input->params.zero_point;
} else if (input->type == kTfLiteUInt8) {
data->six_uint8 = FloatToQuantizedType<uint8_t>(6.0f, input->params.scale,
input->params.zero_point);
data->zero_uint8 = input->params.zero_point;
}
return kTfLiteOk;
}
TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
const Relu6OpData& data = *(static_cast<const Relu6OpData*>(node->user_data));
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
tflite::micro::GetEvalInput(context, node, kActivationsInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
tflite::micro::GetEvalOutput(context, node, kActivationsOutputTensor);
switch (input->type) {
case kTfLiteFloat32: {
@@ -236,19 +91,11 @@ TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteOk;
}
case kTfLiteInt8: {
Relu6Quantized<int8_t>(data.zero_int8, data.six_int8,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
}
case kTfLiteUInt8: {
Relu6Quantized<uint8_t>(data.zero_uint8, data.six_uint8,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
Relu6Quantized(data.zero_int8, data.six_int8,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
}
default: {
@@ -259,13 +106,13 @@ TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
}
}
} // namespace activations
} // namespace
TfLiteRegistration Register_RELU() {
return {/*init=*/activations::ReluInit,
return {/*init=*/ReluInit,
/*free=*/nullptr,
/*prepare=*/activations::ReluPrepare,
/*invoke=*/activations::ReluEval,
/*prepare=*/ReluPrepare,
/*invoke=*/ReluEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
@@ -273,16 +120,14 @@ TfLiteRegistration Register_RELU() {
}
TfLiteRegistration Register_RELU6() {
return {/*init=*/activations::Relu6Init,
return {/*init=*/Relu6Init,
/*free=*/nullptr,
/*prepare=*/activations::Relu6Prepare,
/*invoke=*/activations::Relu6Eval,
/*prepare=*/Relu6Prepare,
/*invoke=*/Relu6Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,63 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATIONS_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATIONS_H_
#include <cstdint>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
extern const int kActivationsInputTensor;
extern const int kActivationsOutputTensor;
struct ReluOpData {
ReluParams params;
};
struct Relu6OpData {
int8_t six_int8;
int8_t zero_int8;
};
void ReluQuantized(const ReluOpData& data, const RuntimeShape& input_shape,
const RuntimeShape& output_shape, const int8_t* input_data,
int8_t* output_data);
template <typename T>
void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
ReluOpData* data);
void ReluFloat(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data);
void Relu6Float(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data);
void Relu6Quantized(int8_t lower, int8_t upper, const RuntimeShape& input_shape,
const int8_t* input_data, const RuntimeShape& output_shape,
int8_t* output_data);
TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node);
TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATIONS_H_
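A minimal standalone sketch exercising the now-shared Relu6Quantized() helper declared above; the scale and zero point are assumed example values, only the clamp semantics come from this diff:

#include <cstdint>
#include <cstdio>

#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/micro/kernels/activations.h"

int main() {
  // Assume scale = 0.1, zero_point = -128: six quantizes to
  // round(6.0 / 0.1) + zero_point = -68.
  const int8_t zero_int8 = -128;
  const int8_t six_int8 = -68;

  const int32_t dims[4] = {1, 1, 1, 4};
  const tflite::RuntimeShape shape(4, dims);

  const int8_t input[4] = {-128, -100, -68, 10};
  int8_t output[4];
  tflite::Relu6Quantized(zero_int8, six_int8, shape, input, shape, output);

  for (int i = 0; i < 4; ++i) {
    // Each value is clamped into [zero_int8, six_int8] = [-128, -68].
    printf("%d -> %d\n", input[i], output[i]);
  }
  return 0;
}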

View File

@@ -0,0 +1,148 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <algorithm>
#include <cstdint>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/activations.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
const int kActivationsInputTensor = 0;
const int kActivationsOutputTensor = 0;
void ReluQuantized(const ReluOpData& data, const RuntimeShape& input_shape,
const RuntimeShape& output_shape, const int8_t* input_data,
int8_t* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const int32_t val = static_cast<int32_t>(input_data[i]);
int32_t clamped =
data.params.output_offset +
MultiplyByQuantizedMultiplier(val - data.params.input_offset,
data.params.output_multiplier,
data.params.output_shift);
clamped = std::max(data.params.quantized_activation_min, clamped);
clamped = std::min(data.params.quantized_activation_max, clamped);
output_data[i] = static_cast<int8_t>(clamped);
}
}
template <typename T>
void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
ReluOpData* data) {
float act_min = 0.0;
float act_max = std::numeric_limits<float>::infinity();
double real_multiplier =
static_cast<double>(input->params.scale / output->params.scale);
const RuntimeShape input_shape = GetTensorShape(input);
const RuntimeShape output_shape = GetTensorShape(output);
QuantizeMultiplier(real_multiplier, &data->params.output_multiplier,
&data->params.output_shift);
data->params.quantized_activation_min = std::max(
static_cast<int32_t>(std::numeric_limits<T>::min()),
output->params.zero_point +
static_cast<int32_t>(roundf(act_min / output->params.scale)));
data->params.quantized_activation_max =
act_max == std::numeric_limits<float>::infinity()
? static_cast<int32_t>(std::numeric_limits<T>::max())
: std::min(static_cast<int32_t>(std::numeric_limits<T>::max()),
output->params.zero_point +
static_cast<int32_t>(
roundf(act_max / output->params.scale)));
data->params.input_offset = input->params.zero_point;
data->params.output_offset = output->params.zero_point;
}
void ReluFloat(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const float val = input_data[i];
const float lower = 0.0f;
const float clamped = val < lower ? lower : val;
output_data[i] = clamped;
}
}
void Relu6Float(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const float val = input_data[i];
const float upper = 6.0f;
const float lower = 0.0f;
const float clamped = val > upper ? upper : val < lower ? lower : val;
output_data[i] = clamped;
}
}
void Relu6Quantized(int8_t lower, int8_t upper, const RuntimeShape& input_shape,
const int8_t* input_data, const RuntimeShape& output_shape,
int8_t* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const int8_t val = input_data[i];
const int8_t clamped = val > upper ? upper : val < lower ? lower : val;
output_data[i] = clamped;
}
}
TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
ReluOpData* data = static_cast<ReluOpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kActivationsInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kActivationsOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
if (input->type == kTfLiteInt8) {
CalculateReluOpData<int8_t>(input, output, data);
}
return kTfLiteOk;
}
TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
Relu6OpData* data = static_cast<Relu6OpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kActivationsInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
if (input->type == kTfLiteInt8) {
data->six_int8 = FloatToQuantizedType<int8_t>(6.0f, input->params.scale,
input->params.zero_point);
data->zero_int8 = input->params.zero_point;
}
return kTfLiteOk;
}
} // namespace tflite

View File

@@ -66,12 +66,12 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params,
OpData* data) {
data->requires_broadcast = !HaveSameShapes(input1, input2);
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
// 8bit -> 8bit general quantized path, with general rescalings
data->input1_offset = -input1->params.zero_point;
data->input2_offset = -input2->params.zero_point;
data->output_offset = output->params.zero_point;
data->left_shift = 20;
data->left_shift = (output->type == kTfLiteInt16) ? 15 : 20;
const double twice_max_input_scale =
2 * static_cast<double>(
std::max(input1->params.scale, input2->params.scale));
@@ -133,24 +133,25 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
tflite::ArithmeticParams op_params;
op_params.left_shift = data->left_shift;
op_params.input1_offset = data->input1_offset;
op_params.input1_multiplier = data->input1_multiplier;
op_params.input1_shift = data->input1_shift;
op_params.input2_offset = data->input2_offset;
op_params.input2_multiplier = data->input2_multiplier;
op_params.input2_shift = data->input2_shift;
op_params.output_offset = data->output_offset;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
SetActivationParams(data->output_activation_min,
data->output_activation_max, &op_params);
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
if (output->type == kTfLiteInt8) {
tflite::ArithmeticParams op_params;
op_params.left_shift = data->left_shift;
op_params.input1_offset = data->input1_offset;
op_params.input1_multiplier = data->input1_multiplier;
op_params.input1_shift = data->input1_shift;
op_params.input2_offset = data->input2_offset;
op_params.input2_multiplier = data->input2_multiplier;
op_params.input2_shift = data->input2_shift;
op_params.output_offset = data->output_offset;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
SetActivationParams(data->output_activation_min, data->output_activation_max,
&op_params);
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
switch (output->type) {
case kTfLiteInt8: {
if (need_broadcast) {
reference_integer_ops::BroadcastAdd4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
@@ -168,24 +169,32 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
} else {
break;
}
case kTfLiteInt16: {
if (need_broadcast) {
reference_ops::BroadcastAdd4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorData<int16_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorData<int16_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
tflite::micro::GetTensorData<int16_t>(output));
} else {
reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorData<int16_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorData<int16_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
tflite::micro::GetTensorData<int16_t>(output),
false);
}
break;
}
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(output->type), output->type);
return kTfLiteError;
}
return kTfLiteOk;
@@ -231,7 +240,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
if (output->type == kTfLiteFloat32) {
EvalAdd(context, node, params, data, input1, input2, output);
} else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
} else if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, data,
input1, input2, output));
} else {

View File

@@ -18,6 +18,7 @@ limitations under the License.
#include <cstdint>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
@@ -28,6 +29,22 @@ namespace {
constexpr int kInputTensor0 = 0;
constexpr int kOutputTensor = 0;
constexpr int kAddNIntegerShift = 20;
// only used with INT8 tensors
struct OpData {
int32_t output_activation_min;
int32_t output_activation_max;
int32_t input_offset;
int32_t output_offset;
int32_t input_multiplier;
int32_t output_multiplier;
int input_shift;
int output_shift;
int left_shift;
int scratch_index;
};
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
int num_inputs = NumInputs(node);
TF_LITE_ENSURE(context, num_inputs >= 2);
@@ -47,19 +64,61 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, i, &input));
TF_LITE_ENSURE(context, HaveSameShapes(input_tensor_first, input));
TF_LITE_ENSURE_TYPES_EQ(context, input_tensor_first->type, input->type);
// Check that all INT8 input tensors have the same zero-point and scale.
if (input_tensor_first->type == kTfLiteInt8) {
TF_LITE_ENSURE(context, input_tensor_first->params.zero_point ==
input->params.zero_point);
TF_LITE_ENSURE(context,
input_tensor_first->params.scale == input->params.scale);
}
}
// Allocate scratch buffer space for pointer to each tensor's data
// and store the scratch buffer index in the node's user_data
if (output->type == kTfLiteFloat32) {
// Allocate scratch buffer space for pointer to each tensor's data
// and store the scratch buffer index in the node's user_data
int scratch_index;
size_t scratch_size = sizeof(float*) * num_inputs;
TF_LITE_ENSURE_OK(context, context->RequestScratchBufferInArena(
context, scratch_size, &scratch_index));
node->user_data =
reinterpret_cast<decltype(node->user_data)>(scratch_index);
} else if (output->type == kTfLiteInt8) {
node->user_data =
context->AllocatePersistentBuffer(context, sizeof(OpData));
OpData* data = static_cast<OpData*>(node->user_data);
// Allocate scratch buffer space for pointer to each tensor's data
// and store the scratch buffer index in OpData
size_t scratch_size = sizeof(int8_t*) * num_inputs;
TF_LITE_ENSURE_OK(
context, context->RequestScratchBufferInArena(context, scratch_size,
&data->scratch_index));
// 8bit -> 8bit general quantized path, with general rescalings
data->input_offset = -input_tensor_first->params.zero_point;
data->output_offset = output->params.zero_point;
data->left_shift = kAddNIntegerShift;
const double twice_max_input_scale =
2 * static_cast<double>(input_tensor_first->params.scale);
const double real_input_multiplier =
static_cast<double>(input_tensor_first->params.scale) /
twice_max_input_scale;
const double real_output_multiplier =
twice_max_input_scale /
((1 << data->left_shift) * static_cast<double>(output->params.scale));
QuantizeMultiplierSmallerThanOneExp(
real_input_multiplier, &data->input_multiplier, &data->input_shift);
QuantizeMultiplierSmallerThanOneExp(
real_output_multiplier, &data->output_multiplier, &data->output_shift);
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, kTfLiteActNone, output, &data->output_activation_min,
&data->output_activation_max));
} else {
TF_LITE_KERNEL_LOG(context, "ADD_N only supports FLOAT32, got %s.",
TF_LITE_KERNEL_LOG(context, "ADD_N only supports FLOAT32 and INT8, got %s.",
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
@@ -72,12 +131,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
}
template <typename T>
void EvalAddN(TfLiteContext* context, TfLiteNode* node,
TfLiteEvalTensor* output) {
inline const T** CopyInputsToScratchBuffer(TfLiteContext* context,
TfLiteNode* node,
const int scratch_index) {
int num_inputs = NumInputs(node);
int scratch_index =
static_cast<int>(reinterpret_cast<intptr_t>(node->user_data));
void* scratch_buffer = context->GetScratchBuffer(context, scratch_index);
const T** all_inputs = static_cast<decltype(all_inputs)>(scratch_buffer);
for (int i = 0; i < num_inputs; i++) {
@@ -86,17 +143,56 @@ void EvalAddN(TfLiteContext* context, TfLiteNode* node,
all_inputs[i] = tflite::micro::GetTensorData<T>(next_input);
}
return all_inputs;
}
template <typename T>
void EvalAddN(TfLiteContext* context, TfLiteNode* node,
TfLiteEvalTensor* output) {
int num_inputs = NumInputs(node);
int scratch_index =
static_cast<int>(reinterpret_cast<intptr_t>(node->user_data));
const T** all_inputs =
CopyInputsToScratchBuffer<T>(context, node, scratch_index);
reference_ops::AddN<T>(tflite::micro::GetTensorShape(output), num_inputs,
all_inputs, tflite::micro::GetTensorData<T>(output));
}
template <typename T>
void EvalAddNQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteEvalTensor* output) {
int num_inputs = NumInputs(node);
OpData* data = static_cast<OpData*>(node->user_data);
const T** all_inputs =
CopyInputsToScratchBuffer<T>(context, node, data->scratch_index);
ArithmeticParams params;
params.left_shift = data->left_shift;
params.input1_offset = data->input_offset;
params.input1_multiplier = data->input_multiplier;
params.input1_shift = data->input_shift;
params.output_offset = data->output_offset;
params.output_multiplier = data->output_multiplier;
params.output_shift = data->output_shift;
SetActivationParams(data->output_activation_min, data->output_activation_max,
&params);
reference_ops::AddN(params, tflite::micro::GetTensorShape(output), num_inputs,
all_inputs, tflite::micro::GetTensorData<T>(output));
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
if (output->type == kTfLiteFloat32) {
EvalAddN<float>(context, node, output);
} else if (output->type == kTfLiteInt8) {
EvalAddNQuantized<int8_t>(context, node, output);
} else {
TF_LITE_KERNEL_LOG(context, "ADD_N only supports FLOAT32, got %s.",
TF_LITE_KERNEL_LOG(context, "ADD_N only supports FLOAT32 and INT8, got %s.",
TfLiteTypeGetName(output->type));
return kTfLiteError;
}

View File

@@ -13,8 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#define FLATBUFFERS_LOCALE_INDEPENDENT 0
#include "flatbuffers/flexbuffers.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
@@ -22,6 +20,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/flatbuffer_utils.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
/*
@@ -56,6 +55,11 @@ namespace {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
// Indices into the init flexbuffer's vector.
// The parameter's name is in the comment that follows.
// Elements in the vectors are ordered alphabetically by parameter name.
constexpr int kCyclesMaxIndex = 0; // 'cycles_max'
// TODO(b/149795762): Add this to TfLiteStatus enum.
constexpr TfLiteStatus kTfLiteAbort = static_cast<TfLiteStatus>(-9);
@@ -76,8 +80,8 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) {
if (buffer != nullptr && length > 0) {
const uint8_t* buffer_t = reinterpret_cast<const uint8_t*>(buffer);
const flexbuffers::Map& m = flexbuffers::GetRoot(buffer_t, length).AsMap();
op_data->cycles_max = m["cycles_max"].AsInt32();
tflite::FlexbufferWrapper wrapper(buffer_t, length);
op_data->cycles_max = wrapper.ElementAsInt32(kCyclesMaxIndex);
} else {
op_data->cycles_max = 0;
}
@@ -118,6 +122,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
// https://docs.google.com/document/d/1lc_G2ZFhjiKFo02UHjBaljye1xsL0EkfybkaVELEE3Q/edit?usp=sharing
// https://docs.google.com/document/d/1pGc42PuWyrk-Jy1-9qeqtggvsmHr1ifz8Lmqfpr2rKA/edit?usp=sharing
if (output->dims->data[1] == 5 || output->dims->data[1] == 13 ||
output->dims->data[1] == 25 ||
(cb_prepare_count == 5 && output->dims->data[2] == 2 &&
output->dims->data[3] == 96)) {
op_data->cycles_max = 1;

View File

@@ -147,8 +147,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone);
TF_LITE_ENSURE(context,
input_type == kTfLiteFloat32 || input_type == kTfLiteUInt8 ||
input_type == kTfLiteInt8 || input_type == kTfLiteInt32 ||
input_type == kTfLiteInt64);
input_type == kTfLiteInt8 || input_type == kTfLiteInt16 ||
input_type == kTfLiteInt32 || input_type == kTfLiteInt64);
// Output type must match input type
TF_LITE_ENSURE_EQ(context, output_type, input_type);
@@ -182,6 +182,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
switch (output_type) { // Already know in/out types are same.
case kTfLiteFloat32:
case kTfLiteInt16:
case kTfLiteInt32:
case kTfLiteInt64: {
data->params.axis = CalculatePositiveAxis(params->axis, output);
@@ -247,6 +248,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
case kTfLiteInt64:
EvalUnquantized<int64_t>(context, node);
break;
case kTfLiteInt16:
EvalUnquantized<int16_t>(context, node);
break;
default:
TF_LITE_KERNEL_LOG(

View File

@@ -53,8 +53,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const auto& data = *(static_cast<const OpDataConv*>(node->user_data));
TF_LITE_ENSURE_EQ(context, input->type, output->type);
TF_LITE_ENSURE_MSG(context, input->type == filter->type,
"Hybrid models are not supported on TFLite Micro.");
TF_LITE_ENSURE_MSG(
context,
input->type == filter->type ||
(input->type == kTfLiteInt16 && filter->type == kTfLiteInt8),
"Hybrid models are not supported on TFLite Micro.");
switch (input->type) { // Already know in/out types are same.
case kTfLiteFloat32: {
@@ -70,6 +73,19 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
tflite::micro::GetTensorShape(nullptr), nullptr);
break;
}
case kTfLiteInt16: {
reference_integer_ops::ConvPerChannel(
ConvParamsQuantized(params, data), data.per_channel_output_multiplier,
data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int16_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<std::int64_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
break;
}
case kTfLiteInt8: {
reference_integer_ops::ConvPerChannel(
ConvParamsQuantized(params, data), data.per_channel_output_multiplier,

View File

@@ -72,6 +72,21 @@ TfLiteStatus CalculateOpDataConv(TfLiteContext* context, TfLiteNode* node,
TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node);
// This is the most generic TfLiteRegistration. The actual supported types may
// still be target dependent. The only requirement is that every implementation
// (reference or optimized) must define this function.
TfLiteRegistration Register_CONV_2D();
#if defined(XTENSA)
// Returns a TfLiteRegistration struct for kernel variant that only supports
// int8 inputs and outputs.
TfLiteRegistration Register_CONV_2D_INT8REF();
#else
inline TfLiteRegistration Register_CONV_2D_INT8REF() {
return Register_CONV_2D();
}
#endif
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
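For context, application code typically reaches these registrations through an op resolver; a minimal sketch (the resolver size and use of the default registration are illustrative assumptions, not part of this diff):

#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

void RegisterConv(tflite::MicroMutableOpResolver<1>& resolver) {
  // AddConv2D() wires up the generic Register_CONV_2D() declared in conv.h.
  // Target-specific builds (e.g. the XTENSA int8-only path guarded above)
  // expose additional Register_CONV_2D_* entry points for their variants.
  resolver.AddConv2D();
}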

View File

@@ -111,8 +111,7 @@ TfLiteStatus CalculateOpDataConv(TfLiteContext* context, TfLiteNode* node,
context, input, filter, bias, output, params.activation,
&data->output_multiplier, &data->output_shift,
&data->output_activation_min, &data->output_activation_max,
data->per_channel_output_multiplier,
reinterpret_cast<int*>(data->per_channel_output_shift),
data->per_channel_output_multiplier, data->per_channel_output_shift,
output_channels));
}
@@ -155,7 +154,7 @@ TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node) {
context, num_channels * sizeof(int32_t)));
// All per-channel quantized tensors need valid zero point and scale arrays.
if (input->type == kTfLiteInt8) {
if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) {
TF_LITE_ENSURE_EQ(context, filter->quantization.type,
kTfLiteAffineQuantization);

View File

@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_CONV_TEST_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_CONV_TEST_H_
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
@@ -59,36 +59,45 @@ TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size,
TfLiteRegistration registration,
uint8_t* output_data, float tolerance = 1e-5);
TfLiteStatus TestConvFloat(const int* input_dims_data, const float* input_data,
const int* filter_dims_data,
const float* filter_data, const int* bias_dims_data,
const float* bias_data, const int* output_dims_data,
TfLiteStatus TestConvFloat(int* input_dims_data, const float* input_data,
int* filter_dims_data, const float* filter_data,
int* bias_dims_data, const float* bias_data,
int* output_dims_data,
const float* expected_output_data,
TfLiteConvParams* conv_params,
TfLiteRegistration registration, float* output_data);
TfLiteStatus TestConvQuantizedPerLayer(
const int* input_dims_data, const float* input_data,
uint8_t* input_quantized, float input_scale, const int* filter_dims_data,
const float* filter_data, uint8_t* filter_quantized, float filter_scale,
const int* bias_dims_data, const float* bias_data, int32_t* bias_quantized,
const int* output_dims_data, const float* expected_output_data,
uint8_t* expected_output_quantized, float output_scale,
TfLiteConvParams* conv_params, TfLiteRegistration registration,
uint8_t* output_data);
int* input_dims_data, const float* input_data, uint8_t* input_quantized,
float input_scale, int* filter_dims_data, const float* filter_data,
uint8_t* filter_quantized, float filter_scale, int* bias_dims_data,
const float* bias_data, int32_t* bias_quantized, int* output_dims_data,
const float* expected_output_data, uint8_t* expected_output_quantized,
float output_scale, TfLiteConvParams* conv_params,
TfLiteRegistration registration, uint8_t* output_data);
TfLiteStatus TestConvQuantizedPerChannel(
const int* input_dims_data, const float* input_data,
int8_t* input_quantized, float input_scale, int input_zero_point,
const int* filter_dims_data, const float* filter_data,
int8_t* filter_data_quantized, const int* bias_dims_data,
const float* bias_data, int32_t* bias_data_quantized, float* bias_scales,
int* bias_zero_points, const int* output_dims_data,
int* input_dims_data, const float* input_data, int8_t* input_quantized,
float input_scale, int input_zero_point, int* filter_dims_data,
const float* filter_data, int8_t* filter_data_quantized,
int* bias_dims_data, const float* bias_data, int32_t* bias_data_quantized,
float* bias_scales, int* bias_zero_points, int* output_dims_data,
const float* expected_output_data, int8_t* expected_output_data_quantized,
float output_scale, int output_zero_point, TfLiteConvParams* conv_params,
TfLiteRegistration registration, int8_t* output_data);
TfLiteStatus TestConvQuantizedPerChannel(
int* input_dims_data, const float* input_data, int16_t* input_quantized,
float input_scale, int input_zero_point, int* filter_dims_data,
const float* filter_data, int8_t* filter_data_quantized,
int* bias_dims_data, const float* bias_data,
std::int64_t* bias_data_quantized, float* bias_scales,
int* bias_zero_points, int* output_dims_data,
const float* expected_output_data, int16_t* expected_output_data_quantized,
float output_scale, int output_zero_point, TfLiteConvParams* conv_params,
TfLiteRegistration registration, int16_t* output_data);
} // namespace testing
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
#endif // TENSORFLOW_LITE_MICRO_KERNELS_CONV_TEST_H_

View File

@@ -0,0 +1,173 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/cumsum.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace {
constexpr int kInputTensor = 0;
constexpr int kAxisTensor = 1;
constexpr int kOutputTensor = 0;
constexpr int kCumSumIntegerShift = 20;
// only used with INT8 tensors
struct OpData {
int32_t output_activation_min;
int32_t output_activation_max;
int32_t input_offset;
int32_t output_offset;
int32_t input_multiplier;
int32_t output_multiplier;
int input_shift;
int output_shift;
int left_shift;
};
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* axis = GetInput(context, node, kAxisTensor);
TF_LITE_ENSURE(context,
input->type == kTfLiteFloat32 || input->type == kTfLiteInt8);
TF_LITE_ENSURE_EQ(context, axis->type, kTfLiteInt32);
TF_LITE_ENSURE_EQ(context, NumElements(axis), 1);
TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_EQ(context, input->type, output->type);
TF_LITE_ENSURE(context, HaveSameShapes(input, output));
if (output->type == kTfLiteInt8) {
node->user_data =
context->AllocatePersistentBuffer(context, sizeof(OpData));
OpData* data = static_cast<OpData*>(node->user_data);
// 8bit -> 8bit general quantized path, with general rescalings
data->input_offset = -input->params.zero_point;
data->output_offset = output->params.zero_point;
data->left_shift = kCumSumIntegerShift;
const double twice_max_input_scale =
2 * static_cast<double>(input->params.scale);
const double real_input_multiplier =
static_cast<double>(input->params.scale) / twice_max_input_scale;
const double real_output_multiplier =
twice_max_input_scale /
((1 << data->left_shift) * static_cast<double>(output->params.scale));
QuantizeMultiplierSmallerThanOneExp(
real_input_multiplier, &data->input_multiplier, &data->input_shift);
QuantizeMultiplierSmallerThanOneExp(
real_output_multiplier, &data->output_multiplier, &data->output_shift);
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, kTfLiteActNone, output, &data->output_activation_min,
&data->output_activation_max));
}
return kTfLiteOk;
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
return CalculateOpData(context, node);
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
const TfLiteEvalTensor* axis_tensor =
tflite::micro::GetEvalInput(context, node, kAxisTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
auto* cs_params = static_cast<TfLiteCumsumParams*>(node->builtin_data);
auto input_shape = tflite::micro::GetTensorShape(input);
int32_t axis = *tflite::micro::GetTensorData<int32_t>(axis_tensor);
if (axis < 0) axis += input_shape.DimensionsCount();
if (axis < 0 || axis >= input_shape.DimensionsCount()) {
TF_LITE_KERNEL_LOG(context, "CUMSUM Invalid axis: %d", axis);
return kTfLiteError;
}
switch (input->type) {
case kTfLiteFloat32: {
reference_ops::CumSum(tflite::micro::GetTensorData<float>(input),
input_shape, axis, cs_params->exclusive,
cs_params->reverse,
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
} break;
case kTfLiteInt8: {
auto* data = static_cast<OpData*>(node->user_data);
ArithmeticParams params;
params.left_shift = data->left_shift;
params.input1_offset = data->input_offset;
params.input1_multiplier = data->input_multiplier;
params.input1_shift = data->input_shift;
params.output_offset = data->output_offset;
params.output_multiplier = data->output_multiplier;
params.output_shift = data->output_shift;
SetActivationParams(data->output_activation_min,
data->output_activation_max, &params);
reference_ops::CumSum(params, tflite::micro::GetTensorData<int8_t>(input),
input_shape, axis, cs_params->exclusive,
cs_params->reverse,
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
} break;
default: {
TF_LITE_KERNEL_LOG(context,
"CUMSUM only supports FLOAT32 and INT8, got %s.",
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
}
return kTfLiteError;
}
} // namespace
TfLiteRegistration Register_CUMSUM() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
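The int8 path above derives its fixed-point parameters with the standard add-style rescaling; a standalone sketch with assumed example scales (only the formulas and the left shift of 20 come from this diff):

#include <cstdint>
#include <cstdio>

#include "tensorflow/lite/kernels/internal/quantization_util.h"

int main() {
  const double input_scale = 0.05;   // assumed example value
  const double output_scale = 0.1;   // assumed example value
  const int left_shift = 20;         // kCumSumIntegerShift / kAddNIntegerShift

  const double twice_max_input_scale = 2 * input_scale;
  // Inputs are rescaled by 0.5 into a higher-precision intermediate domain
  // (left_shift = 20), accumulated there, then scaled back to the int8 output.
  const double real_input_multiplier = input_scale / twice_max_input_scale;
  const double real_output_multiplier =
      twice_max_input_scale / ((1 << left_shift) * output_scale);

  int32_t input_multiplier = 0, output_multiplier = 0;
  int input_shift = 0, output_shift = 0;
  tflite::QuantizeMultiplierSmallerThanOneExp(real_input_multiplier,
                                              &input_multiplier, &input_shift);
  tflite::QuantizeMultiplierSmallerThanOneExp(
      real_output_multiplier, &output_multiplier, &output_shift);

  printf("input:  %ld * 2^%d\n", static_cast<long>(input_multiplier),
         input_shift);
  printf("output: %ld * 2^%d\n", static_cast<long>(output_multiplier),
         output_shift);
  return 0;
}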

View File

@@ -0,0 +1,143 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/depth_to_space.h"
#include <stdint.h>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
// input/output tensor shape rank associations
constexpr int kBatchRank = 0;
constexpr int kHeightRank = 1;
constexpr int kWidthRank = 2;
constexpr int kDepthRank = 3;
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
auto* params =
reinterpret_cast<TfLiteDepthToSpaceParams*>(node->builtin_data);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
auto data_type = output->type;
TF_LITE_ENSURE(context,
data_type == kTfLiteFloat32 || data_type == kTfLiteInt8);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
const int block_size = params->block_size;
TF_LITE_ENSURE(context, block_size > 0);
const int input_height = input->dims->data[kHeightRank];
const int input_width = input->dims->data[kWidthRank];
const int input_channels = input->dims->data[kDepthRank];
int output_height = input_height * block_size;
int output_width = input_width * block_size;
int output_channels = input_channels / block_size / block_size;
TF_LITE_ENSURE_EQ(context, input_height, output_height / block_size);
TF_LITE_ENSURE_EQ(context, input_width, output_width / block_size);
TF_LITE_ENSURE_EQ(context, input_channels,
output_channels * block_size * block_size);
// We must update the output tensor dimensions.
// The dims storage is expected to be the same area in memory
// for both TfLiteTensor and TfLiteEvalTensor. This is important
// because TfLiteTensor in the MicroInterpreter is a temporary
// allocation. For the KernelRunner interpreter, TfLiteEvalTensor
// is a temporary allocation. We must therefore relocate the dims
// from the FlatBuffer to the persistent storage arena.
TfLiteEvalTensor* output_eval =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_OK(context, tflite::micro::CreateWritableTensorDimsWithCopy(
context, output, output_eval));
output->dims->data[kBatchRank] = input->dims->data[kBatchRank];
output->dims->data[kHeightRank] = output_height;
output->dims->data[kWidthRank] = output_width;
output->dims->data[kDepthRank] = output_channels;
return kTfLiteOk;
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
return CalculateOpData(context, node);
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params =
reinterpret_cast<TfLiteDepthToSpaceParams*>(node->builtin_data);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
tflite::DepthToSpaceParams op_params;
op_params.block_size = static_cast<int32_t>(params->block_size);
switch (input->type) { // Already know in/out types are same.
case kTfLiteFloat32:
reference_ops::DepthToSpace(op_params,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
break;
case kTfLiteInt8:
reference_ops::DepthToSpace(op_params,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
break;
default:
TF_LITE_KERNEL_LOG(
context, "DEPTH_TO_SPACE only supports FLOAT32 and INT8, got %s.",
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_DEPTH_TO_SPACE() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
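The shape checks in CalculateOpData above reduce to a small amount of arithmetic; a tiny sketch with a hypothetical 1x2x2x8 input and block_size = 2:

#include <cstdio>

int main() {
  const int block_size = 2;
  const int input_dims[4] = {1, 2, 2, 8};  // N, H, W, C

  const int output_height = input_dims[1] * block_size;                   // 4
  const int output_width = input_dims[2] * block_size;                    // 4
  const int output_channels = input_dims[3] / (block_size * block_size);  // 2

  printf("output dims: %d x %d x %d x %d\n", input_dims[0], output_height,
         output_width, output_channels);
  return 0;
}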

View File

@@ -20,7 +20,6 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"

View File

@@ -18,7 +18,6 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
@@ -113,8 +112,7 @@ TfLiteStatus CalculateOpDataDepthwiseConv(
context, input, filter, bias, output, params.activation,
&data->output_multiplier, &data->output_shift,
&data->output_activation_min, &data->output_activation_max,
data->per_channel_output_multiplier,
reinterpret_cast<int*>(data->per_channel_output_shift),
data->per_channel_output_multiplier, data->per_channel_output_shift,
output_channels));
}

View File

@@ -15,7 +15,6 @@ limitations under the License.
#include <numeric>
#define FLATBUFFERS_LOCALE_INDEPENDENT 0
#include "flatbuffers/flexbuffers.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
@@ -117,12 +116,11 @@ struct OpData {
};
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
OpData* op_data = nullptr;
const uint8_t* buffer_t = reinterpret_cast<const uint8_t*>(buffer);
const flexbuffers::Map& m = flexbuffers::GetRoot(buffer_t, length).AsMap();
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
op_data = reinterpret_cast<OpData*>(
context->AllocatePersistentBuffer(context, sizeof(OpData)));

View File

@@ -1,206 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/div.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace {
constexpr int kInputTensor1 = 0;
constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;
struct OpData {
// Parameters used in the quantized paths where the output is 8bit
int32_t input1_zero_point;
int32_t input2_zero_point;
int32_t output_zero_point;
int32_t output_activation_min;
int32_t output_activation_max;
// Parameters used in all quantized paths
int32_t output_multiplier;
int output_shift;
};
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
TfLiteDivParams* params, OpData* data) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input1;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputTensor1, &input1));
const TfLiteTensor* input2;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputTensor2, &input2));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, output->type);
if (output->type == kTfLiteInt8) {
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, params->activation, output, &data->output_activation_min,
&data->output_activation_max));
const double real_multiplier = static_cast<double>(
input1->params.scale / (input2->params.scale * output->params.scale));
QuantizeMultiplier(real_multiplier, &data->output_multiplier,
&data->output_shift);
data->input1_zero_point = input1->params.zero_point;
data->input2_zero_point = input2->params.zero_point;
data->output_zero_point = output->params.zero_point;
}
return kTfLiteOk;
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
auto* params = static_cast<TfLiteDivParams*>(node->builtin_data);
auto* data = static_cast<OpData*>(node->user_data);
return CalculateOpData(context, node, params, data);
}
void EvalDiv(TfLiteContext* context, TfLiteNode* node, TfLiteDivParams* params,
const OpData* data, const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
tflite::ArithmeticParams op_params = {};
#define TF_LITE_DIV(type, opname, data_type) \
data_type output_activation_min, output_activation_max; \
CalculateActivationRange(params->activation, &output_activation_min, \
&output_activation_max); \
SetActivationParams(output_activation_min, output_activation_max, \
&op_params); \
type::opname(op_params, tflite::micro::GetTensorShape(input1), \
tflite::micro::GetTensorData<data_type>(input1), \
tflite::micro::GetTensorShape(input2), \
tflite::micro::GetTensorData<data_type>(input2), \
tflite::micro::GetTensorShape(output), \
tflite::micro::GetTensorData<data_type>(output))
bool requires_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
if (requires_broadcast) {
TF_LITE_DIV(reference_ops, BroadcastDivSlow, float);
} else {
TF_LITE_DIV(reference_ops, Div, float);
}
#undef TF_LITE_DIV
}
TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteDivParams* params, const OpData* data,
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
tflite::ArithmeticParams op_params = {};
#define TF_LITE_DIV(type, opname, dtype) \
type::opname(op_params, tflite::micro::GetTensorShape(input1), \
tflite::micro::GetTensorData<dtype>(input1), \
tflite::micro::GetTensorShape(input2), \
tflite::micro::GetTensorData<dtype>(input2), \
tflite::micro::GetTensorShape(output), \
tflite::micro::GetTensorData<dtype>(output))
if (input1->type == kTfLiteInt8 && input2->type == kTfLiteInt8 &&
output->type == kTfLiteInt8) {
SetActivationParams(data->output_activation_min,
data->output_activation_max, &op_params);
op_params.input1_offset = -data->input1_zero_point;
op_params.input2_offset = -data->input2_zero_point;
op_params.output_offset = data->output_zero_point;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
bool requires_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
if (requires_broadcast) {
TF_LITE_DIV(reference_ops, BroadcastDivSlow, int8_t);
} else {
TF_LITE_DIV(reference_ops, Div, int8_t);
}
#undef TF_LITE_DIV
} else {
TF_LITE_KERNEL_LOG(
context, "Unsupported combination of input and output types in DIV.");
return kTfLiteError;
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = static_cast<TfLiteDivParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
auto* data = static_cast<OpData*>(node->user_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
if (output->type == kTfLiteFloat32) {
EvalDiv(context, node, params, data, input1, input2, output);
} else if (output->type == kTfLiteInt8) {
TF_LITE_ENSURE_OK(context, EvalQuantized(context, node, params, data,
input1, input2, output));
} else {
TF_LITE_KERNEL_LOG(context,
"DIV only supports FLOAT32, quantized INT8 "
"now, got type %s (%d).",
TfLiteTypeGetName(output->type), output->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_DIV() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
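
For reference, a minimal sketch of the real-space arithmetic that the quantized path of this (now removed) DIV kernel folds into output_multiplier/output_shift. The function name and values below are invented for illustration; with scales s1, s2, s_out and zero points z1, z2, z_out, the rescaling factor is s1 / (s2 * s_out), which is why CalculateOpData computes real_multiplier that way.
#include <algorithm>
#include <cmath>
#include <cstdint>
// Hypothetical float reference for the int8 DIV rescaling; q1/q2 are the
// quantized inputs, s*/z* the scales and zero points of the three tensors.
int8_t QuantizedDivReference(int8_t q1, int8_t q2, float s1, float s2,
                             float s_out, int32_t z1, int32_t z2,
                             int32_t z_out) {
  const float real1 = s1 * (q1 - z1);    // dequantize input 1
  const float real2 = s2 * (q2 - z2);    // dequantize input 2
  const float real_out = real1 / real2;  // the op in real space
  const int32_t q_out =
      z_out + static_cast<int32_t>(std::round(real_out / s_out));
  return static_cast<int8_t>(
      std::min<int32_t>(127, std::max<int32_t>(-128, q_out)));
}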

View File

@@ -25,6 +25,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
namespace tflite {
namespace {
@@ -45,7 +46,10 @@ using TransformFunc = float (*)(float);
template <typename T>
void PopulateLookupTable(const TfLiteTensor* input, const TfLiteTensor* output,
const TransformFunc transform, OpData* data) {
if (sizeof(T) != 1) TF_LITE_FATAL("Lookup table valid only for 8bit");
if (sizeof(T) != 1) {
MicroPrintf("Lookup table valid only for 8bit");
TFLITE_ABORT;
}
const float inverse_scale = 1 / output->params.scale;
int32_t maxval = std::numeric_limits<T>::max();

View File

@@ -0,0 +1,130 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/floor_div.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace {
// Input/output tensor index.
constexpr int kInputTensor1 = 0;
constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input1;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputTensor1, &input1));
const TfLiteTensor* input2;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputTensor2, &input2));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, output->type);
return kTfLiteOk;
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
return nullptr;
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
return CalculateOpData(context, node);
}
template <typename T>
TfLiteStatus EvalFloorDiv(TfLiteContext* context,
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
const T* denominator_data = tflite::micro::GetTensorData<T>(input2);
// Validate the denominator.
for (int i = 0; i < tflite::ElementCount(*input2->dims); ++i) {
if (std::equal_to<T>()(denominator_data[i], 0)) {
TF_LITE_KERNEL_LOG(context, "Division by 0");
return kTfLiteError;
}
}
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
if (requires_broadcast) {
reference_ops::BroadcastBinaryFunction4DSlow<T, T, T>(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<T>(input1),
tflite::micro::GetTensorShape(input2), denominator_data,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<T>(output), reference_ops::FloorDiv<T>);
} else {
reference_ops::BinaryFunction<T, T, T>(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<T>(input1),
tflite::micro::GetTensorShape(input2), denominator_data,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<T>(output), reference_ops::FloorDiv<T>);
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (input1->type) {
case kTfLiteFloat32: {
return EvalFloorDiv<float>(context, input1, input2, output);
}
default: {
TF_LITE_KERNEL_LOG(context, "Type '%s' is not supported by FLOOR_DIV.",
TfLiteTypeGetName(input1->type));
return kTfLiteError;
}
}
}
} // namespace
TfLiteRegistration Register_FLOOR_DIV() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
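
A small standalone sketch of the element-wise function the kernel above applies, assuming the reference FloorDiv(x, y) is floor(x / y); the point is that results round toward negative infinity rather than toward zero. The file below is illustrative only.
#include <cmath>
#include <cstdio>
int main() {
  std::printf("%g\n", std::floor(7.0f / 2.0f));   //  3: matches truncation
  std::printf("%g\n", std::floor(-7.0f / 2.0f));  // -4: truncation would give -3
  return 0;
}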

View File

@@ -0,0 +1,128 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/floor_mod.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
// OLD-TODO(b/117523611): We should factor out a binary_op and put binary ops
// there.
namespace tflite {
namespace {
// Input/output tensor index.
constexpr int kInputTensor1 = 0;
constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;
// OLD-TODO(b/117912880): Support quantization.
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input1;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputTensor1, &input1));
const TfLiteTensor* input2;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputTensor2, &input2));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, output->type);
return kTfLiteOk;
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
return nullptr;
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
return CalculateOpData(context, node);
}
template <typename T>
TfLiteStatus EvalFloorMod(TfLiteContext* context, bool requires_broadcast,
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
const T* denominator_data = tflite::micro::GetTensorData<T>(input2);
if (requires_broadcast) {
reference_ops::BroadcastBinaryFunction4DSlow<T, T, T>(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<T>(input1),
tflite::micro::GetTensorShape(input2), denominator_data,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<T>(output), reference_ops::FloorMod<T>);
} else {
reference_ops::BinaryFunction<T, T, T>(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<T>(input1),
tflite::micro::GetTensorShape(input2), denominator_data,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<T>(output), reference_ops::FloorMod<T>);
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
switch (input1->type) {
case kTfLiteFloat32: {
return EvalFloorMod<float>(context, requires_broadcast, input1, input2,
output);
}
default: {
TF_LITE_KERNEL_LOG(context, "Type '%s' is not supported by FLOOR_MOD.",
TfLiteTypeGetName(input1->type));
return kTfLiteError;
}
}
}
} // namespace
TfLiteRegistration Register_FLOOR_MOD() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
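
Similarly, a standalone sketch of the element-wise function applied above, assuming the reference FloorMod(x, y) is x - y * floor(x / y); unlike std::fmod, the result takes the sign of the divisor. FloorModSketch is an invented name.
#include <cmath>
#include <cstdio>
float FloorModSketch(float x, float y) { return x - y * std::floor(x / y); }
int main() {
  std::printf("%g\n", FloorModSketch(7.0f, 3.0f));   //  1
  std::printf("%g\n", FloorModSketch(7.0f, -3.0f));  // -2 (std::fmod would give 1)
  return 0;
}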

View File

@@ -109,19 +109,6 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
break;
}
case kTfLiteUInt8: {
tflite::reference_ops::FullyConnected(
FullyConnectedParamsQuantized(data),
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<uint8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
break;
}
default: {
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);

View File

@@ -65,7 +65,7 @@ TfLiteStatus CalculateOpDataFullyConnected(
// (reference or optimized) must define this function.
TfLiteRegistration Register_FULLY_CONNECTED();
#if defined(CMSIS_NN) || defined(ARDUINO)
#if defined(CMSIS_NN)
// The Arduino is a special case where we use the CMSIS kernels, but because of
// the current approach to building for Arduino, we do not support -DCMSIS_NN as
// part of the build. As a result, we use defined(ARDUINO) as proxy for the

View File

@@ -65,6 +65,11 @@ TfLiteStatus CalculateOpDataFullyConnected(
&data->output_shift);
data->input_zero_point = input->params.zero_point;
// Filter weights will always be symmetric quantized since we only support
// int8 quantization. See
// https://github.com/tensorflow/tensorflow/issues/44912 for additional
// context.
TFLITE_DCHECK(filter->params.zero_point == 0);
data->filter_zero_point = filter->params.zero_point;
data->output_zero_point = output->params.zero_point;
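
As an aside, a minimal sketch of why the zero filter zero-point (symmetric int8 weights) asserted above matters: sum((x - zx) * w) expands to sum(x * w) - zx * sum(w), so the weight-offset correction collapses into a single constant that can be folded ahead of time. The values below are made up for illustration.
#include <cstdint>
#include <cstdio>
int main() {
  const int8_t x[3] = {10, -20, 30};  // quantized activations
  const int8_t w[3] = {1, 2, 3};      // symmetric weights (zero point == 0)
  const int32_t zx = 5;               // activation zero point
  int32_t direct = 0, unoffset = 0, w_sum = 0;
  for (int i = 0; i < 3; ++i) {
    direct += (x[i] - zx) * w[i];
    unoffset += x[i] * w[i];
    w_sum += w[i];
  }
  // Both forms agree; the correction zx * sum(w) only needs computing once.
  std::printf("%d == %d\n", direct, unoffset - zx * w_sum);
  return 0;
}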

View File

@@ -0,0 +1,222 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace {
constexpr int kInputTensor = 0;
constexpr int kInputPositions = 1;
constexpr int kOutputTensor = 0;
template <typename InputT, typename CoordsT = int32_t>
TfLiteStatus Gather(const TfLiteGatherParams* params,
const TfLiteEvalTensor* input,
const TfLiteEvalTensor* coords, TfLiteEvalTensor* output) {
const InputT* input_data = tflite::micro::GetTensorData<InputT>(input);
const CoordsT* coords_data = tflite::micro::GetTensorData<CoordsT>(coords);
InputT* output_data = tflite::micro::GetTensorData<InputT>(output);
const TfLiteIntArray* input_dims = input->dims;
const int input_dims_size = input_dims->size;
int axis = params->axis;
if (axis < 0) {
axis += input_dims_size;
}
TFLITE_DCHECK_GE(axis, 0);
TFLITE_DCHECK_LT(axis, input_dims_size);
int batch_dims = params->batch_dims;
// batch_dims should be in range: [-rank(coords), rank(coords)].
  // A negative batch_dims is offset by adding the rank of coords.
const TfLiteIntArray* coords_dims = coords->dims;
const int coords_dims_size = coords_dims->size;
if (batch_dims < 0) {
batch_dims += coords_dims_size;
}
TFLITE_DCHECK_GE(batch_dims, 0);
TFLITE_DCHECK_LT(batch_dims, input_dims_size);
TFLITE_DCHECK_LE(batch_dims, coords_dims_size);
TFLITE_DCHECK_GE(axis, batch_dims);
for (int i = 0; i < batch_dims; ++i) {
TFLITE_DCHECK_EQ(input_dims->data[i], coords_dims->data[i]);
}
const int axis_size = input_dims->data[axis];
int batch_size = 1;
for (int i = 0; i < batch_dims; ++i) {
batch_size *= input_dims->data[i];
}
int outer_size = 1;
for (int i = batch_dims; i < axis; ++i) {
outer_size *= input_dims->data[i];
}
int inner_size = 1;
for (int i = axis + 1; i < input_dims_size; ++i) {
inner_size *= input_dims->data[i];
}
int coord_size = 1;
for (int i = batch_dims; i < coords_dims_size; ++i) {
coord_size *= coords_dims->data[i];
}
for (int batch = 0; batch < batch_size; ++batch) {
for (int outer = 0; outer < outer_size; ++outer) {
for (int coord = 0; coord < coord_size; ++coord) {
TFLITE_DCHECK_GE(coords_data[coord], 0);
TFLITE_DCHECK_LT(coords_data[coord], axis_size);
std::memcpy(output_data +
(((batch * outer_size) + outer) * coord_size + coord) *
inner_size,
input_data + (((batch * outer_size) + outer) * axis_size +
coords_data[batch * coord_size + coord]) *
inner_size,
sizeof(InputT) * inner_size);
}
}
}
return kTfLiteOk;
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const auto* params =
reinterpret_cast<const TfLiteGatherParams*>(node->builtin_data);
const TfLiteTensor* input;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
const TfLiteTensor* coords;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputPositions, &coords));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
switch (coords->type) {
case kTfLiteInt32:
break;
default:
TF_LITE_KERNEL_LOG(context,
"Positions of type '%s' are not supported by gather.",
TfLiteTypeGetName(coords->type));
return kTfLiteError;
break;
}
// Assign to output the input type.
output->type = input->type;
// Check conditions for different types.
switch (input->type) {
case kTfLiteFloat32:
case kTfLiteInt8:
break;
default:
TF_LITE_KERNEL_LOG(context, "Type '%s' is not supported by gather.",
TfLiteTypeGetName(input->type));
return kTfLiteError;
break;
}
int axis = params->axis;
if (axis < 0) {
axis += NumDimensions(input);
}
TF_LITE_ENSURE(context, 0 <= axis && axis < NumDimensions(input));
int batch_dims = params->batch_dims;
// batch_dims should be in range: [-rank(coords), rank(coords)].
  // A negative batch_dims is offset by adding the rank of coords.
if (batch_dims < 0) {
batch_dims += NumDimensions(coords);
}
TF_LITE_ENSURE(context, batch_dims <= axis);
TF_LITE_ENSURE(context, 0 <= batch_dims && batch_dims < NumDimensions(input));
TF_LITE_ENSURE(context, batch_dims <= NumDimensions(coords));
for (int i = 0; i < batch_dims; ++i) {
TF_LITE_ENSURE_EQ(context, input->dims->data[i], coords->dims->data[i]);
}
// GATHER updates the output tensor dimensions, but TfLiteTensor in the
// MicroInterpreter is a temporary allocation. We must therefore relocate the
  // dims from the FlatBuffer to the persistent storage arena.
TfLiteEvalTensor* output_eval =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_OK(context, tflite::micro::CreateWritableTensorDimsWithCopy(
context, output, output_eval));
TfLiteIntArray* output_shape = output->dims;
output_shape->size =
NumDimensions(input) + NumDimensions(coords) - 1 - batch_dims;
int output_index = 0;
for (int i = 0; i < axis; ++i) {
output_shape->data[output_index++] = input->dims->data[i];
}
for (int i = batch_dims; i < coords->dims->size; ++i) {
output_shape->data[output_index++] = coords->dims->data[i];
}
for (int i = axis + 1; i < input->dims->size; ++i) {
output_shape->data[output_index++] = input->dims->data[i];
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const auto* params =
reinterpret_cast<const TfLiteGatherParams*>(node->builtin_data);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
const TfLiteEvalTensor* coords =
tflite::micro::GetEvalInput(context, node, kInputPositions);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
if (coords->type == kTfLiteInt32) {
switch (input->type) {
case kTfLiteFloat32:
return Gather<float, int32_t>(params, input, coords, output);
break;
case kTfLiteInt8:
return Gather<int8_t, int32_t>(params, input, coords, output);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type '%s' is not supported by gather.",
TfLiteTypeGetName(input->type));
return kTfLiteError;
break;
}
}
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_GATHER() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
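
A worked shape example for the Prepare() logic above (a standalone sketch, not kernel code; GatherOutputShape is an invented helper): the output shape is input.shape[:axis] + coords.shape[batch_dims:] + input.shape[axis+1:].
#include <cstdio>
#include <vector>
std::vector<int> GatherOutputShape(const std::vector<int>& input,
                                   const std::vector<int>& coords, int axis,
                                   int batch_dims) {
  std::vector<int> out;
  for (int i = 0; i < axis; ++i) out.push_back(input[i]);
  for (int i = batch_dims; i < static_cast<int>(coords.size()); ++i)
    out.push_back(coords[i]);
  for (int i = axis + 1; i < static_cast<int>(input.size()); ++i)
    out.push_back(input[i]);
  return out;
}
int main() {
  // input {2, 3, 4}, coords {2, 5}, axis = 1, batch_dims = 1  ->  2 5 4
  for (int d : GatherOutputShape({2, 3, 4}, {2, 5}, 1, 1)) std::printf("%d ", d);
  std::printf("\n");
  return 0;
}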

View File

@@ -0,0 +1,201 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace {
constexpr int kParams = 0;
constexpr int kIndices = 1;
constexpr int kOutputTensor = 0;
constexpr int MAX_INDICES_ND = 5;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* params;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kParams, &params));
const TfLiteTensor* indices;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kIndices, &indices));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
switch (params->type) {
case kTfLiteFloat32:
case kTfLiteInt8:
break;
default:
TF_LITE_KERNEL_LOG(context,
"Params of type '%s' are not supported by gather_nd.",
TfLiteTypeGetName(params->type));
return kTfLiteError;
break;
}
switch (indices->type) {
case kTfLiteInt32:
break;
default:
TF_LITE_KERNEL_LOG(context,
"Indices of type '%s' are not supported by gather_nd.",
TfLiteTypeGetName(indices->type));
return kTfLiteError;
}
const int params_rank = NumDimensions(params);
const int indices_rank = NumDimensions(indices);
const int indices_nd = SizeOfDimension(indices, indices_rank - 1);
if (params_rank < 1) {
TF_LITE_KERNEL_LOG(context, "Params must be at least a vector.");
return kTfLiteError;
}
if (indices_rank < 1) {
TF_LITE_KERNEL_LOG(context, "Indices must be at least a vector.");
return kTfLiteError;
}
if (indices_nd > params_rank) {
TF_LITE_KERNEL_LOG(
context, "Index innermost dimension length must be <= params rank.");
return kTfLiteError;
}
if (indices_nd > MAX_INDICES_ND) {
TF_LITE_KERNEL_LOG(context,
"Index innermost dimension length must not exceed %d.",
MAX_INDICES_ND);
return kTfLiteError;
}
// Assign to output the input type.
output->type = params->type;
// TFLM gather_nd does not create the output tensor, but it needs to ensure
// that the output shape is correct. The result shape is
// indices.shape[:-1] + params.shape[indices.shape[-1]:]
TfLiteIntArray* output_shape = output->dims;
int output_index = 0;
for (int i = 0; i < indices_rank - 1; ++i) {
output_shape->data[output_index++] = indices->dims->data[i];
}
for (int i = indices_nd; i < params_rank; ++i) {
output_shape->data[output_index++] = params->dims->data[i];
}
output_shape->size = output_index;
return kTfLiteOk;
}
template <typename ParamsT, typename IndicesT>
TfLiteStatus GatherNd(const TfLiteEvalTensor* params,
const TfLiteEvalTensor* indices,
TfLiteEvalTensor* output) {
const int indices_dims = indices->dims->size;
const int indices_nd = indices->dims->data[indices_dims - 1];
const int params_dims = params->dims->size;
const IndicesT* index_data = tflite::micro::GetTensorData<IndicesT>(indices);
const ParamsT* param_data = tflite::micro::GetTensorData<ParamsT>(params);
ParamsT* output_data = tflite::micro::GetTensorData<ParamsT>(output);
int n_slices = 1;
for (int i = 0; i < indices_dims - 1; ++i) {
n_slices *= indices->dims->data[i];
}
// If indices[-1] == params.rank, fetch single elements.
// If indices[-1] < params.rank, fetch slices.
int slice_size = 1;
for (int i = indices_nd; i < params_dims; ++i) {
slice_size *= params->dims->data[i];
}
int remain_flat_size = ElementCount(*params->dims);
// Number of elements per dimension
int dims_to_count[MAX_INDICES_ND];
for (int i = 0; i < indices_nd; ++i) {
dims_to_count[i] = remain_flat_size / params->dims->data[i];
remain_flat_size = dims_to_count[i];
}
for (int i = 0; i < n_slices; ++i) {
int from_pos = 0;
for (int j = 0; j < indices_nd; ++j) {
int offset = i * indices_nd + j;
IndicesT index = index_data[offset];
from_pos += index * dims_to_count[j];
}
std::memcpy(output_data + i * slice_size, param_data + from_pos,
sizeof(ParamsT) * slice_size);
}
return kTfLiteOk;
}
template <typename IndicesT>
TfLiteStatus EvalGatherNd(TfLiteContext* context,
const TfLiteEvalTensor* params,
const TfLiteEvalTensor* indices,
TfLiteEvalTensor* output) {
switch (params->type) {
case kTfLiteFloat32:
return GatherNd<float, IndicesT>(params, indices, output);
break;
case kTfLiteInt8:
return GatherNd<int8_t, IndicesT>(params, indices, output);
break;
default:
TF_LITE_KERNEL_LOG(context,
"Params type '%s' are not supported by gather_nd.",
TfLiteTypeGetName(params->type));
return kTfLiteError;
}
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* params =
tflite::micro::GetEvalInput(context, node, kParams);
const TfLiteEvalTensor* indices =
tflite::micro::GetEvalInput(context, node, kIndices);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (indices->type) {
case kTfLiteInt32:
return EvalGatherNd<int32_t>(context, params, indices, output);
break;
default:
TF_LITE_KERNEL_LOG(context,
"Indices of type '%s' are not supported by gather_nd.",
TfLiteTypeGetName(indices->type));
return kTfLiteError;
}
}
} // namespace
TfLiteRegistration Register_GATHER_ND() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
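
A worked example of the flat-offset arithmetic in GatherNd() above, with made-up shapes: for params of shape {3, 4, 5} and indices of shape {2, 2}, indices_nd is 2, so the result shape is indices.shape[:-1] + params.shape[2:] = {2, 5} and each index pair selects a contiguous slice of 5 elements.
#include <cstdio>
int main() {
  const int params_dims[3] = {3, 4, 5};
  const int indices_nd = 2;
  int dims_to_count[2];
  int remain = 3 * 4 * 5;
  for (int i = 0; i < indices_nd; ++i) {
    dims_to_count[i] = remain / params_dims[i];  // 20, then 5
    remain = dims_to_count[i];
  }
  // The index pair {1, 2} addresses params[1][2][:]:
  const int from_pos = 1 * dims_to_count[0] + 2 * dims_to_count[1];
  std::printf("slice of 5 elements at flat offset %d\n", from_pos);  // 30
  return 0;
}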

View File

@@ -1,4 +1,4 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -23,72 +23,23 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/hard_swish.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace ops {
namespace micro {
namespace hard_swish {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
namespace {
void* HardSwishInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(HardSwishParams));
}
TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
HardSwishParams* params = static_cast<HardSwishParams*>(node->user_data);
params->input_zero_point = input->params.zero_point;
params->output_zero_point = output->params.zero_point;
const float input_scale = input->params.scale;
const float hires_input_scale = (1.0f / 128.0f) * input_scale;
const float reluish_scale = 3.0f / 32768.0f;
const float output_scale = output->params.scale;
const double output_multiplier =
static_cast<double>(hires_input_scale / output_scale);
int32_t output_multiplier_fixedpoint_int32;
QuantizeMultiplier(output_multiplier, &output_multiplier_fixedpoint_int32,
&params->output_multiplier_exponent);
DownScaleInt32ToInt16Multiplier(
output_multiplier_fixedpoint_int32,
&params->output_multiplier_fixedpoint_int16);
TF_LITE_ENSURE(context, params->output_multiplier_exponent <= 0);
const double reluish_multiplier =
static_cast<double>(hires_input_scale / reluish_scale);
int32_t reluish_multiplier_fixedpoint_int32;
QuantizeMultiplier(reluish_multiplier, &reluish_multiplier_fixedpoint_int32,
&params->reluish_multiplier_exponent);
DownScaleInt32ToInt16Multiplier(
reluish_multiplier_fixedpoint_int32,
&params->reluish_multiplier_fixedpoint_int16);
}
return kTfLiteOk;
}
TfLiteStatus HardSwishEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
tflite::micro::GetEvalInput(context, node, kHardSwishInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
tflite::micro::GetEvalOutput(context, node, kHardSwishOutputTensor);
HardSwishParams* params = static_cast<HardSwishParams*>(node->user_data);
switch (input->type) {
@@ -99,13 +50,6 @@ TfLiteStatus HardSwishEval(TfLiteContext* context, TfLiteNode* node) {
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
} break;
case kTfLiteUInt8: {
tflite::reference_ops::HardSwish<uint8_t>(
*params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} break;
case kTfLiteInt8: {
tflite::reference_ops::HardSwish<int8_t>(
*params, tflite::micro::GetTensorShape(input),
@@ -114,29 +58,24 @@ TfLiteStatus HardSwishEval(TfLiteContext* context, TfLiteNode* node) {
tflite::micro::GetTensorData<int8_t>(output));
} break;
default: {
TF_LITE_KERNEL_LOG(
context,
"Only float32/int8_t/uint8_t are supported currently, got %s",
TfLiteTypeGetName(input->type));
MicroPrintf("Unsupported type %s", TfLiteTypeGetName(input->type));
return kTfLiteError;
}
}
return kTfLiteOk;
}
} // namespace hard_swish
} // namespace
TfLiteRegistration Register_HARD_SWISH() {
return {/*init=*/hard_swish::HardSwishInit,
return {/*init=*/HardSwishInit,
/*free=*/nullptr,
/*prepare=*/hard_swish::HardSwishPrepare,
/*invoke=*/hard_swish::HardSwishEval,
/*prepare=*/tflite::HardSwishPrepare,
/*invoke=*/HardSwishEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,30 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_HARD_SWISH_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_HARD_SWISH_H_
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
namespace tflite {
extern const int kHardSwishInputTensor;
extern const int kHardSwishOutputTensor;
TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_HARD_SWISH_H_

View File

@@ -0,0 +1,79 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/hard_swish.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/hard_swish.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
const int kHardSwishInputTensor = 0;
const int kHardSwishOutputTensor = 0;
TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kHardSwishInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kHardSwishOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
if (input->type == kTfLiteInt8) {
HardSwishParams* params = static_cast<HardSwishParams*>(node->user_data);
params->input_zero_point = input->params.zero_point;
params->output_zero_point = output->params.zero_point;
const float input_scale = input->params.scale;
const float hires_input_scale = (1.0f / 128.0f) * input_scale;
const float reluish_scale = 3.0f / 32768.0f;
const float output_scale = output->params.scale;
const double output_multiplier =
static_cast<double>(hires_input_scale / output_scale);
int32_t output_multiplier_fixedpoint_int32;
QuantizeMultiplier(output_multiplier, &output_multiplier_fixedpoint_int32,
&params->output_multiplier_exponent);
DownScaleInt32ToInt16Multiplier(
output_multiplier_fixedpoint_int32,
&params->output_multiplier_fixedpoint_int16);
TF_LITE_ENSURE(context, params->output_multiplier_exponent <= 0);
const double reluish_multiplier =
static_cast<double>(hires_input_scale / reluish_scale);
int32_t reluish_multiplier_fixedpoint_int32;
QuantizeMultiplier(reluish_multiplier, &reluish_multiplier_fixedpoint_int32,
&params->reluish_multiplier_exponent);
DownScaleInt32ToInt16Multiplier(
reluish_multiplier_fixedpoint_int32,
&params->reluish_multiplier_fixedpoint_int16);
}
return kTfLiteOk;
}
} // namespace tflite
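
For orientation, a float sketch of the function whose quantized parameters HardSwishPrepare() derives above, assuming the usual definition hard_swish(x) = x * relu6(x + 3) / 6; the (1.0f / 128.0f) and 3.0f / 32768.0f constants are simply the scales chosen for the higher-resolution intermediate and the relu6-like term. HardSwishFloatSketch is an invented name.
#include <algorithm>
float HardSwishFloatSketch(float x) {
  // e.g. x = -3 -> 0, x = 1 -> 0.666..., x = 3 -> 3
  return x * std::min(6.0f, std::max(0.0f, x + 3.0f)) / 6.0f;
}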

View File

@@ -0,0 +1,166 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <stddef.h>
#include <cstring>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_graph.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
namespace {
struct OpData {
int then_subgraph_index;
int else_subgraph_index;
};
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
const auto* params =
reinterpret_cast<const TfLiteIfParams*>(node->builtin_data);
op_data->then_subgraph_index = params->then_subgraph_index;
op_data->else_subgraph_index = params->else_subgraph_index;
TF_LITE_ENSURE(context, node->inputs->size > 0);
// The first input is the condition.
const TfLiteTensor* cond;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &cond));
TF_LITE_ENSURE_EQ(context, cond->type, kTfLiteBool);
TF_LITE_ENSURE_EQ(context, NumElements(cond), 1);
// The first input of the node is the condition. The rest of inputs are
// passed to the branch subgraphs. Therefore, the number of subgraph inputs
// will be the number of node inputs - 1.
size_t num_inputs = node->inputs->size - 1;
size_t num_outputs = node->outputs->size;
  // Casting to TfLiteIntArray is required since we are re-using
// GetExecutionPlan from TfLiteContext. On TFLM this method returns a
// MicroGraph.
// TODO(b/188226309): Design a cleaner way to get a graph from kernel context.
MicroGraph* graph_info;
context->GetExecutionPlan(context,
reinterpret_cast<TfLiteIntArray**>(&graph_info));
TF_LITE_ENSURE(context,
op_data->then_subgraph_index < graph_info->NumSubgraphs());
TF_LITE_ENSURE(context,
op_data->else_subgraph_index < graph_info->NumSubgraphs());
TF_LITE_ENSURE_EQ(
context, num_inputs,
graph_info->NumSubgraphInputs(op_data->then_subgraph_index));
TF_LITE_ENSURE_EQ(
context, num_outputs,
graph_info->NumSubgraphOutputs(op_data->then_subgraph_index));
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
const TfLiteTensor* cond;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &cond));
bool cond_value = cond->data.b[0];
  // Casting to TfLiteIntArray is required since we are re-using
// GetExecutionPlan from TfLiteContext. On TFLM this method returns a
// MicroGraph.
// TODO(b/188226309): Design a cleaner way to get a graph from kernel context.
MicroGraph* graph_info;
context->GetExecutionPlan(context,
reinterpret_cast<TfLiteIntArray**>(&graph_info));
// Currently we copy the input / output between the subgraphs. This isn't
// optimized yet.
int active_branch_subgraph_index =
cond_value ? op_data->then_subgraph_index : op_data->else_subgraph_index;
for (size_t i = 0;
i < graph_info->NumSubgraphInputs(active_branch_subgraph_index); ++i) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, i + 1);
TfLiteEvalTensor* subgraph_input =
graph_info->GetSubgraphInput(active_branch_subgraph_index, i);
// These checks must occur in Eval since TfLiteEvalTensors are not available
// during Prepare.
size_t input_bytes;
size_t subgraph_input_bytes;
TF_LITE_ENSURE_OK(context, TfLiteEvalTensorByteLength(input, &input_bytes));
TF_LITE_ENSURE_OK(context, TfLiteEvalTensorByteLength(
subgraph_input, &subgraph_input_bytes));
TF_LITE_ENSURE_TYPES_EQ(context, input->type, subgraph_input->type);
TF_LITE_ENSURE_EQ(context, input_bytes, subgraph_input_bytes);
memcpy(subgraph_input->data.raw, input->data.raw, input_bytes);
}
TF_LITE_ENSURE_OK(context,
graph_info->InvokeSubgraph(active_branch_subgraph_index));
for (size_t i = 0;
i < graph_info->NumSubgraphOutputs(active_branch_subgraph_index); ++i) {
const TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, i);
TfLiteEvalTensor* subgraph_output =
graph_info->GetSubgraphOutput(active_branch_subgraph_index, i);
// These checks must occur in Eval since TfLiteEvalTensors are not available
// during Prepare.
size_t output_bytes;
size_t subgraph_output_bytes;
TF_LITE_ENSURE_OK(context,
TfLiteEvalTensorByteLength(output, &output_bytes));
TF_LITE_ENSURE_OK(context, TfLiteEvalTensorByteLength(
subgraph_output, &subgraph_output_bytes));
TF_LITE_ENSURE_TYPES_EQ(context, output->type, subgraph_output->type);
TF_LITE_ENSURE_EQ(context, output_bytes, subgraph_output_bytes);
memcpy(output->data.raw, subgraph_output->data.raw, output_bytes);
}
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_IF() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite

View File

@@ -16,6 +16,8 @@ limitations under the License.
#include "tensorflow/lite/micro/kernels/kernel_runner.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"
#include "tensorflow/lite/micro/test_helpers.h"
namespace tflite {
namespace micro {
@@ -37,7 +39,8 @@ KernelRunner::KernelRunner(const TfLiteRegistration& registration,
kKernelRunnerBuffer_,
kKernelRunnerBufferSize_)),
registration_(registration),
tensors_(tensors) {
tensors_(tensors),
mock_micro_graph_(allocator_) {
// Prepare TfLiteContext:
context_.impl_ = static_cast<void*>(this);
context_.ReportError = ReportOpError;
@@ -47,6 +50,8 @@ KernelRunner::KernelRunner(const TfLiteRegistration& registration,
context_.AllocatePersistentBuffer = AllocatePersistentBuffer;
context_.RequestScratchBufferInArena = RequestScratchBufferInArena;
context_.GetScratchBuffer = GetScratchBuffer;
context_.GetExecutionPlan = GetGraph;
context_.recommended_num_threads = 0;
// Prepare TfLiteNode:
node_.inputs = inputs;
@@ -157,5 +162,15 @@ void KernelRunner::ReportOpError(struct TfLiteContext* context,
va_end(args);
}
TfLiteStatus KernelRunner::GetGraph(struct TfLiteContext* context,
TfLiteIntArray** args) {
TFLITE_DCHECK(context != nullptr);
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
TFLITE_DCHECK(runner != nullptr);
// TODO(b/188226309): Design a cleaner way to get a graph from kernel context.
*args = reinterpret_cast<TfLiteIntArray*>(runner->GetMockGraph());
return kTfLiteOk;
}
} // namespace micro
} // namespace tflite

View File

@@ -18,6 +18,7 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/mock_micro_graph.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"
namespace tflite {
@@ -45,6 +46,10 @@ class KernelRunner {
// passed into the constructor of this class.
TfLiteStatus Invoke();
// Returns a pointer to the internal MockMicroGraph which KernelRunner uses
// to stub out MicroGraph methods and track invocations on each subgraph.
MockMicroGraph* GetMockGraph() { return &mock_micro_graph_; }
protected:
static TfLiteTensor* GetTensor(const struct TfLiteContext* context,
int tensor_index);
@@ -57,6 +62,11 @@ class KernelRunner {
static void* GetScratchBuffer(TfLiteContext* context, int buffer_index);
static void ReportOpError(struct TfLiteContext* context, const char* format,
...);
// This method matches GetExecutionPlan from TfLiteContext since TFLM reuses
// this method to get the MicroGraph from an operator context.
// TODO(b/188226309): Design a cleaner way to get a graph from kernel context.
static TfLiteStatus GetGraph(struct TfLiteContext* context,
TfLiteIntArray** args);
private:
static constexpr int kNumScratchBuffers_ = 12;
@@ -67,6 +77,7 @@ class KernelRunner {
SimpleMemoryAllocator* allocator_ = nullptr;
const TfLiteRegistration& registration_;
TfLiteTensor* tensors_ = nullptr;
MockMicroGraph mock_micro_graph_;
TfLiteContext context_ = {};
TfLiteNode node_ = {};

View File

@@ -49,5 +49,30 @@ PaddingType RuntimePaddingType(TfLitePadding padding) {
}
}
// Relocate tensor dims from FlatBuffer to the persistent storage arena.
// The old dims data is copied to the new storage area.
// The tensor and eval_tensor must be the same tensor.
// Only use during Prepare phase.
TfLiteStatus CreateWritableTensorDimsWithCopy(TfLiteContext* context,
TfLiteTensor* tensor,
TfLiteEvalTensor* eval_tensor) {
TF_LITE_ENSURE(context, tensor != nullptr);
TF_LITE_ENSURE(context, eval_tensor != nullptr);
TF_LITE_ENSURE(context, context->AllocatePersistentBuffer != nullptr);
int ranks = tensor->dims->size;
size_t alloc_size = TfLiteIntArrayGetSizeInBytes(ranks);
TfLiteIntArray* new_dims = static_cast<TfLiteIntArray*>(
context->AllocatePersistentBuffer(context, alloc_size));
TfLiteIntArray* old_dims = tensor->dims;
new_dims->size = ranks;
tensor->dims = new_dims;
eval_tensor->dims = new_dims;
for (int i = 0; i < ranks; i++) {
new_dims->data[i] = old_dims->data[i];
}
return kTfLiteOk;
}
} // namespace micro
} // namespace tflite

View File

@@ -72,6 +72,14 @@ bool HaveSameShapes(const TfLiteEvalTensor* input1,
PaddingType RuntimePaddingType(TfLitePadding padding);
// Relocate tensor dims from FlatBuffer to the persistent storage arena.
// The old dims data is copied to the new storage area.
// The tensor and eval_tensor must be the same tensor.
// Only use during Prepare phase.
TfLiteStatus CreateWritableTensorDimsWithCopy(TfLiteContext* context,
TfLiteTensor* tensor,
TfLiteEvalTensor* eval_tensor);
} // namespace micro
} // namespace tflite

View File

@@ -70,7 +70,13 @@ TfLiteStatus L2Prepare(TfLiteContext* context, TfLiteNode* node) {
// The dims storage is expected to be the same area in memory
// for both TfLiteTensor and TfLiteEvalTensor. This is important
// because TfLiteTensor in the MicroInterpreter is a temporary
// allocation.
// allocation. For the KernelRunner interpreter, TfLiteEvalTensor
// is a temporary allocation. We must therefore relocate the dims
// from the FlatBuffer to the persistent storage arena.
TfLiteEvalTensor* output_eval =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_OK(context, tflite::micro::CreateWritableTensorDimsWithCopy(
context, output, output_eval));
output->dims->data[kBatchRank] = batches;
output->dims->data[kHeightRank] = out_height;
output->dims->data[kWidthRank] = out_width;

View File

@@ -67,8 +67,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
data->input_zero_point = 0;
}
// TODO(ahentz): For some reason our implementations don't support
// activations.
// Our implementations don't currently support activations.
TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone);
return kTfLiteOk;

View File

@@ -68,7 +68,7 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
GetOutputSafe(context, node, kOutputTensor, &output));
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
if (output->type == kTfLiteInt8) {
if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
LeakyReluOpData* data = static_cast<LeakyReluOpData*>(node->user_data);
const auto* params =
static_cast<TfLiteLeakyReluParams*>(node->builtin_data);
@@ -127,6 +127,10 @@ TfLiteStatus LeakyReluEval(TfLiteContext* context, TfLiteNode* node) {
QuantizeLeakyRelu<int8_t>(data, input, output);
return kTfLiteOk;
} break;
case kTfLiteInt16: {
QuantizeLeakyRelu<int16_t>(data, input, output);
return kTfLiteOk;
} break;
default:
TF_LITE_KERNEL_LOG(
context, "Only float32, int8 are supported by LEAKY_RELU, got %s.",

View File

@@ -0,0 +1,150 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/log_softmax.h"
#include <cstddef>
#include <cstdint>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace {
// used only with quantized data
struct LogSoftmaxOpData {
int32_t input_multiplier;
int32_t input_left_shift;
int32_t reverse_scaling_divisor;
int32_t reverse_scaling_right_shift;
int diff_min;
size_t outer_size; // number of tensor elements skipping computation axis
size_t depth; // number of tensor elements on computation axis
};
// input/output tensor index
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
TF_LITE_ENSURE(context, HaveSameShapes(input, output));
if (input->type == kTfLiteInt8) {
node->user_data =
context->AllocatePersistentBuffer(context, sizeof(LogSoftmaxOpData));
auto data = static_cast<LogSoftmaxOpData*>(node->user_data);
// quantization datum
constexpr int32_t kOutputZeroPoint = 127;
constexpr float kOutputScale = 16.0 / 256;
constexpr double kBeta = 1.0;
constexpr int kScaledDiffIntegerBits = 5;
TF_LITE_ENSURE(context, output->params.scale == kOutputScale);
TF_LITE_ENSURE(context, output->params.zero_point == kOutputZeroPoint);
int input_left_shift;
int reverse_scaling_right_shift;
tflite::PreprocessLogSoftmaxScalingExp(
kBeta, static_cast<double>(input->params.scale), kScaledDiffIntegerBits,
&data->input_multiplier, &input_left_shift,
&data->reverse_scaling_divisor, &reverse_scaling_right_shift);
data->input_left_shift = static_cast<int32_t>(input_left_shift);
data->reverse_scaling_right_shift =
static_cast<int32_t>(-reverse_scaling_right_shift);
// diff_min has a negative value, and is used to limit the maximum magnitude
// of the diffs, which are <= 0.
data->diff_min =
-tflite::CalculateInputRadius(kScaledDiffIntegerBits, input_left_shift);
RuntimeShape input_shape = GetTensorShape(input);
const int trailing_dim = input_shape.DimensionsCount() - 1;
data->outer_size =
static_cast<size_t>(FlatSizeSkipDim(input_shape, trailing_dim));
data->depth = static_cast<size_t>(input_shape.Dims(trailing_dim));
}
return kTfLiteOk;
}
TfLiteStatus LogSoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
return CalculateOpData(context, node);
}
TfLiteStatus LogSoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
const LogSoftmaxOpData* data =
static_cast<LogSoftmaxOpData*>(node->user_data);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (input->type) {
case kTfLiteFloat32: {
SoftmaxParams op_params = {};
reference_ops::LogSoftmax(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
}
case kTfLiteInt8: {
SoftmaxParams op_params = {};
op_params.input_multiplier = data->input_multiplier;
op_params.input_left_shift = data->input_left_shift;
op_params.reverse_scaling_divisor = data->reverse_scaling_divisor;
op_params.reverse_scaling_right_shift = data->reverse_scaling_right_shift;
op_params.diff_min = data->diff_min;
reference_ops::LogSoftmax(op_params, data->outer_size, data->depth,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
}
default:
TF_LITE_KERNEL_LOG(context,
"LOG_SOFTMAX only supports float32, int8, got %s.",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
}
} // namespace
TfLiteRegistration Register_LOG_SOFTMAX() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/LogSoftmaxPrepare,
/*invoke=*/LogSoftmaxEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
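
A quick check on the fixed int8 quantization enforced above (a sketch, not kernel code): with scale 16/256 and zero point 127, the dequantized output (q - 127) * 0.0625 covers [-15.9375, 0], which matches log-softmax values always being <= 0.
#include <cstdio>
int main() {
  const float scale = 16.0f / 256.0f;  // 0.0625
  const int zero_point = 127;
  std::printf("min: %g\n", (-128 - zero_point) * scale);  // -15.9375
  std::printf("max: %g\n", (127 - zero_point) * scale);   // 0
  return 0;
}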

View File

@@ -1,4 +1,4 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/kernels/logical.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
@@ -19,60 +21,17 @@ limitations under the License.
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace logical {
namespace {
// Input/output tensor index.
constexpr int kInputTensor1 = 0;
constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;
TfLiteStatus LogicalImpl(TfLiteContext* context, TfLiteNode* node,
bool (*func)(bool, bool)) {
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
if (tflite::micro::HaveSameShapes(input1, input2)) {
reference_ops::BinaryFunction<bool, bool, bool>(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<bool>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<bool>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<bool>(output), func);
} else {
reference_ops::BroadcastBinaryFunction4DSlow<bool, bool, bool>(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<bool>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<bool>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<bool>(output), func);
}
return kTfLiteOk;
}
bool LogicalOr(bool x, bool y) { return x || y; }
TfLiteStatus LogicalOrEval(TfLiteContext* context, TfLiteNode* node) {
return LogicalImpl(context, node, LogicalOr);
}
bool LogicalAnd(bool x, bool y) { return x && y; }
TfLiteStatus LogicalAndEval(TfLiteContext* context, TfLiteNode* node) {
return LogicalImpl(context, node, LogicalAnd);
}
} // namespace
} // namespace logical
TfLiteRegistration Register_LOGICAL_OR() {
// Init, Free, Prepare, Eval are satisfying the Interface required by
@@ -80,7 +39,7 @@ TfLiteRegistration Register_LOGICAL_OR() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/logical::LogicalOrEval,
/*invoke=*/LogicalOrEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
@@ -93,13 +52,11 @@ TfLiteRegistration Register_LOGICAL_AND() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/logical::LogicalAndEval,
/*invoke=*/LogicalAndEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,35 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LOGICAL_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_LOGICAL_H_
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
namespace tflite {
// Input/output tensor index.
extern const int kLogicalInputTensor1;
extern const int kLogicalInputTensor2;
extern const int kLogicalOutputTensor;
TfLiteStatus LogicalImpl(TfLiteContext* context, TfLiteNode* node,
bool (*func)(bool, bool));
bool LogicalOr(bool x, bool y);
bool LogicalAnd(bool x, bool y);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_LOGICAL_H_

View File

@@ -0,0 +1,63 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/logical.h"
namespace tflite {
// Input/output tensor index.
const int kLogicalInputTensor1 = 0;
const int kLogicalInputTensor2 = 1;
const int kLogicalOutputTensor = 0;
TfLiteStatus LogicalImpl(TfLiteContext* context, TfLiteNode* node,
bool (*func)(bool, bool)) {
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kLogicalInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kLogicalInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kLogicalOutputTensor);
if (tflite::micro::HaveSameShapes(input1, input2)) {
reference_ops::BinaryFunction<bool, bool, bool>(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<bool>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<bool>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<bool>(output), func);
} else {
reference_ops::BroadcastBinaryFunction4DSlow<bool, bool, bool>(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<bool>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<bool>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<bool>(output), func);
}
return kTfLiteOk;
}
bool LogicalOr(bool x, bool y) { return x || y; }
bool LogicalAnd(bool x, bool y) { return x && y; }
} // namespace tflite
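The point of splitting LogicalImpl, LogicalOr and LogicalAnd into logical.h / logical_common.cc is that the element-wise machinery, including the broadcast fallback, becomes reusable by other kernels. A minimal hypothetical sketch assuming only what the new header declares; the XOR predicate and eval below are made up for illustration and are not part of the tree:

#include "tensorflow/lite/micro/kernels/logical.h"

namespace tflite {
namespace {

// Hypothetical predicate: any bool(bool, bool) function can be plugged in.
bool LogicalXor(bool x, bool y) { return x != y; }

// Hypothetical eval for a LOGICAL_XOR-style kernel; LogicalImpl handles both
// the same-shape fast path and the 4D broadcast case.
TfLiteStatus LogicalXorEval(TfLiteContext* context, TfLiteNode* node) {
  return LogicalImpl(context, node, LogicalXor);
}

}  // namespace
}  // namespace tflite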

View File

@@ -1,4 +1,4 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -24,71 +24,24 @@ limitations under the License.
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/logistic.h"
namespace tflite {
namespace ops {
namespace micro {
namespace activations {
namespace {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
struct OpData {
int32_t input_zero_point;
int32_t input_range_radius;
int32_t input_multiplier;
int input_left_shift;
};
TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
OpData* data) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
if (input->type == kTfLiteInt8) {
TF_LITE_ENSURE_EQ(context, output->params.zero_point,
std::numeric_limits<int8_t>::min());
static constexpr int kInputIntegerBits = 4;
const double input_real_multiplier =
static_cast<double>(input->params.scale) *
static_cast<double>(1 << (31 - kInputIntegerBits));
data->input_zero_point = input->params.zero_point;
const double q = std::frexp(input_real_multiplier, &data->input_left_shift);
data->input_multiplier = static_cast<int32_t>(TfLiteRound(q * (1ll << 31)));
data->input_range_radius =
CalculateInputRadius(kInputIntegerBits, data->input_left_shift, 31);
}
return kTfLiteOk;
}
} // namespace
void* LogisticInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus LogisticPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
return CalculateArithmeticOpData(context, node, data);
return context->AllocatePersistentBuffer(context, sizeof(OpDataLogistic));
}
TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
tflite::micro::GetEvalInput(context, node, kLogisticInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
tflite::micro::GetEvalOutput(context, node, kLogisticOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
OpDataLogistic* data = static_cast<OpDataLogistic*>(node->user_data);
if (input->type == kTfLiteFloat32) {
switch (output->type) {
@@ -133,18 +86,16 @@ TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteOk;
}
} // namespace activations
} // namespace
TfLiteRegistration Register_LOGISTIC() {
return {/*init=*/activations::LogisticInit,
return {/*init=*/LogisticInit,
/*free=*/nullptr,
/*prepare=*/activations::LogisticPrepare,
/*invoke=*/activations::LogisticEval,
/*prepare=*/LogisticPrepare,
/*invoke=*/LogisticEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,42 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LOGISTIC_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_LOGISTIC_H_
#include <cstdint>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
namespace tflite {
extern const int kLogisticInputTensor;
extern const int kLogisticOutputTensor;
struct OpDataLogistic {
int32_t input_zero_point;
int32_t input_range_radius;
int32_t input_multiplier;
int input_left_shift;
};
TfLiteStatus CalculateArithmeticOpDataLogistic(TfLiteContext* context,
TfLiteNode* node,
OpDataLogistic* data);
TfLiteStatus LogisticPrepare(TfLiteContext* context, TfLiteNode* node);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_LOGISTIC_H_

View File

@@ -0,0 +1,68 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h"
#include "tensorflow/lite/kernels/internal/reference/logistic.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/logistic.h"
namespace tflite {
const int kLogisticInputTensor = 0;
const int kLogisticOutputTensor = 0;
TfLiteStatus CalculateArithmeticOpDataLogistic(TfLiteContext* context,
TfLiteNode* node,
OpDataLogistic* data) {
const TfLiteTensor* input = GetInput(context, node, kLogisticInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kLogisticOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
if (input->type == kTfLiteInt8) {
TF_LITE_ENSURE_EQ(context, output->params.zero_point,
std::numeric_limits<int8_t>::min());
static constexpr int kInputIntegerBits = 4;
const double input_real_multiplier =
static_cast<double>(input->params.scale) *
static_cast<double>(1 << (31 - kInputIntegerBits));
data->input_zero_point = input->params.zero_point;
const double q = std::frexp(input_real_multiplier, &data->input_left_shift);
data->input_multiplier = static_cast<int32_t>(TfLiteRound(q * (1ll << 31)));
data->input_range_radius =
CalculateInputRadius(kInputIntegerBits, data->input_left_shift, 31);
}
return kTfLiteOk;
}
TfLiteStatus LogisticPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpDataLogistic* data = static_cast<OpDataLogistic*>(node->user_data);
return CalculateArithmeticOpDataLogistic(context, node, data);
}
} // namespace tflite
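A worked pass through CalculateArithmeticOpDataLogistic for a concrete scale (editorial example, not from the diff): suppose the int8 input scale is 1/128. With kInputIntegerBits = 4, input_real_multiplier = (1/128) * 2^(31 - 4) = 2^20. std::frexp(2^20) returns the fraction 0.5 with exponent 21, so input_left_shift becomes 21 and input_multiplier = round(0.5 * 2^31) = 2^30 = 1073741824. The quantized logistic kernel then uses this multiplier/shift pair to rescale inputs into the fixed-point format the reference implementation expects.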

View File

@@ -1,4 +1,4 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -32,23 +32,40 @@ namespace tflite {
// have their Register function declarations in the tflite namespace.
TfLiteRegistration Register_ADD_N();
TfLiteRegistration Register_AVERAGE_POOL_2D();
TfLiteRegistration Register_BATCH_TO_SPACE_ND();
TfLiteRegistration Register_CAST();
TfLiteRegistration Register_CONV_2D();
TfLiteRegistration Register_CUMSUM();
TfLiteRegistration Register_DEPTH_TO_SPACE();
TfLiteRegistration Register_DEPTHWISE_CONV_2D();
TfLiteRegistration Register_DIV();
TfLiteRegistration Register_ELU();
TfLiteRegistration Register_EXP();
TfLiteRegistration Register_EXPAND_DIMS();
TfLiteRegistration Register_FILL();
TfLiteRegistration Register_FLOOR_DIV();
TfLiteRegistration Register_FLOOR_MOD();
TfLiteRegistration Register_GATHER();
TfLiteRegistration Register_GATHER_ND();
TfLiteRegistration Register_HARD_SWISH();
TfLiteRegistration Register_IF();
TfLiteRegistration Register_L2_POOL_2D();
TfLiteRegistration Register_LEAKY_RELU();
TfLiteRegistration Register_LOG_SOFTMAX();
TfLiteRegistration Register_LOGICAL_AND();
TfLiteRegistration Register_LOGICAL_OR();
TfLiteRegistration Register_LOGISTIC();
TfLiteRegistration Register_MAX_POOL_2D();
TfLiteRegistration Register_QUANTIZE();
TfLiteRegistration Register_RELU();
TfLiteRegistration Register_RELU6();
TfLiteRegistration Register_RESIZE_BILINEAR();
TfLiteRegistration Register_SHAPE();
TfLiteRegistration Register_SOFTMAX();
TfLiteRegistration Register_SPACE_TO_BATCH_ND();
TfLiteRegistration Register_SPACE_TO_DEPTH();
TfLiteRegistration Register_SQUEEZE();
TfLiteRegistration Register_SVDF();
TfLiteRegistration Register_TRANSPOSE();
TfLiteRegistration Register_TRANSPOSE_CONV();
TfLiteRegistration Register_ZEROS_LIKE();
@@ -59,7 +76,6 @@ TfLiteRegistration Register_ABS();
TfLiteRegistration Register_ADD();
TfLiteRegistration Register_ARG_MAX();
TfLiteRegistration Register_ARG_MIN();
TfLiteRegistration Register_AVERAGE_POOL_2D();
TfLiteRegistration Register_CEIL();
// TODO(b/160234179): Change custom OPs to also return by value.
TfLiteRegistration* Register_CIRCULAR_BUFFER();
@@ -70,16 +86,11 @@ TfLiteRegistration Register_EQUAL();
TfLiteRegistration Register_FLOOR();
TfLiteRegistration Register_GREATER();
TfLiteRegistration Register_GREATER_EQUAL();
TfLiteRegistration Register_HARD_SWISH();
TfLiteRegistration Register_LESS();
TfLiteRegistration Register_LESS_EQUAL();
TfLiteRegistration Register_LOG();
TfLiteRegistration Register_LOGICAL_AND();
TfLiteRegistration Register_LOGICAL_NOT();
TfLiteRegistration Register_LOGICAL_OR();
TfLiteRegistration Register_LOGISTIC();
TfLiteRegistration Register_MAXIMUM();
TfLiteRegistration Register_MAX_POOL_2D();
TfLiteRegistration Register_MEAN();
TfLiteRegistration Register_MINIMUM();
TfLiteRegistration Register_MUL();
@@ -90,8 +101,6 @@ TfLiteRegistration Register_PAD();
TfLiteRegistration Register_PADV2();
TfLiteRegistration Register_PRELU();
TfLiteRegistration Register_REDUCE_MAX();
TfLiteRegistration Register_RELU();
TfLiteRegistration Register_RELU6();
TfLiteRegistration Register_RESHAPE();
TfLiteRegistration Register_RESIZE_NEAREST_NEIGHBOR();
TfLiteRegistration Register_ROUND();

View File

@@ -62,7 +62,7 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
if (output->type == kTfLiteInt8) {
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, params->activation, output, &data->output_activation_min,
&data->output_activation_max));
@@ -104,42 +104,21 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node, const OpData* data,
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
if (output->type == kTfLiteInt8) {
if (need_broadcast) {
reference_integer_ops::BroadcastMul4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
} else {
reference_integer_ops::Mul(op_params,
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
} else if (output->type == kTfLiteUInt8) {
if (need_broadcast) {
reference_integer_ops::BroadcastMul4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else {
reference_integer_ops::Mul(op_params,
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
}
if (need_broadcast) {
reference_integer_ops::BroadcastMul4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
} else {
reference_integer_ops::Mul(op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
}
@@ -203,7 +182,6 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (input1->type) {
case kTfLiteUInt8:
case kTfLiteInt8:
EvalQuantized(context, node, data, input1, input2, output);
break;

View File

@@ -1,4 +1,4 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -15,163 +15,34 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/reference/pooling.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/pooling.h"
namespace tflite {
namespace ops {
namespace micro {
namespace pooling {
namespace {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
struct OpData {
TfLitePaddingValues padding;
int32_t activation_min;
int32_t activation_max;
float activation_min_f32;
float activation_max_f32;
};
TfLiteStatus CalculateOpData(const TfLiteContext* context,
const TfLitePoolParams* params,
const TfLiteTensor* input,
const TfLiteTensor* output, OpData* data) {
// input: batch, height, width, channel
int height = SizeOfDimension(input, 1);
int width = SizeOfDimension(input, 2);
int out_height, out_width;
data->padding = ComputePaddingHeightWidth(
params->stride_height, params->stride_width,
/*dilation_rate_height=*/1,
/*dilation_rate_width=*/1, height, width, params->filter_height,
params->filter_width, params->padding, &out_height, &out_width);
return kTfLiteOk;
}
void AverageEvalFloat(const TfLiteContext* context, const TfLiteNode* node,
const TfLitePoolParams* params, const OpData* data,
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.float_activation_min = data->activation_min_f32;
op_params.float_activation_max = data->activation_max_f32;
reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
const TfLitePoolParams* params, const OpData* data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output) {
TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8);
PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.quantized_activation_min = data->activation_min;
op_params.quantized_activation_max = data->activation_max;
if (input->type == kTfLiteUInt8) {
reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else {
reference_integer_ops::AveragePool(
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
}
void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLitePoolParams* params, const OpData* data,
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
tflite::PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.float_activation_min = data->activation_min_f32;
op_params.float_activation_max = data->activation_max_f32;
reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLitePoolParams* params, const OpData* data,
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
tflite::PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.quantized_activation_min = data->activation_min;
op_params.quantized_activation_max = data->activation_max;
if (input->type == kTfLiteUInt8) {
reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else {
reference_integer_ops::MaxPool(
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
}
} // namespace
TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
const OpDataPooling* data =
static_cast<const OpDataPooling*>(node->user_data);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
micro::GetEvalInput(context, node, kPoolingInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
micro::GetEvalOutput(context, node, kPoolingOutputTensor);
// Inputs and outputs share the same type, guaranteed by the converter.
switch (input->type) {
case kTfLiteFloat32:
AverageEvalFloat(context, node, params, data, input, output);
AveragePoolingEvalFloat(context, node, params, data, input, output);
break;
case kTfLiteUInt8:
case kTfLiteInt8:
AverageEvalQuantized(context, node, params, data, input, output);
AveragePoolingEvalQuantized(context, node, params, data, input, output);
break;
default:
TF_LITE_KERNEL_LOG(context, "Input type %s is not currently supported",
@@ -186,20 +57,20 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
const OpDataPooling* data =
static_cast<const OpDataPooling*>(node->user_data);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
micro::GetEvalInput(context, node, kPoolingInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
micro::GetEvalOutput(context, node, kPoolingOutputTensor);
switch (input->type) {
case kTfLiteFloat32:
MaxEvalFloat(context, node, params, data, input, output);
MaxPoolingEvalFloat(context, node, params, data, input, output);
break;
case kTfLiteUInt8:
case kTfLiteInt8:
MaxEvalQuantized(context, node, params, data, input, output);
MaxPoolingEvalQuantized(context, node, params, data, input, output);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.",
@@ -211,42 +82,16 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
return context->AllocatePersistentBuffer(context, sizeof(OpDataPooling));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, data));
if (input->type == kTfLiteFloat32) {
CalculateActivationRange(params->activation, &data->activation_min_f32,
&data->activation_max_f32);
} else if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) {
CalculateActivationRangeQuantized(context, params->activation, output,
&data->activation_min,
&data->activation_max);
}
return kTfLiteOk;
}
} // namespace pooling
} // namespace
TfLiteRegistration Register_AVERAGE_POOL_2D() {
return {/*init=*/pooling::Init,
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/pooling::Prepare,
/*invoke=*/pooling::AverageEval,
/*prepare=*/PoolingPrepare,
/*invoke=*/AverageEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
@@ -254,16 +99,14 @@ TfLiteRegistration Register_AVERAGE_POOL_2D() {
}
TfLiteRegistration Register_MAX_POOL_2D() {
return {/*init=*/pooling::Init,
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/pooling::Prepare,
/*invoke=*/pooling::MaxEval,
/*prepare=*/PoolingPrepare,
/*invoke=*/MaxEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,71 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_POOLING_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_POOLING_H_
#include <cstdint>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
namespace tflite {
extern const int kPoolingInputTensor;
extern const int kPoolingOutputTensor;
struct OpDataPooling {
TfLitePaddingValues padding;
int32_t activation_min;
int32_t activation_max;
float activation_min_f32;
float activation_max_f32;
};
TfLiteStatus CalculateOpDataPooling(const TfLiteContext* context,
const TfLitePoolParams* params,
const TfLiteTensor* input,
const TfLiteTensor* output,
OpDataPooling* data);
TfLiteStatus PoolingPrepare(TfLiteContext* context, TfLiteNode* node);
void AveragePoolingEvalFloat(const TfLiteContext* context,
const TfLiteNode* node,
const TfLitePoolParams* params,
const OpDataPooling* data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output);
void AveragePoolingEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
const TfLitePoolParams* params,
const OpDataPooling* data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output);
void MaxPoolingEvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLitePoolParams* params, const OpDataPooling* data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output);
void MaxPoolingEvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLitePoolParams* params,
const OpDataPooling* data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_POOLING_H_
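With the shared declarations above, a port can mix its own fast paths with the reference ones. A rough, hypothetical sketch of what that reuse is expected to look like, assuming only the names declared in this header plus the micro kernel_util.h helpers:

#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/pooling.h"

namespace tflite {
namespace {

// Hypothetical port-specific AverageEval: float stays on the shared reference
// path, while the int8 branch is where an optimized routine would slot in.
TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
  auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
  const OpDataPooling* data =
      static_cast<const OpDataPooling*>(node->user_data);
  const TfLiteEvalTensor* input =
      micro::GetEvalInput(context, node, kPoolingInputTensor);
  TfLiteEvalTensor* output =
      micro::GetEvalOutput(context, node, kPoolingOutputTensor);
  if (input->type == kTfLiteFloat32) {
    AveragePoolingEvalFloat(context, node, params, data, input, output);
  } else {
    // An optimized int8 routine would go here; the shared reference
    // quantized path is used as a stand-in.
    AveragePoolingEvalQuantized(context, node, params, data, input, output);
  }
  return kTfLiteOk;
}

}  // namespace
}  // namespace tflite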

View File

@@ -0,0 +1,163 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h"
#include "tensorflow/lite/kernels/internal/reference/pooling.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/pooling.h"
namespace tflite {
const int kPoolingInputTensor = 0;
const int kPoolingOutputTensor = 0;
TfLiteStatus CalculateOpDataPooling(const TfLiteContext* context,
const TfLitePoolParams* params,
const TfLiteTensor* input,
const TfLiteTensor* output,
OpDataPooling* data) {
// input: batch, height, width, channel
int height = SizeOfDimension(input, 1);
int width = SizeOfDimension(input, 2);
int out_height, out_width;
data->padding = ComputePaddingHeightWidth(
params->stride_height, params->stride_width,
/*dilation_rate_height=*/1,
/*dilation_rate_width=*/1, height, width, params->filter_height,
params->filter_width, params->padding, &out_height, &out_width);
return kTfLiteOk;
}
TfLiteStatus PoolingPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
OpDataPooling* data = static_cast<OpDataPooling*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kPoolingInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kPoolingOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_STATUS(
CalculateOpDataPooling(context, params, input, output, data));
if (input->type == kTfLiteFloat32) {
CalculateActivationRange(params->activation, &data->activation_min_f32,
&data->activation_max_f32);
} else if (input->type == kTfLiteInt8) {
CalculateActivationRangeQuantized(context, params->activation, output,
&data->activation_min,
&data->activation_max);
}
return kTfLiteOk;
}
void AveragePoolingEvalFloat(const TfLiteContext* context,
const TfLiteNode* node,
const TfLitePoolParams* params,
const OpDataPooling* data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output) {
PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.float_activation_min = data->activation_min_f32;
op_params.float_activation_max = data->activation_max_f32;
reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
void AveragePoolingEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
const TfLitePoolParams* params,
const OpDataPooling* data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output) {
TFLITE_DCHECK(input->type == kTfLiteInt8);
PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.quantized_activation_min = data->activation_min;
op_params.quantized_activation_max = data->activation_max;
reference_integer_ops::AveragePool(
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
void MaxPoolingEvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLitePoolParams* params, const OpDataPooling* data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output) {
tflite::PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.float_activation_min = data->activation_min_f32;
op_params.float_activation_max = data->activation_max_f32;
reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
void MaxPoolingEvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLitePoolParams* params,
const OpDataPooling* data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output) {
tflite::PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.quantized_activation_min = data->activation_min;
op_params.quantized_activation_max = data->activation_max;
reference_integer_ops::MaxPool(op_params,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
} // namespace tflite

View File

@@ -57,6 +57,7 @@ TfLiteStatus PrepareQuantizeReference(TfLiteContext* context,
if ((input->type == kTfLiteInt16 && output->type == kTfLiteInt8) ||
(input->type == kTfLiteInt8 && output->type == kTfLiteInt8) ||
(input->type == kTfLiteInt8 && output->type == kTfLiteInt16) ||
(input->type == kTfLiteInt8 && output->type == kTfLiteInt32) ||
(input->type == kTfLiteInt16 && output->type == kTfLiteInt16) ||
(input->type == kTfLiteInt16 && output->type == kTfLiteInt32)) {
@@ -145,6 +146,13 @@ TfLiteStatus EvalQuantizeReference(TfLiteContext* context, TfLiteNode* node) {
data->input_zero_point, data->quantization_params.zero_point,
tflite::micro::GetTensorData<int8_t>(output));
break;
case kTfLiteInt16:
reference_ops::Requantize(
tflite::micro::GetTensorData<int8_t>(input), size,
data->requantize_output_multiplier, data->requantize_output_shift,
data->input_zero_point, data->quantization_params.zero_point,
tflite::micro::GetTensorData<int16_t>(output));
break;
case kTfLiteInt32:
reference_ops::Requantize(
tflite::micro::GetTensorData<int8_t>(input), size,

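For context on the new Requantize cases (standard requantization math, stated here rather than taken from the diff): the op computes q_out = round((q_in - input_zero_point) * input_scale / output_scale) + output_zero_point and clamps the result to the output type's range, with the scale ratio pre-computed into requantize_output_multiplier and requantize_output_shift at prepare time. The added int8 -> int16 and int8 -> int32 branches reuse exactly that arithmetic and only change the destination type the result is clamped and stored to.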
View File

@@ -103,14 +103,15 @@ TfLiteStatus PrepareMeanOrSum(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, 0);
OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
const TfLiteTensor* output = GetOutput(context, node, 0);
if (input->type == kTfLiteInt8) {
if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) {
const double real_multiplier = static_cast<double>(input->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(real_multiplier, &op_data->multiplier, &op_data->shift);
}
int output_size = NumElements(output);
if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) {
if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8 ||
input->type == kTfLiteInt16) {
context->RequestScratchBufferInArena(context, output_size * sizeof(int32_t),
&op_data->temp_buffer_idx);
op_data->input_zp = input->params.zero_point;
@@ -213,6 +214,43 @@ TfLiteStatus EvalMean(TfLiteContext* context, TfLiteNode* node) {
temp_buffer, false));
}
} break;
case kTfLiteInt16: {
// Defer to specialized implementation for 4D Mean across axes 1 & 2.
if (params->keep_dims && special_case_4d_axes_1_and_2) {
reference_integer_ops::Mean(
op_params, op_data->multiplier, op_data->shift,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int16_t>(input), op_data->input_zp,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output), op_data->output_zp);
} else if (op_data->input_zp == op_data->output_zp &&
op_data->input_scale == op_data->output_scale) {
int32_t* temp_buffer = static_cast<int32_t*>(
context->GetScratchBuffer(context, op_data->temp_buffer_idx));
TF_LITE_ENSURE(
context,
reference_ops::Mean(tflite::micro::GetTensorData<int16_t>(input),
input->dims->data, input->dims->size,
tflite::micro::GetTensorData<int16_t>(output),
output->dims->data, output->dims->size,
tflite::micro::GetTensorData<int>(axis),
num_axis, params->keep_dims, temp_index,
resolved_axis, temp_buffer));
} else {
int32_t* temp_buffer = static_cast<int32_t*>(
context->GetScratchBuffer(context, op_data->temp_buffer_idx));
TF_LITE_ENSURE(
context,
reference_ops::QuantizedMeanOrSum(
tflite::micro::GetTensorData<int16_t>(input), op_data->input_zp,
op_data->input_scale, input->dims->data, input->dims->size,
tflite::micro::GetTensorData<int16_t>(output),
op_data->output_zp, op_data->output_scale, output->dims->data,
output->dims->size, tflite::micro::GetTensorData<int>(axis),
num_axis, params->keep_dims, temp_index, resolved_axis,
temp_buffer, false));
}
} break;
case kTfLiteUInt8: {
// Defer to specialized implementation for 4D Mean across axes 1 & 2.
if (params->keep_dims && special_case_4d_axes_1_and_2) {

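Two worked notes on the new int16 path (editorial, not part of the diff): the rescaling factor in PrepareMeanOrSum is simply input_scale / output_scale, so for example an input scale of 0.5 and an output scale of 0.25 give a real multiplier of 2.0, which QuantizeMultiplier stores as the Q31 fraction 2^30 (i.e. 0.5) together with a left shift of 2. The scratch buffer is requested as output_size * sizeof(int32_t) because the per-output partial sums do not fit in 16 bits: reducing even a 256-element axis of int16 values can reach about 256 * 32767 ~= 8.4e6, so accumulation runs in int32 before the final rescale back to int16.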
View File

@@ -0,0 +1,116 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/resize_bilinear.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace {
constexpr int kInputTensor = 0;
constexpr int kSizeTensor = 1;
constexpr int kOutputTensor = 0;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* size = GetInput(context, node, kSizeTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
TF_LITE_ENSURE_EQ(context, NumDimensions(size), 1);
TF_LITE_ENSURE_EQ(context, size->type, kTfLiteInt32);
output->type = input->type;
TF_LITE_ENSURE_MSG(context, IsConstantTensor(size),
"Non constant size tensor not supported");
// Ensure params are valid.
auto* params =
reinterpret_cast<TfLiteResizeBilinearParams*>(node->builtin_data);
if (params->half_pixel_centers && params->align_corners) {
TF_LITE_KERNEL_LOG(
context, "If half_pixel_centers is True, align_corners must be False.");
return kTfLiteError;
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params =
reinterpret_cast<TfLiteResizeBilinearParams*>(node->builtin_data);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
const TfLiteEvalTensor* size =
tflite::micro::GetEvalInput(context, node, kSizeTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
if (output->type == kTfLiteFloat32) {
tflite::ResizeBilinearParams op_params;
op_params.align_corners = params->align_corners;
op_params.half_pixel_centers = params->half_pixel_centers;
reference_ops::ResizeBilinear(op_params,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(size),
tflite::micro::GetTensorData<int32_t>(size),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
} else if (output->type == kTfLiteInt8) {
tflite::ResizeBilinearParams op_params;
op_params.align_corners = params->align_corners;
op_params.half_pixel_centers = params->half_pixel_centers;
reference_ops::ResizeBilinearInteger(
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(size),
tflite::micro::GetTensorData<int32_t>(size),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
} else {
TF_LITE_KERNEL_LOG(context, "Output type is %d, requires float or int8.",
output->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_RESIZE_BILINEAR() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
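For context, a minimal sketch of the coordinate mapping the two flags select, assuming the usual TFLite conventions; the helper name below is made up and is not part of the kernel:

// Hypothetical helper: the source coordinate each output pixel samples from.
float SourceCoord(int out_coord, int in_size, int out_size, bool align_corners,
                  bool half_pixel_centers) {
  float scale = static_cast<float>(in_size) / out_size;
  if (align_corners && out_size > 1) {
    scale = static_cast<float>(in_size - 1) / (out_size - 1);
  }
  if (half_pixel_centers) {
    return (out_coord + 0.5f) * scale - 0.5f;
  }
  return out_coord * scale;
}

Each flag encodes its own alignment convention, which is why Prepare rejects models that set both at once.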

View File

@@ -25,6 +25,21 @@ void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length);
TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node);
// This is the most generic TfLiteRegistration. The actual supported types may
// still be target dependent. The only requirement is that every implementation
// (reference or optimized) must define this function.
TfLiteRegistration Register_SOFTMAX();
#if defined(XTENSA)
// Returns a TfLiteRegistration struct for kernel variant that only supports
// int8 input and int16 output.
TfLiteRegistration Register_SOFTMAX_INT8_INT16();
#else
inline TfLiteRegistration Register_SOFTMAX_INT8_INT16() {
return Register_SOFTMAX();
}
#endif
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_SOFTMAX_H_

View File

@@ -125,10 +125,12 @@ TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
// The exp LUT is only used on negative input values;
// we consider exp(-10.0) insignificant to the accumulation.
gen_lut([](float value) { return std::exp(value); }, -10.0f, 0.0f,
op_data->exp_lut, kInt16LUTArraySize);
gen_lut([](float value) { return 1.0f / (1.0f + value); }, 0.0f, 1.0f,
op_data->one_over_one_plus_x_lut, kInt16LUTArraySize);
gen_lut<float, int16_t, int16_t>(
[](float value) { return std::exp(value); }, -10.0f, 0.0f, -1.0f, 1.0f,
op_data->exp_lut);
gen_lut<float, int16_t, int16_t>(
[](float value) { return 1.0f / (1.0f + value); }, 0.0f, 1.0f, -1.0f,
1.0f, op_data->one_over_one_plus_x_lut);
op_data->zero_point = output->params.zero_point;
op_data->scale = output->params.scale;
}
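A quick number behind the cutoff comment above (editorial, not from the diff): exp(-10) ~= 4.5e-5. With the exp LUT output range now explicitly pinned to [-1.0, 1.0] and stored as int16, one quantization step is roughly 2 / 65536 ~= 3.1e-5, so inputs below the -10.0 cutoff contribute at most one or two LSBs to the accumulation, which is what makes truncating the LUT domain there acceptable.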

View File

@@ -0,0 +1,128 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/space_to_depth.h"
#include <stdint.h>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
constexpr int kBatchRank = 0;
constexpr int kHeightRank = 1;
constexpr int kWidthRank = 2;
constexpr int kDepthRank = 3;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
auto* params =
reinterpret_cast<TfLiteSpaceToDepthParams*>(node->builtin_data);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
auto data_type = output->type;
TF_LITE_ENSURE(context,
data_type == kTfLiteFloat32 || data_type == kTfLiteInt8);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
const int block_size = params->block_size;
const int input_height = input->dims->data[kHeightRank];
const int input_width = input->dims->data[kWidthRank];
int output_height = input_height / block_size;
int output_width = input_width / block_size;
TF_LITE_ENSURE_EQ(context, input_height, output_height * block_size);
TF_LITE_ENSURE_EQ(context, input_width, output_width * block_size);
// Relocate dims to the persistent storage arena before changing them,
// otherwise we'd be modifying temporary copies made by the interpreters each
// time they process the layer.
TfLiteEvalTensor* output_eval =
micro::GetEvalOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_OK(context, micro::CreateWritableTensorDimsWithCopy(
context, output, output_eval));
output->dims->data[kBatchRank] = input->dims->data[kBatchRank];
output->dims->data[kHeightRank] = output_height;
output->dims->data[kWidthRank] = output_width;
output->dims->data[kDepthRank] =
input->dims->data[kDepthRank] * block_size * block_size;
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params =
reinterpret_cast<TfLiteSpaceToDepthParams*>(node->builtin_data);
const TfLiteEvalTensor* input =
micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output = micro::GetEvalOutput(context, node, kOutputTensor);
SpaceToDepthParams op_params;
op_params.block_size = params->block_size;
switch (input->type) { // Already know in/out types are same.
case kTfLiteFloat32:
reference_ops::SpaceToDepth(op_params, micro::GetTensorShape(input),
micro::GetTensorData<float>(input),
micro::GetTensorShape(output),
micro::GetTensorData<float>(output));
break;
case kTfLiteInt8:
reference_ops::SpaceToDepth(op_params, micro::GetTensorShape(input),
micro::GetTensorData<int8_t>(input),
micro::GetTensorShape(output),
micro::GetTensorData<int8_t>(output));
break;
default:
TF_LITE_KERNEL_LOG(
context, "SPACE_TO_DEPTH only supports FLOAT32 and INT8, got %s.",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_SPACE_TO_DEPTH() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
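A worked example of the shape logic in Prepare (editorial, not part of the diff): with block_size = 2 and a 1x4x6x3 input, output_height = 4 / 2 = 2, output_width = 6 / 2 = 3, and the output depth becomes 3 * 2 * 2 = 12, so the output dims are rewritten to 1x2x3x12. The two TF_LITE_ENSURE_EQ checks on height and width are what reject inputs that are not exact multiples of block_size.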

View File

@@ -167,6 +167,13 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
break;
case kTfLiteInt16:
reference_ops::StridedSlice(
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int16_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);

View File

@@ -19,6 +19,7 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/add.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
@@ -62,12 +63,17 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteSubParams* params,
OpData* data) {
data->requires_broadcast = !HaveSameShapes(input1, input2);
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
output->type == kTfLiteInt16) {
// 8bit -> 8bit general quantized path, with general rescalings
data->input1_offset = -input1->params.zero_point;
data->input2_offset = -input2->params.zero_point;
data->output_offset = output->params.zero_point;
data->left_shift = 20;
// The shift is set to 15 for the 16-bit case and to 20 for the 8-bit case.
// For 16-bit, 65535 << 15 is still less than 1 << 31, so the addition fits
// in a 32-bit accumulator.
data->left_shift = output->type == kTfLiteInt16 ? 15 : 20;
const float twice_max_input_scale =
2 * std::max(input1->params.scale, input2->params.scale);
const double real_input1_multiplier =
@@ -84,6 +90,9 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteSubParams* params,
QuantizeMultiplierSmallerThanOneExp(
real_input2_multiplier, &data->input2_multiplier, &data->input2_shift);
// Use add kernel for 16-bit sub, since it supports output requantization.
// This matches behavior in TFLite.
data->input2_multiplier *= (output->type == kTfLiteInt16) ? -1 : 1;
QuantizeMultiplierSmallerThanOneExp(
real_output_multiplier, &data->output_multiplier, &data->output_shift);
@@ -151,25 +160,25 @@ TfLiteStatus EvalSubQuantized(TfLiteContext* context, TfLiteNode* node,
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
tflite::ArithmeticParams op_params;
op_params.left_shift = data->left_shift;
op_params.input1_offset = data->input1_offset;
op_params.input1_multiplier = data->input1_multiplier;
op_params.input1_shift = data->input1_shift;
op_params.input2_offset = data->input2_offset;
op_params.input2_multiplier = data->input2_multiplier;
op_params.input2_shift = data->input2_shift;
op_params.output_offset = data->output_offset;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
SetActivationParams(data->output_activation_min,
data->output_activation_max, &op_params);
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
tflite::ArithmeticParams op_params;
op_params.left_shift = data->left_shift;
op_params.input1_offset = data->input1_offset;
op_params.input1_multiplier = data->input1_multiplier;
op_params.input1_shift = data->input1_shift;
op_params.input2_offset = data->input2_offset;
op_params.input2_multiplier = data->input2_multiplier;
op_params.input2_shift = data->input2_shift;
op_params.output_offset = data->output_offset;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
SetActivationParams(data->output_activation_min, data->output_activation_max,
&op_params);
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
if (output->type == kTfLiteInt8) {
switch (output->type) {
case kTfLiteInt8: {
if (need_broadcast) {
tflite::reference_ops::BroadcastSubSlow(
op_params, tflite::micro::GetTensorShape(input1),
@@ -187,27 +196,53 @@ TfLiteStatus EvalSubQuantized(TfLiteContext* context, TfLiteNode* node,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
} else {
if (need_broadcast) {
tflite::reference_ops::BroadcastSubSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else {
tflite::reference_ops::Sub(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
}
break;
}
case kTfLiteInt16: {
if (need_broadcast) {
tflite::reference_ops::BroadcastAdd4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int16_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int16_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
} else {
tflite::reference_ops::Add(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int16_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int16_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output), false);
}
break;
}
case kTfLiteUInt8: {
if (need_broadcast) {
tflite::reference_ops::BroadcastSubSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else {
tflite::reference_ops::Sub(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
}
break;
}
default:
TF_LITE_KERNEL_LOG(context, "Quantized type %s not currently supported.",
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
return kTfLiteOk;
}
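The kTfLiteInt16 case above calls the Add kernels and relies on the sign flip applied to input2_multiplier in CalculateOpData. A simplified, non-fixed-point sketch of why that reuse is sound (illustrative only, not the real kernel math):

#include <cstdint>

// Scaled-arithmetic view, with the fixed-point multipliers and shifts of the
// real kernels omitted:
//   Add: out = q1 * m1 + q2 * m2
//   Sub: out = q1 * m1 - q2 * m2 = q1 * m1 + q2 * (-m2)
// so subtraction can reuse the Add kernel once m2 has been negated.
inline int32_t ScaledAdd(int32_t q1, int32_t m1, int32_t q2, int32_t m2) {
  return q1 * m1 + q2 * m2;
}
inline int32_t ScaledSub(int32_t q1, int32_t m1, int32_t q2, int32_t m2) {
  return ScaledAdd(q1, m1, q2, -m2);  // Sub expressed through Add.
}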
@@ -226,7 +261,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
if (output->type == kTfLiteFloat32) {
EvalSub(context, node, params, &data, input1, input2, output);
} else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
} else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
output->type == kTfLiteInt16) {
TF_LITE_ENSURE_OK(context, EvalSubQuantized(context, node, params, &data,
input1, input2, output));
} else {

View File

@@ -0,0 +1,112 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/transpose.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
namespace tflite {
namespace {
struct TransposeContext {
TransposeContext(TfLiteContext* context, TfLiteNode* node) {
input = GetInput(context, node, 0);
perm = GetInput(context, node, 1);
output = GetOutput(context, node, 0);
}
const TfLiteTensor* input;
const TfLiteTensor* perm;
TfLiteTensor* output;
};
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
TransposeContext op_context(context, node);
// Ensure validity of input tensor.
TF_LITE_ENSURE_MSG(context, NumDimensions(op_context.input) <= 5,
"Transpose op only supports 1D-5D input arrays.");
TF_LITE_ENSURE_TYPES_EQ(context, op_context.input->type,
op_context.output->type);
int dims = NumDimensions(op_context.input);
const int32_t* perm_data = GetTensorData<int32_t>(op_context.perm);
// Ensure validity of the permutations tensor as a 1D tensor.
TF_LITE_ENSURE_EQ(context, NumDimensions(op_context.perm), 1);
TF_LITE_ENSURE_EQ(context, op_context.perm->dims->data[0], dims);
for (int idx = 0; idx < dims; ++idx) {
TF_LITE_ENSURE_MSG(context, (perm_data[idx] >= 0 && perm_data[idx] < dims),
"Transpose op permutations array is out of bounds.");
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TransposeContext op_context(context, node);
const int32_t* perm_data = GetTensorData<int32_t>(op_context.perm);
const int size = op_context.perm->dims->data[0];
TransposeParams params;
params.perm_count = size;
for (int i = 0; i < size; ++i) {
params.perm[i] = perm_data[i];
}
// The Transpose kernel only rearranges values; it performs no numeric
// evaluation on each cell. It is therefore safe to implement it per size of
// scalar type, which keeps the total code size in a reasonable range.
switch (op_context.input->type) {
case kTfLiteFloat32:
reference_ops::Transpose(params, GetTensorShape(op_context.input),
GetTensorData<float>(op_context.input),
GetTensorShape(op_context.output),
GetTensorData<float>(op_context.output));
break;
case kTfLiteInt8:
reference_ops::Transpose(params, GetTensorShape(op_context.input),
GetTensorData<int8_t>(op_context.input),
GetTensorShape(op_context.output),
GetTensorData<int8_t>(op_context.output));
break;
default:
TF_LITE_KERNEL_LOG(context,
"Type %s is currently not supported by Transpose. "
"Only float32 and int8 are supported",
TfLiteTypeGetName(op_context.input->type));
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_TRANSPOSE() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
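For intuition about the perm tensor validated in Prepare, a self-contained sketch with assumed shapes and values (not taken from this diff): a 2x3 input with perm = {1, 0} becomes a 3x2 output where output[d0][d1] = input[d1][d0].

#include <cstdio>

int main() {
  const int input[2][3] = {{1, 2, 3}, {4, 5, 6}};
  int output[3][2];
  // perm = {1, 0}: output dimension 0 reads input dimension 1 and vice versa.
  for (int r = 0; r < 3; ++r) {
    for (int c = 0; c < 2; ++c) {
      output[r][c] = input[c][r];
    }
  }
  printf("%d %d\n", output[0][1], output[2][0]);  // Prints "4 3".
  return 0;
}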

View File

@@ -47,6 +47,10 @@ struct OpData {
// A scratch buffer is required for quantized implementations.
int scratch_buffer_index;
// TODO(b/192090531): Remove this once all 8x16 transpose conv models use
// 64-bit biases.
int bias_converted_buffer_index;
// Multiplier and shift arrays are required for the int8 implementation.
int32_t* per_channel_output_multiplier;
int32_t* per_channel_output_shift;
@@ -103,9 +107,21 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
&data->params.output_multiplier, &data->params.output_shift,
&data->params.quantized_activation_min,
&data->params.quantized_activation_max,
data->per_channel_output_multiplier,
reinterpret_cast<int*>(data->per_channel_output_shift),
data->per_channel_output_multiplier, data->per_channel_output_shift,
output_channels));
// TODO(b/192090531): Remove this once all 8x16 transpose conv models use
// 64-bit biases.
if (input->type == kTfLiteInt16) {
TFLITE_DCHECK(filter->type == kTfLiteInt8);
TFLITE_DCHECK(output->type == kTfLiteInt16);
if (bias->type == kTfLiteInt16) {
TFLITE_DCHECK(
context->RequestScratchBufferInArena(
context, GetTensorShape(bias).FlatSize() * sizeof(std::int64_t),
&(data->bias_converted_buffer_index)) == kTfLiteOk);
}
}
}
return kTfLiteOk;
}
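The bias-conversion buffer requested above follows the usual TFLM scratch-buffer pattern: reserve an index while preparing, resolve it to a pointer at eval time. A generic fragment of that pattern (assumes a TfLiteContext* context inside a kernel's Prepare/Eval; size and names are placeholders, not from this diff):

// In Prepare(): reserve arena space and remember the returned index.
int scratch_index = -1;
if (context->RequestScratchBufferInArena(context, /*bytes=*/1024,
                                         &scratch_index) != kTfLiteOk) {
  return kTfLiteError;
}

// In Eval(): the stored index is resolved to a live pointer.
int64_t* scratch =
    static_cast<int64_t*>(context->GetScratchBuffer(context, scratch_index));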
@@ -154,8 +170,17 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
&(data->scratch_buffer_index)) == kTfLiteOk);
}
// Quantized 16x8 kernels use an int64 scratch buffer.
if (input->type == kTfLiteInt16) {
TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
TFLITE_DCHECK(context->RequestScratchBufferInArena(
context,
GetTensorShape(output).FlatSize() * sizeof(std::int64_t),
&(data->scratch_buffer_index)) == kTfLiteOk);
}
// All per-channel quantized tensors need valid zero point and scale arrays.
if (input->type == kTfLiteInt8) {
if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) {
TF_LITE_ENSURE_EQ(context, filter->quantization.type,
kTfLiteAffineQuantization);
@@ -212,8 +237,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const OpData& data = *(static_cast<const OpData*>(node->user_data));
TF_LITE_ENSURE_EQ(context, input->type, output->type);
TF_LITE_ENSURE_MSG(context, input->type == filter->type,
"Hybrid models are not supported on TFLite Micro.");
TF_LITE_ENSURE_MSG(
context,
input->type == filter->type ||
(input->type == kTfLiteInt16 && filter->type == kTfLiteInt8),
"Hybrid models are not supported on TFLite Micro.");
switch (input->type) { // Already know in/out types are same.
case kTfLiteFloat32: {
@@ -245,6 +273,44 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
tflite::micro::GetTensorShape(nullptr), nullptr, scratch_buffer);
break;
}
case kTfLiteInt16: {
std::int64_t* scratch_buffer = static_cast<int64_t*>(
context->GetScratchBuffer(context, data.scratch_buffer_index));
// TODO(b/192090531): Remove this once all 8x16 transpose conv models use
// 64-bit biases.
if (bias->type == kTfLiteInt16) {
std::int64_t* bias_converted_buffer =
static_cast<int64_t*>(context->GetScratchBuffer(
context, data.bias_converted_buffer_index));
for (int i = 0; i < tflite::micro::GetTensorShape(bias).FlatSize();
i++) {
bias_converted_buffer[i] = bias->data.i16[i];
}
reference_integer_ops::TransposeConv(
data.params, data.per_channel_output_multiplier,
data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int16_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias), bias_converted_buffer,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output),
tflite::micro::GetTensorShape(nullptr), nullptr, scratch_buffer);
} else {
reference_integer_ops::TransposeConv(
data.params, data.per_channel_output_multiplier,
data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int16_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<std::int64_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output),
tflite::micro::GetTensorShape(nullptr), nullptr, scratch_buffer);
}
break;
}
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);

View File

@@ -15,8 +15,28 @@ limitations under the License.
#include "tensorflow/lite/micro/memory_planner/greedy_memory_planner.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_string.h"
namespace tflite {
namespace {
// Returns a character representing a numbered buffer
// for GreedyMemoryPlanner::PrintMemoryPlan()
char GetOrdinalCharacter(int i) {
if (i < 10) {
return '0' + i;
} else if (i < 36) {
return 'a' + (i - 10);
} else if (i < 62) {
return 'A' + (i - 36);
}
return '*';
}
} // namespace
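GetOrdinalCharacter() labels buffers with digits, then lowercase, then uppercase letters before falling back to '*'. A small self-contained check of the boundary values (the helper is restated locally because the original lives in an anonymous namespace):

#include <cassert>

// Local restatement of the mapping, used only to check the boundaries.
char OrdinalForTest(int i) {
  if (i < 10) return '0' + i;
  if (i < 36) return 'a' + (i - 10);
  if (i < 62) return 'A' + (i - 36);
  return '*';
}

int main() {
  assert(OrdinalForTest(0) == '0' && OrdinalForTest(9) == '9');
  assert(OrdinalForTest(10) == 'a' && OrdinalForTest(35) == 'z');
  assert(OrdinalForTest(36) == 'A' && OrdinalForTest(61) == 'Z');
  assert(OrdinalForTest(62) == '*');
  return 0;
}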
// Simple stable in-place sort function. Not time-efficient for large arrays.
// Would normally be in an anonymous namespace to keep it private, but we want
// to be able to test it externally.
@@ -297,8 +317,6 @@ size_t GreedyMemoryPlanner::GetMaximumMemorySize() {
while (entry) {
BufferRequirements* requirements =
&requirements_[entry->requirements_index];
// TODO(b/148246793): Update all size and offset variables types from
// int to size_t
const size_t current_size = entry->offset + requirements->size;
if (current_size > max_size) {
max_size = current_size;
@@ -311,17 +329,14 @@ size_t GreedyMemoryPlanner::GetMaximumMemorySize() {
return max_size;
}
void GreedyMemoryPlanner::PrintMemoryPlan(ErrorReporter* error_reporter) {
void GreedyMemoryPlanner::PrintMemoryPlan() {
CalculateOffsetsIfNeeded();
for (int i = 0; i < buffer_count_; ++i) {
TF_LITE_REPORT_ERROR(
error_reporter,
"Planner buffer ID: %d, calculated offset: %d, size required: %d, "
"first_time_created: %d, "
"last_time_used: %d",
i, buffer_offsets_[i], requirements_[i].size,
requirements_[i].first_time_used, requirements_[i].last_time_used);
MicroPrintf("%c (id=%d): size=%d, offset=%d, first_used=%d last_used=%d",
GetOrdinalCharacter(i), i, requirements_[i].size,
buffer_offsets_[i], requirements_[i].first_time_used,
requirements_[i].last_time_used);
}
constexpr int kLineWidth = 80;
@@ -345,6 +360,7 @@ void GreedyMemoryPlanner::PrintMemoryPlan(ErrorReporter* error_reporter) {
for (int c = 0; c < kLineWidth; ++c) {
line[c] = '.';
}
int memory_use = 0;
for (int i = 0; i < buffer_count_; ++i) {
BufferRequirements* requirements = &requirements_[i];
if ((t < requirements->first_time_used) ||
@@ -356,28 +372,21 @@ void GreedyMemoryPlanner::PrintMemoryPlan(ErrorReporter* error_reporter) {
continue;
}
const int size = requirements->size;
memory_use += size;
const int line_start = (offset * kLineWidth) / max_size;
const int line_end = ((offset + size) * kLineWidth) / max_size;
for (int n = line_start; n < line_end; ++n) {
if (line[n] == '.') {
char display;
if (i < 10) {
display = '0' + i;
} else if (i < 36) {
display = 'a' + (i - 10);
} else if (i < 62) {
display = 'A' + (i - 36);
} else {
display = '*';
}
line[n] = display;
line[n] = GetOrdinalCharacter(i);
} else {
line[n] = '!';
}
}
}
line[kLineWidth] = 0;
TF_LITE_REPORT_ERROR(error_reporter, "%s", (const char*)line);
MicroPrintf("%s%d: %s (%dk)", t < 10 ? " " : "", t, (const char*)line,
(memory_use + 1023) / 1024);
}
}
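The column math in the rendering loop is a simple proportional projection of each buffer onto an 80-character line. A worked example with assumed numbers (not from a real plan):

#include <cstdio>

int main() {
  const int kLineWidth = 80;
  const int max_size = 4096;  // assumed arena high-water mark
  const int offset = 1024;
  const int size = 512;
  const int line_start = (offset * kLineWidth) / max_size;         // 20
  const int line_end = ((offset + size) * kLineWidth) / max_size;  // 30
  // The buffer spans 10 of 80 columns, i.e. 512/4096 = 1/8 of the arena.
  printf("columns %d..%d\n", line_start, line_end - 1);
  return 0;
}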

View File

@@ -81,7 +81,7 @@ class GreedyMemoryPlanner : public MemoryPlanner {
int buffer_index, int* offset) override;
// Prints an ascii-art diagram of the buffer layout plan.
void PrintMemoryPlan(ErrorReporter* error_reporter);
void PrintMemoryPlan();
// Debug method to check whether any buffer allocations are overlapping. This
// is an O(N^2) complexity operation, so only use for testing.

View File

@@ -29,7 +29,7 @@ limitations under the License.
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/memory_planner/greedy_memory_planner.h"
#include "tensorflow/lite/micro/memory_planner/memory_planner.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/schema/schema_utils.h"
@@ -211,6 +211,8 @@ TfLiteStatus AllocationInfoBuilder::AddTensors(const SubGraph* subgraph,
}
}
uint32_t operators_size = NumSubgraphOperators(subgraph);
for (size_t i = 0; i < subgraph->inputs()->size(); ++i) {
const int tensor_index = subgraph->inputs()->Get(i);
AllocationInfo* current = &info_[tensor_index];
@@ -221,11 +223,11 @@ TfLiteStatus AllocationInfoBuilder::AddTensors(const SubGraph* subgraph,
for (size_t i = 0; i < subgraph->outputs()->size(); ++i) {
const int tensor_index = subgraph->outputs()->Get(i);
AllocationInfo* current = &info_[tensor_index];
current->last_used = subgraph->operators()->size() - 1;
current->last_used = operators_size - 1;
}
// Figure out when the first and last use of each tensor is.
for (int i = (subgraph->operators()->size() - 1); i >= 0; --i) {
for (int i = (operators_size - 1); i >= 0; --i) {
const auto* op = subgraph->operators()->Get(i);
for (size_t n = 0; n < op->inputs()->size(); ++n) {
const int tensor_index = op->inputs()->Get(n);
@@ -242,47 +244,11 @@ TfLiteStatus AllocationInfoBuilder::AddTensors(const SubGraph* subgraph,
}
}
}
// Sanity check for valid tensor lifetime.
for (size_t i = 0; i < tensor_count_; ++i) {
AllocationInfo* current = &info_[i];
// Even though the tensor appears to be read-only, it may still need to be
// allocated.
const bool appears_read_only =
(current->first_created == -1) && (current->last_used != -1);
const bool has_partial_lifetime =
!appears_read_only &&
((current->first_created == -1) || (current->last_used == -1));
if (has_partial_lifetime && current->needs_allocating) {
TF_LITE_REPORT_ERROR(
reporter_,
"Logic error in memory planner, tensor %d has an invalid lifetime: "
"first_created: %d, last_used: %d",
i, current->first_created, current->last_used);
return kTfLiteError;
}
}
return kTfLiteOk;
}
// The tensor offsets will be encoded in the metadata:[Metadata] field of the
// Model. The following encoding applies:
//
// | Metadata component | Value |
// | name:string | “OfflineMemoryAllocation” |
// | buffer:unit | Index of buffer containing memory allocation data |
//
// The buffer contents for the memory allocation is a list of 32-bit integers.
// The number of tensors, n, must be equal to the number of tensors defined in
// the model. The following encoding applies:
//
// | Offset | Value |
// | 0 | Offline allocation format version set to 0 |
// | 1 | Subgraph index to which this allocation applies |
// | 2 | Number offsets following: n |
// | 3 | Arena byte offset of tensor #0 or -1 to allocate at runtime |
// | 4 | Arena byte offset of tensor #1 or -1 to allocate at runtime |
// | 3+(n-1) | Arena byte offset of tensor #(n-1) or -1 to allocate at runtime |
// Get the offline tensor allocation plan. See
// micro/docs/memory_management.md for more info.
TfLiteStatus AllocationInfoBuilder::GetOfflinePlannedOffsets(
const Model* model, const int32_t** offline_planner_offsets) {
if (model->metadata()) {
@@ -404,18 +370,18 @@ TfLiteStatus FlatBufferVectorToTfLiteTypeArray(
// Big-endian architecture can not use the same memory layout as
// flatbuffers::Vector<kFlatBufferVectorType>. Allocate from the tail and
// copy values from the flatbuffer into the newly allocated chunk.
kTfLiteArrayType* array =
reinterpret_cast<kTfLiteArrayType*>(allocator->AllocateFromTail(
TfLiteIntArrayGetSizeInBytes(flatbuffer_array->Length()),
kTfLiteArrayType* array = reinterpret_cast<kTfLiteArrayType*>(
allocator->SimpleMemoryAllocator::AllocateFromTail(
TfLiteIntArrayGetSizeInBytes(flatbuffer_array->size()),
alignof(kTfLiteArrayType)));
if (array == nullptr) {
TF_LITE_REPORT_ERROR(
error_reporter,
"Failed to allocate %d bytes of memory to copy an array.",
TfLiteIntArrayGetSizeInBytes(flatbuffer_array->Length()));
TfLiteIntArrayGetSizeInBytes(flatbuffer_array->size()));
return kTfLiteError;
}
array->size = flatbuffer_array->Length();
array->size = flatbuffer_array->size();
for (int i = 0; i < array->size; ++i) {
array->data[i] = flatbuffer_array->Get(i);
}
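The element-by-element copy above is needed because FlatBuffers serializes scalars little-endian, so a big-endian host cannot alias the vector's bytes directly. For illustration only, the alternative would be a per-element byte swap (this is not what the code does; it copies through the accessor instead):

#include <cstdint>

// Swaps the byte order of one 32-bit value.
inline uint32_t ByteSwap32(uint32_t u) {
  return (u >> 24) | ((u >> 8) & 0x0000FF00u) | ((u << 8) & 0x00FF0000u) |
         (u << 24);
}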
@@ -624,33 +590,46 @@ MicroAllocator* MicroAllocator::Create(SimpleMemoryAllocator* memory_allocator,
return allocator;
}
TfLiteStatus MicroAllocator::StartModelAllocation(
const Model* model, const MicroOpResolver& op_resolver,
NodeAndRegistration** node_and_registrations,
TfLiteEvalTensor** eval_tensors) {
SubgraphAllocations* MicroAllocator::StartModelAllocation(const Model* model) {
TFLITE_DCHECK(model != nullptr);
if (model_is_allocating_) {
TF_LITE_REPORT_ERROR(error_reporter_,
"MicroAllocator: Model allocation started before "
"finishing previously allocated model");
return kTfLiteError;
return nullptr;
}
model_is_allocating_ = true;
TF_LITE_ENSURE_STATUS(InitScratchBufferData());
TF_LITE_ENSURE_STATUS(AllocateTfLiteEvalTensors(model, eval_tensors));
TF_LITE_ENSURE_STATUS(
AllocateNodeAndRegistrations(model, node_and_registrations));
TF_LITE_ENSURE_STATUS(PrepareNodeAndRegistrationDataFromFlatbuffer(
model, op_resolver, *node_and_registrations));
uint8_t* data_allocator_buffer = memory_allocator_->AllocateFromTail(
sizeof(MicroBuiltinDataAllocator), alignof(MicroBuiltinDataAllocator));
builtin_data_allocator_ =
new (data_allocator_buffer) MicroBuiltinDataAllocator(memory_allocator_);
return kTfLiteOk;
if (InitScratchBufferData() != kTfLiteOk) {
return nullptr;
}
// Allocate struct to store eval tensors, nodes and registrations.
SubgraphAllocations* output = reinterpret_cast<SubgraphAllocations*>(
memory_allocator_->AllocateFromTail(
sizeof(SubgraphAllocations) * model->subgraphs()->size(),
alignof(SubgraphAllocations)));
if (output == nullptr) {
MicroPrintf("Failed to allocate memory for model metadata.");
return nullptr;
}
if (AllocateTfLiteEvalTensors(model, output) != kTfLiteOk ||
AllocateNodeAndRegistrations(model, output) != kTfLiteOk) {
return nullptr;
}
return output;
}
TfLiteStatus MicroAllocator::FinishModelAllocation(
const Model* model, TfLiteEvalTensor* eval_tensors,
const Model* model, SubgraphAllocations* subgraph_allocations,
ScratchBufferHandle** scratch_buffer_handles) {
if (!model_is_allocating_) {
TF_LITE_REPORT_ERROR(error_reporter_,
@@ -659,15 +638,20 @@ TfLiteStatus MicroAllocator::FinishModelAllocation(
return kTfLiteError;
}
const SubGraph* subgraph = GetSubGraphFromModel(model);
TFLITE_DCHECK(subgraph != nullptr);
TF_LITE_ENSURE_STATUS(AllocateScratchBufferHandles(
scratch_buffer_handles, scratch_buffer_request_count_));
TF_LITE_ENSURE_STATUS(CommitStaticMemoryPlan(model, subgraph, eval_tensors,
*scratch_buffer_handles));
TF_LITE_ENSURE_STATUS(AllocateVariables(subgraph, eval_tensors));
// TODO(b/187993197): Track scratch buffers for each subgraph.
for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs()->size();
subgraph_idx++) {
const SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx);
TFLITE_DCHECK(subgraph != nullptr);
TF_LITE_ENSURE_STATUS(AllocateScratchBufferHandles(
scratch_buffer_handles, scratch_buffer_request_count_));
TF_LITE_ENSURE_STATUS(CommitStaticMemoryPlan(
model, subgraph_allocations[subgraph_idx].tensors,
*scratch_buffer_handles, subgraph_idx));
TF_LITE_ENSURE_STATUS(AllocateVariables(
subgraph, subgraph_allocations[subgraph_idx].tensors));
}
model_is_allocating_ = false;
return kTfLiteOk;
}
@@ -677,6 +661,7 @@ void* MicroAllocator::AllocatePersistentBuffer(size_t bytes) {
}
TfLiteStatus MicroAllocator::RequestScratchBufferInArena(size_t bytes,
int subgraph_idx,
int* buffer_idx) {
// All scratch buffer requests are stored in the head section of the arena
// when a model is in the prepare phase. First align a scratch buffer request
@@ -751,153 +736,72 @@ size_t MicroAllocator::used_bytes() const {
}
TfLiteStatus MicroAllocator::AllocateNodeAndRegistrations(
const Model* model, NodeAndRegistration** node_and_registrations) {
TFLITE_DCHECK(node_and_registrations);
const Model* model, SubgraphAllocations* subgraph_allocations) {
TFLITE_DCHECK(subgraph_allocations != nullptr);
const SubGraph* subgraph = GetSubGraphFromModel(model);
TFLITE_DCHECK(subgraph != nullptr);
for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs()->size();
subgraph_idx++) {
const SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx);
TFLITE_DCHECK(subgraph != nullptr);
NodeAndRegistration* output = reinterpret_cast<NodeAndRegistration*>(
memory_allocator_->AllocateFromTail(
sizeof(NodeAndRegistration) * subgraph->operators()->size(),
alignof(NodeAndRegistration)));
if (output == nullptr) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Failed to allocate memory for node_and_registrations.");
return kTfLiteError;
}
*node_and_registrations = output;
return kTfLiteOk;
}
uint32_t operators_size = NumSubgraphOperators(subgraph);
TfLiteStatus MicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer(
const Model* model, const MicroOpResolver& op_resolver,
NodeAndRegistration* node_and_registrations) {
TFLITE_DCHECK(model != nullptr);
TFLITE_DCHECK(node_and_registrations != nullptr);
const SubGraph* subgraph = GetSubGraphFromModel(model);
TFLITE_DCHECK(subgraph != nullptr);
TfLiteStatus status = kTfLiteOk;
auto* opcodes = model->operator_codes();
MicroBuiltinDataAllocator builtin_data_allocator(memory_allocator_);
for (size_t i = 0; i < subgraph->operators()->size(); ++i) {
const auto* op = subgraph->operators()->Get(i);
const size_t index = op->opcode_index();
if (index >= opcodes->size()) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Missing registration for opcode_index %d\n", index);
// Initialize NodeAndRegistrations for the subgraph.
NodeAndRegistration* output = reinterpret_cast<NodeAndRegistration*>(
memory_allocator_->AllocateFromTail(
sizeof(NodeAndRegistration) * operators_size,
alignof(NodeAndRegistration)));
if (output == nullptr) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Failed to allocate memory for node_and_registrations.");
return kTfLiteError;
}
auto* opcode = (*opcodes)[index];
status =
GetRegistrationFromOpCode(opcode, op_resolver, error_reporter_,
&(node_and_registrations[i].registration));
if (status != kTfLiteOk) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Failed to get registration from op code %s\n ",
EnumNameBuiltinOperator(GetBuiltinCode(opcode)));
return status;
}
const auto* registration = node_and_registrations[i].registration;
if (registration == nullptr) {
TF_LITE_REPORT_ERROR(error_reporter_, "Skipping op for opcode_index %d\n",
index);
return kTfLiteError;
}
BuiltinOperator op_type =
static_cast<BuiltinOperator>(registration->builtin_code);
const char* custom_data = nullptr;
size_t custom_data_size = 0;
unsigned char* builtin_data = nullptr;
if (op_type == BuiltinOperator_CUSTOM) {
// Custom Ops may or may not have a non-null custom_options field.
if (op->custom_options() != nullptr) {
custom_data =
reinterpret_cast<const char*>(op->custom_options()->data());
custom_data_size = op->custom_options()->size();
}
} else {
if (op->custom_options() != nullptr) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Unsupported behavior: found builtin operator %s with custom "
"options.\n",
EnumNameBuiltinOperator(op_type));
return kTfLiteError;
}
MicroOpResolver::BuiltinParseFunction parser =
op_resolver.GetOpDataParser(op_type);
if (parser == nullptr) {
TF_LITE_REPORT_ERROR(error_reporter_, "Did not find a parser for %s",
EnumNameBuiltinOperator(op_type));
return kTfLiteError;
}
TF_LITE_ENSURE_STATUS(parser(op, error_reporter_, &builtin_data_allocator,
(void**)(&builtin_data)));
}
TfLiteIntArray* inputs_array;
TF_LITE_ENSURE_STATUS(internal::FlatBufferVectorToTfLiteTypeArray(
memory_allocator_, error_reporter_, op->inputs(), &inputs_array));
TfLiteIntArray* outputs_array;
TF_LITE_ENSURE_STATUS(internal::FlatBufferVectorToTfLiteTypeArray(
memory_allocator_, error_reporter_, op->outputs(), &outputs_array));
TfLiteNode* node = &(node_and_registrations[i].node);
*node = {};
node->inputs = inputs_array;
node->outputs = outputs_array;
node->builtin_data = reinterpret_cast<void*>(builtin_data);
node->custom_initial_data = custom_data;
node->custom_initial_data_size = custom_data_size;
subgraph_allocations[subgraph_idx].node_and_registrations = output;
}
return kTfLiteOk;
}
TfLiteTensor* MicroAllocator::AllocatePersistentTfLiteTensor(
const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index) {
const SubGraph* subgraph = GetSubGraphFromModel(model);
const Model* model, const SubgraphAllocations* subgraph_allocations,
int tensor_index, int subgraph_index) {
const SubGraph* subgraph = model->subgraphs()->Get(subgraph_index);
TFLITE_DCHECK(subgraph != nullptr);
// This value is allocated from persistent arena space. It is guaranteed to be
// around for the lifetime of the application.
TfLiteTensor* tensor =
AllocatePersistentTfLiteTensorInternal(model, eval_tensors, tensor_index);
TfLiteTensor* tensor = AllocatePersistentTfLiteTensorInternal();
// Populate any fields from the flatbuffer, since this TfLiteTensor struct is
// allocated in the persistent section of the arena, ensure that additional
// allocations also take place in that section of the arena.
if (PopulateTfLiteTensorFromFlatbuffer(model, subgraph, tensor, tensor_index,
/*allocate_temp=*/false) !=
kTfLiteOk) {
if (PopulateTfLiteTensorFromFlatbuffer(
model, tensor, tensor_index, subgraph_index,
/*allocate_temp=*/false) != kTfLiteOk) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Failed to populate a persistent TfLiteTensor struct "
"from flatbuffer data!");
return nullptr;
}
if (eval_tensors != nullptr) {
if (subgraph_allocations != nullptr) {
// Tensor buffers that are allocated at runtime (e.g. non-weight buffers)
// and not located in the flatbuffer are stored on the pre-allocated list of
// TfLiteEvalTensor structs. These structs are the source of truth; simply
// point the corresponding buffer to the new TfLiteTensor data value.
tensor->data.data = eval_tensors[tensor_index].data.data;
tensor->data.data =
subgraph_allocations[subgraph_index].tensors[tensor_index].data.data;
// TfLiteEvalTensor structs must also be the source of truth for the
// TfLiteTensor dims.
tensor->dims =
subgraph_allocations[subgraph_index].tensors[tensor_index].dims;
}
return tensor;
}
TfLiteTensor* MicroAllocator::AllocateTempTfLiteTensor(
const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index) {
const SubGraph* subgraph = GetSubGraphFromModel(model);
const Model* model, const SubgraphAllocations* subgraph_allocations,
int tensor_index, int subgraph_index) {
const SubGraph* subgraph = model->subgraphs()->Get(subgraph_index);
TFLITE_DCHECK(subgraph != nullptr);
// This value is allocated from temporary arena space. It is guaranteed to be
@@ -910,7 +814,8 @@ TfLiteTensor* MicroAllocator::AllocateTempTfLiteTensor(
// Populate any fields from the flatbuffer, since this TfLiteTensor struct is
// allocated in the temp section of the arena, ensure that additional
// allocations also take place in that section of the arena.
if (PopulateTfLiteTensorFromFlatbuffer(model, subgraph, tensor, tensor_index,
if (PopulateTfLiteTensorFromFlatbuffer(model, tensor, tensor_index,
subgraph_index,
/*allocate_temp=*/true) != kTfLiteOk) {
TF_LITE_REPORT_ERROR(
error_reporter_,
@@ -918,12 +823,17 @@ TfLiteTensor* MicroAllocator::AllocateTempTfLiteTensor(
return nullptr;
}
if (eval_tensors != nullptr) {
if (subgraph_allocations != nullptr) {
// Tensor buffers that are allocated at runtime (e.g. non-weight buffers)
// and not located in the flatbuffer are stored on the pre-allocated list of
// TfLiteEvalTensor structs. These structs are the source of truth; simply
// point the corresponding buffer to the new TfLiteTensor data value.
tensor->data.data = eval_tensors[tensor_index].data.data;
tensor->data.data =
subgraph_allocations[subgraph_index].tensors[tensor_index].data.data;
// TfLiteEvalTensor structs must also be the source of truth for the
// TfLiteTensor dims.
tensor->dims =
subgraph_allocations[subgraph_index].tensors[tensor_index].dims;
}
return tensor;
}
@@ -933,38 +843,41 @@ void MicroAllocator::ResetTempAllocations() {
}
TfLiteStatus MicroAllocator::AllocateTfLiteEvalTensors(
const Model* model, TfLiteEvalTensor** eval_tensors) {
TFLITE_DCHECK(eval_tensors != nullptr);
const Model* model, SubgraphAllocations* subgraph_allocations) {
TFLITE_DCHECK(subgraph_allocations != nullptr);
const SubGraph* subgraph = GetSubGraphFromModel(model);
TFLITE_DCHECK(subgraph != nullptr);
for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs()->size();
subgraph_idx++) {
const SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx);
TFLITE_DCHECK(subgraph != nullptr);
size_t alloc_count = subgraph->tensors()->size();
TfLiteEvalTensor* tensors =
reinterpret_cast<TfLiteEvalTensor*>(memory_allocator_->AllocateFromTail(
sizeof(TfLiteEvalTensor) * alloc_count, alignof(TfLiteEvalTensor)));
if (tensors == nullptr) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Failed to allocate memory for context->eval_tensors, "
"%d bytes required",
sizeof(TfLiteEvalTensor) * alloc_count);
return kTfLiteError;
}
for (size_t i = 0; i < alloc_count; ++i) {
TfLiteStatus status = internal::InitializeTfLiteEvalTensorFromFlatbuffer(
memory_allocator_, *subgraph->tensors()->Get(i), model->buffers(),
error_reporter_, &tensors[i]);
if (status != kTfLiteOk) {
TF_LITE_REPORT_ERROR(error_reporter_, "Failed to initialize tensor %d",
i);
size_t alloc_count = subgraph->tensors()->size();
TfLiteEvalTensor* tensors =
reinterpret_cast<TfLiteEvalTensor*>(memory_allocator_->AllocateFromTail(
sizeof(TfLiteEvalTensor) * alloc_count, alignof(TfLiteEvalTensor)));
if (tensors == nullptr) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Failed to allocate memory for context->eval_tensors, "
"%d bytes required",
sizeof(TfLiteEvalTensor) * alloc_count);
return kTfLiteError;
}
for (size_t i = 0; i < alloc_count; ++i) {
TfLiteStatus status = internal::InitializeTfLiteEvalTensorFromFlatbuffer(
memory_allocator_, *subgraph->tensors()->Get(i), model->buffers(),
error_reporter_, &tensors[i]);
if (status != kTfLiteOk) {
TF_LITE_REPORT_ERROR(error_reporter_, "Failed to initialize tensor %d",
i);
return kTfLiteError;
}
}
subgraph_allocations[subgraph_idx].tensors = tensors;
}
*eval_tensors = tensors;
return kTfLiteOk;
}
TfLiteStatus MicroAllocator::AllocateVariables(const SubGraph* subgraph,
TfLiteEvalTensor* eval_tensors) {
for (size_t i = 0; i < subgraph->tensors()->size(); ++i) {
@@ -988,20 +901,20 @@ TfLiteStatus MicroAllocator::AllocateVariables(const SubGraph* subgraph,
return kTfLiteOk;
}
TfLiteTensor* MicroAllocator::AllocatePersistentTfLiteTensorInternal(
const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index) {
TfLiteTensor* MicroAllocator::AllocatePersistentTfLiteTensorInternal() {
return reinterpret_cast<TfLiteTensor*>(memory_allocator_->AllocateFromTail(
sizeof(TfLiteTensor), alignof(TfLiteTensor)));
}
TfLiteStatus MicroAllocator::PopulateTfLiteTensorFromFlatbuffer(
const Model* model, const SubGraph* subgraph, TfLiteTensor* tensor,
int tensor_index, bool allocate_temp) {
const Model* model, TfLiteTensor* tensor, int tensor_index,
int subgraph_idx, bool allocate_temp) {
// TODO(b/162311891): This method serves as a stub to ensure quantized
// allocations in the tail can be recorded. Once the interpreter has APIs for
// accessing buffers on TfLiteEvalTensor this method can be dropped.
return internal::InitializeTfLiteTensorFromFlatbuffer(
memory_allocator_, allocate_temp, *subgraph->tensors()->Get(tensor_index),
memory_allocator_, allocate_temp,
*model->subgraphs()->Get(subgraph_idx)->tensors()->Get(tensor_index),
model->buffers(), error_reporter_, tensor);
}
@@ -1009,20 +922,9 @@ ErrorReporter* MicroAllocator::error_reporter() const {
return error_reporter_;
}
const SubGraph* MicroAllocator::GetSubGraphFromModel(const Model* model) {
auto* subgraphs = model->subgraphs();
if (subgraphs->size() != 1) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Only 1 subgraph is currently supported.\n");
return nullptr;
}
return (*subgraphs)[0];
}
TfLiteStatus MicroAllocator::CommitStaticMemoryPlan(
const Model* model, const SubGraph* subgraph,
TfLiteEvalTensor* eval_tensors,
ScratchBufferHandle* scratch_buffer_handles) {
const Model* model, TfLiteEvalTensor* eval_tensors,
ScratchBufferHandle* scratch_buffer_handles, int subgraph_idx) {
size_t head_usage = 0;
// Create static memory plan
// 1. Calculate AllocationInfo to know the lifetime of each tensor/buffer.
@@ -1034,6 +936,7 @@ TfLiteStatus MicroAllocator::CommitStaticMemoryPlan(
// allocated from the temp section and cleaned up at the bottom of this
// function.
const SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx);
size_t allocation_info_count =
subgraph->tensors()->size() + scratch_buffer_request_count_;
size_t bytes = sizeof(AllocationInfo) * allocation_info_count;
@@ -1096,6 +999,9 @@ TfLiteStatus MicroAllocator::CommitStaticMemoryPlan(
TF_LITE_ENSURE_STATUS(CommitPlan(error_reporter_, &planner,
memory_allocator_->GetHeadBuffer(),
allocation_info, allocation_info_count));
#ifdef TF_LITE_SHOW_MEMORY_USE
planner.PrintMemoryPlan();
#endif
head_usage = planner.GetMaximumMemorySize();
// The head is used to store memory plans for one model at a time during the
@@ -1155,4 +1061,15 @@ internal::ScratchBufferRequest* MicroAllocator::GetScratchBufferRequests() {
alignof(internal::ScratchBufferRequest)));
}
TfLiteStatus MicroAllocator::FlatBufferVectorToTfLiteTypeArray(
const flatbuffers::Vector<int32_t>* flatbuffer_array,
TfLiteIntArray** result) {
return internal::FlatBufferVectorToTfLiteTypeArray(
memory_allocator_, error_reporter_, flatbuffer_array, result);
}
BuiltinDataAllocator* MicroAllocator::GetBuiltinDataAllocator() {
return builtin_data_allocator_;
}
} // namespace tflite

View File

@@ -18,11 +18,11 @@ limitations under the License.
#include <cstddef>
#include <cstdint>
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/micro/flatbuffer_utils.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"
#include "tensorflow/lite/schema/schema_generated.h"
@@ -75,6 +75,13 @@ typedef struct {
uint8_t* data;
} ScratchBufferHandle;
// Stores all per-subgraph allocations. This includes the node and registration
// array, tensor list and scratch buffer handles for each subgraph.
typedef struct {
NodeAndRegistration* node_and_registrations;
TfLiteEvalTensor* tensors;
} SubgraphAllocations;
// Allocator responsible for allocating memory for all intermediate tensors
// necessary to invoke a model.
//
@@ -114,28 +121,31 @@ class MicroAllocator {
static MicroAllocator* Create(SimpleMemoryAllocator* memory_allocator,
ErrorReporter* error_reporter);
// Begin allocating internal resources required for model inference.
// Allocates internal resources required for model inference for each subgraph
// from the arena.
//
// This method will run through the flatbuffer data supplied in the model to
// properly allocate tensor, node, and op registration data. This method is
// expected to be followed with a call to FinishModelAllocation() before
// resuming allocation with another model. All persistent tensor buffers are
// stored in the out-param eval_tensors. This value is allocated from the
// persistent memory arena and will be used to host runtime tensor buffers.
TfLiteStatus StartModelAllocation(
const Model* model, const MicroOpResolver& op_resolver,
NodeAndRegistration** node_and_registrations,
TfLiteEvalTensor** eval_tensors);
// expected to be followed with a call to FinishModelAllocation(). Returns a
// pointer to an array of SubgraphAllocations (also stored in the tail of the
// arena) where each index corresponds to a different subgraph in the model.
// Return value is nullptr if the allocations failed.
SubgraphAllocations* StartModelAllocation(const Model* model);
// Finish allocating internal resources required for model inference.
// This method will plan non-persistent buffers and commit a memory plan to
// the 'head' section of the memory arena. All variable tensor data will also
// be allocated. This method should be called after assigning model resources
// in StartModelAllocation(). The eval_tensors pointer should be the value
// passed into this class during StartModelAllocation(). Scratch buffer
// handles are stored in the out-param `scratch_buffer_handles`. This value
// will be used in `GetScratchBuffer` call to retrieve scratch buffers.
//
// -Plan the memory for activation tensors and scratch buffers.
// -Update eval tensors for each subgraph based on planned offsets.
// -Allocate scratch buffer handles array and update based on planned offsets.
//
// This method should be called after assigning model resources
// in StartModelAllocation(). The subgraph_allocations pointer should be the
// value passed into this class during StartModelAllocation(). Scratch buffer
// handles are stored in the out-param `scratch_buffer_handles` array which is
// allocated in this method. This value will be used in `GetScratchBuffer`
// call to retrieve scratch buffers.
TfLiteStatus FinishModelAllocation(
const Model* model, TfLiteEvalTensor* eval_tensors,
const Model* model, SubgraphAllocations* subgraph_allocations,
ScratchBufferHandle** scratch_buffer_handles);
// Allocates a TfLiteTensor struct and populates the returned value with
@@ -145,17 +155,19 @@ class MicroAllocator {
// class during StartModelAllocation() and contains the source-of-truth for
// buffers.
virtual TfLiteTensor* AllocatePersistentTfLiteTensor(
const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index);
const Model* model, const SubgraphAllocations* subgraph_allocations,
int tensor_index, int subgraph_index);
// Allocates a TfLiteTensor struct and populates the returned value with
// properties from the model flatbuffer. This struct is allocated from
// temporary arena memory and is only guaranteed until a call is made to
// ResetTempAllocations(). The eval_tensors pointer should be the value passed
// into this class during StartModelAllocation() and contains the
// source-of-truth for buffers.
virtual TfLiteTensor* AllocateTempTfLiteTensor(const Model* model,
TfLiteEvalTensor* eval_tensors,
int tensor_index);
// ResetTempAllocations(). subgraph_allocations contains the array of
// TfLiteEvalTensors. If the newly allocated temp at the specified subgraph
// and tensor index is already present in the TfLiteEvalTensor array, its
// data buffer will be re-used.
virtual TfLiteTensor* AllocateTempTfLiteTensor(
const Model* model, const SubgraphAllocations* subgraph_allocations,
int tensor_index, int subgraph_index);
// Resets all temporary allocations. This method should be called after a
// chain of temp allocations (e.g. chain of TfLiteTensor objects via
@@ -171,7 +183,8 @@ class MicroAllocator {
// This method only requests a buffer with a given size to be used after a
// model has finished allocation via FinishModelAllocation(). All requested
// buffers will be accessible by the out-param in that method.
TfLiteStatus RequestScratchBufferInArena(size_t bytes, int* buffer_idx);
TfLiteStatus RequestScratchBufferInArena(size_t bytes, int subgraph_idx,
int* buffer_idx);
// Finish allocating a specific NodeAndRegistration prepare block (kernel
// entry for a model) with a given node ID. This call ensures that any scratch
@@ -183,6 +196,14 @@ class MicroAllocator {
// `FinishModelAllocation`. Otherwise, it will return 0.
size_t used_bytes() const;
// Converts a flatbuffer int32_t array to a TfLiteIntArray, accounting for
// endianness.
TfLiteStatus FlatBufferVectorToTfLiteTypeArray(
const flatbuffers::Vector<int32_t>* flatbuffer_array,
TfLiteIntArray** result);
BuiltinDataAllocator* GetBuiltinDataAllocator();
protected:
MicroAllocator(SimpleMemoryAllocator* memory_allocator,
ErrorReporter* error_reporter);
@@ -192,23 +213,13 @@ class MicroAllocator {
// registration pointers required to represent the inference graph of the
// model.
virtual TfLiteStatus AllocateNodeAndRegistrations(
const Model* model, NodeAndRegistration** node_and_registrations);
// Populates node and registration pointers representing the inference graph
// of the model from values inside the flatbuffer (loaded from the TfLiteModel
// instance). Persistent data (e.g. operator data) is allocated from the
// arena.
virtual TfLiteStatus PrepareNodeAndRegistrationDataFromFlatbuffer(
const Model* model, const MicroOpResolver& op_resolver,
NodeAndRegistration* node_and_registrations);
const Model* model, SubgraphAllocations* subgraph_allocations);
// Allocates the list of persistent TfLiteEvalTensors that are used for the
// "eval" phase of model inference. These structs will be the source of truth
// for all tensor buffers. Allocation results are stored in the out-param
// eval_tensors.
// for all tensor buffers.
virtual TfLiteStatus AllocateTfLiteEvalTensors(
const Model* model, TfLiteEvalTensor** eval_tensors);
const Model* model, SubgraphAllocations* subgraph_allocations);
// Allocates persistent tensor buffers for variable tensors in the subgraph.
virtual TfLiteStatus AllocateVariables(const SubGraph* subgraph,
TfLiteEvalTensor* eval_tensors);
@@ -216,21 +227,19 @@ class MicroAllocator {
// Allocate and return a persistent TfLiteTensor.
// TODO(b/162311891): Drop this method when the interpreter has an API for
// accessing TfLiteEvalTensor structs.
virtual TfLiteTensor* AllocatePersistentTfLiteTensorInternal(
const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index);
virtual TfLiteTensor* AllocatePersistentTfLiteTensorInternal();
// Populates a TfLiteTensor struct with data from the model flatbuffer. Any
// quantization data is allocated from either the tail (persistent) or temp
// sections of the arena based on the allocation flag.
virtual TfLiteStatus PopulateTfLiteTensorFromFlatbuffer(
const Model* model, const SubGraph* subgraph, TfLiteTensor* tensor,
int tensor_index, bool allocate_temp);
virtual TfLiteStatus PopulateTfLiteTensorFromFlatbuffer(const Model* model,
TfLiteTensor* tensor,
int tensor_index,
int subgraph_idx,
bool allocate_temp);
ErrorReporter* error_reporter() const;
// Returns the first subgraph from the model.
const SubGraph* GetSubGraphFromModel(const Model* model);
private:
// Commits a memory plan for all non-persistent buffer allocations in the
// 'head' section of the memory arena. The eval_tensors pointer is the list of
@@ -240,9 +249,8 @@ class MicroAllocator {
// ScratchBufferHandle structs that will point to allocated buffers also in
// the head section.
virtual TfLiteStatus CommitStaticMemoryPlan(
const Model* model, const SubGraph* subgraph,
TfLiteEvalTensor* eval_tensors,
ScratchBufferHandle* scratch_buffer_handles);
const Model* model, TfLiteEvalTensor* eval_tensors,
ScratchBufferHandle* scratch_buffer_handles, int subgraph_idx);
// Allocates an array of ScratchBufferHandle structs in the tail section for a
// given number of handles.
@@ -261,6 +269,9 @@ class MicroAllocator {
// A simple memory allocator that always allocate from the arena tail or head.
SimpleMemoryAllocator* memory_allocator_;
// Allocator used to allocate persistent builtin data.
BuiltinDataAllocator* builtin_data_allocator_;
ErrorReporter* error_reporter_;
bool model_is_allocating_;

View File

@@ -0,0 +1,245 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/micro_graph.h"
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/flatbuffer_utils.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_profiler.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
namespace {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
const char* OpNameFromRegistration(const TfLiteRegistration* registration) {
if (registration->builtin_code == BuiltinOperator_CUSTOM) {
return registration->custom_name;
} else {
return EnumNameBuiltinOperator(BuiltinOperator(registration->builtin_code));
}
}
#endif // !defined(TF_LITE_STRIP_ERROR_STRINGS)
} // namespace
MicroGraph::MicroGraph(TfLiteContext* context, const Model* model,
MicroAllocator* allocator)
: context_(context),
model_(model),
allocator_(allocator),
current_subgraph_index_(0) {
if (model != nullptr) {
subgraphs_ = model->subgraphs();
}
}
MicroGraph::~MicroGraph() {}
TfLiteStatus MicroGraph::InitSubgraphs() {
int previous_subgraph_idx = current_subgraph_index_;
for (size_t subgraph_idx = 0; subgraph_idx < subgraphs_->size();
subgraph_idx++) {
current_subgraph_index_ = subgraph_idx;
uint32_t operators_size = NumSubgraphOperators(model_, subgraph_idx);
for (size_t i = 0; i < operators_size; ++i) {
TfLiteNode* node =
&(subgraph_allocations_[subgraph_idx].node_and_registrations[i].node);
const TfLiteRegistration* registration =
subgraph_allocations_[subgraph_idx]
.node_and_registrations[i]
.registration;
size_t init_data_size;
const char* init_data;
if (registration->builtin_code == BuiltinOperator_CUSTOM) {
init_data = reinterpret_cast<const char*>(node->custom_initial_data);
init_data_size = node->custom_initial_data_size;
} else {
init_data = reinterpret_cast<const char*>(node->builtin_data);
init_data_size = 0;
}
if (registration->init) {
node->user_data =
registration->init(context_, init_data, init_data_size);
}
}
}
current_subgraph_index_ = previous_subgraph_idx;
return kTfLiteOk;
}
TfLiteStatus MicroGraph::PrepareSubgraphs() {
int previous_subgraph_idx = current_subgraph_index_;
for (size_t subgraph_idx = 0; subgraph_idx < subgraphs_->size();
subgraph_idx++) {
current_subgraph_index_ = subgraph_idx;
uint32_t operators_size = NumSubgraphOperators(model_, subgraph_idx);
for (size_t i = 0; i < operators_size; ++i) {
TfLiteNode* node =
&(subgraph_allocations_[subgraph_idx].node_and_registrations[i].node);
const TfLiteRegistration* registration =
subgraph_allocations_[subgraph_idx]
.node_and_registrations[i]
.registration;
if (registration->prepare != nullptr) {
TfLiteStatus prepare_status = registration->prepare(context_, node);
if (prepare_status != kTfLiteOk) {
MicroPrintf("Node %s (number %d) failed to prepare with status %d",
OpNameFromRegistration(registration), i, prepare_status);
return kTfLiteError;
}
}
allocator_->FinishPrepareNodeAllocations(/*node_id=*/i);
}
}
current_subgraph_index_ = previous_subgraph_idx;
return kTfLiteOk;
}
TfLiteStatus MicroGraph::FreeSubgraphs() {
int previous_subgraph_idx = current_subgraph_index_;
for (size_t subgraph_idx = 0; subgraph_idx < subgraphs_->size();
subgraph_idx++) {
current_subgraph_index_ = subgraph_idx;
uint32_t operators_size = NumSubgraphOperators(model_, subgraph_idx);
for (size_t i = 0; i < operators_size; ++i) {
TfLiteNode* node =
&(subgraph_allocations_[subgraph_idx].node_and_registrations[i].node);
const TfLiteRegistration* registration =
subgraph_allocations_[subgraph_idx]
.node_and_registrations[i]
.registration;
// registration is allocated outside the interpreter, so double check to
// make sure it's not nullptr.
if (registration != nullptr && registration->free != nullptr) {
registration->free(context_, node->user_data);
}
}
}
current_subgraph_index_ = previous_subgraph_idx;
return kTfLiteOk;
}
TfLiteStatus MicroGraph::InvokeSubgraph(int subgraph_idx) {
int previous_subgraph_idx = current_subgraph_index_;
current_subgraph_index_ = subgraph_idx;
if (static_cast<size_t>(subgraph_idx) >= subgraphs_->size()) {
MicroPrintf("Accessing subgraph %d but only %d subgraphs found",
subgraph_idx, subgraphs_->size());
return kTfLiteError;
}
uint32_t operators_size = NumSubgraphOperators(model_, subgraph_idx);
for (size_t i = 0; i < operators_size; ++i) {
TfLiteNode* node =
&(subgraph_allocations_[subgraph_idx].node_and_registrations[i].node);
const TfLiteRegistration* registration = subgraph_allocations_[subgraph_idx]
.node_and_registrations[i]
.registration;
// This ifdef is needed (even though ScopedMicroProfiler itself is a no-op with
// -DTF_LITE_STRIP_ERROR_STRINGS) because the function OpNameFromRegistration is
// only defined for builds with the error strings.
#if !defined(TF_LITE_STRIP_ERROR_STRINGS)
ScopedMicroProfiler scoped_profiler(
OpNameFromRegistration(registration),
reinterpret_cast<MicroProfiler*>(context_->profiler));
#endif
TFLITE_DCHECK(registration->invoke);
TfLiteStatus invoke_status = registration->invoke(context_, node);
// All TfLiteTensor structs used in the kernel are allocated from temp
// memory in the allocator. This creates a chain of allocations in the
// temp section. The call below resets the chain of allocations to
// prepare for the next call.
allocator_->ResetTempAllocations();
if (invoke_status == kTfLiteError) {
MicroPrintf("Node %s (number %d) failed to invoke with status %d",
OpNameFromRegistration(registration), i, invoke_status);
return kTfLiteError;
} else if (invoke_status != kTfLiteOk) {
return invoke_status;
}
}
current_subgraph_index_ = previous_subgraph_idx;
return kTfLiteOk;
}
TfLiteStatus MicroGraph::ResetVariableTensors() {
for (size_t subgraph_idx = 0; subgraph_idx < subgraphs_->size();
subgraph_idx++) {
const SubGraph* subgraph = (*subgraphs_)[subgraph_idx];
for (size_t i = 0; i < subgraph->tensors()->size(); ++i) {
auto* tensor = subgraph->tensors()->Get(i);
if (tensor->is_variable()) {
size_t buffer_size;
TF_LITE_ENSURE_STATUS(TfLiteEvalTensorByteLength(
&subgraph_allocations_[subgraph_idx].tensors[i], &buffer_size));
int value = 0;
if (tensor->type() == tflite::TensorType_INT8) {
value = tensor->quantization()->zero_point()->Get(0);
}
memset(subgraph_allocations_[subgraph_idx].tensors[i].data.raw, value,
buffer_size);
}
}
}
return kTfLiteOk;
}
int MicroGraph::NumSubgraphs() { return model_->subgraphs()->size(); }
void MicroGraph::SetSubgraphAllocations(
SubgraphAllocations* subgraph_allocations) {
subgraph_allocations_ = subgraph_allocations;
}
size_t MicroGraph::NumSubgraphInputs(int subgraph_idx) {
return model_->subgraphs()->Get(subgraph_idx)->inputs()->size();
}
TfLiteEvalTensor* MicroGraph::GetSubgraphInput(int subgraph_idx,
int input_idx) {
int tensor_idx =
model_->subgraphs()->Get(subgraph_idx)->inputs()->Get(input_idx);
return &subgraph_allocations_[subgraph_idx].tensors[tensor_idx];
}
size_t MicroGraph::NumSubgraphOutputs(int subgraph_idx) {
return model_->subgraphs()->Get(subgraph_idx)->outputs()->size();
}
TfLiteEvalTensor* MicroGraph::GetSubgraphOutput(int subgraph_idx,
int output_idx) {
int tensor_idx =
model_->subgraphs()->Get(subgraph_idx)->outputs()->Get(output_idx);
return &subgraph_allocations_[subgraph_idx].tensors[tensor_idx];
}
} // namespace tflite
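ResetVariableTensors() above fills int8 variable tensors with their zero point rather than with literal zeros; since real_value = scale * (quantized - zero_point), writing zero_point everywhere represents a real value of 0.0. A small illustrative check with assumed quantization parameters:

#include <cassert>

int main() {
  const float scale = 0.5f;   // assumed
  const int zero_point = -3;  // assumed
  const int q = zero_point;   // what the reset writes into the buffer
  const float real = scale * (q - zero_point);
  assert(real == 0.0f);
  return 0;
}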

View File

@@ -0,0 +1,97 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_GRAPH_H_
#define TENSORFLOW_LITE_MICRO_MICRO_GRAPH_H_
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
// Abstracts the details of interacting with the tflite::Model.
//
// Provides methods to access, initialize, prepare, invoke and free any
// subgraph in the tflite::Model.
class MicroGraph {
public:
// The lifetime of the context, model, and allocator must be at least as long
// as that of the graph object, since this class may need to access them
// at any time.
MicroGraph(TfLiteContext* context, const Model* model,
MicroAllocator* allocator);
virtual ~MicroGraph();
// Sets up builtin data and calls TfLiteRegistration->Init for every operator
// in every subgraph in the model.
virtual TfLiteStatus InitSubgraphs();
// Calls TfLiteRegistration->Prepare for every operator in every subgraph in
// the model.
virtual TfLiteStatus PrepareSubgraphs();
// Calls TfLiteRegistration->Free for every operator in every subgraph in the
// model.
virtual TfLiteStatus FreeSubgraphs();
// Calls TfLiteRegistration->Invoke for every operator in a single subgraph in
// the model.
virtual TfLiteStatus InvokeSubgraph(int subgraph_idx);
// Zeros out all variable tensors in all subgraphs in the model.
virtual TfLiteStatus ResetVariableTensors();
// Number of tensor inputs to a specified subgraph in the model.
virtual size_t NumSubgraphInputs(int subgraph_idx);
// Get the specified input tensor of a specified subgraph in the model.
virtual TfLiteEvalTensor* GetSubgraphInput(int subgraph_idx, int input_idx);
// Number of tensor outputs from a specified subgraph in the model.
virtual size_t NumSubgraphOutputs(int subgraph_idx);
// Get the specified output tensor of a specified subgraph in the model.
virtual TfLiteEvalTensor* GetSubgraphOutput(int subgraph_idx, int output_idx);
// Number of subgraphs in the model.
virtual int NumSubgraphs();
// Hook to pass in subgraph allocations tracked within the interpreter,
// allowing MicroGraph to init / prepare / invoke subgraphs in the model.
void SetSubgraphAllocations(SubgraphAllocations* subgraph_allocations);
// Get the current subgraph index. Within an operator, this is guaranteed
// to be the subgraph of that operator.
int GetCurrentSubgraphIndex() { return current_subgraph_index_; }
// Gets the list of allocations for each subgraph. This is the source of truth
// for all per-subgraph allocation data.
SubgraphAllocations* GetAllocations() { return subgraph_allocations_; }
private:
TfLiteContext* context_;
const Model* model_;
MicroAllocator* allocator_;
SubgraphAllocations* subgraph_allocations_ = nullptr;
int current_subgraph_index_;
const flatbuffers::Vector<flatbuffers::Offset<SubGraph>>* subgraphs_;
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MICRO_GRAPH_H_
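
For orientation, a condensed sketch of the order in which the interpreter in this commit drives a MicroGraph; error handling is elided and context, model, and allocator are assumed to be set up as in the MicroInterpreter constructor:

  tflite::MicroGraph graph(&context, model, allocator);
  tflite::SubgraphAllocations* allocations =
      allocator->StartModelAllocation(model);
  graph.SetSubgraphAllocations(allocations);
  graph.InitSubgraphs();     // TfLiteRegistration->init for every operator
  graph.PrepareSubgraphs();  // TfLiteRegistration->prepare for every operator
  graph.InvokeSubgraph(0);   // run the primary subgraph
  graph.FreeSubgraphs();     // TfLiteRegistration->free at teardown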

View File

@@ -22,87 +22,16 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/tensor_utils.h"
#include "tensorflow/lite/micro/flatbuffer_utils.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/micro/micro_profiler.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/schema/schema_utils.h"
namespace tflite {
namespace {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
const char* OpNameFromRegistration(const TfLiteRegistration* registration) {
if (registration->builtin_code == BuiltinOperator_CUSTOM) {
return registration->custom_name;
} else {
return EnumNameBuiltinOperator(BuiltinOperator(registration->builtin_code));
}
}
#endif // !defined(TF_LITE_STRIP_ERROR_STRINGS)
} // namespace
namespace internal {
ContextHelper::ContextHelper(ErrorReporter* error_reporter,
MicroAllocator* allocator, const Model* model)
: allocator_(allocator), error_reporter_(error_reporter), model_(model) {}
void* ContextHelper::AllocatePersistentBuffer(TfLiteContext* ctx,
size_t bytes) {
return reinterpret_cast<ContextHelper*>(ctx->impl_)
->allocator_->AllocatePersistentBuffer(bytes);
}
TfLiteStatus ContextHelper::RequestScratchBufferInArena(TfLiteContext* ctx,
size_t bytes,
int* buffer_idx) {
ContextHelper* helper = reinterpret_cast<ContextHelper*>(ctx->impl_);
return helper->allocator_->RequestScratchBufferInArena(bytes, buffer_idx);
}
void* ContextHelper::GetScratchBuffer(TfLiteContext* ctx, int buffer_idx) {
ContextHelper* helper = reinterpret_cast<ContextHelper*>(ctx->impl_);
ScratchBufferHandle* handle = helper->scratch_buffer_handles_ + buffer_idx;
return handle->data;
}
void ContextHelper::ReportOpError(struct TfLiteContext* context,
const char* format, ...) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
ContextHelper* helper = static_cast<ContextHelper*>(context->impl_);
va_list args;
va_start(args, format);
TF_LITE_REPORT_ERROR(helper->error_reporter_, format, args);
va_end(args);
#endif
}
TfLiteTensor* ContextHelper::GetTensor(const struct TfLiteContext* context,
int tensor_idx) {
ContextHelper* helper = static_cast<ContextHelper*>(context->impl_);
return helper->allocator_->AllocateTempTfLiteTensor(
helper->model_, helper->eval_tensors_, tensor_idx);
}
TfLiteEvalTensor* ContextHelper::GetEvalTensor(
const struct TfLiteContext* context, int tensor_idx) {
ContextHelper* helper = reinterpret_cast<ContextHelper*>(context->impl_);
return &helper->eval_tensors_[tensor_idx];
}
void ContextHelper::SetTfLiteEvalTensors(TfLiteEvalTensor* eval_tensors) {
eval_tensors_ = eval_tensors;
}
void ContextHelper::SetScratchBufferHandles(
ScratchBufferHandle* scratch_buffer_handles) {
scratch_buffer_handles_ = scratch_buffer_handles;
}
} // namespace internal
MicroInterpreter::MicroInterpreter(const Model* model,
const MicroOpResolver& op_resolver,
@@ -115,10 +44,10 @@ MicroInterpreter::MicroInterpreter(const Model* model,
error_reporter_(error_reporter),
allocator_(*MicroAllocator::Create(tensor_arena, tensor_arena_size,
error_reporter)),
graph_(&context_, model, &allocator_),
tensors_allocated_(false),
initialization_status_(kTfLiteError),
eval_tensors_(nullptr),
context_helper_(error_reporter_, &allocator_, model),
input_tensors_(nullptr),
output_tensors_(nullptr) {
Init(profiler);
@@ -133,122 +62,159 @@ MicroInterpreter::MicroInterpreter(const Model* model,
op_resolver_(op_resolver),
error_reporter_(error_reporter),
allocator_(*allocator),
graph_(&context_, model, allocator),
tensors_allocated_(false),
initialization_status_(kTfLiteError),
eval_tensors_(nullptr),
context_helper_(error_reporter_, &allocator_, model),
input_tensors_(nullptr),
output_tensors_(nullptr) {
Init(profiler);
}
MicroInterpreter::~MicroInterpreter() {
if (node_and_registrations_ != nullptr) {
for (size_t i = 0; i < subgraph_->operators()->size(); ++i) {
TfLiteNode* node = &(node_and_registrations_[i].node);
const TfLiteRegistration* registration =
node_and_registrations_[i].registration;
// registration is allocated outside the interpreter, so double check to
// make sure it's not nullptr;
if (registration != nullptr && registration->free != nullptr) {
registration->free(&context_, node->user_data);
}
}
if (graph_.GetAllocations() != nullptr) {
graph_.FreeSubgraphs();
}
}
void MicroInterpreter::Init(MicroProfiler* profiler) {
const flatbuffers::Vector<flatbuffers::Offset<SubGraph>>* subgraphs =
model_->subgraphs();
if (subgraphs->size() != 1) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Only 1 subgraph is currently supported.\n");
initialization_status_ = kTfLiteError;
return;
}
subgraph_ = (*subgraphs)[0];
context_.impl_ = static_cast<void*>(&context_helper_);
context_.ReportError = context_helper_.ReportOpError;
context_.GetTensor = context_helper_.GetTensor;
context_.GetEvalTensor = context_helper_.GetEvalTensor;
context_.recommended_num_threads = 1;
context_.impl_ = static_cast<void*>(this);
context_.ReportError = ReportOpError;
context_.GetTensor = GetTensor;
context_.GetEvalTensor = GetEvalTensor;
context_.profiler = profiler;
initialization_status_ = kTfLiteOk;
}
TfLiteStatus MicroInterpreter::PrepareNodeAndRegistrationDataFromFlatbuffer() {
for (int subgraph_idx = 0; subgraph_idx < graph_.NumSubgraphs();
subgraph_idx++) {
const SubGraph* subgraph = model_->subgraphs()->Get(subgraph_idx);
TFLITE_DCHECK(subgraph != nullptr);
auto* opcodes = model_->operator_codes();
BuiltinDataAllocator* builtin_data_allocator =
allocator_.GetBuiltinDataAllocator();
uint32_t operators_size = NumSubgraphOperators(subgraph);
for (size_t i = 0; i < operators_size; ++i) {
const auto* op = subgraph->operators()->Get(i);
const size_t index = op->opcode_index();
if (index >= opcodes->size()) {
MicroPrintf("Missing registration for opcode_index %d\n", index);
return kTfLiteError;
}
const auto* opcode = opcodes->Get(index);
TfLiteStatus status =
GetRegistrationFromOpCode(opcode, op_resolver_, error_reporter_,
&(graph_.GetAllocations()[subgraph_idx]
.node_and_registrations[i]
.registration));
if (status != kTfLiteOk) {
MicroPrintf("Failed to get registration from op code %s\n ",
EnumNameBuiltinOperator(GetBuiltinCode(opcode)));
return status;
}
const auto* registration = graph_.GetAllocations()[subgraph_idx]
.node_and_registrations[i]
.registration;
if (registration == nullptr) {
MicroPrintf("Skipping op for opcode_index %d\n", index);
return kTfLiteError;
}
BuiltinOperator op_type =
static_cast<BuiltinOperator>(registration->builtin_code);
const char* custom_data = nullptr;
size_t custom_data_size = 0;
unsigned char* builtin_data = nullptr;
if (op_type == BuiltinOperator_CUSTOM) {
// Custom Ops may or may not have a non-null custom_options field.
if (op->custom_options() != nullptr) {
custom_data =
reinterpret_cast<const char*>(op->custom_options()->data());
custom_data_size = op->custom_options()->size();
}
} else {
if (op->custom_options() != nullptr) {
MicroPrintf(
"Unsupported behavior: found builtin operator %s with custom "
"options.\n",
EnumNameBuiltinOperator(op_type));
return kTfLiteError;
}
MicroOpResolver::BuiltinParseFunction parser =
op_resolver_.GetOpDataParser(op_type);
if (parser == nullptr) {
MicroPrintf("Did not find a parser for %s",
EnumNameBuiltinOperator(op_type));
return kTfLiteError;
}
TF_LITE_ENSURE_STATUS(parser(op, error_reporter_,
builtin_data_allocator,
(void**)(&builtin_data)));
}
TfLiteIntArray* inputs_array;
TF_LITE_ENSURE_STATUS(allocator_.FlatBufferVectorToTfLiteTypeArray(
op->inputs(), &inputs_array));
TfLiteIntArray* outputs_array;
TF_LITE_ENSURE_STATUS(allocator_.FlatBufferVectorToTfLiteTypeArray(
op->outputs(), &outputs_array));
TfLiteNode* node = &(
graph_.GetAllocations()[subgraph_idx].node_and_registrations[i].node);
*node = {};
node->inputs = inputs_array;
node->outputs = outputs_array;
node->builtin_data = reinterpret_cast<void*>(builtin_data);
node->custom_initial_data = custom_data;
node->custom_initial_data_size = custom_data_size;
}
}
return kTfLiteOk;
}
TfLiteStatus MicroInterpreter::AllocateTensors() {
if (allocator_.StartModelAllocation(model_, op_resolver_,
&node_and_registrations_,
&eval_tensors_) != kTfLiteOk) {
SubgraphAllocations* allocations = allocator_.StartModelAllocation(model_);
if (allocations == nullptr) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Failed starting model allocation.\n");
initialization_status_ = kTfLiteError;
return kTfLiteError;
}
// Update the pointer now that TfLiteEvalTensor allocation has completed on
// the context helper.
// TODO(b/16157777): This call would not be needed if ContextHelper rolled
// into the interpreter.
context_helper_.SetTfLiteEvalTensors(eval_tensors_);
context_.tensors_size = subgraph_->tensors()->size();
graph_.SetSubgraphAllocations(allocations);
TF_LITE_ENSURE_STATUS(PrepareNodeAndRegistrationDataFromFlatbuffer());
// Only allow AllocatePersistentBuffer in Init stage.
context_.AllocatePersistentBuffer = context_helper_.AllocatePersistentBuffer;
context_.AllocatePersistentBuffer = AllocatePersistentBuffer;
context_.RequestScratchBufferInArena = nullptr;
context_.GetScratchBuffer = nullptr;
for (size_t i = 0; i < subgraph_->operators()->size(); ++i) {
auto* node = &(node_and_registrations_[i].node);
auto* registration = node_and_registrations_[i].registration;
size_t init_data_size;
const char* init_data;
if (registration->builtin_code == BuiltinOperator_CUSTOM) {
init_data = reinterpret_cast<const char*>(node->custom_initial_data);
init_data_size = node->custom_initial_data_size;
} else {
init_data = reinterpret_cast<const char*>(node->builtin_data);
init_data_size = 0;
}
if (registration->init) {
node->user_data =
registration->init(&context_, init_data, init_data_size);
}
}
context_.GetExecutionPlan = GetGraph;
graph_.InitSubgraphs();
// Both AllocatePersistentBuffer and RequestScratchBufferInArena is
// available in Prepare stage.
context_.RequestScratchBufferInArena =
context_helper_.RequestScratchBufferInArena;
for (size_t i = 0; i < subgraph_->operators()->size(); ++i) {
auto* node = &(node_and_registrations_[i].node);
auto* registration = node_and_registrations_[i].registration;
if (registration->prepare) {
TfLiteStatus prepare_status = registration->prepare(&context_, node);
if (prepare_status != kTfLiteOk) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Node %s (number %df) failed to prepare with status %d",
OpNameFromRegistration(registration), i, prepare_status);
return kTfLiteError;
}
}
allocator_.FinishPrepareNodeAllocations(/*node_id=*/i);
}
context_.RequestScratchBufferInArena = RequestScratchBufferInArena;
graph_.PrepareSubgraphs();
// Prepare is done, we're ready for Invoke. Memory allocation is no longer
// allowed. Kernels can only fetch scratch buffers via GetScratchBuffer.
context_.AllocatePersistentBuffer = nullptr;
context_.RequestScratchBufferInArena = nullptr;
context_.GetScratchBuffer = context_helper_.GetScratchBuffer;
context_.GetScratchBuffer = GetScratchBuffer;
TF_LITE_ENSURE_OK(&context_,
allocator_.FinishModelAllocation(model_, eval_tensors_,
&scratch_buffer_handles_));
// TODO(b/16157777): Remove this when ContextHelper is rolled into this class.
context_helper_.SetScratchBufferHandles(scratch_buffer_handles_);
TF_LITE_ENSURE_OK(&context_, allocator_.FinishModelAllocation(
model_, graph_.GetAllocations(),
&scratch_buffer_handles_));
// TODO(b/162311891): Drop these allocations when the interpreter supports
// handling buffers from TfLiteEvalTensor.
@@ -266,7 +232,7 @@ TfLiteStatus MicroInterpreter::AllocateTensors() {
for (size_t i = 0; i < inputs_size(); ++i) {
input_tensors_[i] = allocator_.AllocatePersistentTfLiteTensor(
model_, eval_tensors_, inputs().Get(i));
model_, graph_.GetAllocations(), inputs().Get(i), 0);
if (input_tensors_[i] == nullptr) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Failed to initialize input tensor %d", i);
@@ -290,7 +256,7 @@ TfLiteStatus MicroInterpreter::AllocateTensors() {
for (size_t i = 0; i < outputs_size(); ++i) {
output_tensors_[i] = allocator_.AllocatePersistentTfLiteTensor(
model_, eval_tensors_, outputs().Get(i));
model_, graph_.GetAllocations(), outputs().Get(i), 0);
if (output_tensors_[i] == nullptr) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Failed to initialize output tensor %d", i);
@@ -316,41 +282,7 @@ TfLiteStatus MicroInterpreter::Invoke() {
if (!tensors_allocated_) {
TF_LITE_ENSURE_OK(&context_, AllocateTensors());
}
for (size_t i = 0; i < subgraph_->operators()->size(); ++i) {
auto* node = &(node_and_registrations_[i].node);
auto* registration = node_and_registrations_[i].registration;
// This ifdef is needed (even though ScopedMicroProfiler itself is a no-op with
// -DTF_LITE_STRIP_ERROR_STRINGS) because the function OpNameFromRegistration is
// only defined for builds with the error strings.
#if !defined(TF_LITE_STRIP_ERROR_STRINGS)
ScopedMicroProfiler scoped_profiler(
OpNameFromRegistration(registration),
reinterpret_cast<MicroProfiler*>(context_.profiler));
#endif
TFLITE_DCHECK(registration->invoke);
TfLiteStatus invoke_status = registration->invoke(&context_, node);
// All TfLiteTensor structs used in the kernel are allocated from temp
// memory in the allocator. This creates a chain of allocations in the
// temp section. The call below resets the chain of allocations to
// prepare for the next call.
allocator_.ResetTempAllocations();
if (invoke_status == kTfLiteError) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Node %s (number %d) failed to invoke with status %d",
OpNameFromRegistration(registration), i, invoke_status);
return kTfLiteError;
} else if (invoke_status != kTfLiteOk) {
return invoke_status;
}
}
return kTfLiteOk;
return graph_.InvokeSubgraph(0);
}
TfLiteTensor* MicroInterpreter::input(size_t index) {
@@ -375,34 +307,68 @@ TfLiteTensor* MicroInterpreter::output(size_t index) {
return output_tensors_[index];
}
TfLiteTensor* MicroInterpreter::tensor(size_t index) {
const size_t length = tensors_size();
if (index >= length) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Tensor index %d out of range (length is %d)", index,
length);
return nullptr;
}
return allocator_.AllocatePersistentTfLiteTensor(model_, eval_tensors_,
index);
TfLiteStatus MicroInterpreter::ResetVariableTensors() {
return graph_.ResetVariableTensors();
}
TfLiteStatus MicroInterpreter::ResetVariableTensors() {
for (size_t i = 0; i < subgraph_->tensors()->size(); ++i) {
auto* tensor = subgraph_->tensors()->Get(i);
if (tensor->is_variable()) {
size_t buffer_size;
TF_LITE_ENSURE_STATUS(
TfLiteEvalTensorByteLength(&eval_tensors_[i], &buffer_size));
void* MicroInterpreter::AllocatePersistentBuffer(TfLiteContext* ctx,
size_t bytes) {
return reinterpret_cast<MicroInterpreter*>(ctx->impl_)
->allocator_.AllocatePersistentBuffer(bytes);
}
int value = 0;
if (tensor->type() == tflite::TensorType_INT8) {
value = tensor->quantization()->zero_point()->Get(0);
}
memset(eval_tensors_[i].data.raw, value, buffer_size);
}
}
TfLiteStatus MicroInterpreter::RequestScratchBufferInArena(TfLiteContext* ctx,
size_t bytes,
int* buffer_idx) {
MicroInterpreter* interpreter =
reinterpret_cast<MicroInterpreter*>(ctx->impl_);
return interpreter->allocator_.RequestScratchBufferInArena(
bytes, interpreter->graph_.GetCurrentSubgraphIndex(), buffer_idx);
}
void* MicroInterpreter::GetScratchBuffer(TfLiteContext* ctx, int buffer_idx) {
MicroInterpreter* interpreter =
reinterpret_cast<MicroInterpreter*>(ctx->impl_);
ScratchBufferHandle* handle =
interpreter->scratch_buffer_handles_ + buffer_idx;
return handle->data;
}
void MicroInterpreter::ReportOpError(struct TfLiteContext* context,
const char* format, ...) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
MicroInterpreter* interpreter =
static_cast<MicroInterpreter*>(context->impl_);
va_list args;
va_start(args, format);
TF_LITE_REPORT_ERROR(interpreter->error_reporter_, format, args);
va_end(args);
#endif
}
TfLiteTensor* MicroInterpreter::GetTensor(const struct TfLiteContext* context,
int tensor_idx) {
MicroInterpreter* interpreter =
static_cast<MicroInterpreter*>(context->impl_);
return interpreter->allocator_.AllocateTempTfLiteTensor(
interpreter->model_, interpreter->graph_.GetAllocations(), tensor_idx,
interpreter->get_subgraph_index());
}
TfLiteEvalTensor* MicroInterpreter::GetEvalTensor(
const struct TfLiteContext* context, int tensor_idx) {
MicroInterpreter* interpreter =
reinterpret_cast<MicroInterpreter*>(context->impl_);
return &interpreter->graph_
.GetAllocations()[interpreter->get_subgraph_index()]
.tensors[tensor_idx];
}
TfLiteStatus MicroInterpreter::GetGraph(struct TfLiteContext* context,
TfLiteIntArray** args) {
MicroInterpreter* interpreter =
reinterpret_cast<MicroInterpreter*>(context->impl_);
*args = reinterpret_cast<TfLiteIntArray*>(&interpreter->graph_);
return kTfLiteOk;
}
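
The rebinding above keeps the usual three-phase contract for kernels. A hypothetical kernel sketch (OpData and the 256-byte scratch size are illustrative, not part of this commit) showing which context hooks are live in each phase:

  struct OpData {
    int scratch_index;
  };

  void* ExampleInit(TfLiteContext* context, const char* buffer, size_t length) {
    // Init: only AllocatePersistentBuffer is bound.
    return context->AllocatePersistentBuffer(context, sizeof(OpData));
  }

  TfLiteStatus ExamplePrepare(TfLiteContext* context, TfLiteNode* node) {
    // Prepare: RequestScratchBufferInArena becomes available.
    OpData* data = static_cast<OpData*>(node->user_data);
    return context->RequestScratchBufferInArena(context, /*bytes=*/256,
                                                &data->scratch_index);
  }

  TfLiteStatus ExampleEval(TfLiteContext* context, TfLiteNode* node) {
    // Invoke: only GetScratchBuffer remains bound.
    OpData* data = static_cast<OpData*>(node->user_data);
    void* scratch = context->GetScratchBuffer(context, data->scratch_index);
    return scratch != nullptr ? kTfLiteOk : kTfLiteError;
  }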

View File

@@ -23,6 +23,7 @@ limitations under the License.
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/micro_graph.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/micro/micro_profiler.h"
#include "tensorflow/lite/portable_type_to_tflitetype.h"
@@ -34,46 +35,6 @@ limitations under the License.
namespace tflite {
namespace internal {
// A helper class to encapsulate the implementation of APIs in Context.
// context->impl_ points to an instance of this class.
// Check tensorflow/lite/c/common.h for detailed descriptions.
// TODO(b/16157777): Consider rolling this class into MicroInterpreter.
class ContextHelper {
public:
explicit ContextHelper(ErrorReporter* error_reporter,
MicroAllocator* allocator, const Model* model);
// Functions that will be assigned to function pointers on TfLiteContext:
static void* AllocatePersistentBuffer(TfLiteContext* ctx, size_t bytes);
static TfLiteStatus RequestScratchBufferInArena(TfLiteContext* ctx,
size_t bytes,
int* buffer_idx);
static void* GetScratchBuffer(TfLiteContext* ctx, int buffer_idx);
static void ReportOpError(struct TfLiteContext* context, const char* format,
...);
static TfLiteTensor* GetTensor(const struct TfLiteContext* context,
int tensor_idx);
static TfLiteEvalTensor* GetEvalTensor(const struct TfLiteContext* context,
int tensor_idx);
// Sets the pointer to a list of TfLiteEvalTensor instances.
void SetTfLiteEvalTensors(TfLiteEvalTensor* eval_tensors);
// Sets the pointer to a list of ScratchBufferHandle instances.
void SetScratchBufferHandles(ScratchBufferHandle* scratch_buffer_handles);
private:
MicroAllocator* allocator_ = nullptr;
ErrorReporter* error_reporter_ = nullptr;
const Model* model_ = nullptr;
TfLiteEvalTensor* eval_tensors_ = nullptr;
ScratchBufferHandle* scratch_buffer_handles_ = nullptr;
};
} // namespace internal
class MicroInterpreter {
public:
// The lifetime of the model, op resolver, tensor arena, error reporter and
@@ -108,22 +69,12 @@ class MicroInterpreter {
// TODO(b/149795762): Add this to the TfLiteStatus enum.
TfLiteStatus Invoke();
size_t tensors_size() const { return context_.tensors_size; }
TfLiteTensor* tensor(size_t tensor_index);
template <class T>
T* typed_tensor(int tensor_index) {
if (TfLiteTensor* tensor_ptr = tensor(tensor_index)) {
if (tensor_ptr->type == typeToTfLiteType<T>()) {
return GetTensorData<T>(tensor_ptr);
}
}
return nullptr;
}
TfLiteTensor* input(size_t index);
size_t inputs_size() const { return subgraph_->inputs()->Length(); }
size_t inputs_size() const {
return model_->subgraphs()->Get(0)->inputs()->size();
}
const flatbuffers::Vector<int32_t>& inputs() const {
return *subgraph_->inputs();
return *model_->subgraphs()->Get(0)->inputs();
}
TfLiteTensor* input_tensor(size_t index) { return input(index); }
template <class T>
@@ -137,9 +88,11 @@ class MicroInterpreter {
}
TfLiteTensor* output(size_t index);
size_t outputs_size() const { return subgraph_->outputs()->Length(); }
size_t outputs_size() const {
return model_->subgraphs()->Get(0)->outputs()->size();
}
const flatbuffers::Vector<int32_t>& outputs() const {
return *subgraph_->outputs();
return *model_->subgraphs()->Get(0)->outputs();
}
TfLiteTensor* output_tensor(size_t index) { return output(index); }
template <class T>
@@ -157,12 +110,11 @@ class MicroInterpreter {
TfLiteStatus initialization_status() const { return initialization_status_; }
size_t operators_size() const { return subgraph_->operators()->size(); }
// For debugging only.
const NodeAndRegistration node_and_registration(int node_index) const {
return node_and_registrations_[node_index];
}
// Populates node and registration pointers representing the inference graph
// of the model from values inside the flatbuffer (loaded from the TfLiteModel
// instance). Persistent data (e.g. operator data) is allocated from the
// arena.
TfLiteStatus PrepareNodeAndRegistrationDataFromFlatbuffer();
// For debugging only.
// Returns the actual used arena in bytes. This method gives the optimal arena
@@ -181,24 +133,36 @@ class MicroInterpreter {
// error reporting during initialization.
void Init(MicroProfiler* profiler);
NodeAndRegistration* node_and_registrations_ = nullptr;
// Gets the current subgraph index used from within context methods.
int get_subgraph_index() { return graph_.GetCurrentSubgraphIndex(); }
// Static functions that are bound to the TfLiteContext instance:
static void* AllocatePersistentBuffer(TfLiteContext* ctx, size_t bytes);
static TfLiteStatus RequestScratchBufferInArena(TfLiteContext* ctx,
size_t bytes,
int* buffer_idx);
static void* GetScratchBuffer(TfLiteContext* ctx, int buffer_idx);
static void ReportOpError(struct TfLiteContext* context, const char* format,
...);
static TfLiteTensor* GetTensor(const struct TfLiteContext* context,
int tensor_idx);
static TfLiteEvalTensor* GetEvalTensor(const struct TfLiteContext* context,
int tensor_idx);
static TfLiteStatus GetGraph(struct TfLiteContext* context,
TfLiteIntArray** args);
const Model* model_;
const MicroOpResolver& op_resolver_;
ErrorReporter* error_reporter_;
TfLiteContext context_ = {};
MicroAllocator& allocator_;
MicroGraph graph_;
bool tensors_allocated_;
TfLiteStatus initialization_status_;
const SubGraph* subgraph_ = nullptr;
TfLiteEvalTensor* eval_tensors_ = nullptr;
ScratchBufferHandle* scratch_buffer_handles_ = nullptr;
// TODO(b/16157777): Drop this reference:
internal::ContextHelper context_helper_;
// TODO(b/162311891): Clean these pointers up when this class supports buffers
// from TfLiteEvalTensor.
TfLiteTensor** input_tensors_;

View File

@@ -1,4 +1,4 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -24,9 +24,11 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/kernels/conv.h"
#include "tensorflow/lite/micro/kernels/ethosu.h"
#include "tensorflow/lite/micro/kernels/fully_connected.h"
#include "tensorflow/lite/micro/kernels/micro_ops.h"
#include "tensorflow/lite/micro/kernels/softmax.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/schema/schema_generated.h"
@@ -139,8 +141,7 @@ class MicroMutableOpResolver : public MicroOpResolver {
TfLiteStatus AddAveragePool2D() {
return AddBuiltin(BuiltinOperator_AVERAGE_POOL_2D,
tflite::ops::micro::Register_AVERAGE_POOL_2D(),
ParsePool);
tflite::Register_AVERAGE_POOL_2D(), ParsePool);
}
TfLiteStatus AddBatchToSpaceNd() {
@@ -168,8 +169,9 @@ class MicroMutableOpResolver : public MicroOpResolver {
ParseConcatenation);
}
TfLiteStatus AddConv2D() {
return AddBuiltin(BuiltinOperator_CONV_2D, Register_CONV_2D(), ParseConv2D);
TfLiteStatus AddConv2D(
const TfLiteRegistration& registration = Register_CONV_2D()) {
return AddBuiltin(BuiltinOperator_CONV_2D, registration, ParseConv2D);
}
TfLiteStatus AddCos() {
@@ -177,6 +179,16 @@ class MicroMutableOpResolver : public MicroOpResolver {
ParseCos);
}
TfLiteStatus AddCumSum() {
return AddBuiltin(BuiltinOperator_CUMSUM, tflite::Register_CUMSUM(),
ParseCumsum);
}
TfLiteStatus AddDepthToSpace() {
return AddBuiltin(BuiltinOperator_DEPTH_TO_SPACE,
tflite::Register_DEPTH_TO_SPACE(), ParseDepthToSpace);
}
TfLiteStatus AddDepthwiseConv2D() {
return AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D,
Register_DEPTHWISE_CONV_2D(), ParseDepthwiseConv2D);
@@ -193,10 +205,6 @@ class MicroMutableOpResolver : public MicroOpResolver {
tflite::Register_DETECTION_POSTPROCESS());
}
TfLiteStatus AddDiv() {
return AddBuiltin(BuiltinOperator_DIV, tflite::Register_DIV(), ParseDiv);
}
TfLiteStatus AddElu() {
return AddBuiltin(BuiltinOperator_ELU, tflite::Register_ELU(), ParseElu);
}
@@ -223,17 +231,41 @@ class MicroMutableOpResolver : public MicroOpResolver {
ParseExpandDims);
}
TfLiteStatus AddFill() {
return AddBuiltin(BuiltinOperator_FILL, tflite::Register_FILL(), ParseFill);
}
TfLiteStatus AddFloor() {
return AddBuiltin(BuiltinOperator_FLOOR,
tflite::ops::micro::Register_FLOOR(), ParseFloor);
}
TfLiteStatus AddFloorDiv() {
return AddBuiltin(BuiltinOperator_FLOOR_DIV, tflite::Register_FLOOR_DIV(),
ParseFloorDiv);
}
TfLiteStatus AddFloorMod() {
return AddBuiltin(BuiltinOperator_FLOOR_MOD, tflite::Register_FLOOR_MOD(),
ParseFloorMod);
}
TfLiteStatus AddFullyConnected(
const TfLiteRegistration& registration = Register_FULLY_CONNECTED()) {
return AddBuiltin(BuiltinOperator_FULLY_CONNECTED, registration,
ParseFullyConnected);
}
TfLiteStatus AddGather() {
return AddBuiltin(BuiltinOperator_GATHER, tflite::Register_GATHER(),
ParseGather);
}
TfLiteStatus AddGatherNd() {
return AddBuiltin(BuiltinOperator_GATHER_ND, tflite::Register_GATHER_ND(),
ParseGatherNd);
}
TfLiteStatus AddGreater() {
return AddBuiltin(BuiltinOperator_GREATER,
tflite::ops::micro::Register_GREATER(), ParseGreater);
@@ -246,11 +278,14 @@ class MicroMutableOpResolver : public MicroOpResolver {
}
TfLiteStatus AddHardSwish() {
return AddBuiltin(BuiltinOperator_HARD_SWISH,
tflite::ops::micro::Register_HARD_SWISH(),
return AddBuiltin(BuiltinOperator_HARD_SWISH, tflite::Register_HARD_SWISH(),
ParseHardSwish);
}
TfLiteStatus AddIf() {
return AddBuiltin(BuiltinOperator_IF, tflite::Register_IF(), ParseIf);
}
TfLiteStatus AddL2Normalization() {
return AddBuiltin(BuiltinOperator_L2_NORMALIZATION,
tflite::ops::micro::Register_L2_NORMALIZATION(),
@@ -285,8 +320,7 @@ class MicroMutableOpResolver : public MicroOpResolver {
TfLiteStatus AddLogicalAnd() {
return AddBuiltin(BuiltinOperator_LOGICAL_AND,
tflite::ops::micro::Register_LOGICAL_AND(),
ParseLogicalAnd);
tflite::Register_LOGICAL_AND(), ParseLogicalAnd);
}
TfLiteStatus AddLogicalNot() {
@@ -296,14 +330,13 @@ class MicroMutableOpResolver : public MicroOpResolver {
}
TfLiteStatus AddLogicalOr() {
return AddBuiltin(BuiltinOperator_LOGICAL_OR,
tflite::ops::micro::Register_LOGICAL_OR(),
return AddBuiltin(BuiltinOperator_LOGICAL_OR, tflite::Register_LOGICAL_OR(),
ParseLogicalOr);
}
TfLiteStatus AddLogistic() {
return AddBuiltin(BuiltinOperator_LOGISTIC,
tflite::ops::micro::Register_LOGISTIC(), ParseLogistic);
return AddBuiltin(BuiltinOperator_LOGISTIC, tflite::Register_LOGISTIC(),
ParseLogistic);
}
TfLiteStatus AddMaximum() {
@@ -313,7 +346,7 @@ class MicroMutableOpResolver : public MicroOpResolver {
TfLiteStatus AddMaxPool2D() {
return AddBuiltin(BuiltinOperator_MAX_POOL_2D,
tflite::ops::micro::Register_MAX_POOL_2D(), ParsePool);
tflite::Register_MAX_POOL_2D(), ParsePool);
}
TfLiteStatus AddMean() {
@@ -372,13 +405,12 @@ class MicroMutableOpResolver : public MicroOpResolver {
}
TfLiteStatus AddRelu() {
return AddBuiltin(BuiltinOperator_RELU, tflite::ops::micro::Register_RELU(),
ParseRelu);
return AddBuiltin(BuiltinOperator_RELU, tflite::Register_RELU(), ParseRelu);
}
TfLiteStatus AddRelu6() {
return AddBuiltin(BuiltinOperator_RELU6,
tflite::ops::micro::Register_RELU6(), ParseRelu6);
return AddBuiltin(BuiltinOperator_RELU6, tflite::Register_RELU6(),
ParseRelu6);
}
TfLiteStatus AddReshape() {
@@ -386,6 +418,11 @@ class MicroMutableOpResolver : public MicroOpResolver {
tflite::ops::micro::Register_RESHAPE(), ParseReshape);
}
TfLiteStatus AddResizeBilinear() {
return AddBuiltin(BuiltinOperator_RESIZE_BILINEAR,
Register_RESIZE_BILINEAR(), ParseResizeBilinear);
}
TfLiteStatus AddResizeNearestNeighbor() {
return AddBuiltin(BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
tflite::ops::micro::Register_RESIZE_NEAREST_NEIGHBOR(),
@@ -411,9 +448,9 @@ class MicroMutableOpResolver : public MicroOpResolver {
ParseSin);
}
TfLiteStatus AddSoftmax() {
return AddBuiltin(BuiltinOperator_SOFTMAX, Register_SOFTMAX(),
ParseSoftmax);
TfLiteStatus AddSoftmax(
const TfLiteRegistration& registration = Register_SOFTMAX()) {
return AddBuiltin(BuiltinOperator_SOFTMAX, registration, ParseSoftmax);
}
TfLiteStatus AddSpaceToBatchNd() {
@@ -421,6 +458,11 @@ class MicroMutableOpResolver : public MicroOpResolver {
Register_SPACE_TO_BATCH_ND(), ParseSpaceToBatchNd);
}
TfLiteStatus AddSpaceToDepth() {
return AddBuiltin(BuiltinOperator_SPACE_TO_DEPTH, Register_SPACE_TO_DEPTH(),
ParseSpaceToDepth);
}
TfLiteStatus AddSplit() {
return AddBuiltin(BuiltinOperator_SPLIT,
tflite::ops::micro::Register_SPLIT(), ParseSplit);
@@ -471,6 +513,11 @@ class MicroMutableOpResolver : public MicroOpResolver {
tflite::Register_TRANSPOSE_CONV(), ParseTransposeConv);
}
TfLiteStatus AddTranspose() {
return AddBuiltin(BuiltinOperator_TRANSPOSE, Register_TRANSPOSE(),
ParseTranspose);
}
TfLiteStatus AddUnpack() {
return AddBuiltin(BuiltinOperator_UNPACK,
tflite::ops::micro::Register_UNPACK(), ParseUnpack);
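
The new default-argument overloads above (AddConv2D, AddFullyConnected, AddSoftmax) let a caller keep the reference kernel or swap in an alternative registration. A minimal sketch; MyOptimizedConv2D is a hypothetical registration used only for illustration:

  TfLiteRegistration MyOptimizedConv2D();   // hypothetical specialized kernel

  tflite::MicroMutableOpResolver<2> resolver;
  resolver.AddFullyConnected();             // keeps Register_FULLY_CONNECTED()
  resolver.AddConv2D(MyOptimizedConv2D());  // swaps in the alternative kernel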

View File

@@ -55,4 +55,14 @@ void MicroProfiler::Log() const {
#endif
}
void MicroProfiler::LogCsv() const {
#if !defined(TF_LITE_STRIP_ERROR_STRINGS)
MicroPrintf("\"Event\",\"Tag\",\"Ticks\"");
for (int i = 0; i < num_events_; ++i) {
int32_t ticks = end_ticks_[i] - start_ticks_[i];
MicroPrintf("%d,%s,%d", i, tags_[i], ticks);
}
#endif
}
} // namespace tflite
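
A short usage sketch of the new CSV output; the interpreter being timed is assumed to exist, and each row follows the "Event","Tag","Ticks" header printed above:

  tflite::MicroProfiler profiler;
  uint32_t e0 = profiler.BeginEvent("AllocateTensors");
  interpreter.AllocateTensors();
  profiler.EndEvent(e0);
  uint32_t e1 = profiler.BeginEvent("Invoke");
  interpreter.Invoke();
  profiler.EndEvent(e1);
  profiler.LogCsv();  // e.g. 0,AllocateTensors,1234 and 1,Invoke,5678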

View File

@@ -1,4 +1,4 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -53,14 +53,19 @@ class MicroProfiler {
// event[i] <= start time of event[i+1]).
int32_t GetTotalTicks() const;
// Prints the profiling information of each of the events.
// Prints the profiling information of each of the events in human readable
// form.
void Log() const;
// Prints the profiling information of each of the events in CSV (Comma
// Separated Value) form.
void LogCsv() const;
private:
// Maximum number of events that this class can keep track of. If AddEvent is
// called more than kMaxEvents times, then the oldest event's
// profiling information will be overwritten.
static constexpr int kMaxEvents = 50;
static constexpr int kMaxEvents = 1024;
const char* tags_[kMaxEvents];
int32_t start_ticks_[kMaxEvents];
@@ -70,7 +75,7 @@ class MicroProfiler {
TF_LITE_REMOVE_VIRTUAL_DELETE;
};
#if defined(NDEBUG)
#if defined(TF_LITE_STRIP_ERROR_STRINGS)
// For release builds, the ScopedMicroProfiler is a no-op.
//
// This is done because the ScopedMicroProfiler is used as part of the
@@ -111,7 +116,7 @@ class ScopedMicroProfiler {
uint32_t event_handle_ = 0;
MicroProfiler* profiler_ = nullptr;
};
#endif // !defined(NDEBUG)
#endif // !defined(TF_LITE_STRIP_ERROR_STRINGS)
} // namespace tflite
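
A sketch of the RAII pattern the gate above controls; under TF_LITE_STRIP_ERROR_STRINGS the object compiles to a no-op, otherwise the event spans the enclosing scope (the tag string is illustrative):

  {
    tflite::ScopedMicroProfiler scoped_profiler(
        "MY_KERNEL",
        reinterpret_cast<tflite::MicroProfiler*>(context->profiler));
    // ... work to be timed ...
  }  // the event ends here when profiling is enabled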

View File

@@ -283,6 +283,14 @@ extern "C" int MicroVsnprintf(char* output, int len, const char* format,
case '%':
output[output_index++] = *current++;
break;
case 'c':
if (usable_length - output_index < 1) {
output[output_index++] = '\0';
return output_index;
}
output[output_index++] = va_arg(args, int32_t);
current++;
break;
case 's':
char* string = va_arg(args, char*);
int string_idx = 0;

View File

@@ -21,6 +21,7 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
namespace tflite {
@@ -50,7 +51,8 @@ void SignedSymmetricPerChannelQuantize(const float* values,
stride = channel_count;
channel_stride = 1;
} else {
TF_LITE_FATAL("quantized dimension must be 0 or 3");
MicroPrintf("quantized dimension must be 0 or 3");
TFLITE_ABORT;
}
// Calculate scales for each channel.

View File

@@ -19,6 +19,7 @@ limitations under the License.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
#include "tensorflow/lite/c/common.h"
@@ -43,11 +44,13 @@ T FloatToQuantizedType(const float value, const float scale, int zero_point) {
template <typename T>
T FloatToSymmetricQuantizedType(const float value, const float scale) {
int32_t result = round(value / scale);
result =
std::max(static_cast<int32_t>(std::numeric_limits<T>::min() + 1), result);
result =
std::min(static_cast<int32_t>(std::numeric_limits<T>::max()), result);
// 64-bit values are required since 8x16 conv accumulates to int64, meaning
// an int64 bias is required.
std::int64_t result = round(value / scale);
result = std::max(
static_cast<std::int64_t>(std::numeric_limits<T>::min() + 1), result);
result = std::min(static_cast<std::int64_t>(std::numeric_limits<T>::max()),
result);
return result;
}
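
A worked example of why the widening matters, assuming the tflite namespace qualification as above: with a very small bias scale the quantized value exceeds INT32_MAX and only fits once the intermediate and the clamp bounds are 64-bit.

  // round(30.0f / 1e-9f) is roughly 3.0e10, well above INT32_MAX (~2.1e9).
  std::int64_t q =
      tflite::FloatToSymmetricQuantizedType<std::int64_t>(30.0f, 1e-9f);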

View File

@@ -0,0 +1,66 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/mock_micro_graph.h"
#include "tensorflow/lite/micro/test_helpers.h"
namespace tflite {
MockMicroGraph::MockMicroGraph(SimpleMemoryAllocator* allocator)
: MicroGraph(nullptr, nullptr, nullptr),
allocator_(allocator),
init_count_(0),
prepare_count_(0),
free_count_(0) {
memset(invoke_counts_, 0, sizeof(invoke_counts_));
mock_tensor_ =
reinterpret_cast<TfLiteEvalTensor*>(allocator_->AllocateFromTail(
sizeof(TfLiteEvalTensor), alignof(TfLiteEvalTensor)));
int* dims_array = reinterpret_cast<int*>(
allocator_->AllocateFromTail(3 * sizeof(int), alignof(int)));
float* data_array = reinterpret_cast<float*>(
allocator_->AllocateFromTail(2 * sizeof(float), alignof(float)));
int dims[] = {2, 1, 2};
memcpy(dims_array, dims, 3 * sizeof(int));
mock_tensor_->dims = testing::IntArrayFromInts(dims_array);
mock_tensor_->data.f = data_array;
mock_tensor_->type = kTfLiteFloat32;
}
TfLiteStatus MockMicroGraph::InvokeSubgraph(int subgraph_idx) {
invoke_counts_[subgraph_idx]++;
return kTfLiteOk;
}
TfLiteStatus MockMicroGraph::ResetVariableTensors() { return kTfLiteOk; }
size_t MockMicroGraph::NumSubgraphInputs(int subgraph_idx) { return 1; }
TfLiteEvalTensor* MockMicroGraph::GetSubgraphInput(int subgraph_idx,
int tensor_idx) {
return mock_tensor_;
}
size_t MockMicroGraph::NumSubgraphOutputs(int subgraph_idx) { return 1; }
TfLiteEvalTensor* MockMicroGraph::GetSubgraphOutput(int subgraph_idx,
int tensor_idx) {
return mock_tensor_;
}
int MockMicroGraph::NumSubgraphs() { return kMaxSubgraphs; }
} // namespace tflite

View File

@@ -0,0 +1,60 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MOCK_MICRO_GRAPH_H_
#define TENSORFLOW_LITE_MICRO_MOCK_MICRO_GRAPH_H_
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/micro_graph.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
// MockMicroGraph stubs out all MicroGraph methods used during invoke. A count
// of the number of calls to invoke for each subgraph is maintained for
// validation of control flow operators.
class MockMicroGraph : public MicroGraph {
public:
explicit MockMicroGraph(SimpleMemoryAllocator* allocator);
TfLiteStatus InvokeSubgraph(int subgraph_idx) override;
TfLiteStatus ResetVariableTensors() override;
size_t NumSubgraphInputs(int subgraph_idx) override;
TfLiteEvalTensor* GetSubgraphInput(int subgraph_idx, int tensor_idx) override;
size_t NumSubgraphOutputs(int subgraph_idx) override;
TfLiteEvalTensor* GetSubgraphOutput(int subgraph_idx,
int tensor_idx) override;
int NumSubgraphs() override;
int get_init_count() const { return init_count_; }
int get_prepare_count() const { return prepare_count_; }
int get_free_count() const { return free_count_; }
int get_invoke_count(int subgraph_idx) const {
return invoke_counts_[subgraph_idx];
}
private:
static constexpr int kMaxSubgraphs = 10;
SimpleMemoryAllocator* allocator_;
TfLiteEvalTensor* mock_tensor_;
int init_count_;
int prepare_count_;
int free_count_;
int invoke_counts_[kMaxSubgraphs];
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MOCK_MICRO_GRAPH_H_
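
A sketch of how a control-flow kernel test might lean on this mock; the arena size and error-reporter wiring are assumptions rather than code from this commit:

  uint8_t arena[1024];
  tflite::SimpleMemoryAllocator* allocator =
      tflite::SimpleMemoryAllocator::Create(tflite::GetMicroErrorReporter(),
                                            arena, sizeof(arena));
  tflite::MockMicroGraph mock_graph(allocator);
  mock_graph.InvokeSubgraph(1);                     // e.g. the "then" branch
  int then_count = mock_graph.get_invoke_count(1);  // == 1
  int else_count = mock_graph.get_invoke_count(2);  // == 0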

View File

@@ -130,58 +130,48 @@ void RecordingMicroAllocator::PrintRecordedAllocation(
}
TfLiteStatus RecordingMicroAllocator::AllocateNodeAndRegistrations(
const Model* model, NodeAndRegistration** node_and_registrations) {
RecordedAllocation allocations = SnapshotAllocationUsage();
TfLiteStatus status = MicroAllocator::AllocateNodeAndRegistrations(
model, node_and_registrations);
RecordAllocationUsage(allocations,
recorded_node_and_registration_array_data_);
// The allocation count in SimpleMemoryAllocator will only be 1. To provide
// better logging, decrement by 1 and add in the actual number of operators
// used in the graph:
// The allocation for this recording will always be 1. This is because the
// parent class mallocs one large allocation for the number of nodes in the
// graph (e.g. sizeof(NodeAndRegistration) * num_nodes).
// To prevent extra overhead and potential for fragmentation, manually adjust
// the accounting by decrementing by 1 and adding the actual number of nodes
// used in the graph:
recorded_node_and_registration_array_data_.count +=
GetSubGraphFromModel(model)->operators()->size() - 1;
return status;
}
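
A worked example of the accounting above, with assumed numbers: a 5-operator subgraph is served by a single arena allocation, and the recorded count is then rewritten so the log reports one entry per operator.

  constexpr size_t kNumOperators = 5;
  size_t bytes_requested =
      kNumOperators * sizeof(tflite::NodeAndRegistration);  // one malloc
  size_t recorded_count = 1;            // one block handed out by the arena
  recorded_count += kNumOperators - 1;  // logged as 5 NodeAndRegistration slots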
TfLiteStatus
RecordingMicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer(
const Model* model, const MicroOpResolver& op_resolver,
NodeAndRegistration* node_and_registrations) {
const Model* model, SubgraphAllocations* subgraph_allocations) {
RecordedAllocation allocations = SnapshotAllocationUsage();
TfLiteStatus status =
MicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer(
model, op_resolver, node_and_registrations);
RecordAllocationUsage(allocations, recorded_op_data_);
MicroAllocator::AllocateNodeAndRegistrations(model, subgraph_allocations);
for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs()->size();
subgraph_idx++) {
RecordAllocationUsage(allocations,
recorded_node_and_registration_array_data_);
// The allocation count in SimpleMemoryAllocator will only be 1. To provide
// better logging, decrement by 1 and add in the actual number of operators
// used in the graph:
// The allocation for this recording will always be 1. This is because the
// parent class mallocs one large allocation for the number of nodes in the
// graph (e.g. sizeof(NodeAndRegistration) * num_nodes).
// To prevent extra overhead and potential for fragmentation, manually
// adjust the accounting by decrementing by 1 and adding the actual number
// of nodes used in the graph:
recorded_node_and_registration_array_data_.count +=
model->subgraphs()->Get(subgraph_idx)->operators()->size() - 1;
}
return status;
}
TfLiteStatus RecordingMicroAllocator::AllocateTfLiteEvalTensors(
const Model* model, TfLiteEvalTensor** eval_tensors) {
const Model* model, SubgraphAllocations* subgraph_allocations) {
RecordedAllocation allocations = SnapshotAllocationUsage();
TfLiteStatus status =
MicroAllocator::AllocateTfLiteEvalTensors(model, eval_tensors);
RecordAllocationUsage(allocations, recorded_tflite_eval_tensor_data_);
// The allocation for this recording will always be 1. This is because the
// parent class mallocs one large allocation for the number of tensors in the
// graph (e.g. sizeof(TfLiteEvalTensor) * num_tensors).
// To prevent extra overhead and potential for fragmentation, manually adjust
// the accounting by decrementing by 1 and adding the actual number of tensors
// used in the graph:
recorded_tflite_eval_tensor_data_.count +=
GetSubGraphFromModel(model)->tensors()->size() - 1;
MicroAllocator::AllocateTfLiteEvalTensors(model, subgraph_allocations);
for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs()->size();
subgraph_idx++) {
RecordAllocationUsage(allocations, recorded_tflite_eval_tensor_data_);
// The allocation for this recording will always be 1. This is because the
// parent class mallocs one large allocation for the number of tensors in
// the graph (e.g. sizeof(TfLiteEvalTensor) * num_tensors). To prevent extra
// overhead and potential for fragmentation, manually adjust the accounting
// by decrementing by 1 and adding the actual number of tensors used in the
// graph:
recorded_tflite_eval_tensor_data_.count +=
model->subgraphs()->Get(subgraph_idx)->tensors()->size() - 1;
}
return status;
}
@@ -197,24 +187,24 @@ TfLiteStatus RecordingMicroAllocator::AllocateVariables(
return status;
}
TfLiteTensor* RecordingMicroAllocator::AllocatePersistentTfLiteTensorInternal(
const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index) {
TfLiteTensor*
RecordingMicroAllocator::AllocatePersistentTfLiteTensorInternal() {
RecordedAllocation allocations = SnapshotAllocationUsage();
TfLiteTensor* result = MicroAllocator::AllocatePersistentTfLiteTensorInternal(
model, eval_tensors, tensor_index);
TfLiteTensor* result =
MicroAllocator::AllocatePersistentTfLiteTensorInternal();
RecordAllocationUsage(allocations, recorded_persistent_tflite_tensor_data_);
return result;
}
TfLiteStatus RecordingMicroAllocator::PopulateTfLiteTensorFromFlatbuffer(
const Model* model, const SubGraph* subgraph, TfLiteTensor* tensor,
int tensor_index, bool allocate_temp) {
const Model* model, TfLiteTensor* tensor, int tensor_index,
int subgraph_index, bool allocate_temp) {
RecordedAllocation allocations = SnapshotAllocationUsage();
TfLiteStatus status = MicroAllocator::PopulateTfLiteTensorFromFlatbuffer(
model, subgraph, tensor, tensor_index, allocate_temp);
model, tensor, tensor_index, subgraph_index, allocate_temp);
RecordAllocationUsage(allocations,
recorded_persistent_tflite_tensor_quantization_data_);

View File

@@ -72,27 +72,22 @@ class RecordingMicroAllocator : public MicroAllocator {
protected:
TfLiteStatus AllocateNodeAndRegistrations(
const Model* model,
NodeAndRegistration** node_and_registrations) override;
TfLiteStatus PrepareNodeAndRegistrationDataFromFlatbuffer(
const Model* model, const MicroOpResolver& op_resolver,
NodeAndRegistration* node_and_registrations) override;
const Model* model, SubgraphAllocations* subgraph_allocations) override;
TfLiteStatus AllocateTfLiteEvalTensors(
const Model* model, TfLiteEvalTensor** eval_tensors) override;
const Model* model, SubgraphAllocations* subgraph_allocations) override;
TfLiteStatus AllocateVariables(const SubGraph* subgraph,
TfLiteEvalTensor* eval_tensors) override;
// TODO(b/162311891): Once all kernels have been updated to the new API drop
// this method. It is only used to record TfLiteTensor persistent allocations.
TfLiteTensor* AllocatePersistentTfLiteTensorInternal(
const Model* model, TfLiteEvalTensor* eval_tensors,
int tensor_index) override;
TfLiteTensor* AllocatePersistentTfLiteTensorInternal() override;
// TODO(b/162311891): Once all kernels have been updated to the new API drop
// this function since all allocations for quantized data will take place in
// the temp section.
TfLiteStatus PopulateTfLiteTensorFromFlatbuffer(const Model* model,
const SubGraph* subgraph,
TfLiteTensor* tensor,
int tensor_index,
int subgraph_index,
bool allocate_temp) override;
private:
@@ -115,6 +110,8 @@ class RecordingMicroAllocator : public MicroAllocator {
RecordedAllocation recorded_persistent_buffer_data_ = {};
RecordedAllocation recorded_tflite_tensor_variable_buffer_data_ = {};
RecordedAllocation recorded_node_and_registration_array_data_ = {};
// TODO(b/187993291): Re-enable OpData allocating tracking.
RecordedAllocation recorded_op_data_ = {};
TF_LITE_REMOVE_VIRTUAL_DELETE

View File

@@ -37,11 +37,12 @@ class RecordingMicroInterpreter : public MicroInterpreter {
RecordingMicroInterpreter(const Model* model,
const MicroOpResolver& op_resolver,
uint8_t* tensor_arena, size_t tensor_arena_size,
ErrorReporter* error_reporter)
ErrorReporter* error_reporter,
MicroProfiler* profiler = nullptr)
: MicroInterpreter(model, op_resolver,
RecordingMicroAllocator::Create(
tensor_arena, tensor_arena_size, error_reporter),
error_reporter),
error_reporter, profiler),
recording_micro_allocator_(
static_cast<const RecordingMicroAllocator&>(allocator())) {}

View File

@@ -195,7 +195,7 @@ const Model* ModelBuilder::BuildModel(
buffers[i] = metadata_buffers_[i - 1];
}
// TFLM only supports single subgraph.
// Default to single subgraph model.
constexpr size_t subgraphs_size = 1;
// Find out number of subgraph inputs.
@@ -341,6 +341,72 @@ const Model* BuildModelWithOfflinePlanning(int number_of_tensors,
node_conn[0].input, node_conn[num_conns - 1].output, num_subgraph_inputs);
}
const Model* BuildModelWithUnusedInputs() {
using flatbuffers::Offset;
flatbuffers::FlatBufferBuilder* builder = BuilderInstance();
constexpr size_t buffers_size = 1;
const Offset<Buffer> buffers[buffers_size] = {CreateBuffer(*builder)};
constexpr size_t tensor_shape_size = 2;
const int32_t tensor_shape[tensor_shape_size] = {1, 64};
constexpr size_t tensors_size = 4;
const Offset<Tensor> tensors[tensors_size] = {
CreateTensor(*builder,
builder->CreateVector(tensor_shape, tensor_shape_size),
TensorType_INT8, 0,
builder->CreateString("test_input_tensor"), 0, false),
CreateTensor(*builder,
builder->CreateVector(tensor_shape, tensor_shape_size),
TensorType_INT8, 0,
builder->CreateString("test_unused_input_tensor"), 0, false),
CreateTensor(*builder,
builder->CreateVector(tensor_shape, tensor_shape_size),
TensorType_INT8, 0,
builder->CreateString("test_output_tensor"), 0, false),
CreateTensor(*builder,
builder->CreateVector(tensor_shape, tensor_shape_size),
TensorType_INT8, 0,
builder->CreateString("test_unused_tensor"), 0, false),
};
constexpr size_t inputs_size = 2;
const int32_t inputs[inputs_size] = {0, 1};
constexpr size_t outputs_size = 1;
const int32_t outputs[outputs_size] = {2};
constexpr size_t operator_inputs_size = 1;
const int32_t operator_inputs[operator_inputs_size] = {0};
constexpr size_t operator_outputs_size = 1;
const int32_t operator_outputs[operator_outputs_size] = {2};
constexpr size_t operators_size = 1;
const Offset<Operator> operators[operators_size] = {
CreateOperator(
*builder, 0,
builder->CreateVector(operator_inputs, operator_inputs_size),
builder->CreateVector(operator_outputs, operator_outputs_size),
BuiltinOptions_NONE),
};
constexpr size_t subgraphs_size = 1;
const Offset<SubGraph> subgraphs[subgraphs_size] = {
CreateSubGraph(*builder, builder->CreateVector(tensors, tensors_size),
builder->CreateVector(inputs, inputs_size),
builder->CreateVector(outputs, outputs_size),
builder->CreateVector(operators, operators_size),
builder->CreateString("test_subgraph"))};
constexpr size_t operator_codes_size = 1;
const Offset<OperatorCode> operator_codes[operator_codes_size] = {
CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0,
"mock_custom",
/*version=*/0, BuiltinOperator_CUSTOM)};
const Offset<Model> model_offset = CreateModel(
*builder, 0, builder->CreateVector(operator_codes, operator_codes_size),
builder->CreateVector(subgraphs, subgraphs_size),
builder->CreateString("test_model"),
builder->CreateVector(buffers, buffers_size));
FinishModelBuffer(*builder, model_offset);
void* model_pointer = builder->GetBufferPointer();
const Model* model = flatbuffers::GetRoot<Model>(model_pointer);
return model;
}
const Model* BuildSimpleMockModel() {
using flatbuffers::Offset;
flatbuffers::FlatBufferBuilder* builder = BuilderInstance();
@@ -638,6 +704,125 @@ const Model* BuildSimpleMultipleInputsModel() {
return model;
}
const Model* BuildSimpleModelWithSubgraphsAndIf() {
using flatbuffers::Offset;
flatbuffers::FlatBufferBuilder* builder = BuilderInstance();
constexpr size_t buffers_size = 1;
const Offset<Buffer> buffers[buffers_size] = {
CreateBuffer(*builder),
};
const int32_t condition_tensor_shape[] = {1};
const int32_t data_tensor_shape[] = {1, 2};
constexpr size_t tensors_size = 4;
const Offset<Tensor> subgraph1_tensors[tensors_size] = {
CreateTensor(*builder, builder->CreateVector(condition_tensor_shape, 1),
TensorType_BOOL, 0,
builder->CreateString("condition tensor"), 0, false),
CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
TensorType_FLOAT32, 0,
builder->CreateString("input_tensor1"), 0, false),
CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
TensorType_FLOAT32, 0,
builder->CreateString("input_tensor2"), 0, false),
CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
TensorType_FLOAT32, 0,
builder->CreateString("output_tensor"), 0, false),
};
const Offset<Tensor> subgraph2_tensors[tensors_size] = {
CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
TensorType_FLOAT32, 0,
builder->CreateString("input_tensor1"), 0, false),
CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
TensorType_FLOAT32, 0,
builder->CreateString("input_tensor2"), 0, false),
CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
TensorType_FLOAT32, 0,
builder->CreateString("output_tensor"), 0, false),
};
const Offset<Tensor> subgraph3_tensors[tensors_size] = {
CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
TensorType_FLOAT32, 0,
builder->CreateString("input_tensor1"), 0, false),
CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
TensorType_FLOAT32, 0,
builder->CreateString("input_tensor2"), 0, false),
CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
TensorType_FLOAT32, 0,
builder->CreateString("output_tensor"), 0, false),
};
constexpr size_t if_inputs_size = 3;
const int32_t if_inputs[if_inputs_size] = {0, 1, 2};
constexpr size_t outputs_size = 1;
const int32_t if_outputs[outputs_size] = {3};
constexpr size_t operator_inputs_size = 2;
const int32_t operator_inputs[operator_inputs_size] = {0, 1};
const int32_t operator_outputs[outputs_size] = {2};
constexpr size_t operators_size = 1;
const Offset<Operator> subgraph1_operators[operators_size] = {
CreateOperator(
*builder, 0, builder->CreateVector(if_inputs, if_inputs_size),
builder->CreateVector(if_outputs, outputs_size),
BuiltinOptions_IfOptions, CreateIfOptions(*builder, 1, 2).Union()),
};
const Offset<Operator> subgraph2_operators[operators_size] = {
CreateOperator(
*builder, 1,
builder->CreateVector(operator_inputs, operator_inputs_size),
builder->CreateVector(operator_outputs, outputs_size),
BuiltinOptions_NONE),
};
const Offset<Operator> subgraph3_operators[operators_size] = {
CreateOperator(
*builder, 2,
builder->CreateVector(operator_inputs, operator_inputs_size),
builder->CreateVector(operator_outputs, outputs_size),
BuiltinOptions_NONE),
};
constexpr size_t subgraphs_size = 3;
const Offset<SubGraph> subgraphs[subgraphs_size] = {
CreateSubGraph(*builder, builder->CreateVector(subgraph1_tensors, 4),
builder->CreateVector(if_inputs, if_inputs_size),
builder->CreateVector(if_outputs, outputs_size),
builder->CreateVector(subgraph1_operators, operators_size),
builder->CreateString("if_subgraph")),
CreateSubGraph(
*builder, builder->CreateVector(subgraph2_tensors, 3),
builder->CreateVector(operator_inputs, operator_inputs_size),
builder->CreateVector(operator_outputs, outputs_size),
builder->CreateVector(subgraph2_operators, operators_size),
builder->CreateString("then_subgraph")),
CreateSubGraph(
*builder, builder->CreateVector(subgraph3_tensors, 3),
builder->CreateVector(operator_inputs, operator_inputs_size),
builder->CreateVector(operator_outputs, outputs_size),
builder->CreateVector(subgraph3_operators, operators_size),
builder->CreateString("else_subgraph")),
};
constexpr size_t operator_codes_size = 3;
const Offset<OperatorCode> operator_codes[operator_codes_size] = {
CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0,
"multiple_inputs_op",
/*version=*/0, BuiltinOperator_IF),
CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0,
"multiple_inputs_op",
/*version=*/0, BuiltinOperator_ADD),
CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0,
"multiple_inputs_op",
/*version=*/0, BuiltinOperator_MUL),
};
const Offset<Model> model_offset = CreateModel(
*builder, 0, builder->CreateVector(operator_codes, operator_codes_size),
builder->CreateVector(subgraphs, subgraphs_size),
builder->CreateString("test_model"),
builder->CreateVector(buffers, buffers_size));
FinishModelBuffer(*builder, model_offset);
void* model_pointer = builder->GetBufferPointer();
const Model* model = flatbuffers::GetRoot<Model>(model_pointer);
return model;
}
} // namespace
const TfLiteRegistration* SimpleStatefulOp::getRegistration() {
@@ -834,6 +1019,13 @@ AllOpsResolver GetOpResolver() {
MultipleInputs::GetMutableRegistration());
return op_resolver;
}
const Model* GetModelWithUnusedInputs() {
static Model* model = nullptr;
if (!model) {
model = const_cast<Model*>(BuildModelWithUnusedInputs());
}
return model;
}
const Model* GetSimpleMockModel() {
static Model* model = nullptr;
@@ -851,6 +1043,14 @@ const Model* GetSimpleMultipleInputsModel() {
return model;
}
const Model* GetSimpleModelWithSubgraphsAndIf() {
static Model* model = nullptr;
if (!model) {
model = const_cast<Model*>(BuildSimpleModelWithSubgraphsAndIf());
}
return model;
}
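
A sketch of exercising this model end to end; the resolver contents, arena size, and input wiring are assumptions rather than code from this commit:

  const tflite::Model* model =
      tflite::testing::GetSimpleModelWithSubgraphsAndIf();
  tflite::MicroMutableOpResolver<3> resolver;
  resolver.AddIf();
  resolver.AddAdd();
  resolver.AddMul();
  uint8_t arena[4096];
  tflite::MicroInterpreter interpreter(model, resolver, arena, sizeof(arena),
                                       tflite::GetMicroErrorReporter());
  interpreter.AllocateTensors();
  interpreter.input(0)->data.b[0] = true;  // condition: run the "then" subgraph
  interpreter.Invoke();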
const Model* GetComplexMockModel() {
static Model* model = nullptr;
if (!model) {
@@ -984,9 +1184,8 @@ void ReportOpError(struct TfLiteContext* context, const char* format, ...) {
// Create a TfLiteIntArray from an array of ints. The first element in the
// supplied array must be the size of the array expressed as an int.
TfLiteIntArray* IntArrayFromInts(const int* int_array) {
return const_cast<TfLiteIntArray*>(
reinterpret_cast<const TfLiteIntArray*>(int_array));
TfLiteIntArray* IntArrayFromInts(int* int_array) {
return reinterpret_cast<TfLiteIntArray*>(int_array);
}
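
A small example of the size-prefixed layout this helper expects; the first element is the length, not data:

  int dims_data[] = {3, 1, 2, 3};  // describes the 3-element array {1, 2, 3}
  TfLiteIntArray* dims = tflite::testing::IntArrayFromInts(dims_data);
  // dims->size == 3 and dims->data[0] == 1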
// Create a TfLiteFloatArray from an array of floats. The first element in the
@@ -999,6 +1198,20 @@ TfLiteFloatArray* FloatArrayFromFloats(const float* floats) {
return reinterpret_cast<TfLiteFloatArray*>(const_cast<float*>(floats));
}
TfLiteTensor CreateQuantizedBiasTensor(const float* data, int16_t* quantized,
TfLiteIntArray* dims, float input_scale,
float weights_scale, bool is_variable) {
float bias_scale = input_scale * weights_scale;
tflite::SymmetricQuantize(data, quantized, ElementCount(*dims), bias_scale);
// Quantized int16_t tensors always have a zero point of 0, since the range of
// int16_t values is large, and because zero point costs extra cycles during
// processing.
TfLiteTensor result =
CreateQuantizedTensor(quantized, dims, bias_scale, 0, is_variable);
return result;
}
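A hedged usage sketch for the new int16_t overload; the scales and shape are arbitrary illustration values, not taken from this commit.
  const float bias_values[] = {1.0f, -2.0f};
  int16_t bias_quantized[2];
  int bias_dims_data[] = {1, 2};  // length-prefixed: rank 1, 2 elements
  TfLiteIntArray* bias_dims = tflite::testing::IntArrayFromInts(bias_dims_data);
  // The bias scale is derived internally as input_scale * weights_scale.
  TfLiteTensor bias_tensor = tflite::testing::CreateQuantizedBiasTensor(
      bias_values, bias_quantized, bias_dims, /*input_scale=*/0.5f,
      /*weights_scale=*/0.25f);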
TfLiteTensor CreateQuantizedBiasTensor(const float* data, int32_t* quantized,
TfLiteIntArray* dims, float input_scale,
float weights_scale, bool is_variable) {
@@ -1013,11 +1226,27 @@ TfLiteTensor CreateQuantizedBiasTensor(const float* data, int32_t* quantized,
return result;
}
TfLiteTensor CreateQuantizedBiasTensor(const float* data,
std::int64_t* quantized,
TfLiteIntArray* dims, float input_scale,
float weights_scale, bool is_variable) {
float bias_scale = input_scale * weights_scale;
tflite::SymmetricQuantize(data, quantized, ElementCount(*dims), bias_scale);
  // Quantized int64_t tensors always have a zero point of 0, since the range of
  // int64_t values is large, and because zero point costs extra cycles during
  // processing.
TfLiteTensor result =
CreateQuantizedTensor(quantized, dims, bias_scale, 0, is_variable);
return result;
}
// Quantizes a bias tensor (the element type is given by the template parameter
// T, e.g. int32_t or int64_t) with per-channel scales determined by the input
// scale multiplied by the weight scale for each channel.
template <typename T>
TfLiteTensor CreatePerChannelQuantizedBiasTensor(
const float* input, int32_t* quantized, TfLiteIntArray* dims,
float input_scale, float* weight_scales, float* scales, int* zero_points,
const float* input, T* quantized, TfLiteIntArray* dims, float input_scale,
float* weight_scales, float* scales, int* zero_points,
TfLiteAffineQuantization* affine_quant, int quantized_dimension,
bool is_variable) {
int input_size = ElementCount(*dims);
@@ -1031,8 +1260,8 @@ TfLiteTensor CreatePerChannelQuantizedBiasTensor(
zero_points[i + 1] = 0;
}
SymmetricPerChannelQuantize<int32_t>(input, quantized, input_size,
num_channels, scales_array);
SymmetricPerChannelQuantize<T>(input, quantized, input_size, num_channels,
scales_array);
affine_quant->scale = FloatArrayFromFloats(scales);
affine_quant->zero_point = IntArrayFromInts(zero_points);
@@ -1043,6 +1272,26 @@ TfLiteTensor CreatePerChannelQuantizedBiasTensor(
return result;
}
TfLiteTensor CreatePerChannelQuantizedBiasTensor(
const float* input, int32_t* quantized, TfLiteIntArray* dims,
float input_scale, float* weight_scales, float* scales, int* zero_points,
TfLiteAffineQuantization* affine_quant, int quantized_dimension,
bool is_variable) {
return CreatePerChannelQuantizedBiasTensor<int32_t>(
input, quantized, dims, input_scale, weight_scales, scales, zero_points,
affine_quant, quantized_dimension, is_variable);
}
TfLiteTensor CreatePerChannelQuantizedBiasTensor(
const float* input, std::int64_t* quantized, TfLiteIntArray* dims,
float input_scale, float* weight_scales, float* scales, int* zero_points,
TfLiteAffineQuantization* affine_quant, int quantized_dimension,
bool is_variable) {
return CreatePerChannelQuantizedBiasTensor<std::int64_t>(
input, quantized, dims, input_scale, weight_scales, scales, zero_points,
affine_quant, quantized_dimension, is_variable);
}
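Sketch of how a kernel test might call the new int64_t per-channel wrapper (not part of this commit). The channel count, the scale values, and the explicit length prefixes written into the scratch arrays are assumptions for illustration.
  const float bias_values[] = {0.5f, -0.5f};
  std::int64_t bias_quantized[2];
  int bias_dims_data[] = {1, 2};  // rank 1, 2 output channels
  TfLiteIntArray* bias_dims = tflite::testing::IntArrayFromInts(bias_dims_data);
  float weight_scales[] = {0.1f, 0.2f};  // one scale per output channel
  float scales[3] = {2.0f};      // length prefix + per-channel slots (filled by helper)
  int zero_points[3] = {2};      // length prefix + per-channel slots (filled by helper)
  TfLiteAffineQuantization affine_quant;
  TfLiteTensor bias = tflite::testing::CreatePerChannelQuantizedBiasTensor(
      bias_values, bias_quantized, bias_dims, /*input_scale=*/0.5f,
      weight_scales, scales, zero_points, &affine_quant,
      /*quantized_dimension=*/0);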
TfLiteTensor CreateSymmetricPerChannelQuantizedTensor(
const float* input, int8_t* quantized, TfLiteIntArray* dims, float* scales,
int* zero_points, TfLiteAffineQuantization* affine_quant,

View File

@@ -16,15 +16,13 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_MICRO_TEST_HELPERS_H_
#define TENSORFLOW_LITE_MICRO_TEST_HELPERS_H_
// Useful functions for writing tests.
#include <cstdint>
#include <limits>
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite//kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/micro_utils.h"
#include "tensorflow/lite/portable_type_to_tflitetype.h"
@@ -126,9 +124,16 @@ const Model* GetModelWithOfflinePlanning(int num_tensors,
int num_conns,
int num_subgraph_inputs = 0);
// Returns a flatbuffer model with a single operator, two inputs (one of them
// unused), and one output.
const Model* GetModelWithUnusedInputs();
// Returns a flatbuffer model with `simple_stateful_op`
const Model* GetSimpleStatefulModel();
// Returns a flatbuffer model with an IF operator whose main subgraph selects
// between two branch subgraphs ("then" and "else").
const Model* GetSimpleModelWithSubgraphsAndIf();
// Builds a one-dimensional flatbuffer tensor of the given size.
const Tensor* Create1dFlatbufferTensor(int size, bool is_variable = false);
@@ -154,7 +159,7 @@ void PopulateContext(TfLiteTensor* tensors, int tensors_size,
// Create a TfLiteIntArray from an array of ints. The first element in the
// supplied array must be the size of the array expressed as an int.
TfLiteIntArray* IntArrayFromInts(const int* int_array);
TfLiteIntArray* IntArrayFromInts(int* int_array);
// Create a TfLiteFloatArray from an array of floats. The first element in the
// supplied array must be the size of the array expressed as a float.
@@ -199,11 +204,22 @@ TfLiteTensor CreateQuantizedTensor(const float* input, T* quantized,
return CreateQuantizedTensor(quantized, dims, scale, zero_point, is_variable);
}
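For context, a sketch of how the surrounding templated helper (which quantizes a float array into T and wraps it in a TfLiteTensor) is typically used; the values and the int8_t instantiation are assumptions for illustration, not part of this commit.
  const float values[] = {0.5f, -1.0f, 1.5f};
  int8_t quantized[3];
  int dims_data[] = {1, 3};  // rank 1, 3 elements
  TfLiteIntArray* dims = tflite::testing::IntArrayFromInts(dims_data);
  TfLiteTensor t = tflite::testing::CreateQuantizedTensor(
      values, quantized, dims, /*scale=*/0.05f, /*zero_point=*/0);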
TfLiteTensor CreateQuantizedBiasTensor(const float* data, int16_t* quantized,
TfLiteIntArray* dims, float input_scale,
float weights_scale,
bool is_variable = false);
TfLiteTensor CreateQuantizedBiasTensor(const float* data, int32_t* quantized,
TfLiteIntArray* dims, float input_scale,
float weights_scale,
bool is_variable = false);
TfLiteTensor CreateQuantizedBiasTensor(const float* data,
std::int64_t* quantized,
TfLiteIntArray* dims, float input_scale,
float weights_scale,
bool is_variable = false);
// Quantizes an int32_t bias tensor with per-channel scales determined by the
// input scale multiplied by the weight scale for each channel.
TfLiteTensor CreatePerChannelQuantizedBiasTensor(
@@ -212,6 +228,14 @@ TfLiteTensor CreatePerChannelQuantizedBiasTensor(
TfLiteAffineQuantization* affine_quant, int quantized_dimension,
bool is_variable = false);
// Quantizes an int64_t bias tensor with per-channel scales determined by the
// input scale multiplied by the weight scale for each channel.
TfLiteTensor CreatePerChannelQuantizedBiasTensor(
const float* input, std::int64_t* quantized, TfLiteIntArray* dims,
float input_scale, float* weight_scales, float* scales, int* zero_points,
TfLiteAffineQuantization* affine_quant, int quantized_dimension,
bool is_variable = false);
TfLiteTensor CreateSymmetricPerChannelQuantizedTensor(
const float* input, int8_t* quantized, TfLiteIntArray* dims, float* scales,
int* zero_points, TfLiteAffineQuantization* affine_quant,