Update tflite

jomjol
2020-11-08 03:27:52 +01:00
parent 05a0f6fa62
commit 84cea8e3d6
169 changed files with 16367 additions and 11456 deletions

View File

@@ -0,0 +1,94 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/kernels/micro_ops.h"
namespace tflite {
namespace ops {
namespace micro {
namespace custom {
TfLiteRegistration* Register_ETHOSU();
const char* GetString_ETHOSU();
} // namespace custom
} // namespace micro
} // namespace ops
AllOpsResolver::AllOpsResolver() {
// Please keep this list of Builtin Operators in alphabetical order.
AddAbs();
AddAdd();
AddArgMax();
AddArgMin();
AddAveragePool2D();
AddCeil();
AddConcatenation();
AddConv2D();
AddCos();
AddDepthwiseConv2D();
AddDequantize();
AddEqual();
AddFloor();
AddFullyConnected();
AddGreater();
AddGreaterEqual();
AddHardSwish();
AddL2Normalization();
AddLess();
AddLessEqual();
AddLog();
AddLogicalAnd();
AddLogicalNot();
AddLogicalOr();
AddLogistic();
AddMaximum();
AddMaxPool2D();
AddMean();
AddMinimum();
AddMul();
AddNeg();
AddNotEqual();
AddPack();
AddPad();
AddPadV2();
AddPrelu();
AddQuantize();
AddReduceMax();
AddRelu();
AddRelu6();
AddReshape();
AddResizeNearestNeighbor();
AddRound();
AddRsqrt();
AddShape();
AddSin();
AddSoftmax();
AddSplit();
AddSplitV();
AddSqrt();
AddSquare();
AddStridedSlice();
AddSub();
AddSvdf();
AddTanh();
AddUnpack();
// TODO(b/159644355): Figure out if custom Ops belong in AllOpsResolver.
TfLiteRegistration* registration =
tflite::ops::micro::custom::Register_ETHOSU();
if (registration) {
AddCustom(tflite::ops::micro::custom::GetString_ETHOSU(), registration);
}
}
} // namespace tflite
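
For orientation, here is a minimal sketch of how an application typically hands this resolver to the micro interpreter. The model symbol, arena size, and function name below are illustrative placeholders, and the constructor signature is the one commonly used with this generation of the library rather than anything introduced by this commit:

#include <cstddef>
#include <cstdint>

#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/schema/schema_generated.h"

namespace {
constexpr size_t kArenaSize = 10 * 1024;       // placeholder arena size
alignas(16) uint8_t tensor_arena[kArenaSize];  // tensor + scratch memory
}  // namespace

extern const unsigned char g_model_data[];     // placeholder model blob

void RunModelOnce() {
  static tflite::MicroErrorReporter error_reporter;
  const tflite::Model* model = tflite::GetModel(g_model_data);
  static tflite::AllOpsResolver resolver;  // note: now directly in namespace tflite
  tflite::MicroInterpreter interpreter(model, resolver, tensor_arena,
                                       kArenaSize, &error_reporter);
  if (interpreter.AllocateTensors() != kTfLiteOk) {
    return;
  }
  interpreter.Invoke();
}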

View File

@@ -9,17 +9,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ALL_OPS_RESOLVER_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_ALL_OPS_RESOLVER_H_
#ifndef TENSORFLOW_LITE_MICRO_ALL_OPS_RESOLVER_H_
#define TENSORFLOW_LITE_MICRO_ALL_OPS_RESOLVER_H_
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
namespace tflite {
namespace ops {
namespace micro {
class AllOpsResolver : public MicroMutableOpResolver {
// The magic number in the template parameter is the maximum number of ops that
// can be added to AllOpsResolver. It can be increased if needed. Most
// applications that care about memory footprint will want to use
// MicroMutableOpResolver directly, with an application-specific template
// parameter.
// The examples directory has sample code for this.
class AllOpsResolver : public MicroMutableOpResolver<128> {
public:
AllOpsResolver();
@@ -27,8 +30,6 @@ class AllOpsResolver : public MicroMutableOpResolver {
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace micro
} // namespace ops
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_ALL_OPS_RESOLVER_H_
#endif // TENSORFLOW_LITE_MICRO_ALL_OPS_RESOLVER_H_
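
As the new comment says, AllOpsResolver is a convenience with a 128-op ceiling; footprint-sensitive applications register only the ops their model uses. A minimal sketch of that pattern, with an arbitrary choice of four ops purely for illustration:

#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

// Four registration slots instead of AllOpsResolver's 128, so the linker can
// drop every kernel that is never added here.
static tflite::MicroMutableOpResolver<4> resolver;

void RegisterOps() {
  // These Add* helpers are the same calls AllOpsResolver uses above.
  resolver.AddConv2D();
  resolver.AddDepthwiseConv2D();
  resolver.AddFullyConnected();
  resolver.AddSoftmax();
}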

File diff suppressed because it is too large

View File

@@ -0,0 +1,22 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_BENCHMARKS_KEYWORD_SCRAMBLED_MODEL_DATA_H_
#define TENSORFLOW_LITE_MICRO_BENCHMARKS_KEYWORD_SCRAMBLED_MODEL_DATA_H_
extern const unsigned char g_keyword_scrambled_model_data[];
extern const unsigned int g_keyword_scrambled_model_data_length;
#endif // TENSORFLOW_LITE_MICRO_BENCHMARKS_KEYWORD_SCRAMBLED_MODEL_DATA_H_

View File

@@ -1,4 +1,4 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -36,6 +36,15 @@ limitations under the License.
#include "tensorflow/lite/micro/debug_log.h"
#ifndef TF_LITE_STRIP_ERROR_STRINGS
#include <cstdio>
#endif
extern "C" void DebugLog(const char* s) { fprintf(stderr, "%s", s); }
extern "C" void DebugLog(const char* s) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
// Reusing TF_LITE_STRIP_ERROR_STRINGS to disable DebugLog completely to get
// maximum reduction in binary size. This is because we have DebugLog calls
// via TF_LITE_CHECK that are not stubbed out by TF_LITE_REPORT_ERROR.
fprintf(stderr, "%s", s);
#endif
}

View File

@@ -15,9 +15,17 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_
#define TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
// This function should be implemented by each target platform, and provide a
// way for strings to be output to some text stream. For more information, see
// tensorflow/lite/micro/debug_log.cc.
extern "C" void DebugLog(const char* s);
void DebugLog(const char* s);
#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus
#endif // TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_
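
The comment above is the whole porting contract: every target supplies its own DebugLog, and the reference implementation in debug_log.cc simply forwards to fprintf. A hedged sketch of a bare-metal port, where uart_write_byte() is a hypothetical board-support routine, not part of the library:

#include "tensorflow/lite/micro/debug_log.h"

// Hypothetical BSP primitive; any blocking byte-output routine works here.
extern "C" void uart_write_byte(char c);

extern "C" void DebugLog(const char* s) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
  // Mirror the reference implementation: compile to a no-op when error
  // strings are stripped, so the format strings can fall out of the binary.
  while (*s != '\0') {
    uart_write_byte(*s++);
  }
#endif
}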

View File

@@ -21,6 +21,8 @@ limitations under the License.
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/max.h"
#include "tensorflow/lite/kernels/internal/min.h"
namespace tflite {
namespace ops {
@@ -32,11 +34,11 @@ inline float ActivationValFloat(TfLiteFusedActivation act, float a) {
case kTfLiteActNone:
return a;
case kTfLiteActRelu:
return std::max(0.0f, a);
case kTfLiteActRelu1:
return std::max(-1.0f, std::min(a, 1.0f));
return TfLiteMax(0.0f, a);
case kTfLiteActReluN1To1:
return TfLiteMax(-1.0f, TfLiteMin(a, 1.0f));
case kTfLiteActRelu6:
return std::max(0.0f, std::min(a, 6.0f));
return TfLiteMax(0.0f, TfLiteMin(a, 6.0f));
case kTfLiteActTanh:
return std::tanh(a);
case kTfLiteActSignBit:
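
Switching from std::max/std::min to TfLiteMax/TfLiteMin routes the clamping through the new max.h/min.h headers, giving ports with unusual standard libraries a single override point. Conceptually the wrappers are thin; the sketch below shows the assumed shape (the real headers may special-case particular toolchains, so treat it as an approximation rather than their exact contents):

#include <algorithm>

namespace tflite {

// Thin forwarding wrappers; a port can swap these out without touching the
// kernels that call them.
template <class T>
inline T TfLiteMax(const T& x, const T& y) {
  return std::max(x, y);
}

template <class T>
inline T TfLiteMin(const T& x, const T& y) {
  return std::min(x, y);
}

}  // namespace tflite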

View File

@@ -18,30 +18,82 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace ops {
namespace micro {
namespace activations {
namespace {
struct ReluOpData {
ReluParams params;
};
struct Relu6OpData {
int8_t six_int8;
int8_t zero_int8;
uint8_t six_uint8;
uint8_t zero_uint8;
};
} // namespace
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
template <typename Q>
inline void ReluQuantized(int32_t lower, const RuntimeShape& input_shape,
const Q* input_data, const RuntimeShape& output_shape,
Q* output_data) {
template <typename T>
inline void ReluQuantized(const ReluOpData& data,
const RuntimeShape& input_shape,
const RuntimeShape& output_shape, const T* input_data,
T* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const Q val = input_data[i];
const Q clamped = val < lower ? lower : val;
output_data[i] = clamped;
const int32_t val = static_cast<int32_t>(input_data[i]);
int32_t clamped =
data.params.output_offset +
MultiplyByQuantizedMultiplier(val - data.params.input_offset,
data.params.output_multiplier,
data.params.output_shift);
clamped = std::max(data.params.quantized_activation_min, clamped);
clamped = std::min(data.params.quantized_activation_max, clamped);
output_data[i] = static_cast<T>(clamped);
}
}
template <typename T>
inline void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
ReluOpData* data) {
float act_min = 0.0;
float act_max = std::numeric_limits<float>::infinity();
double real_multiplier =
static_cast<double>(input->params.scale / output->params.scale);
const RuntimeShape input_shape = GetTensorShape(input);
const RuntimeShape output_shape = GetTensorShape(output);
QuantizeMultiplier(real_multiplier, &data->params.output_multiplier,
&data->params.output_shift);
data->params.quantized_activation_min = std::max(
static_cast<int32_t>(std::numeric_limits<T>::min()),
output->params.zero_point +
static_cast<int32_t>(roundf(act_min / output->params.scale)));
data->params.quantized_activation_max =
act_max == std::numeric_limits<float>::infinity()
? static_cast<int32_t>(std::numeric_limits<T>::max())
: std::min(static_cast<int32_t>(std::numeric_limits<T>::max()),
output->params.zero_point +
static_cast<int32_t>(
roundf(act_max / output->params.scale)));
data->params.input_offset = input->params.zero_point;
data->params.output_offset = output->params.zero_point;
}
inline void ReluFloat(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
@@ -77,33 +129,59 @@ inline void Relu6Quantized(Q lower, Q upper, const RuntimeShape& input_shape,
}
}
void* ReluInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(ReluOpData));
}
TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
ReluOpData* data = static_cast<ReluOpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
if (input->type == kTfLiteInt8) {
CalculateReluOpData<int8_t>(input, output, data);
} else if (input->type == kTfLiteUInt8) {
CalculateReluOpData<uint8_t>(input, output, data);
}
return kTfLiteOk;
}
TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const ReluOpData& data = *(static_cast<const ReluOpData*>(node->user_data));
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (input->type) {
case kTfLiteFloat32: {
ReluFloat(GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<float>(output));
ReluFloat(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
}
case kTfLiteInt8: {
ReluQuantized<int8_t>(input->params.zero_point, GetTensorShape(input),
GetTensorData<int8_t>(input),
GetTensorShape(output),
GetTensorData<int8_t>(output));
ReluQuantized<int8_t>(data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
}
case kTfLiteUInt8: {
ReluQuantized<uint8_t>(input->params.zero_point, GetTensorShape(input),
GetTensorData<uint8_t>(input),
GetTensorShape(output),
GetTensorData<uint8_t>(output));
ReluQuantized<uint8_t>(data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorData<uint8_t>(output));
return kTfLiteOk;
}
default: {
@@ -114,37 +192,63 @@ TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
}
}
void* Relu6Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(Relu6OpData));
}
TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
Relu6OpData* data = static_cast<Relu6OpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
if (input->type == kTfLiteInt8) {
data->six_int8 = FloatToQuantizedType<int8_t>(6.0f, input->params.scale,
input->params.zero_point);
data->zero_int8 = input->params.zero_point;
} else if (input->type == kTfLiteUInt8) {
data->six_uint8 = FloatToQuantizedType<uint8_t>(6.0f, input->params.scale,
input->params.zero_point);
data->zero_uint8 = input->params.zero_point;
}
return kTfLiteOk;
}
TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const Relu6OpData& data = *(static_cast<const Relu6OpData*>(node->user_data));
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (input->type) {
case kTfLiteFloat32: {
Relu6Float(GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<float>(output));
Relu6Float(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
}
case kTfLiteInt8: {
const int8_t six = FloatToAsymmetricQuantizedInt8(
6.0f, input->params.scale, input->params.zero_point);
const int8_t zero = input->params.zero_point;
Relu6Quantized<int8_t>(
zero, six, GetTensorShape(input), GetTensorData<int8_t>(input),
GetTensorShape(output), GetTensorData<int8_t>(output));
Relu6Quantized<int8_t>(data.zero_int8, data.six_int8,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
}
case kTfLiteUInt8: {
const uint8_t six = FloatToAsymmetricQuantizedUInt8(
6.0f, input->params.scale, input->params.zero_point);
const uint8_t zero = input->params.zero_point;
Relu6Quantized<uint8_t>(
zero, six, GetTensorShape(input), GetTensorData<uint8_t>(input),
GetTensorShape(output), GetTensorData<uint8_t>(output));
Relu6Quantized<uint8_t>(data.zero_uint8, data.six_uint8,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
return kTfLiteOk;
}
default: {
@@ -157,28 +261,26 @@ TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
} // namespace activations
TfLiteRegistration* Register_RELU() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/activations::ReluPrepare,
/*invoke=*/activations::ReluEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_RELU() {
return {/*init=*/activations::ReluInit,
/*free=*/nullptr,
/*prepare=*/activations::ReluPrepare,
/*invoke=*/activations::ReluEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration* Register_RELU6() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/activations::Relu6Prepare,
/*invoke=*/activations::Relu6Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_RELU6() {
return {/*init=*/activations::Relu6Init,
/*free=*/nullptr,
/*prepare=*/activations::Relu6Prepare,
/*invoke=*/activations::Relu6Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
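
The reworked ReluQuantized no longer clamps at the input zero point alone: CalculateReluOpData precomputes an offset, multiplier, and shift once in Prepare, and Eval rescales each value into the output's quantization grid before clamping to the quantized activation range. The same arithmetic in plain dequantize/requantize form, with invented scales and zero points purely to make the numbers concrete (the kernel above does this in integer-only math via MultiplyByQuantizedMultiplier):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
  // Invented quantization parameters, not taken from any real model.
  const float input_scale = 0.05f;
  const int32_t input_zero_point = 10;
  const float output_scale = 0.05f;
  const int32_t output_zero_point = -20;

  const int8_t input_q[] = {-40, 0, 10, 90};
  for (int8_t q : input_q) {
    const float real = input_scale * (q - input_zero_point);  // dequantize
    const float relu = std::max(0.0f, real);                  // the actual op
    int32_t out = output_zero_point +
                  static_cast<int32_t>(std::round(relu / output_scale));
    out = std::min<int32_t>(127, std::max<int32_t>(-128, out));  // clamp to int8
    std::printf("q=%4d  real=%6.2f  relu=%6.2f  out_q=%4d\n", q, real, relu,
                static_cast<int>(out));
  }
  return 0;
}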

View File

@@ -23,6 +23,8 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
namespace tflite {
namespace ops {
@@ -40,18 +42,22 @@ struct OpData {
// and the special 16-bit -> 16-bit quantized path
int input1_shift;
int input2_shift;
int32 output_activation_min;
int32 output_activation_max;
int32_t output_activation_min;
int32_t output_activation_max;
// These fields are used only in the general 8-bit -> 8bit quantized path
int32 input1_multiplier;
int32 input2_multiplier;
int32 output_multiplier;
int32_t input1_multiplier;
int32_t input2_multiplier;
int32_t output_multiplier;
int output_shift;
int left_shift;
int32 input1_offset;
int32 input2_offset;
int32 output_offset;
int32_t input1_offset;
int32_t input2_offset;
int32_t output_offset;
// Used only for float evals:
float output_activation_min_f32;
float output_activation_max_f32;
};
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params,
@@ -89,37 +95,44 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params,
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, params->activation, output, &data->output_activation_min,
&data->output_activation_max));
} else if (output->type == kTfLiteFloat32) {
CalculateActivationRange(params->activation,
&data->output_activation_min_f32,
&data->output_activation_max_f32);
}
return kTfLiteOk;
}
void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params,
const OpData* data, const TfLiteTensor* input1,
const TfLiteTensor* input2, TfLiteTensor* output) {
float output_activation_min, output_activation_max;
CalculateActivationRange(params->activation, &output_activation_min,
&output_activation_max);
const OpData* data, const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
tflite::ArithmeticParams op_params;
SetActivationParams(output_activation_min, output_activation_max, &op_params);
#define TF_LITE_ADD(opname) \
reference_ops::opname(op_params, GetTensorShape(input1), \
GetTensorData<float>(input1), GetTensorShape(input2), \
GetTensorData<float>(input2), GetTensorShape(output), \
GetTensorData<float>(output))
SetActivationParams(data->output_activation_min_f32,
data->output_activation_max_f32, &op_params);
if (data->requires_broadcast) {
TF_LITE_ADD(BroadcastAdd4DSlow);
reference_ops::BroadcastAdd4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<float>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<float>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
} else {
TF_LITE_ADD(Add);
reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<float>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<float>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
#undef TF_LITE_ADD
}
TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteAddParams* params, const OpData* data,
const TfLiteTensor* input1,
const TfLiteTensor* input2,
TfLiteTensor* output) {
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
tflite::ArithmeticParams op_params;
op_params.left_shift = data->left_shift;
@@ -135,46 +148,91 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
SetActivationParams(data->output_activation_min,
data->output_activation_max, &op_params);
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
GetTensorShape(input1), GetTensorShape(input2), &op_params);
#define TF_LITE_ADD(type, opname, dtype) \
type::opname(op_params, GetTensorShape(input1), \
GetTensorData<dtype>(input1), GetTensorShape(input2), \
GetTensorData<dtype>(input2), GetTensorShape(output), \
GetTensorData<dtype>(output));
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
if (output->type == kTfLiteInt8) {
if (need_broadcast) {
TF_LITE_ADD(reference_integer_ops, BroadcastAdd4DSlow, int8_t);
reference_integer_ops::BroadcastAdd4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
} else {
TF_LITE_ADD(reference_integer_ops, Add, int8_t);
reference_integer_ops::Add(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
} else {
if (need_broadcast) {
TF_LITE_ADD(reference_ops, BroadcastAdd4DSlow, uint8_t);
reference_ops::BroadcastAdd4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else {
TF_LITE_ADD(reference_ops, Add, uint8_t);
reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
}
}
#undef TF_LITE_ADD
}
return kTfLiteOk;
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
TF_LITE_ENSURE(context, input1 != nullptr);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TF_LITE_ENSURE(context, input2 != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
TF_LITE_ENSURE_STATUS(
CalculateOpData(context, params, input1, input2, output, data));
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
OpData data;
TF_LITE_ENSURE_STATUS(
CalculateOpData(context, params, input1, input2, output, &data));
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
if (output->type == kTfLiteFloat32) {
EvalAdd(context, node, params, &data, input1, input2, output);
EvalAdd(context, node, params, data, input1, input2, output);
} else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, &data,
TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, data,
input1, input2, output));
} else {
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@@ -187,16 +245,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
} // namespace add
TfLiteRegistration* Register_ADD() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/add::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_ADD() {
return {/*init=*/add::Init,
/*free=*/nullptr,
/*prepare=*/add::Prepare,
/*invoke=*/add::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
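
The ADD rework shows the pattern this commit applies across the kernels: Register_* now returns TfLiteRegistration by value, Init allocates an OpData block from the persistent arena, Prepare fills it in once while full TfLiteTensor metadata is still available, and Eval works only from TfLiteEvalTensor plus the precomputed OpData. A skeletal sketch of that lifecycle, with the op-specific math elided and a hypothetical op name:

#include <cstddef>
#include <cstdint>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"

namespace {

struct OpData {
  // Whatever the op precomputes: multipliers, shifts, activation ranges, ...
  int32_t output_activation_min;
  int32_t output_activation_max;
};

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  // Persistent allocations live as long as the interpreter, which is why
  // /*free=*/nullptr stays in the registration below.
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  OpData* data = static_cast<OpData*>(node->user_data);
  // Quantization parameters and shapes are read from TfLiteTensor here, once,
  // instead of on every invocation as the old Eval-only kernels did.
  data->output_activation_min = -128;  // placeholder values
  data->output_activation_max = 127;
  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const OpData* data = static_cast<const OpData*>(node->user_data);
  // Eval sees only TfLiteEvalTensor (via tflite::micro::GetEvalInput/Output)
  // and the precomputed OpData; no per-inference quantization math remains.
  (void)data;
  return kTfLiteOk;
}

}  // namespace

TfLiteRegistration Register_MY_OP() {  // hypothetical name, for illustration
  return {/*init=*/Init,
          /*free=*/nullptr,
          /*prepare=*/Prepare,
          /*invoke=*/Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}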

View File

@@ -1,83 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/kernels/all_ops_resolver.h"
#include "tensorflow/lite/micro/kernels/micro_ops.h"
namespace tflite {
namespace ops {
namespace micro {
// Register each supported op with:
// AddBuiltin(<operator ID>, <registration>, [min version], [max version])
AllOpsResolver::AllOpsResolver() {
AddBuiltin(BuiltinOperator_FULLY_CONNECTED, Register_FULLY_CONNECTED(), 1, 4);
AddBuiltin(BuiltinOperator_MAX_POOL_2D, Register_MAX_POOL_2D(), 1, 2);
AddBuiltin(BuiltinOperator_SOFTMAX, Register_SOFTMAX(), 1, 2);
AddBuiltin(BuiltinOperator_LOGISTIC, Register_LOGISTIC(), 1, 2);
AddBuiltin(BuiltinOperator_SVDF, Register_SVDF(), 1, 3);
AddBuiltin(BuiltinOperator_CONV_2D, Register_CONV_2D(), 1, 3);
AddBuiltin(BuiltinOperator_CONCATENATION, Register_CONCATENATION(), 1, 3);
AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D, Register_DEPTHWISE_CONV_2D(), 1,
3);
AddBuiltin(BuiltinOperator_AVERAGE_POOL_2D, Register_AVERAGE_POOL_2D(), 1, 2);
AddBuiltin(BuiltinOperator_ABS, Register_ABS());
AddBuiltin(BuiltinOperator_SIN, Register_SIN());
AddBuiltin(BuiltinOperator_COS, Register_COS());
AddBuiltin(BuiltinOperator_LOG, Register_LOG());
AddBuiltin(BuiltinOperator_SQRT, Register_SQRT());
AddBuiltin(BuiltinOperator_RSQRT, Register_RSQRT());
AddBuiltin(BuiltinOperator_SQUARE, Register_SQUARE());
AddBuiltin(BuiltinOperator_PRELU, Register_PRELU());
AddBuiltin(BuiltinOperator_FLOOR, Register_FLOOR());
AddBuiltin(BuiltinOperator_MAXIMUM, Register_MAXIMUM());
AddBuiltin(BuiltinOperator_MINIMUM, Register_MINIMUM());
AddBuiltin(BuiltinOperator_ARG_MAX, Register_ARG_MAX());
AddBuiltin(BuiltinOperator_ARG_MIN, Register_ARG_MIN());
AddBuiltin(BuiltinOperator_LOGICAL_OR, Register_LOGICAL_OR());
AddBuiltin(BuiltinOperator_LOGICAL_AND, Register_LOGICAL_AND());
AddBuiltin(BuiltinOperator_LOGICAL_NOT, Register_LOGICAL_NOT());
AddBuiltin(BuiltinOperator_RESHAPE, Register_RESHAPE());
AddBuiltin(BuiltinOperator_EQUAL, Register_EQUAL(), 1, 2);
AddBuiltin(BuiltinOperator_NOT_EQUAL, Register_NOT_EQUAL(), 1, 2);
AddBuiltin(BuiltinOperator_GREATER, Register_GREATER(), 1, 2);
AddBuiltin(BuiltinOperator_GREATER_EQUAL, Register_GREATER_EQUAL(), 1, 2);
AddBuiltin(BuiltinOperator_LESS, Register_LESS(), 1, 2);
AddBuiltin(BuiltinOperator_LESS_EQUAL, Register_LESS_EQUAL(), 1, 2);
AddBuiltin(BuiltinOperator_CEIL, Register_CEIL());
AddBuiltin(BuiltinOperator_ROUND, Register_ROUND());
AddBuiltin(BuiltinOperator_STRIDED_SLICE, Register_STRIDED_SLICE());
AddBuiltin(BuiltinOperator_PACK, Register_PACK(), 1, 2);
AddBuiltin(BuiltinOperator_PAD, Register_PAD(), 1, 2);
AddBuiltin(BuiltinOperator_PADV2, Register_PADV2(), 1, 2);
AddBuiltin(BuiltinOperator_SPLIT, Register_SPLIT(), 1, 3);
AddBuiltin(BuiltinOperator_UNPACK, Register_UNPACK(), 1, 2);
AddBuiltin(BuiltinOperator_NEG, Register_NEG());
AddBuiltin(BuiltinOperator_ADD, Register_ADD(), 1, 2);
AddBuiltin(BuiltinOperator_MUL, Register_MUL(), 1, 3);
AddBuiltin(BuiltinOperator_SUB, Register_SUB(), 1, 2);
AddBuiltin(BuiltinOperator_QUANTIZE, Register_QUANTIZE());
AddBuiltin(BuiltinOperator_DEQUANTIZE, Register_DEQUANTIZE(), 1, 2);
AddBuiltin(BuiltinOperator_RELU, Register_RELU());
AddBuiltin(BuiltinOperator_RELU6, Register_RELU6());
AddBuiltin(BuiltinOperator_MEAN, Register_MEAN());
AddBuiltin(BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
Register_RESIZE_NEAREST_NEIGHBOR(),
/* min_version = */ 1,
/* max_version = */ 2);
AddBuiltin(BuiltinOperator_L2_NORMALIZATION, Register_L2_NORMALIZATION());
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -19,6 +19,7 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/micro_utils.h"
namespace tflite {
@@ -45,14 +46,20 @@ inline void ArgMinMaxHelper(const RuntimeShape& input1_shape,
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node, bool is_arg_max) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* axis = GetInput(context, node, kAxis);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
const TfLiteEvalTensor* axis =
tflite::micro::GetEvalInput(context, node, kAxis);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
#define TF_LITE_ARG_MIN_MAX(data_type, axis_type, output_type) \
ArgMinMaxHelper(GetTensorShape(input), GetTensorData<data_type>(input), \
GetTensorData<axis_type>(axis), GetTensorShape(output), \
GetTensorData<output_type>(output), is_arg_max)
#define TF_LITE_ARG_MIN_MAX(data_type, axis_type, output_type) \
ArgMinMaxHelper(tflite::micro::GetTensorShape(input), \
tflite::micro::GetTensorData<data_type>(input), \
tflite::micro::GetTensorData<axis_type>(axis), \
tflite::micro::GetTensorShape(output), \
tflite::micro::GetTensorData<output_type>(output), \
is_arg_max)
if (axis->type == kTfLiteInt32) {
if (output->type == kTfLiteInt32) {
switch (input->type) {
@@ -67,18 +74,19 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node, bool is_arg_max) {
break;
default:
TF_LITE_KERNEL_LOG(context,
"Only float32, uint8 and int8 are "
"Only float32, uint8_t and int8_t are "
"supported currently, got %s.",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
} else {
TF_LITE_KERNEL_LOG(context, "Only int32 are supported currently, got %s.",
TF_LITE_KERNEL_LOG(context,
"Only int32_t are supported currently, got %s.",
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else {
TF_LITE_KERNEL_LOG(context, "Only int32 are supported currently, got %s.",
TF_LITE_KERNEL_LOG(context, "Only int32_t are supported currently, got %s.",
TfLiteTypeGetName(axis->type));
return kTfLiteError;
}
@@ -98,28 +106,26 @@ TfLiteStatus ArgMaxEval(TfLiteContext* context, TfLiteNode* node) {
} // namespace arg_min_max
TfLiteRegistration* Register_ARG_MAX() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/arg_min_max::ArgMaxEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_ARG_MAX() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/arg_min_max::ArgMaxEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration* Register_ARG_MIN() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/arg_min_max::ArgMinEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_ARG_MIN() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/arg_min_max::ArgMinEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
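
ArgMinMaxHelper (only partially visible in this hunk) walks the input along the reduction axis and records the index of the winning value under the supplied comparator; the macro above merely picks the input, axis, and output template types. A toy flat argmax, purely to make the reduction concrete and not the kernel's own code path:

#include <cstdio>

// Index of the largest element; ties resolve to the first occurrence,
// matching the usual argmax convention.
int ArgMaxFlat(const float* data, int size) {
  int best_index = 0;
  for (int i = 1; i < size; ++i) {
    if (data[i] > data[best_index]) {
      best_index = i;
    }
  }
  return best_index;
}

int main() {
  const float scores[] = {0.1f, 0.7f, 0.05f, 0.15f};
  std::printf("argmax = %d\n", ArgMaxFlat(scores, 4));  // prints 1
  return 0;
}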

View File

@@ -18,6 +18,7 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
@@ -29,11 +30,13 @@ constexpr int kOutputTensor = 0;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32);
TF_LITE_ENSURE_EQ(context, output->type, input->type);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
TF_LITE_ENSURE_TYPES_EQ(context, output->type, input->type);
TF_LITE_ENSURE_EQ(context, output->bytes, input->bytes);
TF_LITE_ENSURE_EQ(context, output->dims->size, input->dims->size);
for (int i = 0; i < output->dims->size; ++i) {
@@ -43,26 +46,29 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
reference_ops::Ceil(GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<float>(output));
reference_ops::Ceil(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
}
} // namespace ceil
TfLiteRegistration* Register_CEIL() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/ceil::Prepare,
/*invoke=*/ceil::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_CEIL() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/ceil::Prepare,
/*invoke=*/ceil::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro

View File

@@ -17,11 +17,10 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
/*
* The circular buffer custom operator is used to implement strided streaming
@@ -78,7 +77,9 @@ void Free(TfLiteContext* context, void* buffer) { op_data_counter = 0; }
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE(context, input != nullptr);
TF_LITE_ENSURE(context, output != nullptr);
@@ -89,10 +90,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, 1, input->dims->data[2]);
TF_LITE_ENSURE_EQ(context, output->dims->data[3], input->dims->data[3]);
TF_LITE_ENSURE_EQ(context, input->type, output->type);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
// The circular buffer custom operator currently only supports int8.
TF_LITE_ENSURE_EQ(context, input->type, kTfLiteInt8);
// The circular buffer custom operator currently only supports int8_t.
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8);
// TODO(b/132070898): Use statically slotted OpData structures until a
// scratch memory API is ready.
@@ -121,8 +122,10 @@ void EvalInt8(const int8_t* input, int num_slots, int depth, int8_t* output) {
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
OpData* data = reinterpret_cast<OpData*>(node->user_data);
@@ -130,8 +133,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
int depth = output->dims->data[3];
if (input->type == kTfLiteInt8) {
EvalInt8(GetTensorData<int8_t>(input), num_slots, depth,
GetTensorData<int8_t>(output));
EvalInt8(tflite::micro::GetTensorData<int8_t>(input), num_slots, depth,
tflite::micro::GetTensorData<int8_t>(output));
} else {
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);
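
For context on the custom op itself: the circular buffer keeps the most recent num_slots feature frames in the output tensor, and each invocation appends one depth-wide input frame, which is the EvalInt8(input, num_slots, depth, output) call seen above. A hedged sketch of that append step, assuming the oldest frame is shifted out and the newest written at the end (the real kernel may differ in bookkeeping details):

#include <cstddef>
#include <cstdint>
#include <cstring>

// 'output' holds num_slots frames of 'depth' int8 values, oldest first.
void AppendFrame(const int8_t* input, int num_slots, int depth,
                 int8_t* output) {
  // Drop the oldest frame by sliding the remaining ones toward the front.
  std::memmove(output, output + depth,
               static_cast<std::size_t>(num_slots - 1) * depth);
  // Write the newest frame into the freed last slot.
  std::memcpy(output + (num_slots - 1) * depth, input, depth);
}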

View File

@@ -18,6 +18,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
@@ -25,103 +26,109 @@ namespace micro {
namespace comparisons {
namespace {
struct OpData {
ComparisonParams params;
};
constexpr int kInputTensor1 = 0;
constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;
// TODO(ruic): optimize the macros below to use template functions.
#define TF_LITE_QUANTIZE_COMPARISON(opname) \
template <typename input_dtype> \
void EvalQuantized##opname(TfLiteContext* context, TfLiteNode* node, \
const TfLiteTensor* input1, \
const TfLiteTensor* input2, TfLiteTensor* output, \
bool requires_broadcast) { \
if (input1->type == kTfLiteUInt8 || input1->type == kTfLiteInt8) { \
auto input1_offset = -input1->params.zero_point; \
auto input2_offset = -input2->params.zero_point; \
const int left_shift = 8; \
\
int32 input1_multiplier; \
int input1_shift; \
QuantizeMultiplierSmallerThanOneExp( \
static_cast<double>(input1->params.scale), &input1_multiplier, \
&input1_shift); \
int32 input2_multiplier; \
int input2_shift; \
QuantizeMultiplierSmallerThanOneExp( \
static_cast<double>(input2->params.scale), &input2_multiplier, \
&input2_shift); \
\
ComparisonParams op_params; \
op_params.left_shift = left_shift; \
op_params.input1_offset = input1_offset; \
op_params.input1_multiplier = input1_multiplier; \
op_params.input1_shift = input1_shift; \
op_params.input2_offset = input2_offset; \
op_params.input2_multiplier = input2_multiplier; \
op_params.input2_shift = input2_shift; \
if (requires_broadcast) { \
reference_ops::Broadcast4DSlow##opname##WithScaling( \
op_params, GetTensorShape(input1), \
GetTensorData<input_dtype>(input1), GetTensorShape(input2), \
GetTensorData<input_dtype>(input2), GetTensorShape(output), \
GetTensorData<bool>(output)); \
} else { \
reference_ops::opname##WithScaling( \
op_params, GetTensorShape(input1), \
GetTensorData<input_dtype>(input1), GetTensorShape(input2), \
GetTensorData<input_dtype>(input2), GetTensorShape(output), \
GetTensorData<bool>(output)); \
} \
} \
}
TF_LITE_QUANTIZE_COMPARISON(Equal);
TF_LITE_QUANTIZE_COMPARISON(NotEqual);
TF_LITE_QUANTIZE_COMPARISON(Greater);
TF_LITE_QUANTIZE_COMPARISON(GreaterEqual);
TF_LITE_QUANTIZE_COMPARISON(Less);
TF_LITE_QUANTIZE_COMPARISON(LessEqual);
#undef TF_LITE_QUANTIZE_COMPARISON
#define TF_LITE_COMPARISON(type, opname, requires_broadcast) \
{ \
ComparisonParams op_params; \
requires_broadcast \
? reference_ops::Broadcast4DSlow##opname##NoScaling( \
op_params, GetTensorShape(input1), GetTensorData<type>(input1), \
GetTensorShape(input2), GetTensorData<type>(input2), \
GetTensorShape(output), GetTensorData<bool>(output)) \
: reference_ops::opname##NoScaling( \
op_params, GetTensorShape(input1), GetTensorData<type>(input1), \
GetTensorShape(input2), GetTensorData<type>(input2), \
GetTensorShape(output), GetTensorData<bool>(output)); \
}
TfLiteStatus EqualEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
bool requires_broadcast = !HaveSameShapes(input1, input2);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
bool* output_data = tflite::micro::GetTensorData<bool>(output);
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
switch (input1->type) {
case kTfLiteBool:
TF_LITE_COMPARISON(bool, Equal, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<bool>(input1), input2_shape,
tflite::micro::GetTensorData<bool>(input2), output_shape,
output_data)
: reference_ops::EqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<bool>(input1), input2_shape,
tflite::micro::GetTensorData<bool>(input2), output_shape,
output_data);
break;
case kTfLiteFloat32:
TF_LITE_COMPARISON(float, Equal, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data)
: reference_ops::EqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data);
break;
case kTfLiteInt32:
TF_LITE_COMPARISON(int32_t, Equal, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data)
: reference_ops::EqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt64:
TF_LITE_COMPARISON(int64_t, Equal, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data)
: reference_ops::EqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data);
break;
case kTfLiteUInt8:
EvalQuantizedEqual<uint8_t>(context, node, input1, input2, output,
requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data)
: reference_ops::EqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt8:
EvalQuantizedEqual<int8_t>(context, node, input1, input2, output,
requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data)
: reference_ops::EqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@@ -133,30 +140,100 @@ TfLiteStatus EqualEval(TfLiteContext* context, TfLiteNode* node) {
// TODO(renjieliu): Refactor the logic to avoid duplications.
TfLiteStatus NotEqualEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
bool requires_broadcast = !HaveSameShapes(input1, input2);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
bool* output_data = tflite::micro::GetTensorData<bool>(output);
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
switch (input1->type) {
case kTfLiteBool:
TF_LITE_COMPARISON(bool, NotEqual, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowNotEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<bool>(input1), input2_shape,
tflite::micro::GetTensorData<bool>(input2), output_shape,
output_data)
: reference_ops::NotEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<bool>(input1), input2_shape,
tflite::micro::GetTensorData<bool>(input2), output_shape,
output_data);
break;
case kTfLiteFloat32:
TF_LITE_COMPARISON(float, NotEqual, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowNotEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data)
: reference_ops::NotEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data);
break;
case kTfLiteInt32:
TF_LITE_COMPARISON(int32_t, NotEqual, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowNotEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data)
: reference_ops::NotEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt64:
TF_LITE_COMPARISON(int64_t, NotEqual, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowNotEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data)
: reference_ops::NotEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data);
break;
case kTfLiteUInt8:
EvalQuantizedNotEqual<uint8_t>(context, node, input1, input2, output,
requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowNotEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data)
: reference_ops::NotEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt8:
EvalQuantizedNotEqual<int8_t>(context, node, input1, input2, output,
requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowNotEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data)
: reference_ops::NotEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@@ -167,27 +244,87 @@ TfLiteStatus NotEqualEval(TfLiteContext* context, TfLiteNode* node) {
}
TfLiteStatus GreaterEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
bool requires_broadcast = !HaveSameShapes(input1, input2);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
bool* output_data = tflite::micro::GetTensorData<bool>(output);
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
switch (input1->type) {
case kTfLiteFloat32:
TF_LITE_COMPARISON(float, Greater, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data)
: reference_ops::GreaterNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data);
break;
case kTfLiteInt32:
TF_LITE_COMPARISON(int32_t, Greater, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data)
: reference_ops::GreaterNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt64:
TF_LITE_COMPARISON(int64_t, Greater, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data)
: reference_ops::GreaterNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data);
break;
case kTfLiteUInt8:
EvalQuantizedGreater<uint8_t>(context, node, input1, input2, output,
requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data)
: reference_ops::GreaterWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt8:
EvalQuantizedGreater<int8_t>(context, node, input1, input2, output,
requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data)
: reference_ops::GreaterWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@@ -198,27 +335,87 @@ TfLiteStatus GreaterEval(TfLiteContext* context, TfLiteNode* node) {
}
TfLiteStatus GreaterEqualEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
bool requires_broadcast = !HaveSameShapes(input1, input2);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
bool* output_data = tflite::micro::GetTensorData<bool>(output);
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
switch (input1->type) {
case kTfLiteFloat32:
TF_LITE_COMPARISON(float, GreaterEqual, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data)
: reference_ops::GreaterEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data);
break;
case kTfLiteInt32:
TF_LITE_COMPARISON(int32_t, GreaterEqual, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data)
: reference_ops::GreaterEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt64:
TF_LITE_COMPARISON(int64_t, GreaterEqual, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data)
: reference_ops::GreaterEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data);
break;
case kTfLiteUInt8:
EvalQuantizedGreaterEqual<uint8_t>(context, node, input1, input2, output,
requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data)
: reference_ops::GreaterEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt8:
EvalQuantizedGreaterEqual<int8_t>(context, node, input1, input2, output,
requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data)
: reference_ops::GreaterEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@@ -229,27 +426,87 @@ TfLiteStatus GreaterEqualEval(TfLiteContext* context, TfLiteNode* node) {
}
TfLiteStatus LessEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
bool requires_broadcast = !HaveSameShapes(input1, input2);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
bool* output_data = tflite::micro::GetTensorData<bool>(output);
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
switch (input1->type) {
case kTfLiteFloat32:
TF_LITE_COMPARISON(float, Less, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowLessNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data)
: reference_ops::LessNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data);
break;
case kTfLiteInt32:
TF_LITE_COMPARISON(int32_t, Less, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowLessNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data)
: reference_ops::LessNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt64:
TF_LITE_COMPARISON(int64_t, Less, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowLessNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data)
: reference_ops::LessNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data);
break;
case kTfLiteUInt8:
EvalQuantizedLess<uint8_t>(context, node, input1, input2, output,
requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowLessWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data)
: reference_ops::LessWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt8:
EvalQuantizedLess<int8_t>(context, node, input1, input2, output,
requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowLessWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data)
: reference_ops::LessWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@@ -260,27 +517,87 @@ TfLiteStatus LessEval(TfLiteContext* context, TfLiteNode* node) {
}
TfLiteStatus LessEqualEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
bool requires_broadcast = !HaveSameShapes(input1, input2);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
bool* output_data = tflite::micro::GetTensorData<bool>(output);
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
switch (input1->type) {
case kTfLiteFloat32:
TF_LITE_COMPARISON(float, LessEqual, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowLessEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data)
: reference_ops::LessEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data);
break;
case kTfLiteInt32:
TF_LITE_COMPARISON(int32_t, LessEqual, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowLessEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data)
: reference_ops::LessEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt64:
TF_LITE_COMPARISON(int64_t, LessEqual, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowLessEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data)
: reference_ops::LessEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data);
break;
case kTfLiteUInt8:
EvalQuantizedLessEqual<uint8_t>(context, node, input1, input2, output,
requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowLessEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data)
: reference_ops::LessEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt8:
EvalQuantizedLessEqual<int8_t>(context, node, input1, input2, output,
requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowLessEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data)
: reference_ops::LessEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@@ -291,78 +608,115 @@ TfLiteStatus LessEqualEval(TfLiteContext* context, TfLiteNode* node) {
}
} // namespace
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
TF_LITE_ENSURE(context, input1 != nullptr);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TF_LITE_ENSURE(context, input2 != nullptr);
if (input1->type == kTfLiteUInt8 || input1->type == kTfLiteInt8) {
auto input1_offset = -input1->params.zero_point;
auto input2_offset = -input2->params.zero_point;
const int kLeftShift = 8;
int32_t input1_multiplier;
int input1_shift;
QuantizeMultiplierSmallerThanOneExp(
static_cast<double>(input1->params.scale), &input1_multiplier,
&input1_shift);
int32_t input2_multiplier;
int input2_shift;
QuantizeMultiplierSmallerThanOneExp(
static_cast<double>(input2->params.scale), &input2_multiplier,
&input2_shift);
data->params.left_shift = kLeftShift;
data->params.input1_offset = input1_offset;
data->params.input1_multiplier = input1_multiplier;
data->params.input1_shift = input1_shift;
data->params.input2_offset = input2_offset;
data->params.input2_multiplier = input2_multiplier;
data->params.input2_shift = input2_shift;
}
return kTfLiteOk;
}
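// The Prepare() above folds each input's scale and zero point into a
// fixed-point multiplier plus a shift, together with a shared left_shift of
// 8. The code below is a minimal, illustrative sketch of how the
// *WithScaling comparison kernels conceptually consume those parameters; the
// names are not part of this file, and the plain int64 arithmetic only
// approximates TFLite's rounding fixed-point helpers.
struct ScaledCompareParams {  // mirrors the fields written into data->params
  int left_shift;             // 8 on the uint8/int8 path above
  int32_t input1_offset;
  int32_t input1_multiplier;
  int input1_shift;  // non-positive, from QuantizeMultiplierSmallerThanOneExp
  int32_t input2_offset;
  int32_t input2_multiplier;
  int input2_shift;
};
inline int64_t RescaleForCompareSketch(int32_t value, int32_t offset,
                                       int32_t multiplier, int shift,
                                       int left_shift) {
  // Widen, apply the zero-point offset and the shared left shift, then scale
  // by the Q0.31 multiplier and the per-input exponent.
  const int64_t shifted = static_cast<int64_t>(value + offset) << left_shift;
  return (shifted * multiplier) >> (31 - shift);
}
inline bool GreaterWithScalingSketch(int32_t in1, int32_t in2,
                                     const ScaledCompareParams& p) {
  const int64_t lhs = RescaleForCompareSketch(in1, p.input1_offset,
                                              p.input1_multiplier,
                                              p.input1_shift, p.left_shift);
  const int64_t rhs = RescaleForCompareSketch(in2, p.input2_offset,
                                              p.input2_multiplier,
                                              p.input2_shift, p.left_shift);
  return lhs > rhs;  // Both sides now share one scale, so compare directly.
}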
} // namespace comparisons
TfLiteRegistration* Register_EQUAL() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/comparisons::EqualEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_EQUAL() {
return {/*init=*/comparisons::Init,
/*free=*/nullptr,
/*prepare=*/comparisons::Prepare,
/*invoke=*/comparisons::EqualEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration* Register_NOT_EQUAL() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/comparisons::NotEqualEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_NOT_EQUAL() {
return {/*init=*/comparisons::Init,
/*free=*/nullptr,
/*prepare=*/comparisons::Prepare,
/*invoke=*/comparisons::NotEqualEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration* Register_GREATER() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/comparisons::GreaterEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_GREATER() {
return {/*init=*/comparisons::Init,
/*free=*/nullptr,
/*prepare=*/comparisons::Prepare,
/*invoke=*/comparisons::GreaterEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration* Register_GREATER_EQUAL() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/comparisons::GreaterEqualEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_GREATER_EQUAL() {
return {/*init=*/comparisons::Init,
/*free=*/nullptr,
/*prepare=*/comparisons::Prepare,
/*invoke=*/comparisons::GreaterEqualEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration* Register_LESS() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/comparisons::LessEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_LESS() {
return {/*init=*/comparisons::Init,
/*free=*/nullptr,
/*prepare=*/comparisons::Prepare,
/*invoke=*/comparisons::LessEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration* Register_LESS_EQUAL() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/comparisons::LessEqualEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_LESS_EQUAL() {
return {/*init=*/comparisons::Init,
/*free=*/nullptr,
/*prepare=*/comparisons::Prepare,
/*invoke=*/comparisons::LessEqualEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro


@@ -18,10 +18,11 @@ limitations under the License.
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor.h"
#include "tensorflow/lite/kernels/internal/portable_tensor.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
@@ -31,14 +32,116 @@ namespace concatenation {
constexpr int kMaxInputNum = 10; // Maximum number of input tensors
constexpr int kOutputTensor = 0;
struct OpData {
ConcatenationParams params;
};
// Handles negative axis index, coerces to positive index value.
inline int CalculatePositiveAxis(int axis, const TfLiteTensor* output_tensor) {
if (axis >= 0) {
return axis;
} else {
return NumDimensions(output_tensor) + axis;
}
}
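// For example, with a 4-D output tensor, CalculatePositiveAxis(-1, output)
// returns 3 and CalculatePositiveAxis(1, output) returns 1, so the code
// below only ever deals with a non-negative concatenation axis.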
// The following functions are helpers to get tensor data in the format that the
// reference op implementation expects. They provide the same functionality as
// class VectorOfTensors and class VectorOfQuantizedTensors in TFLite.
// Gets shapes from a list of tensors.
inline void GetAllInputTensorShapes(const TfLiteContext* context,
const TfLiteNode* node,
RuntimeShape all_shapes[kMaxInputNum]) {
TFLITE_DCHECK(context != nullptr);
TFLITE_DCHECK(node != nullptr);
for (int i = 0; i < node->inputs->size; ++i) {
const TfLiteEvalTensor* t = tflite::micro::GetEvalInput(context, node, i);
RuntimeShape shape = tflite::micro::GetTensorShape(t);
all_shapes[i].ReplaceWith(shape.DimensionsCount(), shape.DimsData());
}
}
// Get shape pointers from a list of shapes.
inline void GetShapesPointers(const RuntimeShape* shapes, size_t num,
const RuntimeShape* pointers[]) {
for (size_t i = 0; i < num; ++i) {
pointers[i] = &shapes[i];
}
}
// Gets data pointers from a list of tensors.
template <typename T>
inline void GetAllInputTensorData(const TfLiteContext* context,
const TfLiteNode* node,
T* all_data[kMaxInputNum]) {
TFLITE_DCHECK(context != nullptr);
TFLITE_DCHECK(node != nullptr);
for (int i = 0; i < node->inputs->size; ++i) {
const TfLiteEvalTensor* t = tflite::micro::GetEvalInput(context, node, i);
all_data[i] = tflite::micro::GetTensorData<T>(t);
}
}
template <typename data_type>
void EvalUnquantized(TfLiteContext* context, TfLiteNode* node) {
// Collect the shapes and data pointer of input tensors
RuntimeShape inputs_shape[kMaxInputNum];
const RuntimeShape* inputs_shape_ptr[kMaxInputNum];
const data_type* inputs_data[kMaxInputNum];
GetAllInputTensorShapes(context, node, inputs_shape);
GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr);
GetAllInputTensorData(context, node, inputs_data);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
reference_ops::Concatenation(data->params, inputs_shape_ptr, inputs_data,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<data_type>(output));
}
void EvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node) {
// Collect the shapes and data pointer of input tensors
RuntimeShape inputs_shape[kMaxInputNum];
const RuntimeShape* inputs_shape_ptr[kMaxInputNum];
const uint8_t* inputs_data[kMaxInputNum];
GetAllInputTensorShapes(context, node, inputs_shape);
GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr);
GetAllInputTensorData(context, node, inputs_data);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
reference_ops::ConcatenationWithScaling(
data->params, inputs_shape_ptr, inputs_data,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
// This function only checks the types. Additional shape validations are
// performed in the reference implementation called during Eval().
const TfLiteConcatenationParams* params =
reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data);
TfLiteType input_type = GetInput(context, node, 0)->type;
TfLiteType output_type = GetOutput(context, node, kOutputTensor)->type;
const TfLiteTensor* input_tensor = GetInput(context, node, 0);
TF_LITE_ENSURE(context, input_tensor != nullptr);
TfLiteType input_type = input_tensor->type;
const TfLiteTensor* output_tensor = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output_tensor != nullptr);
TfLiteType output_type = output_tensor->type;
// Check activation and input type
TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone);
@@ -57,133 +160,76 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
// Shapes with dimensions >4 are not yet supported with static allocation.
for (int i = 0; i < num_inputs; ++i) {
const TfLiteTensor* input = GetInput(context, node, i);
TF_LITE_ENSURE(context, input != nullptr);
int num_dimensions = NumDimensions(input);
if (num_dimensions > 4) {
TF_LITE_KERNEL_LOG(
context,
"Op Concatenation does not currently support num dimensions >4 "
"Tensor '%s' has %d dimensions.",
input->name, num_dimensions);
"Tensor has %d dimensions.",
num_dimensions);
return kTfLiteError;
}
}
// Calculate OpData.
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
switch (output_type) { // Already know in/out types are same.
case kTfLiteFloat32:
case kTfLiteInt32:
case kTfLiteInt64: {
data->params.axis = CalculatePositiveAxis(params->axis, output);
data->params.inputs_count = node->inputs->size;
break;
}
case kTfLiteUInt8:
case kTfLiteInt8: {
data->params.axis = CalculatePositiveAxis(params->axis, output);
data->params.inputs_count = node->inputs->size;
float* input_scales =
reinterpret_cast<float*>(context->AllocatePersistentBuffer(
context, node->inputs->size * sizeof(float)));
int32_t* input_zero_points =
reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
context, node->inputs->size * sizeof(int32_t)));
// Allocate persistent scale and zeropoint buffers.
// Store input scale and zero point values in OpParams:
for (int i = 0; i < node->inputs->size; ++i) {
const TfLiteTensor* t = GetInput(context, node, i);
TF_LITE_ENSURE(context, t != nullptr);
input_scales[i] = t->params.scale;
input_zero_points[i] = t->params.zero_point;
}
data->params.input_scale = input_scales;
data->params.input_zeropoint = input_zero_points;
data->params.output_zeropoint = output->params.zero_point;
data->params.output_scale = output->params.scale;
break;
}
default:
TF_LITE_KERNEL_LOG(
context, "Op Concatenation does not currently support Type '%s'.",
TfLiteTypeGetName(output_type));
return kTfLiteError;
}
return kTfLiteOk;
}
// Handles negative axis index, coerces to positive index value.
inline int CalculatePositiveAxis(int axis, const TfLiteTensor* output_tensor) {
if (axis >= 0) {
return axis;
} else {
return NumDimensions(output_tensor) + axis;
}
}
// The following functions are helpers to get tensor data in the format that the
// reference op implementation expects. They provide the same functionality as
// class VectorOfTensors and class VectorOfQuantizedTensors in TFLite.
// Gets shapes from a list of tensors.
inline void GetAllTensorShapes(const TfLiteContext& context,
const TfLiteIntArray& tensor_list,
RuntimeShape all_shapes[kMaxInputNum]) {
for (int i = 0; i < tensor_list.size; ++i) {
const TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
RuntimeShape shape = GetTensorShape(t);
all_shapes[i].ReplaceWith(shape.DimensionsCount(), shape.DimsData());
}
}
// Get shape pointers from a list of shapes.
inline void GetShapesPointers(const RuntimeShape* shapes, size_t num,
const RuntimeShape* pointers[]) {
for (size_t i = 0; i < num; ++i) {
pointers[i] = &shapes[i];
}
}
// Gets data pointers from a list of tensors.
template <typename T>
inline void GetAllTensorData(const TfLiteContext& context,
const TfLiteIntArray& tensor_list,
T* all_data[kMaxInputNum]) {
for (int i = 0; i < tensor_list.size; ++i) {
const TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
all_data[i] = GetTensorData<T>(t);
}
}
// Gets scale and zero point from a list of tensors
inline void GetAllQuantizationParam(const TfLiteContext& context,
const TfLiteIntArray& tensor_list,
float scales[kMaxInputNum],
int32 zero_points[kMaxInputNum]) {
for (int i = 0; i < tensor_list.size; ++i) {
const TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
scales[i] = t->params.scale;
zero_points[i] = t->params.zero_point;
}
}
template <typename data_type>
void EvalUnquantized(TfLiteContext* context, TfLiteNode* node) {
// Collect the shapes and data pointer of input tensors
RuntimeShape inputs_shape[kMaxInputNum];
const RuntimeShape* inputs_shape_ptr[kMaxInputNum];
const data_type* inputs_data[kMaxInputNum];
GetAllTensorShapes(*context, *node->inputs, inputs_shape);
GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr);
GetAllTensorData(*context, *node->inputs, inputs_data);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteConcatenationParams* params =
reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data);
ConcatenationParams op_params;
op_params.axis = CalculatePositiveAxis(params->axis, output);
op_params.inputs_count = NumInputs(node);
reference_ops::Concatenation(op_params, inputs_shape_ptr, inputs_data,
GetTensorShape(output),
GetTensorData<data_type>(output));
}
void EvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node) {
// Collect the shapes and data pointer of input tensors
RuntimeShape inputs_shape[kMaxInputNum];
const RuntimeShape* inputs_shape_ptr[kMaxInputNum];
const uint8_t* inputs_data[kMaxInputNum];
float inputs_scale[kMaxInputNum];
int32 inputs_zero_point[kMaxInputNum];
GetAllTensorShapes(*context, *node->inputs, inputs_shape);
GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr);
GetAllTensorData(*context, *node->inputs, inputs_data);
GetAllQuantizationParam(*context, *node->inputs, inputs_scale,
inputs_zero_point);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteConcatenationParams* params =
reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data);
ConcatenationParams op_params;
op_params.axis = CalculatePositiveAxis(params->axis, output);
op_params.inputs_count = NumInputs(node);
op_params.input_zeropoint = inputs_zero_point;
op_params.input_scale = inputs_scale;
op_params.output_zeropoint = output->params.zero_point;
op_params.output_scale = output->params.scale;
reference_ops::ConcatenationWithScaling(op_params, inputs_shape_ptr,
inputs_data, GetTensorShape(output),
GetTensorData<uint8>(output));
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TfLiteType output_type = GetOutput(context, node, kOutputTensor)->type;
const TfLiteTensor* output_tensor = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output_tensor != nullptr);
TfLiteType output_type = output_tensor->type;
switch (output_type) { // Already know in/out types are same.
case kTfLiteFloat32:
@@ -214,16 +260,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
} // namespace concatenation
TfLiteRegistration* Register_CONCATENATION() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/concatenation::Prepare,
/*invoke=*/concatenation::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_CONCATENATION() {
return {/*init=*/concatenation::Init,
/*free=*/nullptr,
/*prepare=*/concatenation::Prepare,
/*invoke=*/concatenation::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
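The quantized concatenation path above copies every input into the output while requantizing it against the per-input scale and zero-point arrays that Prepare() stores in OpData. A minimal sketch of that per-element rescaling, written in plain float math for clarity (the name below is illustrative; the actual ConcatenationWithScaling reference kernel works in fixed point):
#include <algorithm>
#include <cmath>
#include <cstdint>
inline uint8_t RequantizeForConcatSketch(uint8_t in, float in_scale,
                                         int32_t in_zero_point,
                                         float out_scale,
                                         int32_t out_zero_point) {
  // Recover the real value of the input element, then re-encode it in the
  // output tensor's quantization and clamp to the uint8 range.
  const float real_value =
      in_scale * (static_cast<int32_t>(in) - in_zero_point);
  const int32_t out = out_zero_point +
      static_cast<int32_t>(std::round(real_value / out_scale));
  return static_cast<uint8_t>(std::min(255, std::max(0, out)));
}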


@@ -1,279 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/conv.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/conv.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
namespace tflite {
namespace ops {
namespace micro {
namespace conv {
constexpr int kInputTensor = 0;
constexpr int kFilterTensor = 1;
constexpr int kBiasTensor = 2;
constexpr int kOutputTensor = 0;
// Adjusted jomjol 05.06.20
//constexpr int kMaxChannels = 1024;
constexpr int kMaxChannels = 4096;
// Conv is quantized along dimension 0:
// https://www.tensorflow.org/lite/performance/quantization_spec
constexpr int kConvQuantizedDimension = 0;
// This file has 2 implementation of Conv.
struct OpData {
TfLitePaddingValues padding;
// The scaling factor from input to output (aka the 'real multiplier') can
// be represented as a fixed point multiplier plus a left shift.
int32_t output_multiplier;
int output_shift;
// Per channel output multiplier and shift.
// TODO(b/141139247): Allocate these dynamically when possible.
int32_t per_channel_output_multiplier[kMaxChannels];
int32_t per_channel_output_shift[kMaxChannels];
// The range of the fused activation layer. For example for kNone and
// uint8_t these would be 0 and 255.
int32_t output_activation_min;
int32_t output_activation_max;
};
inline PaddingType RuntimePaddingType(TfLitePadding padding) {
switch (padding) {
case TfLitePadding::kTfLitePaddingSame:
return PaddingType::kSame;
case TfLitePadding::kTfLitePaddingValid:
return PaddingType::kValid;
case TfLitePadding::kTfLitePaddingUnknown:
default:
return PaddingType::kNone;
}
}
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
TfLiteConvParams* params, int width, int height,
int filter_width, int filter_height, int out_width,
int out_height, const TfLiteType data_type,
OpData* data) {
bool has_bias = node->inputs->size == 3;
// Check number of inputs/outputs
TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2);
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
// Matching GetWindowedOutputSize in TensorFlow.
auto padding = params->padding;
data->padding = ComputePaddingHeightWidth(
params->stride_height, params->stride_width,
params->dilation_height_factor, params->dilation_width_factor, height,
width, filter_height, filter_width, padding, &out_height, &out_width);
// Note that quantized inference requires that all tensors have their
// parameters set. This is usually done during quantized training.
if (data_type != kTfLiteFloat32) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
const TfLiteTensor* bias =
GetOptionalInputTensor(context, node, kBiasTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
int output_channels = filter->dims->data[kConvQuantizedDimension];
TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
context, input, filter, bias, output, params->activation,
&data->output_multiplier, &data->output_shift,
&data->output_activation_min, &data->output_activation_max,
data->per_channel_output_multiplier,
reinterpret_cast<int*>(data->per_channel_output_shift),
output_channels));
}
return kTfLiteOk;
}
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteConvParams* params, OpData* data,
const TfLiteTensor* input, const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* im2col,
TfLiteTensor* hwcn_weights, TfLiteTensor* output) {
const int32_t input_offset = -input->params.zero_point;
const int32_t filter_offset = -filter->params.zero_point;
const int32_t output_offset = output->params.zero_point;
ConvParams op_params;
op_params.padding_type = RuntimePaddingType(params->padding);
op_params.padding_values.width = data->padding.width;
op_params.padding_values.height = data->padding.height;
op_params.stride_width = params->stride_width;
op_params.stride_height = params->stride_height;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.input_offset = input_offset;
op_params.weights_offset = filter_offset;
op_params.output_offset = output_offset;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = -data->output_shift;
op_params.quantized_activation_min = data->output_activation_min;
op_params.quantized_activation_max = data->output_activation_max;
reference_ops::Conv(op_params, GetTensorShape(input),
GetTensorData<uint8_t>(input), GetTensorShape(filter),
GetTensorData<uint8_t>(filter), GetTensorShape(bias),
GetTensorData<int32_t>(bias), GetTensorShape(output),
GetTensorData<uint8_t>(output), GetTensorShape(im2col),
GetTensorData<uint8_t>(im2col), nullptr);
}
void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
TfLiteConvParams* params, OpData* data,
const TfLiteTensor* input,
const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* output,
TfLiteTensor* im2col) {
ConvParams op_params;
op_params.input_offset = -input->params.zero_point;
op_params.output_offset = output->params.zero_point;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.quantized_activation_min = data->output_activation_min;
op_params.quantized_activation_max = data->output_activation_max;
reference_integer_ops::ConvPerChannel(
op_params, data->per_channel_output_multiplier,
data->per_channel_output_shift, GetTensorShape(input),
GetTensorData<int8>(input), GetTensorShape(filter),
GetTensorData<int8>(filter), GetTensorShape(bias),
GetTensorData<int32>(bias), GetTensorShape(output),
GetTensorData<int8>(output));
}
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLiteConvParams* params, OpData* data,
const TfLiteTensor* input, const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* im2col,
TfLiteTensor* hwcn_weights, TfLiteTensor* output) {
float output_activation_min, output_activation_max;
CalculateActivationRange(params->activation, &output_activation_min,
&output_activation_max);
ConvParams op_params;
op_params.padding_type = RuntimePaddingType(params->padding);
op_params.padding_values.width = data->padding.width;
op_params.padding_values.height = data->padding.height;
op_params.stride_width = params->stride_width;
op_params.stride_height = params->stride_height;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.float_activation_min = output_activation_min;
op_params.float_activation_max = output_activation_max;
reference_ops::Conv(op_params, GetTensorShape(input),
GetTensorData<float>(input), GetTensorShape(filter),
GetTensorData<float>(filter), GetTensorShape(bias),
GetTensorData<float>(bias), GetTensorShape(output),
GetTensorData<float>(output), GetTensorShape(im2col),
GetTensorData<float>(im2col));
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
int input_width = input->dims->data[2];
int input_height = input->dims->data[1];
int filter_width = filter->dims->data[2];
int filter_height = filter->dims->data[1];
int output_width = output->dims->data[2];
int output_height = output->dims->data[1];
OpData data;
// All per-channel quantized tensors need valid zero point and scale arrays.
if (input->type == kTfLiteInt8) {
TF_LITE_ENSURE_EQ(context, filter->quantization.type,
kTfLiteAffineQuantization);
const auto* affine_quantization =
reinterpret_cast<TfLiteAffineQuantization*>(
filter->quantization.params);
TF_LITE_ENSURE(context, affine_quantization);
TF_LITE_ENSURE(context, affine_quantization->scale);
TF_LITE_ENSURE(context, affine_quantization->zero_point);
TF_LITE_ENSURE(context,
affine_quantization->scale->size == 1 ||
affine_quantization->scale->size ==
filter->dims->data[kConvQuantizedDimension]);
TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size,
affine_quantization->zero_point->size);
}
TF_LITE_ENSURE_STATUS(CalculateOpData(
context, node, params, input_width, input_height, filter_width,
filter_height, output_width, output_height, input->type, &data));
switch (input->type) { // Already know in/out types are same.
case kTfLiteFloat32:
EvalFloat(context, node, params, &data, input, filter, bias, nullptr,
nullptr, output);
break;
case kTfLiteInt8:
EvalQuantizedPerChannel(context, node, params, &data, input, filter, bias,
output, nullptr);
break;
case kTfLiteUInt8:
EvalQuantized(context, node, params, &data, input, filter, bias, nullptr,
nullptr, output);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace conv
TfLiteRegistration* Register_CONV_2D() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/conv::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite
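The conv OpData above caches the input-to-output scaling as a fixed-point output_multiplier plus an output_shift, and the reworked version of the kernel that follows keeps the same representation. As a rough, illustrative sketch of how such a pair is applied to a 32-bit accumulator (this approximates, but is not, TFLite's exact rounding and saturating helper):
#include <cstdint>
inline int32_t ApplyOutputMultiplierSketch(int32_t acc, int32_t multiplier,
                                           int shift) {
  // multiplier is a Q0.31 value of roughly [2^30, 2^31); shift is the
  // power-of-two exponent (negative means an additional right shift).
  const int64_t scaled = static_cast<int64_t>(acc) * multiplier;
  return static_cast<int32_t>(scaled >> (31 - shift));
}
// For example, a real multiplier of roughly 0.00392 quantizes to
// multiplier ~= round(0.50176 * 2^31) with shift = -7, so the call above
// returns approximately acc * 0.00392.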


@@ -23,19 +23,15 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace conv {
namespace {
constexpr int kInputTensor = 0;
constexpr int kFilterTensor = 1;
constexpr int kBiasTensor = 2;
constexpr int kOutputTensor = 0;
// Adjusted jomjol 05.06.20
//constexpr int kMaxChannels = 1024;
constexpr int kMaxChannels = 32384;
// Conv is quantized along dimension 0:
// https://www.tensorflow.org/lite/performance/quantization_spec
@@ -45,15 +41,20 @@ constexpr int kConvQuantizedDimension = 0;
struct OpData {
TfLitePaddingValues padding;
// Cached tensor zero point values for quantized operations.
int32_t input_zero_point;
int32_t filter_zero_point;
int32_t output_zero_point;
// The scaling factor from input to output (aka the 'real multiplier') can
// be represented as a fixed point multiplier plus a left shift.
int32_t output_multiplier;
int output_shift;
// Per channel output multiplier and shift.
// TODO(b/141139247): Allocate these dynamically when possible.
int32_t per_channel_output_multiplier[kMaxChannels];
int32_t per_channel_output_shift[kMaxChannels];
int32_t* per_channel_output_multiplier;
int32_t* per_channel_output_shift;
// The range of the fused activation layer. For example for kNone and
// uint8_t these would be 0 and 255.
@@ -74,10 +75,10 @@ inline PaddingType RuntimePaddingType(TfLitePadding padding) {
}
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
TfLiteConvParams* params, int width, int height,
int filter_width, int filter_height, int out_width,
int out_height, const TfLiteType data_type,
OpData* data) {
const TfLiteConvParams* params, int width,
int height, int filter_width, int filter_height,
int out_width, int out_height,
const TfLiteType data_type, OpData* data) {
bool has_bias = node->inputs->size == 3;
// Check number of inputs/outputs
TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2);
@@ -94,10 +95,13 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
// parameters set. This is usually done during quantized training.
if (data_type != kTfLiteFloat32) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
TF_LITE_ENSURE(context, filter != nullptr);
const TfLiteTensor* bias =
GetOptionalInputTensor(context, node, kBiasTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
int output_channels = filter->dims->data[kConvQuantizedDimension];
TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
@@ -111,100 +115,24 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
return kTfLiteOk;
}
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteConvParams* params, OpData* data,
const TfLiteTensor* input, const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* im2col,
TfLiteTensor* hwcn_weights, TfLiteTensor* output) {
const int32_t input_offset = -input->params.zero_point;
const int32_t filter_offset = -filter->params.zero_point;
const int32_t output_offset = output->params.zero_point;
ConvParams op_params;
op_params.padding_type = RuntimePaddingType(params->padding);
op_params.padding_values.width = data->padding.width;
op_params.padding_values.height = data->padding.height;
op_params.stride_width = params->stride_width;
op_params.stride_height = params->stride_height;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.input_offset = input_offset;
op_params.weights_offset = filter_offset;
op_params.output_offset = output_offset;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = -data->output_shift;
op_params.quantized_activation_min = data->output_activation_min;
op_params.quantized_activation_max = data->output_activation_max;
reference_ops::Conv(op_params, GetTensorShape(input),
GetTensorData<uint8_t>(input), GetTensorShape(filter),
GetTensorData<uint8_t>(filter), GetTensorShape(bias),
GetTensorData<int32_t>(bias), GetTensorShape(output),
GetTensorData<uint8_t>(output), GetTensorShape(im2col),
GetTensorData<uint8_t>(im2col), nullptr);
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
TfLiteConvParams* params, OpData* data,
const TfLiteTensor* input,
const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* output,
TfLiteTensor* im2col) {
ConvParams op_params;
op_params.input_offset = -input->params.zero_point;
op_params.output_offset = output->params.zero_point;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.quantized_activation_min = data->output_activation_min;
op_params.quantized_activation_max = data->output_activation_max;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
reference_integer_ops::ConvPerChannel(
op_params, data->per_channel_output_multiplier,
data->per_channel_output_shift, GetTensorShape(input),
GetTensorData<int8>(input), GetTensorShape(filter),
GetTensorData<int8>(filter), GetTensorShape(bias),
GetTensorData<int32>(bias), GetTensorShape(output),
GetTensorData<int8>(output));
}
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLiteConvParams* params, OpData* data,
const TfLiteTensor* input, const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* im2col,
TfLiteTensor* hwcn_weights, TfLiteTensor* output) {
float output_activation_min, output_activation_max;
CalculateActivationRange(params->activation, &output_activation_min,
&output_activation_max);
ConvParams op_params;
op_params.padding_type = RuntimePaddingType(params->padding);
op_params.padding_values.width = data->padding.width;
op_params.padding_values.height = data->padding.height;
op_params.stride_width = params->stride_width;
op_params.stride_height = params->stride_height;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.float_activation_min = output_activation_min;
op_params.float_activation_max = output_activation_max;
reference_ops::Conv(op_params, GetTensorShape(input),
GetTensorData<float>(input), GetTensorShape(filter),
GetTensorData<float>(filter), GetTensorShape(bias),
GetTensorData<float>(bias), GetTensorShape(output),
GetTensorData<float>(output), GetTensorShape(im2col),
GetTensorData<float>(im2col));
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);
OpData* data = static_cast<OpData*>(node->user_data);
const auto params = static_cast<const TfLiteConvParams*>(node->builtin_data);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
TF_LITE_ENSURE(context, filter != nullptr);
int input_width = input->dims->data[2];
int input_height = input->dims->data[1];
@@ -212,9 +140,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
int filter_height = filter->dims->data[1];
int output_width = output->dims->data[2];
int output_height = output->dims->data[1];
struct tflite::ops::micro::conv::OpData *data = (struct tflite::ops::micro::conv::OpData*) malloc(sizeof(struct tflite::ops::micro::conv::OpData));
// Dynamically allocate per-channel quantization parameters.
const int num_channels = filter->dims->data[kConvQuantizedDimension];
data->per_channel_output_multiplier =
static_cast<int32_t*>(context->AllocatePersistentBuffer(
context, num_channels * sizeof(int32_t)));
data->per_channel_output_shift =
static_cast<int32_t*>(context->AllocatePersistentBuffer(
context, num_channels * sizeof(int32_t)));
// All per-channel quantized tensors need valid zero point and scale arrays.
if (input->type == kTfLiteInt8) {
@@ -222,8 +156,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
kTfLiteAffineQuantization);
const auto* affine_quantization =
reinterpret_cast<TfLiteAffineQuantization*>(
filter->quantization.params);
static_cast<TfLiteAffineQuantization*>(filter->quantization.params);
TF_LITE_ENSURE(context, affine_quantization);
TF_LITE_ENSURE(context, affine_quantization->scale);
TF_LITE_ENSURE(context, affine_quantization->zero_point);
@@ -240,6 +173,136 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
context, node, params, input_width, input_height, filter_width,
filter_height, output_width, output_height, input->type, data));
data->input_zero_point = input->params.zero_point;
data->filter_zero_point = filter->params.zero_point;
data->output_zero_point = output->params.zero_point;
return kTfLiteOk;
}
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteConvParams* params, const OpData& data,
const TfLiteEvalTensor* input,
const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias,
TfLiteEvalTensor* im2col, TfLiteEvalTensor* hwcn_weights,
TfLiteEvalTensor* output) {
const int32_t input_offset = -data.input_zero_point;
const int32_t filter_offset = -data.filter_zero_point;
const int32_t output_offset = data.output_zero_point;
// TODO(b/154032858): Investigate removing extra copies.
ConvParams op_params;
op_params.padding_type = RuntimePaddingType(params->padding);
op_params.padding_values.width = data.padding.width;
op_params.padding_values.height = data.padding.height;
op_params.stride_width = params->stride_width;
op_params.stride_height = params->stride_height;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.input_offset = input_offset;
op_params.weights_offset = filter_offset;
op_params.output_offset = output_offset;
op_params.output_multiplier = data.output_multiplier;
op_params.output_shift = -data.output_shift;
op_params.quantized_activation_min = data.output_activation_min;
op_params.quantized_activation_max = data.output_activation_max;
reference_ops::Conv(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<uint8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output),
tflite::micro::GetTensorShape(im2col),
tflite::micro::GetTensorData<uint8_t>(im2col), nullptr);
}
void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
TfLiteConvParams* params, const OpData& data,
const TfLiteEvalTensor* input,
const TfLiteEvalTensor* filter,
const TfLiteEvalTensor* bias,
TfLiteEvalTensor* output,
TfLiteEvalTensor* im2col) {
// TODO(b/154032858): Investigate removing extra copies.
ConvParams op_params;
op_params.input_offset = -data.input_zero_point;
op_params.output_offset = data.output_zero_point;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.padding_values.height = data.padding.height;
op_params.padding_values.width = data.padding.width;
op_params.quantized_activation_min = data.output_activation_min;
op_params.quantized_activation_max = data.output_activation_max;
reference_integer_ops::ConvPerChannel(
op_params, data.per_channel_output_multiplier,
data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLiteConvParams* params, const OpData& data,
const TfLiteEvalTensor* input, const TfLiteEvalTensor* filter,
const TfLiteEvalTensor* bias, TfLiteEvalTensor* im2col,
TfLiteEvalTensor* hwcn_weights, TfLiteEvalTensor* output) {
float output_activation_min, output_activation_max;
CalculateActivationRange(params->activation, &output_activation_min,
&output_activation_max);
// TODO(b/154032858): Investigate removing extra copies.
ConvParams op_params;
op_params.padding_type = RuntimePaddingType(params->padding);
op_params.padding_values.width = data.padding.width;
op_params.padding_values.height = data.padding.height;
op_params.stride_width = params->stride_width;
op_params.stride_height = params->stride_height;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.float_activation_min = output_activation_min;
op_params.float_activation_max = output_activation_max;
reference_ops::Conv(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<float>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<float>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output),
tflite::micro::GetTensorShape(im2col),
tflite::micro::GetTensorData<float>(im2col));
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
const TfLiteEvalTensor* filter =
tflite::micro::GetEvalInput(context, node, kFilterTensor);
const TfLiteEvalTensor* bias =
(NumInputs(node) == 3)
? tflite::micro::GetEvalInput(context, node, kBiasTensor)
: nullptr;
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData& data = *(static_cast<const OpData*>(node->user_data));
TF_LITE_ENSURE_EQ(context, input->type, output->type);
TF_LITE_ENSURE_MSG(context, input->type == filter->type,
"Hybrid models are not supported on TFLite Micro.");
switch (input->type) { // Already know in/out types are same.
case kTfLiteFloat32:
EvalFloat(context, node, params, data, input, filter, bias, nullptr,
@@ -256,27 +319,22 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);
free(data);
return kTfLiteError;
}
free(data);
return kTfLiteOk;
}
} // namespace conv
} // namespace
TfLiteRegistration* Register_CONV_2D() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/conv::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_CONV_2D() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite
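The reworked kernel above allocates all of its per-op state, including the per-channel multiplier and shift arrays, from the interpreter arena via AllocatePersistentBuffer in Init() and Prepare(), instead of fixed kMaxChannels arrays or malloc/free inside Eval(). A minimal sketch of that pattern for a hypothetical kernel (HypotheticalOpData and kNumCoefficients are illustrative names; the same headers as the kernels above are assumed):
struct HypotheticalOpData {
  int32_t* coefficients;  // sized at Prepare() time, lives in the arena
  int num_coefficients;
};
void* HypotheticalInit(TfLiteContext* context, const char* buffer,
                       size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  // The returned pointer becomes node->user_data for Prepare() and Eval().
  return context->AllocatePersistentBuffer(context,
                                           sizeof(HypotheticalOpData));
}
TfLiteStatus HypotheticalPrepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  auto* data = static_cast<HypotheticalOpData*>(node->user_data);
  const int kNumCoefficients = 8;  // would normally come from a tensor shape
  data->num_coefficients = kNumCoefficients;
  data->coefficients = static_cast<int32_t*>(context->AllocatePersistentBuffer(
      context, kNumCoefficients * sizeof(int32_t)));
  // Persistent buffers are owned by the arena, so Eval() never frees them.
  return kTfLiteOk;
}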


@@ -24,18 +24,15 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace depthwise_conv {
namespace {
constexpr int kInputTensor = 0;
constexpr int kFilterTensor = 1;
constexpr int kBiasTensor = 2;
constexpr int kOutputTensor = 0;
constexpr int kMaxChannels = 1024;
// Depthwise conv is quantized along dimension 3:
// https://www.tensorflow.org/lite/performance/quantization_spec
@@ -43,16 +40,20 @@ constexpr int kDepthwiseConvQuantizedDimension = 3;
struct OpData {
TfLitePaddingValues padding;
// Cached tensor zero point values for quantized operations.
int32_t input_zero_point;
int32_t filter_zero_point;
int32_t output_zero_point;
// The scaling factor from input to output (aka the 'real multiplier') can
// be represented as a fixed point multiplier plus a left shift.
int32_t output_multiplier;
int output_shift;
// Per channel output multiplier and shift.
// TODO(b/141139247): Allocate these dynamically when possible.
int32_t per_channel_output_multiplier[kMaxChannels];
int32_t per_channel_output_shift[kMaxChannels];
int32_t* per_channel_output_multiplier;
int32_t* per_channel_output_shift;
// The range of the fused activation layer. For example for kNone and
// uint8_t these would be 0 and 255.
int32_t output_activation_min;
@@ -78,125 +79,44 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
// parameters set. This is usually done during quantized training.
if (data_type != kTfLiteFloat32) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
TF_LITE_ENSURE(context, filter != nullptr);
const TfLiteTensor* bias =
GetOptionalInputTensor(context, node, kBiasTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
return tflite::PopulateConvolutionQuantizationParams(
context, input, filter, bias, output, params->activation,
&data->output_multiplier, &data->output_shift,
&data->output_activation_min, &data->output_activation_max,
data->per_channel_output_multiplier,
reinterpret_cast<int*>(data->per_channel_output_shift), num_channels));
reinterpret_cast<int*>(data->per_channel_output_shift), num_channels);
}
return kTfLiteOk;
}
} // namespace
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLiteDepthwiseConvParams* params, OpData* data,
const TfLiteTensor* input, const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* output) {
float output_activation_min, output_activation_max;
CalculateActivationRange(params->activation, &output_activation_min,
&output_activation_max);
tflite::DepthwiseParams op_params;
// Padding type is ignored, but still set.
op_params.padding_type = PaddingType::kSame;
op_params.padding_values.width = data->padding.width;
op_params.padding_values.height = data->padding.height;
op_params.stride_width = params->stride_width;
op_params.stride_height = params->stride_height;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.depth_multiplier = params->depth_multiplier;
op_params.float_activation_min = output_activation_min;
op_params.float_activation_max = output_activation_max;
tflite::reference_ops::DepthwiseConv(
op_params, GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(filter), GetTensorData<float>(filter),
GetTensorShape(bias), GetTensorData<float>(bias), GetTensorShape(output),
GetTensorData<float>(output));
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
TfLiteDepthwiseConvParams* params, OpData* data,
const TfLiteTensor* input,
const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* output) {
DepthwiseParams op_params;
op_params.padding_type = PaddingType::kSame;
op_params.padding_values.width = data->padding.width;
op_params.padding_values.height = data->padding.height;
op_params.stride_width = params->stride_width;
op_params.stride_height = params->stride_height;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.depth_multiplier = params->depth_multiplier;
op_params.input_offset = -input->params.zero_point;
op_params.weights_offset = 0;
op_params.output_offset = output->params.zero_point;
// TODO(b/130439627): Use calculated value for clamping.
op_params.quantized_activation_min = std::numeric_limits<int8_t>::min();
op_params.quantized_activation_max = std::numeric_limits<int8_t>::max();
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
reference_integer_ops::DepthwiseConvPerChannel(
op_params, data->per_channel_output_multiplier,
data->per_channel_output_shift, GetTensorShape(input),
GetTensorData<int8>(input), GetTensorShape(filter),
GetTensorData<int8>(filter), GetTensorShape(bias),
GetTensorData<int32>(bias), GetTensorShape(output),
GetTensorData<int8>(output));
}
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteDepthwiseConvParams* params, OpData* data,
const TfLiteTensor* input, const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* output) {
const int32_t input_offset = -input->params.zero_point;
const int32_t filter_offset = -filter->params.zero_point;
const int32_t output_offset = output->params.zero_point;
tflite::DepthwiseParams op_params;
// Padding type is ignored, but still set.
op_params.padding_type = PaddingType::kSame;
op_params.padding_values.width = data->padding.width;
op_params.padding_values.height = data->padding.height;
op_params.stride_width = params->stride_width;
op_params.stride_height = params->stride_height;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.depth_multiplier = params->depth_multiplier;
op_params.quantized_activation_min = data->output_activation_min;
op_params.quantized_activation_max = data->output_activation_max;
op_params.input_offset = input_offset;
op_params.weights_offset = filter_offset;
op_params.output_offset = output_offset;
op_params.output_multiplier = data->output_multiplier;
// Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
op_params.output_shift = -data->output_shift;
tflite::reference_ops::DepthwiseConv(
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
GetTensorShape(filter), GetTensorData<uint8_t>(filter),
GetTensorShape(bias), GetTensorData<int32_t>(bias),
GetTensorShape(output), GetTensorData<uint8_t>(output));
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params =
reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
OpData* data = static_cast<OpData*>(node->user_data);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
const TfLiteTensor* bias =
(NumInputs(node) == 3) ? GetInput(context, node, kBiasTensor) : nullptr;
TF_LITE_ENSURE(context, filter != nullptr);
const TfLiteType data_type = input->type;
int width = SizeOfDimension(input, 2);
@@ -204,7 +124,16 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
int filter_width = SizeOfDimension(filter, 2);
int filter_height = SizeOfDimension(filter, 1);
OpData data;
// Per channel quantization is only needed for int8_t inference. For other
// quantized types, only a single scale and zero point is needed.
const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
// Dynamically allocate per-channel quantization parameters.
data->per_channel_output_multiplier =
reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
context, num_channels * sizeof(int32_t)));
data->per_channel_output_shift =
reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
context, num_channels * sizeof(int32_t)));
// All per-channel quantized tensors need valid zero point and scale arrays.
if (input->type == kTfLiteInt8) {
@@ -227,20 +156,151 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_STATUS(CalculateOpData(context, node, params, width, height,
filter_width, filter_height, data_type,
&data));
data));
data->input_zero_point = input->params.zero_point;
data->filter_zero_point = filter->params.zero_point;
data->output_zero_point = output->params.zero_point;
return kTfLiteOk;
}
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLiteDepthwiseConvParams* params, const OpData& data,
const TfLiteEvalTensor* input, const TfLiteEvalTensor* filter,
const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) {
float output_activation_min, output_activation_max;
CalculateActivationRange(params->activation, &output_activation_min,
&output_activation_max);
tflite::DepthwiseParams op_params;
// Padding type is ignored, but still set.
op_params.padding_type = PaddingType::kSame;
op_params.padding_values.width = data.padding.width;
op_params.padding_values.height = data.padding.height;
op_params.stride_width = params->stride_width;
op_params.stride_height = params->stride_height;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.depth_multiplier = params->depth_multiplier;
op_params.float_activation_min = output_activation_min;
op_params.float_activation_max = output_activation_max;
tflite::reference_ops::DepthwiseConv(
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<float>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<float>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
TfLiteDepthwiseConvParams* params,
const OpData& data, const TfLiteEvalTensor* input,
const TfLiteEvalTensor* filter,
const TfLiteEvalTensor* bias,
TfLiteEvalTensor* output) {
DepthwiseParams op_params;
op_params.padding_type = PaddingType::kSame;
op_params.padding_values.width = data.padding.width;
op_params.padding_values.height = data.padding.height;
op_params.stride_width = params->stride_width;
op_params.stride_height = params->stride_height;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.depth_multiplier = params->depth_multiplier;
op_params.input_offset = -data.input_zero_point;
op_params.weights_offset = 0;
op_params.output_offset = data.output_zero_point;
// TODO(b/130439627): Use calculated value for clamping.
op_params.quantized_activation_min = std::numeric_limits<int8_t>::min();
op_params.quantized_activation_max = std::numeric_limits<int8_t>::max();
reference_integer_ops::DepthwiseConvPerChannel(
op_params, data.per_channel_output_multiplier,
data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteDepthwiseConvParams* params, const OpData& data,
const TfLiteEvalTensor* input,
const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias,
TfLiteEvalTensor* output) {
const int32_t input_offset = -data.input_zero_point;
const int32_t filter_offset = -data.filter_zero_point;
const int32_t output_offset = data.output_zero_point;
tflite::DepthwiseParams op_params;
// Padding type is ignored, but still set.
op_params.padding_type = PaddingType::kSame;
op_params.padding_values.width = data.padding.width;
op_params.padding_values.height = data.padding.height;
op_params.stride_width = params->stride_width;
op_params.stride_height = params->stride_height;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.depth_multiplier = params->depth_multiplier;
op_params.quantized_activation_min = data.output_activation_min;
op_params.quantized_activation_max = data.output_activation_max;
op_params.input_offset = input_offset;
op_params.weights_offset = filter_offset;
op_params.output_offset = output_offset;
op_params.output_multiplier = data.output_multiplier;
// Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
op_params.output_shift = -data.output_shift;
tflite::reference_ops::DepthwiseConv(
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<uint8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params =
reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
const OpData& data = *(static_cast<const OpData*>(node->user_data));
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
const TfLiteEvalTensor* filter =
tflite::micro::GetEvalInput(context, node, kFilterTensor);
const TfLiteEvalTensor* bias =
(NumInputs(node) == 3)
? tflite::micro::GetEvalInput(context, node, kBiasTensor)
: nullptr;
// TODO(aselle): Consider whether float conv and quantized conv should be
// separate ops to avoid dispatch overhead here.
switch (input->type) { // Already know in/out types are same.
case kTfLiteFloat32:
EvalFloat(context, node, params, &data, input, filter, bias, output);
EvalFloat(context, node, params, data, input, filter, bias, output);
break;
case kTfLiteInt8:
EvalQuantizedPerChannel(context, node, params, &data, input, filter, bias,
EvalQuantizedPerChannel(context, node, params, data, input, filter, bias,
output);
break;
case kTfLiteUInt8:
EvalQuantized(context, node, params, &data, input, filter, bias, output);
EvalQuantized(context, node, params, data, input, filter, bias, output);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@@ -250,20 +310,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteOk;
}
} // namespace depthwise_conv
} // namespace
TfLiteRegistration* Register_DEPTHWISE_CONV_2D() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/depthwise_conv::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_DEPTHWISE_CONV_2D() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite
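Note: the Init/Prepare changes above move the per-channel quantization parameters from fixed kMaxChannels arrays into buffers sized at Prepare time. Below is a minimal sketch of that allocation pattern for a hypothetical op; names such as ExampleOpData and the filter index are illustrative only and not part of this commit.

#include <cstdint>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/kernel_util.h"

struct ExampleOpData {
  int32_t* per_channel_multiplier;  // One entry per output channel.
};

void* ExampleInit(TfLiteContext* context, const char* buffer, size_t length) {
  // One persistent OpData per node, owned by the arena for the model lifetime.
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(ExampleOpData));
}

TfLiteStatus ExamplePrepare(TfLiteContext* context, TfLiteNode* node) {
  ExampleOpData* data = static_cast<ExampleOpData*>(node->user_data);
  const TfLiteTensor* filter = tflite::GetInput(context, node, /*index=*/1);
  const int num_channels = filter->dims->data[3];
  // Allocate exactly num_channels entries instead of a worst-case static array.
  data->per_channel_multiplier =
      static_cast<int32_t*>(context->AllocatePersistentBuffer(
          context, num_channels * sizeof(int32_t)));
  return kTfLiteOk;
}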

View File

@@ -22,19 +22,39 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace dequantize {
struct OpData {
tflite::DequantizationParams quantization_params;
// The scaling factor from input to output (aka the 'real multiplier') can
// be represented as a fixed point multiplier plus a left shift.
int32_t output_multiplier;
int output_shift;
int32_t output_zero_point;
};
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
// TODO(b/140515557): Add cached dequant to improve hybrid model performance.
const TfLiteTensor* input = GetInput(context, node, 0);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, 0);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE(context, input->type == kTfLiteUInt8 ||
input->type == kTfLiteInt8 ||
@@ -42,32 +62,49 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE(
context, output->type == kTfLiteFloat32 || output->type == kTfLiteInt32);
if (output->type == kTfLiteInt32) {
const double effective_output_scale =
static_cast<double>(input->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(effective_output_scale, &data->output_multiplier,
&data->output_shift);
}
data->quantization_params.zero_point = input->params.zero_point;
data->quantization_params.scale = static_cast<double>(input->params.scale);
data->output_zero_point = output->params.zero_point;
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* output = GetOutput(context, node, 0);
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
if (output->type == kTfLiteFloat32) {
tflite::DequantizationParams op_params;
op_params.zero_point = input->params.zero_point;
op_params.scale = static_cast<double>(input->params.scale);
switch (input->type) {
case kTfLiteUInt8:
reference_ops::Dequantize(
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
GetTensorShape(output), GetTensorData<float>(output));
reference_ops::Dequantize(data->quantization_params,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
break;
case kTfLiteInt8:
reference_ops::Dequantize(
op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
GetTensorShape(output), GetTensorData<float>(output));
reference_ops::Dequantize(data->quantization_params,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
break;
case kTfLiteInt16:
reference_ops::Dequantize(
op_params, GetTensorShape(input), GetTensorData<int16_t>(input),
GetTensorShape(output), GetTensorData<float>(output));
reference_ops::Dequantize(data->quantization_params,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int16_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
break;
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
@@ -76,28 +113,23 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteError;
}
} else if (output->type == kTfLiteInt32) {
int32_t output_multiplier;
int output_shift;
const double effective_output_scale =
static_cast<double>(input->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(effective_output_scale, &output_multiplier,
&output_shift);
int flat_size =
MatchingFlatSize(GetTensorShape(input), GetTensorShape(output));
int flat_size = MatchingFlatSize(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorShape(output));
switch (input->type) {
case kTfLiteInt16: {
reference_ops::Requantize(
GetTensorData<int16_t>(input), flat_size, output_multiplier,
output_shift, input->params.zero_point, output->params.zero_point,
GetTensorData<int32_t>(output));
tflite::micro::GetTensorData<int16_t>(input), flat_size,
data->output_multiplier, data->output_shift,
data->quantization_params.zero_point, data->output_zero_point,
tflite::micro::GetTensorData<int32_t>(output));
break;
}
case kTfLiteInt8: {
reference_ops::Requantize(
GetTensorData<int8_t>(input), flat_size, output_multiplier,
output_shift, input->params.zero_point, output->params.zero_point,
GetTensorData<int32_t>(output));
tflite::micro::GetTensorData<int8_t>(input), flat_size,
data->output_multiplier, data->output_shift,
data->quantization_params.zero_point, data->output_zero_point,
tflite::micro::GetTensorData<int32_t>(output));
break;
}
default:
@@ -118,16 +150,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
} // namespace dequantize
TfLiteRegistration* Register_DEQUANTIZE() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/dequantize::Prepare,
/*invoke=*/dequantize::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_DEQUANTIZE() {
return {/*init=*/dequantize::Init,
/*free=*/nullptr,
/*prepare=*/dequantize::Prepare,
/*invoke=*/dequantize::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
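For context, a small worked example of the multiplier/shift caching done in Prepare above; the numeric scales are assumed for illustration and are not taken from this diff.

#include <cstdint>
#include "tensorflow/lite/kernels/internal/quantization_util.h"

// Requantizing with input scale 0.5 and output scale 0.25 gives an effective
// scale of 0.5 / 0.25 = 2.0. QuantizeMultiplier() decomposes this as
// 2.0 = (2^30 / 2^31) * 2^2, so the cached values become:
//   output_multiplier == 1 << 30  (0.5 in Q0.31 fixed point)
//   output_shift      == 2        (positive means shift left)
void QuantizeMultiplierExample() {
  int32_t output_multiplier = 0;
  int output_shift = 0;
  tflite::QuantizeMultiplier(2.0, &output_multiplier, &output_shift);
}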

View File

@@ -18,6 +18,8 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace ops {
@@ -39,8 +41,10 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, 0);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, 0);
TF_LITE_ENSURE_EQ(context, input->type, output->type);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
if (!IsSupportedType(input->type)) {
TF_LITE_KERNEL_LOG(context, "Input data type %s (%d) is not supported.",
TfLiteTypeGetName(input->type), input->type);
@@ -52,13 +56,13 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
template <typename T>
inline TfLiteStatus EvalImpl(TfLiteContext* context, TfLiteNode* node,
T func(T), TfLiteType expected_type) {
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* output = GetOutput(context, node, 0);
TF_LITE_ENSURE_EQ(context, input->type, expected_type);
const int64_t num_elements = NumElements(input);
const T* in_data = GetTensorData<T>(input);
T* out_data = GetTensorData<T>(output);
for (int64_t i = 0; i < num_elements; ++i) {
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, expected_type);
const size_t num_elements = ElementCount(*input->dims);
const T* in_data = tflite::micro::GetTensorData<T>(input);
T* out_data = tflite::micro::GetTensorData<T>(output);
for (size_t i = 0; i < num_elements; ++i) {
out_data[i] = func(in_data[i]);
}
return kTfLiteOk;
@@ -109,116 +113,100 @@ TfLiteStatus LogicalNotEval(TfLiteContext* context, TfLiteNode* node) {
} // namespace
} // namespace elementwise
TfLiteRegistration* Register_ABS() {
static TfLiteRegistration r = {
/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::AbsEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_ABS() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::AbsEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration* Register_SIN() {
static TfLiteRegistration r = {
/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::SinEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_SIN() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::SinEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration* Register_COS() {
static TfLiteRegistration r = {
/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::CosEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_COS() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::CosEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration* Register_LOG() {
static TfLiteRegistration r = {
/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::LogEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_LOG() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::LogEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration* Register_SQRT() {
static TfLiteRegistration r = {
/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::SqrtEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_SQRT() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::SqrtEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration* Register_RSQRT() {
static TfLiteRegistration r = {
/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::RsqrtEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_RSQRT() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::RsqrtEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration* Register_SQUARE() {
static TfLiteRegistration r = {
/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::SquareEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_SQUARE() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::SquareEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration* Register_LOGICAL_NOT() {
static TfLiteRegistration r = {
/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsLogicalSupportedType>,
/*invoke=*/elementwise::LogicalNotEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_LOGICAL_NOT() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsLogicalSupportedType>,
/*invoke=*/elementwise::LogicalNotEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro

View File

@@ -1,4 +1,4 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -12,16 +12,21 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Optional debugging functionality. For small sized binaries, these are not
// needed.
#ifndef TENSORFLOW_LITE_MICRO_MICRO_OPTIONAL_DEBUG_TOOLS_H_
#define TENSORFLOW_LITE_MICRO_MICRO_OPTIONAL_DEBUG_TOOLS_H_
#include "tensorflow/lite/micro/micro_interpreter.h"
//
// This is a stub file for non-Ethos platforms
//
#include "tensorflow/lite/c/common.h"
namespace tflite {
// Prints a dump of what tensors and what nodes are in the interpreter.
void PrintInterpreterState(MicroInterpreter* interpreter);
} // namespace tflite
namespace ops {
namespace micro {
namespace custom {
TfLiteRegistration* Register_ETHOSU() { return nullptr; }
#endif // TENSORFLOW_LITE_MICRO_MICRO_OPTIONAL_DEBUG_TOOLS_H_
const char* GetString_ETHOSU() { return ""; }
} // namespace custom
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -17,7 +17,7 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
@@ -28,25 +28,28 @@ constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
reference_ops::Floor(GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<float>(output));
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
reference_ops::Floor(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
}
} // namespace floor
TfLiteRegistration* Register_FLOOR() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/floor::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_FLOOR() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/floor::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro

View File

@@ -1,4 +1,4 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -13,20 +13,19 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
#include "tensorflow/lite/micro/kernels/fully_connected.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace fully_connected {
namespace {
struct OpData {
@@ -40,6 +39,10 @@ struct OpData {
int32_t output_activation_max;
// The index of the temporary tensor where the quantized inputs are cached.
int input_quantized_index;
// Cached zero point values of tensors.
int32_t input_zero_point;
int32_t filter_zero_point;
int32_t output_zero_point;
};
constexpr int kInputTensor = 0;
@@ -64,20 +67,17 @@ TfLiteStatus CalculateOpData(TfLiteContext* context,
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, activation, output, &data->output_activation_min,
&data->output_activation_max));
data->input_zero_point = input->params.zero_point;
data->filter_zero_point = filter->params.zero_point;
data->output_zero_point = output->params.zero_point;
}
return status;
}
} // namespace
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
void* data = nullptr;
if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) ==
kTfLiteError) {
return nullptr;
}
return data;
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
@@ -89,11 +89,14 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor);
TF_LITE_ENSURE(context, filter != nullptr);
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_EQ(context, input->type, output->type);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
TF_LITE_ENSURE_MSG(context, input->type == filter->type,
"Hybrid models are not supported on TFLite Micro.");
@@ -102,13 +105,15 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
}
TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
const OpData& data, const TfLiteTensor* input,
const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* output) {
const OpData& data,
const TfLiteEvalTensor* input,
const TfLiteEvalTensor* filter,
const TfLiteEvalTensor* bias,
TfLiteEvalTensor* output) {
tflite::FullyConnectedParams op_params;
op_params.input_offset = -input->params.zero_point;
op_params.weights_offset = -filter->params.zero_point;
op_params.output_offset = output->params.zero_point;
op_params.input_offset = -data.input_zero_point;
op_params.weights_offset = -data.filter_zero_point;
op_params.output_offset = data.output_zero_point;
op_params.output_multiplier = data.output_multiplier;
// TODO(b/138810107): Figure out whether output shift should be inverted
op_params.output_shift = -data.output_shift;
@@ -116,20 +121,25 @@ TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
op_params.quantized_activation_max = data.output_activation_max;
reference_integer_ops::FullyConnected(
op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
GetTensorShape(filter), GetTensorData<int8_t>(filter),
GetTensorShape(bias), GetTensorData<int32_t>(bias),
GetTensorShape(output), GetTensorData<int8_t>(output));
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
}
TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
const OpData& data, const TfLiteTensor* input,
const TfLiteTensor* filter, const TfLiteTensor* bias,
TfLiteTensor* output) {
const int32_t input_offset = -input->params.zero_point;
const int32_t filter_offset = -filter->params.zero_point;
const int32_t output_offset = output->params.zero_point;
const OpData& data, const TfLiteEvalTensor* input,
const TfLiteEvalTensor* filter,
const TfLiteEvalTensor* bias,
TfLiteEvalTensor* output) {
const int32_t input_offset = -data.input_zero_point;
const int32_t filter_offset = -data.filter_zero_point;
const int32_t output_offset = data.output_zero_point;
tflite::FullyConnectedParams op_params;
op_params.input_offset = input_offset;
@@ -141,12 +151,16 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
op_params.quantized_activation_min = data.output_activation_min;
op_params.quantized_activation_max = data.output_activation_max;
#define TF_LITE_FULLY_CONNECTED(output_data_type) \
reference_ops::FullyConnected( \
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input), \
GetTensorShape(filter), GetTensorData<uint8_t>(filter), \
GetTensorShape(bias), GetTensorData<int32_t>(bias), \
GetTensorShape(output), GetTensorData<output_data_type>(output))
#define TF_LITE_FULLY_CONNECTED(output_data_type) \
reference_ops::FullyConnected( \
op_params, tflite::micro::GetTensorShape(input), \
tflite::micro::GetTensorData<uint8_t>(input), \
tflite::micro::GetTensorShape(filter), \
tflite::micro::GetTensorData<uint8_t>(filter), \
tflite::micro::GetTensorShape(bias), \
tflite::micro::GetTensorData<int32_t>(bias), \
tflite::micro::GetTensorShape(output), \
tflite::micro::GetTensorData<output_data_type>(output))
switch (output->type) {
case kTfLiteUInt8:
TF_LITE_FULLY_CONNECTED(uint8_t);
@@ -165,8 +179,9 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLiteFusedActivation activation,
const TfLiteTensor* input, const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* output) {
const TfLiteEvalTensor* input,
const TfLiteEvalTensor* filter,
const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) {
float output_activation_min, output_activation_max;
CalculateActivationRange(activation, &output_activation_min,
&output_activation_max);
@@ -174,10 +189,14 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
op_params.float_activation_min = output_activation_min;
op_params.float_activation_max = output_activation_max;
tflite::reference_ops::FullyConnected(
op_params, GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(filter), GetTensorData<float>(filter),
GetTensorShape(bias), GetTensorData<float>(bias), GetTensorShape(output),
GetTensorData<float>(output));
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<float>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<float>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
}
@@ -186,10 +205,14 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const auto* params =
static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor);
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
const TfLiteEvalTensor* filter =
tflite::micro::GetEvalInput(context, node, kWeightsTensor);
const TfLiteEvalTensor* bias =
tflite::micro::GetEvalInput(context, node, kBiasTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData& data = *(static_cast<const OpData*>(node->user_data));
@@ -214,20 +237,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteOk;
}
} // namespace fully_connected
} // namespace
TfLiteRegistration* Register_FULLY_CONNECTED() {
static TfLiteRegistration r = {/*init=*/fully_connected::Init,
/*free=*/nullptr,
/*prepare=*/fully_connected::Prepare,
/*invoke=*/fully_connected::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_FULLY_CONNECTED() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite
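As background for the offset signs cached above (offset = -zero_point), here is an illustrative scalar version of one quantized fully-connected output. It is a sketch of the arithmetic only, not the reference_ops implementation.

#include <cstdint>

// acc accumulates (q_input - input_zero_point) * (q_filter - filter_zero_point)
// written with the precomputed offsets; the kernel then rescales acc with
// output_multiplier/output_shift, adds output_offset, and clamps to the
// fused-activation range.
int32_t FullyConnectedAccumulate(const int8_t* input, const int8_t* filter,
                                 int depth, int32_t input_offset,
                                 int32_t weights_offset, int32_t bias) {
  int32_t acc = bias;
  for (int i = 0; i < depth; ++i) {
    acc += (input[i] + input_offset) * (filter[i] + weights_offset);
  }
  return acc;
}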

View File

@@ -0,0 +1,50 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_FULLY_CONNECTED_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_FULLY_CONNECTED_H_
#include "tensorflow/lite/c/common.h"
namespace tflite {
// This is the most generic TfLiteRegistration. The actual supported types may
// still be target dependent. The only requirement is that every implementation
// (reference or optimized) must define this function.
TfLiteRegistration Register_FULLY_CONNECTED();
#if defined(CMSIS_NN) || defined(ARDUINO)
// The Arduino is a special case where we use the CMSIS kernels, but because of
// the current approach to building for Arduino, we do not support -DCMSIS_NN as
// part of the build. As a result, we use defined(ARDUINO) as proxy for the
// CMSIS kernels for this one special case.
// Returns a TfLiteRegistration struct for the cmsis-nn kernel variant that
// only supports int8.
TfLiteRegistration Register_FULLY_CONNECTED_INT8();
#else
// Note that while this block gets used for both reference and optimized kernels
// that do not have any specialized implementations, the only goal here is to
// define a fallback implementation that allows reference kernels to still be
// used from applications that call a more specific kernel variant.
inline TfLiteRegistration Register_FULLY_CONNECTED_INT8() {
return Register_FULLY_CONNECTED();
}
#endif
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_FULLY_CONNECTED_H_
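A brief usage sketch of the fallback defined above; ChooseFullyConnected is a hypothetical helper, not part of this header.

#include "tensorflow/lite/micro/kernels/fully_connected.h"

// On CMSIS-NN (or Arduino) builds Register_FULLY_CONNECTED_INT8() resolves to
// the int8-only kernel; everywhere else it forwards to the generic
// registration, so application code needs no #ifdefs.
TfLiteRegistration ChooseFullyConnected(bool int8_only_graph) {
  return int8_only_graph ? tflite::Register_FULLY_CONNECTED_INT8()
                         : tflite::Register_FULLY_CONNECTED();
}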

View File

@@ -0,0 +1,142 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/hard_swish.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace ops {
namespace micro {
namespace hard_swish {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
void* HardSwishInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(HardSwishParams));
}
TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
HardSwishParams* params = static_cast<HardSwishParams*>(node->user_data);
params->input_zero_point = input->params.zero_point;
params->output_zero_point = output->params.zero_point;
const float input_scale = input->params.scale;
const float hires_input_scale = (1.0f / 128.0f) * input_scale;
const float reluish_scale = 3.0f / 32768.0f;
const float output_scale = output->params.scale;
const double output_multiplier =
static_cast<double>(hires_input_scale / output_scale);
int32_t output_multiplier_fixedpoint_int32;
QuantizeMultiplier(output_multiplier, &output_multiplier_fixedpoint_int32,
&params->output_multiplier_exponent);
DownScaleInt32ToInt16Multiplier(
output_multiplier_fixedpoint_int32,
&params->output_multiplier_fixedpoint_int16);
TF_LITE_ENSURE(context, params->output_multiplier_exponent <= 0);
const double reluish_multiplier =
static_cast<double>(hires_input_scale / reluish_scale);
int32_t reluish_multiplier_fixedpoint_int32;
QuantizeMultiplier(reluish_multiplier, &reluish_multiplier_fixedpoint_int32,
&params->reluish_multiplier_exponent);
DownScaleInt32ToInt16Multiplier(
reluish_multiplier_fixedpoint_int32,
&params->reluish_multiplier_fixedpoint_int16);
}
return kTfLiteOk;
}
TfLiteStatus HardSwishEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
HardSwishParams* params = static_cast<HardSwishParams*>(node->user_data);
switch (input->type) {
case kTfLiteFloat32: {
tflite::reference_ops::HardSwish<float>(
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
} break;
case kTfLiteUInt8: {
tflite::reference_ops::HardSwish<uint8_t>(
*params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} break;
case kTfLiteInt8: {
tflite::reference_ops::HardSwish<int8_t>(
*params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
} break;
default: {
TF_LITE_KERNEL_LOG(
context,
"Only float32/int8_t/uint8_t are supported currently, got %s",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
}
return kTfLiteOk;
}
} // namespace hard_swish
TfLiteRegistration Register_HARD_SWISH() {
return {/*init=*/hard_swish::HardSwishInit,
/*free=*/nullptr,
/*prepare=*/hard_swish::HardSwishPrepare,
/*invoke=*/hard_swish::HardSwishEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite
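For reference, the multipliers prepared in HardSwishPrepare implement the standard hard-swish curve; below is a float sketch of that curve, illustrative and separate from the quantized path above.

#include <algorithm>

// hard_swish(x) = x * relu6(x + 3) / 6; the quantized kernel evaluates the
// same curve using the reluish and output fixed-point multipliers.
float HardSwishReference(float x) {
  const float relu6 = std::min(std::max(x + 3.0f, 0.0f), 6.0f);
  return x * relu6 / 6.0f;
}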

View File

@@ -0,0 +1,165 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/kernels/kernel_runner.h"
namespace tflite {
namespace micro {
namespace {
constexpr size_t kBufferAlignment = 16;
} // namespace
// TODO(b/161841696): Consider moving away from global arena buffers:
constexpr int KernelRunner::kNumScratchBuffers_;
constexpr int KernelRunner::kKernelRunnerBufferSize_;
uint8_t KernelRunner::kKernelRunnerBuffer_[];
KernelRunner::KernelRunner(const TfLiteRegistration& registration,
TfLiteTensor* tensors, int tensors_size,
TfLiteIntArray* inputs, TfLiteIntArray* outputs,
void* builtin_data, ErrorReporter* error_reporter)
: allocator_(SimpleMemoryAllocator::Create(
error_reporter, kKernelRunnerBuffer_, kKernelRunnerBufferSize_)),
registration_(registration),
tensors_(tensors),
error_reporter_(error_reporter) {
// Prepare TfLiteContext:
context_.impl_ = static_cast<void*>(this);
context_.ReportError = ReportOpError;
context_.recommended_num_threads = 1;
context_.GetTensor = GetTensor;
context_.GetEvalTensor = GetEvalTensor;
context_.AllocatePersistentBuffer = AllocatePersistentBuffer;
context_.RequestScratchBufferInArena = RequestScratchBufferInArena;
context_.GetScratchBuffer = GetScratchBuffer;
// Prepare TfLiteNode:
node_.inputs = inputs;
node_.outputs = outputs;
node_.builtin_data = builtin_data;
}
TfLiteStatus KernelRunner::InitAndPrepare(const char* init_data) {
if (registration_.init) {
node_.user_data = registration_.init(&context_, init_data, /*length=*/0);
}
if (registration_.prepare) {
TF_LITE_ENSURE_STATUS(registration_.prepare(&context_, &node_));
}
return kTfLiteOk;
}
TfLiteStatus KernelRunner::Invoke() {
if (registration_.invoke == nullptr) {
TF_LITE_REPORT_ERROR(error_reporter_,
"TfLiteRegistration missing invoke function pointer!");
return kTfLiteError;
}
return registration_.invoke(&context_, &node_);
}
TfLiteTensor* KernelRunner::GetTensor(const struct TfLiteContext* context,
int tensor_index) {
TFLITE_DCHECK(context != nullptr);
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
TFLITE_DCHECK(runner != nullptr);
return &runner->tensors_[tensor_index];
}
TfLiteEvalTensor* KernelRunner::GetEvalTensor(
const struct TfLiteContext* context, int tensor_index) {
TFLITE_DCHECK(context != nullptr);
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
TFLITE_DCHECK(runner != nullptr);
TfLiteEvalTensor* eval_tensor =
reinterpret_cast<TfLiteEvalTensor*>(runner->allocator_->AllocateTemp(
sizeof(TfLiteEvalTensor), alignof(TfLiteEvalTensor)));
TFLITE_DCHECK(eval_tensor != nullptr);
// In unit tests, the TfLiteTensor pointer contains the source of truth for
// buffers and values:
eval_tensor->data = runner->tensors_[tensor_index].data;
eval_tensor->dims = runner->tensors_[tensor_index].dims;
eval_tensor->type = runner->tensors_[tensor_index].type;
return eval_tensor;
}
void* KernelRunner::AllocatePersistentBuffer(TfLiteContext* context,
size_t bytes) {
TFLITE_DCHECK(context != nullptr);
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
TFLITE_DCHECK(runner != nullptr);
return runner->allocator_->AllocateFromTail(bytes, kBufferAlignment);
}
TfLiteStatus KernelRunner::RequestScratchBufferInArena(TfLiteContext* context,
size_t bytes,
int* buffer_index) {
TFLITE_DCHECK(context != nullptr);
TFLITE_DCHECK(buffer_index != nullptr);
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
TFLITE_DCHECK(runner != nullptr);
if (runner->scratch_buffer_count_ == kNumScratchBuffers_) {
TF_LITE_REPORT_ERROR(
runner->error_reporter_,
"Exceeded the maximum number of scratch tensors allowed (%d).",
kNumScratchBuffers_);
return kTfLiteError;
}
// For tests, we allocate scratch buffers from the tail and keep them around
// for the lifetime of the model. This means that the arena size in the tests
// will be larger than it would be if the scratch buffers could share memory.
runner->scratch_buffers_[runner->scratch_buffer_count_] =
runner->allocator_->AllocateFromTail(bytes, kBufferAlignment);
TFLITE_DCHECK(runner->scratch_buffers_[runner->scratch_buffer_count_] !=
nullptr);
*buffer_index = runner->scratch_buffer_count_++;
return kTfLiteOk;
}
void* KernelRunner::GetScratchBuffer(TfLiteContext* context, int buffer_index) {
TFLITE_DCHECK(context != nullptr);
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
TFLITE_DCHECK(runner != nullptr);
TFLITE_DCHECK(runner->scratch_buffer_count_ <= kNumScratchBuffers_);
if (buffer_index >= runner->scratch_buffer_count_) {
return nullptr;
}
return runner->scratch_buffers_[buffer_index];
}
void KernelRunner::ReportOpError(struct TfLiteContext* context,
const char* format, ...) {
TFLITE_DCHECK(context != nullptr);
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
TFLITE_DCHECK(runner != nullptr);
va_list args;
va_start(args, format);
TF_LITE_REPORT_ERROR(runner->error_reporter_, format, args);
va_end(args);
}
} // namespace micro
} // namespace tflite
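The scratch-buffer plumbing above mirrors the calling pattern used by kernels; here is a hedged sketch of that pattern with a hypothetical op (ScratchOpData and the 256-byte size are assumptions for illustration).

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"

struct ScratchOpData {
  int scratch_index;
};

TfLiteStatus ScratchPrepare(TfLiteContext* context, TfLiteNode* node) {
  ScratchOpData* data = static_cast<ScratchOpData*>(node->user_data);
  // Reserve 256 bytes of per-invoke working memory; the allocator (or
  // KernelRunner in tests) hands back an index, not a pointer.
  return context->RequestScratchBufferInArena(context, /*bytes=*/256,
                                              &data->scratch_index);
}

TfLiteStatus ScratchEval(TfLiteContext* context, TfLiteNode* node) {
  ScratchOpData* data = static_cast<ScratchOpData*>(node->user_data);
  void* scratch = context->GetScratchBuffer(context, data->scratch_index);
  TFLITE_DCHECK(scratch != nullptr);
  // ... use scratch as temporary working memory during this invoke ...
  return kTfLiteOk;
}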

View File

@@ -0,0 +1,83 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_RUNNER_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_RUNNER_H_
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"
namespace tflite {
namespace micro {
// Helper class to perform a simulated kernel (i.e. TfLiteRegistration)
// lifecycle (init, prepare, invoke). All internal allocations are handled by
// this class. Simply pass in the registration, list of required tensors,
// inputs array, outputs array, and any builtin data. Calling Invoke() will
// automatically walk the kernel, and outputs will be ready on the
// TfLiteTensor output provided during construction.
class KernelRunner {
public:
KernelRunner(const TfLiteRegistration& registration, TfLiteTensor* tensors,
int tensors_size, TfLiteIntArray* inputs,
TfLiteIntArray* outputs, void* builtin_data,
ErrorReporter* error_reporter);
// Calls init and prepare on the kernel (i.e. TfLiteRegistration) struct. Any
// exceptions will be reported through the error_reporter and returned as a
// status code here.
TfLiteStatus InitAndPrepare(const char* init_data = nullptr);
// Calls invoke on the given TfLiteRegistration. After a successful invoke,
// results will be available in the output tensor as passed into the
// constructor of this class.
TfLiteStatus Invoke();
protected:
static TfLiteTensor* GetTensor(const struct TfLiteContext* context,
int tensor_index);
static TfLiteEvalTensor* GetEvalTensor(const struct TfLiteContext* context,
int tensor_index);
static void* AllocatePersistentBuffer(TfLiteContext* context, size_t bytes);
static TfLiteStatus RequestScratchBufferInArena(TfLiteContext* context,
size_t bytes,
int* buffer_index);
static void* GetScratchBuffer(TfLiteContext* context, int buffer_index);
static void ReportOpError(struct TfLiteContext* context, const char* format,
...);
private:
static constexpr int kNumScratchBuffers_ = 5;
static constexpr int kKernelRunnerBufferSize_ = 10000;
static uint8_t kKernelRunnerBuffer_[kKernelRunnerBufferSize_];
SimpleMemoryAllocator* allocator_ = nullptr;
const TfLiteRegistration& registration_;
TfLiteTensor* tensors_ = nullptr;
ErrorReporter* error_reporter_ = nullptr;
TfLiteContext context_ = {};
TfLiteNode node_ = {};
int scratch_buffer_count_ = 0;
uint8_t* scratch_buffers_[kNumScratchBuffers_];
};
} // namespace micro
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_RUNNER_H_
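A typical test-side usage sketch of this class; the tensor and index arrays are assumed to be built with the usual micro test helpers inside a micro test body and are not shown here.

// Assumed to exist in the surrounding test: tensors, tensors_size,
// inputs_array, outputs_array and an ErrorReporter* named reporter.
const TfLiteRegistration registration = tflite::ops::micro::Register_FLOOR();
tflite::micro::KernelRunner runner(registration, tensors, tensors_size,
                                   inputs_array, outputs_array,
                                   /*builtin_data=*/nullptr, reporter);
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare());
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke());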

View File

@@ -0,0 +1,41 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/c/common.h"
namespace tflite {
namespace micro {
bool HaveSameShapes(const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2) {
TFLITE_DCHECK(input1 != nullptr);
TFLITE_DCHECK(input2 != nullptr);
return TfLiteIntArrayEqual(input1->dims, input2->dims);
}
const RuntimeShape GetTensorShape(const TfLiteEvalTensor* tensor) {
if (tensor == nullptr || tensor->dims == nullptr) {
return RuntimeShape();
}
TfLiteIntArray* dims = tensor->dims;
const int dims_size = dims->size;
const int32_t* dims_data = reinterpret_cast<const int32_t*>(dims->data);
return RuntimeShape(dims_size, dims_data);
}
} // namespace micro
} // namespace tflite

View File

@@ -0,0 +1,75 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_UTIL_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_UTIL_H_
#include <cstdint>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace micro {
// Returns a mutable tensor for a given input index. is_variable must be checked
// during prepare when the full TfLiteTensor is available.
inline TfLiteEvalTensor* GetMutableEvalInput(const TfLiteContext* context,
const TfLiteNode* node,
int index) {
TFLITE_DCHECK(context != nullptr);
TFLITE_DCHECK(node != nullptr);
return context->GetEvalTensor(context, node->inputs->data[index]);
}
// Returns the TfLiteEvalTensor struct for a given input index in a node.
inline const TfLiteEvalTensor* GetEvalInput(const TfLiteContext* context,
const TfLiteNode* node, int index) {
return GetMutableEvalInput(context, node, index);
}
// Returns the TfLiteEvalTensor struct for a given output index in a node.
inline TfLiteEvalTensor* GetEvalOutput(const TfLiteContext* context,
const TfLiteNode* node, int index) {
TFLITE_DCHECK(context != nullptr);
TFLITE_DCHECK(node != nullptr);
return context->GetEvalTensor(context, node->outputs->data[index]);
}
// Returns data for a TfLiteEvalTensor struct.
template <typename T>
T* GetTensorData(TfLiteEvalTensor* tensor) {
return tensor != nullptr ? reinterpret_cast<T*>(tensor->data.raw) : nullptr;
}
// Returns const data for a TfLiteEvalTensor struct.
template <typename T>
const T* GetTensorData(const TfLiteEvalTensor* tensor) {
TFLITE_DCHECK(tensor != nullptr);
return reinterpret_cast<const T*>(tensor->data.raw);
}
// Returns the shape of a TfLiteEvalTensor struct.
const RuntimeShape GetTensorShape(const TfLiteEvalTensor* tensor);
// Return true if the given tensors have the same shape.
bool HaveSameShapes(const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2);
} // namespace micro
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_UTIL_H_
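A sketch of how a kernel's Eval typically consumes these helpers, assuming a TFLM build where the includes resolve; the element-wise copy op below is hypothetical and only illustrates the access pattern (fetch eval tensors by index, validate shapes, then read and write raw data):

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace {

// Hypothetical element-wise copy kernel showing the TfLiteEvalTensor helpers.
TfLiteStatus CopyEval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, /*index=*/0);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, /*index=*/0);
  if (!tflite::micro::HaveSameShapes(input, output)) {
    return kTfLiteError;
  }
  const int flat_size = tflite::micro::GetTensorShape(input).FlatSize();
  const float* in = tflite::micro::GetTensorData<float>(input);
  float* out = tflite::micro::GetTensorData<float>(output);
  for (int i = 0; i < flat_size; ++i) {
    out[i] = in[i];
  }
  return kTfLiteOk;
}

}  // namespace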

View File

@@ -14,16 +14,19 @@ limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/portable_tensor.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h"
#include "tensorflow/lite/kernels/internal/reference/l2normalization.h"
#include "tensorflow/lite/kernels/internal/tensor.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace l2norm {
namespace {
// This file has two implementations of L2Norm.
enum KernelType {
kReference,
@@ -33,44 +36,59 @@ enum KernelType {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
} // namespace
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
#if defined(DEBUG)
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLiteL2NormParams*>(node->builtin_data);
L2NormalizationParams* data =
static_cast<L2NormalizationParams*>(node->user_data);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE(context, NumDimensions(input) <= 4);
TF_LITE_ENSURE(context, output->type == kTfLiteFloat32 ||
output->type == kTfLiteUInt8 ||
output->type == kTfLiteInt8);
TF_LITE_ENSURE_EQ(context, input->type, output->type);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
TF_LITE_ENSURE_EQ(context, output->params.scale, (1. / 128.));
if (output->type == kTfLiteUInt8) {
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 128);
}
if (output->type == kTfLiteInt8) {
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
}
data->input_zero_point = input->params.zero_point;
} else if (output->type == kTfLiteFloat32) {
data->input_zero_point = 0;
}
// TODO(ahentz): For some reason our implementations don't support
// activations.
TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone);
#endif
return kTfLiteOk;
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context,
sizeof(L2NormalizationParams));
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const L2NormalizationParams& data =
*(static_cast<const L2NormalizationParams*>(node->user_data));
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
// TODO(b/143912164): instead of hardcoding the epsilon here, we should read it
// from TensorFlow, i.e., add a parameter for it.
@@ -87,39 +105,32 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
// So we don't even need to handle the epsilon for the quantized kernel case.
const float epsilon = 1e-6f;
if (output->type == kTfLiteFloat32) {
#define TF_LITE_L2NORM(type) \
tflite::L2NormalizationParams op_params; \
op_params.input_zero_point = 0; \
type::L2Normalization(op_params, GetTensorShape(input), \
GetTensorData<float>(input), GetTensorShape(output), \
GetTensorData<float>(output), epsilon)
TF_LITE_L2NORM(reference_ops);
#undef TF_LITE_L2NORM
reference_ops::L2Normalization(data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output),
epsilon);
} else if (output->type == kTfLiteUInt8) {
#define TF_LITE_L2NORM(type) \
tflite::L2NormalizationParams op_params; \
op_params.input_zero_point = input->params.zero_point; \
type::L2Normalization(op_params, GetTensorShape(input), \
GetTensorData<uint8>(input), GetTensorShape(output), \
GetTensorData<uint8>(output))
TF_LITE_L2NORM(reference_ops);
#undef TF_LITE_L2NORM
reference_ops::L2Normalization(
data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else if (output->type == kTfLiteInt8) {
const auto input_shape = GetTensorShape(input);
const auto output_shape = GetTensorShape(output);
const auto input_shape = tflite::micro::GetTensorShape(input);
const auto output_shape = tflite::micro::GetTensorShape(output);
const int trailing_dim = input_shape.DimensionsCount() - 1;
const int depth =
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
const int outer_size =
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
reference_integer_ops::L2Normalization(input->params.zero_point, outer_size,
depth, GetTensorData<int8>(input),
GetTensorData<int8>(output));
reference_integer_ops::L2Normalization(
data.input_zero_point, outer_size, depth,
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorData<int8_t>(output));
} else {
TF_LITE_KERNEL_LOG(context, "Output type is %d, requires float.",
output->type);
TF_LITE_KERNEL_LOG(context, "Output type is %s, requires float.",
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
@@ -128,22 +139,18 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
} // namespace l2norm
TfLiteRegistration* Register_L2NORM_REF() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/l2norm::Prepare,
/*invoke=*/l2norm::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_L2NORM_REF() {
return {/*init=*/l2norm::Init,
/*free=*/nullptr,
/*prepare=*/l2norm::Prepare,
/*invoke=*/l2norm::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration* Register_L2_NORMALIZATION() {
return Register_L2NORM_REF();
}
TfLiteRegistration Register_L2_NORMALIZATION() { return Register_L2NORM_REF(); }
} // namespace micro
} // namespace ops
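A small standalone check, separate from the kernel, illustrating why Prepare pins the quantized output parameters: L2-normalized values lie in [-1, 1], which a scale of 1/128 covers with zero_point 0 for int8 and 128 for uint8. The program just prints the representable range at those settings:

#include <cstdio>

int main() {
  const float scale = 1.0f / 128.0f;
  // int8: q in [-128, 127], zero_point = 0  ->  value = scale * q
  std::printf("int8 range:  [%f, %f]\n", scale * -128, scale * 127);
  // uint8: q in [0, 255], zero_point = 128  ->  value = scale * (q - 128)
  std::printf("uint8 range: [%f, %f]\n", scale * (0 - 128), scale * (255 - 128));
  return 0;
}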

View File

@@ -15,8 +15,8 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
@@ -31,20 +31,29 @@ constexpr int kOutputTensor = 0;
TfLiteStatus LogicalImpl(TfLiteContext* context, TfLiteNode* node,
bool (*func)(bool, bool)) {
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
if (HaveSameShapes(input1, input2)) {
if (tflite::micro::HaveSameShapes(input1, input2)) {
reference_ops::BinaryFunction<bool, bool, bool>(
GetTensorShape(input1), GetTensorData<bool>(input1),
GetTensorShape(input2), GetTensorData<bool>(input2),
GetTensorShape(output), GetTensorData<bool>(output), func);
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<bool>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<bool>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<bool>(output), func);
} else {
reference_ops::BroadcastBinaryFunction4DSlow<bool, bool, bool>(
GetTensorShape(input1), GetTensorData<bool>(input1),
GetTensorShape(input2), GetTensorData<bool>(input2),
GetTensorShape(output), GetTensorData<bool>(output), func);
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<bool>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<bool>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<bool>(output), func);
}
return kTfLiteOk;
@@ -65,32 +74,30 @@ TfLiteStatus LogicalAndEval(TfLiteContext* context, TfLiteNode* node) {
} // namespace
} // namespace logical
TfLiteRegistration* Register_LOGICAL_OR() {
TfLiteRegistration Register_LOGICAL_OR() {
// Init, Free, Prepare, and Eval satisfy the interface required by
// TfLiteRegistration.
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/logical::LogicalOrEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/logical::LogicalOrEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration* Register_LOGICAL_AND() {
TfLiteRegistration Register_LOGICAL_AND() {
// Init, Free, Prepare, and Eval satisfy the interface required by
// TfLiteRegistration.
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/logical::LogicalAndEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/logical::LogicalAndEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
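A standalone sketch, with illustrative names and no TFLite types, of the dispatch pattern above: OR and AND share one element-wise loop that receives a bool(bool, bool) function pointer, the same way LogicalImpl forwards func into the reference binary-function routines:

#include <cstdio>

namespace {

bool LogicalOr(bool a, bool b) { return a || b; }
bool LogicalAnd(bool a, bool b) { return a && b; }

// Shared element-wise loop, parameterized by the operation.
void Apply(const bool* in1, const bool* in2, bool* out, int n,
           bool (*func)(bool, bool)) {
  for (int i = 0; i < n; ++i) out[i] = func(in1[i], in2[i]);
}

}  // namespace

int main() {
  const bool a[4] = {true, true, false, false};
  const bool b[4] = {true, false, true, false};
  bool or_out[4], and_out[4];
  Apply(a, b, or_out, 4, LogicalOr);
  Apply(a, b, and_out, 4, LogicalAnd);
  for (int i = 0; i < 4; ++i)
    std::printf("%d|%d=%d  %d&%d=%d\n", a[i], b[i], or_out[i], a[i], b[i],
                and_out[i]);
  return 0;
}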

View File

@@ -23,6 +23,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
@@ -42,9 +43,11 @@ struct OpData {
TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
OpData* data) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_EQ(context, input->type, output->type);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
if (input->type == kTfLiteInt8) {
TF_LITE_ENSURE_EQ(context, output->params.zero_point,
std::numeric_limits<int8_t>::min());
@@ -54,6 +57,8 @@ TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
static_cast<double>(input->params.scale) *
static_cast<double>(1 << (31 - kInputIntegerBits));
data->input_zero_point = input->params.zero_point;
const double q = std::frexp(input_real_multiplier, &data->input_left_shift);
data->input_multiplier = static_cast<int32_t>(TfLiteRound(q * (1ll << 31)));
@@ -64,18 +69,34 @@ TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
}
} // namespace
void* LogisticInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus LogisticPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
return CalculateArithmeticOpData(context, node, data);
}
TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
OpData data;
CalculateArithmeticOpData(context, node, &data);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
if (input->type == kTfLiteFloat32) {
switch (output->type) {
case kTfLiteFloat32: {
reference_ops::Logistic(
GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<float>(output));
reference_ops::Logistic(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
}
default:
@@ -88,10 +109,11 @@ TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
switch (output->type) {
case kTfLiteInt8: {
reference_integer_ops::Logistic(
input->params.zero_point, data.input_range_radius,
data.input_multiplier, data.input_left_shift,
NumElements(input->dims), GetTensorData<int8_t>(input),
GetTensorData<int8_t>(output));
data->input_zero_point, data->input_range_radius,
data->input_multiplier, data->input_left_shift,
NumElements(input->dims),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
}
default:
@@ -113,16 +135,15 @@ TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
} // namespace activations
TfLiteRegistration* Register_LOGISTIC() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/activations::LogisticEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_LOGISTIC() {
return {/*init=*/activations::LogisticInit,
/*free=*/nullptr,
/*prepare=*/activations::LogisticPrepare,
/*invoke=*/activations::LogisticEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
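A standalone sketch of the fixed-point setup done in CalculateArithmeticOpData: the input scale is folded into a Q31 multiplier plus a left shift via frexp. The scale value and kInputIntegerBits = 4 are assumptions for illustration only; the real constant is defined earlier in logistic.cc and is not shown in this diff:

#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
  // Assumed example values, not taken from the kernel.
  const double input_scale = 0.00784313772;  // roughly 2/255
  const int kInputIntegerBits = 4;
  const double input_real_multiplier =
      input_scale * static_cast<double>(1 << (31 - kInputIntegerBits));

  // frexp splits the multiplier into q in [0.5, 1) and a power-of-two shift;
  // std::round stands in for TfLiteRound here.
  int input_left_shift = 0;
  const double q = std::frexp(input_real_multiplier, &input_left_shift);
  const int32_t input_multiplier =
      static_cast<int32_t>(std::round(q * (1ll << 31)));

  std::printf("multiplier=%ld shift=%d\n",
              static_cast<long>(input_multiplier), input_left_shift);
  return 0;
}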

View File

@@ -22,6 +22,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
@@ -40,13 +41,13 @@ constexpr int kOutputTensor = 0;
struct OpContext {
OpContext(TfLiteContext* context, TfLiteNode* node) {
input1 = GetInput(context, node, kInputTensor1);
input2 = GetInput(context, node, kInputTensor2);
output = GetOutput(context, node, kOutputTensor);
input1 = tflite::micro::GetEvalInput(context, node, kInputTensor1);
input2 = tflite::micro::GetEvalInput(context, node, kInputTensor2);
output = tflite::micro::GetEvalOutput(context, node, kOutputTensor);
}
const TfLiteTensor* input1;
const TfLiteTensor* input2;
TfLiteTensor* output;
const TfLiteEvalTensor* input1;
const TfLiteEvalTensor* input2;
TfLiteEvalTensor* output;
};
struct MaximumOp {
@@ -69,12 +70,12 @@ template <typename data_type, typename op_type>
void TFLiteOperation(TfLiteContext* context, TfLiteNode* node,
const OpContext& op_context) {
reference_ops::MaximumMinimumBroadcastSlow(
GetTensorShape(op_context.input1),
GetTensorData<data_type>(op_context.input1),
GetTensorShape(op_context.input2),
GetTensorData<data_type>(op_context.input2),
GetTensorShape(op_context.output),
GetTensorData<data_type>(op_context.output),
tflite::micro::GetTensorShape(op_context.input1),
tflite::micro::GetTensorData<data_type>(op_context.input1),
tflite::micro::GetTensorShape(op_context.input2),
tflite::micro::GetTensorData<data_type>(op_context.input2),
tflite::micro::GetTensorShape(op_context.output),
tflite::micro::GetTensorData<data_type>(op_context.output),
op_type::template op<data_type>);
}
@@ -116,34 +117,30 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
} // namespace maximum_minimum
TfLiteRegistration* Register_MAXIMUM() {
static TfLiteRegistration r = {
/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/
maximum_minimum::Eval<maximum_minimum::kReference,
maximum_minimum::MaximumOp>,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_MAXIMUM() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/
maximum_minimum::Eval<maximum_minimum::kReference,
maximum_minimum::MaximumOp>,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration* Register_MINIMUM() {
static TfLiteRegistration r = {
/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/
maximum_minimum::Eval<maximum_minimum::kReference,
maximum_minimum::MinimumOp>,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_MINIMUM() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/
maximum_minimum::Eval<maximum_minimum::kReference,
maximum_minimum::MinimumOp>,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
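A standalone sketch of the functor-template dispatch used above, with stand-in names: each operation exposes a static templated op(), and a single shared loop is instantiated per operation, mirroring Eval<kReference, MaximumOp>:

#include <algorithm>
#include <cstdio>

namespace {

// Stand-ins for MaximumOp/MinimumOp.
struct MaxOp {
  template <typename T>
  static T op(T a, T b) { return std::max(a, b); }
};
struct MinOp {
  template <typename T>
  static T op(T a, T b) { return std::min(a, b); }
};

// Shared element-wise loop; the operation is chosen at compile time.
template <typename T, typename Op>
void EvalPair(const T* a, const T* b, T* out, int n) {
  for (int i = 0; i < n; ++i) out[i] = Op::template op<T>(a[i], b[i]);
}

}  // namespace

int main() {
  const float a[3] = {1.f, 5.f, -2.f};
  const float b[3] = {4.f, 0.f, -1.f};
  float max_out[3], min_out[3];
  EvalPair<float, MaxOp>(a, b, max_out, 3);
  EvalPair<float, MinOp>(a, b, min_out, 3);
  for (int i = 0; i < 3; ++i)
    std::printf("max=%g min=%g\n", max_out[i], min_out[i]);
  return 0;
}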

View File

@@ -17,10 +17,6 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
namespace tflite {
namespace ops {
namespace micro {
// Forward declaration of all micro op kernel registration methods. These
// registrations are included with the standard `BuiltinOpResolver`.
//
@@ -29,58 +25,73 @@ namespace micro {
// their model requires, using a custom `(Micro)MutableOpResolver`. Selective
// registration in turn allows the linker to strip unused kernels.
TfLiteRegistration* Register_ABS();
TfLiteRegistration* Register_ADD();
TfLiteRegistration* Register_ARG_MAX();
TfLiteRegistration* Register_ARG_MIN();
TfLiteRegistration* Register_AVERAGE_POOL_2D();
TfLiteRegistration* Register_CEIL();
namespace tflite {
// TFLM is incrementally moving towards a flat tflite namespace
// (https://abseil.io/tips/130). Any new ops (or cleanup of existing ops) should
// have their Register function declarations in the tflite namespace.
TfLiteRegistration Register_CONV_2D();
TfLiteRegistration Register_DEPTHWISE_CONV_2D();
TfLiteRegistration Register_QUANTIZE();
TfLiteRegistration Register_SHAPE();
TfLiteRegistration Register_SOFTMAX();
TfLiteRegistration Register_SVDF();
namespace ops {
namespace micro {
TfLiteRegistration Register_ABS();
TfLiteRegistration Register_ADD();
TfLiteRegistration Register_ARG_MAX();
TfLiteRegistration Register_ARG_MIN();
TfLiteRegistration Register_AVERAGE_POOL_2D();
TfLiteRegistration Register_CEIL();
// TODO(b/160234179): Change custom OPs to also return by value.
TfLiteRegistration* Register_CIRCULAR_BUFFER();
TfLiteRegistration* Register_CONV_2D();
TfLiteRegistration* Register_CONCATENATION();
TfLiteRegistration* Register_COS();
TfLiteRegistration* Register_DEPTHWISE_CONV_2D();
TfLiteRegistration* Register_DEQUANTIZE();
TfLiteRegistration* Register_EQUAL();
TfLiteRegistration* Register_FLOOR();
TfLiteRegistration* Register_FULLY_CONNECTED();
TfLiteRegistration* Register_GREATER();
TfLiteRegistration* Register_GREATER_EQUAL();
TfLiteRegistration* Register_LESS();
TfLiteRegistration* Register_LESS_EQUAL();
TfLiteRegistration* Register_LOG();
TfLiteRegistration* Register_LOGICAL_AND();
TfLiteRegistration* Register_LOGICAL_NOT();
TfLiteRegistration* Register_LOGICAL_OR();
TfLiteRegistration* Register_LOGISTIC();
TfLiteRegistration* Register_MAXIMUM();
TfLiteRegistration* Register_MAX_POOL_2D();
TfLiteRegistration* Register_MEAN();
TfLiteRegistration* Register_MINIMUM();
TfLiteRegistration* Register_MUL();
TfLiteRegistration* Register_NEG();
TfLiteRegistration* Register_NOT_EQUAL();
TfLiteRegistration* Register_PACK();
TfLiteRegistration* Register_PAD();
TfLiteRegistration* Register_PADV2();
TfLiteRegistration* Register_PRELU();
TfLiteRegistration* Register_QUANTIZE();
TfLiteRegistration* Register_RELU();
TfLiteRegistration* Register_RELU6();
TfLiteRegistration* Register_RESHAPE();
TfLiteRegistration* Register_RESIZE_NEAREST_NEIGHBOR();
TfLiteRegistration* Register_ROUND();
TfLiteRegistration* Register_RSQRT();
TfLiteRegistration* Register_SIN();
TfLiteRegistration* Register_SOFTMAX();
TfLiteRegistration* Register_SPLIT();
TfLiteRegistration* Register_SQRT();
TfLiteRegistration* Register_SQUARE();
TfLiteRegistration* Register_STRIDED_SLICE();
TfLiteRegistration* Register_SUB();
TfLiteRegistration* Register_SVDF();
TfLiteRegistration* Register_UNPACK();
TfLiteRegistration* Register_L2_NORMALIZATION();
TfLiteRegistration Register_CONCATENATION();
TfLiteRegistration Register_COS();
TfLiteRegistration Register_DEQUANTIZE();
TfLiteRegistration Register_EQUAL();
TfLiteRegistration Register_FLOOR();
TfLiteRegistration Register_GREATER();
TfLiteRegistration Register_GREATER_EQUAL();
TfLiteRegistration Register_HARD_SWISH();
TfLiteRegistration Register_LESS();
TfLiteRegistration Register_LESS_EQUAL();
TfLiteRegistration Register_LOG();
TfLiteRegistration Register_LOGICAL_AND();
TfLiteRegistration Register_LOGICAL_NOT();
TfLiteRegistration Register_LOGICAL_OR();
TfLiteRegistration Register_LOGISTIC();
TfLiteRegistration Register_MAXIMUM();
TfLiteRegistration Register_MAX_POOL_2D();
TfLiteRegistration Register_MEAN();
TfLiteRegistration Register_MINIMUM();
TfLiteRegistration Register_MUL();
TfLiteRegistration Register_NEG();
TfLiteRegistration Register_NOT_EQUAL();
TfLiteRegistration Register_PACK();
TfLiteRegistration Register_PAD();
TfLiteRegistration Register_PADV2();
TfLiteRegistration Register_PRELU();
TfLiteRegistration Register_REDUCE_MAX();
TfLiteRegistration Register_RELU();
TfLiteRegistration Register_RELU6();
TfLiteRegistration Register_RESHAPE();
TfLiteRegistration Register_RESIZE_NEAREST_NEIGHBOR();
TfLiteRegistration Register_ROUND();
TfLiteRegistration Register_RSQRT();
TfLiteRegistration Register_SIN();
TfLiteRegistration Register_SPLIT();
TfLiteRegistration Register_SPLIT_V();
TfLiteRegistration Register_SQRT();
TfLiteRegistration Register_SQUARE();
TfLiteRegistration Register_STRIDED_SLICE();
TfLiteRegistration Register_SUB();
TfLiteRegistration Register_UNPACK();
TfLiteRegistration Register_L2_NORMALIZATION();
TfLiteRegistration Register_TANH();
} // namespace micro
} // namespace ops
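A sketch of the selective-registration pattern this header's comment describes, assuming a TFLM build: size the MicroMutableOpResolver template parameter to the number of ops the model actually uses and register only those, so the linker can strip the remaining kernels. The four ops chosen here are illustrative, not tied to any particular model:

#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

namespace {

// Registers only the kernels a hypothetical conv/pool/dense/softmax model needs.
void RegisterSelectedOps(tflite::MicroMutableOpResolver<4>* resolver) {
  resolver->AddConv2D();
  resolver->AddMaxPool2D();
  resolver->AddFullyConnected();
  resolver->AddSoftmax();
}

}  // namespace

The resolver is then passed to the interpreter in place of AllOpsResolver.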

View File

@@ -21,132 +21,194 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
namespace tflite {
namespace ops {
namespace micro {
namespace mul {
namespace {
constexpr int kInput1Tensor = 0;
constexpr int kInput2Tensor = 1;
constexpr int kOutputTensor = 0;
struct OpData {
int32_t input1_zero_point;
int32_t input2_zero_point;
int32_t output_activation_min;
int32_t output_activation_max;
int32_t output_zero_point;
int32_t output_multiplier;
int output_shift;
float output_activation_min_f32;
float output_activation_max_f32;
};
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
TfLiteMulParams* params, OpData* data) {
const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor);
TF_LITE_ENSURE(context, input1 != nullptr);
const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor);
TF_LITE_ENSURE(context, input2 != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
TF_LITE_ENSURE_EQ(context, input1->type, input2->type);
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, params->activation, output, &data->output_activation_min,
&data->output_activation_max));
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, params->activation, output, &data->output_activation_min,
&data->output_activation_max));
double real_multiplier = static_cast<double>(input1->params.scale) *
static_cast<double>(input2->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(real_multiplier, &data->output_multiplier,
&data->output_shift);
data->input1_zero_point = input1->params.zero_point;
data->input2_zero_point = input2->params.zero_point;
data->output_zero_point = output->params.zero_point;
} else {
CalculateActivationRange(params->activation,
&data->output_activation_min_f32,
&data->output_activation_max_f32);
}
return kTfLiteOk;
}
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteMulParams* params, OpData* data,
const TfLiteTensor* input1, const TfLiteTensor* input2,
TfLiteTensor* output) {
if (output->type == kTfLiteInt8 || output->type == kTfLiteUInt8) {
tflite::ArithmeticParams op_params;
SetActivationParams(data->output_activation_min,
data->output_activation_max, &op_params);
op_params.input1_offset = -input1->params.zero_point;
op_params.input2_offset = -input2->params.zero_point;
op_params.output_offset = output->params.zero_point;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
GetTensorShape(input1), GetTensorShape(input2), &op_params);
} // namespace
#define TF_LITE_MUL(type, opname, dtype) \
type::opname(op_params, GetTensorShape(input1), \
GetTensorData<dtype>(input1), GetTensorShape(input2), \
GetTensorData<dtype>(input2), GetTensorShape(output), \
GetTensorData<dtype>(output));
void EvalQuantized(TfLiteContext* context, TfLiteNode* node, const OpData* data,
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
tflite::ArithmeticParams op_params = {};
op_params.quantized_activation_min = data->output_activation_min;
op_params.quantized_activation_max = data->output_activation_max;
op_params.float_activation_max = data->output_activation_max_f32;
op_params.input1_offset = -data->input1_zero_point;
op_params.input2_offset = -data->input2_zero_point;
op_params.output_offset = data->output_zero_point;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
if (output->type == kTfLiteInt8) {
if (need_broadcast) {
TF_LITE_MUL(reference_integer_ops, BroadcastMul4DSlow, int8_t);
} else {
TF_LITE_MUL(reference_integer_ops, Mul, int8_t);
}
} else if (output->type == kTfLiteUInt8) {
if (need_broadcast) {
TF_LITE_MUL(reference_ops, BroadcastMul4DSlow, uint8_t);
} else {
TF_LITE_MUL(reference_ops, Mul, uint8_t);
}
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
if (output->type == kTfLiteInt8) {
if (need_broadcast) {
reference_integer_ops::BroadcastMul4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
} else {
reference_integer_ops::Mul(op_params,
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
} else if (output->type == kTfLiteUInt8) {
if (need_broadcast) {
reference_integer_ops::BroadcastMul4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else {
reference_integer_ops::Mul(op_params,
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
}
#undef TF_LITE_MUL
}
}
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLiteMulParams* params, OpData* data,
const TfLiteTensor* input1, const TfLiteTensor* input2,
TfLiteTensor* output) {
float output_activation_min, output_activation_max;
CalculateActivationRange(params->activation, &output_activation_min,
&output_activation_max);
tflite::ArithmeticParams op_params;
SetActivationParams(output_activation_min, output_activation_max, &op_params);
TfLiteMulParams* params, const OpData* data,
const TfLiteEvalTensor* input1, const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
tflite::ArithmeticParams op_params = {};
op_params.float_activation_min = data->output_activation_min_f32;
op_params.float_activation_max = data->output_activation_max_f32;
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
GetTensorShape(input1), GetTensorShape(input2), &op_params);
#define TF_LITE_MUL(opname) \
reference_ops::opname(op_params, GetTensorShape(input1), \
GetTensorData<float>(input1), GetTensorShape(input2), \
GetTensorData<float>(input2), GetTensorShape(output), \
GetTensorData<float>(output));
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
if (need_broadcast) {
TF_LITE_MUL(BroadcastMul4DSlow);
reference_ops::BroadcastMul4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<float>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<float>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
} else {
TF_LITE_MUL(Mul);
reference_ops::Mul(op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<float>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<float>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
#undef TF_LITE_MUL
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
return CalculateOpData(context, node, params, data);
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
OpData data;
const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor);
const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
CalculateOpData(context, node, params, &data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInput1Tensor);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInput2Tensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (input1->type) {
case kTfLiteUInt8:
case kTfLiteInt8:
EvalQuantized(context, node, params, &data, input1, input2, output);
EvalQuantized(context, node, data, input1, input2, output);
break;
case kTfLiteFloat32:
EvalFloat(context, node, params, &data, input1, input2, output);
EvalFloat(context, node, params, data, input1, input2, output);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@@ -158,16 +220,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
}
} // namespace mul
TfLiteRegistration* Register_MUL() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/mul::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_MUL() {
return {/*init=*/mul::Init,
/*free=*/nullptr,
/*prepare=*/mul::Prepare,
/*invoke=*/mul::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
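A standalone arithmetic sketch, with made-up scales and zero points, of what CalculateOpData prepares for quantized MUL: the integer product of the offset inputs is rescaled by s1 * s2 / s_out (the value QuantizeMultiplier later encodes as an int32 multiplier and shift) before the output zero point is added back:

#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
  // Made-up example quantization parameters.
  const double s1 = 0.05, s2 = 0.02, s_out = 0.1;
  const int32_t z1 = -3, z2 = 5, z_out = 2;
  const double real_multiplier = s1 * s2 / s_out;

  const int32_t q1 = 47, q2 = 25;  // example quantized inputs
  const double real1 = s1 * (q1 - z1);
  const double real2 = s2 * (q2 - z2);

  // Integer-domain product rescaled into the output's quantized space.
  const int32_t q_out = static_cast<int32_t>(
      std::lround(real_multiplier * (q1 - z1) * (q2 - z2)) + z_out);

  std::printf("float product=%f  dequantized output=%f\n",
              real1 * real2, s_out * (q_out - z_out));
  return 0;
}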

View File

@@ -17,7 +17,7 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
@@ -28,14 +28,17 @@ constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (input->type) {
// TODO(wangtz): add handling for kTfLiteInt8.
case kTfLiteFloat32:
reference_ops::Negate(GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output),
GetTensorData<float>(output));
reference_ops::Negate(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@@ -47,16 +50,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
} // namespace neg
TfLiteRegistration* Register_NEG() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/neg::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_NEG() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/neg::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro

View File

@@ -16,7 +16,7 @@ limitations under the License.
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
@@ -28,9 +28,11 @@ constexpr int kOutputTensor = 0;
template <typename T>
TfLiteStatus PackImpl(TfLiteContext* context, TfLiteNode* node,
TfLiteTensor* output, int values_count, int axis) {
TfLiteEvalTensor* output, int values_count, int axis) {
const TfLiteEvalTensor* input0 =
tflite::micro::GetEvalInput(context, node, 0);
const int dimensions = output->dims->size;
const TfLiteTensor* input0 = GetInput(context, node, 0);
const TfLiteIntArray* input_dims = input0->dims;
const TfLiteIntArray* output_dims = output->dims;
@@ -52,11 +54,11 @@ TfLiteStatus PackImpl(TfLiteContext* context, TfLiteNode* node,
}
TFLITE_DCHECK_EQ(input_size, copy_size * outer_size);
T* output_data = GetTensorData<T>(output);
T* output_data = tflite::micro::GetTensorData<T>(output);
for (int i = 0; i < values_count; ++i) {
const TfLiteTensor* t = GetInput(context, node, i);
const T* input_data = GetTensorData<T>(t);
const TfLiteEvalTensor* t = tflite::micro::GetEvalInput(context, node, i);
const T* input_data = tflite::micro::GetTensorData<T>(t);
for (int k = 0; k < outer_size; ++k) {
const T* input_ptr = input_data + copy_size * k;
int loc = k * values_count * copy_size + i * copy_size;
@@ -72,7 +74,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLitePackParams* data =
reinterpret_cast<TfLitePackParams*>(node->builtin_data);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (output->type) {
case kTfLiteFloat32: {
@@ -108,16 +111,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
} // namespace
} // namespace pack
TfLiteRegistration* Register_PACK() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/pack::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_PACK() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/pack::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
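A standalone sketch of the copy loop in PackImpl: for input i and outer index k, copy_size contiguous values land at offset k * values_count * copy_size + i * copy_size in the output. Packing two length-3 vectors along their last axis therefore interleaves them into a [3, 2] result:

#include <cstdio>

int main() {
  const float in0[3] = {1, 2, 3};
  const float in1[3] = {4, 5, 6};
  const float* inputs[2] = {in0, in1};

  const int values_count = 2;  // number of tensors being packed
  const int outer_size = 3;    // elements before the packed axis
  const int copy_size = 1;     // elements after the packed axis

  float output[6];
  for (int i = 0; i < values_count; ++i) {
    for (int k = 0; k < outer_size; ++k) {
      for (int j = 0; j < copy_size; ++j) {
        output[k * values_count * copy_size + i * copy_size + j] =
            inputs[i][k * copy_size + j];
      }
    }
  }
  for (int n = 0; n < 6; ++n) std::printf("%g ", output[n]);
  std::printf("\n");  // prints: 1 4 2 5 3 6
  return 0;
}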

View File

@@ -16,189 +16,208 @@ limitations under the License.
#include <string.h>
#include "tensorflow/lite/kernels/internal/types.h"
#ifdef MEMORY_SANITIZER
#include <sanitizer/msan_interface.h>
#else
#define __msan_check_mem_is_initialized(ptr, size)
#endif
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor.h"
#include "tensorflow/lite/kernels/internal/portable_tensor.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace pad {
namespace {
struct PadContext {
PadContext(TfLiteContext* context, TfLiteNode* node) {
input = GetInput(context, node, 0);
paddings = GetInput(context, node, 1);
constant_values = nullptr;
if (NumInputs(node) == 3) {
constant_values = GetOptionalInputTensor(context, node, 2);
} else {
constant_values = nullptr;
}
output = GetOutput(context, node, 0);
dims = NumDimensions(input);
resizing_category = ResizingCategory::kGenericResize;
const int paddings_total = GetTensorShape(paddings).FlatSize();
const int32* paddings_data = GetTensorData<int32>(paddings);
// Paddings will be an n x 2 array, and we need to detect 4D arrays with the
// pattern { {0,0}, {a, b}, {c, d}, {0,0} }.
if (IsConstantTensor(paddings) && paddings_total == 8 &&
(paddings_data[0] == 0 && paddings_data[1] == 0) &&
(paddings_data[6] == 0 && paddings_data[7] == 0)) {
resizing_category = ResizingCategory::kImageStyle;
}
}
const TfLiteTensor* constant_values;
const TfLiteTensor* input;
const TfLiteTensor* paddings;
TfLiteTensor* output;
int dims;
ResizingCategory resizing_category;
struct OpData {
PadParams params;
int32_t output_zero_point;
};
} // namespace
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
TF_LITE_ENSURE(context, NumInputs(node) == 2 || NumInputs(node) == 3);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
PadContext op_context(context, node);
TF_LITE_ENSURE_EQ(context, op_context.input->type, op_context.output->type);
if (op_context.constant_values != nullptr) {
TF_LITE_ENSURE_EQ(context, op_context.input->type,
op_context.constant_values->type);
const TfLiteTensor* input = GetInput(context, node, /*index=*/0);
TF_LITE_ENSURE(context, input != nullptr);
const TfLiteTensor* paddings = GetInput(context, node, /*index=*/1);
TF_LITE_ENSURE(context, paddings != nullptr);
const TfLiteTensor* constant_values =
NumInputs(node) == 3 ? GetInput(context, node, /*index=*/2) : nullptr;
TfLiteTensor* output = GetOutput(context, node, /*index=*/0);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_EQ(context, input->type, output->type);
// Current implementations rely on the inputs being <= 4D.
TF_LITE_ENSURE(context, NumDimensions(input) <=
reference_ops::PadKernelMaxDimensionCount());
if (constant_values != nullptr) {
TF_LITE_ENSURE_EQ(context, input->type, constant_values->type);
// Ensure that constant_values is a scalar.
TF_LITE_ENSURE_EQ(context, NumElements(constant_values), 1);
}
// There must be a pair of paddings for each output dimension.
TF_LITE_ENSURE_EQ(context, GetTensorShape(op_context.paddings).FlatSize(),
op_context.output->dims->size * 2);
TF_LITE_ENSURE_EQ(context, GetTensorShape(paddings).FlatSize(),
output->dims->size * 2);
// On Micro, outputs must be properly sized by the converter.
const int32* paddings_data = GetTensorData<int32>(op_context.paddings);
for (int i = 0; i < op_context.output->dims->size; i++) {
int output_dim = op_context.output->dims->data[i];
int expected_dim = op_context.input->dims->data[i] + paddings_data[i * 2] +
paddings_data[i * 2 + 1];
// NOTE: This data is only available because the paddings buffer is stored in
// the flatbuffer:
TF_LITE_ENSURE(context, IsConstantTensor(paddings));
const int32_t* paddings_data = GetTensorData<int32_t>(paddings);
for (int i = 0; i < output->dims->size; i++) {
int output_dim = output->dims->data[i];
int expected_dim =
input->dims->data[i] + paddings_data[i * 2] + paddings_data[i * 2 + 1];
TF_LITE_ENSURE_EQ(context, output_dim, expected_dim);
}
// Current implementations rely on the inputs being <= 4D.
TF_LITE_ENSURE(
context, op_context.dims <= reference_ops::PadKernelMaxDimensionCount());
TF_LITE_ENSURE(context, IsConstantTensor(op_context.paddings));
// Calculate OpData:
data->params.resizing_category = ResizingCategory::kGenericResize;
const int paddings_total = GetTensorShape(paddings).FlatSize();
if (paddings_total == 8 && (paddings_data[0] == 0 && paddings_data[1] == 0) &&
(paddings_data[6] == 0 && paddings_data[7] == 0)) {
data->params.resizing_category = ResizingCategory::kImageStyle;
}
const int num_input_dimensions = NumDimensions(input);
data->params.left_padding_count = num_input_dimensions;
data->params.right_padding_count = num_input_dimensions;
for (int idx = num_input_dimensions - 1; idx >= 0; --idx) {
data->params.left_padding[idx] = paddings_data[idx * 2];
data->params.right_padding[idx] = paddings_data[idx * 2 + 1];
}
if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) {
if (constant_values == nullptr) {
// Quantized Pad requires that 0 is represented in the quantized
// range.
if (input->type == kTfLiteUInt8) {
TF_LITE_ENSURE(context, output->params.zero_point >=
std::numeric_limits<uint8_t>::min());
TF_LITE_ENSURE(context, output->params.zero_point <=
std::numeric_limits<uint8_t>::max());
} else {
TF_LITE_ENSURE(context, output->params.zero_point >=
std::numeric_limits<int8_t>::min());
TF_LITE_ENSURE(context, output->params.zero_point <=
std::numeric_limits<int8_t>::max());
}
} else {
// Quantized Pad requires that 'constant_values' is represented in the
// same quantized range as the input and output tensors.
TF_LITE_ENSURE_EQ(context, output->params.zero_point,
constant_values->params.zero_point);
TF_LITE_ENSURE_EQ(context, static_cast<double>(output->params.scale),
static_cast<double>(constant_values->params.scale));
}
data->output_zero_point = output->params.zero_point;
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
PadContext op_context(context, node);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
if (op_context.constant_values != nullptr) {
// Ensure that constant_values is a scalar.
TF_LITE_ENSURE_EQ(context, NumElements(op_context.constant_values), 1);
}
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, /*index=*/0);
const TfLiteEvalTensor* constant_values =
NumInputs(node) == 3
? tflite::micro::GetEvalInput(context, node, /*index=*/2)
: nullptr;
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, /*index=*/0);
// Create before and after padding arrays that are accepted by the kernel.
const int32* paddings_data = GetTensorData<int32>(op_context.paddings);
tflite::PadParams op_params;
memset(&op_params, 0, sizeof(PadParams));
op_params.left_padding_count = op_context.dims;
op_params.right_padding_count = op_context.dims;
for (int idx = op_context.dims - 1; idx >= 0; --idx) {
op_params.left_padding[idx] = paddings_data[idx * 2];
op_params.right_padding[idx] = paddings_data[idx * 2 + 1];
}
#define TF_LITE_PAD(type, op_name, scalar, pad_value) \
const scalar pad_value_copy = pad_value; \
\
type::op_name(op_params, GetTensorShape(op_context.input), \
GetTensorData<scalar>(op_context.input), &pad_value_copy, \
GetTensorShape(op_context.output), \
GetTensorData<scalar>(op_context.output))
switch (op_context.input->type) {
switch (input->type) {
case kTfLiteFloat32: {
float pad_value = op_context.constant_values == nullptr
? 0.f
: *GetTensorData<float>(op_context.constant_values);
if (op_context.resizing_category == ResizingCategory::kImageStyle) {
TF_LITE_PAD(reference_ops, PadImageStyle, float, pad_value);
float pad_value =
constant_values == nullptr
? 0.f
: *tflite::micro::GetTensorData<float>(constant_values);
if (data->params.resizing_category == ResizingCategory::kImageStyle) {
reference_ops::PadImageStyle(
data->params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input), &pad_value,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
} else {
TF_LITE_PAD(reference_ops, Pad, float, pad_value);
reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
&pad_value, tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
} break;
case kTfLiteUInt8: {
uint8_t pad_value;
if (op_context.constant_values == nullptr) {
// Quantized Pad requires that 0 is represented in the quantized
// range.
TF_LITE_ENSURE(context, op_context.output->params.zero_point >=
std::numeric_limits<uint8_t>::min());
TF_LITE_ENSURE(context, op_context.output->params.zero_point <=
std::numeric_limits<uint8_t>::max());
pad_value = static_cast<uint8_t>(op_context.output->params.zero_point);
if (constant_values == nullptr) {
pad_value = static_cast<uint8_t>(data->output_zero_point);
} else {
// Quantized Pad requires that 'constant_values' is represented in the
// same quantized range as the input and output tensors.
TF_LITE_ENSURE_EQ(context, op_context.output->params.zero_point,
op_context.constant_values->params.zero_point);
TF_LITE_ENSURE_EQ(
context, static_cast<double>(op_context.output->params.scale),
static_cast<double>(op_context.constant_values->params.scale));
pad_value = *GetTensorData<uint8_t>(op_context.constant_values);
pad_value = *tflite::micro::GetTensorData<uint8_t>(constant_values);
}
if (op_context.resizing_category == ResizingCategory::kImageStyle) {
TF_LITE_PAD(reference_ops, PadImageStyle, uint8_t, pad_value);
if (data->params.resizing_category == ResizingCategory::kImageStyle) {
reference_ops::PadImageStyle(
data->params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input), &pad_value,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else {
TF_LITE_PAD(reference_ops, Pad, uint8_t, pad_value);
reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
&pad_value, tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
}
} break;
case kTfLiteInt8: {
int8_t pad_value;
if (op_context.constant_values == nullptr) {
// Quantized Pad requires that 0 is represented in the quantized
// range.
TF_LITE_ENSURE(context, op_context.output->params.zero_point >=
std::numeric_limits<int8_t>::min());
TF_LITE_ENSURE(context, op_context.output->params.zero_point <=
std::numeric_limits<int8_t>::max());
pad_value = static_cast<int8_t>(op_context.output->params.zero_point);
if (constant_values == nullptr) {
pad_value = static_cast<int8_t>(data->output_zero_point);
} else {
// Quantized Pad requires that 'constant_values' is represented in the
// same quantized range as the input and output tensors.
TF_LITE_ENSURE_EQ(context, op_context.output->params.zero_point,
op_context.constant_values->params.zero_point);
TF_LITE_ENSURE(context, op_context.output->params.scale ==
op_context.constant_values->params.scale);
pad_value = *GetTensorData<int8_t>(op_context.constant_values);
pad_value = *tflite::micro::GetTensorData<int8_t>(constant_values);
}
if (op_context.resizing_category == ResizingCategory::kImageStyle) {
TF_LITE_PAD(reference_ops, PadImageStyle, int8_t, pad_value);
if (data->params.resizing_category == ResizingCategory::kImageStyle) {
reference_ops::PadImageStyle(
data->params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input), &pad_value,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
} else {
TF_LITE_PAD(reference_ops, Pad, int8_t, pad_value);
reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
&pad_value, tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
} break;
case kTfLiteInt32: {
int32_t pad_value =
op_context.constant_values == nullptr
constant_values == nullptr
? 0
: *GetTensorData<int32_t>(op_context.constant_values);
TF_LITE_PAD(reference_ops, Pad, int32_t, pad_value);
: *tflite::micro::GetTensorData<int32_t>(constant_values);
reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int32_t>(input),
&pad_value, tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int32_t>(output));
} break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s not currently supported by Pad.",
TfLiteTypeGetName(op_context.input->type));
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
#undef TF_LITE_PAD
@@ -207,29 +226,27 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
} // namespace pad
TfLiteRegistration* Register_PAD() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/pad::Prepare,
/*invoke=*/pad::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_PAD() {
return {/*init=*/pad::Init,
/*free=*/nullptr,
/*prepare=*/pad::Prepare,
/*invoke=*/pad::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
// Also register Pad as PadV2.
TfLiteRegistration* Register_PADV2() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/pad::Prepare,
/*invoke=*/pad::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_PADV2() {
return {/*init=*/pad::Init,
/*free=*/nullptr,
/*prepare=*/pad::Prepare,
/*invoke=*/pad::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
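A standalone sketch, with made-up NHWC values, of the paddings layout Prepare validates: one (before, after) pair per dimension, each output dimension equal to the input dimension plus both pads, and the image-style fast path requiring zero padding on the first and last of four dimensions:

#include <cstdio>

int main() {
  const int input_dims[4] = {1, 4, 4, 3};  // N, H, W, C
  const int paddings[4][2] = {{0, 0}, {1, 1}, {2, 2}, {0, 0}};

  // Image-style resize: batch and channel dimensions stay unpadded.
  const bool image_style = paddings[0][0] == 0 && paddings[0][1] == 0 &&
                           paddings[3][0] == 0 && paddings[3][1] == 0;

  std::printf("image_style=%d  output dims:", image_style);
  for (int i = 0; i < 4; ++i) {
    std::printf(" %d", input_dims[i] + paddings[i][0] + paddings[i][1]);
  }
  std::printf("\n");  // image_style=1  output dims: 1 6 8 3
  return 0;
}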

View File

@@ -19,6 +19,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
@@ -32,6 +33,10 @@ constexpr int kOutputTensor = 0;
struct OpData {
TfLitePaddingValues padding;
int32_t activation_min;
int32_t activation_max;
float activation_min_f32;
float activation_max_f32;
};
TfLiteStatus CalculateOpData(const TfLiteContext* context,
@@ -55,11 +60,7 @@ TfLiteStatus CalculateOpData(const TfLiteContext* context,
void AverageEvalFloat(const TfLiteContext* context, const TfLiteNode* node,
const TfLitePoolParams* params, const OpData* data,
const TfLiteTensor* input, TfLiteTensor* output) {
float activation_min, activation_max;
CalculateActivationRange(params->activation, &activation_min,
&activation_max);
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
@@ -67,20 +68,19 @@ void AverageEvalFloat(const TfLiteContext* context, const TfLiteNode* node,
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.float_activation_min = activation_min;
op_params.float_activation_max = activation_max;
reference_ops::AveragePool(
op_params, GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<float>(output));
op_params.float_activation_min = data->activation_min_f32;
op_params.float_activation_max = data->activation_max_f32;
reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
const TfLitePoolParams* params, const OpData* data,
const TfLiteTensor* input, TfLiteTensor* output) {
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output) {
TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8);
int32_t activation_min, activation_max;
(void)CalculateActivationRangeQuantized(context, params->activation, output,
&activation_min, &activation_max);
PoolParams op_params;
op_params.stride_height = params->stride_height;
@@ -89,27 +89,26 @@ void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.quantized_activation_min = activation_min;
op_params.quantized_activation_max = activation_max;
op_params.quantized_activation_min = data->activation_min;
op_params.quantized_activation_max = data->activation_max;
if (input->type == kTfLiteUInt8) {
reference_ops::AveragePool(
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
GetTensorShape(output), GetTensorData<uint8_t>(output));
reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else {
reference_integer_ops::AveragePool(
op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
GetTensorShape(output), GetTensorData<int8_t>(output));
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
}
void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLitePoolParams* params, OpData* data,
const TfLiteTensor* input, TfLiteTensor* output) {
float activation_min, activation_max;
CalculateActivationRange(params->activation, &activation_min,
&activation_max);
TfLitePoolParams* params, const OpData* data,
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
tflite::PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
@@ -117,22 +116,17 @@ void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node,
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.float_activation_min = activation_min;
op_params.float_activation_max = activation_max;
reference_ops::MaxPool(op_params, GetTensorShape(input),
GetTensorData<float>(input), GetTensorShape(output),
GetTensorData<float>(output));
op_params.float_activation_min = data->activation_min_f32;
op_params.float_activation_max = data->activation_max_f32;
reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLitePoolParams* params, OpData* data,
const TfLiteTensor* input, TfLiteTensor* output) {
TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8);
int32_t activation_min, activation_max;
(void)CalculateActivationRangeQuantized(context, params->activation, output,
&activation_min, &activation_max);
TfLitePoolParams* params, const OpData* data,
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
tflite::PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
@@ -140,39 +134,44 @@ void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node,
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.quantized_activation_min = activation_min;
op_params.quantized_activation_max = activation_max;
op_params.quantized_activation_min = data->activation_min;
op_params.quantized_activation_max = data->activation_max;
if (input->type == kTfLiteUInt8) {
reference_ops::MaxPool(
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
GetTensorShape(output), GetTensorData<uint8_t>(output));
reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else {
reference_integer_ops::MaxPool(
op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
GetTensorShape(output), GetTensorData<int8_t>(output));
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
}
} // namespace
TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
OpData data;
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, &data));
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
// Inputs and outputs share the same type, guaranteed by the converter.
switch (input->type) {
case kTfLiteFloat32:
AverageEvalFloat(context, node, params, &data, input, output);
AverageEvalFloat(context, node, params, data, input, output);
break;
case kTfLiteUInt8:
case kTfLiteInt8:
AverageEvalQuantized(context, node, params, &data, input, output);
AverageEvalQuantized(context, node, params, data, input, output);
break;
default:
TF_LITE_KERNEL_LOG(context, "Input type %s is not currently supported",
@@ -183,21 +182,24 @@ TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
}
TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
OpData data;
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, &data));
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (input->type) {
case kTfLiteFloat32:
MaxEvalFloat(context, node, params, &data, input, output);
MaxEvalFloat(context, node, params, data, input, output);
break;
case kTfLiteUInt8:
case kTfLiteInt8:
MaxEvalQuantized(context, node, params, &data, input, output);
MaxEvalQuantized(context, node, params, data, input, output);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.",
@@ -207,30 +209,59 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteOk;
}
} // namespace pooling
TfLiteRegistration* Register_AVERAGE_POOL_2D() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/pooling::AverageEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteRegistration* Register_MAX_POOL_2D() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/pooling::MaxEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, data));
if (input->type == kTfLiteFloat32) {
CalculateActivationRange(params->activation, &data->activation_min_f32,
&data->activation_max_f32);
} else if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) {
CalculateActivationRangeQuantized(context, params->activation, output,
&data->activation_min,
&data->activation_max);
}
return kTfLiteOk;
}
} // namespace pooling
TfLiteRegistration Register_AVERAGE_POOL_2D() {
return {/*init=*/pooling::Init,
/*free=*/nullptr,
/*prepare=*/pooling::Prepare,
/*invoke=*/pooling::AverageEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration Register_MAX_POOL_2D() {
return {/*init=*/pooling::Init,
/*free=*/nullptr,
/*prepare=*/pooling::Prepare,
/*invoke=*/pooling::MaxEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
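The pooling changes above follow the per-kernel lifecycle used throughout this change: Init() grabs one persistent OpData block, Prepare() fills it, and Eval() only reads it back through node->user_data. A minimal standalone sketch of that pattern, using a made-up FakeArena in place of the real context allocator (nothing below is TFLM API):

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Illustrative stand-in for the persistent arena; not the TFLM allocator.
struct FakeArena {
  alignas(alignof(std::max_align_t)) uint8_t buffer[256];
  size_t used = 0;
  void* AllocatePersistent(size_t bytes) {
    void* p = &buffer[used];
    used += bytes;
    return p;
  }
};

struct OpData {  // per-node state that survives between invocations
  int32_t activation_min;
  int32_t activation_max;
};

void* Init(FakeArena* arena) {  // runs once per node
  return arena->AllocatePersistent(sizeof(OpData));
}

void Prepare(void* user_data) {  // fills the persistent block
  OpData* data = static_cast<OpData*>(user_data);
  data->activation_min = -128;
  data->activation_max = 127;
}

void Eval(const void* user_data) {  // read-only, no allocation at invoke time
  const OpData* data = static_cast<const OpData*>(user_data);
  std::printf("clamp to [%d, %d]\n", static_cast<int>(data->activation_min),
              static_cast<int>(data->activation_max));
}

int main() {
  FakeArena arena;
  void* user_data = Init(&arena);
  Prepare(user_data);
  Eval(user_data);
}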

View File

@@ -15,20 +15,45 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/reference/prelu.h"
#include <cstdint>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace activations {
namespace {
TfLiteStatus CalculatePreluParams(const TfLiteTensor* input,
const TfLiteTensor* alpha,
TfLiteTensor* output, PreluParams* params) {
if (output->type == kTfLiteInt8 || output->type == kTfLiteUInt8 ||
output->type == kTfLiteInt16) {
double real_multiplier_1 = static_cast<double>(input->params.scale) /
static_cast<double>(output->params.scale);
double real_multiplier_2 = static_cast<double>(input->params.scale) *
static_cast<double>(alpha->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(real_multiplier_1, &params->output_multiplier_1,
&params->output_shift_1);
QuantizeMultiplier(real_multiplier_2, &params->output_multiplier_2,
&params->output_shift_2);
params->input_offset = -input->params.zero_point;
params->alpha_offset = -alpha->params.zero_point;
params->output_offset = output->params.zero_point;
}
TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteOk;
}
} // namespace
inline void BroadcastPrelu4DSlowFloat(
const RuntimeShape& unextended_input1_shape, const float* input1_data,
const RuntimeShape& unextended_input2_shape, const float* input2_data,
@@ -60,43 +85,67 @@ inline void BroadcastPrelu4DSlowFloat(
}
}
TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
void* PreluInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(PreluParams));
}
TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
PreluParams* params = static_cast<PreluParams*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, 0);
TF_LITE_ENSURE(context, input != nullptr);
const TfLiteTensor* alpha = GetInput(context, node, 1);
TF_LITE_ENSURE(context, alpha != nullptr);
TfLiteTensor* output = GetOutput(context, node, 0);
int32_t output_multiplier = 0;
int output_shift = 0;
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt16) {
double real_multiplier = static_cast<double>(input->params.scale) *
static_cast<double>(alpha->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplierSmallerThanOneExp(real_multiplier, &output_multiplier,
&output_shift);
}
TF_LITE_ENSURE(context, output != nullptr);
return CalculatePreluParams(input, alpha, output, params);
}
TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
const PreluParams& params =
*(static_cast<const PreluParams*>(node->user_data));
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
const TfLiteEvalTensor* alpha = tflite::micro::GetEvalInput(context, node, 1);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
switch (input->type) {
case kTfLiteFloat32: {
BroadcastPrelu4DSlowFloat(
GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(alpha), GetTensorData<float>(alpha),
GetTensorShape(output), GetTensorData<float>(output));
BroadcastPrelu4DSlowFloat(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(alpha),
tflite::micro::GetTensorData<float>(alpha),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
} break;
case kTfLiteUInt8: {
PreluParams op_params;
op_params.input_offset = -input->params.zero_point;
op_params.alpha_offset = -alpha->params.zero_point;
op_params.output_offset = output->params.zero_point;
op_params.output_multiplier = output_multiplier;
op_params.output_shift = output_shift;
reference_ops::BroadcastPrelu4DSlow(
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
GetTensorShape(alpha), GetTensorData<uint8_t>(alpha),
GetTensorShape(output), GetTensorData<uint8_t>(output));
params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(alpha),
tflite::micro::GetTensorData<uint8_t>(alpha),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
return kTfLiteOk;
} break;
case kTfLiteInt8: {
reference_ops::BroadcastPrelu4DSlow(
params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(alpha),
tflite::micro::GetTensorData<int8_t>(alpha),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
} break;
default:
TF_LITE_KERNEL_LOG(
context, "Only float32 and uint8 are supported currently, got %d.",
context, "Only float32 and uint8_t are supported currently, got %d.",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
@@ -104,16 +153,15 @@ TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
} // namespace activations
TfLiteRegistration* Register_PRELU() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/activations::PreluPrepare,
/*invoke=*/activations::PreluEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_PRELU() {
return {/*init=*/activations::PreluInit,
/*free=*/nullptr,
/*prepare=*/activations::PreluPrepare,
/*invoke=*/activations::PreluEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
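For the quantized PReLU path above, the two multipliers computed in CalculatePreluParams cover the identity branch (x >= 0) and the alpha branch (x < 0). A small float-domain sketch of the arithmetic those fixed-point multipliers approximate, with made-up scales and zero points:

#include <cstdint>
#include <cstdio>

int main() {
  // Hypothetical scales and zero points, chosen only for illustration.
  const double input_scale = 0.05, alpha_scale = 0.02, output_scale = 0.1;
  const int input_zp = 0, alpha_zp = 0, output_zp = 0;

  // The two real multipliers that PreluPrepare turns into fixed-point
  // (multiplier, shift) pairs via QuantizeMultiplier().
  const double m1 = input_scale / output_scale;                // x >= 0 branch
  const double m2 = input_scale * alpha_scale / output_scale;  // x <  0 branch

  const int8_t x_q = -40, alpha_q = 25;  // quantized input and slope
  const double y_q =
      (x_q - input_zp) >= 0
          ? (x_q - input_zp) * m1 + output_zp
          : (x_q - input_zp) * (alpha_q - alpha_zp) * m2 + output_zp;
  std::printf("quantized output before rounding/clamping: %f\n", y_q);  // -10
}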

View File

@@ -19,19 +19,38 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace ops {
namespace micro {
namespace quantize {
namespace {
struct OpData {
tflite::QuantizationParams quantization_params;
// The scaling factor from input to output (aka the 'real multiplier') can
// be represented as a fixed point multiplier plus a left shift.
int32_t output_multiplier;
int output_shift;
int32_t input_zero_point;
};
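// For a concrete (illustrative) example: a real multiplier of 2.0 is
// representable as output_multiplier = 1 << 30 (the significand 0.5 in Q31)
// together with output_shift = 2, since (1 << 30) * 2^-31 * 2^2 == 2.0, so
// the integer kernel only needs a high 32x32 multiply followed by a shift.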
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, 0);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, 0);
TF_LITE_ENSURE(context, output != nullptr);
// TODO(b/128934713): Add support for fixed-point per-channel quantization.
// Currently this only supports affine per-layer quantization.

@@ -43,34 +62,61 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE(context, affine_quantization->scale);
TF_LITE_ENSURE(context, affine_quantization->scale->size == 1);
TF_LITE_ENSURE(context,
input->type == kTfLiteFloat32 || input->type == kTfLiteInt16);
TF_LITE_ENSURE(context,
output->type == kTfLiteUInt8 || output->type == kTfLiteInt8);
TF_LITE_ENSURE(context, input->type == kTfLiteFloat32 ||
input->type == kTfLiteInt16 ||
input->type == kTfLiteInt8);
TF_LITE_ENSURE(context, output->type == kTfLiteUInt8 ||
output->type == kTfLiteInt8 ||
output->type == kTfLiteInt16 ||
output->type == kTfLiteInt32);
if (((input->type == kTfLiteInt16 || input->type == kTfLiteInt8) &&
output->type == kTfLiteInt8) ||
(input->type == kTfLiteInt16 && output->type == kTfLiteInt16)) {
double effective_scale = static_cast<double>(input->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(effective_scale, &data->output_multiplier,
&data->output_shift);
}
data->quantization_params.zero_point = output->params.zero_point;
data->quantization_params.scale = static_cast<double>(output->params.scale);
data->input_zero_point = input->params.zero_point;
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* output = GetOutput(context, node, 0);
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
tflite::QuantizationParams op_params;
op_params.zero_point = output->params.zero_point;
op_params.scale = static_cast<double>(output->params.scale);
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
if (input->type == kTfLiteFloat32) {
switch (output->type) {
case kTfLiteInt8:
reference_ops::AffineQuantize(
op_params, GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<int8_t>(output));
data->quantization_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
break;
case kTfLiteUInt8:
reference_ops::AffineQuantize(
op_params, GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<uint8_t>(output));
data->quantization_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
break;
case kTfLiteInt16:
reference_ops::AffineQuantize(
data->quantization_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
return kTfLiteOk;
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
@@ -79,17 +125,45 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
}
} else if (input->type == kTfLiteInt16) {
size_t size = ElementCount(*input->dims);
int32_t output_multiplier;
int output_shift;
double effective_scale =
static_cast<double>(input->params.scale / output->params.scale);
switch (output->type) {
case kTfLiteInt8:
QuantizeMultiplier(effective_scale, &output_multiplier, &output_shift);
reference_ops::Requantize(tflite::micro::GetTensorData<int16_t>(input),
size, data->output_multiplier,
data->output_shift, data->input_zero_point,
data->quantization_params.zero_point,
tflite::micro::GetTensorData<int8_t>(output));
break;
case kTfLiteInt16:
reference_ops::Requantize(
GetTensorData<int16_t>(input), size, output_multiplier,
output_shift, input->params.zero_point, output->params.zero_point,
GetTensorData<int8_t>(output));
tflite::micro::GetTensorData<int16_t>(input), size,
data->output_multiplier, data->output_shift, data->input_zero_point,
data->quantization_params.zero_point,
tflite::micro::GetTensorData<int16_t>(output));
return kTfLiteOk;
case kTfLiteInt32:
reference_ops::Requantize(
tflite::micro::GetTensorData<int16_t>(input), size,
data->output_multiplier, data->output_shift, data->input_zero_point,
data->quantization_params.zero_point,
tflite::micro::GetTensorData<int32_t>(output));
return kTfLiteOk;
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else if (input->type == kTfLiteInt8) {
// Int8 to Int8 requantization, required if the input and output tensors
// have different scales and/or zero points.
size_t size = ElementCount(*input->dims);
switch (output->type) {
case kTfLiteInt8:
reference_ops::Requantize(tflite::micro::GetTensorData<int8_t>(input),
size, data->output_multiplier,
data->output_shift, data->input_zero_point,
data->quantization_params.zero_point,
tflite::micro::GetTensorData<int8_t>(output));
break;
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
@@ -107,23 +181,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteOk;
}
} // namespace quantize
} // namespace
// This Op (QUANTIZE) quantizes the input and produces quantized output.
// AffineQuantize takes scale and zero point and quantizes the float value to
// quantized output, in int8_t, uint8_t or int16_t format; already-quantized
// int8_t and int16_t inputs are instead requantized to the output's scale and
// zero point.
TfLiteRegistration* Register_QUANTIZE() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/quantize::Prepare,
/*invoke=*/quantize::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_QUANTIZE() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite
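The int8_t and int16_t branches above all reduce to the same requantization step: map a value from one (scale, zero point) pair to another. A plain floating-point reference of that step, with illustrative parameters (the kernel itself uses the precomputed fixed-point multiplier and shift instead):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Float-domain reference of what the fixed-point Requantize path approximates.
int8_t RequantizeRef(int8_t in, double in_scale, int in_zp, double out_scale,
                     int out_zp) {
  const double real = in_scale * (in - in_zp);                  // dequantize
  const long rounded = std::lround(real / out_scale) + out_zp;  // requantize
  return static_cast<int8_t>(std::min(127L, std::max(-128L, rounded)));
}

int main() {
  // Same value re-expressed on a coarser scale with a shifted zero point.
  std::printf("%d\n", RequantizeRef(/*in=*/50, /*in_scale=*/0.05, /*in_zp=*/0,
                                    /*out_scale=*/0.1, /*out_zp=*/-10));  // 15
}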

View File

@@ -18,9 +18,12 @@ limitations under the License.
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/mean.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace ops {
@@ -30,10 +33,27 @@ namespace reduce {
constexpr int kMaxNumberOfAxis = 4;
constexpr int kMaxNumberOfReducedAxis = 2;
struct OpData {
int32_t multiplier;
int shift;
int temp_buffer_idx;
int resolved_axis_idx;
int input_zp;
float input_scale;
int output_zp;
float output_scale;
int num_output_elements;
};
void* InitReduce(TfLiteContext* context, const char* buffer, size_t length) {
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus PrepareSimple(TfLiteContext* context, TfLiteNode* node) {
// Inputs Tensor (dtype depends on quantization):
// [0] = Input
// [1] = Axis
const TfLiteTensor* input = GetInput(context, node, 0);
// Outputs Tensor (dtype depends on quantization):
// [0] = Output
@@ -44,13 +64,63 @@ TfLiteStatus PrepareSimple(TfLiteContext* context, TfLiteNode* node) {
// Validate axis type
const TfLiteTensor* axis = GetInput(context, node, 1);
TF_LITE_ENSURE(context, axis != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, axis->type, kTfLiteInt32);
if (input->type == kTfLiteInt8) {
OpData* data = static_cast<OpData*>(node->user_data);
const TfLiteTensor* output = GetOutput(context, node, 0);
const double real_multiplier = static_cast<double>(input->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(real_multiplier, &data->multiplier, &data->shift);
}
return kTfLiteOk;
}
TfLiteStatus PrepareMax(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_OK(context, PrepareSimple(context, node));
OpData* op_data = static_cast<OpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, 0);
const TfLiteTensor* output = GetOutput(context, node, 0);
const TfLiteTensor* axis = GetInput(context, node, 1);
op_data->input_scale = input->params.scale;
op_data->output_scale = output->params.scale;
op_data->num_output_elements = NumElements(output);
context->RequestScratchBufferInArena(context, sizeof(int) * input->dims->size,
&op_data->temp_buffer_idx);
context->RequestScratchBufferInArena(
context, sizeof(int) * static_cast<int>(ElementCount(*axis->dims)),
&op_data->resolved_axis_idx);
return kTfLiteOk;
}
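// Note on the two indices requested in PrepareMax above: Prepare() only
// records the arena indices returned by RequestScratchBufferInArena();
// EvalMax() below turns them back into pointers with
// context->GetScratchBuffer(), so no memory is allocated at invoke time.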
TfLiteStatus PrepareMeanOrSum(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, 0);
OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
const TfLiteTensor* output = GetOutput(context, node, 0);
if (input->type == kTfLiteInt8) {
const double real_multiplier = static_cast<double>(input->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(real_multiplier, &op_data->multiplier, &op_data->shift);
}
int output_size = NumElements(output);
if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) {
context->RequestScratchBufferInArena(context, output_size * sizeof(int32_t),
&op_data->temp_buffer_idx);
op_data->input_zp = input->params.zero_point;
op_data->input_scale = input->params.scale;
op_data->output_zp = output->params.zero_point;
op_data->output_scale = output->params.scale;
}
TF_LITE_ENSURE_OK(context, PrepareSimple(context, node));
// TODO(b/144955155): Support uint8(b/144955155) and int8(b/144955018)
// TODO(b/144955155): Support uint8_t(b/144955155) and int8_t(b/144955018)
return kTfLiteOk;
}
@@ -58,7 +128,7 @@ void ResolveAxis(const int* axis_data, int axis_count,
tflite::MeanParams* op_params) {
int i = 0;
for (; i < axis_count; ++i) {
op_params->axis[i] = static_cast<int16>(axis_data[i]);
op_params->axis[i] = static_cast<int16_t>(axis_data[i]);
}
for (; i < 4; ++i) {
op_params->axis[i] = 1;
@@ -67,69 +137,206 @@ void ResolveAxis(const int* axis_data, int axis_count,
}
TfLiteStatus EvalMean(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, 0);
const TfLiteTensor* axis = GetInput(context, node, 1);
TfLiteTensor* output = GetOutput(context, node, 0);
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 1);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
TfLiteReducerParams* params =
reinterpret_cast<TfLiteReducerParams*>(node->builtin_data);
OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
int num_axis = static_cast<int>(NumElements(axis));
int num_axis = static_cast<int>(ElementCount(*axis->dims));
int temp_index[kMaxNumberOfAxis];
int resolved_axis[kMaxNumberOfReducedAxis];
tflite::MeanParams op_params;
ResolveAxis(tflite::micro::GetTensorData<int>(axis), num_axis, &op_params);
// Special case mean implementation exists for 4D mean across axes 1 and 2.
bool special_case_4d_axes_1_and_2 =
input->dims->size == 4 && op_params.axis_count == 2 &&
((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
(op_params.axis[0] == 2 && op_params.axis[1] == 1));
switch (input->type) {
case kTfLiteFloat32: {
tflite::MeanParams op_params;
ResolveAxis(GetTensorData<int>(axis), num_axis, &op_params);
// TODO(b/146571391): Support only 4D Input and 2D Axis for Mean until
// scratch tensor allocation has been implemented in (b/132070898)
bool is_valid_inputs =
(NumDimensions(input) == 4 && op_params.axis_count == 2 &&
((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
(op_params.axis[0] == 2 && op_params.axis[1] == 1)));
TF_LITE_ENSURE_MSG(
context, is_valid_inputs == true,
"Number of Input "
"dimensions != 4 OR the Axis is not either [1, 2] or [2, 1]");
// TODO(b/139102329): Handle the below special case in the combined
// reference method.
// Defer to specialized implementation for 4D Mean across axes 1 & 2.
if (params->keep_dims) {
reference_ops::Mean(op_params, GetTensorShape(input),
GetTensorData<float>(input), GetTensorShape(output),
GetTensorData<float>(output));
if (params->keep_dims && special_case_4d_axes_1_and_2) {
reference_ops::Mean(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
} else {
TF_LITE_ENSURE(
context,
reference_ops::Mean(GetTensorData<float>(input), input->dims->data,
input->dims->size, GetTensorData<float>(output),
reference_ops::Mean(
tflite::micro::GetTensorData<float>(input), input->dims->data,
input->dims->size, tflite::micro::GetTensorData<float>(output),
output->dims->data, output->dims->size,
tflite::micro::GetTensorData<int>(axis), num_axis,
params->keep_dims, temp_index, resolved_axis,
tflite::micro::GetTensorData<float>(output)));
}
} break;
case kTfLiteInt8: {
// Defer to specialized implementation for 4D Mean across axes 1 & 2.
if (params->keep_dims && special_case_4d_axes_1_and_2) {
reference_integer_ops::Mean(
op_params, op_data->multiplier, op_data->shift,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input), op_data->input_zp,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output), op_data->output_zp);
} else if (op_data->input_zp == op_data->output_zp &&
op_data->input_scale == op_data->output_scale) {
int32_t* temp_buffer = static_cast<int32_t*>(
context->GetScratchBuffer(context, op_data->temp_buffer_idx));
TF_LITE_ENSURE(
context,
reference_ops::Mean(
tflite::micro::GetTensorData<int8_t>(input), input->dims->data,
input->dims->size, tflite::micro::GetTensorData<int8_t>(output),
output->dims->data, output->dims->size,
tflite::micro::GetTensorData<int>(axis), num_axis,
params->keep_dims, temp_index, resolved_axis, temp_buffer));
} else {
int32_t* temp_buffer = static_cast<int32_t*>(
context->GetScratchBuffer(context, op_data->temp_buffer_idx));
TF_LITE_ENSURE(
context,
reference_ops::QuantizedMeanOrSum(
tflite::micro::GetTensorData<int8_t>(input), op_data->input_zp,
op_data->input_scale, input->dims->data, input->dims->size,
tflite::micro::GetTensorData<int8_t>(output),
op_data->output_zp, op_data->output_scale, output->dims->data,
output->dims->size, tflite::micro::GetTensorData<int>(axis),
num_axis, params->keep_dims, temp_index, resolved_axis,
temp_buffer, false));
}
} break;
case kTfLiteUInt8: {
// Defer to specialized implementation for 4D Mean across axes 1 & 2.
if (params->keep_dims && special_case_4d_axes_1_and_2) {
reference_ops::Mean(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
op_data->input_zp, op_data->input_scale,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output),
op_data->output_zp, op_data->output_scale);
} else if (op_data->input_zp == op_data->output_zp &&
op_data->input_scale == op_data->output_scale) {
uint32_t* temp_buffer = static_cast<uint32_t*>(
context->GetScratchBuffer(context, op_data->temp_buffer_idx));
TF_LITE_ENSURE(
context,
reference_ops::Mean(tflite::micro::GetTensorData<uint8_t>(input),
input->dims->data, input->dims->size,
tflite::micro::GetTensorData<uint8_t>(output),
output->dims->data, output->dims->size,
GetTensorData<int>(axis), num_axis,
params->keep_dims, temp_index, resolved_axis,
GetTensorData<float>(output)));
tflite::micro::GetTensorData<int>(axis),
num_axis, params->keep_dims, temp_index,
resolved_axis, temp_buffer));
} else {
uint32_t* temp_buffer = static_cast<uint32_t*>(
context->GetScratchBuffer(context, op_data->temp_buffer_idx));
TF_LITE_ENSURE(
context,
reference_ops::QuantizedMeanOrSum(
tflite::micro::GetTensorData<uint8_t>(input), op_data->input_zp,
op_data->input_scale, input->dims->data, input->dims->size,
tflite::micro::GetTensorData<uint8_t>(output),
op_data->output_zp, op_data->output_scale, output->dims->data,
output->dims->size, tflite::micro::GetTensorData<int>(axis),
num_axis, params->keep_dims, temp_index, resolved_axis,
temp_buffer, false));
}
} break;
default:
// TODO(b/144955155): Support uint8(b/144955155) and int8(b/144955018)
TF_LITE_ENSURE_MSG(context, false,
"Currently, only float32 input type "
"Currently, only float32, int8 or uint8 input type "
"is supported.");
}
return kTfLiteOk;
}
TfLiteStatus EvalMax(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 1);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
TfLiteReducerParams* params =
static_cast<TfLiteReducerParams*>(node->builtin_data);
OpData* op_data = static_cast<OpData*>(node->user_data);
// Interpret an axis tensor with null dimensions as a scalar
int num_axis = static_cast<int>(ElementCount(*axis->dims));
int* temp_buffer = static_cast<int*>(
context->GetScratchBuffer(context, op_data->temp_buffer_idx));
int* resolved_axis = static_cast<int*>(
context->GetScratchBuffer(context, op_data->resolved_axis_idx));
switch (input->type) {
case kTfLiteFloat32:
TF_LITE_ENSURE(
context,
reference_ops::ReduceGeneric<float>(
tflite::micro::GetTensorData<float>(input), input->dims->data,
input->dims->size, tflite::micro::GetTensorData<float>(output),
output->dims->data, output->dims->size,
tflite::micro::GetTensorData<int>(axis), num_axis,
params->keep_dims, temp_buffer, resolved_axis,
std::numeric_limits<float>::lowest(),
[](const float current, const float in) -> float {
return (in > current) ? in : current;
}));
break;
case kTfLiteInt8:
TF_LITE_ENSURE_EQ(context, static_cast<double>(op_data->input_scale),
static_cast<double>(op_data->output_scale));
TF_LITE_ENSURE_EQ(context, op_data->input_zp, op_data->output_zp);
TF_LITE_ENSURE(
context,
reference_ops::ReduceGeneric<int8_t>(
tflite::micro::GetTensorData<int8_t>(input), input->dims->data,
input->dims->size, tflite::micro::GetTensorData<int8_t>(output),
output->dims->data, output->dims->size,
tflite::micro::GetTensorData<int>(axis), num_axis,
params->keep_dims, temp_buffer, resolved_axis,
std::numeric_limits<int8_t>::lowest(),
[](const int8_t current, const int8_t in) -> int8_t {
return (in > current) ? in : current;
}));
break;
default:
TF_LITE_KERNEL_LOG(context,
"Only float32 and int8 types are supported.\n");
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace reduce
TfLiteRegistration* Register_MEAN() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/reduce::PrepareMeanOrSum,
/*invoke=*/reduce::EvalMean,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_MEAN() {
return {/*init=*/reduce::InitReduce,
/*free=*/nullptr,
/*prepare=*/reduce::PrepareMeanOrSum,
/*invoke=*/reduce::EvalMean,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration Register_REDUCE_MAX() {
return {/*init=*/reduce::InitReduce,
/*free=*/nullptr,
/*prepare=*/reduce::PrepareMax,
/*invoke=*/reduce::EvalMax,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite
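EvalMax above drives a generic reduction with an initial value and a "keep the larger" lambda. A self-contained sketch of that fold over a flat buffer (this is not the TFLM ReduceGeneric signature, just the idea it applies):

#include <cstdio>
#include <limits>

// Fold every element into an accumulator that starts at the type's lowest
// value, mirroring the reduce-max call in EvalMax.
template <typename T, typename F>
T Reduce(const T* data, int size, T init, F reducer) {
  T acc = init;
  for (int i = 0; i < size; ++i) acc = reducer(acc, data[i]);
  return acc;
}

int main() {
  const float values[] = {-3.0f, 7.5f, 0.25f, 7.0f};
  const float max_value = Reduce(
      values, 4, std::numeric_limits<float>::lowest(),
      [](float current, float in) { return in > current ? in : current; });
  std::printf("reduce-max = %f\n", max_value);  // prints 7.5
}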

View File

@@ -18,6 +18,9 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace ops {
@@ -29,7 +32,9 @@ constexpr int kOutputTensor = 0;
TfLiteStatus ReshapeOutput(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
// Tensorflow's Reshape allows one of the shape components to have the
// special -1 value, meaning it will be calculated automatically based on the
// input. Here we calculate what that dimension should be so that the number
@@ -61,7 +66,7 @@ TfLiteStatus ReshapeOutput(TfLiteContext* context, TfLiteNode* node) {
num_output_elements *= output_shape->data[stretch_dim];
}
TF_LITE_ENSURE_EQ(context, input->type, output->type);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
TF_LITE_ENSURE_EQ(context, num_input_elements, num_output_elements);
return kTfLiteOk;
}
@@ -74,13 +79,21 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
// TODO(b/162522304): storing the input byte count in OpData increases the
// size of some models significantly, possibly due to alignment issues.
size_t input_bytes;
TF_LITE_ENSURE_STATUS(TfLiteTypeSizeOf(input->type, &input_bytes));
input_bytes *= ElementCount(*input->dims);
// Do nothing for in-place reshape.
if (input->data.raw != output->data.raw) {
// Otherwise perform reshape with copy.
for (size_t i = 0; i < input->bytes; ++i) {
for (size_t i = 0; i < input_bytes; ++i) {
output->data.raw[i] = input->data.raw[i];
}
}
@@ -89,16 +102,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
} // namespace reshape
TfLiteRegistration* Register_RESHAPE() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/reshape::Prepare,
/*invoke=*/reshape::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_RESHAPE() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/reshape::Prepare,
/*invoke=*/reshape::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
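ReshapeOutput above resolves a single -1 entry in the requested shape by dividing the input element count by the product of the known dimensions. A standalone example of that rule, with arbitrary shapes:

#include <cstdio>

int main() {
  const int input_dims[] = {2, 3, 4};  // 24 elements
  int requested[] = {-1, 4};           // one stretch dimension
  int num_input_elements = 1;
  for (int d : input_dims) num_input_elements *= d;

  int known_product = 1, stretch_dim = -1;
  for (int i = 0; i < 2; ++i) {
    if (requested[i] == -1) {
      stretch_dim = i;
    } else {
      known_product *= requested[i];
    }
  }
  if (stretch_dim != -1) {
    requested[stretch_dim] = num_input_elements / known_product;
  }
  std::printf("output shape: {%d, %d}\n", requested[0], requested[1]);  // {6, 4}
}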

View File

@@ -20,6 +20,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
@@ -31,7 +32,6 @@ constexpr int kSizeTensor = 1;
constexpr int kOutputTensor = 0;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
#if defined(DEBUG)
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
@@ -49,11 +49,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
output->type = input->type;
if (!IsConstantTensor(size)) {
TF_LITE_KERNEL_LOG(context,
"Dynamic tensors are unsupported in tfmicro.");
TF_LITE_KERNEL_LOG(context, "Dynamic tensors are unsupported in tfmicro.");
return kTfLiteError;
}
#endif
return kTfLiteOk;
}
@@ -61,9 +59,12 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params =
reinterpret_cast<TfLiteResizeNearestNeighborParams*>(node->builtin_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* size = GetInput(context, node, kSizeTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
const TfLiteEvalTensor* size =
tflite::micro::GetEvalInput(context, node, kSizeTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
tflite::ResizeNearestNeighborParams op_params;
op_params.align_corners = params->align_corners;
@@ -71,22 +72,31 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
if (output->type == kTfLiteFloat32) {
reference_ops::ResizeNearestNeighbor(
op_params, GetTensorShape(input), GetTensorData<int32>(input),
GetTensorShape(size), GetTensorData<int32>(size),
GetTensorShape(output), GetTensorData<int32>(output));
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int32_t>(input),
tflite::micro::GetTensorShape(size),
tflite::micro::GetTensorData<int32_t>(size),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int32_t>(output));
} else if (output->type == kTfLiteUInt8) {
reference_ops::ResizeNearestNeighbor(
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
GetTensorShape(size), GetTensorData<int32>(size),
GetTensorShape(output), GetTensorData<uint8_t>(output));
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(size),
tflite::micro::GetTensorData<int32_t>(size),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else if (output->type == kTfLiteInt8) {
reference_ops::ResizeNearestNeighbor(
op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
GetTensorShape(size), GetTensorData<int32>(size),
GetTensorShape(output), GetTensorData<int8_t>(output));
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(size),
tflite::micro::GetTensorData<int32_t>(size),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
} else {
TF_LITE_KERNEL_LOG(context,
"Output type is %d, requires float, uint8 or int8.",
"Output type is %d, requires float, uint8_t or int8_t.",
output->type);
return kTfLiteError;
}
@@ -95,16 +105,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
}
} // namespace resize_nearest_neighbor
TfLiteRegistration* Register_RESIZE_NEAREST_NEIGHBOR() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/resize_nearest_neighbor::Prepare,
/*invoke=*/resize_nearest_neighbor::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_RESIZE_NEAREST_NEIGHBOR() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/resize_nearest_neighbor::Prepare,
/*invoke=*/resize_nearest_neighbor::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro

View File

@@ -18,6 +18,7 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
@@ -29,11 +30,13 @@ constexpr int kOutputTensor = 0;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32);
TF_LITE_ENSURE_EQ(context, output->type, input->type);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
TF_LITE_ENSURE_TYPES_EQ(context, output->type, input->type);
TF_LITE_ENSURE_EQ(context, output->bytes, input->bytes);
TF_LITE_ENSURE_EQ(context, output->dims->size, input->dims->size);
for (int i = 0; i < output->dims->size; ++i) {
@@ -43,26 +46,29 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
reference_ops::Round(GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<float>(output));
reference_ops::Round(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
}
} // namespace round
TfLiteRegistration* Register_ROUND() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/round::Prepare,
/*invoke=*/round::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_ROUND() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/round::Prepare,
/*invoke=*/round::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro

View File

@@ -0,0 +1,73 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
void ExtractShape(const TfLiteEvalTensor* input, int32_t* output_data) {
for (int i = 0; i < input->dims->size; ++i) {
output_data[i] = input->dims->data[i];
}
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
if (output->type != kTfLiteInt32) {
TF_LITE_KERNEL_LOG(context, "Output type %s (%d) not supported.",
TfLiteTypeGetName(output->type), output->type);
return kTfLiteError;
} else {
ExtractShape(input, tflite::micro::GetTensorData<int32_t>(output));
}
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_SHAPE() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
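Stripped of the framework plumbing, the new SHAPE kernel above simply copies the input's dims array into an int32_t output tensor; for example:

#include <cstdint>
#include <cstdio>

int main() {
  const int input_dims[] = {2, 3, 4};  // e.g. a 2x3x4 tensor
  int32_t output[3];
  for (int i = 0; i < 3; ++i) output[i] = input_dims[i];
  std::printf("{%d, %d, %d}\n", static_cast<int>(output[0]),
              static_cast<int>(output[1]), static_cast<int>(output[2]));
}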

View File

@@ -22,29 +22,35 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace activations {
namespace {
// Softmax parameter data that persists in user_data
static constexpr int kInt16LUTArraySize = 513;
TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
const TfLiteTensor* input,
TfLiteTensor* output,
const TfLiteSoftmaxParams* params,
SoftmaxParams* op_data) {
if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8 ||
input->type == kTfLiteInt16) {
if (input->type == kTfLiteUInt8) {
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteUInt8);
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
} else {
} else if (input->type == kTfLiteInt16) {
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
TF_LITE_ENSURE_NEAR(context, output->params.scale, 1.f / 32768,
(0.001f * 1.f / 32768));
} else { // input->type == kTfLiteInt8
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8);
if (output->type == kTfLiteInt16) {
TF_LITE_ENSURE_EQ(context, output->params.zero_point, -32768);
// NOTE: Current int16 softmax output does not require symmetric scaling
// - so no need to verify scale here.
} else {
TF_LITE_ENSURE_NEAR(context, output->params.scale, 1.f / 65536,
(0.001f * 1.f / 65536));
} else { // output->type == kTfLiteInt8
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8);
TF_LITE_ENSURE_EQ(context, output->params.zero_point, -128);
TF_LITE_ENSURE(context, output->params.scale == 1.f / 256);
@@ -53,15 +59,28 @@ TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
static const int kScaledDiffIntegerBits = 5;
int input_left_shift;
tflite::PreprocessSoftmaxScaling(
static_cast<double>(params->beta),
static_cast<double>(input->params.scale), kScaledDiffIntegerBits,
&op_data->input_multiplier, &input_left_shift);
op_data->input_left_shift = input_left_shift;
op_data->diff_min =
-1.0 * tflite::CalculateInputRadius(kScaledDiffIntegerBits,
op_data->input_left_shift);
// Calculate input_multiplier and input_left_shift
if (input->type == kTfLiteInt16) {
int input_left_shift;
double input_scale_beta_rescale =
static_cast<double>(input->params.scale) *
static_cast<double>(params->beta) /
(10.0 / 65535.0); // scale the input_diff such that [-65535, 0]
// correspond to [-10.0, 0.0]
QuantizeMultiplier(input_scale_beta_rescale, &op_data->input_multiplier,
&input_left_shift);
op_data->input_left_shift = input_left_shift;
} else {
int input_left_shift;
tflite::PreprocessSoftmaxScaling(
static_cast<double>(params->beta),
static_cast<double>(input->params.scale), kScaledDiffIntegerBits,
&op_data->input_multiplier, &input_left_shift);
op_data->input_left_shift = input_left_shift;
op_data->diff_min =
-1.0 * tflite::CalculateInputRadius(kScaledDiffIntegerBits,
op_data->input_left_shift);
}
} else {
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteFloat32);
@@ -70,53 +89,106 @@ TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
return kTfLiteOk;
}
} // namespace
// Takes a tensor and performs softmax along the last dimension.
void SoftmaxFloat(const TfLiteEvalTensor* input, TfLiteEvalTensor* output,
const SoftmaxParams& op_data) {
tflite::reference_ops::Softmax(op_data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
void SoftmaxQuantized(const TfLiteEvalTensor* input, TfLiteEvalTensor* output,
const SoftmaxParams& op_data) {
if (input->type == kTfLiteUInt8) {
tflite::reference_ops::Softmax(
op_data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else if (input->type == kTfLiteInt8) {
if (output->type == kTfLiteInt16) {
tflite::reference_ops::Softmax(
op_data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
} else {
tflite::reference_ops::Softmax(
op_data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
} else {
tflite::reference_ops::SoftmaxInt16(
op_data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int16_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
}
}
void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(SoftmaxParams));
}
TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, 0);
TF_LITE_ENSURE(context, input != nullptr);
TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
TfLiteTensor* output = GetOutput(context, node, 0);
TF_LITE_ENSURE(context, output != nullptr);
return kTfLiteOk;
}
// Takes a tensor and performs softmax along the last dimension.
void SoftmaxFloat(const TfLiteTensor* input, TfLiteTensor* output,
const SoftmaxParams& op_data) {
tflite::reference_ops::Softmax(
op_data, GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<float>(output));
}
void SoftmaxQuantized(const TfLiteTensor* input, TfLiteTensor* output,
const SoftmaxParams& op_data) {
if (input->type == kTfLiteUInt8) {
tflite::reference_ops::Softmax(
op_data, GetTensorShape(input), GetTensorData<uint8_t>(input),
GetTensorShape(output), GetTensorData<uint8_t>(output));
} else {
if (output->type == kTfLiteInt16) {
tflite::reference_ops::Softmax(
op_data, GetTensorShape(input), GetTensorData<int8_t>(input),
GetTensorShape(output), GetTensorData<int16_t>(output));
} else {
tflite::reference_ops::Softmax(
op_data, GetTensorShape(input), GetTensorData<int8_t>(input),
GetTensorShape(output), GetTensorData<int8_t>(output));
}
TF_LITE_ENSURE(context, node->user_data != nullptr);
SoftmaxParams* op_data = static_cast<SoftmaxParams*>(node->user_data);
// Only allocate LUTs for the kTfLiteInt16 data type.
if (input->type == kTfLiteInt16) {
void* raw_exp_lut = context->AllocatePersistentBuffer(
context, sizeof(int16_t) * kInt16LUTArraySize);
TF_LITE_ENSURE(context, raw_exp_lut != nullptr);
op_data->exp_lut = reinterpret_cast<int16_t*>(raw_exp_lut);
void* one_over_one_plus_x_lut = context->AllocatePersistentBuffer(
context, sizeof(int16_t) * kInt16LUTArraySize);
TF_LITE_ENSURE(context, one_over_one_plus_x_lut != nullptr);
op_data->one_over_one_plus_x_lut =
reinterpret_cast<int16_t*>(one_over_one_plus_x_lut);
}
if (output->type == kTfLiteInt16) {
TF_LITE_ENSURE(context, input->type == kTfLiteInt8 ||
input->type == kTfLiteUInt8 ||
input->type == kTfLiteInt16);
} else {
TF_LITE_ENSURE_EQ(context, input->type, output->type);
}
// Populate LUT if required
if (input->type == kTfLiteInt16) {
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
// The exp LUT is only used on negative values; we consider exp(-10.0)
// insignificant to the accumulation.
gen_lut([](float value) { return std::exp(value); }, -10.0f, 0.0f,
op_data->exp_lut, kInt16LUTArraySize);
gen_lut([](float value) { return 1.0f / (1.0f + value); }, 0.0f, 1.0f,
op_data->one_over_one_plus_x_lut, kInt16LUTArraySize);
op_data->zero_point = output->params.zero_point;
op_data->scale = output->params.scale;
}
auto* params = static_cast<TfLiteSoftmaxParams*>(node->builtin_data);
return CalculateSoftmaxParams(context, input, output, params, op_data);
}
TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
auto* params = static_cast<TfLiteSoftmaxParams*>(node->builtin_data);
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* output = GetOutput(context, node, 0);
SoftmaxParams op_data;
TF_LITE_ENSURE_STATUS(
CalculateSoftmaxParams(context, input, output, params, &op_data));
TFLITE_DCHECK(node->user_data != nullptr);
SoftmaxParams op_data = *static_cast<SoftmaxParams*>(node->user_data);
switch (input->type) {
case kTfLiteFloat32: {
@@ -124,7 +196,8 @@ TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteOk;
}
case kTfLiteInt8:
case kTfLiteUInt8: {
case kTfLiteUInt8:
case kTfLiteInt16: {
SoftmaxQuantized(input, output, op_data);
return kTfLiteOk;
}
@@ -134,20 +207,17 @@ TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteError;
}
}
} // namespace activations
} // namespace
TfLiteRegistration* Register_SOFTMAX() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/activations::SoftmaxPrepare,
/*invoke=*/activations::SoftmaxEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_SOFTMAX() {
return {/*init=*/SoftmaxInit,
/*free=*/nullptr,
/*prepare=*/SoftmaxPrepare,
/*invoke=*/SoftmaxEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite
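The int16 branch of CalculateSoftmaxParams above rescales input differences so that the full raw range lines up with the [-10, 0] domain of the exp lookup table. A small numeric sketch of that factor, with an arbitrary example scale and beta (the real kernel then feeds it through QuantizeMultiplier):

#include <cstdio>

int main() {
  // Hypothetical values: an int16 input scale and the default beta of 1.0.
  const double input_scale = 10.0 / 65535.0;
  const double beta = 1.0;
  // Same expression as input_scale_beta_rescale above.
  const double rescale = input_scale * beta / (10.0 / 65535.0);
  // With this scale, a raw input difference of -65535 corresponds to a real
  // logit difference of -10.0, i.e. exactly the lower edge of the LUT range.
  std::printf("rescale = %f, real diff at -65535 = %f\n", rescale,
              -65535.0 * input_scale * beta);
}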

View File

@@ -17,6 +17,7 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
@@ -25,10 +26,11 @@ namespace split {
template <typename T>
TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node,
const TfLiteTensor* input, int axis_value) {
const TfLiteEvalTensor* input, int axis_value) {
const int output_count = NumOutputs(node);
const TfLiteIntArray* input_dims = input->dims;
const TfLiteTensor* output0 = GetOutput(context, node, 0);
const TfLiteEvalTensor* output0 =
tflite::micro::GetEvalOutput(context, node, 0);
const TfLiteIntArray* output_dims = output0->dims;
const int split_dimensions = input_dims->size;
@@ -50,11 +52,11 @@ TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node,
base_inner_size *= input_dims->data[i];
}
const T* input_ptr = GetTensorData<T>(input);
const T* input_ptr = tflite::micro::GetTensorData<T>(input);
for (int k = 0; k < outer_size; ++k) {
for (int i = 0; i < output_count; ++i) {
TfLiteTensor* t = GetOutput(context, node, i);
T* output_data = GetTensorData<T>(t);
TfLiteEvalTensor* t = tflite::micro::GetEvalOutput(context, node, i);
T* output_data = tflite::micro::GetTensorData<T>(t);
const int copy_size = output_dims->data[axis] * base_inner_size;
T* output_ptr = output_data + k * copy_size;
for (int j = 0; j < copy_size; ++j) output_ptr[j] = input_ptr[j];
@@ -65,23 +67,29 @@ TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node,
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* axis = GetInput(context, node, 0);
const TfLiteTensor* input = GetInput(context, node, 1);
TF_LITE_ENSURE(context, axis != nullptr);
// Dynamic output tensors are needed if axis tensor is not constant.
// But Micro doesn't support dynamic memory allocation, so we only support
// constant axis tensor for now.
TF_LITE_ENSURE_MSG(context, IsConstantTensor(axis),
"Non constant axis tensor not supported");
return kTfLiteOk;
}
int axis_value = GetTensorData<int32_t>(axis)[0];
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 0);
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 1);
int axis_value = tflite::micro::GetTensorData<int32_t>(axis)[0];
if (axis_value < 0) {
axis_value += NumDimensions(input);
axis_value += input->dims->size;
}
TF_LITE_ENSURE(context, axis_value >= 0);
TF_LITE_ENSURE(context, axis_value < NumDimensions(input));
TF_LITE_ENSURE(context, axis_value < input->dims->size);
switch (input->type) {
case kTfLiteFloat32: {
@@ -111,16 +119,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
} // namespace split
TfLiteRegistration* Register_SPLIT() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/split::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_SPLIT() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/split::Prepare,
/*invoke=*/split::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro

View File

@@ -0,0 +1,135 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace split_v {
template <typename T>
TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node,
const TfLiteEvalTensor* input, int axis_value) {
const TfLiteIntArray* input_dims = input->dims;
const TfLiteEvalTensor* output0 =
tflite::micro::GetEvalOutput(context, node, 0);
const int split_dimensions = input_dims->size;
TFLITE_DCHECK_LT(axis_value, split_dimensions);
TFLITE_DCHECK_EQ(output0->dims->size, split_dimensions);
int64_t split_size = 0;
const int output_count = NumOutputs(node);
for (int i = 0; i < output_count; i++) {
split_size +=
tflite::micro::GetEvalOutput(context, node, i)->dims->data[axis_value];
}
TFLITE_DCHECK_EQ(split_size, input_dims->data[axis_value]);
int64_t outer_size = 1;
for (int i = 0; i < axis_value; ++i) {
outer_size *= input_dims->data[i];
}
int64_t base_inner_size = 1;
for (int i = axis_value + 1; i < split_dimensions; ++i) {
base_inner_size *= input_dims->data[i];
}
const T* input_ptr = tflite::micro::GetTensorData<T>(input);
for (int k = 0; k < outer_size; ++k) {
for (int i = 0; i < output_count; ++i) {
TfLiteEvalTensor* output_tensor =
tflite::micro::GetEvalOutput(context, node, i);
T* output_data = tflite::micro::GetTensorData<T>(output_tensor);
const int copy_size =
output_tensor->dims->data[axis_value] * base_inner_size;
T* output_ptr = output_data + k * copy_size;
for (int j = 0; j < copy_size; ++j) output_ptr[j] = input_ptr[j];
input_ptr += copy_size;
}
}
return kTfLiteOk;
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 3);
// Dynamic output tensors are needed if axis tensor is not constant.
// But Micro doesn't support dynamic memory allocation, so we only support
// constant axis tensor for now.
const TfLiteTensor* axis = GetInput(context, node, 2);
TF_LITE_ENSURE_MSG(context, IsConstantTensor(axis),
"Non constant axis tensor not supported");
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 2);
int axis_value = tflite::micro::GetTensorData<int32_t>(axis)[0];
if (axis_value < 0) {
axis_value += input->dims->size;
}
TF_LITE_ENSURE(context, axis_value >= 0);
TF_LITE_ENSURE(context, axis_value < input->dims->size);
switch (input->type) {
case kTfLiteFloat32: {
return SplitImpl<float>(context, node, input, axis_value);
}
case kTfLiteInt8: {
return SplitImpl<int8_t>(context, node, input, axis_value);
}
case kTfLiteInt16: {
return SplitImpl<int16_t>(context, node, input, axis_value);
}
case kTfLiteInt32: {
return SplitImpl<int32_t>(context, node, input, axis_value);
}
default:
TF_LITE_KERNEL_LOG(context, "Type %s currently not supported.",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace split_v
TfLiteRegistration Register_SPLIT_V() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/split_v::Prepare,
/*invoke=*/split_v::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -15,23 +15,20 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/reference/strided_slice.h"
#include <cmath>
#include <cstring>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace strided_slice {
enum KernelType {
kReference,
// TODO(soroosh): add kGenericOptimized
};
constexpr int kInputTensor = 0;
constexpr int kBeginTensor = 1;
constexpr int kEndTensor = 2;
@@ -120,64 +117,74 @@ TfLiteStatus CheckOutputSize(TfLiteContext* context,
return kTfLiteOk;
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(StridedSliceParams));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
StridedSliceParams* op_params =
static_cast<StridedSliceParams*>(node->user_data);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 4);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
StridedSliceContext op_context(context, node);
TF_LITE_ENSURE_MSG(context, op_context.dims <= kMaxDim,
"input dim should not exceed 4");
auto params = BuildStridedSliceParams(&op_context);
memcpy(op_params, &params, sizeof(StridedSliceParams));
return CheckOutputSize(context, &op_context);
}
template <KernelType kernel_type>
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
StridedSliceContext op_context(context, node);
auto op_params = BuildStridedSliceParams(&op_context);
TFLITE_DCHECK(node->user_data != nullptr);
const StridedSliceParams& op_params =
*(static_cast<const StridedSliceParams*>(node->user_data));
#define TF_LITE_STRIDED_SLICE(kernel_type, data_type) \
kernel_type::StridedSlice(op_params, GetTensorShape(op_context.input), \
GetTensorData<data_type>(op_context.input), \
GetTensorShape(op_context.output), \
GetTensorData<data_type>(op_context.output))
switch (op_context.input->type) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (output->type) {
case kTfLiteFloat32:
if (kernel_type == kReference) {
TF_LITE_STRIDED_SLICE(reference_ops, float);
}
reference_ops::StridedSlice(op_params,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
break;
case kTfLiteUInt8:
if (kernel_type == kReference) {
TF_LITE_STRIDED_SLICE(reference_ops, uint8_t);
}
reference_ops::StridedSlice(
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
break;
case kTfLiteInt8:
if (kernel_type == kReference) {
TF_LITE_STRIDED_SLICE(reference_ops, int8_t);
}
reference_ops::StridedSlice(op_params,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(op_context.input->type),
op_context.input->type);
TfLiteTypeGetName(input->type), input->type);
return kTfLiteError;
}
#undef TF_LITE_STRIDED_SLICE
return kTfLiteOk;
}
} // namespace strided_slice
TfLiteRegistration* Register_STRIDED_SLICE() {
static TfLiteRegistration r = {
/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/strided_slice::Prepare,
/*invoke=*/strided_slice::Eval<strided_slice::kReference>,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_STRIDED_SLICE() {
return {/*init=*/strided_slice::Init,
/*free=*/nullptr,
/*prepare=*/strided_slice::Prepare,
/*invoke=*/strided_slice::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
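The Init/Prepare/Eval split adopted here (and in several kernels below) follows one pattern: Init reserves persistent storage, Prepare derives parameters from full TfLiteTensors, and Eval only reads the cached data and TfLiteEvalTensors. A stripped-down sketch of that lifecycle; SketchOpData, SketchInit, SketchPrepare and SketchEval are hypothetical names, and only the context APIs already used in this change are assumed:
#include <cstddef>
#include "tensorflow/lite/c/common.h"
struct SketchOpData {
  int cached_value;  // anything that is expensive to recompute per invoke
};
void* SketchInit(TfLiteContext* context, const char* buffer, size_t length) {
  // Persistent allocations live for the lifetime of the interpreter.
  return context->AllocatePersistentBuffer(context, sizeof(SketchOpData));
}
TfLiteStatus SketchPrepare(TfLiteContext* context, TfLiteNode* node) {
  auto* data = static_cast<SketchOpData*>(node->user_data);
  // Normally derived from GetInput()/quantization params; fixed here.
  data->cached_value = 42;
  return kTfLiteOk;
}
TfLiteStatus SketchEval(TfLiteContext* context, TfLiteNode* node) {
  const auto& data = *static_cast<const SketchOpData*>(node->user_data);
  // Eval only uses the cached parameters and the TfLiteEvalTensor buffers.
  (void)data.cached_value;
  return kTfLiteOk;
}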

View File

@@ -21,8 +21,10 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
@@ -40,18 +42,18 @@ struct OpData {
// and the special 16-bit -> 16bit quantized path
int input1_shift;
int input2_shift;
int32 output_activation_min;
int32 output_activation_max;
int32_t output_activation_min;
int32_t output_activation_max;
// These fields are used only in the general 8-bit -> 8bit quantized path
int32 input1_multiplier;
int32 input2_multiplier;
int32 output_multiplier;
int32_t input1_multiplier;
int32_t input2_multiplier;
int32_t output_multiplier;
int output_shift;
int left_shift;
int32 input1_offset;
int32 input2_offset;
int32 output_offset;
int32_t input1_offset;
int32_t input2_offset;
int32_t output_offset;
};
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteSubParams* params,
@@ -93,31 +95,62 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteSubParams* params,
return kTfLiteOk;
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
auto* params = reinterpret_cast<TfLiteSubParams*>(node->builtin_data);
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
TF_LITE_ENSURE(context, input1 != nullptr);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TF_LITE_ENSURE(context, input2 != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_STATUS(
CalculateOpData(context, params, input1, input2, output, data));
return kTfLiteOk;
}
void EvalSub(TfLiteContext* context, TfLiteNode* node, TfLiteSubParams* params,
const OpData* data, const TfLiteTensor* input1,
const TfLiteTensor* input2, TfLiteTensor* output) {
const OpData* data, const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
float output_activation_min, output_activation_max;
CalculateActivationRange(params->activation, &output_activation_min,
&output_activation_max);
tflite::ArithmeticParams op_params;
SetActivationParams(output_activation_min, output_activation_max, &op_params);
#define TF_LITE_SUB(opname) \
opname(op_params, GetTensorShape(input1), GetTensorData<float>(input1), \
GetTensorShape(input2), GetTensorData<float>(input2), \
GetTensorShape(output), GetTensorData<float>(output))
if (data->requires_broadcast) {
TF_LITE_SUB(tflite::reference_ops::BroadcastSubSlow);
tflite::reference_ops::BroadcastSubSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<float>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<float>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
} else {
TF_LITE_SUB(tflite::reference_ops::SubWithActivation);
tflite::reference_ops::SubWithActivation(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<float>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<float>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
#undef TF_LITE_SUB
}
TfLiteStatus EvalSubQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteSubParams* params, const OpData* data,
const TfLiteTensor* input1,
const TfLiteTensor* input2,
TfLiteTensor* output) {
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
tflite::ArithmeticParams op_params;
op_params.left_shift = data->left_shift;
@@ -133,25 +166,46 @@ TfLiteStatus EvalSubQuantized(TfLiteContext* context, TfLiteNode* node,
SetActivationParams(data->output_activation_min,
data->output_activation_max, &op_params);
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
GetTensorShape(input1), GetTensorShape(input2), &op_params);
#define TF_LITE_SUB(opname, dtype) \
opname(op_params, GetTensorShape(input1), GetTensorData<dtype>(input1), \
GetTensorShape(input2), GetTensorData<dtype>(input2), \
GetTensorShape(output), GetTensorData<dtype>(output));
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
if (output->type == kTfLiteInt8) {
if (need_broadcast) {
TF_LITE_SUB(tflite::reference_ops::BroadcastSubSlow, int8_t);
tflite::reference_ops::BroadcastSubSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
} else {
TF_LITE_SUB(tflite::reference_ops::Sub, int8_t);
tflite::reference_ops::Sub(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
} else {
if (need_broadcast) {
TF_LITE_SUB(tflite::reference_ops::BroadcastSubSlow, uint8_t);
tflite::reference_ops::BroadcastSubSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else {
TF_LITE_SUB(tflite::reference_ops::Sub, uint8_t);
tflite::reference_ops::Sub(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
}
}
#undef TF_LITE_SUB
}
return kTfLiteOk;
@@ -160,13 +214,15 @@ TfLiteStatus EvalSubQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLiteSubParams*>(node->builtin_data);
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
OpData data;
TF_LITE_ENSURE_STATUS(
CalculateOpData(context, params, input1, input2, output, &data));
TFLITE_DCHECK(node->user_data != nullptr);
const OpData& data = *(static_cast<const OpData*>(node->user_data));
if (output->type == kTfLiteFloat32) {
EvalSub(context, node, params, &data, input1, input2, output);
@@ -184,16 +240,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
} // namespace sub
TfLiteRegistration* Register_SUB() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/sub::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_SUB() {
return {/*init=*/sub::Init,
/*free=*/nullptr,
/*prepare=*/sub::Prepare,
/*invoke=*/sub::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
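As a quick illustration of the branch structure above (made-up shapes), ProcessBroadcastShapes decides which reference kernel the quantized path uses:
//   input1 shape {2, 3}, input2 shape {1, 3} -> need_broadcast == true,
//       so BroadcastSubSlow is called.
//   input1 shape {2, 3}, input2 shape {2, 3} -> need_broadcast == false,
//       so the elementwise Sub kernel is called.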

View File

@@ -23,25 +23,38 @@ limitations under the License.
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/activation_utils.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace ops {
namespace micro {
namespace svdf {
namespace {
struct OpData {
int32 effective_scale_1_a;
int32 effective_scale_2_a;
int32_t effective_scale_1_a;
int32_t effective_scale_2_a;
// b versions of each scale are kept at int since the numbers are just the
// shift value - typically between [-32, 32].
int effective_scale_1_b;
int effective_scale_2_b;
int scratch_tensor_index;
int scratch_output_tensor_index;
// Cached tensor zero point values for quantized operations.
int input_zero_point;
int output_zero_point;
};
// Input tensors.
constexpr int kInputTensor = 0;
constexpr int kWeightsFeatureTensor = 1;
constexpr int kWeightsTimeTensor = 2;
constexpr int kBiasTensor = 3;
// This is a variable tensor, and will be modified by this op.
constexpr int kInputActivationStateTensor = 4;
// Output tensor.
constexpr int kOutputTensor = 0;
/**
* This version of SVDF is specific to TFLite Micro. It contains the following
* differences from the TFLite version:
@@ -107,18 +120,19 @@ static inline void ApplyTimeWeightsBiasAndActivation(
for (int b = 0; b < batch_size; ++b) {
float* output_ptr_batch = output_ptr + b * num_units;
for (int i = 0; i < num_units; ++i) {
*output_ptr_batch = ActivationValFloat(activation, *output_ptr_batch);
*output_ptr_batch =
tflite::ops::micro::ActivationValFloat(activation, *output_ptr_batch);
++output_ptr_batch;
}
}
}
inline void EvalFloatSVDF(
TfLiteContext* context, TfLiteNode* node, const TfLiteTensor* input,
const TfLiteTensor* weights_feature, const TfLiteTensor* weights_time,
const TfLiteTensor* bias, const TfLiteSVDFParams* params,
int scratch_tensor_index, TfLiteTensor* activation_state,
TfLiteTensor* output) {
TfLiteContext* context, TfLiteNode* node, const TfLiteEvalTensor* input,
const TfLiteEvalTensor* weights_feature,
const TfLiteEvalTensor* weights_time, const TfLiteEvalTensor* bias,
const TfLiteSVDFParams* params, int scratch_tensor_index,
TfLiteEvalTensor* activation_state, TfLiteEvalTensor* output) {
const int rank = params->rank;
const int batch_size = input->dims->data[0];
const int input_size = input->dims->data[1];
@@ -126,12 +140,14 @@ inline void EvalFloatSVDF(
const int num_units = num_filters / rank;
const int memory_size = weights_time->dims->data[1];
const float* weights_feature_ptr = GetTensorData<float>(weights_feature);
const float* weights_time_ptr = GetTensorData<float>(weights_time);
const float* bias_ptr = GetTensorData<float>(bias);
const float* input_ptr = GetTensorData<float>(input);
const float* weights_feature_ptr =
tflite::micro::GetTensorData<float>(weights_feature);
const float* weights_time_ptr =
tflite::micro::GetTensorData<float>(weights_time);
const float* bias_ptr = tflite::micro::GetTensorData<float>(bias);
const float* input_ptr = tflite::micro::GetTensorData<float>(input);
float* state_ptr = GetTensorData<float>(activation_state);
float* state_ptr = tflite::micro::GetTensorData<float>(activation_state);
TFLITE_DCHECK(context != nullptr);
TFLITE_DCHECK(context->GetScratchBuffer != nullptr);
@@ -139,7 +155,7 @@ inline void EvalFloatSVDF(
float* scratch_ptr = static_cast<float*>(
context->GetScratchBuffer(context, scratch_tensor_index));
float* output_ptr = GetTensorData<float>(output);
float* output_ptr = tflite::micro::GetTensorData<float>(output);
// Left shift the activation_state.
{
@@ -185,14 +201,13 @@ inline void EvalFloatSVDF(
}
void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
const TfLiteTensor* input_tensor,
const TfLiteTensor* weights_feature_tensor,
const TfLiteTensor* weights_time_tensor,
const TfLiteTensor* bias_tensor,
const TfLiteEvalTensor* input_tensor,
const TfLiteEvalTensor* weights_feature_tensor,
const TfLiteEvalTensor* weights_time_tensor,
const TfLiteEvalTensor* bias_tensor,
const TfLiteSVDFParams* params,
TfLiteTensor* activation_state_tensor,
TfLiteTensor* output_tensor, const OpData& data,
int32_t input_zp, int32_t output_zp) {
TfLiteEvalTensor* activation_state_tensor,
TfLiteEvalTensor* output_tensor, const OpData& data) {
const int n_rank = params->rank;
const int n_batch = input_tensor->dims->data[0];
const int n_input = input_tensor->dims->data[1];
@@ -209,7 +224,8 @@ void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
context->GetScratchBuffer(context, data.scratch_output_tensor_index));
// Shift states.
int16_t* const state_ptr = GetTensorData<int16_t>(activation_state_tensor);
int16_t* const state_ptr =
tflite::micro::GetTensorData<int16_t>(activation_state_tensor);
// Left shift the activation_state.
{
@@ -225,10 +241,11 @@ void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
// Feature matmul.
{
int16_t* state = GetTensorData<int16_t>(activation_state_tensor);
const int8_t* input = GetTensorData<int8_t>(input_tensor);
int16_t* state =
tflite::micro::GetTensorData<int16_t>(activation_state_tensor);
const int8_t* input = tflite::micro::GetTensorData<int8_t>(input_tensor);
const int8_t* weight_feature =
GetTensorData<int8_t>(weights_feature_tensor);
tflite::micro::GetTensorData<int8_t>(weights_feature_tensor);
const int32_t output_max = std::numeric_limits<int16_t>::max();
const int32_t output_min = std::numeric_limits<int16_t>::min();
int16_t* result_in_batch = state + (n_memory - 1);
@@ -238,7 +255,8 @@ void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
int32_t dot_prod = 0;
const int8_t* vector_in_batch = input + b * n_input;
for (int c = 0; c < n_input; c++) {
dot_prod += *matrix_ptr++ * (*vector_in_batch++ - input_zp);
dot_prod +=
*matrix_ptr++ * (*vector_in_batch++ - data.input_zero_point);
}
dot_prod = MultiplyByQuantizedMultiplier(
dot_prod, data.effective_scale_1_a, data.effective_scale_1_b);
@@ -261,9 +279,10 @@ void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
int32_t* scratch_ptr_batch = scratch_tensor + b * n_filter;
// Perform batched vector dot product:
const int16_t* vector1_ptr = GetTensorData<int16_t>(weights_time_tensor);
const int16_t* vector1_ptr =
tflite::micro::GetTensorData<int16_t>(weights_time_tensor);
const int16_t* vector2_ptr =
GetTensorData<int16_t>(activation_state_tensor) +
tflite::micro::GetTensorData<int16_t>(activation_state_tensor) +
b * n_memory * n_filter;
for (int i = 0; i < n_filter; i++) {
@@ -281,7 +300,8 @@ void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
// Add bias.
if (bias_tensor) {
// Vector batch assign:
const int32_t* bias_data = GetTensorData<int32_t>(bias_tensor);
const int32_t* bias_data =
tflite::micro::GetTensorData<int32_t>(bias_tensor);
for (int i = 0; i < n_batch; ++i) {
int32_t* output_ptr = scratch_output_tensor + i * n_unit;
const int32_t* bias_ptr = bias_data;
@@ -316,34 +336,17 @@ void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
int32_t x1 = scratch_output_tensor[i];
int32_t x2 = MultiplyByQuantizedMultiplier(x1, data.effective_scale_2_a,
data.effective_scale_2_b);
int32_t x3 = x2 + output_zp;
int32_t x3 = x2 + data.output_zero_point;
int32_t x4 = std::min(std::max(output_min, x3), output_max);
GetTensorData<int8_t>(output_tensor)[i] = static_cast<int8_t>(x4);
tflite::micro::GetTensorData<int8_t>(output_tensor)[i] =
static_cast<int8_t>(x4);
}
}
}
} // namespace
// Input tensors.
constexpr int kInputTensor = 0;
constexpr int kWeightsFeatureTensor = 1;
constexpr int kWeightsTimeTensor = 2;
constexpr int kBiasTensor = 3;
// This is a variable tensor, and will be modified by this op.
constexpr int kInputActivationStateTensor = 4;
// Output tensor.
constexpr int kOutputTensor = 0;
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
void* data = nullptr;
if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) ==
kTfLiteError) {
return nullptr;
}
return data;
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
@@ -359,13 +362,17 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
// [4] = Activation State (variable),
// {2, batch_size, memory_size * num_filters}
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
const TfLiteTensor* weights_feature =
GetInput(context, node, kWeightsFeatureTensor);
TF_LITE_ENSURE(context, weights_feature != nullptr);
const TfLiteTensor* weights_time =
GetInput(context, node, kWeightsTimeTensor);
TF_LITE_ENSURE(context, weights_time != nullptr);
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
const TfLiteTensor* activation_state =
GetInput(context, node, kInputActivationStateTensor);
TF_LITE_ENSURE(context, activation_state != nullptr);
// Define input constants based on input tensor definition above:
const int rank = params->rank;
@@ -382,9 +389,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumDimensions(input), 2);
// Validate Tensor Output:
// [0] = float/int8, {2, batch_size, num_units}
// [0] = float/int8_t, {2, batch_size, num_units}
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_EQ(context, NumDimensions(output), 2);
TF_LITE_ENSURE_EQ(context, output->dims->data[0], batch_size);
TF_LITE_ENSURE_EQ(context, output->dims->data[1], num_units);
@@ -408,9 +416,14 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, activation_state->dims->data[0], batch_size);
TF_LITE_ENSURE_EQ(context, activation_state->dims->data[1],
memory_size * num_filters);
// Since is_variable is not part of TfLiteEvalTensor, check is_variable here.
TF_LITE_ENSURE_EQ(context, activation_state->is_variable, true);
TF_LITE_ENSURE_EQ(context, node->inputs->size, 5);
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
if (input->type == kTfLiteInt8) {
TF_LITE_ENSURE_EQ(context, weights_feature->type, kTfLiteInt8);
TF_LITE_ENSURE_EQ(context, weights_time->type, kTfLiteInt16);
@@ -419,35 +432,30 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteInt32);
}
TF_LITE_ENSURE_EQ(context, output->type, kTfLiteInt8);
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8);
const auto* input_params =
reinterpret_cast<TfLiteAffineQuantization*>(input->quantization.params);
const auto* weights_feature_params =
static_cast<const TfLiteAffineQuantization*>(
weights_feature->quantization.params);
const auto* state_params = static_cast<const TfLiteAffineQuantization*>(
activation_state->quantization.params);
const auto* weight_time_params =
static_cast<const TfLiteAffineQuantization*>(
weights_time->quantization.params);
const auto* output_params = static_cast<const TfLiteAffineQuantization*>(
output->quantization.params);
const double effective_scale_1 = static_cast<double>(
input_params->scale->data[0] * weights_feature_params->scale->data[0] /
state_params->scale->data[0]);
const double effective_scale_2 = static_cast<double>(
state_params->scale->data[0] * weight_time_params->scale->data[0] /
output_params->scale->data[0]);
input->params.scale * weights_feature->params.scale /
activation_state->params.scale);
const double effective_scale_2 =
static_cast<double>(activation_state->params.scale *
weights_time->params.scale / output->params.scale);
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
// TODO(b/162018098): Use TF_LITE_ENSURE_NEAR when it is ready.
TF_LITE_ENSURE(
context,
std::abs(static_cast<double>(bias->params.scale) -
static_cast<double>(activation_state->params.scale *
weights_time->params.scale)) < 1e-5);
QuantizeMultiplier(effective_scale_1, &(data->effective_scale_1_a),
&(data->effective_scale_1_b));
QuantizeMultiplier(effective_scale_2, &(data->effective_scale_2_a),
&(data->effective_scale_2_b));
data->input_zero_point = input->params.zero_point;
data->output_zero_point = output->params.zero_point;
TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
const TfLiteStatus scratch_status = context->RequestScratchBufferInArena(
@@ -467,10 +475,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
if (bias != nullptr) {
TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteFloat32);
}
TF_LITE_ENSURE_EQ(context, output->type, kTfLiteFloat32);
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteFloat32);
TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
const TfLiteStatus scratch_status = context->RequestScratchBufferInArena(
@@ -484,20 +489,24 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLiteSVDFParams*>(node->builtin_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* weights_feature =
GetInput(context, node, kWeightsFeatureTensor);
const TfLiteTensor* weights_time =
GetInput(context, node, kWeightsTimeTensor);
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
TfLiteTensor* activation_state =
GetVariableInput(context, node, kInputActivationStateTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData& data = *(static_cast<const OpData*>(node->user_data));
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
const TfLiteEvalTensor* weights_feature =
tflite::micro::GetEvalInput(context, node, kWeightsFeatureTensor);
const TfLiteEvalTensor* weights_time =
tflite::micro::GetEvalInput(context, node, kWeightsTimeTensor);
const TfLiteEvalTensor* bias =
(NumInputs(node) == 5)
? tflite::micro::GetEvalInput(context, node, kBiasTensor)
: nullptr;
TfLiteEvalTensor* activation_state = tflite::micro::GetMutableEvalInput(
context, node, kInputActivationStateTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (weights_feature->type) {
case kTfLiteFloat32: {
EvalFloatSVDF(context, node, input, weights_feature, weights_time, bias,
@@ -508,11 +517,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
}
case kTfLiteInt8: {
TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActRelu);
EvalIntegerSVDF(context, node, input, weights_feature, weights_time, bias,
params, activation_state, output, data,
input->params.zero_point, output->params.zero_point);
params, activation_state, output, data);
return kTfLiteOk;
break;
}
@@ -525,20 +531,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteOk;
}
} // namespace svdf
} // namespace
TfLiteRegistration* Register_SVDF() {
static TfLiteRegistration r = {/*init=*/svdf::Init,
/*free=*/nullptr,
/*prepare=*/svdf::Prepare,
/*invoke=*/svdf::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_SVDF() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite
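A numeric walk-through of the two effective scales computed in Prepare() above, using made-up quantization scales (not from any real model):
//   input.scale = 0.5, weights_feature.scale = 0.01, activation_state.scale = 0.25
//     effective_scale_1 = 0.5 * 0.01 / 0.25 = 0.02
//   activation_state.scale = 0.25, weights_time.scale = 0.04, output.scale = 0.1
//     effective_scale_2 = 0.25 * 0.04 / 0.1 = 0.1
// QuantizeMultiplier() then stores each real scale as an int32 fixed-point
// multiplier plus a shift, so EvalIntegerSVDF can stay in integer arithmetic.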

View File

@@ -0,0 +1,158 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/tanh.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace ops {
namespace micro {
namespace activations {
namespace {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
struct OpData {
int32_t input_zero_point;
int32_t input_range_radius;
int32_t input_multiplier;
int input_left_shift;
};
void* TanhInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
OpData* data) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
static constexpr int kInputIntegerBits = 4;
const double input_real_multiplier =
static_cast<double>(input->params.scale) *
static_cast<double>(1 << (31 - kInputIntegerBits));
const double q = std::frexp(input_real_multiplier, &data->input_left_shift);
data->input_multiplier = static_cast<int32_t>(TfLiteRound(q * (1ll << 31)));
data->input_range_radius =
CalculateInputRadius(kInputIntegerBits, data->input_left_shift, 31);
}
return kTfLiteOk;
}
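A worked instance of the fixed-point setup above, assuming a hypothetical int8 input scale of 3/128 (illustrative numbers only):
//   input_real_multiplier = (3 / 128) * 2^(31 - 4) = 3 * 2^20 = 3145728
//   frexp(3145728) = 0.75 * 2^22      -> input_left_shift = 22
//   input_multiplier = round(0.75 * 2^31) = 1610612736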
TfLiteStatus TanhPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
data->input_zero_point = input->params.zero_point;
return CalculateArithmeticOpData(context, node, data);
}
} // namespace
TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData& data = *(static_cast<const OpData*>(node->user_data));
switch (input->type) {
case kTfLiteFloat32: {
reference_ops::Tanh(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
} break;
case kTfLiteInt16: {
TanhParams params;
params.input_left_shift = data.input_left_shift;
reference_ops::Tanh(params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int16_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
return kTfLiteOk;
} break;
case kTfLiteUInt8: {
TanhParams params;
params.input_zero_point = data.input_zero_point;
params.input_range_radius = data.input_range_radius;
params.input_multiplier = data.input_multiplier;
params.input_left_shift = data.input_left_shift;
reference_ops::Tanh(params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
return kTfLiteOk;
} break;
case kTfLiteInt8: {
reference_integer_ops::Tanh(
data.input_zero_point, data.input_range_radius, data.input_multiplier,
data.input_left_shift, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
} break;
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
}
} // namespace activations
TfLiteRegistration Register_TANH() {
return {/*init=*/activations::TanhInit,
/*free=*/nullptr,
/*prepare=*/activations::TanhPrepare,
/*invoke=*/activations::TanhEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -17,6 +17,7 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
@@ -28,14 +29,16 @@ constexpr int kInputTensor = 0;
template <typename T>
TfLiteStatus UnpackImpl(TfLiteContext* context, TfLiteNode* node,
const TfLiteTensor* input, int output_count, int axis) {
const TfLiteTensor* output0 = GetOutput(context, node, 0);
const TfLiteEvalTensor* input, int output_count,
int axis) {
const TfLiteEvalTensor* output0 =
tflite::micro::GetEvalOutput(context, node, 0);
const TfLiteIntArray* input_dims = input->dims;
const TfLiteIntArray* output_dims = output0->dims;
const int dimensions = input_dims->size;
if (axis < 0) {
axis += NumDimensions(input);
axis += input->dims->size;
}
TFLITE_DCHECK_LT(axis, dimensions);
@@ -54,11 +57,11 @@ TfLiteStatus UnpackImpl(TfLiteContext* context, TfLiteNode* node,
}
TFLITE_DCHECK_EQ(output_size, copy_size * outer_size);
const T* input_data = GetTensorData<T>(input);
const T* input_data = tflite::micro::GetTensorData<T>(input);
for (int i = 0; i < output_count; ++i) {
TfLiteTensor* t = GetOutput(context, node, i);
T* output_data = GetTensorData<T>(t);
TfLiteEvalTensor* t = tflite::micro::GetEvalOutput(context, node, i);
T* output_data = tflite::micro::GetTensorData<T>(t);
for (int k = 0; k < outer_size; ++k) {
T* output_ptr = output_data + copy_size * k;
int loc = k * output_count * copy_size + i * copy_size;
@@ -74,7 +77,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TfLiteUnpackParams* data =
reinterpret_cast<TfLiteUnpackParams*>(node->builtin_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
switch (input->type) {
case kTfLiteFloat32: {
@@ -101,16 +105,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
} // namespace
} // namespace unpack
TfLiteRegistration* Register_UNPACK() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/unpack::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_UNPACK() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/unpack::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro

View File

@@ -15,9 +15,15 @@ limitations under the License.
#include "tensorflow/lite/micro/memory_helpers.h"
#include <cstddef>
#include <cstdint>
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
@@ -40,8 +46,7 @@ size_t AlignSizeUp(size_t size, size_t alignment) {
return aligned_size;
}
TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size,
ErrorReporter* reporter) {
TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size) {
switch (type) {
case kTfLiteFloat32:
*size = sizeof(float);
@@ -67,9 +72,10 @@ TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size,
case kTfLiteComplex64:
*size = sizeof(float) * 2;
break;
case kTfLiteComplex128:
*size = sizeof(double) * 2;
break;
default:
reporter->Report("Type %s (%d) not is not supported",
TfLiteTypeGetName(type), type);
return kTfLiteError;
}
return kTfLiteOk;
@@ -79,17 +85,71 @@ TfLiteStatus BytesRequiredForTensor(const tflite::Tensor& flatbuffer_tensor,
size_t* bytes, size_t* type_size,
ErrorReporter* error_reporter) {
int element_count = 1;
for (size_t n = 0; n < flatbuffer_tensor.shape()->Length(); ++n) {
element_count *= flatbuffer_tensor.shape()->Get(n);
// If flatbuffer_tensor.shape == nullptr, then flatbuffer_tensor is a scalar
// so has 1 element.
if (flatbuffer_tensor.shape() != nullptr) {
for (size_t n = 0; n < flatbuffer_tensor.shape()->Length(); ++n) {
element_count *= flatbuffer_tensor.shape()->Get(n);
}
}
TfLiteType tf_lite_type;
TF_LITE_ENSURE_STATUS(ConvertTensorType(flatbuffer_tensor.type(),
&tf_lite_type, error_reporter));
TF_LITE_ENSURE_STATUS(
TfLiteTypeSizeOf(tf_lite_type, type_size, error_reporter));
TF_LITE_ENSURE_STATUS(TfLiteTypeSizeOf(tf_lite_type, type_size));
*bytes = element_count * (*type_size);
return kTfLiteOk;
}
TfLiteStatus TfLiteEvalTensorByteLength(const TfLiteEvalTensor* eval_tensor,
size_t* out_bytes) {
TFLITE_DCHECK(out_bytes != nullptr);
int element_count = 1;
// If eval_tensor->dims == nullptr, then tensor is a scalar so has 1 element.
if (eval_tensor->dims != nullptr) {
for (int n = 0; n < eval_tensor->dims->size; ++n) {
element_count *= eval_tensor->dims->data[n];
}
}
size_t type_size;
TF_LITE_ENSURE_STATUS(TfLiteTypeSizeOf(eval_tensor->type, &type_size));
*out_bytes = element_count * type_size;
return kTfLiteOk;
}
TfLiteStatus AllocateOutputDimensionsFromInput(TfLiteContext* context,
const TfLiteTensor* input1,
const TfLiteTensor* input2,
TfLiteTensor* output) {
const TfLiteTensor* input = nullptr;
TF_LITE_ENSURE(context, input1->dims != nullptr);
TF_LITE_ENSURE(context, input2->dims != nullptr);
TF_LITE_ENSURE(context, output->dims->size == 0);
input = input1->dims->size > input2->dims->size ? input1 : input2;
TF_LITE_ENSURE(context, output->type == input->type);
size_t size = 0;
TfLiteTypeSizeOf(input->type, &size);
const int dimensions_count = tflite::GetTensorShape(input).DimensionsCount();
for (int i = 0; i < dimensions_count; i++) {
size *= input->dims->data[i];
}
output->bytes = size;
output->dims =
reinterpret_cast<TfLiteIntArray*>(context->AllocatePersistentBuffer(
context, TfLiteIntArrayGetSizeInBytes(size)));
output->dims->size = input->dims->size;
for (int i = 0; i < dimensions_count; i++) {
output->dims->data[i] = input->dims->data[i];
}
return kTfLiteOk;
}
} // namespace tflite
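A quick sanity check of the byte-count helpers above, with made-up shapes:
//   kTfLiteFloat32 tensor, dims {2, 3}: element_count = 2 * 3 = 6,
//       TfLiteTypeSizeOf -> 4 bytes, so TfLiteEvalTensorByteLength -> 24 bytes.
//   Scalar (dims == nullptr): element_count stays 1 -> 4 bytes.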

View File

@@ -15,6 +15,9 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_
#define TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_
#include <cstddef>
#include <cstdint>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/schema/schema_generated.h"
@@ -31,14 +34,26 @@ uint8_t* AlignPointerDown(uint8_t* data, size_t alignment);
size_t AlignSizeUp(size_t size, size_t alignment);
// Returns size in bytes for a given TfLiteType.
TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size,
ErrorReporter* reporter);
TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size);
// How many bytes are needed to hold a tensor's contents.
TfLiteStatus BytesRequiredForTensor(const tflite::Tensor& flatbuffer_tensor,
size_t* bytes, size_t* type_size,
ErrorReporter* error_reporter);
// How many bytes are used in a TfLiteEvalTensor instance. The byte length is
// returned in out_bytes.
TfLiteStatus TfLiteEvalTensorByteLength(const TfLiteEvalTensor* eval_tensor,
size_t* out_bytes);
// Deduce output dimensions from input and allocate given size.
// Useful for operators with two inputs where the largest input should equal the
// output dimension.
TfLiteStatus AllocateOutputDimensionsFromInput(TfLiteContext* context,
const TfLiteTensor* input1,
const TfLiteTensor* input2,
TfLiteTensor* output);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_

View File

@@ -48,10 +48,10 @@ GreedyMemoryPlanner::GreedyMemoryPlanner(unsigned char* scratch_buffer,
requirements_ = reinterpret_cast<BufferRequirements*>(next_free);
next_free += sizeof(BufferRequirements) * max_buffer_count_;
buffer_sizes_sorted_by_size_ = reinterpret_cast<int*>(next_free);
buffer_sizes_sorted_ = reinterpret_cast<int*>(next_free);
next_free += sizeof(int) * max_buffer_count_;
buffer_ids_sorted_by_size_ = reinterpret_cast<int*>(next_free);
buffer_ids_sorted_ = reinterpret_cast<int*>(next_free);
next_free += sizeof(int) * max_buffer_count_;
buffers_sorted_by_offset_ = reinterpret_cast<ListEntry*>(next_free);
@@ -76,11 +76,24 @@ TfLiteStatus GreedyMemoryPlanner::AddBuffer(
current->size = size;
current->first_time_used = first_time_used;
current->last_time_used = last_time_used;
current->offline_offset = kOnlinePlannedBuffer;
++buffer_count_;
need_to_calculate_offsets_ = true;
return kTfLiteOk;
}
TfLiteStatus GreedyMemoryPlanner::AddBuffer(
tflite::ErrorReporter* error_reporter, int size, int first_time_used,
int last_time_used, int offline_offset) {
BufferRequirements* current = &requirements_[buffer_count_];
if (AddBuffer(error_reporter, size, first_time_used, last_time_used) !=
kTfLiteOk) {
return kTfLiteError;
}
current->offline_offset = offline_offset;
return kTfLiteOk;
}
bool GreedyMemoryPlanner::DoesEntryOverlapInTime(
const GreedyMemoryPlanner::ListEntry* entry, const int first_time_used,
const int last_time_used) const {
@@ -102,7 +115,7 @@ GreedyMemoryPlanner::NextSimultaneouslyActiveBuffer(
ListEntry* result = nullptr;
ListEntry* candidate_next_entry;
if (start == nullptr) {
candidate_next_entry = &buffers_sorted_by_offset_[0];
candidate_next_entry = &buffers_sorted_by_offset_[first_entry_index_];
} else {
if (start->next_entry_index == -1) {
return nullptr;
@@ -134,29 +147,51 @@ void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
// This helps find a more compact layout. Intuitively, you can think
// about putting the large buffers in place first, and then the
// smaller buffers can fit in the gaps, rather than fragmenting the
// gaps with small buffers at the beginning.
// gaps with small buffers at the beginning. Add offline planned offsets
// first in the list, since they have a predetermined offset.
int idx_from_tail = buffer_count_;
int idx_from_head = 0;
for (int i = 0; i < buffer_count_; ++i) {
buffer_sizes_sorted_by_size_[i] = requirements_[i].size;
buffer_ids_sorted_by_size_[i] = i;
buffer_offsets_[i] = -1;
if (requirements_[i].offline_offset == kOnlinePlannedBuffer) {
idx_from_tail--;
buffer_sizes_sorted_[idx_from_tail] = requirements_[i].size;
buffer_ids_sorted_[idx_from_tail] = i;
buffer_offsets_[i] = -1;
} else {
buffer_sizes_sorted_[idx_from_head] = requirements_[i].size;
buffer_ids_sorted_[idx_from_head] = i;
buffer_offsets_[i] = requirements_[i].offline_offset;
idx_from_head++;
}
}
// This sorting algorithm is naive, and may end up taking a very long time
// with hundreds of buffers.
ReverseSortInPlace(buffer_sizes_sorted_by_size_, buffer_ids_sorted_by_size_,
buffer_count_);
// Put the largest buffer at offset zero to start the process.
ListEntry* first_entry = &buffers_sorted_by_offset_[0];
first_entry->offset = 0;
first_entry->requirements_index = buffer_ids_sorted_by_size_[0];
first_entry->next_entry_index = -1;
// This sorting algorithm is naive, and may end up taking a very long time
// with hundreds of buffers. Do not sort the offline planned offsets.
ReverseSortInPlace(&buffer_sizes_sorted_[idx_from_head],
&buffer_ids_sorted_[idx_from_head],
buffer_count_ - idx_from_head);
// Initialize the first entry to the first buffer in
// buffer_ids_sorted_.
// - If there are no offline planned offsets, the largest buffer will be
// first, and the buffers will be handled in size order.
// - If offline offsets are present, these will be handled first in order
// for the greedy algorithm to utilize gaps in the offline plan.
first_entry_index_ = 0;
next_free_entry_ = 1;
buffer_offsets_[buffer_ids_sorted_by_size_[0]] = 0;
ListEntry* first_entry = &buffers_sorted_by_offset_[first_entry_index_];
first_entry->next_entry_index = -1; // to mark the entry as end of list
int buffer_id = buffer_ids_sorted_[0];
first_entry->requirements_index = buffer_id;
if (requirements_[buffer_id].offline_offset == kOnlinePlannedBuffer) {
buffer_offsets_[buffer_id] = 0;
}
first_entry->offset = buffer_offsets_[buffer_id];
// Work through the rest of the buffers to find a good gap to place each one.
for (int i = 1; i < buffer_count_; ++i) {
// The id is the order the buffer was originally added by the client.
const int buffer_id = buffer_ids_sorted_by_size_[i];
buffer_id = buffer_ids_sorted_[i];
// Look at what size and time range the buffer needs to be active.
BufferRequirements* wanted_requirements = &requirements_[buffer_id];
const int wanted_size = wanted_requirements->size;
@@ -168,37 +203,43 @@ void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
// so that it's easy to find the next buffer in memory, and so the gap.
// The candidate_entry variable holds the buffer that we're considering
// placing the current buffer after.
ListEntry* prior_entry = nullptr;
int candidate_offset = 0;
// Loop through the offset-ordered list of buffers, looking for gaps.
while (true) {
// Find out what the next active buffer is.
ListEntry* next_entry = NextSimultaneouslyActiveBuffer(
prior_entry, wanted_first_time_used, wanted_last_time_used);
if (wanted_requirements->offline_offset == kOnlinePlannedBuffer) {
ListEntry* prior_entry = nullptr;
while (true) {
// Find out what the next active buffer is.
ListEntry* next_entry = NextSimultaneouslyActiveBuffer(
prior_entry, wanted_first_time_used, wanted_last_time_used);
if (prior_entry) {
BufferRequirements* candidate_requirements =
&requirements_[prior_entry->requirements_index];
const int prior_entry_offset =
prior_entry->offset + candidate_requirements->size;
if (prior_entry_offset > candidate_offset) {
candidate_offset = prior_entry_offset;
if (prior_entry) {
BufferRequirements* candidate_requirements =
&requirements_[prior_entry->requirements_index];
const int prior_entry_offset =
prior_entry->offset + candidate_requirements->size;
if (prior_entry_offset > candidate_offset) {
candidate_offset = prior_entry_offset;
}
}
if (next_entry == nullptr) {
// We're at the end of the list, so we can always append the buffer
// here.
break;
}
// Find out how much space there is between us and the next buffer.
const int gap = next_entry->offset - candidate_offset;
if (gap >= wanted_size) {
// This entry has a big enough gap between it and the next, so
// use it!
break;
}
// The gap wasn't big enough, so move on to another candidate.
prior_entry = next_entry;
}
if (next_entry == nullptr) {
// We're at the end of the list, so we can always append the buffer
// here.
break;
}
// Find out how much space there is between us and the next buffer.
const int gap = next_entry->offset - candidate_offset;
if (gap >= wanted_size) {
// This entry has a big enough gap between it and the next, so
// use it!
break;
}
// The gap wasn't big enough, so move on to another candidate.
prior_entry = next_entry;
} else {
// Offline planned offsets are to be considered constant
candidate_offset = wanted_requirements->offline_offset;
}
// At this point, we've either found a gap (possibly at the end of the
// list) and want to place the buffer there, or there are no other active
@@ -212,26 +253,36 @@ void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
new_entry->requirements_index = buffer_id;
const int new_entry_index = next_free_entry_;
++next_free_entry_;
ListEntry* current_entry = first_entry;
// Make sure that we insert the buffer at the correct place in the ordered
// list.
while (true) {
const int next_entry_index = current_entry->next_entry_index;
if (next_entry_index == -1) {
// We're at the end of the list, so just add the new entry here.
current_entry->next_entry_index = new_entry_index;
new_entry->next_entry_index = -1;
break;
if (first_entry->offset > candidate_offset) {
// The new entry offset is smaller than the first entry offset =>
// replace the first entry
first_entry = new_entry;
first_entry->next_entry_index = first_entry_index_;
first_entry_index_ = new_entry_index;
} else {
ListEntry* current_entry = first_entry;
// Make sure that we insert the buffer at the correct place in the
// buffer-offset-ordered list
while (true) {
const int next_entry_index = current_entry->next_entry_index;
if (next_entry_index == -1) {
// We're at the end of the list, so just add the new entry here.
current_entry->next_entry_index = new_entry_index;
new_entry->next_entry_index = -1;
break;
}
// not at the end of the list -> take a look at next entry
ListEntry* next_entry = &buffers_sorted_by_offset_[next_entry_index];
if (next_entry->offset > candidate_offset) {
// We're at the right spot to do an insertion and retain the sorting
// order, so place the new entry here.
new_entry->next_entry_index = current_entry->next_entry_index;
current_entry->next_entry_index = new_entry_index;
break;
}
current_entry = next_entry;
}
ListEntry* next_entry = &buffers_sorted_by_offset_[next_entry_index];
if (next_entry->offset > candidate_offset) {
// We're at the right spot to do an insertion and retain the sorting
// order, so place the new entry here.
new_entry->next_entry_index = current_entry->next_entry_index;
current_entry->next_entry_index = new_entry_index;
break;
}
current_entry = next_entry;
}
}
}
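To make the gap-filling above concrete, a small sketch with three online-planned buffers; sizes and lifetimes are invented, GreedyPlannerSketch is a hypothetical name, and the (scratch_buffer, scratch_buffer_size) constructor is assumed from the top of this file:
#include <cstddef>
#include "tensorflow/lite/micro/memory_planner/greedy_memory_planner.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
void GreedyPlannerSketch() {
  static unsigned char scratch[1024];
  tflite::GreedyMemoryPlanner planner(scratch, sizeof(scratch));
  tflite::MicroErrorReporter reporter;
  // A: 100 bytes, live over times [0, 2]. Largest, so placed at offset 0.
  planner.AddBuffer(&reporter, 100, 0, 2);
  // B: 50 bytes, live over [1, 3]. Overlaps A in time, appended at offset 100.
  planner.AddBuffer(&reporter, 50, 1, 3);
  // C: 40 bytes, live over [3, 4]. A is dead by time 3, so C reuses offset 0.
  planner.AddBuffer(&reporter, 40, 3, 4);
  // High-water mark = offset(B) + size(B) = 150 bytes.
  size_t arena_bytes = planner.GetMaximumMemorySize();
  (void)arena_bytes;
}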
@@ -241,7 +292,7 @@ size_t GreedyMemoryPlanner::GetMaximumMemorySize() {
if (buffer_count_ == 0) {
return 0;
}
ListEntry* entry = &buffers_sorted_by_offset_[0];
ListEntry* entry = &buffers_sorted_by_offset_[first_entry_index_];
size_t max_size = 0;
while (entry) {
BufferRequirements* requirements =

View File

@@ -21,6 +21,8 @@ limitations under the License.
namespace tflite {
constexpr int kOnlinePlannedBuffer = -1;
// A memory planner that uses a greedy algorithm to arrange buffers in memory
// to minimize the overall arena size needed.
//
@@ -59,6 +61,12 @@ class GreedyMemoryPlanner : public MemoryPlanner {
TfLiteStatus AddBuffer(ErrorReporter* error_reporter, int size,
int first_time_used, int last_time_used) override;
// Record details of an offline planned buffer offset we want to place.
// offline_offset is the buffer offset from the start of the arena.
TfLiteStatus AddBuffer(ErrorReporter* error_reporter, int size,
int first_time_used, int last_time_used,
int offline_offset);
// Returns the high-water mark of used memory. This is the minimum size of a
// memory arena you'd need to allocate to hold these buffers.
size_t GetMaximumMemorySize() override;
@@ -90,8 +98,8 @@ class GreedyMemoryPlanner : public MemoryPlanner {
static size_t per_buffer_size() {
const int per_buffer_size =
sizeof(BufferRequirements) + // requirements_
sizeof(int) + // buffer_sizes_sorted_by_size_
sizeof(int) + // buffer_ids_sorted_by_size_
sizeof(int) + // buffer_sizes_sorted_
sizeof(int) + // buffer_ids_sorted_
sizeof(ListEntry) + // buffers_sorted_by_offset_
sizeof(int); // buffer_offsets_;
return per_buffer_size;
@@ -121,16 +129,25 @@ class GreedyMemoryPlanner : public MemoryPlanner {
// Records the client-provided information about each buffer.
struct BufferRequirements {
int size;
int offline_offset;
int first_time_used;
int last_time_used;
};
// Working arrays used during the layout algorithm.
BufferRequirements* requirements_;
int* buffer_sizes_sorted_by_size_;
int* buffer_ids_sorted_by_size_;
// buffer_sizes_sorted_ and buffer_ids_sorted_ are sorted according to:
// {
// offline planned buffers,
// online planned buffers sorted by size
// }
int* buffer_sizes_sorted_;
int* buffer_ids_sorted_;
ListEntry* buffers_sorted_by_offset_;
int next_free_entry_;
int next_free_entry_; // Index of the next free entry of
// buffers_sorted_by_offset_
int first_entry_index_; // Index of the first entry (smallest offset) of
// buffers_sorted_by_offset_
// Stores the outcome of the plan, the location of each buffer in the arena.
int* buffer_offsets_;
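As a rough usage sketch of the two AddBuffer overloads declared above (the planner scratch size, buffer sizes, lifetimes and the 512-byte offline offset are illustrative assumptions based on this version of the header, not values taken from the change itself):

#include "tensorflow/lite/micro/memory_planner/greedy_memory_planner.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"

void SketchPlannerUsage() {
  tflite::MicroErrorReporter micro_error_reporter;
  tflite::ErrorReporter* error_reporter = &micro_error_reporter;

  // Working memory used by the planner itself for its internal arrays.
  unsigned char planner_scratch[1024];
  tflite::GreedyMemoryPlanner planner(planner_scratch, sizeof(planner_scratch));

  // Online planned buffer: the planner chooses the offset.
  planner.AddBuffer(error_reporter, /*size=*/256,
                    /*first_time_used=*/0, /*last_time_used=*/2);
  // Offline planned buffer: the given offset (512) is treated as constant.
  planner.AddBuffer(error_reporter, /*size=*/128,
                    /*first_time_used=*/1, /*last_time_used=*/3,
                    /*offline_offset=*/512);

  int offset = 0;
  planner.GetOffsetForBuffer(error_reporter, /*buffer_index=*/0, &offset);
  size_t arena_bytes_needed = planner.GetMaximumMemorySize();
  (void)offset;
  (void)arena_bytes_needed;
}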

File diff suppressed because it is too large

View File

@@ -1,4 +1,4 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -15,33 +15,42 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_
#include <cstddef>
#include <cstdint>
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
// Namespace used for unittests.
namespace internal {
// Sets up all of the data structure members for a runtime tensor
// based on the contents of a serialized tensor.
TfLiteStatus InitializeRuntimeTensor(
SimpleMemoryAllocator* allocator, const tflite::Tensor& flatbuffer_tensor,
// Sets up all of the data structure members for a TfLiteTensor based on the
// contents of a serialized tensor in the flatbuffer.
// TODO(b/162311891): Drop this method when the interpreter has an API for
// returning buffers on TfLiteEvalTensor.
TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
SimpleMemoryAllocator* allocator, bool allocate_temp,
const tflite::Tensor& flatbuffer_tensor,
const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
ErrorReporter* error_reporter, TfLiteTensor* result);
// A handle tracking a scratch buffer allocation. This handle is created by
// `RequestScratchBufferInArena`. The `data` field is populated in
// `FinishTensorAllocation` after static memory planning.
// TODO(b/150257460) As a future optimization, this struct could be replaced by
// a union, since once `data` is populated, `bytes` and `node_idx` are no
// longer needed.
// Holds placeholder information for a scratch buffer request from a kernel.
// This struct is only used during the model prepare stage. Each request from a
// kernel is stored in the head section. During the prepare stage, the head
// section will hold at least kMaxScratchBuffersPerOp requests plus any
// requests from previously prepared kernels.
//
// When the memory plan is finalized, these structs are no longer used in favor
// of a sequential array of ScratchBufferHandle allocations in the tail
// section. These allocations are indexed by the request API defined in the
// TfLiteContext struct.
typedef struct {
// Pointer to the scratch buffer.
uint8_t* data;
// Number of bytes required by the buffer. The actual allocated size might be
// greater than `bytes` due to buffer alignment.
size_t bytes;
@@ -49,7 +58,8 @@ typedef struct {
// determine the lifetime of the buffer. In AllocationInfo, this buffer will
// have `before` = node_idx and `after` = node_idx.
int node_idx;
} ScratchBufferHandle;
} ScratchBufferRequest;
} // namespace internal
typedef struct {
@@ -57,9 +67,27 @@ typedef struct {
const TfLiteRegistration* registration;
} NodeAndRegistration;
// Holds a pointer to a buffer for a scratch buffer requested by a kernel during
// the model prepare stage. This struct is allocated in-place and allows for
// quick pointer-indexed lookup for speed during model inference.
typedef struct {
// Pointer to location of the scratch buffer:
uint8_t* data;
} ScratchBufferHandle;
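A hypothetical kernel-side sketch of how these two structs are exercised through TfLiteContext: the request is made in Prepare (stored as a ScratchBufferRequest in the head section) and the planned pointer is fetched in Eval via the ScratchBufferHandle array. The kernel name, op data struct and 1024-byte size are assumptions for illustration only.

#include <cstdint>

#include "tensorflow/lite/c/common.h"

struct HypotheticalOpData {
  int scratch_index;  // Index handed back by RequestScratchBufferInArena.
};

TfLiteStatus HypotheticalPrepare(TfLiteContext* context, TfLiteNode* node) {
  auto* data = static_cast<HypotheticalOpData*>(node->user_data);
  // During Prepare this stores a ScratchBufferRequest in the head section and
  // only hands back an index; no usable pointer exists yet.
  return context->RequestScratchBufferInArena(context, /*bytes=*/1024,
                                              &data->scratch_index);
}

TfLiteStatus HypotheticalEval(TfLiteContext* context, TfLiteNode* node) {
  auto* data = static_cast<HypotheticalOpData*>(node->user_data);
  // After FinishModelAllocation the index maps to a ScratchBufferHandle, so
  // this is a simple pointer-indexed lookup.
  uint8_t* scratch = static_cast<uint8_t*>(
      context->GetScratchBuffer(context, data->scratch_index));
  (void)scratch;  // ... use the scratch memory here ...
  return kTfLiteOk;
}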
// Allocator responsible for allocating memory for all intermediate tensors
// necessary to invoke a model.
//
// The lifetime of the model, tensor arena and error reporter must be at
// least as long as that of the allocator object, since the allocator needs
// them to be accessible during its entire lifetime.
//
// The MicroAllocator simply plans out additional allocations that are required
// to stand up a model for inference in TF Micro. This class currently relies on
// an additional allocator - SimpleMemoryAllocator - for all allocations from an
// arena. These allocations are divided into head (non-persistent) and tail
// (persistent) regions:
//
// The memory layout below helps explain how it works.
// This information could change in a future version.
// ************** .memory_allocator->GetBuffer()
@@ -72,76 +100,179 @@ typedef struct {
// ************** .memory_allocator->GetBuffer() + ->GetMaxBufferSize()
class MicroAllocator {
public:
// The lifetime of the model, tensor allocator and error reporter must be at
// least as long as that of the allocator object, since the allocator needs
// them to be accessible during its entire lifetime.
// Creates a MicroAllocator instance from a given tensor arena. This arena
// will be managed by the created instance.
// Note: Please use __declspec(align(16)) to make sure tensor_arena is 16-byte
// aligned; otherwise some head room will be wasted.
MicroAllocator(TfLiteContext* context, const Model* model,
uint8_t* tensor_arena, size_t arena_size,
ErrorReporter* error_reporter);
// TODO(b/157615197): Cleanup constructor + factory usage.
static MicroAllocator* Create(uint8_t* tensor_arena, size_t arena_size,
ErrorReporter* error_reporter);
// Runs through the model and allocates all necessary input, output and
// intermediate tensors.
// WARNING: doing any allocation after calling this method has the risk of
// corrupting tensor data so this method should be the last non-const method
// called in this class.
TfLiteStatus FinishTensorAllocation();
// Creates a MicroAllocator instance using the provided SimpleMemoryAllocator
// instance. This allocator instance will use the SimpleMemoryAllocator
// instance to manage allocations internally.
static MicroAllocator* Create(SimpleMemoryAllocator* memory_allocator,
ErrorReporter* error_reporter);
// Returns the arena usage in bytes, only available after
// `FinishTensorAllocation`. Otherwise, it will return 0.
size_t used_bytes() const {
if (active_) {
return 0;
}
return memory_allocator_->GetUsedBytes();
}
// Begin allocating internal resources required for model inference.
// This method will run through the flatbuffer data supplied in the model to
// properly allocate tensor, node, and op registration data. This method is
// expected to be followed with a call to FinishModelAllocation() before
// resuming allocation with another model. All persistent tensor buffers are
// stored in the out-param eval_tensors. This value is allocated from the
// persistent memory arena and will be used to host runtime tensor buffers.
TfLiteStatus StartModelAllocation(
const Model* model, const MicroOpResolver& op_resolver,
NodeAndRegistration** node_and_registrations,
TfLiteEvalTensor** eval_tensors);
// Run through the model to allocate nodes and registrations. We need to keep
// them for the entire lifetime of the model to allow persistent tensors.
// This method needs to be called before the FinishTensorAllocation method.
TfLiteStatus AllocateNodeAndRegistrations(
const OpResolver& op_resolver,
NodeAndRegistration** node_and_registrations);
// Finish allocating internal resources required for model inference.
// This method will plan non-persistent buffers and commit a memory plan to
// the 'head' section of the memory arena. All variable tensor data will also
// be allocated. This method should be called after assigning model resources
// in StartModelAllocation(). The eval_tensors pointer should be the value
// passed into this class during StartModelAllocation(). Scratch buffer
// handles are stored in the out-param `scratch_buffer_handles`. This value
// will be used in `GetScratchBuffer` call to retrieve scratch buffers.
TfLiteStatus FinishModelAllocation(
const Model* model, TfLiteEvalTensor* eval_tensors,
ScratchBufferHandle** scratch_buffer_handles);
// Allocates a TfLiteTensor struct and populates the returned value with
// properties from the model flatbuffer. This struct is allocated from
// persistent arena memory and is only guaranteed for the lifetime of the
// application. The eval_tensors pointer should be the value passed into this
// class during StartModelAllocation() and contains the source-of-truth for
// buffers.
virtual TfLiteTensor* AllocatePersistentTfLiteTensor(
const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index);
// Allocates a TfLiteTensor struct and populates the returned value with
// properties from the model flatbuffer. This struct is allocated from
// temporary arena memory and is only guaranteed until a call is made to
// ResetTempAllocations(). The eval_tensors pointer should be the value passed
// into this class during StartModelAllocation() and contains the
// source-of-truth for buffers.
virtual TfLiteTensor* AllocateTempTfLiteTensor(const Model* model,
TfLiteEvalTensor* eval_tensors,
int tensor_index);
// Resets all temporary allocations. This method should be called after a
// chain of temp allocations (e.g. a chain of TfLiteTensor objects via
// AllocateTempTfLiteTensor()).
virtual void ResetTempAllocations();
// Allocates a persistent buffer which has the same lifetime as the allocator.
// The memory is immediately available and is allocated from the tail of the
// arena.
TfLiteStatus AllocatePersistentBuffer(size_t bytes, void** ptr);
virtual void* AllocatePersistentBuffer(size_t bytes);
// Register a scratch buffer of size `bytes` for Node with `node_id`.
// This method only allocates a BufferHandle holding information for memory
// planning. The buffer ptr is ready after `FinishTensorAllocation` and can
// be retrieved by the `GetScratchBuffer` method using the returned buffer_idx.
// Note that there should be no tail allocation between two consecutive
// `RequestScratchBufferInArena` calls.
TfLiteStatus RequestScratchBufferInArena(int node_id, size_t bytes,
int* buffer_idx);
// Returns the pointer to the planned scratch buffer.
void* GetScratchBuffer(int buffer_idx) const;
// This method only requests a buffer with a given size to be used after a
// model has finished allocation via FinishModelAllocation(). All requested
// buffers will be accessible by the out-param in that method.
TfLiteStatus RequestScratchBufferInArena(size_t bytes, int* buffer_idx);
// Finish allocating a specific NodeAndRegistration prepare block (kernel
// entry for a model) with a given node ID. This call ensures that any scratch
// buffer requests and temporary allocations are handled and ready for the
// next node prepare block.
TfLiteStatus FinishPrepareNodeAllocations(int node_id);
// Returns the arena usage in bytes, only available after
// `FinishModelAllocation`. Otherwise, it will return 0.
size_t used_bytes() const;
protected:
MicroAllocator(SimpleMemoryAllocator* memory_allocator,
ErrorReporter* error_reporter);
virtual ~MicroAllocator();
// Allocates an array in the arena to hold the node and registration data
// required to represent the inference graph of the model.
virtual TfLiteStatus AllocateNodeAndRegistrations(
const Model* model, NodeAndRegistration** node_and_registrations);
// Populates node and registration pointers representing the inference graph
// of the model from values inside the flatbuffer (loaded from the TfLiteModel
// instance). Persistent data (e.g. operator data) is allocated from the
// arena.
virtual TfLiteStatus PrepareNodeAndRegistrationDataFromFlatbuffer(
const Model* model, const MicroOpResolver& op_resolver,
NodeAndRegistration* node_and_registrations);
// Allocates the list of persistent TfLiteEvalTensors that are used for the
// "eval" phase of model inference. These structs will be the source of truth
// for all tensor buffers. Allocation results are stored in the out-param
// eval_tensors.
virtual TfLiteStatus AllocateTfLiteEvalTensors(
const Model* model, TfLiteEvalTensor** eval_tensors);
// Allocates persistent tensor buffers for variable tensors in the subgraph.
virtual TfLiteStatus AllocateVariables(const SubGraph* subgraph,
TfLiteEvalTensor* eval_tensors);
// Allocate and return a persistent TfLiteTensor.
// TODO(b/162311891): Drop this method when the interpreter has an API for
// accessing TfLiteEvalTensor structs.
virtual TfLiteTensor* AllocatePersistentTfLiteTensorInternal(
const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index);
// Populates a TfLiteTensor struct with data from the model flatbuffer. Any
// quantization data is allocated from either the tail (persistent) or temp
// sections of the arena based on the allocation flag.
virtual TfLiteStatus PopulateTfLiteTensorFromFlatbuffer(
const Model* model, const SubGraph* subgraph, TfLiteTensor* tensor,
int tensor_index, bool allocate_temp);
ErrorReporter* error_reporter() const;
// Returns the first subgraph from the model.
const SubGraph* GetSubGraphFromModel(const Model* model);
private:
TfLiteStatus Init();
// Commits a memory plan for all non-persistent buffer allocations in the
// 'head' section of the memory arena. The eval_tensors pointer is the list of
// pre-allocated TfLiteEvalTensor structs that will point to the buffers that
// will be allocated into the head section in this function call. The
// scratch_buffer_handles pointer is the array of pre-allocated
// ScratchBufferHandle structs that will point to allocated buffers also in
// the head section.
virtual TfLiteStatus CommitStaticMemoryPlan(
const Model* model, const SubGraph* subgraph,
TfLiteEvalTensor* eval_tensors,
ScratchBufferHandle* scratch_buffer_handles);
const Model* model_;
// A simple memory allocator that always allocates from the arena tail.
// Allocates an array of ScratchBufferHandle structs in the tail section for a
// given number of handles.
virtual TfLiteStatus AllocateScratchBufferHandles(
ScratchBufferHandle** scratch_buffer_handles, size_t handle_count);
// Clears all internal scratch buffer request counts and resets the head to
// prepare for kernels to request scratch buffer data when a model is
// preparing.
TfLiteStatus InitScratchBufferData();
// Returns the pointer for the array of ScratchBufferRequest allocations in
// the head section.
internal::ScratchBufferRequest* GetScratchBufferRequests();
// A simple memory allocator that always allocates from the arena tail or head.
SimpleMemoryAllocator* memory_allocator_;
ErrorReporter* error_reporter_;
TfLiteContext* context_;
// Indicates whether the allocator is ready for allocation.
bool active_ = false;
bool model_is_allocating_;
// In reverse order for efficiency.
// i.e. scratch_buffer_handles_[0] is the handle for the last buffer,
// corresponding to the last RequestScratchBufferInArena call.
internal::ScratchBufferHandle* scratch_buffer_handles_ = nullptr;
// How many scratch buffers have been allocated.
size_t scratch_buffer_count_ = 0;
// Holds the number of ScratchBufferRequest instances stored in the head
// section when a model is allocating.
size_t scratch_buffer_request_count_ = 0;
const SubGraph* subgraph_;
const flatbuffers::Vector<flatbuffers::Offset<Operator>>* operators_;
const flatbuffers::Vector<flatbuffers::Offset<Tensor>>* tensors_;
// Holds the byte length of the memory plan with the largest head usage. Used
// to ensure that multi-tenant allocations can share the head for buffers.
size_t max_head_buffer_usage_ = 0;
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
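For orientation, a minimal sketch of driving the two-phase API declared above; in practice MicroInterpreter makes these calls, and the 8 KB arena size and function name here are assumptions:

#include "tensorflow/lite/micro/micro_allocator.h"

constexpr size_t kArenaSize = 8 * 1024;  // Assumed size.
alignas(16) uint8_t g_arena[kArenaSize];

TfLiteStatus SketchModelAllocation(const tflite::Model* model,
                                   const tflite::MicroOpResolver& op_resolver,
                                   tflite::ErrorReporter* error_reporter) {
  tflite::MicroAllocator* allocator =
      tflite::MicroAllocator::Create(g_arena, kArenaSize, error_reporter);

  tflite::NodeAndRegistration* node_and_registrations = nullptr;
  TfLiteEvalTensor* eval_tensors = nullptr;
  // Phase 1: tensors, nodes and registrations are set up from the flatbuffer.
  TF_LITE_ENSURE_STATUS(allocator->StartModelAllocation(
      model, op_resolver, &node_and_registrations, &eval_tensors));

  // ... kernel init/prepare runs here and may request scratch buffers ...

  tflite::ScratchBufferHandle* scratch_buffer_handles = nullptr;
  // Phase 2: plan and commit the non-persistent (head) section.
  TF_LITE_ENSURE_STATUS(allocator->FinishModelAllocation(
      model, eval_tensors, &scratch_buffer_handles));
  return kTfLiteOk;
}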

View File

@@ -15,7 +15,10 @@ limitations under the License.
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include <cstdarg>
#ifndef TF_LITE_STRIP_ERROR_STRINGS
#include "tensorflow/lite/micro/debug_log.h"
#include "tensorflow/lite/micro/micro_string.h"
#endif

View File

@@ -15,9 +15,10 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_MICRO_MICRO_ERROR_REPORTER_H_
#define TENSORFLOW_LITE_MICRO_MICRO_ERROR_REPORTER_H_
#include <cstdarg>
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/debug_log.h"
namespace tflite {

View File

@@ -1,4 +1,4 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -14,16 +14,24 @@ limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/micro_interpreter.h"
#include <cstdarg>
#include <cstddef>
#include <cstdint>
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/tensor_utils.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/micro_optional_debug_tools.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/micro/micro_profiler.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
namespace {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
const char* OpNameFromRegistration(const TfLiteRegistration* registration) {
if (registration->builtin_code == BuiltinOperator_CUSTOM) {
return registration->custom_name;
@@ -31,88 +39,111 @@ const char* OpNameFromRegistration(const TfLiteRegistration* registration) {
return EnumNameBuiltinOperator(BuiltinOperator(registration->builtin_code));
}
}
#endif // !defined(TF_LITE_STRIP_ERROR_STRINGS)
} // namespace
namespace internal {
TfLiteStatus ContextHelper::AllocatePersistentBuffer(TfLiteContext* ctx,
size_t bytes, void** ptr) {
ContextHelper::ContextHelper(ErrorReporter* error_reporter,
MicroAllocator* allocator, const Model* model)
: allocator_(allocator), error_reporter_(error_reporter), model_(model) {}
void* ContextHelper::AllocatePersistentBuffer(TfLiteContext* ctx,
size_t bytes) {
return reinterpret_cast<ContextHelper*>(ctx->impl_)
->allocator_->AllocatePersistentBuffer(bytes, ptr);
->allocator_->AllocatePersistentBuffer(bytes);
}
TfLiteStatus ContextHelper::RequestScratchBufferInArena(TfLiteContext* ctx,
size_t bytes,
int* buffer_idx) {
ContextHelper* helper = reinterpret_cast<ContextHelper*>(ctx->impl_);
return helper->allocator_->RequestScratchBufferInArena(
helper->current_node_idx_, bytes, buffer_idx);
return helper->allocator_->RequestScratchBufferInArena(bytes, buffer_idx);
}
void* ContextHelper::GetScratchBuffer(TfLiteContext* ctx, int buffer_idx) {
return reinterpret_cast<ContextHelper*>(ctx->impl_)
->allocator_->GetScratchBuffer(buffer_idx);
ContextHelper* helper = reinterpret_cast<ContextHelper*>(ctx->impl_);
ScratchBufferHandle* handle = helper->scratch_buffer_handles_ + buffer_idx;
return handle->data;
}
void ContextHelper::ReportOpError(struct TfLiteContext* context,
const char* format, ...) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
ContextHelper* helper = static_cast<ContextHelper*>(context->impl_);
va_list args;
va_start(args, format);
TF_LITE_REPORT_ERROR(helper->error_reporter_, format, args);
va_end(args);
#endif
}
TfLiteTensor* ContextHelper::GetTensor(const struct TfLiteContext* context,
int tensor_idx) {
ContextHelper* helper = static_cast<ContextHelper*>(context->impl_);
return helper->allocator_->AllocateTempTfLiteTensor(
helper->model_, helper->eval_tensors_, tensor_idx);
}
TfLiteEvalTensor* ContextHelper::GetEvalTensor(
const struct TfLiteContext* context, int tensor_idx) {
ContextHelper* helper = reinterpret_cast<ContextHelper*>(context->impl_);
return &helper->eval_tensors_[tensor_idx];
}
void ContextHelper::SetTfLiteEvalTensors(TfLiteEvalTensor* eval_tensors) {
eval_tensors_ = eval_tensors;
}
void ContextHelper::SetScratchBufferHandles(
ScratchBufferHandle* scratch_buffer_handles) {
scratch_buffer_handles_ = scratch_buffer_handles;
}
} // namespace internal
MicroInterpreter::MicroInterpreter(const Model* model,
const OpResolver& op_resolver,
const MicroOpResolver& op_resolver,
uint8_t* tensor_arena,
size_t tensor_arena_size,
ErrorReporter* error_reporter)
ErrorReporter* error_reporter,
tflite::Profiler* profiler)
: model_(model),
op_resolver_(op_resolver),
error_reporter_(error_reporter),
allocator_(&context_, model_, tensor_arena, tensor_arena_size,
error_reporter_),
allocator_(*MicroAllocator::Create(tensor_arena, tensor_arena_size,
error_reporter)),
tensors_allocated_(false),
context_helper_(error_reporter_, &allocator_) {
const flatbuffers::Vector<flatbuffers::Offset<SubGraph>>* subgraphs =
model->subgraphs();
if (subgraphs->size() != 1) {
TF_LITE_REPORT_ERROR(error_reporter,
"Only 1 subgraph is currently supported.\n");
initialization_status_ = kTfLiteError;
return;
}
subgraph_ = (*subgraphs)[0];
tensors_ = subgraph_->tensors();
operators_ = subgraph_->operators();
initialization_status_(kTfLiteError),
eval_tensors_(nullptr),
context_helper_(error_reporter_, &allocator_, model),
input_tensor_(nullptr),
output_tensor_(nullptr) {
Init(profiler);
}
context_.impl_ = static_cast<void*>(&context_helper_);
context_.ReportError = context_helper_.ReportOpError;
context_.recommended_num_threads = 1;
// If the system is big endian then convert weights from the flatbuffer from
// little to big endian on startup so that it does not need to be done during
// inference.
// NOTE: This requires that the flatbuffer is held in memory which can be
// modified by this process.
if (!FLATBUFFERS_LITTLEENDIAN) {
for (size_t t = 0; t < tensors_size(); ++t) {
TfLiteTensor* thisTensor = &context_.tensors[t];
if (thisTensor->allocation_type == kTfLiteMmapRo)
CorrectTensorEndianness(thisTensor);
}
}
initialization_status_ = kTfLiteOk;
MicroInterpreter::MicroInterpreter(const Model* model,
const MicroOpResolver& op_resolver,
MicroAllocator* allocator,
ErrorReporter* error_reporter,
tflite::Profiler* profiler)
: model_(model),
op_resolver_(op_resolver),
error_reporter_(error_reporter),
allocator_(*allocator),
tensors_allocated_(false),
initialization_status_(kTfLiteError),
eval_tensors_(nullptr),
context_helper_(error_reporter_, &allocator_, model),
input_tensor_(nullptr),
output_tensor_(nullptr) {
Init(profiler);
}
MicroInterpreter::~MicroInterpreter() {
if (node_and_registrations_ != nullptr) {
for (size_t i = 0; i < operators_->size(); ++i) {
for (size_t i = 0; i < subgraph_->operators()->size(); ++i) {
TfLiteNode* node = &(node_and_registrations_[i].node);
const TfLiteRegistration* registration =
node_and_registrations_[i].registration;
@@ -125,7 +156,28 @@ MicroInterpreter::~MicroInterpreter() {
}
}
void MicroInterpreter::CorrectTensorEndianness(TfLiteTensor* tensorCorr) {
void MicroInterpreter::Init(tflite::Profiler* profiler) {
const flatbuffers::Vector<flatbuffers::Offset<SubGraph>>* subgraphs =
model_->subgraphs();
if (subgraphs->size() != 1) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Only 1 subgraph is currently supported.\n");
initialization_status_ = kTfLiteError;
return;
}
subgraph_ = (*subgraphs)[0];
context_.impl_ = static_cast<void*>(&context_helper_);
context_.ReportError = context_helper_.ReportOpError;
context_.GetTensor = context_helper_.GetTensor;
context_.GetEvalTensor = context_helper_.GetEvalTensor;
context_.recommended_num_threads = 1;
context_.profiler = profiler;
initialization_status_ = kTfLiteOk;
}
void MicroInterpreter::CorrectTensorEndianness(TfLiteEvalTensor* tensorCorr) {
int32_t tensorSize = 1;
for (int d = 0; d < tensorCorr->dims->size; ++d)
tensorSize *= reinterpret_cast<const int32_t*>(tensorCorr->dims->data)[d];
@@ -149,6 +201,9 @@ void MicroInterpreter::CorrectTensorEndianness(TfLiteTensor* tensorCorr) {
case TfLiteType::kTfLiteComplex64:
CorrectTensorDataEndianness(tensorCorr->data.c64, tensorSize);
break;
case TfLiteType::kTfLiteComplex128:
CorrectTensorDataEndianness(tensorCorr->data.c128, tensorSize);
break;
default:
// Do nothing for other data types.
break;
@@ -163,16 +218,50 @@ void MicroInterpreter::CorrectTensorDataEndianness(T* data, int32_t size) {
}
TfLiteStatus MicroInterpreter::AllocateTensors() {
TF_LITE_ENSURE_OK(&context_, allocator_.AllocateNodeAndRegistrations(
op_resolver_, &node_and_registrations_));
if (allocator_.StartModelAllocation(model_, op_resolver_,
&node_and_registrations_,
&eval_tensors_) != kTfLiteOk) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Failed starting model allocation.\n");
initialization_status_ = kTfLiteError;
return kTfLiteError;
}
// Update the pointer now that TfLiteEvalTensor allocation has completed on
// the context helper.
// TODO(b/16157777): This call would not be needed if ContextHelper were rolled
// into the interpreter.
context_helper_.SetTfLiteEvalTensors(eval_tensors_);
context_.tensors_size = subgraph_->tensors()->size();
// If the system is big endian then convert weights from the flatbuffer from
// little to big endian on startup so that it does not need to be done during
// inference.
// NOTE: This requires that the flatbuffer is held in memory which can be
// modified by this process.
if (!FLATBUFFERS_LITTLEENDIAN) {
for (size_t t = 0; t < subgraph_->tensors()->size(); ++t) {
if (auto* buffer =
(*model_->buffers())[subgraph_->tensors()->Get(t)->buffer()]) {
// If we've found a buffer, does it have any data?
if (auto* array = buffer->data()) {
// If it has any data, is the data size larger than zero?
if (array->size()) {
// Update the endianness of the corresponding eval tensor since that
// struct holds the buffer used at inference time.
CorrectTensorEndianness(&eval_tensors_[t]);
}
}
}
}
}
// Only allow AllocatePersistentBuffer in Init stage.
context_.AllocatePersistentBuffer = context_helper_.AllocatePersistentBuffer;
context_.RequestScratchBufferInArena = nullptr;
context_.GetScratchBuffer = nullptr;
for (size_t i = 0; i < operators_->size(); ++i) {
context_helper_.SetNodeIndex(i);
for (size_t i = 0; i < subgraph_->operators()->size(); ++i) {
auto* node = &(node_and_registrations_[i].node);
auto* registration = node_and_registrations_[i].registration;
size_t init_data_size;
@@ -189,15 +278,12 @@ TfLiteStatus MicroInterpreter::AllocateTensors() {
registration->init(&context_, init_data, init_data_size);
}
}
context_helper_.SetNodeIndex(-1);
// Both AllocatePersistentBuffer and RequestScratchBufferInArena are available
// in the Prepare stage.
// Both AllocatePersistentBuffer and RequestScratchBufferInArena are
// available in the Prepare stage.
context_.RequestScratchBufferInArena =
context_helper_.RequestScratchBufferInArena;
for (size_t i = 0; i < operators_->size(); ++i) {
// Set node idx to annotate the lifetime for scratch buffers.
context_helper_.SetNodeIndex(i);
for (size_t i = 0; i < subgraph_->operators()->size(); ++i) {
auto* node = &(node_and_registrations_[i].node);
auto* registration = node_and_registrations_[i].registration;
if (registration->prepare) {
@@ -210,8 +296,8 @@ TfLiteStatus MicroInterpreter::AllocateTensors() {
return kTfLiteError;
}
}
allocator_.FinishPrepareNodeAllocations(/*node_id=*/i);
}
context_helper_.SetNodeIndex(-1);
// Prepare is done, we're ready for Invoke. Memory allocation is no longer
// allowed. Kernels can only fetch scratch buffers via GetScratchBuffer.
@@ -219,7 +305,14 @@ TfLiteStatus MicroInterpreter::AllocateTensors() {
context_.RequestScratchBufferInArena = nullptr;
context_.GetScratchBuffer = context_helper_.GetScratchBuffer;
TF_LITE_ENSURE_OK(&context_, allocator_.FinishTensorAllocation());
TF_LITE_ENSURE_OK(&context_,
allocator_.FinishModelAllocation(model_, eval_tensors_,
&scratch_buffer_handles_));
// TODO(b/16157777): Remove this when ContextHelper is rolled into this class.
context_helper_.SetScratchBufferHandles(scratch_buffer_handles_);
TF_LITE_ENSURE_STATUS(ResetVariableTensors());
tensors_allocated_ = true;
return kTfLiteOk;
}
@@ -237,12 +330,28 @@ TfLiteStatus MicroInterpreter::Invoke() {
TF_LITE_ENSURE_OK(&context_, AllocateTensors());
}
for (size_t i = 0; i < operators_->size(); ++i) {
for (size_t i = 0; i < subgraph_->operators()->size(); ++i) {
auto* node = &(node_and_registrations_[i].node);
auto* registration = node_and_registrations_[i].registration;
if (registration->invoke) {
TfLiteStatus invoke_status = registration->invoke(&context_, node);
TfLiteStatus invoke_status;
#ifndef NDEBUG // Omit profiler overhead from release builds.
// The case where profiler == nullptr is handled by
// ScopedOperatorProfile.
tflite::Profiler* profiler =
reinterpret_cast<tflite::Profiler*>(context_.profiler);
ScopedOperatorProfile scoped_profiler(
profiler, OpNameFromRegistration(registration), i);
#endif
invoke_status = registration->invoke(&context_, node);
// All TfLiteTensor structs used in the kernel are allocated from temp
// memory in the allocator. This creates a chain of allocations in the
// temp section. The call below resets the chain of allocations to
// prepare for the next call.
allocator_.ResetTempAllocations();
if (invoke_status == kTfLiteError) {
TF_LITE_REPORT_ERROR(
error_reporter_,
@@ -259,50 +368,82 @@ TfLiteStatus MicroInterpreter::Invoke() {
TfLiteTensor* MicroInterpreter::input(size_t index) {
const size_t length = inputs_size();
if ((index < 0) || (index >= length)) {
if (index >= length) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Input index %d out of range (length is %d)", index,
length);
return nullptr;
}
return &(context_.tensors[inputs().Get(index)]);
if (index != 0) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Input tensors not at index 0 are allocated from the "
"persistent memory arena. Repeat calls will cause excess "
"allocation!");
return allocator_.AllocatePersistentTfLiteTensor(model_, eval_tensors_,
inputs().Get(index));
}
if (input_tensor_ == nullptr) {
input_tensor_ = allocator_.AllocatePersistentTfLiteTensor(
model_, eval_tensors_, inputs().Get(index));
}
return input_tensor_;
}
TfLiteTensor* MicroInterpreter::output(size_t index) {
const size_t length = outputs_size();
if ((index < 0) || (index >= length)) {
if (index >= length) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Output index %d out of range (length is %d)", index,
length);
return nullptr;
}
return &(context_.tensors[outputs().Get(index)]);
if (index != 0) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Output tensors not at index 0 are allocated from the "
"persistent memory arena. Repeat calls will cause excess "
"allocation!");
return allocator_.AllocatePersistentTfLiteTensor(model_, eval_tensors_,
outputs().Get(index));
}
if (output_tensor_ == nullptr) {
// TODO(b/162311891): Drop these allocations when the interpreter supports
// handling buffers from TfLiteEvalTensor.
output_tensor_ = allocator_.AllocatePersistentTfLiteTensor(
model_, eval_tensors_, outputs().Get(index));
}
return output_tensor_;
}
TfLiteTensor* MicroInterpreter::tensor(size_t index) {
const size_t length = tensors_size();
if ((index < 0) || (index >= length)) {
if (index >= length) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Tensor index %d out of range (length is %d)", index,
length);
return nullptr;
}
return &context_.tensors[index];
return allocator_.AllocatePersistentTfLiteTensor(model_, eval_tensors_,
index);
}
TfLiteStatus MicroInterpreter::ResetVariableTensors() {
const size_t length = tensors_size();
for (size_t i = 0; i < length; ++i) {
TfLiteTensor* cur_tensor = tensor(i);
if (cur_tensor->is_variable) {
TfLiteStatus status = tflite::ResetVariableTensor(cur_tensor);
if (status != kTfLiteOk) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Failed to reset variable tensor at index: %d", i);
return status;
for (size_t i = 0; i < subgraph_->tensors()->size(); ++i) {
auto* tensor = subgraph_->tensors()->Get(i);
if (tensor->is_variable()) {
size_t buffer_size;
TF_LITE_ENSURE_STATUS(
TfLiteEvalTensorByteLength(&eval_tensors_[i], &buffer_size));
int value = 0;
if (tensor->type() == tflite::TensorType_INT8) {
value = tensor->quantization()->zero_point()->Get(0);
}
memset(eval_tensors_[i].data.raw, value, buffer_size);
}
}
return kTfLiteOk;
}

View File

@@ -1,4 +1,4 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -15,13 +15,18 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_MICRO_MICRO_INTERPRETER_H_
#define TENSORFLOW_LITE_MICRO_MICRO_INTERPRETER_H_
#include <cstddef>
#include <cstdint>
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/op_resolver.h"
#include "tensorflow/lite/core/api/profiler.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/portable_type_to_tflitetype.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/type_to_tflitetype.h"
namespace tflite {
@@ -30,47 +35,63 @@ namespace internal {
// A helper class to encapsulate the implementation of APIs in Context.
// context->impl_ points to an instance of this class.
// Check tensorflow/lite/c/common.h for detailed descriptions.
// TODO(b/16157777): Consider rolling this class into MicroInterpreter.
class ContextHelper {
public:
explicit ContextHelper(ErrorReporter* error_reporter,
MicroAllocator* allocator)
: allocator_(allocator), error_reporter_(error_reporter) {}
static TfLiteStatus AllocatePersistentBuffer(TfLiteContext* ctx, size_t bytes,
void** ptr);
MicroAllocator* allocator, const Model* model);
// Functions that will be assigned to function pointers on TfLiteContext:
static void* AllocatePersistentBuffer(TfLiteContext* ctx, size_t bytes);
static TfLiteStatus RequestScratchBufferInArena(TfLiteContext* ctx,
size_t bytes,
int* buffer_idx);
static void* GetScratchBuffer(TfLiteContext* ctx, int buffer_idx);
static void ReportOpError(struct TfLiteContext* context, const char* format,
...);
static TfLiteTensor* GetTensor(const struct TfLiteContext* context,
int tensor_idx);
static TfLiteEvalTensor* GetEvalTensor(const struct TfLiteContext* context,
int tensor_idx);
void SetNodeIndex(int idx) { current_node_idx_ = idx; }
// Sets the pointer to a list of TfLiteEvalTensor instances.
void SetTfLiteEvalTensors(TfLiteEvalTensor* eval_tensors);
// Sets the pointer to a list of ScratchBufferHandle instances.
void SetScratchBufferHandles(ScratchBufferHandle* scratch_buffer_handles);
private:
MicroAllocator* allocator_;
ErrorReporter* error_reporter_;
int current_node_idx_ = -1;
MicroAllocator* allocator_ = nullptr;
ErrorReporter* error_reporter_ = nullptr;
const Model* model_ = nullptr;
TfLiteEvalTensor* eval_tensors_ = nullptr;
ScratchBufferHandle* scratch_buffer_handles_ = nullptr;
};
} // namespace internal
class MicroInterpreter {
public:
// The lifetime of the model, op resolver, tensor arena, and error reporter
// must be at least as long as that of the interpreter object, since the
// interpreter may need to access them at any time. This means that you should
// usually create them with the same scope as each other, for example having
// them all allocated on the stack as local variables through a top-level
// function.
// The interpreter doesn't do any deallocation of any of the pointed-to
// objects, ownership remains with the caller.
MicroInterpreter(const Model* model, const OpResolver& op_resolver,
// The lifetime of the model, op resolver, tensor arena, error reporter and
// profiler must be at least as long as that of the interpreter object, since
// the interpreter may need to access them at any time. This means that you
// should usually create them with the same scope as each other, for example
// having them all allocated on the stack as local variables through a
// top-level function. The interpreter doesn't do any deallocation of any of
// the pointed-to objects, ownership remains with the caller.
MicroInterpreter(const Model* model, const MicroOpResolver& op_resolver,
uint8_t* tensor_arena, size_t tensor_arena_size,
ErrorReporter* error_reporter);
ErrorReporter* error_reporter,
tflite::Profiler* profiler = nullptr);
// Create an interpreter instance using an existing MicroAllocator instance.
// This constructor should be used when the allocator needs to handle
// allocations for more than one interpreter or when recording
// allocations inside the interpreter. The lifetime of the allocator must be
// as long as that of the interpreter object.
MicroInterpreter(const Model* model, const MicroOpResolver& op_resolver,
MicroAllocator* allocator, ErrorReporter* error_reporter,
tflite::Profiler* profiler = nullptr);
~MicroInterpreter();
@@ -132,7 +153,7 @@ class MicroInterpreter {
TfLiteStatus initialization_status() const { return initialization_status_; }
size_t operators_size() const { return operators_->size(); }
size_t operators_size() const { return subgraph_->operators()->size(); }
// For debugging only.
const NodeAndRegistration node_and_registration(int node_index) const {
@@ -147,8 +168,16 @@ class MicroInterpreter {
// arena_used_bytes() + 16.
size_t arena_used_bytes() const { return allocator_.used_bytes(); }
protected:
const MicroAllocator& allocator() const { return allocator_; }
const TfLiteContext& context() const { return context_; }
private:
void CorrectTensorEndianness(TfLiteTensor* tensorCorr);
// TODO(b/158263161): Consider switching to Create() function to enable better
// error reporting during initialization.
void Init(tflite::Profiler* profiler);
void CorrectTensorEndianness(TfLiteEvalTensor* tensorCorr);
template <class T>
void CorrectTensorDataEndianness(T* data, int32_t size);
@@ -156,18 +185,25 @@ class MicroInterpreter {
NodeAndRegistration* node_and_registrations_ = nullptr;
const Model* model_;
const OpResolver& op_resolver_;
const MicroOpResolver& op_resolver_;
ErrorReporter* error_reporter_;
TfLiteContext context_ = {};
MicroAllocator allocator_;
MicroAllocator& allocator_;
bool tensors_allocated_;
TfLiteStatus initialization_status_;
const flatbuffers::Vector<flatbuffers::Offset<Tensor>>* tensors_;
const flatbuffers::Vector<flatbuffers::Offset<Operator>>* operators_;
const SubGraph* subgraph_;
const SubGraph* subgraph_ = nullptr;
TfLiteEvalTensor* eval_tensors_ = nullptr;
ScratchBufferHandle* scratch_buffer_handles_ = nullptr;
// TODO(b/16157777): Drop this reference:
internal::ContextHelper context_helper_;
// TODO(b/162311891): Clean these pointers up when this class supports buffers
// from TfLiteEvalTensor.
TfLiteTensor* input_tensor_;
TfLiteTensor* output_tensor_;
};
} // namespace tflite
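A minimal end-to-end usage sketch of the interpreter declared above, mirroring the pattern used in the examples directory; the model bytes, arena size, resolver choice and int8 input type are assumptions:

#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/schema/schema_generated.h"

constexpr size_t kTensorArenaSize = 16 * 1024;  // Assumed size.
alignas(16) uint8_t tensor_arena[kTensorArenaSize];

TfLiteStatus SketchInvoke(const void* model_data) {
  static tflite::MicroErrorReporter micro_error_reporter;
  const tflite::Model* model = tflite::GetModel(model_data);

  static tflite::AllOpsResolver resolver;  // Or a trimmed MicroMutableOpResolver<N>.
  tflite::MicroInterpreter interpreter(model, resolver, tensor_arena,
                                       kTensorArenaSize, &micro_error_reporter);
  TF_LITE_ENSURE_STATUS(interpreter.AllocateTensors());

  // input()/output() hand back persistent TfLiteTensor structs; index 0 is
  // cached, other indices allocate from the persistent arena on every call.
  TfLiteTensor* input = interpreter.input(0);
  input->data.int8[0] = 0;  // Fill with real input data (int8 model assumed).

  TF_LITE_ENSURE_STATUS(interpreter.Invoke());

  TfLiteTensor* output = interpreter.output(0);
  (void)output;  // Results are read from output->data.
  return kTfLiteOk;
}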

View File

@@ -1,4 +1,4 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -15,109 +15,454 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_MICRO_MICRO_MUTABLE_OP_RESOLVER_H_
#define TENSORFLOW_LITE_MICRO_MICRO_MUTABLE_OP_RESOLVER_H_
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/op_resolver.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include <cstdio>
#include <cstring>
#ifndef TFLITE_REGISTRATIONS_MAX
#define TFLITE_REGISTRATIONS_MAX (128)
#endif
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/kernels/fully_connected.h"
#include "tensorflow/lite/micro/kernels/micro_ops.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
// Op versions discussed in this file are enumerated here:
// tensorflow/lite/tools/versioning/op_version.cc
inline int MicroOpResolverAnyVersion() { return 0; }
template <unsigned int tOpCount = TFLITE_REGISTRATIONS_MAX>
class MicroOpResolver : public OpResolver {
template <unsigned int tOpCount>
class MicroMutableOpResolver : public MicroOpResolver {
public:
const TfLiteRegistration* FindOp(tflite::BuiltinOperator op,
int version) const override {
explicit MicroMutableOpResolver(ErrorReporter* error_reporter = nullptr)
: error_reporter_(error_reporter) {}
const TfLiteRegistration* FindOp(tflite::BuiltinOperator op) const override {
if (op == BuiltinOperator_CUSTOM) return nullptr;
for (unsigned int i = 0; i < registrations_len_; ++i) {
const TfLiteRegistration& registration = registrations_[i];
if ((registration.builtin_code == op) &&
(registration.version == MicroOpResolverAnyVersion() ||
registration.version == version)) {
if (registration.builtin_code == op) {
return &registration;
}
}
return nullptr;
}
const TfLiteRegistration* FindOp(const char* op, int version) const override {
const TfLiteRegistration* FindOp(const char* op) const override {
for (unsigned int i = 0; i < registrations_len_; ++i) {
const TfLiteRegistration& registration = registrations_[i];
if ((registration.builtin_code == BuiltinOperator_CUSTOM) &&
(strcmp(registration.custom_name, op) == 0) &&
(registration.version == MicroOpResolverAnyVersion() ||
registration.version == version)) {
(strcmp(registration.custom_name, op) == 0)) {
return &registration;
}
}
return nullptr;
}
void AddBuiltin(tflite::BuiltinOperator op, TfLiteRegistration* registration,
int version = 1) {
if (registrations_len_ >= tOpCount) {
// TODO(b/147748244) - Add error reporting hooks so we can report this!
return;
MicroOpResolver::BuiltinParseFunction GetOpDataParser(
BuiltinOperator op) const override {
TFLITE_DCHECK(num_buitin_ops_ <= tOpCount);
for (unsigned int i = 0; i < num_buitin_ops_; ++i) {
if (builtin_codes_[i] == op) return builtin_parsers_[i];
}
TfLiteRegistration* new_registration = &registrations_[registrations_len_];
registrations_len_ += 1;
*new_registration = *registration;
new_registration->builtin_code = op;
new_registration->version = version;
return nullptr;
}
void AddBuiltin(tflite::BuiltinOperator op, TfLiteRegistration* registration,
int min_version, int max_version) {
for (int version = min_version; version <= max_version; ++version) {
AddBuiltin(op, registration, version);
}
}
void AddCustom(const char* name, TfLiteRegistration* registration,
int version = 1) {
// Registers a Custom Operator with the MicroOpResolver.
//
// Only the first call for a given name will be successful. That is, if this
// function is called again for a previously added Custom Operator, the
// MicroOpResolver will be unchanged and this function will return
// kTfLiteError.
TfLiteStatus AddCustom(const char* name, TfLiteRegistration* registration) {
if (registrations_len_ >= tOpCount) {
// TODO(b/147748244) - Add error reporting hooks so we can report this!
return;
if (error_reporter_) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Couldn't register custom op '%s', resolver size is too small (%d)",
name, tOpCount);
}
return kTfLiteError;
}
if (FindOp(name) != nullptr) {
if (error_reporter_ != nullptr) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Calling AddCustom for the same op more than once "
"is not supported (Op: %s).",
name);
}
return kTfLiteError;
}
TfLiteRegistration* new_registration = &registrations_[registrations_len_];
registrations_len_ += 1;
*new_registration = *registration;
new_registration->builtin_code = BuiltinOperator_CUSTOM;
new_registration->custom_name = name;
new_registration->version = version;
return kTfLiteOk;
}
void AddCustom(const char* name, TfLiteRegistration* registration,
int min_version, int max_version) {
for (int version = min_version; version <= max_version; ++version) {
AddCustom(name, registration, version);
}
// The Add* functions below add the various Builtin operators to the
// MicroMutableOpResolver object.
TfLiteStatus AddAbs() {
return AddBuiltin(BuiltinOperator_ABS, tflite::ops::micro::Register_ABS(),
ParseAbs);
}
TfLiteStatus AddAdd() {
return AddBuiltin(BuiltinOperator_ADD, tflite::ops::micro::Register_ADD(),
ParseAdd);
}
TfLiteStatus AddArgMax() {
return AddBuiltin(BuiltinOperator_ARG_MAX,
tflite::ops::micro::Register_ARG_MAX(), ParseArgMax);
}
TfLiteStatus AddArgMin() {
return AddBuiltin(BuiltinOperator_ARG_MIN,
tflite::ops::micro::Register_ARG_MIN(), ParseArgMin);
}
TfLiteStatus AddAveragePool2D() {
return AddBuiltin(BuiltinOperator_AVERAGE_POOL_2D,
tflite::ops::micro::Register_AVERAGE_POOL_2D(),
ParsePool);
}
TfLiteStatus AddCeil() {
return AddBuiltin(BuiltinOperator_CEIL, tflite::ops::micro::Register_CEIL(),
ParseCeil);
}
TfLiteStatus AddCircularBuffer() {
return AddCustom("CIRCULAR_BUFFER",
tflite::ops::micro::Register_CIRCULAR_BUFFER());
}
TfLiteStatus AddConcatenation() {
return AddBuiltin(BuiltinOperator_CONCATENATION,
tflite::ops::micro::Register_CONCATENATION(),
ParseConcatenation);
}
TfLiteStatus AddConv2D() {
return AddBuiltin(BuiltinOperator_CONV_2D, Register_CONV_2D(), ParseConv2D);
}
TfLiteStatus AddCos() {
return AddBuiltin(BuiltinOperator_COS, tflite::ops::micro::Register_COS(),
ParseCos);
}
TfLiteStatus AddDepthwiseConv2D() {
return AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D,
Register_DEPTHWISE_CONV_2D(), ParseDepthwiseConv2D);
}
TfLiteStatus AddDequantize() {
return AddBuiltin(BuiltinOperator_DEQUANTIZE,
tflite::ops::micro::Register_DEQUANTIZE(),
ParseDequantize);
}
TfLiteStatus AddEqual() {
return AddBuiltin(BuiltinOperator_EQUAL,
tflite::ops::micro::Register_EQUAL(), ParseEqual);
}
TfLiteStatus AddFloor() {
return AddBuiltin(BuiltinOperator_FLOOR,
tflite::ops::micro::Register_FLOOR(), ParseFloor);
}
TfLiteStatus AddFullyConnected(
const TfLiteRegistration& registration = Register_FULLY_CONNECTED()) {
return AddBuiltin(BuiltinOperator_FULLY_CONNECTED, registration,
ParseFullyConnected);
}
TfLiteStatus AddGreater() {
return AddBuiltin(BuiltinOperator_GREATER,
tflite::ops::micro::Register_GREATER(), ParseGreater);
}
TfLiteStatus AddGreaterEqual() {
return AddBuiltin(BuiltinOperator_GREATER_EQUAL,
tflite::ops::micro::Register_GREATER_EQUAL(),
ParseGreaterEqual);
}
TfLiteStatus AddHardSwish() {
return AddBuiltin(BuiltinOperator_HARD_SWISH,
tflite::ops::micro::Register_HARD_SWISH(),
ParseHardSwish);
}
TfLiteStatus AddL2Normalization() {
return AddBuiltin(BuiltinOperator_L2_NORMALIZATION,
tflite::ops::micro::Register_L2_NORMALIZATION(),
ParseL2Normalization);
}
TfLiteStatus AddLess() {
return AddBuiltin(BuiltinOperator_LESS, tflite::ops::micro::Register_LESS(),
ParseLess);
}
TfLiteStatus AddLessEqual() {
return AddBuiltin(BuiltinOperator_LESS_EQUAL,
tflite::ops::micro::Register_LESS_EQUAL(),
ParseLessEqual);
}
TfLiteStatus AddLog() {
return AddBuiltin(BuiltinOperator_LOG, tflite::ops::micro::Register_LOG(),
ParseLog);
}
TfLiteStatus AddLogicalAnd() {
return AddBuiltin(BuiltinOperator_LOGICAL_AND,
tflite::ops::micro::Register_LOGICAL_AND(),
ParseLogicalAnd);
}
TfLiteStatus AddLogicalNot() {
return AddBuiltin(BuiltinOperator_LOGICAL_NOT,
tflite::ops::micro::Register_LOGICAL_NOT(),
ParseLogicalNot);
}
TfLiteStatus AddLogicalOr() {
return AddBuiltin(BuiltinOperator_LOGICAL_OR,
tflite::ops::micro::Register_LOGICAL_OR(),
ParseLogicalOr);
}
TfLiteStatus AddLogistic() {
return AddBuiltin(BuiltinOperator_LOGISTIC,
tflite::ops::micro::Register_LOGISTIC(), ParseLogistic);
}
TfLiteStatus AddMaximum() {
return AddBuiltin(BuiltinOperator_MAXIMUM,
tflite::ops::micro::Register_MAXIMUM(), ParseMaximum);
}
TfLiteStatus AddMaxPool2D() {
return AddBuiltin(BuiltinOperator_MAX_POOL_2D,
tflite::ops::micro::Register_MAX_POOL_2D(), ParsePool);
}
TfLiteStatus AddMean() {
return AddBuiltin(BuiltinOperator_MEAN, tflite::ops::micro::Register_MEAN(),
ParseReducer);
}
TfLiteStatus AddMinimum() {
return AddBuiltin(BuiltinOperator_MINIMUM,
tflite::ops::micro::Register_MINIMUM(), ParseMinimum);
}
TfLiteStatus AddMul() {
return AddBuiltin(BuiltinOperator_MUL, tflite::ops::micro::Register_MUL(),
ParseMul);
}
TfLiteStatus AddNeg() {
return AddBuiltin(BuiltinOperator_NEG, tflite::ops::micro::Register_NEG(),
ParseNeg);
}
TfLiteStatus AddNotEqual() {
return AddBuiltin(BuiltinOperator_NOT_EQUAL,
tflite::ops::micro::Register_NOT_EQUAL(), ParseNotEqual);
}
TfLiteStatus AddPack() {
return AddBuiltin(BuiltinOperator_PACK, tflite::ops::micro::Register_PACK(),
ParsePack);
}
TfLiteStatus AddPad() {
return AddBuiltin(BuiltinOperator_PAD, tflite::ops::micro::Register_PAD(),
ParsePad);
}
TfLiteStatus AddPadV2() {
return AddBuiltin(BuiltinOperator_PADV2,
tflite::ops::micro::Register_PADV2(), ParsePadV2);
}
TfLiteStatus AddPrelu() {
return AddBuiltin(BuiltinOperator_PRELU,
tflite::ops::micro::Register_PRELU(), ParsePrelu);
}
TfLiteStatus AddQuantize() {
return AddBuiltin(BuiltinOperator_QUANTIZE, Register_QUANTIZE(),
ParseQuantize);
}
TfLiteStatus AddReduceMax() {
return AddBuiltin(BuiltinOperator_REDUCE_MAX,
tflite::ops::micro::Register_REDUCE_MAX(), ParseReducer);
}
TfLiteStatus AddRelu() {
return AddBuiltin(BuiltinOperator_RELU, tflite::ops::micro::Register_RELU(),
ParseRelu);
}
TfLiteStatus AddRelu6() {
return AddBuiltin(BuiltinOperator_RELU6,
tflite::ops::micro::Register_RELU6(), ParseRelu6);
}
TfLiteStatus AddReshape() {
return AddBuiltin(BuiltinOperator_RESHAPE,
tflite::ops::micro::Register_RESHAPE(), ParseReshape);
}
TfLiteStatus AddResizeNearestNeighbor() {
return AddBuiltin(BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
tflite::ops::micro::Register_RESIZE_NEAREST_NEIGHBOR(),
ParseResizeNearestNeighbor);
}
TfLiteStatus AddRound() {
return AddBuiltin(BuiltinOperator_ROUND,
tflite::ops::micro::Register_ROUND(), ParseRound);
}
TfLiteStatus AddRsqrt() {
return AddBuiltin(BuiltinOperator_RSQRT,
tflite::ops::micro::Register_RSQRT(), ParseRsqrt);
}
TfLiteStatus AddShape() {
return AddBuiltin(BuiltinOperator_SHAPE, Register_SHAPE(), ParseShape);
}
TfLiteStatus AddSin() {
return AddBuiltin(BuiltinOperator_SIN, tflite::ops::micro::Register_SIN(),
ParseSin);
}
TfLiteStatus AddSoftmax() {
return AddBuiltin(BuiltinOperator_SOFTMAX, Register_SOFTMAX(),
ParseSoftmax);
}
TfLiteStatus AddSplit() {
return AddBuiltin(BuiltinOperator_SPLIT,
tflite::ops::micro::Register_SPLIT(), ParseSplit);
}
TfLiteStatus AddSplitV() {
return AddBuiltin(BuiltinOperator_SPLIT_V,
tflite::ops::micro::Register_SPLIT_V(), ParseSplitV);
}
TfLiteStatus AddSqrt() {
return AddBuiltin(BuiltinOperator_SQRT, tflite::ops::micro::Register_SQRT(),
ParseSqrt);
}
TfLiteStatus AddSquare() {
return AddBuiltin(BuiltinOperator_SQUARE,
tflite::ops::micro::Register_SQUARE(), ParseSquare);
}
TfLiteStatus AddStridedSlice() {
return AddBuiltin(BuiltinOperator_STRIDED_SLICE,
tflite::ops::micro::Register_STRIDED_SLICE(),
ParseStridedSlice);
}
TfLiteStatus AddSub() {
return AddBuiltin(BuiltinOperator_SUB, tflite::ops::micro::Register_SUB(),
ParseSub);
}
TfLiteStatus AddSvdf() {
return AddBuiltin(BuiltinOperator_SVDF, Register_SVDF(), ParseSvdf);
}
TfLiteStatus AddTanh() {
return AddBuiltin(BuiltinOperator_TANH, tflite::ops::micro::Register_TANH(),
ParseTanh);
}
TfLiteStatus AddUnpack() {
return AddBuiltin(BuiltinOperator_UNPACK,
tflite::ops::micro::Register_UNPACK(), ParseUnpack);
}
unsigned int GetRegistrationLength() { return registrations_len_; }
private:
TF_LITE_REMOVE_VIRTUAL_DELETE
TfLiteStatus AddBuiltin(tflite::BuiltinOperator op,
const TfLiteRegistration& registration,
MicroOpResolver::BuiltinParseFunction parser) {
if (op == BuiltinOperator_CUSTOM) {
if (error_reporter_ != nullptr) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Invalid parameter BuiltinOperator_CUSTOM to the "
"AddBuiltin function.");
}
return kTfLiteError;
}
if (FindOp(op) != nullptr) {
if (error_reporter_ != nullptr) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Calling AddBuiltin with the same op more than "
"once is not supported (Op: #%d).",
op);
}
return kTfLiteError;
}
if (registrations_len_ >= tOpCount) {
if (error_reporter_) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Couldn't register builtin op #%d, resolver size "
"is too small (%d).",
op, tOpCount);
}
return kTfLiteError;
}
registrations_[registrations_len_] = registration;
// Strictly speaking, the builtin_code is not necessary for TFLM, but we fill
// it in regardless.
registrations_[registrations_len_].builtin_code = op;
registrations_len_++;
builtin_codes_[num_buitin_ops_] = op;
builtin_parsers_[num_buitin_ops_] = parser;
num_buitin_ops_++;
return kTfLiteOk;
}
TfLiteRegistration registrations_[tOpCount];
unsigned int registrations_len_ = 0;
TF_LITE_REMOVE_VIRTUAL_DELETE
};
// Arrays (and counter) to store the builtin codes and their corresponding
// parse functions as these are registered with the Op Resolver.
BuiltinOperator builtin_codes_[tOpCount];
MicroOpResolver::BuiltinParseFunction builtin_parsers_[tOpCount];
unsigned int num_buitin_ops_ = 0;
// TODO(b/147854028): Consider switching all uses of MicroMutableOpResolver to
// MicroOpResolver.
class MicroMutableOpResolver
: public MicroOpResolver<TFLITE_REGISTRATIONS_MAX> {
private:
TF_LITE_REMOVE_VIRTUAL_DELETE
ErrorReporter* error_reporter_;
};
} // namespace tflite
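A short sketch of registering only the ops a model actually needs with the templated resolver above; the particular op set and the template parameter of 4 are assumptions chosen for illustration:

#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

TfLiteStatus RegisterSelectedOps(tflite::MicroMutableOpResolver<4>* resolver) {
  // Each Add* call returns kTfLiteError if the op was already added or if the
  // template parameter (4 here) is exceeded, so the statuses are worth checking.
  TF_LITE_ENSURE_STATUS(resolver->AddConv2D());
  TF_LITE_ENSURE_STATUS(resolver->AddDepthwiseConv2D());
  TF_LITE_ENSURE_STATUS(resolver->AddFullyConnected());
  TF_LITE_ENSURE_STATUS(resolver->AddSoftmax());
  return kTfLiteOk;
}

// The resolver must outlive the interpreter that uses it, e.g.:
//   static tflite::MicroMutableOpResolver<4> resolver(&micro_error_reporter);
//   RegisterSelectedOps(&resolver);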

View File

@@ -0,0 +1,73 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_OP_RESOLVER_H_
#define TENSORFLOW_LITE_MICRO_MICRO_OP_RESOLVER_H_
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/core/api/op_resolver.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
// This is an interface for the OpResolver for TFLiteMicro. The differences from
// the TFLite OpResolver base class are to:
// * explicitly remove support for Op versions
// * allow for finer grained registration of the Builtin Ops to reduce code
// size for TFLiteMicro.
//
// We need an interface class instead of directly using MicroMutableOpResolver
// because MicroMutableOpResolver is a class template with the number of
// registered Ops as the template parameter.
class MicroOpResolver : public OpResolver {
public:
typedef TfLiteStatus (*BuiltinParseFunction)(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
// Returns the Op registration struct corresponding to the enum code from the
// flatbuffer schema. Returns nullptr if the op is not found or if op ==
// BuiltinOperator_CUSTOM.
virtual const TfLiteRegistration* FindOp(BuiltinOperator op) const = 0;
// Returns the Op registration struct corresponding to the custom operator by
// name.
virtual const TfLiteRegistration* FindOp(const char* op) const = 0;
// This implementation exists for compatibility with the OpResolver base class
// and disregards the version parameter.
const TfLiteRegistration* FindOp(BuiltinOperator op,
int version) const final {
return FindOp(op);
}
// This implementation exists for compatibility with the OpResolver base class
// and disregards the version parameter.
const TfLiteRegistration* FindOp(const char* op, int version) const final {
return FindOp(op);
}
// Returns the operator specific parsing function for the OpData for a
// BuiltinOperator (if registered), else nullptr.
virtual BuiltinParseFunction GetOpDataParser(BuiltinOperator op) const = 0;
~MicroOpResolver() override {}
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MICRO_OP_RESOLVER_H_
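// Illustrative usage sketch (editorial, not part of this header): how a
// concrete MicroOpResolver implementation is typically populated before being
// handed to the interpreter. The template size of 3 and the particular Add*()
// calls are assumptions for illustration; register exactly the ops the model
// needs.
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

inline tflite::MicroOpResolver* SetupExampleResolver() {
  // One registration slot per op keeps the resolver's footprint minimal.
  static tflite::MicroMutableOpResolver<3> resolver;
  resolver.AddConv2D();
  resolver.AddFullyConnected();
  resolver.AddSoftmax();
  return &resolver;
}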

View File

@@ -1,144 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/micro_optional_debug_tools.h"
// `cinttypes` requires `__STDC_FORMAT_MACROS` to be defined to expose `PRId32`.
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <cinttypes>
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
namespace {
std::vector<int> flatbuffersVector2StdVector(
const flatbuffers::Vector<int32_t>& fVector) {
std::vector<int> stdVector;
stdVector.reserve(fVector.size());
for (size_t i = 0; i < fVector.size(); i++) {
stdVector.push_back(fVector.Get(i));
}
return stdVector;
}
void PrintIntVector(const std::vector<int>& v) {
for (const auto& it : v) {
printf(" %d", it);
}
printf("\n");
}
void PrintTfLiteIntVector(const TfLiteIntArray* v) {
if (!v) {
printf(" (null)\n");
return;
}
for (int k = 0; k < v->size; k++) {
printf(" %d", v->data[k]);
}
printf("\n");
}
const char* TensorTypeName(TfLiteType type) {
switch (type) {
case kTfLiteNoType:
return "kTfLiteNoType";
case kTfLiteFloat32:
return "kTfLiteFloat32";
case kTfLiteInt32:
return "kTfLiteInt32";
case kTfLiteUInt8:
return "kTfLiteUInt8";
case kTfLiteInt8:
return "kTfLiteInt8";
case kTfLiteInt64:
return "kTfLiteInt64";
case kTfLiteString:
return "kTfLiteString";
case kTfLiteBool:
return "kTfLiteBool";
case kTfLiteInt16:
return "kTfLiteInt16";
case kTfLiteComplex64:
return "kTfLiteComplex64";
case kTfLiteFloat16:
return "kTfLiteFloat16";
case kTfLiteFloat64:
return "kTfLiteFloat64";
}
return "(invalid)";
}
const char* AllocTypeName(TfLiteAllocationType type) {
switch (type) {
case kTfLiteMemNone:
return "kTfLiteMemNone";
case kTfLiteMmapRo:
return "kTfLiteMmapRo";
case kTfLiteDynamic:
return "kTfLiteDynamic";
case kTfLiteArenaRw:
return "kTfLiteArenaRw";
case kTfLiteArenaRwPersistent:
return "kTfLiteArenaRwPersistent";
}
return "(invalid)";
}
} // namespace
// Prints a dump of what tensors and what nodes are in the interpreter.
void PrintInterpreterState(MicroInterpreter* interpreter) {
printf("Interpreter has %zu tensors and %zu nodes\n",
interpreter->tensors_size(), interpreter->operators_size());
printf("Inputs:");
PrintIntVector(flatbuffersVector2StdVector(interpreter->inputs()));
printf("Outputs:");
PrintIntVector(flatbuffersVector2StdVector(interpreter->outputs()));
printf("\n");
for (size_t tensor_index = 0; tensor_index < interpreter->tensors_size();
tensor_index++) {
TfLiteTensor* tensor = interpreter->tensor(static_cast<int>(tensor_index));
printf("Tensor %3zu %-20s %10s %15s %10zu bytes (%4.1f MB) ", tensor_index,
tensor->name, TensorTypeName(tensor->type),
AllocTypeName(tensor->allocation_type), tensor->bytes,
static_cast<double>(tensor->bytes / (1 << 20)));
PrintTfLiteIntVector(tensor->dims);
}
printf("\n");
for (size_t node_index = 0; node_index < interpreter->operators_size();
node_index++) {
const NodeAndRegistration node_and_reg =
interpreter->node_and_registration(static_cast<int>(node_index));
const TfLiteNode& node = node_and_reg.node;
const TfLiteRegistration* reg = node_and_reg.registration;
if (reg->custom_name != nullptr) {
printf("Node %3zu Operator Custom Name %s\n", node_index,
reg->custom_name);
} else {
printf("Node %3zu Operator Builtin Code %3" PRId32 " %s\n", node_index,
reg->builtin_code, EnumNamesBuiltinOperator()[reg->builtin_code]);
}
printf(" Inputs:");
PrintTfLiteIntVector(node.inputs);
printf(" Outputs:");
PrintTfLiteIntVector(node.outputs);
}
}
} // namespace tflite

View File

@@ -0,0 +1,42 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/micro_profiler.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/micro_time.h"
namespace tflite {
MicroProfiler::MicroProfiler(tflite::ErrorReporter* reporter)
: reporter_(reporter) {}
uint32_t MicroProfiler::BeginEvent(const char* tag, EventType event_type,
int64_t event_metadata1,
int64_t event_metadata2) {
start_time_ = GetCurrentTimeTicks();
TFLITE_DCHECK(tag != nullptr);
event_tag_ = tag;
return 0;
}
void MicroProfiler::EndEvent(uint32_t event_handle) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
int32_t end_time = GetCurrentTimeTicks();
TF_LITE_REPORT_ERROR(reporter_, "%s took %d cycles\n", event_tag_,
end_time - start_time_);
#endif
}
} // namespace tflite

View File

@@ -0,0 +1,71 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_
#define TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/profiler.h"
#include "tensorflow/lite/micro/compatibility.h"
namespace tflite {
// MicroProfiler creates a common way to gain fine-grained insight into runtime
// performance. Bottleneck operators can be identified along with slow code
// sections. This can be used in conjunction with running the relevant micro
// benchmark to evaluate end-to-end performance.
//
// Usage example:
// MicroProfiler profiler(error_reporter);
// {
// ScopedProfile scoped_profile(profiler, tag);
// work_to_profile();
// }
//
// This will call the following methods in order:
// int event_handle = profiler->BeginEvent(op_name, EventType::DEFAULT, 0)
// work_to_profile();
// profiler->EndEvent(event_handle)
class MicroProfiler : public tflite::Profiler {
public:
explicit MicroProfiler(tflite::ErrorReporter* reporter);
~MicroProfiler() override = default;
// AddEvent is unused for TF Micro.
void AddEvent(const char* tag, EventType event_type, uint64_t start,
uint64_t end, int64_t event_metadata1,
int64_t event_metadata2) override{};
// BeginEvent followed by code followed by EndEvent will profile the code
// enclosed. Multiple concurrent events are unsupported, so the return value
// is always 0. Event_metadata1 and event_metadata2 are unused. The tag
// pointer must be valid until EndEvent is called.
uint32_t BeginEvent(const char* tag, EventType event_type,
int64_t event_metadata1,
int64_t event_metadata2) override;
// Event_handle is ignored since TF Micro does not support concurrent events.
void EndEvent(uint32_t event_handle) override;
private:
tflite::ErrorReporter* reporter_;
int32_t start_time_;
const char* event_tag_;
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_
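// Illustrative usage sketch (editorial, not part of this header): the
// BeginEvent()/EndEvent() flow described above, driven through
// tflite::ScopedProfile, which is assumed here to provide a (profiler, tag)
// constructor in core/api/profiler.h. The error reporter type and the profiled
// work are assumptions for illustration.
#include "tensorflow/lite/core/api/profiler.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_profiler.h"

inline void ProfileExampleWork() {
  static tflite::MicroErrorReporter error_reporter;
  tflite::MicroProfiler profiler(&error_reporter);
  {
    // BeginEvent() runs here; EndEvent() runs at the closing brace and logs
    // "<tag> took N cycles" through the error reporter.
    tflite::ScopedProfile scoped_profile(&profiler, "work_to_profile");
    // ... the code being measured goes here ...
  }
}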

View File

@@ -23,6 +23,7 @@ limitations under the License.
#include <cstdarg>
#include <cstdint>
#include <cstring>
namespace {
@@ -125,7 +126,8 @@ char* FastFloatToBufferLeft(float f, char* buffer) {
const int32_t exponent_shift = 23;
const int32_t exponent_bias = 127;
const uint32_t fraction_mask = 0x007fffff;
const uint32_t u = *reinterpret_cast<uint32_t*>(&f);
uint32_t u;
memcpy(&u, &f, sizeof(int32_t));
const int32_t exponent =
((u & exponent_mask) >> exponent_shift) - exponent_bias;
const uint32_t fraction = (u & fraction_mask);
@@ -163,7 +165,49 @@ char* FastFloatToBufferLeft(float f, char* buffer) {
*current = '.';
current += 1;
*current = 0;
// Prepend leading zeros to fill in all 7 bytes of the fraction. Truncate
// zeros off the end of the fraction. Every fractional value takes 7 bytes.
// For example, 2500 would be written into the buffer as 0002500 since it
// represents .00025.
constexpr int kMaxFractionalDigits = 7;
// Abort early if there is not enough space in the buffer.
if (current_end - current <= kMaxFractionalDigits) {
return current;
}
// Pre-fill buffer with zeros to ensure zero-truncation works properly.
for (int i = 1; i < kMaxFractionalDigits; i++) {
*(current + i) = '0';
}
// Track how large the fraction is to add leading zeros.
char* previous = current;
current = StrCatUInt32(current, (current_end - current), scaled_fraction, 10);
int fraction_digits = current - previous;
int leading_zeros = kMaxFractionalDigits - fraction_digits;
// Overwrite the null terminator from StrCatUInt32 to ensure zero-truncation
// works properly.
*current = '0';
// Shift fraction values and prepend zeros if necessary.
if (leading_zeros != 0) {
for (int i = 0; i < fraction_digits; i++) {
current--;
*(current + leading_zeros) = *current;
*current = '0';
}
current += kMaxFractionalDigits;
}
// Truncate trailing zeros for cleaner logs. Ensure we leave at least one
// fractional character for the case when scaled_fraction is 0.
while (*(current - 1) == '0' && (current - 1) > previous) {
current--;
}
*current = 0;
current = StrCatStr(current, (current_end - current), "*2^");
current = StrCatInt32(current, (current_end - current), exponent);
return current;

View File

@@ -15,34 +15,15 @@ limitations under the License.
#include "tensorflow/lite/micro/micro_utils.h"
#include <limits.h>
#include <math.h>
#include <stdint.h>
#include <cmath>
#include <cstdint>
#include <limits>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/op_macros.h"
namespace tflite {
namespace {
static const uint8_t kAsymmetricUInt8Min = 0;
static const uint8_t kAsymmetricUInt8Max = UINT8_MAX;
static const uint8_t kSymmetricUInt8Min = 1;
static const uint8_t kSymmetricUInt8Max = UINT8_MAX;
static const int8_t kAsymmetricInt8Min = INT8_MIN;
static const int8_t kAsymmetricInt8Max = INT8_MAX;
static const int kSymmetricInt8Scale = kAsymmetricInt8Max;
static const int16_t kAsymmetricInt16Min = INT16_MIN;
static const int16_t kAsymmetricInt16Max = INT16_MAX;
static const int kSymmetricInt16Scale = kAsymmetricInt16Max;
static const int32_t kAsymmetricInt32Max = INT32_MAX;
static const int kSymmetricInt32Scale = kAsymmetricInt32Max;
} // namespace
int ElementCount(const TfLiteIntArray& dims) {
int result = 1;
for (int i = 0; i < dims.size; ++i) {
@@ -51,109 +32,6 @@ int ElementCount(const TfLiteIntArray& dims) {
return result;
}
// Converts a float value into an unsigned eight-bit quantized value.
uint8_t FloatToAsymmetricQuantizedUInt8(const float value, const float scale,
const int zero_point) {
int32_t result = round(value / scale) + zero_point;
if (result < kAsymmetricUInt8Min) {
result = kAsymmetricUInt8Min;
}
if (result > kAsymmetricUInt8Max) {
result = kAsymmetricUInt8Max;
}
return result;
}
uint8_t FloatToSymmetricQuantizedUInt8(const float value, const float scale) {
int32_t result = round(value / scale);
if (result < kSymmetricUInt8Min) {
result = kSymmetricUInt8Min;
}
if (result > kSymmetricUInt8Max) {
result = kSymmetricUInt8Max;
}
return result;
}
int8_t FloatToAsymmetricQuantizedInt8(const float value, const float scale,
const int zero_point) {
int32_t result = round(value / scale) + zero_point;
if (result < kAsymmetricInt8Min) {
result = kAsymmetricInt8Min;
}
if (result > kAsymmetricInt8Max) {
result = kAsymmetricInt8Max;
}
return result;
}
int16_t FloatToAsymmetricQuantizedInt16(const float value, const float scale,
const int zero_point) {
int32_t result = round(value / scale) + zero_point;
if (result < kAsymmetricInt16Min) {
result = kAsymmetricInt16Min;
}
if (result > kAsymmetricInt16Max) {
result = kAsymmetricInt16Max;
}
return result;
}
int8_t FloatToSymmetricQuantizedInt8(const float value, const float scale) {
return FloatToAsymmetricQuantizedInt8(value, scale, 0.0f);
}
int32_t FloatToSymmetricQuantizedInt32(const float value, const float scale) {
float quantized = round(value / scale);
if (static_cast<int>(quantized) > INT_MAX) {
quantized = static_cast<float>(INT_MAX);
} else if (quantized < INT_MIN) {
quantized = static_cast<float>(INT_MIN);
}
return static_cast<int>(quantized);
}
void AsymmetricQuantize(const float* input, int8_t* output, int num_elements,
float scale, int zero_point) {
for (int i = 0; i < num_elements; i++) {
output[i] = FloatToAsymmetricQuantizedInt8(input[i], scale, zero_point);
}
}
void AsymmetricQuantize(const float* input, uint8_t* output, int num_elements,
float scale, int zero_point) {
for (int i = 0; i < num_elements; i++) {
output[i] = FloatToAsymmetricQuantizedUInt8(input[i], scale, zero_point);
}
}
void AsymmetricQuantize(const float* input, int16_t* output, int num_elements,
float scale, int zero_point) {
for (int i = 0; i < num_elements; i++) {
output[i] = FloatToAsymmetricQuantizedInt16(input[i], scale, zero_point);
}
}
void SymmetricQuantize(const float* input, int32_t* output, int num_elements,
float scale) {
for (int i = 0; i < num_elements; i++) {
output[i] = FloatToSymmetricQuantizedInt32(input[i], scale);
}
}
void SymmetricPerChannelQuantize(const float* input, int32_t* output,
int num_elements, int num_channels,
float* scales) {
int elements_per_channel = num_elements / num_channels;
for (int i = 0; i < num_channels; i++) {
for (int j = 0; j < elements_per_channel; j++) {
output[i * elements_per_channel + j] = FloatToSymmetricQuantizedInt32(
input[i * elements_per_channel + j], scales[i]);
}
}
}
void SignedSymmetricPerChannelQuantize(const float* values,
TfLiteIntArray* dims,
int quantized_dimension,
@@ -186,94 +64,17 @@ void SignedSymmetricPerChannelQuantize(const float* values,
max = fmaxf(max, values[idx]);
}
scaling_factors[channel] =
fmaxf(fabs(min), fabs(max)) / kSymmetricInt8Scale;
fmaxf(fabs(min), fabs(max)) / std::numeric_limits<int8_t>::max();
for (int i = 0; i < per_channel_size; i++) {
int idx = channel * channel_stride + i * stride;
const int32_t quantized_value =
static_cast<int32_t>(roundf(values[idx] / scaling_factors[channel]));
// Clamp: just in case some odd numeric offset.
quantized_values[idx] = fminf(
kSymmetricInt8Scale, fmaxf(-kSymmetricInt8Scale, quantized_value));
quantized_values[idx] =
fminf(std::numeric_limits<int8_t>::max(),
fmaxf(std::numeric_limits<int8_t>::min() + 1, quantized_value));
}
}
}
void SignedSymmetricQuantize(const float* values, TfLiteIntArray* dims,
int8_t* quantized_values, float* scaling_factor) {
int input_size = ElementCount(*dims);
float min = 0;
float max = 0;
for (int i = 0; i < input_size; i++) {
min = fminf(min, values[i]);
max = fmaxf(max, values[i]);
}
*scaling_factor = fmaxf(fabs(min), fabs(max)) / kSymmetricInt8Scale;
for (int i = 0; i < input_size; i++) {
const int32_t quantized_value =
static_cast<int32_t>(roundf(values[i] / *scaling_factor));
// Clamp: just in case some odd numeric offset.
quantized_values[i] = fminf(kSymmetricInt8Scale,
fmaxf(-kSymmetricInt8Scale, quantized_value));
}
}
void SignedSymmetricQuantize(const float* values, TfLiteIntArray* dims,
int16_t* quantized_values, float* scaling_factor) {
int input_size = ElementCount(*dims);
float min = 0;
float max = 0;
for (int i = 0; i < input_size; i++) {
min = fminf(min, values[i]);
max = fmaxf(max, values[i]);
}
*scaling_factor = fmaxf(fabs(min), fabs(max)) / kSymmetricInt16Scale;
for (int i = 0; i < input_size; i++) {
const int32_t quantized_value =
static_cast<int32_t>(roundf(values[i] / *scaling_factor));
// Clamp: just in case some odd numeric offset.
quantized_values[i] = fminf(kSymmetricInt16Scale,
fmaxf(-kSymmetricInt16Scale, quantized_value));
}
}
void SignedSymmetricQuantize(const float* values, TfLiteIntArray* dims,
int32_t* quantized_values, float* scaling_factor) {
int input_size = ElementCount(*dims);
float min = 0;
float max = 0;
for (int i = 0; i < input_size; i++) {
min = fminf(min, values[i]);
max = fmaxf(max, values[i]);
}
*scaling_factor =
fmaxf(fabs(min), fabs(max)) / static_cast<float>(kSymmetricInt32Scale);
for (int i = 0; i < input_size; i++) {
const int32_t quantized_value =
static_cast<int32_t>(roundf(values[i] / *scaling_factor));
// Clamp: just in case some odd numeric offset.
quantized_values[i] = fminf(
static_cast<float>(kSymmetricInt32Scale),
fmaxf(static_cast<float>(-kSymmetricInt32Scale), quantized_value));
}
}
void SymmetricQuantize(const float* values, TfLiteIntArray* dims,
uint8_t* quantized_values, float* scaling_factor) {
SignedSymmetricQuantize(values, dims,
reinterpret_cast<int8_t*>(quantized_values),
scaling_factor);
}
void SymmetricDequantize(const int8_t* values, const int size,
const float dequantization_scale,
float* dequantized_values) {
for (int i = 0; i < size; ++i) {
dequantized_values[i] = values[i] * dequantization_scale;
}
}
} // namespace tflite

View File

@@ -16,7 +16,9 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_MICRO_MICRO_UTILS_H_
#define TENSORFLOW_LITE_MICRO_MICRO_UTILS_H_
#include <stdint.h>
#include <algorithm>
#include <cmath>
#include <cstdint>
#include "tensorflow/lite/c/common.h"
@@ -26,51 +28,68 @@ namespace tflite {
int ElementCount(const TfLiteIntArray& dims);
uint8_t FloatToAsymmetricQuantizedUInt8(const float value, const float scale,
const int zero_point);
// Converts a float value into a quantized value. Note that large values (close
// to max int and min int) may see significant error due to a lack of floating
// point granularity.
template <typename T>
T FloatToQuantizedType(const float value, const float scale, int zero_point) {
int32_t result = round(value / scale) + zero_point;
result =
std::max(static_cast<int32_t>(std::numeric_limits<T>::min()), result);
result =
std::min(static_cast<int32_t>(std::numeric_limits<T>::max()), result);
return result;
}
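// Worked example (editorial illustration): with scale = 0.5f and
// zero_point = -128,
//   FloatToQuantizedType<int8_t>(10.0f, 0.5f, -128)
// computes round(10.0f / 0.5f) + (-128) = 20 - 128 = -108, which lies inside
// [-128, 127] and is returned as-is; results outside that range are clamped.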
uint8_t FloatToSymmetricQuantizedUInt8(const float value, const float scale);
int8_t FloatToAsymmetricQuantizedInt8(const float value, const float scale,
const int zero_point);
int16_t FloatToAsymmetricQuantizedInt16(const float value, const float scale,
const int zero_point);
int8_t FloatToSymmetricQuantizedInt8(const float value, const float scale);
// Converts a float value into a signed thirty-two-bit quantized value. Note
// that values close to max int and min int may see significant error due to
// a lack of floating point granularity for large values.
int32_t FloatToSymmetricQuantizedInt32(const float value, const float scale);
template <typename T>
T FloatToSymmetricQuantizedType(const float value, const float scale) {
int32_t result = round(value / scale);
result =
std::max(static_cast<int32_t>(std::numeric_limits<T>::min() + 1), result);
result =
std::min(static_cast<int32_t>(std::numeric_limits<T>::max()), result);
return result;
}
// Helper methods to quantize arrays of floats to the desired format.
//
// There are several key flavors of quantization in TfLite:
// asymmetric symmetric per channel
// int8 | X | X | X |
// uint8 | X | X | |
// int16 | X | | |
// int32 | | X | X |
// int8_t | X | X | X |
// uint8_t | X | X | |
// int16_t | X | | |
// int32_t | | X | X |
//
// The per-op quantization spec can be found here:
// https://www.tensorflow.org/lite/performance/quantization_spec
template <typename T>
void Quantize(const float* input, T* output, int num_elements, float scale,
int zero_point) {
for (int i = 0; i < num_elements; i++) {
output[i] = FloatToQuantizedType<T>(input[i], scale, zero_point);
}
}
void AsymmetricQuantize(const float* input, int8_t* output, int num_elements,
float scale, int zero_point = 0);
template <typename T>
void SymmetricQuantize(const float* input, T* output, int num_elements,
float scale) {
for (int i = 0; i < num_elements; i++) {
output[i] = FloatToSymmetricQuantizedType<T>(input[i], scale);
}
}
void AsymmetricQuantize(const float* input, uint8_t* output, int num_elements,
float scale, int zero_point = 128);
void AsymmetricQuantize(const float* input, int16_t* output, int num_elements,
float scale, int zero_point = 0);
void SymmetricQuantize(const float* input, int32_t* output, int num_elements,
float scale);
void SymmetricPerChannelQuantize(const float* input, int32_t* output,
template <typename T>
void SymmetricPerChannelQuantize(const float* input, T* output,
int num_elements, int num_channels,
float* scales);
float* scales) {
int elements_per_channel = num_elements / num_channels;
for (int i = 0; i < num_channels; i++) {
for (int j = 0; j < elements_per_channel; j++) {
output[i * elements_per_channel + j] = FloatToSymmetricQuantizedType<T>(
input[i * elements_per_channel + j], scales[i]);
}
}
}
void SignedSymmetricPerChannelQuantize(const float* values,
TfLiteIntArray* dims,
@@ -78,21 +97,37 @@ void SignedSymmetricPerChannelQuantize(const float* values,
int8_t* quantized_values,
float* scaling_factor);
void SignedSymmetricQuantize(const float* values, TfLiteIntArray* dims,
int8_t* quantized_values, float* scaling_factor);
// Quantizes inputs based on the values provided, choosing the smallest range
// which includes all input values.
template <typename T>
void SymmetricQuantizeCalculateScales(const float* values, TfLiteIntArray* dims,
T* output, float* scale) {
int input_size = ElementCount(*dims);
void SignedSymmetricQuantize(const float* values, TfLiteIntArray* dims,
int16_t* quantized_values, float* scaling_factor);
float min = 0;
float max = 0;
for (int i = 0; i < input_size; i++) {
min = fminf(min, values[i]);
max = fmaxf(max, values[i]);
}
*scale = fmaxf(std::abs(min), std::abs(max)) / std::numeric_limits<T>::max();
for (int i = 0; i < input_size; i++) {
const int32_t quantized_value =
    static_cast<int32_t>(roundf(values[i] / *scale));
// Clamp: just in case some odd numeric offset.
output[i] = fminf(std::numeric_limits<T>::max(),
                  fmaxf(std::numeric_limits<T>::min() + 1, quantized_value));
}
}
void SignedSymmetricQuantize(const float* values, TfLiteIntArray* dims,
int32_t* quantized_values, float* scaling_factor);
void SymmetricQuantize(const float* values, TfLiteIntArray* dims,
uint8_t* quantized_values, float* scaling_factor);
void SymmetricDequantize(const int8_t* values, const int size,
const float dequantization_scale,
float* dequantized_values);
template <typename T>
void Dequantize(const T* values, const int size, const float scale,
int zero_point, float* dequantized_values) {
for (int i = 0; i < size; ++i) {
dequantized_values[i] = (values[i] - zero_point) * scale;
}
}
} // namespace tflite
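// Illustrative round-trip sketch (editorial, not part of this header), using
// the templated Quantize()/Dequantize() helpers declared above. The scale and
// zero-point values are arbitrary example numbers.
#include <cstdint>

#include "tensorflow/lite/micro/micro_utils.h"

inline void QuantizeRoundTripExample() {
  const float input[3] = {-1.0f, 0.0f, 1.0f};
  int8_t quantized[3];
  float restored[3];
  const float scale = 1.0f / 127.0f;
  const int zero_point = 0;
  // float -> int8_t: round(value / scale) + zero_point, clamped to int8_t.
  tflite::Quantize(input, quantized, 3, scale, zero_point);
  // int8_t -> float: (value - zero_point) * scale.
  tflite::Dequantize(quantized, 3, scale, zero_point, restored);
}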

View File

@@ -0,0 +1,244 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/recording_micro_allocator.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/recording_simple_memory_allocator.h"
namespace tflite {
RecordingMicroAllocator::RecordingMicroAllocator(
RecordingSimpleMemoryAllocator* recording_memory_allocator,
ErrorReporter* error_reporter)
: MicroAllocator(recording_memory_allocator, error_reporter),
recording_memory_allocator_(recording_memory_allocator) {}
RecordingMicroAllocator* RecordingMicroAllocator::Create(
uint8_t* tensor_arena, size_t arena_size, ErrorReporter* error_reporter) {
TFLITE_DCHECK(error_reporter != nullptr);
RecordingSimpleMemoryAllocator* simple_memory_allocator =
RecordingSimpleMemoryAllocator::Create(error_reporter, tensor_arena,
arena_size);
TFLITE_DCHECK(simple_memory_allocator != nullptr);
uint8_t* allocator_buffer = simple_memory_allocator->AllocateFromTail(
sizeof(RecordingMicroAllocator), alignof(RecordingMicroAllocator));
RecordingMicroAllocator* allocator = new (allocator_buffer)
RecordingMicroAllocator(simple_memory_allocator, error_reporter);
return allocator;
}
RecordedAllocation RecordingMicroAllocator::GetRecordedAllocation(
RecordedAllocationType allocation_type) const {
switch (allocation_type) {
case RecordedAllocationType::kTfLiteEvalTensorData:
return recorded_tflite_eval_tensor_data_;
case RecordedAllocationType::kPersistentTfLiteTensorData:
return recorded_persistent_tflite_tensor_data_;
case RecordedAllocationType::kPersistentTfLiteTensorQuantizationData:
return recorded_persistent_tflite_tensor_quantization_data_;
case RecordedAllocationType::kPersistentBufferData:
return recorded_persistent_buffer_data_;
case RecordedAllocationType::kTfLiteTensorVariableBufferData:
return recorded_tflite_tensor_variable_buffer_data_;
case RecordedAllocationType::kNodeAndRegistrationArray:
return recorded_node_and_registration_array_data_;
case RecordedAllocationType::kOpData:
return recorded_op_data_;
}
TF_LITE_REPORT_ERROR(error_reporter(), "Invalid allocation type supplied: %d",
allocation_type);
return RecordedAllocation();
}
const RecordingSimpleMemoryAllocator*
RecordingMicroAllocator::GetSimpleMemoryAllocator() const {
return recording_memory_allocator_;
}
void RecordingMicroAllocator::PrintAllocations() const {
TF_LITE_REPORT_ERROR(
error_reporter(),
"[RecordingMicroAllocator] Arena allocation total %d bytes",
recording_memory_allocator_->GetUsedBytes());
TF_LITE_REPORT_ERROR(
error_reporter(),
"[RecordingMicroAllocator] Arena allocation head %d bytes",
recording_memory_allocator_->GetHeadUsedBytes());
TF_LITE_REPORT_ERROR(
error_reporter(),
"[RecordingMicroAllocator] Arena allocation tail %d bytes",
recording_memory_allocator_->GetTailUsedBytes());
PrintRecordedAllocation(RecordedAllocationType::kTfLiteEvalTensorData,
"TfLiteEvalTensor data", "allocations");
PrintRecordedAllocation(RecordedAllocationType::kPersistentTfLiteTensorData,
"Persistent TfLiteTensor data", "tensors");
PrintRecordedAllocation(
RecordedAllocationType::kPersistentTfLiteTensorQuantizationData,
"Persistent TfLiteTensor quantization data", "allocations");
PrintRecordedAllocation(RecordedAllocationType::kPersistentBufferData,
"Persistent buffer data", "allocations");
PrintRecordedAllocation(
RecordedAllocationType::kTfLiteTensorVariableBufferData,
"TfLiteTensor variable buffer data", "allocations");
PrintRecordedAllocation(RecordedAllocationType::kNodeAndRegistrationArray,
"NodeAndRegistration struct",
"NodeAndRegistration structs");
PrintRecordedAllocation(RecordedAllocationType::kOpData,
"Operator runtime data", "OpData structs");
}
void* RecordingMicroAllocator::AllocatePersistentBuffer(size_t bytes) {
RecordedAllocation allocations = SnapshotAllocationUsage();
void* buffer = MicroAllocator::AllocatePersistentBuffer(bytes);
RecordAllocationUsage(allocations, recorded_persistent_buffer_data_);
return buffer;
}
void RecordingMicroAllocator::PrintRecordedAllocation(
RecordedAllocationType allocation_type, const char* allocation_name,
const char* allocation_description) const {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
RecordedAllocation allocation = GetRecordedAllocation(allocation_type);
if (allocation.used_bytes > 0 || allocation.requested_bytes > 0) {
TF_LITE_REPORT_ERROR(
error_reporter(),
"[RecordingMicroAllocator] '%s' used %d bytes with alignment overhead "
"(requested %d bytes for %d %s)",
allocation_name, allocation.used_bytes, allocation.requested_bytes,
allocation.count, allocation_description);
}
#endif
}
TfLiteStatus RecordingMicroAllocator::AllocateNodeAndRegistrations(
const Model* model, NodeAndRegistration** node_and_registrations) {
RecordedAllocation allocations = SnapshotAllocationUsage();
TfLiteStatus status = MicroAllocator::AllocateNodeAndRegistrations(
model, node_and_registrations);
RecordAllocationUsage(allocations,
recorded_node_and_registration_array_data_);
// The allocation count for this recording will always be 1 because the parent
// class makes one large allocation for all nodes in the graph
// (i.e. sizeof(NodeAndRegistration) * num_nodes), which avoids extra overhead
// and potential fragmentation. To provide better logging, manually adjust the
// accounting by decrementing by 1 and adding the actual number of operators
// used in the graph:
recorded_node_and_registration_array_data_.count +=
GetSubGraphFromModel(model)->operators()->size() - 1;
return status;
}
TfLiteStatus
RecordingMicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer(
const Model* model, const MicroOpResolver& op_resolver,
NodeAndRegistration* node_and_registrations) {
RecordedAllocation allocations = SnapshotAllocationUsage();
TfLiteStatus status =
MicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer(
model, op_resolver, node_and_registrations);
RecordAllocationUsage(allocations, recorded_op_data_);
return status;
}
TfLiteStatus RecordingMicroAllocator::AllocateTfLiteEvalTensors(
const Model* model, TfLiteEvalTensor** eval_tensors) {
RecordedAllocation allocations = SnapshotAllocationUsage();
TfLiteStatus status =
MicroAllocator::AllocateTfLiteEvalTensors(model, eval_tensors);
RecordAllocationUsage(allocations, recorded_tflite_eval_tensor_data_);
// The allocation for this recording will always be 1. This is because the
// parent class mallocs one large allocation for the number of tensors in the
// graph (e.g. sizeof(TfLiteEvalTensor) * num_tensors).
// To prevent extra overhead and potential for fragmentation, manually adjust
// the accounting by decrementing by 1 and adding the actual number of tensors
// used in the graph:
recorded_tflite_eval_tensor_data_.count +=
GetSubGraphFromModel(model)->tensors()->size() - 1;
return status;
}
TfLiteStatus RecordingMicroAllocator::AllocateVariables(
const SubGraph* subgraph, TfLiteEvalTensor* eval_tensors) {
RecordedAllocation allocations = SnapshotAllocationUsage();
TfLiteStatus status =
MicroAllocator::AllocateVariables(subgraph, eval_tensors);
RecordAllocationUsage(allocations,
recorded_tflite_tensor_variable_buffer_data_);
return status;
}
TfLiteTensor* RecordingMicroAllocator::AllocatePersistentTfLiteTensorInternal(
const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index) {
RecordedAllocation allocations = SnapshotAllocationUsage();
TfLiteTensor* result = MicroAllocator::AllocatePersistentTfLiteTensorInternal(
model, eval_tensors, tensor_index);
RecordAllocationUsage(allocations, recorded_persistent_tflite_tensor_data_);
return result;
}
TfLiteStatus RecordingMicroAllocator::PopulateTfLiteTensorFromFlatbuffer(
const Model* model, const SubGraph* subgraph, TfLiteTensor* tensor,
int tensor_index, bool allocate_temp) {
RecordedAllocation allocations = SnapshotAllocationUsage();
TfLiteStatus status = MicroAllocator::PopulateTfLiteTensorFromFlatbuffer(
model, subgraph, tensor, tensor_index, allocate_temp);
RecordAllocationUsage(allocations,
recorded_persistent_tflite_tensor_quantization_data_);
return status;
}
RecordedAllocation RecordingMicroAllocator::SnapshotAllocationUsage() const {
return {/*requested_bytes=*/recording_memory_allocator_->GetRequestedBytes(),
/*used_bytes=*/recording_memory_allocator_->GetUsedBytes(),
/*count=*/recording_memory_allocator_->GetAllocatedCount()};
}
void RecordingMicroAllocator::RecordAllocationUsage(
const RecordedAllocation& snapshotted_allocation,
RecordedAllocation& recorded_allocation) {
recorded_allocation.requested_bytes +=
recording_memory_allocator_->GetRequestedBytes() -
snapshotted_allocation.requested_bytes;
recorded_allocation.used_bytes +=
recording_memory_allocator_->GetUsedBytes() -
snapshotted_allocation.used_bytes;
recorded_allocation.count +=
recording_memory_allocator_->GetAllocatedCount() -
snapshotted_allocation.count;
}
} // namespace tflite

View File

@@ -0,0 +1,125 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_RECORDING_MICRO_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_RECORDING_MICRO_ALLOCATOR_H_
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/recording_simple_memory_allocator.h"
namespace tflite {
// List of buckets currently recorded by this class. Each bucket tracks the
// allocation information gathered during model initialization.
// TODO(b/169834511): Add tracking for scratch buffer allocations.
enum class RecordedAllocationType {
kTfLiteEvalTensorData,
kPersistentTfLiteTensorData,
kPersistentTfLiteTensorQuantizationData,
kPersistentBufferData,
kTfLiteTensorVariableBufferData,
kNodeAndRegistrationArray,
kOpData,
};
// Container for holding information about allocation recordings by a given
// type. Each recording contains the number of bytes requested, the actual bytes
// allocated (which can differ from the requested amount due to alignment), and
// the number of items
// allocated.
struct RecordedAllocation {
size_t requested_bytes;
size_t used_bytes;
size_t count;
};
// Utility subclass of MicroAllocator that records all allocations
// inside the arena. A summary of allocations can be logged through the
// ErrorReporter by invoking PrintAllocations(). This special allocator requires
// an instance of RecordingSimpleMemoryAllocator to capture allocations in the
// head and tail. Arena allocation recording can be retrieved by type through
// the GetRecordedAllocation() function. This class should only be used for
// auditing memory usage or integration testing.
class RecordingMicroAllocator : public MicroAllocator {
public:
static RecordingMicroAllocator* Create(uint8_t* tensor_arena,
size_t arena_size,
ErrorReporter* error_reporter);
// Returns the recorded allocations information for a given allocation type.
RecordedAllocation GetRecordedAllocation(
RecordedAllocationType allocation_type) const;
const RecordingSimpleMemoryAllocator* GetSimpleMemoryAllocator() const;
// Logs out through the ErrorReporter all allocation recordings by type
// defined in RecordedAllocationType.
void PrintAllocations() const;
void* AllocatePersistentBuffer(size_t bytes) override;
protected:
TfLiteStatus AllocateNodeAndRegistrations(
const Model* model,
NodeAndRegistration** node_and_registrations) override;
TfLiteStatus PrepareNodeAndRegistrationDataFromFlatbuffer(
const Model* model, const MicroOpResolver& op_resolver,
NodeAndRegistration* node_and_registrations) override;
TfLiteStatus AllocateTfLiteEvalTensors(
const Model* model, TfLiteEvalTensor** eval_tensors) override;
TfLiteStatus AllocateVariables(const SubGraph* subgraph,
TfLiteEvalTensor* eval_tensors) override;
// TODO(b/162311891): Once all kernels have been updated to the new API drop
// this method. It is only used to record TfLiteTensor persistent allocations.
TfLiteTensor* AllocatePersistentTfLiteTensorInternal(
const Model* model, TfLiteEvalTensor* eval_tensors,
int tensor_index) override;
// TODO(b/162311891): Once all kernels have been updated to the new API drop
// this function since all allocations for quantized data will take place in
// the temp section.
TfLiteStatus PopulateTfLiteTensorFromFlatbuffer(const Model* model,
const SubGraph* subgraph,
TfLiteTensor* tensor,
int tensor_index,
bool allocate_temp) override;
private:
RecordingMicroAllocator(RecordingSimpleMemoryAllocator* memory_allocator,
ErrorReporter* error_reporter);
void PrintRecordedAllocation(RecordedAllocationType allocation_type,
const char* allocation_name,
const char* allocation_description) const;
RecordedAllocation SnapshotAllocationUsage() const;
void RecordAllocationUsage(const RecordedAllocation& snapshotted_allocation,
RecordedAllocation& recorded_allocation);
const RecordingSimpleMemoryAllocator* recording_memory_allocator_;
RecordedAllocation recorded_tflite_eval_tensor_data_ = {};
RecordedAllocation recorded_persistent_tflite_tensor_data_ = {};
RecordedAllocation recorded_persistent_tflite_tensor_quantization_data_ = {};
RecordedAllocation recorded_persistent_buffer_data_ = {};
RecordedAllocation recorded_tflite_tensor_variable_buffer_data_ = {};
RecordedAllocation recorded_node_and_registration_array_data_ = {};
RecordedAllocation recorded_op_data_ = {};
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_RECORDING_MICRO_ALLOCATOR_H_
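// Illustrative audit sketch (editorial, not part of this header): creating the
// recording allocator, then querying one of the buckets listed in
// RecordedAllocationType after the interpreter has allocated its tensors. The
// arena handling and the queried bucket are assumptions for illustration.
#include <cstddef>
#include <cstdint>

#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/recording_micro_allocator.h"

inline void AuditArenaExample(uint8_t* tensor_arena, size_t arena_size) {
  static tflite::MicroErrorReporter error_reporter;
  tflite::RecordingMicroAllocator* allocator =
      tflite::RecordingMicroAllocator::Create(tensor_arena, arena_size,
                                              &error_reporter);
  // ... hand `allocator` to a MicroInterpreter and call AllocateTensors() ...
  allocator->PrintAllocations();
  const tflite::RecordedAllocation eval_tensors =
      allocator->GetRecordedAllocation(
          tflite::RecordedAllocationType::kTfLiteEvalTensorData);
  (void)eval_tensors.used_bytes;  // bytes actually placed in the arena
}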

View File

@@ -0,0 +1,65 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_RECORDING_MICRO_INTERPRETER_H_
#define TENSORFLOW_LITE_MICRO_RECORDING_MICRO_INTERPRETER_H_
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/micro/recording_micro_allocator.h"
namespace tflite {
// Utility subclass that enables internal recordings of the MicroInterpreter.
// This class should be used to audit and analyze memory arena usage for a given
// model and interpreter.
//
// After construction and the first Invoke() or AllocateTensors() call, the
// memory usage is recorded and available through the GetMicroAllocator()
// function. See RecordingMicroAllocator for more details on what is currently
// recorded from arena allocations.
//
// It is recommended that users increase the tensor arena size by at least 1 KB
// to ensure enough additional memory is available for internal recordings.
class RecordingMicroInterpreter : public MicroInterpreter {
public:
RecordingMicroInterpreter(const Model* model,
const MicroOpResolver& op_resolver,
uint8_t* tensor_arena, size_t tensor_arena_size,
ErrorReporter* error_reporter)
: MicroInterpreter(model, op_resolver,
RecordingMicroAllocator::Create(
tensor_arena, tensor_arena_size, error_reporter),
error_reporter),
recording_micro_allocator_(
static_cast<const RecordingMicroAllocator&>(allocator())) {}
RecordingMicroInterpreter(const Model* model,
const MicroOpResolver& op_resolver,
RecordingMicroAllocator* allocator,
ErrorReporter* error_reporter)
: MicroInterpreter(model, op_resolver, allocator, error_reporter),
recording_micro_allocator_(*allocator) {}
const RecordingMicroAllocator& GetMicroAllocator() const {
return recording_micro_allocator_;
}
private:
const RecordingMicroAllocator& recording_micro_allocator_;
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_RECORDING_MICRO_INTERPRETER_H_
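// Illustrative usage sketch (editorial, not part of this header) of the flow
// described above: construct, allocate, then read the recordings through
// GetMicroAllocator(). The resolver choice and error reporter type are
// assumptions for illustration.
#include <cstddef>
#include <cstdint>

#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/recording_micro_interpreter.h"

inline void RecordArenaUsageExample(const tflite::Model* model,
                                    uint8_t* tensor_arena, size_t arena_size) {
  static tflite::MicroErrorReporter error_reporter;
  static tflite::AllOpsResolver resolver;
  tflite::RecordingMicroInterpreter interpreter(
      model, resolver, tensor_arena, arena_size, &error_reporter);
  if (interpreter.AllocateTensors() == kTfLiteOk) {
    interpreter.GetMicroAllocator().PrintAllocations();
  }
}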

View File

@@ -0,0 +1,84 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/recording_simple_memory_allocator.h"
#include <new>
#include "tensorflow/lite/kernels/internal/compatibility.h"
namespace tflite {
RecordingSimpleMemoryAllocator::RecordingSimpleMemoryAllocator(
ErrorReporter* error_reporter, uint8_t* buffer_head, size_t buffer_size)
: SimpleMemoryAllocator(error_reporter, buffer_head, buffer_size),
requested_head_bytes_(0),
requested_tail_bytes_(0),
used_bytes_(0),
alloc_count_(0) {}
RecordingSimpleMemoryAllocator::~RecordingSimpleMemoryAllocator() {}
RecordingSimpleMemoryAllocator* RecordingSimpleMemoryAllocator::Create(
ErrorReporter* error_reporter, uint8_t* buffer_head, size_t buffer_size) {
TFLITE_DCHECK(error_reporter != nullptr);
TFLITE_DCHECK(buffer_head != nullptr);
RecordingSimpleMemoryAllocator tmp =
RecordingSimpleMemoryAllocator(error_reporter, buffer_head, buffer_size);
uint8_t* allocator_buffer =
tmp.AllocateFromTail(sizeof(RecordingSimpleMemoryAllocator),
alignof(RecordingSimpleMemoryAllocator));
// Use the default copy constructor to populate internal states.
return new (allocator_buffer) RecordingSimpleMemoryAllocator(tmp);
}
size_t RecordingSimpleMemoryAllocator::GetRequestedBytes() const {
return requested_head_bytes_ + requested_tail_bytes_;
}
size_t RecordingSimpleMemoryAllocator::GetUsedBytes() const {
return used_bytes_;
}
size_t RecordingSimpleMemoryAllocator::GetAllocatedCount() const {
return alloc_count_;
}
TfLiteStatus RecordingSimpleMemoryAllocator::SetHeadBufferSize(
size_t size, size_t alignment) {
const uint8_t* previous_head = head();
TfLiteStatus status =
SimpleMemoryAllocator::SetHeadBufferSize(size, alignment);
if (status == kTfLiteOk) {
used_bytes_ += head() - previous_head;
requested_head_bytes_ = size;
}
return status;
}
uint8_t* RecordingSimpleMemoryAllocator::AllocateFromTail(size_t size,
size_t alignment) {
const uint8_t* previous_tail = tail();
uint8_t* result = SimpleMemoryAllocator::AllocateFromTail(size, alignment);
if (result != nullptr) {
used_bytes_ += previous_tail - tail();
requested_tail_bytes_ += size;
alloc_count_++;
}
return result;
}
} // namespace tflite

View File

@@ -0,0 +1,64 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_RECORDING_SIMPLE_MEMORY_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_RECORDING_SIMPLE_MEMORY_ALLOCATOR_H_
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"
namespace tflite {
// Utility class used to log allocations of a SimpleMemoryAllocator. Should only
// be used in debug/evaluation settings or unit tests to evaluate allocation
// usage.
class RecordingSimpleMemoryAllocator : public SimpleMemoryAllocator {
public:
RecordingSimpleMemoryAllocator(ErrorReporter* error_reporter,
uint8_t* buffer_head, size_t buffer_size);
// TODO(b/157615197): Cleanup constructors/destructor and use factory
// functions.
~RecordingSimpleMemoryAllocator() override;
static RecordingSimpleMemoryAllocator* Create(ErrorReporter* error_reporter,
uint8_t* buffer_head,
size_t buffer_size);
// Returns the number of bytes requested from the head or tail.
size_t GetRequestedBytes() const;
// Returns the number of bytes actually allocated from the head or tail. This
// value will be >= the number of requested bytes due to padding and
// alignment.
size_t GetUsedBytes() const;
// Returns the number of alloc calls from the head or tail.
size_t GetAllocatedCount() const;
TfLiteStatus SetHeadBufferSize(size_t size, size_t alignment) override;
uint8_t* AllocateFromTail(size_t size, size_t alignment) override;
private:
size_t requested_head_bytes_;
size_t requested_tail_bytes_;
size_t used_bytes_;
size_t alloc_count_;
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_RECORDING_SIMPLE_MEMORY_ALLOCATOR_H_
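// Illustrative sketch (editorial, not part of this header) of the
// requested-vs-used accounting described above: used bytes can exceed
// requested bytes because of alignment padding. Buffer size, allocation size,
// and alignment are arbitrary example numbers.
#include <cstdint>

#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/recording_simple_memory_allocator.h"

inline void RecordingAllocatorExample() {
  static tflite::MicroErrorReporter error_reporter;
  static uint8_t buffer[1024];
  tflite::RecordingSimpleMemoryAllocator* allocator =
      tflite::RecordingSimpleMemoryAllocator::Create(&error_reporter, buffer,
                                                     sizeof(buffer));
  allocator->AllocateFromTail(/*size=*/10, /*alignment=*/16);
  // Requested: 10 bytes. Used: 10 bytes plus any alignment padding. The
  // allocation count also includes the allocator's own self-allocation made
  // inside Create().
  (void)allocator->GetRequestedBytes();
  (void)allocator->GetUsedBytes();
  (void)allocator->GetAllocatedCount();
}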

View File

@@ -1,4 +1,4 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -17,50 +17,133 @@ limitations under the License.
#include <cstddef>
#include <cstdint>
#include <new>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/memory_helpers.h"
namespace tflite {
SimpleMemoryAllocator* CreateInPlaceSimpleMemoryAllocator(
ErrorReporter* error_reporter, uint8_t* buffer, size_t buffer_size) {
SimpleMemoryAllocator::SimpleMemoryAllocator(ErrorReporter* error_reporter,
uint8_t* buffer_head,
uint8_t* buffer_tail)
: error_reporter_(error_reporter),
buffer_head_(buffer_head),
buffer_tail_(buffer_tail),
head_(buffer_head),
tail_(buffer_tail),
temp_(buffer_head_) {}
SimpleMemoryAllocator::SimpleMemoryAllocator(ErrorReporter* error_reporter,
uint8_t* buffer,
size_t buffer_size)
: SimpleMemoryAllocator(error_reporter, buffer, buffer + buffer_size) {}
/* static */
SimpleMemoryAllocator* SimpleMemoryAllocator::Create(
ErrorReporter* error_reporter, uint8_t* buffer_head, size_t buffer_size) {
TFLITE_DCHECK(error_reporter != nullptr);
TFLITE_DCHECK(buffer_head != nullptr);
SimpleMemoryAllocator tmp =
SimpleMemoryAllocator(error_reporter, buffer, buffer_size);
SimpleMemoryAllocator* in_place_allocator =
reinterpret_cast<SimpleMemoryAllocator*>(tmp.AllocateFromTail(
sizeof(SimpleMemoryAllocator), alignof(SimpleMemoryAllocator)));
*in_place_allocator = tmp;
return in_place_allocator;
SimpleMemoryAllocator(error_reporter, buffer_head, buffer_size);
// Allocate enough bytes from the buffer to create a SimpleMemoryAllocator.
// The new instance will use the current adjusted tail buffer from the tmp
// allocator instance.
uint8_t* allocator_buffer = tmp.AllocateFromTail(
sizeof(SimpleMemoryAllocator), alignof(SimpleMemoryAllocator));
// Use the default copy constructor to populate internal states.
return new (allocator_buffer) SimpleMemoryAllocator(tmp);
}
uint8_t* SimpleMemoryAllocator::AllocateFromHead(size_t size,
size_t alignment) {
uint8_t* const aligned_result = AlignPointerUp(head_, alignment);
SimpleMemoryAllocator::~SimpleMemoryAllocator() {}
TfLiteStatus SimpleMemoryAllocator::SetHeadBufferSize(size_t size,
size_t alignment) {
if (head_ != temp_) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Internal error: SetHeadBufferSize() needs to be called "
"after ResetTempAllocations().");
return kTfLiteError;
}
uint8_t* const aligned_result = AlignPointerUp(buffer_head_, alignment);
const size_t available_memory = tail_ - aligned_result;
if (available_memory < size) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Failed to allocate memory. Requested: %u, available %u, missing: %u",
"Failed to set head size. Requested: %u, available %u, missing: %u",
size, available_memory, size - available_memory);
return nullptr;
return kTfLiteError;
}
head_ = aligned_result + size;
return aligned_result;
temp_ = head_;
return kTfLiteOk;
}
uint8_t* SimpleMemoryAllocator::AllocateFromTail(size_t size,
size_t alignment) {
uint8_t* const aligned_result = AlignPointerDown(tail_ - size, alignment);
if (aligned_result < head_) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
const size_t missing_memory = head_ - aligned_result;
TF_LITE_REPORT_ERROR(
error_reporter_,
"Failed to allocate memory. Requested: %u, available %u, missing: %u",
size, size - missing_memory, missing_memory);
TF_LITE_REPORT_ERROR(error_reporter_,
"Failed to allocate tail memory. Requested: %u, "
"available %u, missing: %u",
size, size - missing_memory, missing_memory);
#endif
return nullptr;
}
tail_ = aligned_result;
return aligned_result;
}
uint8_t* SimpleMemoryAllocator::AllocateTemp(size_t size, size_t alignment) {
uint8_t* const aligned_result = AlignPointerUp(temp_, alignment);
const size_t available_memory = tail_ - aligned_result;
if (available_memory < size) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Failed to allocate temp memory. Requested: %u, "
"available %u, missing: %u",
size, available_memory, size - available_memory);
return nullptr;
}
temp_ = aligned_result + size;
return aligned_result;
}
void SimpleMemoryAllocator::ResetTempAllocations() { temp_ = head_; }
uint8_t* SimpleMemoryAllocator::GetHeadBuffer() const { return buffer_head_; }
size_t SimpleMemoryAllocator::GetHeadUsedBytes() const {
return head_ - buffer_head_;
}
size_t SimpleMemoryAllocator::GetTailUsedBytes() const {
return buffer_tail_ - tail_;
}
size_t SimpleMemoryAllocator::GetAvailableMemory(size_t alignment) const {
uint8_t* const aligned_temp = AlignPointerUp(temp_, alignment);
uint8_t* const aligned_tail = AlignPointerDown(tail_, alignment);
return aligned_tail - aligned_temp;
}
size_t SimpleMemoryAllocator::GetUsedBytes() const {
return GetBufferSize() - (tail_ - temp_);
}
size_t SimpleMemoryAllocator::GetBufferSize() const {
return buffer_tail_ - buffer_head_;
}
uint8_t* SimpleMemoryAllocator::head() const { return head_; }
uint8_t* SimpleMemoryAllocator::tail() const { return tail_; }
} // namespace tflite

View File

@@ -1,4 +1,4 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -16,10 +16,12 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_MICRO_SIMPLE_MEMORY_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_SIMPLE_MEMORY_ALLOCATOR_H_
#include <cstddef>
#include <cstdint>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/micro/compatibility.h"
namespace tflite {
@@ -28,43 +30,82 @@ namespace tflite {
// This makes it pretty wasteful, so we should use a more intelligent method.
class SimpleMemoryAllocator {
public:
// TODO(b/157615197): Cleanup constructors/destructor and use factory
// functions.
SimpleMemoryAllocator(ErrorReporter* error_reporter, uint8_t* buffer_head,
uint8_t* buffer_tail)
: error_reporter_(error_reporter),
buffer_head_(buffer_head),
buffer_tail_(buffer_tail),
head_(buffer_head),
tail_(buffer_tail) {}
uint8_t* buffer_tail);
SimpleMemoryAllocator(ErrorReporter* error_reporter, uint8_t* buffer,
size_t buffer_size)
: SimpleMemoryAllocator(error_reporter, buffer, buffer + buffer_size) {}
size_t buffer_size);
virtual ~SimpleMemoryAllocator();
// Creates a new SimpleMemoryAllocator from a given buffer head and size.
static SimpleMemoryAllocator* Create(ErrorReporter* error_reporter,
uint8_t* buffer_head,
size_t buffer_size);
// Adjusts the head (lowest address and moving upwards) memory allocation to a
// given size. Calls to this method will also invalidate all temporary
// allocation values (it sets the location of temp space at the end of the
// head section). This call will fail if a chain of allocations through
// AllocateTemp() have not been cleaned up with a call to
// ResetTempAllocations().
virtual TfLiteStatus SetHeadBufferSize(size_t size, size_t alignment);
// Allocates memory starting at the head of the arena (lowest address and
// moving upwards).
uint8_t* AllocateFromHead(size_t size, size_t alignment);
// Allocates memory starting at the tail of the arena (highest address and
// moving downwards).
uint8_t* AllocateFromTail(size_t size, size_t alignment);
virtual uint8_t* AllocateFromTail(size_t size, size_t alignment);
uint8_t* GetHead() const { return head_; }
uint8_t* GetTail() const { return tail_; }
size_t GetAvailableMemory() const { return tail_ - head_; }
size_t GetUsedBytes() const { return GetBufferSize() - GetAvailableMemory(); }
// Allocates a temporary buffer from the head of the arena (lowest address and
// moving upwards) but does not update the actual head allocation size or
// position. The returned buffer is guaranteed to remain valid until either
// ResetTempAllocations() is called or AllocateFromHead() is called again.
// Repeat calls to this function will create a chain of temp allocations. All
// calls to AllocateTemp() must end with a call to ResetTempAllocations(). If
// AllocateFromHead() is called before a call to ResetTempAllocations(), it
// will fail with an error message.
virtual uint8_t* AllocateTemp(size_t size, size_t alignment);
// Resets a chain of temporary allocations back to the current head of the
// arena (lowest address).
virtual void ResetTempAllocations();
// Returns a pointer to the buffer currently assigned to the head section.
// This buffer is set by calling SetHeadBufferSize().
uint8_t* GetHeadBuffer() const;
// Returns the size of the head section in bytes.
size_t GetHeadUsedBytes() const;
// Returns the size of all allocations in the tail section in bytes.
size_t GetTailUsedBytes() const;
// Returns the number of bytes available with a given alignment. This number
// takes into account any temporary allocations.
size_t GetAvailableMemory(size_t alignment) const;
// Returns the number of used bytes in the allocator. This number takes into
// account any temporary allocations.
size_t GetUsedBytes() const;
protected:
// Returns a pointer to the current end of the head buffer.
uint8_t* head() const;
// Returns a pointer to the current end of the tail buffer.
uint8_t* tail() const;
private:
size_t GetBufferSize() const { return buffer_tail_ - buffer_head_; }
size_t GetBufferSize() const;
ErrorReporter* error_reporter_;
uint8_t* buffer_head_;
uint8_t* buffer_tail_;
uint8_t* head_;
uint8_t* tail_;
};
uint8_t* temp_;
// Allocate a SimpleMemoryAllocator from the buffer and then return the pointer
// to this allocator.
SimpleMemoryAllocator* CreateInPlaceSimpleMemoryAllocator(
ErrorReporter* error_reporter, uint8_t* buffer, size_t buffer_size);
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
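// Illustrative sketch (editorial, not part of this header) of the
// head/temp/tail discipline described in the comments above: temporary
// allocations chain upward from the head and must be reset before the head
// buffer may be resized. Sizes and alignments are arbitrary example numbers.
#include <cstdint>

#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"

inline void SimpleAllocatorExample() {
  static tflite::MicroErrorReporter error_reporter;
  static uint8_t arena[1024];
  tflite::SimpleMemoryAllocator* allocator =
      tflite::SimpleMemoryAllocator::Create(&error_reporter, arena,
                                            sizeof(arena));
  // Persistent data lives at the tail (highest addresses, growing downward).
  uint8_t* persistent =
      allocator->AllocateFromTail(/*size=*/32, /*alignment=*/8);
  // Scratch data is taken as temp allocations above the head.
  uint8_t* scratch = allocator->AllocateTemp(/*size=*/64, /*alignment=*/8);
  // ... use `scratch` ..., then release the whole temp chain:
  allocator->ResetTempAllocations();
  // Only after the reset may the head section be (re)sized.
  allocator->SetHeadBufferSize(/*size=*/128, /*alignment=*/8);
  (void)persistent;
}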

View File

@@ -15,14 +15,24 @@ limitations under the License.
#include "tensorflow/lite/micro/test_helpers.h"
#include <cstdarg>
#include <cstddef>
#include <cstdint>
#include <initializer_list>
#include <new>
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/tensor_utils.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/micro_utils.h"
#include "tensorflow/lite/schema/schema_generated.h"
// TODO(b/170464050): Use TFLM test only version of schema_utils.
namespace tflite {
namespace testing {
namespace {
@@ -48,7 +58,7 @@ class StackAllocator : public flatbuffers::Allocator {
return *inst;
}
static constexpr size_t kStackAllocatorSize = 4096;
static constexpr size_t kStackAllocatorSize = 8192;
private:
uint8_t data_backing_[kStackAllocatorSize];
@@ -76,8 +86,7 @@ class ModelBuilder {
: builder_(builder) {}
// Registers an operator that will be used in the model.
Operator RegisterOp(BuiltinOperator op, const char* custom_code,
int32_t version);
Operator RegisterOp(BuiltinOperator op, const char* custom_code);
// Adds a tensor to the model.
Tensor AddTensor(TensorType type, std::initializer_list<int32_t> shape) {
@@ -94,10 +103,16 @@ class ModelBuilder {
Node AddNode(Operator op, std::initializer_list<Tensor> inputs,
std::initializer_list<Tensor> outputs);
void AddMetadata(const char* description_string,
const int32_t* metadata_buffer_data, size_t num_elements);
// Constructs the flatbuffer model using `builder_` and returns a pointer to
// it. The returned model has the same lifetime as `builder_`.
// Note that the default value of 0 for num_subgraph_inputs means that all
// tensor inputs are in the subgraph input list.
const Model* BuildModel(std::initializer_list<Tensor> inputs,
std::initializer_list<Tensor> outputs);
std::initializer_list<Tensor> outputs,
size_t num_subgraph_inputs = 0);
private:
// Adds a tensor to the model.
@@ -116,15 +131,24 @@ class ModelBuilder {
static constexpr int kMaxTensors = 50;
flatbuffers::Offset<tflite::Tensor> tensors_[kMaxTensors];
static constexpr int kMaxMetadataBuffers = 10;
static constexpr int kMaxMetadatas = 10;
flatbuffers::Offset<Metadata> metadata_[kMaxMetadatas];
flatbuffers::Offset<Buffer> metadata_buffers_[kMaxMetadataBuffers];
int nbr_of_metadata_buffers_ = 0;
int next_tensor_id_ = 0;
};
ModelBuilder::Operator ModelBuilder::RegisterOp(BuiltinOperator op,
const char* custom_code,
int32_t version) {
const char* custom_code) {
TFLITE_DCHECK(next_operator_code_id_ <= kMaxOperatorCodes);
operator_codes_[next_operator_code_id_] =
tflite::CreateOperatorCodeDirect(*builder_, op, custom_code, version);
operator_codes_[next_operator_code_id_] = tflite::CreateOperatorCodeDirect(
*builder_, /*deprecated_builtin_code=*/0, custom_code, /*version=*/0, op);
next_operator_code_id_++;
return next_operator_code_id_ - 1;
}
@@ -142,29 +166,75 @@ ModelBuilder::Node ModelBuilder::AddNode(
return next_operator_id_ - 1;
}
void ModelBuilder::AddMetadata(const char* description_string,
const int32_t* metadata_buffer_data,
size_t num_elements) {
metadata_[ModelBuilder::nbr_of_metadata_buffers_] =
CreateMetadata(*builder_, builder_->CreateString(description_string),
1 + ModelBuilder::nbr_of_metadata_buffers_);
metadata_buffers_[nbr_of_metadata_buffers_] = tflite::CreateBuffer(
*builder_, builder_->CreateVector((uint8_t*)metadata_buffer_data,
sizeof(uint32_t) * num_elements));
ModelBuilder::nbr_of_metadata_buffers_++;
}
const Model* ModelBuilder::BuildModel(
std::initializer_list<ModelBuilder::Tensor> inputs,
std::initializer_list<ModelBuilder::Tensor> outputs) {
std::initializer_list<ModelBuilder::Tensor> outputs,
size_t num_subgraph_inputs) {
// Model schema requires an empty buffer at idx 0.
constexpr size_t kBufferSize = 1;
const flatbuffers::Offset<Buffer> buffers[kBufferSize] = {
tflite::CreateBuffer(*builder_)};
size_t buffer_size = 1 + ModelBuilder::nbr_of_metadata_buffers_;
flatbuffers::Offset<Buffer> buffers[kMaxMetadataBuffers];
buffers[0] = tflite::CreateBuffer(*builder_);
// Place the metadata buffers first in the buffers array, since their indices
// have already been set in AddMetadata().
for (int i = 1; i < ModelBuilder::nbr_of_metadata_buffers_ + 1; ++i) {
buffers[i] = metadata_buffers_[i - 1];
}
// TFLM only supports single subgraph.
constexpr size_t subgraphs_size = 1;
// Find out number of subgraph inputs.
if (num_subgraph_inputs == 0) {
// This is the default case.
num_subgraph_inputs = inputs.size();
} else {
// A non-zero value of num_subgraph_inputs means that some of
// the operator input tensors are not subgraph inputs.
TFLITE_DCHECK(num_subgraph_inputs <= inputs.size());
}
const flatbuffers::Offset<SubGraph> subgraphs[subgraphs_size] = {
tflite::CreateSubGraph(
*builder_, builder_->CreateVector(tensors_, next_tensor_id_),
builder_->CreateVector(inputs.begin(), inputs.size()),
builder_->CreateVector(inputs.begin(), num_subgraph_inputs),
builder_->CreateVector(outputs.begin(), outputs.size()),
builder_->CreateVector(operators_, next_operator_id_),
builder_->CreateString("test_subgraph"))};
const flatbuffers::Offset<Model> model_offset = tflite::CreateModel(
*builder_, 0,
builder_->CreateVector(operator_codes_, next_operator_code_id_),
builder_->CreateVector(subgraphs, subgraphs_size),
builder_->CreateString("test_model"),
builder_->CreateVector(buffers, kBufferSize));
flatbuffers::Offset<Model> model_offset;
if (ModelBuilder::nbr_of_metadata_buffers_ > 0) {
model_offset = tflite::CreateModel(
*builder_, 0,
builder_->CreateVector(operator_codes_, next_operator_code_id_),
builder_->CreateVector(subgraphs, subgraphs_size),
builder_->CreateString("test_model"),
builder_->CreateVector(buffers, buffer_size), 0,
builder_->CreateVector(metadata_,
ModelBuilder::nbr_of_metadata_buffers_));
} else {
model_offset = tflite::CreateModel(
*builder_, 0,
builder_->CreateVector(operator_codes_, next_operator_code_id_),
builder_->CreateVector(subgraphs, subgraphs_size),
builder_->CreateString("test_model"),
builder_->CreateVector(buffers, buffer_size));
}
tflite::FinishModelBuffer(*builder_, model_offset);
void* model_pointer = builder_->GetBufferPointer();
const Model* model = flatbuffers::GetRoot<Model>(model_pointer);
@@ -190,7 +260,7 @@ const Model* BuildSimpleStatefulModel() {
ModelBuilder model_builder(fb_builder);
const int op_id =
model_builder.RegisterOp(BuiltinOperator_CUSTOM, "simple_stateful_op", 0);
model_builder.RegisterOp(BuiltinOperator_CUSTOM, "simple_stateful_op");
const int input_tensor = model_builder.AddTensor(TensorType_UINT8, {3});
const int median_tensor = model_builder.AddTensor(TensorType_UINT8, {3});
const int invoke_count_tensor =
@@ -231,8 +301,7 @@ const Model* BuildSimpleModelWithBranch() {
v
*/
const int op_id =
model_builder.RegisterOp(BuiltinOperator_CUSTOM, "mock_custom",
/* version= */ 0);
model_builder.RegisterOp(BuiltinOperator_CUSTOM, "mock_custom");
const int t0 = model_builder.AddTensor(TensorType_FLOAT32, {2, 2, 3});
const int t1 = model_builder.AddTensor(TensorType_FLOAT32, {2, 2, 3});
const int t2 = model_builder.AddTensor(TensorType_FLOAT32, {2, 2, 3});
@@ -243,6 +312,35 @@ const Model* BuildSimpleModelWithBranch() {
return model_builder.BuildModel({t0}, {t3});
}
const Model* BuildModelWithOfflinePlanning(int number_of_tensors,
const int32_t* metadata_buffer,
NodeConnection* node_conn,
int num_conns,
int num_subgraph_inputs) {
using flatbuffers::Offset;
flatbuffers::FlatBufferBuilder* fb_builder = BuilderInstance();
ModelBuilder model_builder(fb_builder);
const int op_id =
model_builder.RegisterOp(BuiltinOperator_CUSTOM, "mock_custom");
for (int i = 0; i < number_of_tensors; ++i) {
model_builder.AddTensor(TensorType_FLOAT32, {2, 2, 3});
}
for (int i = 0; i < num_conns; ++i) {
model_builder.AddNode(op_id, node_conn[i].input, node_conn[i].output);
}
model_builder.AddMetadata(
"OfflineMemoryAllocation", metadata_buffer,
number_of_tensors + tflite::testing::kOfflinePlannerHeaderSize);
return model_builder.BuildModel(
node_conn[0].input, node_conn[num_conns - 1].output, num_subgraph_inputs);
}
const Model* BuildSimpleMockModel() {
using flatbuffers::Offset;
flatbuffers::FlatBufferBuilder* builder = BuilderInstance();
@@ -306,8 +404,9 @@ const Model* BuildSimpleMockModel() {
builder->CreateString("test_subgraph"))};
constexpr size_t operator_codes_size = 1;
const Offset<OperatorCode> operator_codes[operator_codes_size] = {
CreateOperatorCodeDirect(*builder, BuiltinOperator_CUSTOM, "mock_custom",
0)};
CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0,
"mock_custom",
/*version=*/0, BuiltinOperator_CUSTOM)};
const Offset<Model> model_offset = CreateModel(
*builder, 0, builder->CreateVector(operator_codes, operator_codes_size),
builder->CreateVector(subgraphs, subgraphs_size),
@@ -455,8 +554,9 @@ const Model* BuildComplexMockModel() {
constexpr size_t operator_codes_size = 1;
const Offset<OperatorCode> operator_codes[operator_codes_size] = {
CreateOperatorCodeDirect(*builder, BuiltinOperator_CUSTOM, "mock_custom",
0)};
CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0,
"mock_custom",
/*version=*/0, BuiltinOperator_CUSTOM)};
const Offset<Model> model_offset = CreateModel(
*builder, 0, builder->CreateVector(operator_codes, operator_codes_size),
@@ -472,6 +572,147 @@ const Model* BuildComplexMockModel() {
} // namespace
const TfLiteRegistration* SimpleStatefulOp::getRegistration() {
return GetMutableRegistration();
}
TfLiteRegistration* SimpleStatefulOp::GetMutableRegistration() {
static TfLiteRegistration r;
r.init = Init;
r.prepare = Prepare;
r.invoke = Invoke;
return &r;
}
void* SimpleStatefulOp::Init(TfLiteContext* context, const char* buffer,
size_t length) {
TFLITE_DCHECK(context->AllocateBufferForEval == nullptr);
TFLITE_DCHECK(context->GetScratchBuffer == nullptr);
TFLITE_DCHECK(context->RequestScratchBufferInArena == nullptr);
void* raw = context->AllocatePersistentBuffer(context, sizeof(OpData));
OpData* data = reinterpret_cast<OpData*>(raw);
*data = {};
return raw;
}
TfLiteStatus SimpleStatefulOp::Prepare(TfLiteContext* context,
TfLiteNode* node) {
OpData* data = reinterpret_cast<OpData*>(node->user_data);
// Make sure that the input is a uint8_t tensor with at least one element.
const TfLiteTensor* input;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
if (input->type != kTfLiteUInt8) return kTfLiteError;
if (NumElements(input->dims) == 0) return kTfLiteError;
// Allocate a temporary buffer with the same size as the input for sorting.
TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
context, sizeof(uint8_t) * NumElements(input->dims),
&data->sorting_buffer));
// We can interleave scratch / persistent buffer allocation.
data->invoke_count = reinterpret_cast<int*>(
context->AllocatePersistentBuffer(context, sizeof(int)));
*data->invoke_count = 0;
return kTfLiteOk;
}
TfLiteStatus SimpleStatefulOp::Invoke(TfLiteContext* context,
TfLiteNode* node) {
OpData* data = reinterpret_cast<OpData*>(node->user_data);
*data->invoke_count += 1;
const TfLiteTensor* input;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
const uint8_t* input_data = GetTensorData<uint8_t>(input);
int size = NumElements(input->dims);
uint8_t* sorting_buffer = reinterpret_cast<uint8_t*>(
context->GetScratchBuffer(context, data->sorting_buffer));
// Copy the input data to the sorting buffer. We don't want to mutate the
// input tensor, as it might be used by another node.
for (int i = 0; i < size; i++) {
sorting_buffer[i] = input_data[i];
}
// In-place insertion sort on `sorting_buffer`.
for (int i = 1; i < size; i++) {
for (int j = i; j > 0 && sorting_buffer[j] < sorting_buffer[j - 1]; j--) {
std::swap(sorting_buffer[j], sorting_buffer[j - 1]);
}
}
TfLiteTensor* median;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kMedianTensor, &median));
uint8_t* median_data = GetTensorData<uint8_t>(median);
TfLiteTensor* invoke_count;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kInvokeCount, &invoke_count));
int32_t* invoke_count_data = GetTensorData<int32_t>(invoke_count);
median_data[0] = sorting_buffer[size / 2];
invoke_count_data[0] = *data->invoke_count;
return kTfLiteOk;
}
const TfLiteRegistration* MockCustom::getRegistration() {
return GetMutableRegistration();
}
TfLiteRegistration* MockCustom::GetMutableRegistration() {
static TfLiteRegistration r;
r.init = Init;
r.prepare = Prepare;
r.invoke = Invoke;
r.free = Free;
return &r;
}
void* MockCustom::Init(TfLiteContext* context, const char* buffer,
size_t length) {
// We don't support delegates in TFL Micro. This is a weak check to verify
// that the context struct is zero-initialized.
TFLITE_DCHECK(context->ReplaceNodeSubsetsWithDelegateKernels == nullptr);
freed_ = false;
// Do nothing.
return nullptr;
}
void MockCustom::Free(TfLiteContext* context, void* buffer) { freed_ = true; }
TfLiteStatus MockCustom::Prepare(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteOk;
}
TfLiteStatus MockCustom::Invoke(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
const int32_t* input_data = input->data.i32;
const TfLiteTensor* weight;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 1, &weight));
const uint8_t* weight_data = weight->data.uint8;
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
int32_t* output_data = output->data.i32;
output_data[0] =
0; // Catch output tensor sharing memory with an input tensor
output_data[0] = input_data[0] + weight_data[0];
return kTfLiteOk;
}
bool MockCustom::freed_ = false;
AllOpsResolver GetOpResolver() {
AllOpsResolver op_resolver;
op_resolver.AddCustom("mock_custom", MockCustom::GetMutableRegistration());
op_resolver.AddCustom("simple_stateful_op",
SimpleStatefulOp::GetMutableRegistration());
return op_resolver;
}
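For context, a hedged sketch of how GetOpResolver() and the mock models above are typically wired into a test. The arena size is an arbitrary assumption, and the MicroInterpreter constructor taking an ErrorReporter* is assumed here.
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/micro/test_helpers.h"
namespace {
constexpr size_t kArenaSize = 2048;  // Arbitrary example size.
uint8_t tensor_arena[kArenaSize];
}  // namespace
TfLiteStatus SketchRunSimpleMockModel() {
  const tflite::Model* model = tflite::testing::GetSimpleMockModel();
  tflite::AllOpsResolver op_resolver = tflite::testing::GetOpResolver();
  tflite::MicroErrorReporter error_reporter;
  tflite::MicroInterpreter interpreter(model, op_resolver, tensor_arena,
                                       kArenaSize, &error_reporter);
  TF_LITE_ENSURE_STATUS(interpreter.AllocateTensors());
  return interpreter.Invoke();
}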
const Model* GetSimpleMockModel() {
static Model* model = nullptr;
if (!model) {
@@ -496,6 +737,16 @@ const Model* GetSimpleModelWithBranch() {
return model;
}
const Model* GetModelWithOfflinePlanning(int num_tensors,
const int32_t* metadata_buffer,
NodeConnection* node_conn,
int num_conns,
int num_subgraph_inputs) {
const Model* model = BuildModelWithOfflinePlanning(
num_tensors, metadata_buffer, node_conn, num_conns, num_subgraph_inputs);
return model;
}
const Model* GetSimpleStatefulModel() {
static Model* model = nullptr;
if (!model) {
@@ -592,11 +843,13 @@ int TestStrcmp(const char* a, const char* b) {
// Wrapper to forward kernel errors to the interpreter's error reporter.
void ReportOpError(struct TfLiteContext* context, const char* format, ...) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
ErrorReporter* error_reporter = static_cast<ErrorReporter*>(context->impl_);
va_list args;
va_start(args, format);
TF_LITE_REPORT_ERROR(error_reporter, format, args);
va_end(args);
#endif
}
// Create a TfLiteIntArray from an array of ints. The first element in the
@@ -616,122 +869,27 @@ TfLiteFloatArray* FloatArrayFromFloats(const float* floats) {
return reinterpret_cast<TfLiteFloatArray*>(const_cast<float*>(floats));
}
TfLiteTensor CreateTensor(TfLiteIntArray* dims, const char* name,
bool is_variable) {
TfLiteTensor result;
result.dims = dims;
result.name = name;
result.params = {};
result.quantization = {kTfLiteNoQuantization, nullptr};
result.is_variable = is_variable;
result.allocation_type = kTfLiteMemNone;
result.allocation = nullptr;
return result;
}
TfLiteTensor CreateFloatTensor(const float* data, TfLiteIntArray* dims,
const char* name, bool is_variable) {
TfLiteTensor result = CreateTensor(dims, name, is_variable);
result.type = kTfLiteFloat32;
result.data.f = const_cast<float*>(data);
result.bytes = ElementCount(*dims) * sizeof(float);
return result;
}
void PopulateFloatTensor(TfLiteTensor* tensor, float* begin, float* end) {
float* p = begin;
float* v = tensor->data.f;
while (p != end) {
*v++ = *p++;
}
}
TfLiteTensor CreateBoolTensor(const bool* data, TfLiteIntArray* dims,
const char* name, bool is_variable) {
TfLiteTensor result = CreateTensor(dims, name, is_variable);
result.type = kTfLiteBool;
result.data.b = const_cast<bool*>(data);
result.bytes = ElementCount(*dims) * sizeof(bool);
return result;
}
TfLiteTensor CreateInt32Tensor(const int32_t* data, TfLiteIntArray* dims,
const char* name, bool is_variable) {
TfLiteTensor result = CreateTensor(dims, name, is_variable);
result.type = kTfLiteInt32;
result.data.i32 = const_cast<int32_t*>(data);
result.bytes = ElementCount(*dims) * sizeof(int32_t);
return result;
}
TfLiteTensor CreateQuantizedTensor(const uint8_t* data, TfLiteIntArray* dims,
float scale, int zero_point,
const char* name, bool is_variable) {
TfLiteTensor result = CreateTensor(dims, name, is_variable);
result.type = kTfLiteUInt8;
result.data.uint8 = const_cast<uint8_t*>(data);
result.params = {scale, zero_point};
result.quantization = {kTfLiteAffineQuantization, nullptr};
result.bytes = ElementCount(*dims) * sizeof(uint8_t);
return result;
}
TfLiteTensor CreateQuantizedTensor(const int8_t* data, TfLiteIntArray* dims,
float scale, int zero_point,
const char* name, bool is_variable) {
TfLiteTensor result = CreateTensor(dims, name, is_variable);
result.type = kTfLiteInt8;
result.data.int8 = const_cast<int8_t*>(data);
result.params = {scale, zero_point};
result.quantization = {kTfLiteAffineQuantization, nullptr};
result.bytes = ElementCount(*dims) * sizeof(int8_t);
return result;
}
TfLiteTensor CreateQuantizedTensor(const int16_t* data, TfLiteIntArray* dims,
float scale, int zero_point,
const char* name, bool is_variable) {
TfLiteTensor result = CreateTensor(dims, name, is_variable);
result.type = kTfLiteInt16;
result.data.i16 = const_cast<int16_t*>(data);
result.params = {scale, zero_point};
result.quantization = {kTfLiteAffineQuantization, nullptr};
result.bytes = ElementCount(*dims) * sizeof(int16_t);
return result;
}
TfLiteTensor CreateQuantized32Tensor(const int32_t* data, TfLiteIntArray* dims,
float scale, const char* name,
bool is_variable) {
TfLiteTensor result = CreateTensor(dims, name, is_variable);
result.type = kTfLiteInt32;
result.data.i32 = const_cast<int32_t*>(data);
// Quantized int32 tensors always have a zero point of 0, since the range of
// int32 values is large, and because zero point costs extra cycles during
// processing.
result.params = {scale, 0};
result.quantization = {kTfLiteAffineQuantization, nullptr};
result.bytes = ElementCount(*dims) * sizeof(int32_t);
return result;
}
TfLiteTensor CreateQuantizedBiasTensor(const float* data, int32_t* quantized,
TfLiteIntArray* dims, float input_scale,
float weights_scale, const char* name,
bool is_variable) {
float weights_scale, bool is_variable) {
float bias_scale = input_scale * weights_scale;
tflite::SymmetricQuantize(data, quantized, ElementCount(*dims), bias_scale);
return CreateQuantized32Tensor(quantized, dims, bias_scale, name,
is_variable);
// Quantized int32_t tensors always have a zero point of 0, since the range of
// int32_t values is large, and because zero point costs extra cycles during
// processing.
TfLiteTensor result =
CreateQuantizedTensor(quantized, dims, bias_scale, 0, is_variable);
return result;
}
// Quantizes int32 bias tensor with per-channel weights determined by input
// Quantizes int32_t bias tensor with per-channel weights determined by input
// scale multiplied by weight scale for each channel.
TfLiteTensor CreatePerChannelQuantizedBiasTensor(
const float* input, int32_t* quantized, TfLiteIntArray* dims,
float input_scale, float* weight_scales, float* scales, int* zero_points,
TfLiteAffineQuantization* affine_quant, int quantized_dimension,
const char* name, bool is_variable) {
bool is_variable) {
int input_size = ElementCount(*dims);
int num_channels = dims->data[quantized_dimension];
// First element is reserved for array length
@@ -743,25 +901,22 @@ TfLiteTensor CreatePerChannelQuantizedBiasTensor(
zero_points[i + 1] = 0;
}
SymmetricPerChannelQuantize(input, quantized, input_size, num_channels,
scales_array);
SymmetricPerChannelQuantize<int32_t>(input, quantized, input_size,
num_channels, scales_array);
affine_quant->scale = FloatArrayFromFloats(scales);
affine_quant->zero_point = IntArrayFromInts(zero_points);
affine_quant->quantized_dimension = quantized_dimension;
TfLiteTensor result = CreateTensor(dims, name, is_variable);
result.type = kTfLiteInt32;
result.data.i32 = const_cast<int32_t*>(quantized);
TfLiteTensor result = CreateTensor(quantized, dims, is_variable);
result.quantization = {kTfLiteAffineQuantization, affine_quant};
result.bytes = ElementCount(*dims) * sizeof(int32_t);
return result;
}
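The per-channel bias quantization above boils down to one multiply per channel; a small illustration with made-up values:
// Example values only: input scale 0.5, two output channels with weight
// scales 0.25 and 0.1. Each bias channel is quantized with
//   bias_scale[c] = input_scale * weight_scale[c]
void SketchPerChannelBiasScales() {
  const float input_scale = 0.5f;
  const float weight_scales[2] = {0.25f, 0.1f};
  float bias_scales[2];
  for (int c = 0; c < 2; ++c) {
    bias_scales[c] = input_scale * weight_scales[c];  // 0.125f and 0.05f
  }
  (void)bias_scales;
}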
TfLiteTensor CreateSymmetricPerChannelQuantizedTensor(
const float* input, int8_t* quantized, TfLiteIntArray* dims, float* scales,
int* zero_points, TfLiteAffineQuantization* affine_quant,
int quantized_dimension, const char* name, bool is_variable) {
int quantized_dimension, bool is_variable) {
int channel_count = dims->data[quantized_dimension];
scales[0] = static_cast<float>(channel_count);
zero_points[0] = channel_count;
@@ -777,13 +932,18 @@ TfLiteTensor CreateSymmetricPerChannelQuantizedTensor(
affine_quant->zero_point = IntArrayFromInts(zero_points);
affine_quant->quantized_dimension = quantized_dimension;
TfLiteTensor result = CreateTensor(dims, name, is_variable);
result.type = kTfLiteInt8;
result.data.int8 = const_cast<int8_t*>(quantized);
TfLiteTensor result = CreateTensor(quantized, dims, is_variable);
result.quantization = {kTfLiteAffineQuantization, affine_quant};
result.bytes = ElementCount(*dims) * sizeof(int8_t);
return result;
}
size_t GetModelTensorCount(const Model* model) {
auto* subgraphs = model->subgraphs();
if (subgraphs) {
return (*subgraphs)[0]->tensors()->size();
}
return 0;
}
} // namespace testing
} // namespace tflite

View File

@@ -18,15 +18,67 @@ limitations under the License.
// Useful functions for writing tests.
#include <cstdint>
#include <limits>
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite//kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/micro_utils.h"
#include "tensorflow/lite/portable_type_to_tflitetype.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
namespace testing {
constexpr int kOfflinePlannerHeaderSize = 3;
struct NodeConnection_ {
std::initializer_list<int32_t> input;
std::initializer_list<int32_t> output;
};
typedef struct NodeConnection_ NodeConnection;
// A simple operator that returns the median of the input along with the
// number of times the kernel has been invoked. The implementation below is
// deliberately complicated, just to demonstrate how kernel memory planning
// works.
class SimpleStatefulOp {
static constexpr int kBufferNotAllocated = 0;
// Inputs:
static constexpr int kInputTensor = 0;
// Outputs:
static constexpr int kMedianTensor = 0;
static constexpr int kInvokeCount = 1;
struct OpData {
int* invoke_count = nullptr;
int sorting_buffer = kBufferNotAllocated;
};
public:
static const TfLiteRegistration* getRegistration();
static TfLiteRegistration* GetMutableRegistration();
static void* Init(TfLiteContext* context, const char* buffer, size_t length);
static TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node);
static TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node);
};
class MockCustom {
public:
static const TfLiteRegistration* getRegistration();
static TfLiteRegistration* GetMutableRegistration();
static void* Init(TfLiteContext* context, const char* buffer, size_t length);
static void Free(TfLiteContext* context, void* buffer);
static TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node);
static TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node);
static bool freed_;
};
// Returns an Op Resolver that can be used in the testing code.
AllOpsResolver GetOpResolver();
// Returns a simple example flatbuffer TensorFlow Lite model. Contains 1 input,
// 1 layer of weights, 1 output Tensor, and 1 operator.
const Model* GetSimpleMockModel();
@@ -38,6 +90,24 @@ const Model* GetComplexMockModel();
// Returns a simple flatbuffer model with two branches.
const Model* GetSimpleModelWithBranch();
// Returns a simple flatbuffer model with offline planned tensors
// @param[in] num_tensors Number of tensors in the model.
// @param[in] metadata_buffer Metadata for offline planner.
// @param[in] node_conn List of connections, i.e. operators
// in the model.
// @param[in] num_conns Number of connections.
// @param[in] num_subgraph_inputs How many of the input tensors are in
// the subgraph inputs. The default value
// of 0 means all of the input tensors
// are in the subgraph input list. There
// must be at least 1 input tensor in the
// subgraph input list.
const Model* GetModelWithOfflinePlanning(int num_tensors,
const int32_t* metadata_buffer,
NodeConnection* node_conn,
int num_conns,
int num_subgraph_inputs = 0);
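A sketch of how this helper might be called from a test. The node connections and arena offsets are made-up values, and the metadata header is assumed to follow the [version, subgraph index, number of offsets] layout used by the offline memory planner.
const tflite::Model* BuildExampleOfflineModel() {
  constexpr int kNumTensors = 4;
  // Assumed header layout: version, subgraph index, number of offsets,
  // followed by one arena offset per tensor (offsets are example values).
  const int32_t metadata_buffer[tflite::testing::kOfflinePlannerHeaderSize +
                                kNumTensors] = {1, 0, kNumTensors,
                                                0, 48, 0, 48};
  tflite::testing::NodeConnection node_list[] = {
      {{0, 1}, {2}},  // Operator 0: tensors 0 and 1 in, tensor 2 out.
      {{2}, {3}},     // Operator 1: tensor 2 in, tensor 3 out.
  };
  return tflite::testing::GetModelWithOfflinePlanning(
      kNumTensors, metadata_buffer, node_list, /*num_conns=*/2);
}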
// Returns a flatbuffer model with `simple_stateful_op`
const Model* GetSimpleStatefulModel();
@@ -72,57 +142,80 @@ TfLiteIntArray* IntArrayFromInts(const int* int_array);
// supplied array must be the size of the array expressed as a float.
TfLiteFloatArray* FloatArrayFromFloats(const float* floats);
TfLiteTensor CreateFloatTensor(const float* data, TfLiteIntArray* dims,
const char* name, bool is_variable = false);
template <typename T>
TfLiteTensor CreateTensor(const T* data, TfLiteIntArray* dims,
const bool is_variable = false) {
TfLiteTensor result;
result.dims = dims;
result.params = {};
result.quantization = {kTfLiteNoQuantization, nullptr};
result.is_variable = is_variable;
result.allocation_type = kTfLiteMemNone;
result.type = typeToTfLiteType<T>();
// Const cast is used to allow passing in const and non-const arrays within a
// single CreateTensor method. A const array should be used for immutable
// input tensors and a non-const array should be used for mutable and output
// tensors.
result.data.data = const_cast<T*>(data);
result.quantization = {kTfLiteAffineQuantization, nullptr};
result.bytes = ElementCount(*dims) * sizeof(T);
return result;
}
void PopulateFloatTensor(TfLiteTensor* tensor, float* begin, float* end);
TfLiteTensor CreateBoolTensor(const bool* data, TfLiteIntArray* dims,
const char* name, bool is_variable = false);
TfLiteTensor CreateInt32Tensor(const int32_t*, TfLiteIntArray* dims,
const char* name, bool is_variable = false);
TfLiteTensor CreateQuantizedTensor(const uint8_t* data, TfLiteIntArray* dims,
float scale, int zero_point,
const char* name, bool is_variable = false);
TfLiteTensor CreateQuantizedTensor(const int8_t* data, TfLiteIntArray* dims,
float scale, int zero_point,
const char* name, bool is_variable = false);
TfLiteTensor CreateQuantizedTensor(const int16_t* data, TfLiteIntArray* dims,
float scale, int zero_point,
const char* name, bool is_variable = false);
template <typename T>
TfLiteTensor CreateQuantizedTensor(const T* data, TfLiteIntArray* dims,
const float scale, const int zero_point = 0,
const bool is_variable = false) {
TfLiteTensor result = CreateTensor(data, dims, is_variable);
result.params = {scale, zero_point};
result.quantization = {kTfLiteAffineQuantization, nullptr};
return result;
}
template <typename T>
TfLiteTensor CreateQuantizedTensor(const float* input, T* quantized,
TfLiteIntArray* dims, float scale,
int zero_point, const char* name,
bool is_variable = false) {
int zero_point, bool is_variable = false) {
int input_size = ElementCount(*dims);
tflite::AsymmetricQuantize(input, quantized, input_size, scale, zero_point);
return CreateQuantizedTensor(quantized, dims, scale, zero_point, name,
is_variable);
tflite::Quantize(input, quantized, input_size, scale, zero_point);
return CreateQuantizedTensor(quantized, dims, scale, zero_point, is_variable);
}
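A brief usage sketch of the templated helpers above (standalone test code, with arbitrary example data, scale, and zero point):
TfLiteTensor MakeExampleQuantizedInput() {
  // First element of the dims array is its length, per the IntArrayFromInts()
  // convention used by these helpers.
  static int dims_data[] = {1, 4};  // 1-D tensor with 4 elements.
  TfLiteIntArray* dims = tflite::testing::IntArrayFromInts(dims_data);
  static const float values[4] = {-1.0f, -0.5f, 0.5f, 1.0f};
  static int8_t quantized[4];  // Backing storage must outlive the tensor.
  // Quantizes `values` into `quantized` and wraps it in a TfLiteTensor.
  return tflite::testing::CreateQuantizedTensor(
      values, quantized, dims, /*scale=*/0.008f, /*zero_point=*/0);
}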
TfLiteTensor CreateQuantizedBiasTensor(const float* data, int32_t* quantized,
TfLiteIntArray* dims, float input_scale,
float weights_scale, const char* name,
float weights_scale,
bool is_variable = false);
// Quantizes int32 bias tensor with per-channel weights determined by input
// Quantizes int32_t bias tensor with per-channel weights determined by input
// scale multiplied by weight scale for each channel.
TfLiteTensor CreatePerChannelQuantizedBiasTensor(
const float* input, int32_t* quantized, TfLiteIntArray* dims,
float input_scale, float* weight_scales, float* scales, int* zero_points,
TfLiteAffineQuantization* affine_quant, int quantized_dimension,
const char* name, bool is_variable = false);
bool is_variable = false);
TfLiteTensor CreateSymmetricPerChannelQuantizedTensor(
const float* input, int8_t* quantized, TfLiteIntArray* dims, float* scales,
int* zero_points, TfLiteAffineQuantization* affine_quant,
int quantized_dimension, const char* name, bool is_variable = false);
int quantized_dimension, bool is_variable = false);
// Returns the number of tensors in the default subgraph for a tflite::Model.
size_t GetModelTensorCount(const Model* model);
// Derives the quantization scaling factor from a min and max range.
template <typename T>
inline float ScaleFromMinMax(const float min, const float max) {
return (max - min) /
static_cast<float>((std::numeric_limits<T>::max() * 1.0) -
std::numeric_limits<T>::min());
}
// Derives the quantization zero point from a min and max range.
template <typename T>
inline int ZeroPointFromMinMax(const float min, const float max) {
return static_cast<int>(std::numeric_limits<T>::min()) +
static_cast<int>(-min / ScaleFromMinMax<T>(min, max) + 0.5f);
}
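A worked example of the two helpers above (exact arithmetic; the float evaluation could in principle differ by one integer count):
// For T = int8_t over the range [-1.0f, 1.0f]:
//   scale      = (1.0 - (-1.0)) / (127 - (-128)) = 2.0f / 255.0f ≈ 0.00784f
//   zero_point = -128 + int(-(-1.0) / scale + 0.5) = -128 + 128 = 0
inline void SketchMinMaxQuantizationParams() {
  const float scale = tflite::testing::ScaleFromMinMax<int8_t>(-1.0f, 1.0f);
  const int zero_point = tflite::testing::ZeroPointFromMinMax<int8_t>(-1.0f, 1.0f);
  (void)scale;
  (void)zero_point;
}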
} // namespace testing
} // namespace tflite