Initial Code v0.1.0

jomjol
2020-08-07 17:42:29 +02:00
parent 0e2475bf0d
commit 4fe26dc0d8
269 changed files with 87264 additions and 0 deletions

View File

@@ -0,0 +1,32 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_COMPATIBILITY_H_
#define TENSORFLOW_LITE_MICRO_COMPATIBILITY_H_
// C++ will automatically create class-specific delete operators for virtual
// objects, which by default call the global delete function. For embedded
// applications we want to avoid this, and won't be calling new/delete on these
// objects, so we need to override the default implementation with one that does
// nothing to avoid linking in ::operator delete().
// This macro needs to be included in all subclasses of a virtual base class in
// the private section.
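//
// For example (an illustrative sketch, not part of this header):
//
// class MyOpResolver : public MicroMutableOpResolver {
// public:
// MyOpResolver();
// private:
// TF_LITE_REMOVE_VIRTUAL_DELETE
// };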
#ifdef TF_LITE_STATIC_MEMORY
#define TF_LITE_REMOVE_VIRTUAL_DELETE \
void operator delete(void* p) {}
#else
#define TF_LITE_REMOVE_VIRTUAL_DELETE
#endif
#endif // TENSORFLOW_LITE_MICRO_COMPATIBILITY_H_

View File

@@ -0,0 +1,41 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Reference implementation of the DebugLog() function that's required for a
// platform to support the TensorFlow Lite for Microcontrollers library. This is
// the only function that's absolutely required to be available on a target
// device, since it's used for communicating test results back to the host so
// that we can verify the implementation is working correctly.
// It's designed to be as easy as possible to supply an implementation, though.
// On platforms that have a POSIX stack or C library, it can be written as a
// single call to `fprintf(stderr, "%s", s)` to output a string to the error
// stream of the console, but if there's no OS or C library available, there's
// almost always an equivalent way to write out a string to some serial
// interface that can be used instead. For example on Arm M-series MCUs, calling
// the `bkpt #0xAB` assembler instruction will output the string in r1 to
// whatever debug serial connection is available. If you're running mbed, you
// can do the same by creating `Serial pc(USBTX, USBRX)` and then calling
// `pc.printf("%s", s)`.
// To add an equivalent function for your own platform, create your own
// implementation file, and place it in a subfolder named after the OS you're
// targeting. For example, see the Cortex M bare metal version in
// tensorflow/lite/micro/bluepill/debug_log.cc or the mbed one in
// tensorflow/lite/micro/mbed/debug_log.cc.
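//
// A minimal bare-metal sketch (illustrative; uart_write_byte stands in for
// whatever platform-specific routine sends one byte out a serial port):
//
// extern "C" void DebugLog(const char* s) {
// while (*s) {
// uart_write_byte(*s++); // hypothetical platform call
// }
// }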
#include "tensorflow/lite/micro/debug_log.h"
#include <cstdio>
extern "C" void DebugLog(const char* s) { fprintf(stderr, "%s", s); }

View File

@@ -0,0 +1,23 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_
#define TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_
// This function should be implemented by each target platform, and provide a
// way for strings to be output to some text stream. For more information, see
// tensorflow/lite/micro/debug_log.cc.
extern "C" void DebugLog(const char* s);
#endif // TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_

View File

@@ -0,0 +1,55 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATION_UTILS_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATION_UTILS_H_
#include <algorithm>
#include <cmath>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
namespace tflite {
namespace ops {
namespace micro {
// Returns the floating point value for a fused activation:
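// For example, ActivationValFloat(kTfLiteActRelu6, 7.5f) returns 6.0f and
// ActivationValFloat(kTfLiteActRelu, -2.0f) returns 0.0f.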
inline float ActivationValFloat(TfLiteFusedActivation act, float a) {
switch (act) {
case kTfLiteActNone:
return a;
case kTfLiteActRelu:
return std::max(0.0f, a);
case kTfLiteActRelu1:
return std::max(-1.0f, std::min(a, 1.0f));
case kTfLiteActRelu6:
return std::max(0.0f, std::min(a, 6.0f));
case kTfLiteActTanh:
return std::tanh(a);
case kTfLiteActSignBit:
return std::signbit(a);
case kTfLiteActSigmoid:
return 1.0f / (1.0f + std::exp(-a));
}
return 0.0f; // To indicate an unsupported activation (i.e. when a new fused
// activation is added to the enum and not handled here).
}
} // namespace micro
} // namespace ops
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATION_UTILS_H_

View File

@@ -0,0 +1,186 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace ops {
namespace micro {
namespace activations {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
template <typename Q>
inline void ReluQuantized(int32_t lower, const RuntimeShape& input_shape,
const Q* input_data, const RuntimeShape& output_shape,
Q* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const Q val = input_data[i];
const Q clamped = val < lower ? lower : val;
output_data[i] = clamped;
}
}
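// Note: clamping at the zero point is equivalent to clamping at 0.0f in real
// space, since real_value = scale * (quantized_value - zero_point).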
inline void ReluFloat(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const float val = input_data[i];
const float lower = 0.0f;
const float clamped = val < lower ? lower : val;
output_data[i] = clamped;
}
}
inline void Relu6Float(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const float val = input_data[i];
const float upper = 6.0f;
const float lower = 0.0f;
const float clamped = val > upper ? upper : val < lower ? lower : val;
output_data[i] = clamped;
}
}
template <typename Q>
inline void Relu6Quantized(Q lower, Q upper, const RuntimeShape& input_shape,
const Q* input_data,
const RuntimeShape& output_shape, Q* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const Q val = input_data[i];
const Q clamped = val > upper ? upper : val < lower ? lower : val;
output_data[i] = clamped;
}
}
TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteOk;
}
TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
switch (input->type) {
case kTfLiteFloat32: {
ReluFloat(GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<float>(output));
return kTfLiteOk;
}
case kTfLiteInt8: {
ReluQuantized<int8_t>(input->params.zero_point, GetTensorShape(input),
GetTensorData<int8_t>(input),
GetTensorShape(output),
GetTensorData<int8_t>(output));
return kTfLiteOk;
}
case kTfLiteUInt8: {
ReluQuantized<uint8_t>(input->params.zero_point, GetTensorShape(input),
GetTensorData<uint8_t>(input),
GetTensorShape(output),
GetTensorData<uint8_t>(output));
return kTfLiteOk;
}
default: {
TF_LITE_KERNEL_LOG(context,
"Only float32, int8 and uint8 are supported currently, got %s",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
}
}
TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteOk;
}
TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
switch (input->type) {
case kTfLiteFloat32: {
Relu6Float(GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<float>(output));
return kTfLiteOk;
}
case kTfLiteInt8: {
const int8_t six = FloatToAsymmetricQuantizedInt8(
6.0f, input->params.scale, input->params.zero_point);
const int8_t zero = input->params.zero_point;
Relu6Quantized<int8_t>(
zero, six, GetTensorShape(input), GetTensorData<int8_t>(input),
GetTensorShape(output), GetTensorData<int8_t>(output));
return kTfLiteOk;
}
case kTfLiteUInt8: {
const uint8_t six = FloatToAsymmetricQuantizedUInt8(
6.0f, input->params.scale, input->params.zero_point);
const uint8_t zero = input->params.zero_point;
Relu6Quantized<uint8_t>(
zero, six, GetTensorShape(input), GetTensorData<uint8_t>(input),
GetTensorShape(output), GetTensorData<uint8_t>(output));
return kTfLiteOk;
}
default: {
TF_LITE_KERNEL_LOG(context,
"Only float32, int8 and uint8 are supported currently, got %s",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
}
}
} // namespace activations
TfLiteRegistration* Register_RELU() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/activations::ReluPrepare,
/*invoke=*/activations::ReluEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
TfLiteRegistration* Register_RELU6() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/activations::Relu6Prepare,
/*invoke=*/activations::Relu6Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,204 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/add.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
namespace tflite {
namespace ops {
namespace micro {
namespace add {
constexpr int kInputTensor1 = 0;
constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;
struct OpData {
bool requires_broadcast;
// These fields are used in both the general 8-bit -> 8-bit quantized path
// and the special 16-bit -> 16-bit quantized path.
int input1_shift;
int input2_shift;
int32 output_activation_min;
int32 output_activation_max;
// These fields are used only in the general 8-bit -> 8-bit quantized path
int32 input1_multiplier;
int32 input2_multiplier;
int32 output_multiplier;
int output_shift;
int left_shift;
int32 input1_offset;
int32 input2_offset;
int32 output_offset;
};
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params,
const TfLiteTensor* input1,
const TfLiteTensor* input2, TfLiteTensor* output,
OpData* data) {
data->requires_broadcast = !HaveSameShapes(input1, input2);
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
// 8-bit -> 8-bit general quantized path, with general rescalings
data->input1_offset = -input1->params.zero_point;
data->input2_offset = -input2->params.zero_point;
data->output_offset = output->params.zero_point;
data->left_shift = 20;
const double twice_max_input_scale =
2 * static_cast<double>(
std::max(input1->params.scale, input2->params.scale));
const double real_input1_multiplier =
static_cast<double>(input1->params.scale) / twice_max_input_scale;
const double real_input2_multiplier =
static_cast<double>(input2->params.scale) / twice_max_input_scale;
const double real_output_multiplier =
twice_max_input_scale /
((1 << data->left_shift) * static_cast<double>(output->params.scale));
QuantizeMultiplierSmallerThanOneExp(
real_input1_multiplier, &data->input1_multiplier, &data->input1_shift);
QuantizeMultiplierSmallerThanOneExp(
real_input2_multiplier, &data->input2_multiplier, &data->input2_shift);
QuantizeMultiplierSmallerThanOneExp(
real_output_multiplier, &data->output_multiplier, &data->output_shift);
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, params->activation, output, &data->output_activation_min,
&data->output_activation_max));
}
return kTfLiteOk;
}
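// Worked example (illustrative values): with input scales 0.5 and 1.0,
// twice_max_input_scale is 2.0, so real_input1_multiplier = 0.25 and
// real_input2_multiplier = 0.5. Both inputs are thereby rescaled onto a
// common scale before the integer addition, and real_output_multiplier undoes
// the (1 << left_shift) headroom applied during accumulation.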
void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params,
const OpData* data, const TfLiteTensor* input1,
const TfLiteTensor* input2, TfLiteTensor* output) {
float output_activation_min, output_activation_max;
CalculateActivationRange(params->activation, &output_activation_min,
&output_activation_max);
tflite::ArithmeticParams op_params;
SetActivationParams(output_activation_min, output_activation_max, &op_params);
#define TF_LITE_ADD(opname) \
reference_ops::opname(op_params, GetTensorShape(input1), \
GetTensorData<float>(input1), GetTensorShape(input2), \
GetTensorData<float>(input2), GetTensorShape(output), \
GetTensorData<float>(output))
if (data->requires_broadcast) {
TF_LITE_ADD(BroadcastAdd4DSlow);
} else {
TF_LITE_ADD(Add);
}
#undef TF_LITE_ADD
}
TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteAddParams* params, const OpData* data,
const TfLiteTensor* input1,
const TfLiteTensor* input2,
TfLiteTensor* output) {
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
tflite::ArithmeticParams op_params;
op_params.left_shift = data->left_shift;
op_params.input1_offset = data->input1_offset;
op_params.input1_multiplier = data->input1_multiplier;
op_params.input1_shift = data->input1_shift;
op_params.input2_offset = data->input2_offset;
op_params.input2_multiplier = data->input2_multiplier;
op_params.input2_shift = data->input2_shift;
op_params.output_offset = data->output_offset;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
SetActivationParams(data->output_activation_min,
data->output_activation_max, &op_params);
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
GetTensorShape(input1), GetTensorShape(input2), &op_params);
#define TF_LITE_ADD(type, opname, dtype) \
type::opname(op_params, GetTensorShape(input1), \
GetTensorData<dtype>(input1), GetTensorShape(input2), \
GetTensorData<dtype>(input2), GetTensorShape(output), \
GetTensorData<dtype>(output));
if (output->type == kTfLiteInt8) {
if (need_broadcast) {
TF_LITE_ADD(reference_integer_ops, BroadcastAdd4DSlow, int8_t);
} else {
TF_LITE_ADD(reference_integer_ops, Add, int8_t);
}
} else {
if (need_broadcast) {
TF_LITE_ADD(reference_ops, BroadcastAdd4DSlow, uint8_t);
} else {
TF_LITE_ADD(reference_ops, Add, uint8_t);
}
}
#undef TF_LITE_ADD
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
OpData data;
TF_LITE_ENSURE_STATUS(
CalculateOpData(context, params, input1, input2, output, &data));
if (output->type == kTfLiteFloat32) {
EvalAdd(context, node, params, &data, input1, input2, output);
} else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, &data,
input1, input2, output));
} else {
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(output->type), output->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace add
TfLiteRegistration* Register_ADD() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/add::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,83 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/kernels/all_ops_resolver.h"
#include "tensorflow/lite/micro/kernels/micro_ops.h"
namespace tflite {
namespace ops {
namespace micro {
// Register each supported op with:
// AddBuiltin(<operator ID>, <registration>, [min version], [max version])
AllOpsResolver::AllOpsResolver() {
AddBuiltin(BuiltinOperator_FULLY_CONNECTED, Register_FULLY_CONNECTED(), 1, 4);
AddBuiltin(BuiltinOperator_MAX_POOL_2D, Register_MAX_POOL_2D(), 1, 2);
AddBuiltin(BuiltinOperator_SOFTMAX, Register_SOFTMAX(), 1, 2);
AddBuiltin(BuiltinOperator_LOGISTIC, Register_LOGISTIC(), 1, 2);
AddBuiltin(BuiltinOperator_SVDF, Register_SVDF(), 1, 3);
AddBuiltin(BuiltinOperator_CONV_2D, Register_CONV_2D(), 1, 3);
AddBuiltin(BuiltinOperator_CONCATENATION, Register_CONCATENATION(), 1, 3);
AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D, Register_DEPTHWISE_CONV_2D(), 1,
3);
AddBuiltin(BuiltinOperator_AVERAGE_POOL_2D, Register_AVERAGE_POOL_2D(), 1, 2);
AddBuiltin(BuiltinOperator_ABS, Register_ABS());
AddBuiltin(BuiltinOperator_SIN, Register_SIN());
AddBuiltin(BuiltinOperator_COS, Register_COS());
AddBuiltin(BuiltinOperator_LOG, Register_LOG());
AddBuiltin(BuiltinOperator_SQRT, Register_SQRT());
AddBuiltin(BuiltinOperator_RSQRT, Register_RSQRT());
AddBuiltin(BuiltinOperator_SQUARE, Register_SQUARE());
AddBuiltin(BuiltinOperator_PRELU, Register_PRELU());
AddBuiltin(BuiltinOperator_FLOOR, Register_FLOOR());
AddBuiltin(BuiltinOperator_MAXIMUM, Register_MAXIMUM());
AddBuiltin(BuiltinOperator_MINIMUM, Register_MINIMUM());
AddBuiltin(BuiltinOperator_ARG_MAX, Register_ARG_MAX());
AddBuiltin(BuiltinOperator_ARG_MIN, Register_ARG_MIN());
AddBuiltin(BuiltinOperator_LOGICAL_OR, Register_LOGICAL_OR());
AddBuiltin(BuiltinOperator_LOGICAL_AND, Register_LOGICAL_AND());
AddBuiltin(BuiltinOperator_LOGICAL_NOT, Register_LOGICAL_NOT());
AddBuiltin(BuiltinOperator_RESHAPE, Register_RESHAPE());
AddBuiltin(BuiltinOperator_EQUAL, Register_EQUAL(), 1, 2);
AddBuiltin(BuiltinOperator_NOT_EQUAL, Register_NOT_EQUAL(), 1, 2);
AddBuiltin(BuiltinOperator_GREATER, Register_GREATER(), 1, 2);
AddBuiltin(BuiltinOperator_GREATER_EQUAL, Register_GREATER_EQUAL(), 1, 2);
AddBuiltin(BuiltinOperator_LESS, Register_LESS(), 1, 2);
AddBuiltin(BuiltinOperator_LESS_EQUAL, Register_LESS_EQUAL(), 1, 2);
AddBuiltin(BuiltinOperator_CEIL, Register_CEIL());
AddBuiltin(BuiltinOperator_ROUND, Register_ROUND());
AddBuiltin(BuiltinOperator_STRIDED_SLICE, Register_STRIDED_SLICE());
AddBuiltin(BuiltinOperator_PACK, Register_PACK(), 1, 2);
AddBuiltin(BuiltinOperator_PAD, Register_PAD(), 1, 2);
AddBuiltin(BuiltinOperator_PADV2, Register_PADV2(), 1, 2);
AddBuiltin(BuiltinOperator_SPLIT, Register_SPLIT(), 1, 3);
AddBuiltin(BuiltinOperator_UNPACK, Register_UNPACK(), 1, 2);
AddBuiltin(BuiltinOperator_NEG, Register_NEG());
AddBuiltin(BuiltinOperator_ADD, Register_ADD(), 1, 2);
AddBuiltin(BuiltinOperator_MUL, Register_MUL(), 1, 3);
AddBuiltin(BuiltinOperator_SUB, Register_SUB(), 1, 2);
AddBuiltin(BuiltinOperator_QUANTIZE, Register_QUANTIZE());
AddBuiltin(BuiltinOperator_DEQUANTIZE, Register_DEQUANTIZE(), 1, 2);
AddBuiltin(BuiltinOperator_RELU, Register_RELU());
AddBuiltin(BuiltinOperator_RELU6, Register_RELU6());
AddBuiltin(BuiltinOperator_MEAN, Register_MEAN());
AddBuiltin(BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
Register_RESIZE_NEAREST_NEIGHBOR(),
/* min_version = */ 1,
/* max_version = */ 2);
AddBuiltin(BuiltinOperator_L2_NORMALIZATION, Register_L2_NORMALIZATION());
}
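// Typical usage (a sketch; the MicroInterpreter constructor shown matches the
// TFLite Micro API of this vintage, but treat it as illustrative):
//
// tflite::ops::micro::AllOpsResolver resolver;
// tflite::MicroInterpreter interpreter(model, resolver, tensor_arena,
// kTensorArenaSize, &error_reporter);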
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,34 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ALL_OPS_RESOLVER_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_ALL_OPS_RESOLVER_H_
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
namespace tflite {
namespace ops {
namespace micro {
class AllOpsResolver : public MicroMutableOpResolver {
public:
AllOpsResolver();
private:
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace micro
} // namespace ops
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_ALL_OPS_RESOLVER_H_

View File

@@ -0,0 +1,127 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/arg_min_max.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/micro_utils.h"
namespace tflite {
namespace ops {
namespace micro {
namespace arg_min_max {
constexpr int kInputTensor = 0;
constexpr int kAxis = 1;
constexpr int kOutputTensor = 0;
template <typename T1, typename T2, typename T3>
inline void ArgMinMaxHelper(const RuntimeShape& input1_shape,
const T1* input1_data, const T3* input2_data,
const RuntimeShape& output_shape, T2* output_data,
bool is_arg_max) {
if (is_arg_max) {
reference_ops::ArgMinMax(input1_shape, input1_data, input2_data,
output_shape, output_data, micro::Greater());
} else {
reference_ops::ArgMinMax(input1_shape, input1_data, input2_data,
output_shape, output_data, micro::Less());
}
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node, bool is_arg_max) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* axis = GetInput(context, node, kAxis);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
#define TF_LITE_ARG_MIN_MAX(data_type, axis_type, output_type) \
ArgMinMaxHelper(GetTensorShape(input), GetTensorData<data_type>(input), \
GetTensorData<axis_type>(axis), GetTensorShape(output), \
GetTensorData<output_type>(output), is_arg_max)
if (axis->type == kTfLiteInt32) {
if (output->type == kTfLiteInt32) {
switch (input->type) {
case kTfLiteFloat32:
TF_LITE_ARG_MIN_MAX(float, int32_t, int32_t);
break;
case kTfLiteUInt8:
TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int32_t);
break;
case kTfLiteInt8:
TF_LITE_ARG_MIN_MAX(int8_t, int32_t, int32_t);
break;
default:
TF_LITE_KERNEL_LOG(context,
"Only float32, uint8 and int8 are "
"supported currently, got %s.",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
} else {
TF_LITE_KERNEL_LOG(context, "Only int32 is supported currently, got %s.",
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else {
TF_LITE_KERNEL_LOG(context, "Only int32 is supported currently, got %s.",
TfLiteTypeGetName(axis->type));
return kTfLiteError;
}
#undef TF_LITE_ARG_MIN_MAX
return kTfLiteOk;
}
TfLiteStatus ArgMinEval(TfLiteContext* context, TfLiteNode* node) {
return Eval(context, node, false);
}
TfLiteStatus ArgMaxEval(TfLiteContext* context, TfLiteNode* node) {
return Eval(context, node, true);
}
} // namespace arg_min_max
TfLiteRegistration* Register_ARG_MAX() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/arg_min_max::ArgMaxEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
TfLiteRegistration* Register_ARG_MIN() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/arg_min_max::ArgMinEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,70 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/ceil.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace ceil {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32);
TF_LITE_ENSURE_EQ(context, output->type, input->type);
TF_LITE_ENSURE_EQ(context, output->bytes, input->bytes);
TF_LITE_ENSURE_EQ(context, output->dims->size, input->dims->size);
for (int i = 0; i < output->dims->size; ++i) {
TF_LITE_ENSURE_EQ(context, output->dims->data[i], input->dims->data[i]);
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
reference_ops::Ceil(GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<float>(output));
return kTfLiteOk;
}
} // namespace ceil
TfLiteRegistration* Register_CEIL() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/ceil::Prepare,
/*invoke=*/ceil::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,175 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
/*
* The circular buffer custom operator is used to implement strided streaming
* convolutions on TFLite Micro. Each time this operator is invoked, it checks
* whether or not to run, based on a predetermined stride in time. If the op
* runs, it inserts the input into the end of the output buffer and shifts the
* output values towards the start of the buffer. It discards the oldest value
* in the output buffer.
*
* Input: [<input N+1>]
* Before shifting:
* Output: [<input 1>, <input 2>, <input ...>, <input N>]
*
* After shifting:
* Output: [<input 2>, <input 3>, <input ...>, <input N+1>]
*
* We make some assumptions in this custom operator:
* - Input shape must be [1, 1, 1, depth]
* - Output shape must be [1, num_slots, 1, depth]
* - Input and output types must match.
* - Input and output quantization params must be identical.
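*
* For example (illustrative), with num_slots = 3 and depth = 2:
* Output before invoke: [a0, a1, b0, b1, c0, c1], new input: [d0, d1]
* Output after invoke: [b0, b1, c0, c1, d0, d1]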
*/
namespace tflite {
namespace ops {
namespace micro {
namespace circular_buffer {
namespace {
// The CircularBuffer op has one input and one output tensor.
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
// TODO(b/149795762): Add this to TfLiteStatus enum.
constexpr int kTfLiteAbort = -9;
// These fields control the stride period of a strided streaming model. This op
// returns kTfLiteAbort until cycles_until_run reaches zero, at which point it
// is reset to cycles_max.
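// For example, with cycles_max = 2 the interpreter is allowed to continue
// past this op on every second invocation; the other invocations return
// kTfLiteAbort.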
struct OpData {
int cycles_until_run;
int cycles_max;
};
// These values are specific to the music detection model; they can be
// removed once (b/132070898) is fixed.
constexpr int kMaxOpDataSize = 7;
int op_data_counter = 0;
OpData op_data_array[kMaxOpDataSize];
} // namespace
void Free(TfLiteContext* context, void* buffer) { op_data_counter = 0; }
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_EQ(context, 1, output->dims->data[0]);
TF_LITE_ENSURE_EQ(context, 1, input->dims->data[0]);
TF_LITE_ENSURE_EQ(context, 1, input->dims->data[1]);
TF_LITE_ENSURE_EQ(context, 1, output->dims->data[2]);
TF_LITE_ENSURE_EQ(context, 1, input->dims->data[2]);
TF_LITE_ENSURE_EQ(context, output->dims->data[3], input->dims->data[3]);
TF_LITE_ENSURE_EQ(context, input->type, output->type);
// The circular buffer custom operator currently only supports int8.
TF_LITE_ENSURE_EQ(context, input->type, kTfLiteInt8);
// TODO(b/132070898): Use statically slotted OpData structures until a
// scratch memory API is ready.
// Strictly less than, since op_data_array[op_data_counter] is used below.
TFLITE_DCHECK_LT(op_data_counter, kMaxOpDataSize);
OpData* op_data = &op_data_array[op_data_counter++];
// The last circular buffer layer (length 5) simply accumulates outputs, and
// does not run periodically.
// TODO(b/150001379): Move this special case logic to the tflite flatbuffer.
if (output->dims->data[1] == 5) {
op_data->cycles_max = 1;
} else {
op_data->cycles_max = 2;
}
op_data->cycles_until_run = op_data->cycles_max;
node->user_data = op_data;
return kTfLiteOk;
}
// Shifts the buffer contents toward the start by one slot (depth elements) and
// writes the new input at the end. num_slots is the number of samples stored
// in the output buffer; depth is the size of each sample. Byte and element
// counts coincide here because the data type is int8_t.
void EvalInt8(const int8_t* input, int num_slots, int depth, int8_t* output) {
memmove(output, &output[depth], (num_slots - 1) * depth);
memcpy(&output[(num_slots - 1) * depth], input, depth);
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
OpData* data = reinterpret_cast<OpData*>(node->user_data);
int num_slots = output->dims->data[1];
int depth = output->dims->data[3];
if (input->type == kTfLiteInt8) {
EvalInt8(GetTensorData<int8_t>(input), num_slots, depth,
GetTensorData<int8_t>(output));
} else {
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);
return kTfLiteError;
}
if (--data->cycles_until_run != 0) {
// Signal the interpreter to end the current run if the delay before op
// invoke has not been reached.
// TODO(b/149795762): Add kTfLiteAbort to TfLiteStatus enum.
return static_cast<TfLiteStatus>(kTfLiteAbort);
}
// If Prepare is ever called more than once (for example, when testing the
// ambient model the interpreter is created several times), this op data
// counter needs to be reset so that future instances do not overrun the op
// data array.
op_data_counter = 0;
data->cycles_until_run = data->cycles_max;
return kTfLiteOk;
}
} // namespace circular_buffer
TfLiteRegistration* Register_CIRCULAR_BUFFER() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/circular_buffer::Free,
/*prepare=*/circular_buffer::Prepare,
/*invoke=*/circular_buffer::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,370 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/comparisons.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace comparisons {
namespace {
constexpr int kInputTensor1 = 0;
constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;
// TODO(ruic): optimize the macros below to use template functions.
#define TF_LITE_QUANTIZE_COMPARISON(opname) \
template <typename input_dtype> \
void EvalQuantized##opname(TfLiteContext* context, TfLiteNode* node, \
const TfLiteTensor* input1, \
const TfLiteTensor* input2, TfLiteTensor* output, \
bool requires_broadcast) { \
if (input1->type == kTfLiteUInt8 || input1->type == kTfLiteInt8) { \
auto input1_offset = -input1->params.zero_point; \
auto input2_offset = -input2->params.zero_point; \
const int left_shift = 8; \
\
int32 input1_multiplier; \
int input1_shift; \
QuantizeMultiplierSmallerThanOneExp( \
static_cast<double>(input1->params.scale), &input1_multiplier, \
&input1_shift); \
int32 input2_multiplier; \
int input2_shift; \
QuantizeMultiplierSmallerThanOneExp( \
static_cast<double>(input2->params.scale), &input2_multiplier, \
&input2_shift); \
\
ComparisonParams op_params; \
op_params.left_shift = left_shift; \
op_params.input1_offset = input1_offset; \
op_params.input1_multiplier = input1_multiplier; \
op_params.input1_shift = input1_shift; \
op_params.input2_offset = input2_offset; \
op_params.input2_multiplier = input2_multiplier; \
op_params.input2_shift = input2_shift; \
if (requires_broadcast) { \
reference_ops::Broadcast4DSlow##opname##WithScaling( \
op_params, GetTensorShape(input1), \
GetTensorData<input_dtype>(input1), GetTensorShape(input2), \
GetTensorData<input_dtype>(input2), GetTensorShape(output), \
GetTensorData<bool>(output)); \
} else { \
reference_ops::opname##WithScaling( \
op_params, GetTensorShape(input1), \
GetTensorData<input_dtype>(input1), GetTensorShape(input2), \
GetTensorData<input_dtype>(input2), GetTensorShape(output), \
GetTensorData<bool>(output)); \
} \
} \
}
TF_LITE_QUANTIZE_COMPARISON(Equal);
TF_LITE_QUANTIZE_COMPARISON(NotEqual);
TF_LITE_QUANTIZE_COMPARISON(Greater);
TF_LITE_QUANTIZE_COMPARISON(GreaterEqual);
TF_LITE_QUANTIZE_COMPARISON(Less);
TF_LITE_QUANTIZE_COMPARISON(LessEqual);
#undef TF_LITE_QUANTIZE_COMPARISON
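// Each expansion above defines a function template EvalQuantized<opname>
// (e.g. EvalQuantizedEqual<int8_t>), which the Eval functions below invoke
// for uint8/int8 inputs.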
#define TF_LITE_COMPARISON(type, opname, requires_broadcast) \
{ \
ComparisonParams op_params; \
requires_broadcast \
? reference_ops::Broadcast4DSlow##opname##NoScaling( \
op_params, GetTensorShape(input1), GetTensorData<type>(input1), \
GetTensorShape(input2), GetTensorData<type>(input2), \
GetTensorShape(output), GetTensorData<bool>(output)) \
: reference_ops::opname##NoScaling( \
op_params, GetTensorShape(input1), GetTensorData<type>(input1), \
GetTensorShape(input2), GetTensorData<type>(input2), \
GetTensorShape(output), GetTensorData<bool>(output)); \
}
TfLiteStatus EqualEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
bool requires_broadcast = !HaveSameShapes(input1, input2);
switch (input1->type) {
case kTfLiteBool:
TF_LITE_COMPARISON(bool, Equal, requires_broadcast);
break;
case kTfLiteFloat32:
TF_LITE_COMPARISON(float, Equal, requires_broadcast);
break;
case kTfLiteInt32:
TF_LITE_COMPARISON(int32_t, Equal, requires_broadcast);
break;
case kTfLiteInt64:
TF_LITE_COMPARISON(int64_t, Equal, requires_broadcast);
break;
case kTfLiteUInt8:
EvalQuantizedEqual<uint8_t>(context, node, input1, input2, output,
requires_broadcast);
break;
case kTfLiteInt8:
EvalQuantizedEqual<int8_t>(context, node, input1, input2, output,
requires_broadcast);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input1->type), input1->type);
return kTfLiteError;
}
return kTfLiteOk;
}
// TODO(renjieliu): Refactor the logic to avoid duplications.
TfLiteStatus NotEqualEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
bool requires_broadcast = !HaveSameShapes(input1, input2);
switch (input1->type) {
case kTfLiteBool:
TF_LITE_COMPARISON(bool, NotEqual, requires_broadcast);
break;
case kTfLiteFloat32:
TF_LITE_COMPARISON(float, NotEqual, requires_broadcast);
break;
case kTfLiteInt32:
TF_LITE_COMPARISON(int32_t, NotEqual, requires_broadcast);
break;
case kTfLiteInt64:
TF_LITE_COMPARISON(int64_t, NotEqual, requires_broadcast);
break;
case kTfLiteUInt8:
EvalQuantizedNotEqual<uint8_t>(context, node, input1, input2, output,
requires_broadcast);
break;
case kTfLiteInt8:
EvalQuantizedNotEqual<int8_t>(context, node, input1, input2, output,
requires_broadcast);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input1->type), input1->type);
return kTfLiteError;
}
return kTfLiteOk;
}
TfLiteStatus GreaterEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
bool requires_broadcast = !HaveSameShapes(input1, input2);
switch (input1->type) {
case kTfLiteFloat32:
TF_LITE_COMPARISON(float, Greater, requires_broadcast);
break;
case kTfLiteInt32:
TF_LITE_COMPARISON(int32_t, Greater, requires_broadcast);
break;
case kTfLiteInt64:
TF_LITE_COMPARISON(int64_t, Greater, requires_broadcast);
break;
case kTfLiteUInt8:
EvalQuantizedGreater<uint8_t>(context, node, input1, input2, output,
requires_broadcast);
break;
case kTfLiteInt8:
EvalQuantizedGreater<int8_t>(context, node, input1, input2, output,
requires_broadcast);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input1->type), input1->type);
return kTfLiteError;
}
return kTfLiteOk;
}
TfLiteStatus GreaterEqualEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
bool requires_broadcast = !HaveSameShapes(input1, input2);
switch (input1->type) {
case kTfLiteFloat32:
TF_LITE_COMPARISON(float, GreaterEqual, requires_broadcast);
break;
case kTfLiteInt32:
TF_LITE_COMPARISON(int32_t, GreaterEqual, requires_broadcast);
break;
case kTfLiteInt64:
TF_LITE_COMPARISON(int64_t, GreaterEqual, requires_broadcast);
break;
case kTfLiteUInt8:
EvalQuantizedGreaterEqual<uint8_t>(context, node, input1, input2, output,
requires_broadcast);
break;
case kTfLiteInt8:
EvalQuantizedGreaterEqual<int8_t>(context, node, input1, input2, output,
requires_broadcast);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input1->type), input1->type);
return kTfLiteError;
}
return kTfLiteOk;
}
TfLiteStatus LessEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
bool requires_broadcast = !HaveSameShapes(input1, input2);
switch (input1->type) {
case kTfLiteFloat32:
TF_LITE_COMPARISON(float, Less, requires_broadcast);
break;
case kTfLiteInt32:
TF_LITE_COMPARISON(int32_t, Less, requires_broadcast);
break;
case kTfLiteInt64:
TF_LITE_COMPARISON(int64_t, Less, requires_broadcast);
break;
case kTfLiteUInt8:
EvalQuantizedLess<uint8_t>(context, node, input1, input2, output,
requires_broadcast);
break;
case kTfLiteInt8:
EvalQuantizedLess<int8_t>(context, node, input1, input2, output,
requires_broadcast);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input1->type), input1->type);
return kTfLiteError;
}
return kTfLiteOk;
}
TfLiteStatus LessEqualEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
bool requires_broadcast = !HaveSameShapes(input1, input2);
switch (input1->type) {
case kTfLiteFloat32:
TF_LITE_COMPARISON(float, LessEqual, requires_broadcast);
break;
case kTfLiteInt32:
TF_LITE_COMPARISON(int32_t, LessEqual, requires_broadcast);
break;
case kTfLiteInt64:
TF_LITE_COMPARISON(int64_t, LessEqual, requires_broadcast);
break;
case kTfLiteUInt8:
EvalQuantizedLessEqual<uint8_t>(context, node, input1, input2, output,
requires_broadcast);
break;
case kTfLiteInt8:
EvalQuantizedLessEqual<int8_t>(context, node, input1, input2, output,
requires_broadcast);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input1->type), input1->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace
} // namespace comparisons
TfLiteRegistration* Register_EQUAL() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/comparisons::EqualEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
TfLiteRegistration* Register_NOT_EQUAL() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/comparisons::NotEqualEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
TfLiteRegistration* Register_GREATER() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/comparisons::GreaterEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
TfLiteRegistration* Register_GREATER_EQUAL() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/comparisons::GreaterEqualEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
TfLiteRegistration* Register_LESS() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/comparisons::LessEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
TfLiteRegistration* Register_LESS_EQUAL() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/comparisons::LessEqualEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,231 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/concatenation.h"
#include <cstdint>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace concatenation {
constexpr int kMaxInputNum = 10; // Maximum number of input tensors
constexpr int kOutputTensor = 0;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
// This function only checks the types. Additional shape validations are
// performed in the reference implementation called during Eval().
const TfLiteConcatenationParams* params =
reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data);
TfLiteType input_type = GetInput(context, node, 0)->type;
TfLiteType output_type = GetOutput(context, node, kOutputTensor)->type;
// Check activation and input type
TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone);
TF_LITE_ENSURE(context,
input_type == kTfLiteFloat32 || input_type == kTfLiteUInt8 ||
input_type == kTfLiteInt8 || input_type == kTfLiteInt32 ||
input_type == kTfLiteInt64);
// Output type must match input type
TF_LITE_ENSURE_EQ(context, output_type, input_type);
// This implementation does not support a large number of input tensors
const int num_inputs = NumInputs(node);
TF_LITE_ENSURE(context, num_inputs <= kMaxInputNum);
// Shapes with dimensions >4 are not yet supported with static allocation.
for (int i = 0; i < num_inputs; ++i) {
const TfLiteTensor* input = GetInput(context, node, i);
int num_dimensions = NumDimensions(input);
if (num_dimensions > 4) {
TF_LITE_KERNEL_LOG(
context,
"Op Concatenation does not currently support num dimensions > 4. "
"Tensor '%s' has %d dimensions.",
input->name, num_dimensions);
return kTfLiteError;
}
}
return kTfLiteOk;
}
// Handles negative axis index, coerces to positive index value.
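// For example, axis = -1 on a 4-D output tensor maps to axis 3.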
inline int CalculatePositiveAxis(int axis, const TfLiteTensor* output_tensor) {
if (axis >= 0) {
return axis;
} else {
return NumDimensions(output_tensor) + axis;
}
}
// The following functions are helpers to get tensor data in the format that the
// reference op implementation expects. They provide the same functionality as
// class VectorOfTensors and class VectorOfQuantizedTensors in TFLite.
// Gets shapes from a list of tensors.
inline void GetAllTensorShapes(const TfLiteContext& context,
const TfLiteIntArray& tensor_list,
RuntimeShape all_shapes[kMaxInputNum]) {
for (int i = 0; i < tensor_list.size; ++i) {
const TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
RuntimeShape shape = GetTensorShape(t);
all_shapes[i].ReplaceWith(shape.DimensionsCount(), shape.DimsData());
}
}
// Gets shape pointers from a list of shapes.
inline void GetShapesPointers(const RuntimeShape* shapes, size_t num,
const RuntimeShape* pointers[]) {
for (size_t i = 0; i < num; ++i) {
pointers[i] = &shapes[i];
}
}
// Gets data pointers from a list of tensors.
template <typename T>
inline void GetAllTensorData(const TfLiteContext& context,
const TfLiteIntArray& tensor_list,
T* all_data[kMaxInputNum]) {
for (int i = 0; i < tensor_list.size; ++i) {
const TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
all_data[i] = GetTensorData<T>(t);
}
}
// Gets scale and zero point from a list of tensors
inline void GetAllQuantizationParam(const TfLiteContext& context,
const TfLiteIntArray& tensor_list,
float scales[kMaxInputNum],
int32 zero_points[kMaxInputNum]) {
for (int i = 0; i < tensor_list.size; ++i) {
const TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
scales[i] = t->params.scale;
zero_points[i] = t->params.zero_point;
}
}
template <typename data_type>
void EvalUnquantized(TfLiteContext* context, TfLiteNode* node) {
// Collect the shapes and data pointer of input tensors
RuntimeShape inputs_shape[kMaxInputNum];
const RuntimeShape* inputs_shape_ptr[kMaxInputNum];
const data_type* inputs_data[kMaxInputNum];
GetAllTensorShapes(*context, *node->inputs, inputs_shape);
GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr);
GetAllTensorData(*context, *node->inputs, inputs_data);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteConcatenationParams* params =
reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data);
ConcatenationParams op_params;
op_params.axis = CalculatePositiveAxis(params->axis, output);
op_params.inputs_count = NumInputs(node);
reference_ops::Concatenation(op_params, inputs_shape_ptr, inputs_data,
GetTensorShape(output),
GetTensorData<data_type>(output));
}
void EvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node) {
// Collect the shapes and data pointer of input tensors
RuntimeShape inputs_shape[kMaxInputNum];
const RuntimeShape* inputs_shape_ptr[kMaxInputNum];
const uint8_t* inputs_data[kMaxInputNum];
float inputs_scale[kMaxInputNum];
int32 inputs_zero_point[kMaxInputNum];
GetAllTensorShapes(*context, *node->inputs, inputs_shape);
GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr);
GetAllTensorData(*context, *node->inputs, inputs_data);
GetAllQuantizationParam(*context, *node->inputs, inputs_scale,
inputs_zero_point);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteConcatenationParams* params =
reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data);
ConcatenationParams op_params;
op_params.axis = CalculatePositiveAxis(params->axis, output);
op_params.inputs_count = NumInputs(node);
op_params.input_zeropoint = inputs_zero_point;
op_params.input_scale = inputs_scale;
op_params.output_zeropoint = output->params.zero_point;
op_params.output_scale = output->params.scale;
reference_ops::ConcatenationWithScaling(op_params, inputs_shape_ptr,
inputs_data, GetTensorShape(output),
GetTensorData<uint8>(output));
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TfLiteType output_type = GetOutput(context, node, kOutputTensor)->type;
switch (output_type) { // Input and output types already match (checked in Prepare).
case kTfLiteFloat32:
EvalUnquantized<float>(context, node);
break;
case kTfLiteInt32:
EvalUnquantized<int32_t>(context, node);
break;
case kTfLiteUInt8:
EvalQuantizedUInt8(context, node);
break;
case kTfLiteInt8:
EvalUnquantized<int8_t>(context, node);
break;
case kTfLiteInt64:
EvalUnquantized<int64_t>(context, node);
break;
default:
TF_LITE_KERNEL_LOG(
context, "Op Concatenation does not currently support Type '%s'.",
TfLiteTypeGetName(output_type));
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace concatenation
TfLiteRegistration* Register_CONCATENATION() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/concatenation::Prepare,
/*invoke=*/concatenation::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,279 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/conv.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/conv.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
namespace tflite {
namespace ops {
namespace micro {
namespace conv {
constexpr int kInputTensor = 0;
constexpr int kFilterTensor = 1;
constexpr int kBiasTensor = 2;
constexpr int kOutputTensor = 0;
// Adjusted by jomjol 05.06.20
//constexpr int kMaxChannels = 1024;
constexpr int kMaxChannels = 4096;
// Conv is quantized along dimension 0:
// https://www.tensorflow.org/lite/performance/quantization_spec
constexpr int kConvQuantizedDimension = 0;
// This file has two implementations of Conv.
struct OpData {
TfLitePaddingValues padding;
// The scaling factor from input to output (aka the 'real multiplier') can
// be represented as a fixed point multiplier plus a left shift.
int32_t output_multiplier;
int output_shift;
// Per channel output multiplier and shift.
// TODO(b/141139247): Allocate these dynamically when possible.
int32_t per_channel_output_multiplier[kMaxChannels];
int32_t per_channel_output_shift[kMaxChannels];
// The range of the fused activation layer. For example for kNone and
// uint8_t these would be 0 and 255.
int32_t output_activation_min;
int32_t output_activation_max;
};
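// Annotation (not part of the upstream kernel): a sketch of how the 'real
// multiplier' decomposition above is typically produced, assuming the
// QuantizeMultiplier() helper declared in quantization_util.h:
//
//   double real_multiplier =
//       (input->params.scale * filter->params.scale) / output->params.scale;
//   int32_t quantized_multiplier;
//   int shift;
//   QuantizeMultiplier(real_multiplier, &quantized_multiplier, &shift);
//   // Now real_multiplier is approximately
//   //   quantized_multiplier * 2^(shift - 31),
//   // with quantized_multiplier in [2^30, 2^31).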
inline PaddingType RuntimePaddingType(TfLitePadding padding) {
switch (padding) {
case TfLitePadding::kTfLitePaddingSame:
return PaddingType::kSame;
case TfLitePadding::kTfLitePaddingValid:
return PaddingType::kValid;
case TfLitePadding::kTfLitePaddingUnknown:
default:
return PaddingType::kNone;
}
}
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
TfLiteConvParams* params, int width, int height,
int filter_width, int filter_height, int out_width,
int out_height, const TfLiteType data_type,
OpData* data) {
bool has_bias = node->inputs->size == 3;
// Check number of inputs/outputs
TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2);
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
// Matching GetWindowedOutputSize in TensorFlow.
auto padding = params->padding;
data->padding = ComputePaddingHeightWidth(
params->stride_height, params->stride_width,
params->dilation_height_factor, params->dilation_width_factor, height,
width, filter_height, filter_width, padding, &out_height, &out_width);
// Note that quantized inference requires that all tensors have their
// parameters set. This is usually done during quantized training.
if (data_type != kTfLiteFloat32) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
const TfLiteTensor* bias =
GetOptionalInputTensor(context, node, kBiasTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
int output_channels = filter->dims->data[kConvQuantizedDimension];
TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
context, input, filter, bias, output, params->activation,
&data->output_multiplier, &data->output_shift,
&data->output_activation_min, &data->output_activation_max,
data->per_channel_output_multiplier,
reinterpret_cast<int*>(data->per_channel_output_shift),
output_channels));
}
return kTfLiteOk;
}
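// Annotation (not part of the upstream kernel): ComputePaddingHeightWidth
// follows the usual SAME/VALID convention of GetWindowedOutputSize:
//
//   effective_filter = (filter - 1) * dilation + 1
//   SAME:  out = ceil(in / stride)
//          pad_total = max((out - 1) * stride + effective_filter - in, 0)
//          pad_before = pad_total / 2  // any odd remainder goes after
//   VALID: out = ceil((in - effective_filter + 1) / stride), padding = 0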
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteConvParams* params, OpData* data,
const TfLiteTensor* input, const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* im2col,
TfLiteTensor* hwcn_weights, TfLiteTensor* output) {
const int32_t input_offset = -input->params.zero_point;
const int32_t filter_offset = -filter->params.zero_point;
const int32_t output_offset = output->params.zero_point;
ConvParams op_params;
op_params.padding_type = RuntimePaddingType(params->padding);
op_params.padding_values.width = data->padding.width;
op_params.padding_values.height = data->padding.height;
op_params.stride_width = params->stride_width;
op_params.stride_height = params->stride_height;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.input_offset = input_offset;
op_params.weights_offset = filter_offset;
op_params.output_offset = output_offset;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = -data->output_shift;
op_params.quantized_activation_min = data->output_activation_min;
op_params.quantized_activation_max = data->output_activation_max;
reference_ops::Conv(op_params, GetTensorShape(input),
GetTensorData<uint8_t>(input), GetTensorShape(filter),
GetTensorData<uint8_t>(filter), GetTensorShape(bias),
GetTensorData<int32_t>(bias), GetTensorShape(output),
GetTensorData<uint8_t>(output), GetTensorShape(im2col),
GetTensorData<uint8_t>(im2col), nullptr);
}
void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
TfLiteConvParams* params, OpData* data,
const TfLiteTensor* input,
const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* output,
TfLiteTensor* im2col) {
ConvParams op_params;
op_params.input_offset = -input->params.zero_point;
op_params.output_offset = output->params.zero_point;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.quantized_activation_min = data->output_activation_min;
op_params.quantized_activation_max = data->output_activation_max;
reference_integer_ops::ConvPerChannel(
op_params, data->per_channel_output_multiplier,
data->per_channel_output_shift, GetTensorShape(input),
GetTensorData<int8>(input), GetTensorShape(filter),
GetTensorData<int8>(filter), GetTensorShape(bias),
GetTensorData<int32>(bias), GetTensorShape(output),
GetTensorData<int8>(output));
}
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLiteConvParams* params, OpData* data,
const TfLiteTensor* input, const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* im2col,
TfLiteTensor* hwcn_weights, TfLiteTensor* output) {
float output_activation_min, output_activation_max;
CalculateActivationRange(params->activation, &output_activation_min,
&output_activation_max);
ConvParams op_params;
op_params.padding_type = RuntimePaddingType(params->padding);
op_params.padding_values.width = data->padding.width;
op_params.padding_values.height = data->padding.height;
op_params.stride_width = params->stride_width;
op_params.stride_height = params->stride_height;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.float_activation_min = output_activation_min;
op_params.float_activation_max = output_activation_max;
reference_ops::Conv(op_params, GetTensorShape(input),
GetTensorData<float>(input), GetTensorShape(filter),
GetTensorData<float>(filter), GetTensorShape(bias),
GetTensorData<float>(bias), GetTensorShape(output),
GetTensorData<float>(output), GetTensorShape(im2col),
GetTensorData<float>(im2col));
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
int input_width = input->dims->data[2];
int input_height = input->dims->data[1];
int filter_width = filter->dims->data[2];
int filter_height = filter->dims->data[1];
int output_width = output->dims->data[2];
int output_height = output->dims->data[1];
OpData data;
// All per-channel quantized tensors need valid zero point and scale arrays.
if (input->type == kTfLiteInt8) {
TF_LITE_ENSURE_EQ(context, filter->quantization.type,
kTfLiteAffineQuantization);
const auto* affine_quantization =
reinterpret_cast<TfLiteAffineQuantization*>(
filter->quantization.params);
TF_LITE_ENSURE(context, affine_quantization);
TF_LITE_ENSURE(context, affine_quantization->scale);
TF_LITE_ENSURE(context, affine_quantization->zero_point);
TF_LITE_ENSURE(context,
affine_quantization->scale->size == 1 ||
affine_quantization->scale->size ==
filter->dims->data[kConvQuantizedDimension]);
TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size,
affine_quantization->zero_point->size);
}
TF_LITE_ENSURE_STATUS(CalculateOpData(
context, node, params, input_width, input_height, filter_width,
filter_height, output_width, output_height, input->type, &data));
switch (input->type) { // Already know in/out types are the same.
case kTfLiteFloat32:
EvalFloat(context, node, params, &data, input, filter, bias, nullptr,
nullptr, output);
break;
case kTfLiteInt8:
EvalQuantizedPerChannel(context, node, params, &data, input, filter, bias,
output, nullptr);
break;
case kTfLiteUInt8:
EvalQuantized(context, node, params, &data, input, filter, bias, nullptr,
nullptr, output);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace conv
TfLiteRegistration* Register_CONV_2D() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/conv::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,282 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/conv.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/conv.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
namespace tflite {
namespace ops {
namespace micro {
namespace conv {
constexpr int kInputTensor = 0;
constexpr int kFilterTensor = 1;
constexpr int kBiasTensor = 2;
constexpr int kOutputTensor = 0;
// Adjusted by jomjol 05.06.20
//constexpr int kMaxChannels = 1024;
constexpr int kMaxChannels = 32384;
// Conv is quantized along dimension 0:
// https://www.tensorflow.org/lite/performance/quantization_spec
constexpr int kConvQuantizedDimension = 0;
// This file has two implementations of Conv.
struct OpData {
TfLitePaddingValues padding;
// The scaling factor from input to output (aka the 'real multiplier') can
// be represented as a fixed point multiplier plus a left shift.
int32_t output_multiplier;
int output_shift;
// Per channel output multiplier and shift.
// TODO(b/141139247): Allocate these dynamically when possible.
int32_t per_channel_output_multiplier[kMaxChannels];
int32_t per_channel_output_shift[kMaxChannels];
// The range of the fused activation layer. For example for kNone and
// uint8_t these would be 0 and 255.
int32_t output_activation_min;
int32_t output_activation_max;
};
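// Size note on the jomjol modification above (added annotation): with
// kMaxChannels = 32384, the two per-channel arrays alone occupy
//
//   2 arrays * 32384 entries * 4 bytes = 259,072 bytes (~253 KB),
//
// so sizeof(OpData) is far beyond what a typical embedded task stack
// (often only a few KB on an ESP32) can hold. This is why Eval() below
// heap-allocates OpData instead of declaring it as a local variable.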
inline PaddingType RuntimePaddingType(TfLitePadding padding) {
switch (padding) {
case TfLitePadding::kTfLitePaddingSame:
return PaddingType::kSame;
case TfLitePadding::kTfLitePaddingValid:
return PaddingType::kValid;
case TfLitePadding::kTfLitePaddingUnknown:
default:
return PaddingType::kNone;
}
}
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
TfLiteConvParams* params, int width, int height,
int filter_width, int filter_height, int out_width,
int out_height, const TfLiteType data_type,
OpData* data) {
bool has_bias = node->inputs->size == 3;
// Check number of inputs/outputs
TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2);
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
// Matching GetWindowedOutputSize in TensorFlow.
auto padding = params->padding;
data->padding = ComputePaddingHeightWidth(
params->stride_height, params->stride_width,
params->dilation_height_factor, params->dilation_width_factor, height,
width, filter_height, filter_width, padding, &out_height, &out_width);
// Note that quantized inference requires that all tensors have their
// parameters set. This is usually done during quantized training.
if (data_type != kTfLiteFloat32) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
const TfLiteTensor* bias =
GetOptionalInputTensor(context, node, kBiasTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
int output_channels = filter->dims->data[kConvQuantizedDimension];
TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
context, input, filter, bias, output, params->activation,
&data->output_multiplier, &data->output_shift,
&data->output_activation_min, &data->output_activation_max,
data->per_channel_output_multiplier,
reinterpret_cast<int*>(data->per_channel_output_shift),
output_channels));
}
return kTfLiteOk;
}
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteConvParams* params, OpData* data,
const TfLiteTensor* input, const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* im2col,
TfLiteTensor* hwcn_weights, TfLiteTensor* output) {
const int32_t input_offset = -input->params.zero_point;
const int32_t filter_offset = -filter->params.zero_point;
const int32_t output_offset = output->params.zero_point;
ConvParams op_params;
op_params.padding_type = RuntimePaddingType(params->padding);
op_params.padding_values.width = data->padding.width;
op_params.padding_values.height = data->padding.height;
op_params.stride_width = params->stride_width;
op_params.stride_height = params->stride_height;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.input_offset = input_offset;
op_params.weights_offset = filter_offset;
op_params.output_offset = output_offset;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = -data->output_shift;
op_params.quantized_activation_min = data->output_activation_min;
op_params.quantized_activation_max = data->output_activation_max;
reference_ops::Conv(op_params, GetTensorShape(input),
GetTensorData<uint8_t>(input), GetTensorShape(filter),
GetTensorData<uint8_t>(filter), GetTensorShape(bias),
GetTensorData<int32_t>(bias), GetTensorShape(output),
GetTensorData<uint8_t>(output), GetTensorShape(im2col),
GetTensorData<uint8_t>(im2col), nullptr);
}
void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
TfLiteConvParams* params, OpData* data,
const TfLiteTensor* input,
const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* output,
TfLiteTensor* im2col) {
ConvParams op_params;
op_params.input_offset = -input->params.zero_point;
op_params.output_offset = output->params.zero_point;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.quantized_activation_min = data->output_activation_min;
op_params.quantized_activation_max = data->output_activation_max;
reference_integer_ops::ConvPerChannel(
op_params, data->per_channel_output_multiplier,
data->per_channel_output_shift, GetTensorShape(input),
GetTensorData<int8>(input), GetTensorShape(filter),
GetTensorData<int8>(filter), GetTensorShape(bias),
GetTensorData<int32>(bias), GetTensorShape(output),
GetTensorData<int8>(output));
}
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLiteConvParams* params, OpData* data,
const TfLiteTensor* input, const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* im2col,
TfLiteTensor* hwcn_weights, TfLiteTensor* output) {
float output_activation_min, output_activation_max;
CalculateActivationRange(params->activation, &output_activation_min,
&output_activation_max);
ConvParams op_params;
op_params.padding_type = RuntimePaddingType(params->padding);
op_params.padding_values.width = data->padding.width;
op_params.padding_values.height = data->padding.height;
op_params.stride_width = params->stride_width;
op_params.stride_height = params->stride_height;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.float_activation_min = output_activation_min;
op_params.float_activation_max = output_activation_max;
reference_ops::Conv(op_params, GetTensorShape(input),
GetTensorData<float>(input), GetTensorShape(filter),
GetTensorData<float>(filter), GetTensorShape(bias),
GetTensorData<float>(bias), GetTensorShape(output),
GetTensorData<float>(output), GetTensorShape(im2col),
GetTensorData<float>(im2col));
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
int input_width = input->dims->data[2];
int input_height = input->dims->data[1];
int filter_width = filter->dims->data[2];
int filter_height = filter->dims->data[1];
int output_width = output->dims->data[2];
int output_height = output->dims->data[1];
// All per-channel quantized tensors need valid zero point and scale arrays.
if (input->type == kTfLiteInt8) {
TF_LITE_ENSURE_EQ(context, filter->quantization.type,
kTfLiteAffineQuantization);
const auto* affine_quantization =
reinterpret_cast<TfLiteAffineQuantization*>(
filter->quantization.params);
TF_LITE_ENSURE(context, affine_quantization);
TF_LITE_ENSURE(context, affine_quantization->scale);
TF_LITE_ENSURE(context, affine_quantization->zero_point);
TF_LITE_ENSURE(context,
affine_quantization->scale->size == 1 ||
affine_quantization->scale->size ==
filter->dims->data[kConvQuantizedDimension]);
TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size,
affine_quantization->zero_point->size);
}
// Modified by jomjol: with kMaxChannels = 32384, sizeof(OpData) is roughly
// 256 KB (see the size note above), far too large for a stack-allocated
// local as in the upstream kernel, so it is heap-allocated for the
// duration of this call and freed on every exit path below.
OpData* data = static_cast<OpData*>(malloc(sizeof(OpData)));
TF_LITE_ENSURE(context, data != nullptr);
if (CalculateOpData(context, node, params, input_width, input_height,
filter_width, filter_height, output_width, output_height,
input->type, data) != kTfLiteOk) {
free(data);
return kTfLiteError;
}
switch (input->type) { // Already know in/out types are the same.
case kTfLiteFloat32:
EvalFloat(context, node, params, data, input, filter, bias, nullptr,
nullptr, output);
break;
case kTfLiteInt8:
EvalQuantizedPerChannel(context, node, params, data, input, filter, bias,
output, nullptr);
break;
case kTfLiteUInt8:
EvalQuantized(context, node, params, data, input, filter, bias, nullptr,
nullptr, output);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);
free(data);
return kTfLiteError;
}
free(data);
return kTfLiteOk;
}
} // namespace conv
TfLiteRegistration* Register_CONV_2D() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/conv::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,269 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
namespace tflite {
namespace ops {
namespace micro {
namespace depthwise_conv {
namespace {
constexpr int kInputTensor = 0;
constexpr int kFilterTensor = 1;
constexpr int kBiasTensor = 2;
constexpr int kOutputTensor = 0;
constexpr int kMaxChannels = 1024;
// Depthwise conv is quantized along dimension 3:
// https://www.tensorflow.org/lite/performance/quantization_spec
constexpr int kDepthwiseConvQuantizedDimension = 3;
struct OpData {
TfLitePaddingValues padding;
// The scaling factor from input to output (aka the 'real multiplier') can
// be represented as a fixed point multiplier plus a left shift.
int32_t output_multiplier;
int output_shift;
// Per channel output multiplier and shift.
// TODO(b/141139247): Allocate these dynamically when possible.
int32_t per_channel_output_multiplier[kMaxChannels];
int32_t per_channel_output_shift[kMaxChannels];
// The range of the fused activation layer. For example for kNone and
// uint8_t these would be 0 and 255.
int32_t output_activation_min;
int32_t output_activation_max;
};
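// Layout note for the per-channel arrays above (added annotation): a
// depthwise filter is shaped [1, filter_height, filter_width, channels],
// so kDepthwiseConvQuantizedDimension = 3 means there is one scale and
// multiplier per output channel, e.g.
//
//   int num_channels = filter->dims->data[3];  // == output depth
//   // per_channel_output_multiplier[c] applies to output channel c.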
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
TfLiteDepthwiseConvParams* params, int width,
int height, int filter_width, int filter_height,
const TfLiteType data_type, OpData* data) {
bool has_bias = node->inputs->size == 3;
// Check number of inputs/outputs
TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2);
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
int unused_output_height, unused_output_width;
data->padding = ComputePaddingHeightWidth(
params->stride_height, params->stride_width, 1, 1, height, width,
filter_height, filter_width, params->padding, &unused_output_height,
&unused_output_width);
// Note that quantized inference requires that all tensors have their
// parameters set. This is usually done during quantized training.
if (data_type != kTfLiteFloat32) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
const TfLiteTensor* bias =
GetOptionalInputTensor(context, node, kBiasTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
context, input, filter, bias, output, params->activation,
&data->output_multiplier, &data->output_shift,
&data->output_activation_min, &data->output_activation_max,
data->per_channel_output_multiplier,
reinterpret_cast<int*>(data->per_channel_output_shift), num_channels));
}
return kTfLiteOk;
}
} // namespace
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLiteDepthwiseConvParams* params, OpData* data,
const TfLiteTensor* input, const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* output) {
float output_activation_min, output_activation_max;
CalculateActivationRange(params->activation, &output_activation_min,
&output_activation_max);
tflite::DepthwiseParams op_params;
// Padding type is ignored, but still set.
op_params.padding_type = PaddingType::kSame;
op_params.padding_values.width = data->padding.width;
op_params.padding_values.height = data->padding.height;
op_params.stride_width = params->stride_width;
op_params.stride_height = params->stride_height;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.depth_multiplier = params->depth_multiplier;
op_params.float_activation_min = output_activation_min;
op_params.float_activation_max = output_activation_max;
tflite::reference_ops::DepthwiseConv(
op_params, GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(filter), GetTensorData<float>(filter),
GetTensorShape(bias), GetTensorData<float>(bias), GetTensorShape(output),
GetTensorData<float>(output));
}
void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
TfLiteDepthwiseConvParams* params, OpData* data,
const TfLiteTensor* input,
const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* output) {
DepthwiseParams op_params;
op_params.padding_type = PaddingType::kSame;
op_params.padding_values.width = data->padding.width;
op_params.padding_values.height = data->padding.height;
op_params.stride_width = params->stride_width;
op_params.stride_height = params->stride_height;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.depth_multiplier = params->depth_multiplier;
op_params.input_offset = -input->params.zero_point;
op_params.weights_offset = 0;
op_params.output_offset = output->params.zero_point;
// TODO(b/130439627): Use calculated value for clamping.
op_params.quantized_activation_min = std::numeric_limits<int8_t>::min();
op_params.quantized_activation_max = std::numeric_limits<int8_t>::max();
reference_integer_ops::DepthwiseConvPerChannel(
op_params, data->per_channel_output_multiplier,
data->per_channel_output_shift, GetTensorShape(input),
GetTensorData<int8>(input), GetTensorShape(filter),
GetTensorData<int8>(filter), GetTensorShape(bias),
GetTensorData<int32>(bias), GetTensorShape(output),
GetTensorData<int8>(output));
}
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteDepthwiseConvParams* params, OpData* data,
const TfLiteTensor* input, const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* output) {
const int32_t input_offset = -input->params.zero_point;
const int32_t filter_offset = -filter->params.zero_point;
const int32_t output_offset = output->params.zero_point;
tflite::DepthwiseParams op_params;
// Padding type is ignored, but still set.
op_params.padding_type = PaddingType::kSame;
op_params.padding_values.width = data->padding.width;
op_params.padding_values.height = data->padding.height;
op_params.stride_width = params->stride_width;
op_params.stride_height = params->stride_height;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.depth_multiplier = params->depth_multiplier;
op_params.quantized_activation_min = data->output_activation_min;
op_params.quantized_activation_max = data->output_activation_max;
op_params.input_offset = input_offset;
op_params.weights_offset = filter_offset;
op_params.output_offset = output_offset;
op_params.output_multiplier = data->output_multiplier;
// Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
op_params.output_shift = -data->output_shift;
tflite::reference_ops::DepthwiseConv(
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
GetTensorShape(filter), GetTensorData<uint8_t>(filter),
GetTensorShape(bias), GetTensorData<int32_t>(bias),
GetTensorShape(output), GetTensorData<uint8_t>(output));
}
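// Shift-convention note (added annotation): OpData::output_shift is stored
// with the legacy 'positive means shift right' meaning, while the
// reference kernels expect 'positive means shift left', hence the negation
// above:
//
//   // stored: right-shift count     passed on: left-shift count
//   op_params.output_shift = -data->output_shift;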
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params =
reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
const TfLiteTensor* bias =
(NumInputs(node) == 3) ? GetInput(context, node, kBiasTensor) : nullptr;
const TfLiteType data_type = input->type;
int width = SizeOfDimension(input, 2);
int height = SizeOfDimension(input, 1);
int filter_width = SizeOfDimension(filter, 2);
int filter_height = SizeOfDimension(filter, 1);
OpData data;
// All per-channel quantized tensors need valid zero point and scale arrays.
if (input->type == kTfLiteInt8) {
TF_LITE_ENSURE_EQ(context, filter->quantization.type,
kTfLiteAffineQuantization);
const auto* affine_quantization =
reinterpret_cast<TfLiteAffineQuantization*>(
filter->quantization.params);
TF_LITE_ENSURE(context, affine_quantization);
TF_LITE_ENSURE(context, affine_quantization->scale);
TF_LITE_ENSURE(context, affine_quantization->zero_point);
TF_LITE_ENSURE(
context, affine_quantization->scale->size == 1 ||
affine_quantization->scale->size ==
filter->dims->data[kDepthwiseConvQuantizedDimension]);
TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size,
affine_quantization->zero_point->size);
}
TF_LITE_ENSURE_STATUS(CalculateOpData(context, node, params, width, height,
filter_width, filter_height, data_type,
&data));
// TODO(aselle): Consider whether float conv and quantized conv should be
// separate ops to avoid dispatch overhead here.
switch (input->type) { // Already know in/out types are the same.
case kTfLiteFloat32:
EvalFloat(context, node, params, &data, input, filter, bias, output);
break;
case kTfLiteInt8:
EvalQuantizedPerChannel(context, node, params, &data, input, filter, bias,
output);
break;
case kTfLiteUInt8:
EvalQuantized(context, node, params, &data, input, filter, bias, output);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace depthwise_conv
TfLiteRegistration* Register_DEPTHWISE_CONV_2D() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/depthwise_conv::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,135 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/dequantize.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/quantize.h"
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace dequantize {
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
// TODO(b/140515557): Add cached dequant to improve hybrid model performance.
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* output = GetOutput(context, node, 0);
TF_LITE_ENSURE(context, input->type == kTfLiteUInt8 ||
input->type == kTfLiteInt8 ||
input->type == kTfLiteInt16);
TF_LITE_ENSURE(
context, output->type == kTfLiteFloat32 || output->type == kTfLiteInt32);
return kTfLiteOk;
}
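// Annotation (not part of the upstream kernel): both Eval paths below
// follow the affine identity real_value = scale * (q - zero_point).
//
//   // Float output (Dequantize):
//   out[i] = input_scale * (in[i] - input_zero_point);
//
//   // Int32 output (Requantize): rescale by effective_scale =
//   // input_scale / output_scale via the fixed-point multiplier computed
//   // with QuantizeMultiplier() in Eval:
//   out[i] = FixedPointMul(in[i] - input_zero_point,
//                          output_multiplier, output_shift)
//            + output_zero_point;
//
// FixedPointMul is a placeholder name for the rounding multiply used by
// reference_ops::Requantize, not an actual helper here.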
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* output = GetOutput(context, node, 0);
if (output->type == kTfLiteFloat32) {
tflite::DequantizationParams op_params;
op_params.zero_point = input->params.zero_point;
op_params.scale = static_cast<double>(input->params.scale);
switch (input->type) {
case kTfLiteUInt8:
reference_ops::Dequantize(
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
GetTensorShape(output), GetTensorData<float>(output));
break;
case kTfLiteInt8:
reference_ops::Dequantize(
op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
GetTensorShape(output), GetTensorData<float>(output));
break;
case kTfLiteInt16:
reference_ops::Dequantize(
op_params, GetTensorShape(input), GetTensorData<int16_t>(input),
GetTensorShape(output), GetTensorData<float>(output));
break;
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else if (output->type == kTfLiteInt32) {
int32_t output_multiplier;
int output_shift;
const double effective_output_scale =
static_cast<double>(input->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(effective_output_scale, &output_multiplier,
&output_shift);
int flat_size =
MatchingFlatSize(GetTensorShape(input), GetTensorShape(output));
switch (input->type) {
case kTfLiteInt16: {
reference_ops::Requantize(
GetTensorData<int16_t>(input), flat_size, output_multiplier,
output_shift, input->params.zero_point, output->params.zero_point,
GetTensorData<int32_t>(output));
break;
}
case kTfLiteInt8: {
reference_ops::Requantize(
GetTensorData<int8_t>(input), flat_size, output_multiplier,
output_shift, input->params.zero_point, output->params.zero_point,
GetTensorData<int32_t>(output));
break;
}
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else {
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace dequantize
TfLiteRegistration* Register_DEQUANTIZE() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/dequantize::Prepare,
/*invoke=*/dequantize::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,226 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <cmath>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace elementwise {
namespace {
bool IsNumericSupportedType(const TfLiteType type) {
return type == kTfLiteFloat32;
}
bool IsLogicalSupportedType(const TfLiteType type) {
return type == kTfLiteBool;
}
typedef bool (*IsSupportedType)(TfLiteType);
template <IsSupportedType>
TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* output = GetOutput(context, node, 0);
TF_LITE_ENSURE_EQ(context, input->type, output->type);
if (!IsSupportedType(input->type)) {
TF_LITE_KERNEL_LOG(context, "Input data type %s (%d) is not supported.",
TfLiteTypeGetName(input->type), input->type);
return kTfLiteError;
}
return kTfLiteOk;
}
template <typename T>
inline TfLiteStatus EvalImpl(TfLiteContext* context, TfLiteNode* node,
T func(T), TfLiteType expected_type) {
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* output = GetOutput(context, node, 0);
TF_LITE_ENSURE_EQ(context, input->type, expected_type);
const int64_t num_elements = NumElements(input);
const T* in_data = GetTensorData<T>(input);
T* out_data = GetTensorData<T>(output);
for (int64_t i = 0; i < num_elements; ++i) {
out_data[i] = func(in_data[i]);
}
return kTfLiteOk;
}
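// Usage sketch (added annotation): every unary op in this file funnels
// through EvalImpl with a plain function pointer, so adding, say, an expm1
// kernel would only need a thin wrapper such as
//
//   TfLiteStatus Expm1Eval(TfLiteContext* context, TfLiteNode* node) {
//     return EvalNumeric(context, node,
//                        [](float f) { return std::expm1(f); });
//   }
//
// (Expm1Eval is a hypothetical example, not an op registered here.)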
inline TfLiteStatus EvalNumeric(TfLiteContext* context, TfLiteNode* node,
float float_func(float)) {
return EvalImpl<float>(context, node, float_func, kTfLiteFloat32);
}
inline TfLiteStatus EvalLogical(TfLiteContext* context, TfLiteNode* node,
bool bool_func(bool)) {
return EvalImpl<bool>(context, node, bool_func, kTfLiteBool);
}
TfLiteStatus AbsEval(TfLiteContext* context, TfLiteNode* node) {
return EvalNumeric(context, node, std::abs);
}
TfLiteStatus SinEval(TfLiteContext* context, TfLiteNode* node) {
return EvalNumeric(context, node, std::sin);
}
TfLiteStatus CosEval(TfLiteContext* context, TfLiteNode* node) {
return EvalNumeric(context, node, std::cos);
}
TfLiteStatus LogEval(TfLiteContext* context, TfLiteNode* node) {
return EvalNumeric(context, node, std::log);
}
TfLiteStatus SqrtEval(TfLiteContext* context, TfLiteNode* node) {
return EvalNumeric(context, node, std::sqrt);
}
TfLiteStatus RsqrtEval(TfLiteContext* context, TfLiteNode* node) {
return EvalNumeric(context, node, [](float f) { return 1.f / std::sqrt(f); });
}
TfLiteStatus SquareEval(TfLiteContext* context, TfLiteNode* node) {
return EvalNumeric(context, node, [](float f) { return f * f; });
}
TfLiteStatus LogicalNotEval(TfLiteContext* context, TfLiteNode* node) {
return EvalLogical(context, node, [](bool v) { return !v; });
}
} // namespace
} // namespace elementwise
TfLiteRegistration* Register_ABS() {
static TfLiteRegistration r = {
/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::AbsEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
TfLiteRegistration* Register_SIN() {
static TfLiteRegistration r = {
/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::SinEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
TfLiteRegistration* Register_COS() {
static TfLiteRegistration r = {
/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::CosEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
TfLiteRegistration* Register_LOG() {
static TfLiteRegistration r = {
/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::LogEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
TfLiteRegistration* Register_SQRT() {
static TfLiteRegistration r = {
/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::SqrtEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
TfLiteRegistration* Register_RSQRT() {
static TfLiteRegistration r = {
/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::RsqrtEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
TfLiteRegistration* Register_SQUARE() {
static TfLiteRegistration r = {
/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::SquareEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
TfLiteRegistration* Register_LOGICAL_NOT() {
static TfLiteRegistration r = {
/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsLogicalSupportedType>,
/*invoke=*/elementwise::LogicalNotEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,54 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/floor.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace floor {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
reference_ops::Floor(GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<float>(output));
return kTfLiteOk;
}
} // namespace floor
TfLiteRegistration* Register_FLOOR() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/floor::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,233 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace fully_connected {
namespace {
struct OpData {
// The scaling factor from input to output (aka the 'real multiplier') can
// be represented as a fixed point multiplier plus a left shift.
int32_t output_multiplier;
int output_shift;
// The range of the fused activation layer. For example for kNone and
// uint8_t these would be 0 and 255.
int32_t output_activation_min;
int32_t output_activation_max;
// The index of the temporary tensor where the quantized inputs are cached.
int input_quantized_index;
};
constexpr int kInputTensor = 0;
constexpr int kWeightsTensor = 1;
constexpr int kBiasTensor = 2;
constexpr int kOutputTensor = 0;
TfLiteStatus CalculateOpData(TfLiteContext* context,
TfLiteFusedActivation activation,
TfLiteType data_type, const TfLiteTensor* input,
const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* output,
OpData* data) {
TfLiteStatus status = kTfLiteOk;
if (data_type != kTfLiteFloat32) {
double real_multiplier = 0.0;
TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler(
context, input, filter, bias, output, &real_multiplier));
int exponent;
QuantizeMultiplier(real_multiplier, &data->output_multiplier, &exponent);
data->output_shift = -exponent;
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, activation, output, &data->output_activation_min,
&data->output_activation_max));
}
return status;
}
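// Annotation (not part of the upstream kernel): at runtime the int32
// accumulator for each output element is rescaled with the values computed
// above, conceptually
//
//   acc = sum(input[i] * filter[i]) + bias;  // int32 accumulator
//   out = Clamp(MultiplyByQuantizedMultiplier(acc, output_multiplier,
//                                             -output_shift)
//               + output_offset,
//               output_activation_min, output_activation_max);
//
// MultiplyByQuantizedMultiplier is the rounding fixed-point helper from
// internal/common.h; Clamp is shorthand here, not a real helper name.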
} // namespace
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
void* data = nullptr;
if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) ==
kTfLiteError) {
return nullptr;
}
return data;
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
const auto params =
static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor);
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_EQ(context, input->type, output->type);
TF_LITE_ENSURE_MSG(context, input->type == filter->type,
"Hybrid models are not supported on TFLite Micro.");
return CalculateOpData(context, params->activation, input->type, input,
filter, bias, output, data);
}
TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
const OpData& data, const TfLiteTensor* input,
const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* output) {
tflite::FullyConnectedParams op_params;
op_params.input_offset = -input->params.zero_point;
op_params.weights_offset = -filter->params.zero_point;
op_params.output_offset = output->params.zero_point;
op_params.output_multiplier = data.output_multiplier;
// TODO(b/138810107): Figure out whether output shift should be inverted
op_params.output_shift = -data.output_shift;
op_params.quantized_activation_min = data.output_activation_min;
op_params.quantized_activation_max = data.output_activation_max;
reference_integer_ops::FullyConnected(
op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
GetTensorShape(filter), GetTensorData<int8_t>(filter),
GetTensorShape(bias), GetTensorData<int32_t>(bias),
GetTensorShape(output), GetTensorData<int8_t>(output));
return kTfLiteOk;
}
TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
const OpData& data, const TfLiteTensor* input,
const TfLiteTensor* filter, const TfLiteTensor* bias,
TfLiteTensor* output) {
const int32_t input_offset = -input->params.zero_point;
const int32_t filter_offset = -filter->params.zero_point;
const int32_t output_offset = output->params.zero_point;
tflite::FullyConnectedParams op_params;
op_params.input_offset = input_offset;
op_params.weights_offset = filter_offset;
op_params.output_offset = output_offset;
op_params.output_multiplier = data.output_multiplier;
// Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
op_params.output_shift = -data.output_shift;
op_params.quantized_activation_min = data.output_activation_min;
op_params.quantized_activation_max = data.output_activation_max;
#define TF_LITE_FULLY_CONNECTED(output_data_type) \
reference_ops::FullyConnected( \
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input), \
GetTensorShape(filter), GetTensorData<uint8_t>(filter), \
GetTensorShape(bias), GetTensorData<int32_t>(bias), \
GetTensorShape(output), GetTensorData<output_data_type>(output))
switch (output->type) {
case kTfLiteUInt8:
TF_LITE_FULLY_CONNECTED(uint8_t);
break;
case kTfLiteInt16:
TF_LITE_FULLY_CONNECTED(int16_t);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(output->type), output->type);
return kTfLiteError;
}
return kTfLiteOk;
}
TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLiteFusedActivation activation,
const TfLiteTensor* input, const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* output) {
float output_activation_min, output_activation_max;
CalculateActivationRange(activation, &output_activation_min,
&output_activation_max);
tflite::FullyConnectedParams op_params;
op_params.float_activation_min = output_activation_min;
op_params.float_activation_max = output_activation_max;
tflite::reference_ops::FullyConnected(
op_params, GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(filter), GetTensorData<float>(filter),
GetTensorShape(bias), GetTensorData<float>(bias), GetTensorShape(output),
GetTensorData<float>(output));
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
const auto* params =
static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor);
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData& data = *(static_cast<const OpData*>(node->user_data));
// Checks in Prepare ensure input, output and filter types are all the same.
switch (input->type) {
case kTfLiteFloat32:
return EvalFloat(context, node, params->activation, input, filter, bias,
output);
case kTfLiteInt8:
return EvalQuantizedInt8(context, node, data, input, filter, bias,
output);
case kTfLiteUInt8:
return EvalQuantized(context, node, data, input, filter, bias, output);
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace fully_connected
TfLiteRegistration* Register_FULLY_CONNECTED() {
static TfLiteRegistration r = {/*init=*/fully_connected::Init,
/*free=*/nullptr,
/*prepare=*/fully_connected::Prepare,
/*invoke=*/fully_connected::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,150 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h"
#include "tensorflow/lite/kernels/internal/reference/l2normalization.h"
#include "tensorflow/lite/kernels/internal/tensor.h"
#include "tensorflow/lite/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace l2norm {
// This file has two implementations of L2Norm.
enum KernelType {
kReference,
kGenericOptimized,
};
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
#if defined(DEBUG)
auto* params = reinterpret_cast<TfLiteL2NormParams*>(node->builtin_data);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, NumDimensions(input) <= 4);
TF_LITE_ENSURE(context, output->type == kTfLiteFloat32 ||
output->type == kTfLiteUInt8 ||
output->type == kTfLiteInt8);
TF_LITE_ENSURE_EQ(context, input->type, output->type);
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
TF_LITE_ENSURE_EQ(context, output->params.scale, (1. / 128.));
if (output->type == kTfLiteUInt8) {
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 128);
}
if (output->type == kTfLiteInt8) {
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
}
}
// TODO(ahentz): For some reason our implementations don't support
// activations.
TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone);
#endif
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
// TODO(b/143912164): instead of hardcode the epsilon here, we should read it
// from tensorflow, i.e., adding a params.
// We don't compute epsilon for the quantized kernel:
//
//   epsilon_float = (epsilon_quant - zp) * scale
// so
//   epsilon_quant = epsilon_float / scale + zp
// We know epsilon_float is just a very small number to avoid division-by-
// zero errors, and scale is > 1, so the integer value of epsilon for the
// quantized case is dominated by the zero point.
// Also, GetInvSqrtQuantizedMultiplierExp handles the case where the sum of
// the squared input values is zero.
// So we don't even need to handle epsilon for the quantized kernel case.
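// Worked example of the estimate above (added annotation): even with a
// small input scale such as 0.05, epsilon_quant = 1e-6 / 0.05 + zp =
// 0.00002 + zp, which rounds to the zero point itself; with scale > 1 the
// correction term is smaller still.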
const float epsilon = 1e-6f;
if (output->type == kTfLiteFloat32) {
#define TF_LITE_L2NORM(type) \
tflite::L2NormalizationParams op_params; \
op_params.input_zero_point = 0; \
type::L2Normalization(op_params, GetTensorShape(input), \
GetTensorData<float>(input), GetTensorShape(output), \
GetTensorData<float>(output), epsilon)
TF_LITE_L2NORM(reference_ops);
#undef TF_LITE_L2NORM
} else if (output->type == kTfLiteUInt8) {
#define TF_LITE_L2NORM(type) \
tflite::L2NormalizationParams op_params; \
op_params.input_zero_point = input->params.zero_point; \
type::L2Normalization(op_params, GetTensorShape(input), \
GetTensorData<uint8>(input), GetTensorShape(output), \
GetTensorData<uint8>(output))
TF_LITE_L2NORM(reference_ops);
#undef TF_LITE_L2NORM
} else if (output->type == kTfLiteInt8) {
const auto input_shape = GetTensorShape(input);
const auto output_shape = GetTensorShape(output);
const int trailing_dim = input_shape.DimensionsCount() - 1;
const int depth =
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
const int outer_size =
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
reference_integer_ops::L2Normalization(input->params.zero_point, outer_size,
depth, GetTensorData<int8>(input),
GetTensorData<int8>(output));
} else {
TF_LITE_KERNEL_LOG(context, "Output type is %d, requires float.",
output->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace l2norm
TfLiteRegistration* Register_L2NORM_REF() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/l2norm::Prepare,
/*invoke=*/l2norm::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
TfLiteRegistration* Register_L2_NORMALIZATION() {
return Register_L2NORM_REF();
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,98 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
namespace tflite {
namespace ops {
namespace micro {
namespace logical {
namespace {
// Input/output tensor indices.
constexpr int kInputTensor1 = 0;
constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;
TfLiteStatus LogicalImpl(TfLiteContext* context, TfLiteNode* node,
bool (*func)(bool, bool)) {
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
if (HaveSameShapes(input1, input2)) {
reference_ops::BinaryFunction<bool, bool, bool>(
GetTensorShape(input1), GetTensorData<bool>(input1),
GetTensorShape(input2), GetTensorData<bool>(input2),
GetTensorShape(output), GetTensorData<bool>(output), func);
} else {
reference_ops::BroadcastBinaryFunction4DSlow<bool, bool, bool>(
GetTensorShape(input1), GetTensorData<bool>(input1),
GetTensorShape(input2), GetTensorData<bool>(input2),
GetTensorShape(output), GetTensorData<bool>(output), func);
}
return kTfLiteOk;
}
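// Illustrative example (not part of the original kernel): with input1 of
// shape [2, 1] holding {true, false} and input2 of shape [1, 2] holding
// {false, true}, the broadcast path above produces a [2, 2] output; for
// LogicalOr that output is {true, true, false, true}.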
bool LogicalOr(bool x, bool y) { return x || y; }
TfLiteStatus LogicalOrEval(TfLiteContext* context, TfLiteNode* node) {
return LogicalImpl(context, node, LogicalOr);
}
bool LogicalAnd(bool x, bool y) { return x && y; }
TfLiteStatus LogicalAndEval(TfLiteContext* context, TfLiteNode* node) {
return LogicalImpl(context, node, LogicalAnd);
}
} // namespace
} // namespace logical
TfLiteRegistration* Register_LOGICAL_OR() {
// The init, free, prepare and invoke entries below satisfy the interface
// required by TfLiteRegistration.
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/logical::LogicalOrEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
TfLiteRegistration* Register_LOGICAL_AND() {
// The init, free, prepare and invoke entries below satisfy the interface
// required by TfLiteRegistration.
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/logical::LogicalAndEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,129 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/logistic.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
namespace tflite {
namespace ops {
namespace micro {
namespace activations {
namespace {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
struct OpData {
int32_t input_zero_point;
int32_t input_range_radius;
int32_t input_multiplier;
int input_left_shift;
};
TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
OpData* data) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_EQ(context, input->type, output->type);
if (input->type == kTfLiteInt8) {
TF_LITE_ENSURE_EQ(context, output->params.zero_point,
std::numeric_limits<int8_t>::min());
static constexpr int kInputIntegerBits = 4;
const double input_real_multiplier =
static_cast<double>(input->params.scale) *
static_cast<double>(1 << (31 - kInputIntegerBits));
const double q = std::frexp(input_real_multiplier, &data->input_left_shift);
data->input_multiplier = static_cast<int32_t>(TfLiteRound(q * (1ll << 31)));
data->input_range_radius =
CalculateInputRadius(kInputIntegerBits, data->input_left_shift, 31);
}
return kTfLiteOk;
}
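// A worked example of the fixed-point setup above (illustrative only): with
// input scale = 0.1 and kInputIntegerBits = 4,
//   input_real_multiplier = 0.1 * 2^27 = 13421772.8,
//   frexp(...) gives q = 0.8 with input_left_shift = 24, and
//   input_multiplier = round(0.8 * 2^31) = 1717986918.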
} // namespace
TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
OpData data;
TF_LITE_ENSURE_STATUS(CalculateArithmeticOpData(context, node, &data));
if (input->type == kTfLiteFloat32) {
switch (output->type) {
case kTfLiteFloat32: {
reference_ops::Logistic(
GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<float>(output));
return kTfLiteOk;
}
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else if (input->type == kTfLiteInt8) {
switch (output->type) {
case kTfLiteInt8: {
reference_integer_ops::Logistic(
input->params.zero_point, data.input_range_radius,
data.input_multiplier, data.input_left_shift,
NumElements(input->dims), GetTensorData<int8_t>(input),
GetTensorData<int8_t>(output));
return kTfLiteOk;
}
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else {
// TODO(b/141211002): Also support other data types once we have supported
// temporary tensors in TFLM.
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace activations
TfLiteRegistration* Register_LOGISTIC() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/activations::LogisticEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,151 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/maximum_minimum.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
namespace tflite {
namespace ops {
namespace micro {
namespace maximum_minimum {
namespace {
// This file has a reference implementation of TFMaximum/TFMinimum.
enum KernelType {
kReference,
};
constexpr int kInputTensor1 = 0;
constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;
struct OpContext {
OpContext(TfLiteContext* context, TfLiteNode* node) {
input1 = GetInput(context, node, kInputTensor1);
input2 = GetInput(context, node, kInputTensor2);
output = GetOutput(context, node, kOutputTensor);
}
const TfLiteTensor* input1;
const TfLiteTensor* input2;
TfLiteTensor* output;
};
struct MaximumOp {
template <typename data_type>
static data_type op(data_type el1, data_type el2) {
return el1 > el2 ? el1 : el2;
}
};
struct MinimumOp {
template <typename data_type>
static data_type op(data_type el1, data_type el2) {
return el1 < el2 ? el1 : el2;
}
};
} // namespace
template <typename data_type, typename op_type>
void TFLiteOperation(TfLiteContext* context, TfLiteNode* node,
const OpContext& op_context) {
reference_ops::MaximumMinimumBroadcastSlow(
GetTensorShape(op_context.input1),
GetTensorData<data_type>(op_context.input1),
GetTensorShape(op_context.input2),
GetTensorData<data_type>(op_context.input2),
GetTensorShape(op_context.output),
GetTensorData<data_type>(op_context.output),
op_type::template op<data_type>);
}
template <KernelType kernel_type, typename OpType>
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
OpContext op_context(context, node);
if (kernel_type == kReference) {
switch (op_context.output->type) {
case kTfLiteFloat32:
TFLiteOperation<float, OpType>(context, node, op_context);
break;
case kTfLiteUInt8:
TFLiteOperation<uint8_t, OpType>(context, node, op_context);
break;
case kTfLiteInt8:
TFLiteOperation<int8_t, OpType>(context, node, op_context);
break;
case kTfLiteInt32:
TFLiteOperation<int32_t, OpType>(context, node, op_context);
break;
case kTfLiteInt64:
TFLiteOperation<int64_t, OpType>(context, node, op_context);
break;
default:
TF_LITE_KERNEL_LOG(context,
"Type %s (%d) is not supported by Maximum/Minimum.",
TfLiteTypeGetName(op_context.output->type),
op_context.output->type);
return kTfLiteError;
}
} else {
TF_LITE_KERNEL_LOG(context,
"Kernel type not supported by Maximum/Minimum.");
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace maximum_minimum
TfLiteRegistration* Register_MAXIMUM() {
static TfLiteRegistration r = {
/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/
maximum_minimum::Eval<maximum_minimum::kReference,
maximum_minimum::MaximumOp>,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
TfLiteRegistration* Register_MINIMUM() {
static TfLiteRegistration r = {
/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/
maximum_minimum::Eval<maximum_minimum::kReference,
maximum_minimum::MinimumOp>,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,89 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_MICRO_OPS_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_MICRO_OPS_H_
#include "tensorflow/lite/c/common.h"
namespace tflite {
namespace ops {
namespace micro {
// Forward declaration of all micro op kernel registration methods. These
// registrations are included with the standard `BuiltinOpResolver`.
//
// This header is particularly useful in cases where only a subset of ops are
// needed. In such cases, the client can selectively add only the registrations
// their model requires, using a custom `(Micro)MutableOpResolver`. Selective
// registration in turn allows the linker to strip unused kernels.
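//
// A minimal sketch of such selective registration (illustrative only; the
// exact resolver type and AddBuiltin signature are assumptions and may
// differ between versions):
//
//   tflite::MicroMutableOpResolver resolver;
//   resolver.AddBuiltin(tflite::BuiltinOperator_CONV_2D,
//                       tflite::ops::micro::Register_CONV_2D());
//   resolver.AddBuiltin(tflite::BuiltinOperator_SOFTMAX,
//                       tflite::ops::micro::Register_SOFTMAX());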
TfLiteRegistration* Register_ABS();
TfLiteRegistration* Register_ADD();
TfLiteRegistration* Register_ARG_MAX();
TfLiteRegistration* Register_ARG_MIN();
TfLiteRegistration* Register_AVERAGE_POOL_2D();
TfLiteRegistration* Register_CEIL();
TfLiteRegistration* Register_CIRCULAR_BUFFER();
TfLiteRegistration* Register_CONV_2D();
TfLiteRegistration* Register_CONCATENATION();
TfLiteRegistration* Register_COS();
TfLiteRegistration* Register_DEPTHWISE_CONV_2D();
TfLiteRegistration* Register_DEQUANTIZE();
TfLiteRegistration* Register_EQUAL();
TfLiteRegistration* Register_FLOOR();
TfLiteRegistration* Register_FULLY_CONNECTED();
TfLiteRegistration* Register_GREATER();
TfLiteRegistration* Register_GREATER_EQUAL();
TfLiteRegistration* Register_LESS();
TfLiteRegistration* Register_LESS_EQUAL();
TfLiteRegistration* Register_LOG();
TfLiteRegistration* Register_LOGICAL_AND();
TfLiteRegistration* Register_LOGICAL_NOT();
TfLiteRegistration* Register_LOGICAL_OR();
TfLiteRegistration* Register_LOGISTIC();
TfLiteRegistration* Register_MAXIMUM();
TfLiteRegistration* Register_MAX_POOL_2D();
TfLiteRegistration* Register_MEAN();
TfLiteRegistration* Register_MINIMUM();
TfLiteRegistration* Register_MUL();
TfLiteRegistration* Register_NEG();
TfLiteRegistration* Register_NOT_EQUAL();
TfLiteRegistration* Register_PACK();
TfLiteRegistration* Register_PAD();
TfLiteRegistration* Register_PADV2();
TfLiteRegistration* Register_PRELU();
TfLiteRegistration* Register_QUANTIZE();
TfLiteRegistration* Register_RELU();
TfLiteRegistration* Register_RELU6();
TfLiteRegistration* Register_RESHAPE();
TfLiteRegistration* Register_RESIZE_NEAREST_NEIGHBOR();
TfLiteRegistration* Register_ROUND();
TfLiteRegistration* Register_RSQRT();
TfLiteRegistration* Register_SIN();
TfLiteRegistration* Register_SOFTMAX();
TfLiteRegistration* Register_SPLIT();
TfLiteRegistration* Register_SQRT();
TfLiteRegistration* Register_SQUARE();
TfLiteRegistration* Register_STRIDED_SLICE();
TfLiteRegistration* Register_SUB();
TfLiteRegistration* Register_SVDF();
TfLiteRegistration* Register_UNPACK();
TfLiteRegistration* Register_L2_NORMALIZATION();
} // namespace micro
} // namespace ops
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_MICRO_OPS_H_

View File

@@ -0,0 +1,37 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_MICRO_UTILS_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_MICRO_UTILS_H_
namespace tflite {
namespace ops {
namespace micro {
// Same as gtl::Greater, but defined here to reduce dependencies and
// binary size for the micro environment.
struct Greater {
template <typename T>
bool operator()(const T& x, const T& y) const {
return x > y;
}
};
struct Less {
template <typename T>
bool operator()(const T& x, const T& y) const {
return x < y;
}
};
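// Example usage (illustrative): these functors can be passed anywhere a
// comparison callable is expected, e.g. Greater()(3, 2) == true and
// Less()(3, 2) == false.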
} // namespace micro
} // namespace ops
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_MICRO_UTILS_H_

View File

@@ -0,0 +1,175 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/mul.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/mul.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace mul {
constexpr int kInput1Tensor = 0;
constexpr int kInput2Tensor = 1;
constexpr int kOutputTensor = 0;
struct OpData {
int32_t output_activation_min;
int32_t output_activation_max;
int32_t output_multiplier;
int output_shift;
};
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
TfLiteMulParams* params, OpData* data) {
const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor);
const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
TF_LITE_ENSURE_EQ(context, input1->type, input2->type);
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, params->activation, output, &data->output_activation_min,
&data->output_activation_max));
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
double real_multiplier = static_cast<double>(input1->params.scale) *
static_cast<double>(input2->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(real_multiplier, &data->output_multiplier,
&data->output_shift);
}
return kTfLiteOk;
}
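// A worked example of the multiplier decomposition above (illustrative):
// with input scales 0.5 and 0.25 and output scale 1.0,
//   real_multiplier = 0.5 * 0.25 / 1.0 = 0.125,
// and since 0.125 = 0.5 * 2^-2, QuantizeMultiplier yields
// output_multiplier = round(0.5 * 2^31) = 1073741824 with output_shift = -2.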
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteMulParams* params, OpData* data,
const TfLiteTensor* input1, const TfLiteTensor* input2,
TfLiteTensor* output) {
if (output->type == kTfLiteInt8 || output->type == kTfLiteUInt8) {
tflite::ArithmeticParams op_params;
SetActivationParams(data->output_activation_min,
data->output_activation_max, &op_params);
op_params.input1_offset = -input1->params.zero_point;
op_params.input2_offset = -input2->params.zero_point;
op_params.output_offset = output->params.zero_point;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
GetTensorShape(input1), GetTensorShape(input2), &op_params);
#define TF_LITE_MUL(type, opname, dtype) \
type::opname(op_params, GetTensorShape(input1), \
GetTensorData<dtype>(input1), GetTensorShape(input2), \
GetTensorData<dtype>(input2), GetTensorShape(output), \
GetTensorData<dtype>(output));
if (output->type == kTfLiteInt8) {
if (need_broadcast) {
TF_LITE_MUL(reference_integer_ops, BroadcastMul4DSlow, int8_t);
} else {
TF_LITE_MUL(reference_integer_ops, Mul, int8_t);
}
} else if (output->type == kTfLiteUInt8) {
if (need_broadcast) {
TF_LITE_MUL(reference_ops, BroadcastMul4DSlow, uint8_t);
} else {
TF_LITE_MUL(reference_ops, Mul, uint8_t);
}
}
#undef TF_LITE_MUL
}
}
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLiteMulParams* params, OpData* data,
const TfLiteTensor* input1, const TfLiteTensor* input2,
TfLiteTensor* output) {
float output_activation_min, output_activation_max;
CalculateActivationRange(params->activation, &output_activation_min,
&output_activation_max);
tflite::ArithmeticParams op_params;
SetActivationParams(output_activation_min, output_activation_max, &op_params);
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
GetTensorShape(input1), GetTensorShape(input2), &op_params);
#define TF_LITE_MUL(opname) \
reference_ops::opname(op_params, GetTensorShape(input1), \
GetTensorData<float>(input1), GetTensorShape(input2), \
GetTensorData<float>(input2), GetTensorShape(output), \
GetTensorData<float>(output));
if (need_broadcast) {
TF_LITE_MUL(BroadcastMul4DSlow);
} else {
TF_LITE_MUL(Mul);
}
#undef TF_LITE_MUL
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
OpData data;
const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor);
const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_STATUS(CalculateOpData(context, node, params, &data));
switch (input1->type) {
case kTfLiteUInt8:
case kTfLiteInt8:
EvalQuantized(context, node, params, &data, input1, input2, output);
break;
case kTfLiteFloat32:
EvalFloat(context, node, params, &data, input1, input2, output);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input1->type), input1->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace mul
TfLiteRegistration* Register_MUL() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/mul::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,64 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/neg.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace neg {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
switch (input->type) {
// TODO(wangtz): handle for kTfLiteInt8
case kTfLiteFloat32:
reference_ops::Negate(GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output),
GetTensorData<float>(output));
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace neg
TfLiteRegistration* Register_NEG() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/neg::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,125 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace pack {
namespace {
constexpr int kOutputTensor = 0;
template <typename T>
TfLiteStatus PackImpl(TfLiteContext* context, TfLiteNode* node,
TfLiteTensor* output, int values_count, int axis) {
const int dimensions = output->dims->size;
const TfLiteTensor* input0 = GetInput(context, node, 0);
const TfLiteIntArray* input_dims = input0->dims;
const TfLiteIntArray* output_dims = output->dims;
if (axis < 0) {
axis += dimensions;
}
int outer_size = 1;
for (int i = 0; i < axis; ++i) {
outer_size *= output_dims->data[i];
}
int copy_size = 1;
for (int i = axis + 1; i < dimensions; ++i) {
copy_size *= output_dims->data[i];
}
int input_size = 1;
for (int i = 0; i < input_dims->size; ++i) {
input_size *= input_dims->data[i];
}
TFLITE_DCHECK_EQ(input_size, copy_size * outer_size);
T* output_data = GetTensorData<T>(output);
for (int i = 0; i < values_count; ++i) {
const TfLiteTensor* t = GetInput(context, node, i);
const T* input_data = GetTensorData<T>(t);
for (int k = 0; k < outer_size; ++k) {
const T* input_ptr = input_data + copy_size * k;
int loc = k * values_count * copy_size + i * copy_size;
T* output_ptr = output_data + loc;
for (int j = 0; j < copy_size; ++j) output_ptr[j] = input_ptr[j];
}
}
return kTfLiteOk;
}
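// Worked example of the index arithmetic above (illustrative): packing
// values_count = 3 tensors of shape [2, 2] along axis = 1 yields an output
// of shape [2, 3, 2], with outer_size = 2, copy_size = 2, input_size = 4,
// and loc = k * 3 * 2 + i * 2 for input i and outer index k.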
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLitePackParams* data =
reinterpret_cast<TfLitePackParams*>(node->builtin_data);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
switch (output->type) {
case kTfLiteFloat32: {
return PackImpl<float>(context, node, output, data->values_count,
data->axis);
}
case kTfLiteUInt8: {
return PackImpl<uint8_t>(context, node, output, data->values_count,
data->axis);
}
case kTfLiteInt8: {
return PackImpl<int8_t>(context, node, output, data->values_count,
data->axis);
}
case kTfLiteInt32: {
return PackImpl<int32_t>(context, node, output, data->values_count,
data->axis);
}
case kTfLiteInt64: {
return PackImpl<int64_t>(context, node, output, data->values_count,
data->axis);
}
default: {
TF_LITE_KERNEL_LOG(context, "Type '%s' is not supported by pack.",
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
}
return kTfLiteOk;
}
} // namespace
} // namespace pack
TfLiteRegistration* Register_PACK() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/pack::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,237 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/pad.h"
#include <string.h>
#include <limits>
#include "tensorflow/lite/kernels/internal/types.h"
#ifdef MEMORY_SANITIZER
#include <sanitizer/msan_interface.h>
#else
#define __msan_check_mem_is_initialized(ptr, size)
#endif
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
namespace tflite {
namespace ops {
namespace micro {
namespace pad {
struct PadContext {
PadContext(TfLiteContext* context, TfLiteNode* node) {
input = GetInput(context, node, 0);
paddings = GetInput(context, node, 1);
constant_values = nullptr;
if (NumInputs(node) == 3) {
  constant_values = GetOptionalInputTensor(context, node, 2);
}
output = GetOutput(context, node, 0);
dims = NumDimensions(input);
resizing_category = ResizingCategory::kGenericResize;
const int paddings_total = GetTensorShape(paddings).FlatSize();
const int32* paddings_data = GetTensorData<int32>(paddings);
// Paddings will be an n,2 array, and we need to detect 4D arrays with the
// pattern { {0,0}, {a, b}, {c, d}, {0,0} }.
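// For example (illustrative), for an NHWC input a paddings tensor of
// { {0,0}, {1,1}, {2,2}, {0,0} } pads only height (by 1 on each side) and
// width (by 2 on each side), which is the image-style case.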
if (IsConstantTensor(paddings) && paddings_total == 8 &&
(paddings_data[0] == 0 && paddings_data[1] == 0) &&
(paddings_data[6] == 0 && paddings_data[7] == 0)) {
resizing_category = ResizingCategory::kImageStyle;
}
}
const TfLiteTensor* constant_values;
const TfLiteTensor* input;
const TfLiteTensor* paddings;
TfLiteTensor* output;
int dims;
ResizingCategory resizing_category;
};
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE(context, NumInputs(node) == 2 || NumInputs(node) == 3);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
PadContext op_context(context, node);
TF_LITE_ENSURE_EQ(context, op_context.input->type, op_context.output->type);
if (op_context.constant_values != nullptr) {
TF_LITE_ENSURE_EQ(context, op_context.input->type,
op_context.constant_values->type);
}
// There must be a pair of paddings for each output dimension.
TF_LITE_ENSURE_EQ(context, GetTensorShape(op_context.paddings).FlatSize(),
op_context.output->dims->size * 2);
// On Micro, outputs must be properly sized by the converter.
const int32* paddings_data = GetTensorData<int32>(op_context.paddings);
for (int i = 0; i < op_context.output->dims->size; i++) {
int output_dim = op_context.output->dims->data[i];
int expected_dim = op_context.input->dims->data[i] + paddings_data[i * 2] +
paddings_data[i * 2 + 1];
TF_LITE_ENSURE_EQ(context, output_dim, expected_dim);
}
// Current implementations rely on the inputs being <= 4D.
TF_LITE_ENSURE(
context, op_context.dims <= reference_ops::PadKernelMaxDimensionCount());
TF_LITE_ENSURE(context, IsConstantTensor(op_context.paddings));
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
PadContext op_context(context, node);
if (op_context.constant_values != nullptr) {
// Ensure that constant_values is a scalar.
TF_LITE_ENSURE_EQ(context, NumElements(op_context.constant_values), 1);
}
// Create before and after padding arrays that are accepted by the kernel.
const int32* paddings_data = GetTensorData<int32>(op_context.paddings);
tflite::PadParams op_params;
memset(&op_params, 0, sizeof(PadParams));
op_params.left_padding_count = op_context.dims;
op_params.right_padding_count = op_context.dims;
for (int idx = op_context.dims - 1; idx >= 0; --idx) {
op_params.left_padding[idx] = paddings_data[idx * 2];
op_params.right_padding[idx] = paddings_data[idx * 2 + 1];
}
#define TF_LITE_PAD(type, op_name, scalar, pad_value) \
const scalar pad_value_copy = pad_value; \
\
type::op_name(op_params, GetTensorShape(op_context.input), \
GetTensorData<scalar>(op_context.input), &pad_value_copy, \
GetTensorShape(op_context.output), \
GetTensorData<scalar>(op_context.output))
switch (op_context.input->type) {
case kTfLiteFloat32: {
float pad_value = op_context.constant_values == nullptr
? 0.f
: *GetTensorData<float>(op_context.constant_values);
if (op_context.resizing_category == ResizingCategory::kImageStyle) {
TF_LITE_PAD(reference_ops, PadImageStyle, float, pad_value);
} else {
TF_LITE_PAD(reference_ops, Pad, float, pad_value);
}
} break;
case kTfLiteUInt8: {
uint8_t pad_value;
if (op_context.constant_values == nullptr) {
// Quantized Pad requires that 0 is represented in the quantized
// range.
TF_LITE_ENSURE(context, op_context.output->params.zero_point >=
std::numeric_limits<uint8_t>::min());
TF_LITE_ENSURE(context, op_context.output->params.zero_point <=
std::numeric_limits<uint8_t>::max());
pad_value = static_cast<uint8_t>(op_context.output->params.zero_point);
} else {
// Quantized Pad requires that 'constant_values' is represented in the
// same quantized range as the input and output tensors.
TF_LITE_ENSURE_EQ(context, op_context.output->params.zero_point,
op_context.constant_values->params.zero_point);
TF_LITE_ENSURE_EQ(
context, static_cast<double>(op_context.output->params.scale),
static_cast<double>(op_context.constant_values->params.scale));
pad_value = *GetTensorData<uint8_t>(op_context.constant_values);
}
if (op_context.resizing_category == ResizingCategory::kImageStyle) {
TF_LITE_PAD(reference_ops, PadImageStyle, uint8_t, pad_value);
} else {
TF_LITE_PAD(reference_ops, Pad, uint8_t, pad_value);
}
} break;
case kTfLiteInt8: {
int8_t pad_value;
if (op_context.constant_values == nullptr) {
// Quantized Pad requires that 0 is represented in the quantized
// range.
TF_LITE_ENSURE(context, op_context.output->params.zero_point >=
std::numeric_limits<int8_t>::min());
TF_LITE_ENSURE(context, op_context.output->params.zero_point <=
std::numeric_limits<int8_t>::max());
pad_value = static_cast<int8_t>(op_context.output->params.zero_point);
} else {
// Quantized Pad requires that 'constant_values' is represented in the
// same quantized range as the input and output tensors.
TF_LITE_ENSURE_EQ(context, op_context.output->params.zero_point,
op_context.constant_values->params.zero_point);
TF_LITE_ENSURE(context, op_context.output->params.scale ==
op_context.constant_values->params.scale);
pad_value = *GetTensorData<int8_t>(op_context.constant_values);
}
if (op_context.resizing_category == ResizingCategory::kImageStyle) {
TF_LITE_PAD(reference_ops, PadImageStyle, int8_t, pad_value);
} else {
TF_LITE_PAD(reference_ops, Pad, int8_t, pad_value);
}
} break;
case kTfLiteInt32: {
int32_t pad_value =
op_context.constant_values == nullptr
? 0
: *GetTensorData<int32_t>(op_context.constant_values);
TF_LITE_PAD(reference_ops, Pad, int32_t, pad_value);
} break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s not currently supported by Pad.",
TfLiteTypeGetName(op_context.input->type));
return kTfLiteError;
}
#undef TF_LITE_PAD
return kTfLiteOk;
}
} // namespace pad
TfLiteRegistration* Register_PAD() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/pad::Prepare,
/*invoke=*/pad::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
// Also register Pad as PadV2.
TfLiteRegistration* Register_PADV2() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/pad::Prepare,
/*invoke=*/pad::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,238 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/pooling.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
namespace tflite {
namespace ops {
namespace micro {
namespace pooling {
namespace {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
struct OpData {
TfLitePaddingValues padding;
};
TfLiteStatus CalculateOpData(const TfLiteContext* context,
const TfLitePoolParams* params,
const TfLiteTensor* input,
const TfLiteTensor* output, OpData* data) {
// input: batch, height, width, channel
int height = SizeOfDimension(input, 1);
int width = SizeOfDimension(input, 2);
int out_height, out_width;
data->padding = ComputePaddingHeightWidth(
params->stride_height, params->stride_width,
/*dilation_rate_height=*/1,
/*dilation_rate_width=*/1, height, width, params->filter_height,
params->filter_width, params->padding, &out_height, &out_width);
return kTfLiteOk;
}
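// A hedged illustration of the padding computation above: for a 4x4 input,
// a 2x2 filter and stride 2, kTfLitePaddingValid gives a 2x2 output with
// zero padding, and kTfLitePaddingSame also gives a 2x2 output with zero
// padding here, because the input divides evenly by the stride.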
void AverageEvalFloat(const TfLiteContext* context, const TfLiteNode* node,
const TfLitePoolParams* params, const OpData* data,
const TfLiteTensor* input, TfLiteTensor* output) {
float activation_min, activation_max;
CalculateActivationRange(params->activation, &activation_min,
&activation_max);
PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.float_activation_min = activation_min;
op_params.float_activation_max = activation_max;
reference_ops::AveragePool(
op_params, GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<float>(output));
}
void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
const TfLitePoolParams* params, const OpData* data,
const TfLiteTensor* input, TfLiteTensor* output) {
TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8);
int32_t activation_min, activation_max;
(void)CalculateActivationRangeQuantized(context, params->activation, output,
&activation_min, &activation_max);
PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.quantized_activation_min = activation_min;
op_params.quantized_activation_max = activation_max;
if (input->type == kTfLiteUInt8) {
reference_ops::AveragePool(
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
GetTensorShape(output), GetTensorData<uint8_t>(output));
} else {
reference_integer_ops::AveragePool(
op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
GetTensorShape(output), GetTensorData<int8_t>(output));
}
}
void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLitePoolParams* params, OpData* data,
const TfLiteTensor* input, TfLiteTensor* output) {
float activation_min, activation_max;
CalculateActivationRange(params->activation, &activation_min,
&activation_max);
tflite::PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.float_activation_min = activation_min;
op_params.float_activation_max = activation_max;
reference_ops::MaxPool(op_params, GetTensorShape(input),
GetTensorData<float>(input), GetTensorShape(output),
GetTensorData<float>(output));
}
void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLitePoolParams* params, OpData* data,
const TfLiteTensor* input, TfLiteTensor* output) {
TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8);
int32_t activation_min, activation_max;
(void)CalculateActivationRangeQuantized(context, params->activation, output,
&activation_min, &activation_max);
tflite::PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.quantized_activation_min = activation_min;
op_params.quantized_activation_max = activation_max;
if (input->type == kTfLiteUInt8) {
reference_ops::MaxPool(
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
GetTensorShape(output), GetTensorData<uint8_t>(output));
} else {
reference_integer_ops::MaxPool(
op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
GetTensorShape(output), GetTensorData<int8_t>(output));
}
}
} // namespace
TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
OpData data;
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, &data));
// Inputs and outputs share the same type, guaranteed by the converter.
switch (input->type) {
case kTfLiteFloat32:
AverageEvalFloat(context, node, params, &data, input, output);
break;
case kTfLiteUInt8:
case kTfLiteInt8:
AverageEvalQuantized(context, node, params, &data, input, output);
break;
default:
TF_LITE_KERNEL_LOG(context, "Input type %s is not currently supported",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
return kTfLiteOk;
}
TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
OpData data;
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, &data));
switch (input->type) {
case kTfLiteFloat32:
MaxEvalFloat(context, node, params, &data, input, output);
break;
case kTfLiteUInt8:
case kTfLiteInt8:
MaxEvalQuantized(context, node, params, &data, input, output);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace pooling
TfLiteRegistration* Register_AVERAGE_POOL_2D() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/pooling::AverageEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
TfLiteRegistration* Register_MAX_POOL_2D() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/pooling::MaxEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,121 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/prelu.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace activations {
TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteOk;
}
inline void BroadcastPrelu4DSlowFloat(
const RuntimeShape& unextended_input1_shape, const float* input1_data,
const RuntimeShape& unextended_input2_shape, const float* input2_data,
const RuntimeShape& unextended_output_shape, float* output_data) {
TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
const RuntimeShape output_shape =
RuntimeShape::ExtendedShape(4, unextended_output_shape);
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
unextended_input2_shape, &desc1, &desc2);
for (int b = 0; b < output_shape.Dims(0); ++b) {
for (int y = 0; y < output_shape.Dims(1); ++y) {
for (int x = 0; x < output_shape.Dims(2); ++x) {
for (int c = 0; c < output_shape.Dims(3); ++c) {
auto out_idx = Offset(output_shape, b, y, x, c);
auto in1_idx = SubscriptToIndex(desc1, b, y, x, c);
auto in2_idx = SubscriptToIndex(desc2, b, y, x, c);
auto in1_val = input1_data[in1_idx];
auto in2_val = input2_data[in2_idx];
output_data[out_idx] = in1_val >= 0.0f ? in1_val : in1_val * in2_val;
}
}
}
}
}
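// Illustrative example: with input values {-2.0f, 3.0f} and a broadcast
// alpha of 0.5f, the function above produces {-1.0f, 3.0f}, since negative
// inputs are scaled by alpha and non-negative inputs pass through unchanged.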
TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, 0);
const TfLiteTensor* alpha = GetInput(context, node, 1);
TfLiteTensor* output = GetOutput(context, node, 0);
int32_t output_multiplier = 0;
int output_shift = 0;
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt16) {
double real_multiplier = static_cast<double>(input->params.scale) *
static_cast<double>(alpha->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplierSmallerThanOneExp(real_multiplier, &output_multiplier,
&output_shift);
}
switch (input->type) {
case kTfLiteFloat32: {
BroadcastPrelu4DSlowFloat(
GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(alpha), GetTensorData<float>(alpha),
GetTensorShape(output), GetTensorData<float>(output));
return kTfLiteOk;
} break;
case kTfLiteUInt8: {
PreluParams op_params;
op_params.input_offset = -input->params.zero_point;
op_params.alpha_offset = -alpha->params.zero_point;
op_params.output_offset = output->params.zero_point;
op_params.output_multiplier = output_multiplier;
op_params.output_shift = output_shift;
reference_ops::BroadcastPrelu4DSlow(
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
GetTensorShape(alpha), GetTensorData<uint8_t>(alpha),
GetTensorShape(output), GetTensorData<uint8_t>(output));
return kTfLiteOk;
} break;
default:
TF_LITE_KERNEL_LOG(
    context, "Only float32 and uint8 are supported currently, got %s.",
    TfLiteTypeGetName(input->type));
return kTfLiteError;
}
}
} // namespace activations
TfLiteRegistration* Register_PRELU() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/activations::PreluPrepare,
/*invoke=*/activations::PreluEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,129 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/quantize.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace ops {
namespace micro {
namespace quantize {
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* output = GetOutput(context, node, 0);
// TODO(b/128934713): Add support for fixed-point per-channel quantization.
// Currently this only supports affine per-layer quantization.
TF_LITE_ENSURE_EQ(context, output->quantization.type,
kTfLiteAffineQuantization);
const auto* affine_quantization =
reinterpret_cast<TfLiteAffineQuantization*>(output->quantization.params);
TF_LITE_ENSURE(context, affine_quantization);
TF_LITE_ENSURE(context, affine_quantization->scale);
TF_LITE_ENSURE(context, affine_quantization->scale->size == 1);
TF_LITE_ENSURE(context,
input->type == kTfLiteFloat32 || input->type == kTfLiteInt16);
TF_LITE_ENSURE(context,
output->type == kTfLiteUInt8 || output->type == kTfLiteInt8);
return kTfLiteOk;
}
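// A worked example of affine quantization (illustrative): with output
// scale = 0.5 and zero_point = -128 (int8), a float input of 1.0 maps to
// round(1.0 / 0.5) + (-128) = -126, clamped to the int8 range.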
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* output = GetOutput(context, node, 0);
tflite::QuantizationParams op_params;
op_params.zero_point = output->params.zero_point;
op_params.scale = static_cast<double>(output->params.scale);
if (input->type == kTfLiteFloat32) {
switch (output->type) {
case kTfLiteInt8:
reference_ops::AffineQuantize(
op_params, GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<int8_t>(output));
break;
case kTfLiteUInt8:
reference_ops::AffineQuantize(
op_params, GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<uint8_t>(output));
break;
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else if (input->type == kTfLiteInt16) {
size_t size = ElementCount(*input->dims);
int32_t output_multiplier;
int output_shift;
double effective_scale = static_cast<double>(input->params.scale) /
                         static_cast<double>(output->params.scale);
switch (output->type) {
case kTfLiteInt8:
QuantizeMultiplier(effective_scale, &output_multiplier, &output_shift);
reference_ops::Requantize(
GetTensorData<int16_t>(input), size, output_multiplier,
output_shift, input->params.zero_point, output->params.zero_point,
GetTensorData<int8_t>(output));
break;
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else {
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace quantize
// This op (QUANTIZE) quantizes the input and produces a quantized output.
// AffineQuantize takes a scale and zero point and quantizes the float values
// to int8 or uint8 output.
TfLiteRegistration* Register_QUANTIZE() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/quantize::Prepare,
/*invoke=*/quantize::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,135 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/reduce.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace reduce {
constexpr int kMaxNumberOfAxis = 4;
constexpr int kMaxNumberOfReducedAxis = 2;
TfLiteStatus PrepareSimple(TfLiteContext* context, TfLiteNode* node) {
// Inputs Tensor (dtype depends on quantization):
// [0] = Input
// [1] = Axis
// Outputs Tensor (dtype depends on quantization):
// [0] = Output
// Validate number of inputs and outputs
TF_LITE_ENSURE_EQ(context, node->inputs->size, 2);
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
// Validate axis type
const TfLiteTensor* axis = GetInput(context, node, 1);
TF_LITE_ENSURE_TYPES_EQ(context, axis->type, kTfLiteInt32);
return kTfLiteOk;
}
TfLiteStatus PrepareMeanOrSum(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_OK(context, PrepareSimple(context, node));
// TODO(b/144955155): Support uint8(b/144955155) and int8(b/144955018)
return kTfLiteOk;
}
void ResolveAxis(const int* axis_data, int axis_count,
tflite::MeanParams* op_params) {
int i = 0;
for (; i < axis_count; ++i) {
op_params->axis[i] = static_cast<int16>(axis_data[i]);
}
for (; i < 4; ++i) {
op_params->axis[i] = 1;
}
op_params->axis_count = axis_count;
}
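// Illustrative example: for an NHWC input of shape [1, 4, 4, 8], Mean over
// axis = {1, 2} (the case validated below) averages each channel over the
// 4x4 spatial grid, producing [1, 1, 1, 8] with keep_dims or [1, 8] without.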
TfLiteStatus EvalMean(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, 0);
const TfLiteTensor* axis = GetInput(context, node, 1);
TfLiteTensor* output = GetOutput(context, node, 0);
TfLiteReducerParams* params =
reinterpret_cast<TfLiteReducerParams*>(node->builtin_data);
int num_axis = static_cast<int>(NumElements(axis));
int temp_index[kMaxNumberOfAxis];
int resolved_axis[kMaxNumberOfReducedAxis];
switch (input->type) {
case kTfLiteFloat32: {
tflite::MeanParams op_params;
ResolveAxis(GetTensorData<int>(axis), num_axis, &op_params);
// TODO(b/146571391): Support only 4D Input and 2D Axis for Mean until
// scratch tensor allocation has been implemented in (b/132070898)
bool is_valid_inputs =
(NumDimensions(input) == 4 && op_params.axis_count == 2 &&
((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
(op_params.axis[0] == 2 && op_params.axis[1] == 1)));
TF_LITE_ENSURE_MSG(
context, is_valid_inputs == true,
"Number of Input "
"dimensions != 4 OR the Axis is not either [1, 2] or [2, 1]");
// TODO(b/139102329): Handle the below special case in the combined
// reference method.
// Defer to specialized implementation for 4D Mean across axes 1 & 2.
if (params->keep_dims) {
reference_ops::Mean(op_params, GetTensorShape(input),
GetTensorData<float>(input), GetTensorShape(output),
GetTensorData<float>(output));
} else {
TF_LITE_ENSURE(
context,
reference_ops::Mean(GetTensorData<float>(input), input->dims->data,
input->dims->size, GetTensorData<float>(output),
output->dims->data, output->dims->size,
GetTensorData<int>(axis), num_axis,
params->keep_dims, temp_index, resolved_axis,
GetTensorData<float>(output)));
}
} break;
default:
// TODO(b/144955155): Support uint8(b/144955155) and int8(b/144955018)
TF_LITE_ENSURE_MSG(context, false,
"Currently, only float32 input type "
"is supported.");
}
return kTfLiteOk;
}
} // namespace reduce
TfLiteRegistration* Register_MEAN() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/reduce::PrepareMeanOrSum,
/*invoke=*/reduce::EvalMean,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,106 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
namespace tflite {
namespace ops {
namespace micro {
namespace reshape {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
TfLiteStatus ReshapeOutput(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
// TensorFlow's Reshape allows one of the shape components to have the
// special -1 value, meaning it will be calculated automatically based on the
// input. Here we calculate what that dimension should be so that the number
// of output elements is the same as the number of input elements.
int num_input_elements = NumElements(input);
TfLiteIntArray* output_shape = output->dims;
if (NumInputs(node) == 1 && // Legacy scalar supported with params.
output_shape->size == 1 && output_shape->data[0] == 0) {
// Legacy tflite models use a shape parameter of [0] to indicate scalars,
// so adjust accordingly. TODO(b/111614235): Allow zero-sized buffers during
// toco conversion.
output_shape->size = 0;
}
int num_output_elements = 1;
int stretch_dim = -1;
for (int i = 0; i < output_shape->size; ++i) {
int value = output_shape->data[i];
if (value == -1) {
TF_LITE_ENSURE_EQ(context, stretch_dim, -1);
stretch_dim = i;
} else {
num_output_elements *= value;
}
}
if (stretch_dim != -1) {
output_shape->data[stretch_dim] = num_input_elements / num_output_elements;
num_output_elements *= output_shape->data[stretch_dim];
}
TF_LITE_ENSURE_EQ(context, input->type, output->type);
TF_LITE_ENSURE_EQ(context, num_input_elements, num_output_elements);
return kTfLiteOk;
}
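// Worked example (added for exposition): a 2x3x4 input holds 24 elements. A
// requested output shape of {-1, 4} leaves stretch_dim == 0 and
// num_output_elements == 4 after the loop above, so the -1 entry resolves to
// 24 / 4 = 6 and the final output shape is {6, 4}.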
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE(context, NumInputs(node) == 1 || NumInputs(node) == 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
TF_LITE_ENSURE_EQ(context, ReshapeOutput(context, node), kTfLiteOk);
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
// Do nothing for in-place reshape.
if (input->data.raw != output->data.raw) {
// Otherwise perform reshape with copy.
for (size_t i = 0; i < input->bytes; ++i) {
output->data.raw[i] = input->data.raw[i];
}
}
return kTfLiteOk;
}
} // namespace reshape
TfLiteRegistration* Register_RESHAPE() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/reshape::Prepare,
/*invoke=*/reshape::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,112 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
namespace tflite {
namespace ops {
namespace micro {
namespace resize_nearest_neighbor {
constexpr int kInputTensor = 0;
constexpr int kSizeTensor = 1;
constexpr int kOutputTensor = 0;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
#if defined(DEBUG)
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* size = GetInput(context, node, kSizeTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
// Our current implementations rely on the input being 4D,
// and the size being 1D tensor with exactly 2 elements.
TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
TF_LITE_ENSURE_EQ(context, NumDimensions(size), 1);
TF_LITE_ENSURE_EQ(context, size->type, kTfLiteInt32);
TF_LITE_ENSURE_EQ(context, size->dims->data[0], 2);
output->type = input->type;
if (!IsConstantTensor(size)) {
TF_LITE_KERNEL_LOG(context,
"Dynamic tensors are unsupported in tfmicro.");
return kTfLiteError;
}
#endif
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params =
reinterpret_cast<TfLiteResizeNearestNeighborParams*>(node->builtin_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* size = GetInput(context, node, kSizeTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
tflite::ResizeNearestNeighborParams op_params;
op_params.align_corners = params->align_corners;
op_params.half_pixel_centers = false;
if (output->type == kTfLiteFloat32) {
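// Note: this float path reads the tensors as int32, which has the same width
// as float. Nearest-neighbor resize only copies elements and performs no
// arithmetic on them, so the reinterpretation is safe.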
reference_ops::ResizeNearestNeighbor(
op_params, GetTensorShape(input), GetTensorData<int32>(input),
GetTensorShape(size), GetTensorData<int32>(size),
GetTensorShape(output), GetTensorData<int32>(output));
} else if (output->type == kTfLiteUInt8) {
reference_ops::ResizeNearestNeighbor(
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
GetTensorShape(size), GetTensorData<int32>(size),
GetTensorShape(output), GetTensorData<uint8_t>(output));
} else if (output->type == kTfLiteInt8) {
reference_ops::ResizeNearestNeighbor(
op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
GetTensorShape(size), GetTensorData<int32>(size),
GetTensorShape(output), GetTensorData<int8_t>(output));
} else {
TF_LITE_KERNEL_LOG(context,
"Output type is %d, requires float, uint8 or int8.",
output->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace resize_nearest_neighbor
TfLiteRegistration* Register_RESIZE_NEAREST_NEIGHBOR() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/resize_nearest_neighbor::Prepare,
/*invoke=*/resize_nearest_neighbor::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,70 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/round.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace round {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32);
TF_LITE_ENSURE_EQ(context, output->type, input->type);
TF_LITE_ENSURE_EQ(context, output->bytes, input->bytes);
TF_LITE_ENSURE_EQ(context, output->dims->size, input->dims->size);
for (int i = 0; i < output->dims->size; ++i) {
TF_LITE_ENSURE_EQ(context, output->dims->data[i], input->dims->data[i]);
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
reference_ops::Round(GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<float>(output));
return kTfLiteOk;
}
} // namespace round
TfLiteRegistration* Register_ROUND() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/round::Prepare,
/*invoke=*/round::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,153 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/softmax.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
namespace tflite {
namespace ops {
namespace micro {
namespace activations {
namespace {
TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
const TfLiteTensor* input,
TfLiteTensor* output,
const TfLiteSoftmaxParams* params,
SoftmaxParams* op_data) {
if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
if (input->type == kTfLiteUInt8) {
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteUInt8);
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
} else {
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8);
if (output->type == kTfLiteInt16) {
TF_LITE_ENSURE_EQ(context, output->params.zero_point, -32768);
// NOTE: Current int16 softmax output does not require symmetric scaling,
// so there is no need to verify the scale here.
} else {
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8);
TF_LITE_ENSURE_EQ(context, output->params.zero_point, -128);
TF_LITE_ENSURE(context, output->params.scale == 1.f / 256);
}
}
static const int kScaledDiffIntegerBits = 5;
int input_left_shift;
tflite::PreprocessSoftmaxScaling(
static_cast<double>(params->beta),
static_cast<double>(input->params.scale), kScaledDiffIntegerBits,
&op_data->input_multiplier, &input_left_shift);
op_data->input_left_shift = input_left_shift;
op_data->diff_min =
-1.0 * tflite::CalculateInputRadius(kScaledDiffIntegerBits,
op_data->input_left_shift);
} else {
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteFloat32);
op_data->beta = static_cast<double>(params->beta);
}
return kTfLiteOk;
}
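// Worked example (added for exposition): for int8 output the checks above
// require scale == 1/256 and zero_point == -128, so a quantized value q
// represents (q + 128) / 256. q == -128 maps to 0.0 and q == 127 maps to
// 255/256, covering the softmax output range [0, 1).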
} // namespace
TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, 0);
TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
return kTfLiteOk;
}
// Takes a tensor and performs softmax along the last dimension.
void SoftmaxFloat(const TfLiteTensor* input, TfLiteTensor* output,
const SoftmaxParams& op_data) {
tflite::reference_ops::Softmax(
op_data, GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<float>(output));
}
void SoftmaxQuantized(const TfLiteTensor* input, TfLiteTensor* output,
const SoftmaxParams& op_data) {
if (input->type == kTfLiteUInt8) {
tflite::reference_ops::Softmax(
op_data, GetTensorShape(input), GetTensorData<uint8_t>(input),
GetTensorShape(output), GetTensorData<uint8_t>(output));
} else {
if (output->type == kTfLiteInt16) {
tflite::reference_ops::Softmax(
op_data, GetTensorShape(input), GetTensorData<int8_t>(input),
GetTensorShape(output), GetTensorData<int16_t>(output));
} else {
tflite::reference_ops::Softmax(
op_data, GetTensorShape(input), GetTensorData<int8_t>(input),
GetTensorShape(output), GetTensorData<int8_t>(output));
}
}
}
TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
auto* params = static_cast<TfLiteSoftmaxParams*>(node->builtin_data);
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* output = GetOutput(context, node, 0);
SoftmaxParams op_data;
TF_LITE_ENSURE_STATUS(
CalculateSoftmaxParams(context, input, output, params, &op_data));
switch (input->type) {
case kTfLiteFloat32: {
SoftmaxFloat(input, output, op_data);
return kTfLiteOk;
}
case kTfLiteInt8:
case kTfLiteUInt8: {
SoftmaxQuantized(input, output, op_data);
return kTfLiteOk;
}
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);
return kTfLiteError;
}
}
} // namespace activations
TfLiteRegistration* Register_SOFTMAX() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/activations::SoftmaxPrepare,
/*invoke=*/activations::SoftmaxEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,128 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace split {
template <typename T>
TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node,
const TfLiteTensor* input, int axis_value) {
const int output_count = NumOutputs(node);
const TfLiteIntArray* input_dims = input->dims;
const TfLiteTensor* output0 = GetOutput(context, node, 0);
const TfLiteIntArray* output_dims = output0->dims;
const int split_dimensions = input_dims->size;
int axis = axis_value < 0 ? axis_value + split_dimensions : axis_value;
TFLITE_DCHECK_LT(axis, split_dimensions);
TFLITE_DCHECK_EQ(output_dims->size, split_dimensions);
int64_t split_size = output_dims->data[axis] * output_count;
TFLITE_DCHECK_EQ(split_size, input_dims->data[axis]);
int64_t outer_size = 1;
for (int i = 0; i < axis; ++i) {
outer_size *= input_dims->data[i];
}
int64_t base_inner_size = 1;
for (int i = axis + 1; i < split_dimensions; ++i) {
base_inner_size *= input_dims->data[i];
}
const T* input_ptr = GetTensorData<T>(input);
for (int k = 0; k < outer_size; ++k) {
for (int i = 0; i < output_count; ++i) {
TfLiteTensor* t = GetOutput(context, node, i);
T* output_data = GetTensorData<T>(t);
const int copy_size = output_dims->data[axis] * base_inner_size;
T* output_ptr = output_data + k * copy_size;
for (int j = 0; j < copy_size; ++j) output_ptr[j] = input_ptr[j];
input_ptr += copy_size;
}
}
return kTfLiteOk;
}
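// Worked example (added for exposition): splitting a 2x6 input along axis 1
// into 3 outputs yields three 2x2 tensors. Here outer_size == 2,
// base_inner_size == 1 and copy_size == 2, so the first input row
// {0, 1, 2, 3, 4, 5} is scattered as {0, 1} -> output 0, {2, 3} -> output 1
// and {4, 5} -> output 2, and likewise for the second row.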
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* axis = GetInput(context, node, 0);
const TfLiteTensor* input = GetInput(context, node, 1);
// Dynamic output tensors are needed if axis tensor is not constant.
// But Micro doesn't support dynamic memory allocation, so we only support
// constant axis tensor for now.
TF_LITE_ENSURE_MSG(context, IsConstantTensor(axis),
"Non constant axis tensor not supported");
int axis_value = GetTensorData<int32_t>(axis)[0];
if (axis_value < 0) {
axis_value += NumDimensions(input);
}
TF_LITE_ENSURE(context, axis_value >= 0);
TF_LITE_ENSURE(context, axis_value < NumDimensions(input));
switch (input->type) {
case kTfLiteFloat32: {
return SplitImpl<float>(context, node, input, axis_value);
}
case kTfLiteUInt8: {
return SplitImpl<uint8_t>(context, node, input, axis_value);
}
case kTfLiteInt8: {
return SplitImpl<int8_t>(context, node, input, axis_value);
}
case kTfLiteInt16: {
return SplitImpl<int16_t>(context, node, input, axis_value);
}
case kTfLiteInt32: {
return SplitImpl<int32_t>(context, node, input, axis_value);
}
default:
TF_LITE_KERNEL_LOG(context, "Type %s currently not supported.",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace split
TfLiteRegistration* Register_SPLIT() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/split::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,185 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/strided_slice.h"
#include <cmath>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
namespace tflite {
namespace ops {
namespace micro {
namespace strided_slice {
enum KernelType {
kReference,
// TODO(soroosh): add kGenericOptimized
};
constexpr int kInputTensor = 0;
constexpr int kBeginTensor = 1;
constexpr int kEndTensor = 2;
constexpr int kStridesTensor = 3;
constexpr int kOutputTensor = 0;
struct StridedSliceContext {
StridedSliceContext(TfLiteContext* context, TfLiteNode* node) {
params = reinterpret_cast<TfLiteStridedSliceParams*>(node->builtin_data);
input = GetInput(context, node, kInputTensor);
begin = GetInput(context, node, kBeginTensor);
end = GetInput(context, node, kEndTensor);
strides = GetInput(context, node, kStridesTensor);
output = GetOutput(context, node, kOutputTensor);
dims = NumDimensions(input);
}
const TfLiteStridedSliceParams* params;
const TfLiteTensor* input;
const TfLiteTensor* begin;
const TfLiteTensor* end;
const TfLiteTensor* strides;
TfLiteTensor* output;
int dims;
};
// This op only supports 1-4D cases; since we use the reference 4D
// implementation, 1-3D tensors are mapped to 4D.
const int kMaxDim = 4;
tflite::StridedSliceParams BuildStridedSliceParams(
StridedSliceContext* op_context) {
tflite::StridedSliceParams op_params;
op_params.start_indices_count = op_context->dims;
op_params.stop_indices_count = op_context->dims;
op_params.strides_count = op_context->dims;
for (int i = 0; i < op_context->dims; ++i) {
op_params.start_indices[i] = GetTensorData<int32_t>(op_context->begin)[i];
op_params.stop_indices[i] = GetTensorData<int32_t>(op_context->end)[i];
op_params.strides[i] = GetTensorData<int32_t>(op_context->strides)[i];
}
op_params.begin_mask = op_context->params->begin_mask;
op_params.ellipsis_mask = 0;
op_params.end_mask = op_context->params->end_mask;
op_params.new_axis_mask = 0;
op_params.shrink_axis_mask = op_context->params->shrink_axis_mask;
return op_params;
}
// Processes the indexing tensors (begin, end and strides) to check the size
// of the output tensor. This function is callable from both Prepare() and
// Eval() as long as the caller ensures the indexing tensors are present.
TfLiteStatus CheckOutputSize(TfLiteContext* context,
StridedSliceContext* op_context) {
using ::tflite::strided_slice::StartForAxis;
using ::tflite::strided_slice::StopForAxis;
TfLiteIntArray* output_shape = op_context->output->dims;
int shape_size = 0;
auto op_params = BuildStridedSliceParams(op_context);
auto input_shape = GetTensorShape(op_context->input);
for (int idx = 0; idx < op_context->dims; ++idx) {
int32_t stride = GetTensorData<int32_t>(op_context->strides)[idx];
TF_LITE_ENSURE_MSG(context, stride != 0, "stride value has to be non-zero");
int32_t begin = StartForAxis(op_params, input_shape, idx);
int32_t end = StopForAxis(op_params, input_shape, idx, begin);
// When shrinking an axis, the end position does not matter (and can be
// incorrect when negative indexing is used, see Issue #19260). Always use
// begin + 1 to generate a length 1 slice, since begin has
// already been adjusted for negative indices by StartForAxis.
const bool shrink_axis = op_context->params->shrink_axis_mask & (1 << idx);
if (shrink_axis) {
end = begin + 1;
}
// This is valid for both positive and negative strides
int32_t dim_shape = std::ceil((end - begin) / static_cast<float>(stride));
dim_shape = dim_shape < 0 ? 0 : dim_shape;
if (!shrink_axis) {
TF_LITE_ENSURE_EQ(context, output_shape->data[shape_size], dim_shape);
shape_size++;
}
}
TF_LITE_ENSURE_EQ(context, output_shape->size, shape_size);
return kTfLiteOk;
}
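// Worked example (added for exposition, assuming no masks are set): for
// begin = 1, end = 7, stride = 2 the loop above computes
// dim_shape = ceil((7 - 1) / 2.0) = 3 (indices 1, 3 and 5). The same formula
// covers negative strides: begin = 7, end = 1, stride = -2 gives
// ceil(-6 / -2.0) = 3 (indices 7, 5 and 3).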
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 4);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
StridedSliceContext op_context(context, node);
TF_LITE_ENSURE_MSG(context, op_context.dims <= kMaxDim,
"input dim should not exceed 4");
return CheckOutputSize(context, &op_context);
}
template <KernelType kernel_type>
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
StridedSliceContext op_context(context, node);
auto op_params = BuildStridedSliceParams(&op_context);
#define TF_LITE_STRIDED_SLICE(kernel_type, data_type) \
kernel_type::StridedSlice(op_params, GetTensorShape(op_context.input), \
GetTensorData<data_type>(op_context.input), \
GetTensorShape(op_context.output), \
GetTensorData<data_type>(op_context.output))
switch (op_context.input->type) {
case kTfLiteFloat32:
if (kernel_type == kReference) {
TF_LITE_STRIDED_SLICE(reference_ops, float);
}
break;
case kTfLiteUInt8:
if (kernel_type == kReference) {
TF_LITE_STRIDED_SLICE(reference_ops, uint8_t);
}
break;
case kTfLiteInt8:
if (kernel_type == kReference) {
TF_LITE_STRIDED_SLICE(reference_ops, int8_t);
}
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(op_context.input->type),
op_context.input->type);
return kTfLiteError;
}
#undef TF_LITE_STRIDED_SLICE
return kTfLiteOk;
}
} // namespace strided_slice
TfLiteRegistration* Register_STRIDED_SLICE() {
static TfLiteRegistration r = {
/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/strided_slice::Prepare,
/*invoke=*/strided_slice::Eval<strided_slice::kReference>,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,201 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/sub.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
namespace tflite {
namespace ops {
namespace micro {
namespace sub {
constexpr int kInputTensor1 = 0;
constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;
struct OpData {
bool requires_broadcast;
// These fields are used in both the general 8-bit -> 8-bit quantized path,
// and the special 16-bit -> 16-bit quantized path
int input1_shift;
int input2_shift;
int32 output_activation_min;
int32 output_activation_max;
// These fields are used only in the general 8-bit -> 8-bit quantized path
int32 input1_multiplier;
int32 input2_multiplier;
int32 output_multiplier;
int output_shift;
int left_shift;
int32 input1_offset;
int32 input2_offset;
int32 output_offset;
};
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteSubParams* params,
const TfLiteTensor* input1,
const TfLiteTensor* input2, TfLiteTensor* output,
OpData* data) {
data->requires_broadcast = !HaveSameShapes(input1, input2);
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
// 8bit -> 8bit general quantized path, with general rescalings
data->input1_offset = -input1->params.zero_point;
data->input2_offset = -input2->params.zero_point;
data->output_offset = output->params.zero_point;
data->left_shift = 20;
const float twice_max_input_scale =
2 * std::max(input1->params.scale, input2->params.scale);
const double real_input1_multiplier =
static_cast<double>(input1->params.scale / twice_max_input_scale);
const double real_input2_multiplier =
static_cast<double>(input2->params.scale / twice_max_input_scale);
const double real_output_multiplier =
static_cast<double>(twice_max_input_scale /
((1 << data->left_shift) * output->params.scale));
QuantizeMultiplierSmallerThanOneExp(
real_input1_multiplier, &data->input1_multiplier, &data->input1_shift);
QuantizeMultiplierSmallerThanOneExp(
real_input2_multiplier, &data->input2_multiplier, &data->input2_shift);
QuantizeMultiplierSmallerThanOneExp(
real_output_multiplier, &data->output_multiplier, &data->output_shift);
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, params->activation, output, &data->output_activation_min,
&data->output_activation_max));
}
return kTfLiteOk;
}
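// Worked example (added for exposition): with input scales 0.5 and 0.25,
// twice_max_input_scale == 1.0, so real_input1_multiplier == 0.5 and
// real_input2_multiplier == 0.25. Both inputs are first scaled up by
// left_shift = 20 bits so that the fixed-point multipliers (each < 1.0)
// retain precision before the difference is rescaled to the output scale.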
void EvalSub(TfLiteContext* context, TfLiteNode* node, TfLiteSubParams* params,
const OpData* data, const TfLiteTensor* input1,
const TfLiteTensor* input2, TfLiteTensor* output) {
float output_activation_min, output_activation_max;
CalculateActivationRange(params->activation, &output_activation_min,
&output_activation_max);
tflite::ArithmeticParams op_params;
SetActivationParams(output_activation_min, output_activation_max, &op_params);
#define TF_LITE_SUB(opname) \
opname(op_params, GetTensorShape(input1), GetTensorData<float>(input1), \
GetTensorShape(input2), GetTensorData<float>(input2), \
GetTensorShape(output), GetTensorData<float>(output))
if (data->requires_broadcast) {
TF_LITE_SUB(tflite::reference_ops::BroadcastSubSlow);
} else {
TF_LITE_SUB(tflite::reference_ops::SubWithActivation);
}
#undef TF_LITE_SUB
}
TfLiteStatus EvalSubQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteSubParams* params, const OpData* data,
const TfLiteTensor* input1,
const TfLiteTensor* input2,
TfLiteTensor* output) {
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
tflite::ArithmeticParams op_params;
op_params.left_shift = data->left_shift;
op_params.input1_offset = data->input1_offset;
op_params.input1_multiplier = data->input1_multiplier;
op_params.input1_shift = data->input1_shift;
op_params.input2_offset = data->input2_offset;
op_params.input2_multiplier = data->input2_multiplier;
op_params.input2_shift = data->input2_shift;
op_params.output_offset = data->output_offset;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
SetActivationParams(data->output_activation_min,
data->output_activation_max, &op_params);
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
GetTensorShape(input1), GetTensorShape(input2), &op_params);
#define TF_LITE_SUB(opname, dtype) \
opname(op_params, GetTensorShape(input1), GetTensorData<dtype>(input1), \
GetTensorShape(input2), GetTensorData<dtype>(input2), \
GetTensorShape(output), GetTensorData<dtype>(output));
if (output->type == kTfLiteInt8) {
if (need_broadcast) {
TF_LITE_SUB(tflite::reference_ops::BroadcastSubSlow, int8_t);
} else {
TF_LITE_SUB(tflite::reference_ops::Sub, int8_t);
}
} else {
if (need_broadcast) {
TF_LITE_SUB(tflite::reference_ops::BroadcastSubSlow, uint8_t);
} else {
TF_LITE_SUB(tflite::reference_ops::Sub, uint8_t);
}
}
#undef TF_LITE_SUB
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLiteSubParams*>(node->builtin_data);
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
OpData data;
TF_LITE_ENSURE_STATUS(
CalculateOpData(context, params, input1, input2, output, &data));
if (output->type == kTfLiteFloat32) {
EvalSub(context, node, params, &data, input1, input2, output);
} else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
TF_LITE_ENSURE_OK(context, EvalSubQuantized(context, node, params, &data,
input1, input2, output));
} else {
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(output->type), output->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace sub
TfLiteRegistration* Register_SUB() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/sub::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,544 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <math.h>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/activation_utils.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace ops {
namespace micro {
namespace svdf {
namespace {
struct OpData {
int32 effective_scale_1_a;
int32 effective_scale_2_a;
// b versions of each scale are kept as int since the numbers are just the
// shift value - typically between [-32, 32].
int effective_scale_1_b;
int effective_scale_2_b;
int scratch_tensor_index;
int scratch_output_tensor_index;
};
/**
* This version of SVDF is specific to TFLite Micro. It contains the following
* differences from the TFLite version:
*
* 1.) Scratch tensor allocation - scratch tensors must be known ahead of time
* for the Micro interpreter.
* 2.) Output dimensions - the TFLite version determines output size at runtime
* and resizes the output tensor. The Micro runtime does not support tensor
* resizing.
*/
static inline void ApplyTimeWeightsBiasAndActivation(
int batch_size, int memory_size, int num_filters, int num_units, int rank,
const float* const __restrict__ weights_time_ptr,
const float* const __restrict__ bias_ptr, TfLiteFusedActivation activation,
float* const __restrict__ state_ptr, float* const __restrict__ scratch_ptr,
float* const __restrict__ output_ptr) {
// Compute matmul(activation_state, weights_time).
for (int b = 0; b < batch_size; ++b) {
// Perform batched vector dot product:
float* scratch_ptr_batch = scratch_ptr + b * num_filters;
const float* vector1_ptr = weights_time_ptr;
const float* vector2_ptr = state_ptr + b * memory_size * num_filters;
for (int i = 0; i < num_filters; ++i) {
*scratch_ptr_batch = 0.f;
for (int j = 0; j < memory_size; ++j) {
*scratch_ptr_batch += *vector1_ptr++ * *vector2_ptr++;
}
scratch_ptr_batch++;
}
}
// Initialize output with bias if provided.
if (bias_ptr) {
// VectorBatchVectorAssign
for (int i = 0; i < batch_size; ++i) {
float* output_data = output_ptr + i * num_units;
const float* bias_data = bias_ptr;
for (int j = 0; j < num_units; ++j) {
*output_data++ = *bias_data++;
}
}
} else {
float* output_data = output_ptr;
for (int i = 0; i < batch_size * num_units; ++i) {
*output_data++ = 0.0f;
}
}
// Reduction sum.
for (int b = 0; b < batch_size; ++b) {
float* output_ptr_batch = output_ptr + b * num_units;
float* scratch_ptr_batch = scratch_ptr + b * num_filters;
// Reduction sum vector
for (int i = 0; i < num_units; ++i) {
for (int j = 0; j < rank; j++) {
output_ptr_batch[i] += *scratch_ptr_batch++;
}
}
}
// Apply activation.
for (int b = 0; b < batch_size; ++b) {
float* output_ptr_batch = output_ptr + b * num_units;
for (int i = 0; i < num_units; ++i) {
*output_ptr_batch = ActivationValFloat(activation, *output_ptr_batch);
++output_ptr_batch;
}
}
}
inline void EvalFloatSVDF(
TfLiteContext* context, TfLiteNode* node, const TfLiteTensor* input,
const TfLiteTensor* weights_feature, const TfLiteTensor* weights_time,
const TfLiteTensor* bias, const TfLiteSVDFParams* params,
int scratch_tensor_index, TfLiteTensor* activation_state,
TfLiteTensor* output) {
const int rank = params->rank;
const int batch_size = input->dims->data[0];
const int input_size = input->dims->data[1];
const int num_filters = weights_feature->dims->data[0];
const int num_units = num_filters / rank;
const int memory_size = weights_time->dims->data[1];
const float* weights_feature_ptr = GetTensorData<float>(weights_feature);
const float* weights_time_ptr = GetTensorData<float>(weights_time);
const float* bias_ptr = GetTensorData<float>(bias);
const float* input_ptr = GetTensorData<float>(input);
float* state_ptr = GetTensorData<float>(activation_state);
TFLITE_DCHECK(context != nullptr);
TFLITE_DCHECK(context->GetScratchBuffer != nullptr);
float* scratch_ptr = static_cast<float*>(
context->GetScratchBuffer(context, scratch_tensor_index));
float* output_ptr = GetTensorData<float>(output);
// Left shift the activation_state.
{
float* new_state_start = state_ptr;
const float* old_state_start = state_ptr + 1;
const float* old_state_end =
state_ptr + batch_size * num_filters * memory_size;
while (old_state_start != old_state_end) {
*new_state_start++ = *old_state_start++;
}
}
// Note: no need to clear the latest activation, matmul is not accumulative.
// Compute conv1d(inputs, weights_feature).
// The activation_state's rightmost column is used to save current cycle
// activation. This is achieved by starting at state_ptr[memory_size - 1] and
// having the stride equal to memory_size.
// Perform batched matrix vector multiply operation:
{
const float* matrix = weights_feature_ptr;
const float* vector = input_ptr;
float* result = &state_ptr[memory_size - 1];
float* result_in_batch = result;
for (int i = 0; i < batch_size; ++i) {
const float* matrix_ptr = matrix;
for (int j = 0; j < num_filters; ++j) {
float dot_prod = 0.0f;
const float* vector_in_batch = vector + i * input_size;
for (int k = 0; k < input_size; ++k) {
dot_prod += *matrix_ptr++ * *vector_in_batch++;
}
*result_in_batch = dot_prod;
result_in_batch += memory_size;
}
}
}
ApplyTimeWeightsBiasAndActivation(
batch_size, memory_size, num_filters, num_units, rank, weights_time_ptr,
bias_ptr, params->activation, state_ptr, scratch_ptr, output_ptr);
}
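// Illustration (added for exposition): per batch, activation_state is laid
// out as num_filters rows of memory_size samples. The left shift above drops
// the oldest sample of every row, e.g. {s0, s1, s2, s3} becomes
// {s1, s2, s3, _}, and the feature matmul then writes the newest activation
// into the vacated slot at state_ptr[memory_size - 1], striding by
// memory_size between rows.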
void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
const TfLiteTensor* input_tensor,
const TfLiteTensor* weights_feature_tensor,
const TfLiteTensor* weights_time_tensor,
const TfLiteTensor* bias_tensor,
const TfLiteSVDFParams* params,
TfLiteTensor* activation_state_tensor,
TfLiteTensor* output_tensor, const OpData& data,
int32_t input_zp, int32_t output_zp) {
const int n_rank = params->rank;
const int n_batch = input_tensor->dims->data[0];
const int n_input = input_tensor->dims->data[1];
const int n_filter = weights_feature_tensor->dims->data[0];
const int n_unit = n_filter / n_rank;
const int n_memory = weights_time_tensor->dims->data[1];
TFLITE_DCHECK(context != nullptr);
TFLITE_DCHECK(context->GetScratchBuffer != nullptr);
int32_t* scratch_tensor = static_cast<int32_t*>(
context->GetScratchBuffer(context, data.scratch_tensor_index));
int32_t* scratch_output_tensor = static_cast<int32_t*>(
context->GetScratchBuffer(context, data.scratch_output_tensor_index));
// Shift states.
int16_t* const state_ptr = GetTensorData<int16_t>(activation_state_tensor);
// Left shift the activation_state.
{
int16_t* new_state_start = state_ptr;
const int16_t* old_state_start = state_ptr + 1;
const int16_t* old_state_end = state_ptr + n_batch * n_filter * n_memory;
while (old_state_start != old_state_end) {
*new_state_start++ = *old_state_start++;
}
}
// Note: no need to clear the latest activation, matmul is not accumulative.
// Feature matmul.
{
int16_t* state = GetTensorData<int16_t>(activation_state_tensor);
const int8_t* input = GetTensorData<int8_t>(input_tensor);
const int8_t* weight_feature =
GetTensorData<int8_t>(weights_feature_tensor);
const int32_t output_max = std::numeric_limits<int16_t>::max();
const int32_t output_min = std::numeric_limits<int16_t>::min();
int16_t* result_in_batch = state + (n_memory - 1);
for (int b = 0; b < n_batch; b++) {
const int8_t* matrix_ptr = weight_feature;
for (int r = 0; r < n_filter; r++) {
int32_t dot_prod = 0;
const int8_t* vector_in_batch = input + b * n_input;
for (int c = 0; c < n_input; c++) {
dot_prod += *matrix_ptr++ * (*vector_in_batch++ - input_zp);
}
dot_prod = MultiplyByQuantizedMultiplier(
dot_prod, data.effective_scale_1_a, data.effective_scale_1_b);
dot_prod = std::min(std::max(output_min, dot_prod), output_max);
// This assumes the state is symmetrically quantized. Otherwise the last
// entry of state should be initialized to its zero point and the dot_prod
// accumulated into it. Equivalent to the following:
//   result_in_batch = zero point, which happens to be zero.
//   result_in_batch += dot_prod.
*result_in_batch = dot_prod;
result_in_batch += n_memory;
}
}
}
// Time.
{
for (int b = 0; b < n_batch; ++b) {
int32_t* scratch_ptr_batch = scratch_tensor + b * n_filter;
// Perform batched vector dot product:
const int16_t* vector1_ptr = GetTensorData<int16_t>(weights_time_tensor);
const int16_t* vector2_ptr =
GetTensorData<int16_t>(activation_state_tensor) +
b * n_memory * n_filter;
for (int i = 0; i < n_filter; i++) {
*scratch_ptr_batch = 0;
for (int j = 0; j < n_memory; j++) {
*scratch_ptr_batch += *vector1_ptr++ * *vector2_ptr++;
}
scratch_ptr_batch++;
}
}
}
// Reduce, add bias, rescale, activation.
{
// Add bias.
if (bias_tensor) {
// Vector batch assign:
const int32_t* bias_data = GetTensorData<int32_t>(bias_tensor);
for (int i = 0; i < n_batch; ++i) {
int32_t* output_ptr = scratch_output_tensor + i * n_unit;
const int32_t* bias_ptr = bias_data;
for (int j = 0; j < n_unit; ++j) {
*output_ptr++ = *bias_ptr++;
}
}
} else {
int32_t* output_ptr = scratch_output_tensor;
for (int i = 0; i < n_batch * n_unit; ++i) {
*output_ptr++ = 0;
}
}
// Reduce.
for (int b = 0; b < n_batch; ++b) {
int32_t* output_temp_ptr = scratch_output_tensor + b * n_unit;
int32_t* scratch_ptr_batch = scratch_tensor + b * n_filter;
// Reduction sum vector
for (int i = 0; i < n_unit; ++i) {
for (int j = 0; j < n_rank; ++j) {
output_temp_ptr[i] += *scratch_ptr_batch++;
}
}
}
// Rescale.
const int32_t output_max = std::numeric_limits<int8_t>::max();
const int32_t output_min = std::numeric_limits<int8_t>::min();
for (int i = 0; i < n_batch * n_unit; ++i) {
int32_t x1 = scratch_output_tensor[i];
int32_t x2 = MultiplyByQuantizedMultiplier(x1, data.effective_scale_2_a,
data.effective_scale_2_b);
int32_t x3 = x2 + output_zp;
int32_t x4 = std::min(std::max(output_min, x3), output_max);
GetTensorData<int8_t>(output_tensor)[i] = static_cast<int8_t>(x4);
}
}
}
} // namespace
// Input tensors.
constexpr int kInputTensor = 0;
constexpr int kWeightsFeatureTensor = 1;
constexpr int kWeightsTimeTensor = 2;
constexpr int kBiasTensor = 3;
// This is a variable tensor, and will be modified by this op.
constexpr int kInputActivationStateTensor = 4;
// Output tensor.
constexpr int kOutputTensor = 0;
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
void* data = nullptr;
if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) ==
kTfLiteError) {
return nullptr;
}
return data;
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
const auto* params = static_cast<const TfLiteSVDFParams*>(node->builtin_data);
// Validate Tensor Inputs (dtype depends on quantization):
// [0] = Input, {2, batch_size, input_size}
// [1] = Weights Feature, {2, num_filters, input_size}
// [2] = Weights Time, {2, num_filters, memory_size}
// [3] = Bias (optional), {1, num_units}
// [4] = Activation State (variable),
// {2, batch_size, memory_size * num_filters}
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* weights_feature =
GetInput(context, node, kWeightsFeatureTensor);
const TfLiteTensor* weights_time =
GetInput(context, node, kWeightsTimeTensor);
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
const TfLiteTensor* activation_state =
GetInput(context, node, kInputActivationStateTensor);
// Define input constants based on input tensor definition above:
const int rank = params->rank;
const int input_size = input->dims->data[1];
const int batch_size = input->dims->data[0];
const int num_filters = weights_feature->dims->data[0];
TF_LITE_ENSURE_EQ(context, num_filters % rank, 0);
const int num_units = num_filters / rank;
const int memory_size = weights_time->dims->data[1];
// Validate Input Tensor:
TF_LITE_ENSURE(context,
input->type == kTfLiteFloat32 || input->type == kTfLiteInt8);
TF_LITE_ENSURE_EQ(context, NumDimensions(input), 2);
// Validate Tensor Output:
// [0] = float/int8, {2, batch_size, num_units}
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_EQ(context, NumDimensions(output), 2);
TF_LITE_ENSURE_EQ(context, output->dims->data[0], batch_size);
TF_LITE_ENSURE_EQ(context, output->dims->data[1], num_units);
// Validate Weights Feature Input Tensor:
TF_LITE_ENSURE_EQ(context, NumDimensions(weights_feature), 2);
TF_LITE_ENSURE_EQ(context, weights_feature->dims->data[1], input_size);
// Validate Weights Time Input Tensor:
TF_LITE_ENSURE_EQ(context, NumDimensions(weights_time), 2);
TF_LITE_ENSURE_EQ(context, weights_time->dims->data[0], num_filters);
TF_LITE_ENSURE_EQ(context, weights_time->dims->data[1], memory_size);
// Validate Optional Bias Input Tensor:
if (bias != nullptr) {
TF_LITE_ENSURE_EQ(context, bias->dims->data[0], num_units);
}
// Validate Activation State Input Tensor:
TF_LITE_ENSURE_EQ(context, NumDimensions(activation_state), 2);
TF_LITE_ENSURE_EQ(context, activation_state->dims->data[0], batch_size);
TF_LITE_ENSURE_EQ(context, activation_state->dims->data[1],
memory_size * num_filters);
TF_LITE_ENSURE_EQ(context, node->inputs->size, 5);
if (input->type == kTfLiteInt8) {
TF_LITE_ENSURE_EQ(context, weights_feature->type, kTfLiteInt8);
TF_LITE_ENSURE_EQ(context, weights_time->type, kTfLiteInt16);
TF_LITE_ENSURE_EQ(context, activation_state->type, kTfLiteInt16);
if (bias != nullptr) {
TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteInt32);
}
TF_LITE_ENSURE_EQ(context, output->type, kTfLiteInt8);
const auto* input_params =
reinterpret_cast<TfLiteAffineQuantization*>(input->quantization.params);
const auto* weights_feature_params =
static_cast<const TfLiteAffineQuantization*>(
weights_feature->quantization.params);
const auto* state_params = static_cast<const TfLiteAffineQuantization*>(
activation_state->quantization.params);
const auto* weight_time_params =
static_cast<const TfLiteAffineQuantization*>(
weights_time->quantization.params);
const auto* output_params = static_cast<const TfLiteAffineQuantization*>(
output->quantization.params);
const double effective_scale_1 = static_cast<double>(
input_params->scale->data[0] * weights_feature_params->scale->data[0] /
state_params->scale->data[0]);
const double effective_scale_2 = static_cast<double>(
state_params->scale->data[0] * weight_time_params->scale->data[0] /
output_params->scale->data[0]);
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
QuantizeMultiplier(effective_scale_1, &(data->effective_scale_1_a),
&(data->effective_scale_1_b));
QuantizeMultiplier(effective_scale_2, &(data->effective_scale_2_a),
&(data->effective_scale_2_b));
TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
const TfLiteStatus scratch_status = context->RequestScratchBufferInArena(
context, batch_size * num_filters * sizeof(int32_t),
&(data->scratch_tensor_index));
TF_LITE_ENSURE_OK(context, scratch_status);
const TfLiteStatus scratch_output_status =
context->RequestScratchBufferInArena(
context, batch_size * num_units * sizeof(int32_t),
&(data->scratch_output_tensor_index));
TF_LITE_ENSURE_OK(context, scratch_output_status);
} else {
TF_LITE_ENSURE_EQ(context, weights_feature->type, kTfLiteFloat32);
TF_LITE_ENSURE_EQ(context, weights_time->type, kTfLiteFloat32);
TF_LITE_ENSURE_EQ(context, activation_state->type, kTfLiteFloat32);
if (bias != nullptr) {
TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteFloat32);
}
TF_LITE_ENSURE_EQ(context, output->type, kTfLiteFloat32);
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
const TfLiteStatus scratch_status = context->RequestScratchBufferInArena(
context, batch_size * num_filters * sizeof(float),
&(data->scratch_tensor_index));
TF_LITE_ENSURE_OK(context, scratch_status);
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLiteSVDFParams*>(node->builtin_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* weights_feature =
GetInput(context, node, kWeightsFeatureTensor);
const TfLiteTensor* weights_time =
GetInput(context, node, kWeightsTimeTensor);
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
TfLiteTensor* activation_state =
GetVariableInput(context, node, kInputActivationStateTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData& data = *(static_cast<const OpData*>(node->user_data));
switch (weights_feature->type) {
case kTfLiteFloat32: {
EvalFloatSVDF(context, node, input, weights_feature, weights_time, bias,
params, data.scratch_tensor_index, activation_state,
output);
return kTfLiteOk;
}
case kTfLiteInt8: {
TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActRelu);
EvalIntegerSVDF(context, node, input, weights_feature, weights_time, bias,
params, activation_state, output, data,
input->params.zero_point, output->params.zero_point);
return kTfLiteOk;
}
default:
TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.",
TfLiteTypeGetName(weights_feature->type));
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace svdf
TfLiteRegistration* Register_SVDF() {
static TfLiteRegistration r = {/*init=*/svdf::Init,
/*free=*/nullptr,
/*prepare=*/svdf::Prepare,
/*invoke=*/svdf::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,118 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace unpack {
namespace {
constexpr int kInputTensor = 0;
template <typename T>
TfLiteStatus UnpackImpl(TfLiteContext* context, TfLiteNode* node,
const TfLiteTensor* input, int output_count, int axis) {
const TfLiteTensor* output0 = GetOutput(context, node, 0);
const TfLiteIntArray* input_dims = input->dims;
const TfLiteIntArray* output_dims = output0->dims;
const int dimensions = input_dims->size;
if (axis < 0) {
axis += NumDimensions(input);
}
TFLITE_DCHECK_LT(axis, dimensions);
int outer_size = 1;
for (int i = 0; i < axis; ++i) {
outer_size *= input_dims->data[i];
}
int copy_size = 1;
for (int i = axis + 1; i < dimensions; ++i) {
copy_size *= input_dims->data[i];
}
int output_size = 1;
for (int i = 0; i < output_dims->size; ++i) {
output_size *= output_dims->data[i];
}
TFLITE_DCHECK_EQ(output_size, copy_size * outer_size);
const T* input_data = GetTensorData<T>(input);
for (int i = 0; i < output_count; ++i) {
TfLiteTensor* t = GetOutput(context, node, i);
T* output_data = GetTensorData<T>(t);
for (int k = 0; k < outer_size; ++k) {
T* output_ptr = output_data + copy_size * k;
int loc = k * output_count * copy_size + i * copy_size;
const T* input_ptr = input_data + loc;
for (int j = 0; j < copy_size; ++j) output_ptr[j] = input_ptr[j];
}
}
return kTfLiteOk;
}
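// Worked example (added for exposition): unpacking a 3x2 input along axis 0
// into 3 outputs produces three length-2 vectors. Here outer_size == 1 and
// copy_size == 2, so output i copies input elements [2*i, 2*i + 1]; an input
// {{1, 2}, {3, 4}, {5, 6}} unpacks to {1, 2}, {3, 4} and {5, 6}.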
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TfLiteUnpackParams* data =
reinterpret_cast<TfLiteUnpackParams*>(node->builtin_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
switch (input->type) {
case kTfLiteFloat32: {
return UnpackImpl<float>(context, node, input, data->num, data->axis);
}
case kTfLiteInt32: {
return UnpackImpl<int32_t>(context, node, input, data->num, data->axis);
}
case kTfLiteUInt8: {
return UnpackImpl<uint8_t>(context, node, input, data->num, data->axis);
}
case kTfLiteInt8: {
return UnpackImpl<int8_t>(context, node, input, data->num, data->axis);
}
default: {
TF_LITE_KERNEL_LOG(context, "Type '%s' is not supported by unpack.",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
}
return kTfLiteOk;
}
} // namespace
} // namespace unpack
TfLiteRegistration* Register_UNPACK() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/unpack::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,95 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/memory_helpers.h"
#include <cstdint>
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
namespace tflite {
uint8_t* AlignPointerUp(uint8_t* data, size_t alignment) {
std::uintptr_t data_as_uintptr_t = reinterpret_cast<std::uintptr_t>(data);
uint8_t* aligned_result = reinterpret_cast<uint8_t*>(
((data_as_uintptr_t + (alignment - 1)) / alignment) * alignment);
return aligned_result;
}
uint8_t* AlignPointerDown(uint8_t* data, size_t alignment) {
std::uintptr_t data_as_uintptr_t = reinterpret_cast<std::uintptr_t>(data);
uint8_t* aligned_result =
reinterpret_cast<uint8_t*>((data_as_uintptr_t / alignment) * alignment);
return aligned_result;
}
size_t AlignSizeUp(size_t size, size_t alignment) {
size_t aligned_size = (((size + (alignment - 1)) / alignment) * alignment);
return aligned_size;
}
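// Usage sketch (added for exposition; compiled out, not part of the original
// source): the helpers round addresses and sizes to alignment boundaries.
#if 0
uint8_t arena[128];
uint8_t* up = tflite::AlignPointerUp(arena + 1, 16);      // next 16-byte boundary
uint8_t* down = tflite::AlignPointerDown(arena + 1, 16);  // previous boundary
size_t padded = tflite::AlignSizeUp(13, 4);  // ((13 + 3) / 4) * 4 == 16
#endif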
TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size,
ErrorReporter* reporter) {
switch (type) {
case kTfLiteFloat32:
*size = sizeof(float);
break;
case kTfLiteInt16:
*size = sizeof(int16_t);
break;
case kTfLiteInt32:
*size = sizeof(int32_t);
break;
case kTfLiteUInt8:
*size = sizeof(uint8_t);
break;
case kTfLiteInt8:
*size = sizeof(int8_t);
break;
case kTfLiteInt64:
*size = sizeof(int64_t);
break;
case kTfLiteBool:
*size = sizeof(bool);
break;
case kTfLiteComplex64:
*size = sizeof(float) * 2;
break;
default:
reporter->Report("Type %s (%d) not is not supported",
TfLiteTypeGetName(type), type);
return kTfLiteError;
}
return kTfLiteOk;
}
TfLiteStatus BytesRequiredForTensor(const tflite::Tensor& flatbuffer_tensor,
size_t* bytes, size_t* type_size,
ErrorReporter* error_reporter) {
int element_count = 1;
for (size_t n = 0; n < flatbuffer_tensor.shape()->Length(); ++n) {
element_count *= flatbuffer_tensor.shape()->Get(n);
}
TfLiteType tf_lite_type;
TF_LITE_ENSURE_STATUS(ConvertTensorType(flatbuffer_tensor.type(),
&tf_lite_type, error_reporter));
TF_LITE_ENSURE_STATUS(
TfLiteTypeSizeOf(tf_lite_type, type_size, error_reporter));
*bytes = element_count * (*type_size);
return kTfLiteOk;
}
} // namespace tflite

View File

@@ -0,0 +1,44 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_
#define TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
// Returns the next pointer address aligned to the given alignment.
uint8_t* AlignPointerUp(uint8_t* data, size_t alignment);
// Returns the previous pointer address aligned to the given alignment.
uint8_t* AlignPointerDown(uint8_t* data, size_t alignment);
// Returns an increased size that's a multiple of alignment.
size_t AlignSizeUp(size_t size, size_t alignment);
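//
// For example (a sketch; `raw` is an arbitrary uint8_t pointer, and 16 matches
// the buffer alignment used by the micro allocator):
//
//   uint8_t* up = AlignPointerUp(raw, 16);     // Next multiple of 16 >= raw.
//   uint8_t* down = AlignPointerDown(raw, 16); // Previous multiple of 16.
//   size_t padded = AlignSizeUp(10, 16);       // == 16.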
// Returns size in bytes for a given TfLiteType.
TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size,
ErrorReporter* reporter);
// How many bytes are needed to hold a tensor's contents.
TfLiteStatus BytesRequiredForTensor(const tflite::Tensor& flatbuffer_tensor,
size_t* bytes, size_t* type_size,
ErrorReporter* error_reporter);
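//
// For example (sketch): for a kTfLiteFloat32 tensor of shape {2, 3},
// BytesRequiredForTensor() reports *type_size == 4 and *bytes == 24.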
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_

View File

@@ -0,0 +1,386 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/memory_planner/greedy_memory_planner.h"
namespace tflite {
// Simple stable in-place sort function. Not time-efficient for large arrays.
// Would normally be in an anonymous namespace to keep it private, but we want
// to be able to test it externally.
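// For example, values {3, 5, 1} with ids {0, 1, 2} become values {5, 3, 1}
// and ids {1, 0, 2}.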
void ReverseSortInPlace(int* values, int* ids, int size) {
bool any_swapped;
do {
any_swapped = false;
for (int i = 1; i < size; ++i) {
if (values[i - 1] < values[i]) {
const int value_temp = values[i - 1];
values[i - 1] = values[i];
values[i] = value_temp;
const int id_temp = ids[i - 1];
ids[i - 1] = ids[i];
ids[i] = id_temp;
any_swapped = true;
}
}
} while (any_swapped);
}
GreedyMemoryPlanner::GreedyMemoryPlanner(unsigned char* scratch_buffer,
int scratch_buffer_size)
: buffer_count_(0), need_to_calculate_offsets_(true) {
// Allocate the arrays we need within the scratch buffer arena.
max_buffer_count_ = scratch_buffer_size / per_buffer_size();
unsigned char* next_free = scratch_buffer;
requirements_ = reinterpret_cast<BufferRequirements*>(next_free);
next_free += sizeof(BufferRequirements) * max_buffer_count_;
buffer_sizes_sorted_by_size_ = reinterpret_cast<int*>(next_free);
next_free += sizeof(int) * max_buffer_count_;
buffer_ids_sorted_by_size_ = reinterpret_cast<int*>(next_free);
next_free += sizeof(int) * max_buffer_count_;
buffers_sorted_by_offset_ = reinterpret_cast<ListEntry*>(next_free);
next_free += sizeof(ListEntry) * max_buffer_count_;
buffer_offsets_ = reinterpret_cast<int*>(next_free);
}
GreedyMemoryPlanner::~GreedyMemoryPlanner() {
// We don't own the scratch buffer, so don't deallocate anything.
}
TfLiteStatus GreedyMemoryPlanner::AddBuffer(
tflite::ErrorReporter* error_reporter, int size, int first_time_used,
int last_time_used) {
if (buffer_count_ >= max_buffer_count_) {
TF_LITE_REPORT_ERROR(error_reporter, "Too many buffers (max is %d)",
max_buffer_count_);
return kTfLiteError;
}
BufferRequirements* current = &requirements_[buffer_count_];
current->size = size;
current->first_time_used = first_time_used;
current->last_time_used = last_time_used;
++buffer_count_;
need_to_calculate_offsets_ = true;
return kTfLiteOk;
}
bool GreedyMemoryPlanner::DoesEntryOverlapInTime(
const GreedyMemoryPlanner::ListEntry* entry, const int first_time_used,
const int last_time_used) const {
const BufferRequirements* entry_requirements =
&requirements_[entry->requirements_index];
if (entry_requirements->first_time_used > last_time_used) {
return false;
}
if (first_time_used > entry_requirements->last_time_used) {
return false;
}
return true;
}
GreedyMemoryPlanner::ListEntry*
GreedyMemoryPlanner::NextSimultaneouslyActiveBuffer(
const GreedyMemoryPlanner::ListEntry* start, const int first_time_used,
const int last_time_used) {
ListEntry* result = nullptr;
ListEntry* candidate_next_entry;
if (start == nullptr) {
candidate_next_entry = &buffers_sorted_by_offset_[0];
} else {
if (start->next_entry_index == -1) {
return nullptr;
}
candidate_next_entry = &buffers_sorted_by_offset_[start->next_entry_index];
}
do {
if (DoesEntryOverlapInTime(candidate_next_entry, first_time_used,
last_time_used)) {
result = candidate_next_entry;
break;
}
if (candidate_next_entry->next_entry_index == -1) {
break;
}
candidate_next_entry =
&buffers_sorted_by_offset_[candidate_next_entry->next_entry_index];
} while (true);
return result;
}
void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
if (!need_to_calculate_offsets_ || (buffer_count_ == 0)) {
return;
}
need_to_calculate_offsets_ = false;
// Start off by ordering the buffers in descending order of size.
// This helps find a more compact layout. Intuitively, you can think
// about putting the large buffers in place first, and then the
// smaller buffers can fit in the gaps, rather than fragmenting the
// gaps with small buffers at the beginning.
for (int i = 0; i < buffer_count_; ++i) {
buffer_sizes_sorted_by_size_[i] = requirements_[i].size;
buffer_ids_sorted_by_size_[i] = i;
buffer_offsets_[i] = -1;
}
// This sorting algorithm is naive, and may end up taking a very long time
// with hundreds of buffers.
ReverseSortInPlace(buffer_sizes_sorted_by_size_, buffer_ids_sorted_by_size_,
buffer_count_);
// Put the largest buffer at offset zero to start the process.
ListEntry* first_entry = &buffers_sorted_by_offset_[0];
first_entry->offset = 0;
first_entry->requirements_index = buffer_ids_sorted_by_size_[0];
first_entry->next_entry_index = -1;
next_free_entry_ = 1;
buffer_offsets_[buffer_ids_sorted_by_size_[0]] = 0;
// Work through the rest of the buffers to find a good gap to place each one.
for (int i = 1; i < buffer_count_; ++i) {
    // The id is the order in which the buffer was originally added by the
    // client.
const int buffer_id = buffer_ids_sorted_by_size_[i];
// Look at what size and time range the buffer needs to be active.
BufferRequirements* wanted_requirements = &requirements_[buffer_id];
const int wanted_size = wanted_requirements->size;
const int wanted_first_time_used = wanted_requirements->first_time_used;
const int wanted_last_time_used = wanted_requirements->last_time_used;
// Find the first buffer that's active in our time range. All placed
// buffers are stored in the order of their starting position in the arena
    // so that it's easy to find the next buffer in memory, and hence the gap
    // between them.
// The candidate_entry variable holds the buffer that we're considering
// placing the current buffer after.
ListEntry* prior_entry = nullptr;
int candidate_offset = 0;
// Loop through the offset-ordered list of buffers, looking for gaps.
while (true) {
// Find out what the next active buffer is.
ListEntry* next_entry = NextSimultaneouslyActiveBuffer(
prior_entry, wanted_first_time_used, wanted_last_time_used);
if (prior_entry) {
BufferRequirements* candidate_requirements =
&requirements_[prior_entry->requirements_index];
const int prior_entry_offset =
prior_entry->offset + candidate_requirements->size;
if (prior_entry_offset > candidate_offset) {
candidate_offset = prior_entry_offset;
}
}
if (next_entry == nullptr) {
// We're at the end of the list, so we can always append the buffer
// here.
break;
}
// Find out how much space there is between us and the next buffer.
const int gap = next_entry->offset - candidate_offset;
if (gap >= wanted_size) {
// This entry has a big enough gap between it and the next, so
// use it!
break;
}
// The gap wasn't big enough, so move on to another candidate.
prior_entry = next_entry;
}
// At this point, we've either found a gap (possibly at the end of the
// list) and want to place the buffer there, or there are no other active
// buffers in this time range and so we can put it at offset zero.
// Record the buffer's offset in our plan.
buffer_offsets_[buffer_id] = candidate_offset;
// Add the newly-placed buffer to our offset-ordered list, so that
// subsequent passes can fit in their buffers around it.
ListEntry* new_entry = &buffers_sorted_by_offset_[next_free_entry_];
new_entry->offset = candidate_offset;
new_entry->requirements_index = buffer_id;
const int new_entry_index = next_free_entry_;
++next_free_entry_;
ListEntry* current_entry = first_entry;
// Make sure that we insert the buffer at the correct place in the ordered
// list.
while (true) {
const int next_entry_index = current_entry->next_entry_index;
if (next_entry_index == -1) {
// We're at the end of the list, so just add the new entry here.
current_entry->next_entry_index = new_entry_index;
new_entry->next_entry_index = -1;
break;
}
ListEntry* next_entry = &buffers_sorted_by_offset_[next_entry_index];
if (next_entry->offset > candidate_offset) {
// We're at the right spot to do an insertion and retain the sorting
// order, so place the new entry here.
new_entry->next_entry_index = current_entry->next_entry_index;
current_entry->next_entry_index = new_entry_index;
break;
}
current_entry = next_entry;
}
}
}
size_t GreedyMemoryPlanner::GetMaximumMemorySize() {
CalculateOffsetsIfNeeded();
if (buffer_count_ == 0) {
return 0;
}
ListEntry* entry = &buffers_sorted_by_offset_[0];
size_t max_size = 0;
while (entry) {
BufferRequirements* requirements =
&requirements_[entry->requirements_index];
// TODO(b/148246793): Update all size and offset variables types from
// int to size_t
const size_t current_size = entry->offset + requirements->size;
if (current_size > max_size) {
max_size = current_size;
}
if (entry->next_entry_index == -1) {
break;
}
entry = &buffers_sorted_by_offset_[entry->next_entry_index];
}
return max_size;
}
void GreedyMemoryPlanner::PrintMemoryPlan(ErrorReporter* error_reporter) {
CalculateOffsetsIfNeeded();
for (int i = 0; i < buffer_count_; ++i) {
TF_LITE_REPORT_ERROR(
error_reporter,
"Planner buffer ID: %d, calculated offset: %d, size required: %d, "
"first_time_created: %d, "
"last_time_used: %d",
i, buffer_offsets_[i], requirements_[i].size,
requirements_[i].first_time_used, requirements_[i].last_time_used);
}
constexpr int kLineWidth = 80;
int max_size = kLineWidth;
int max_time = 0;
for (int i = 0; i < buffer_count_; ++i) {
BufferRequirements* requirements = &requirements_[i];
const int offset = buffer_offsets_[i];
const int last_time_used = requirements->last_time_used;
const int size = offset + requirements->size;
if (size > max_size) {
max_size = size;
}
if (last_time_used > max_time) {
max_time = last_time_used;
}
}
char line[kLineWidth + 1];
for (int t = 0; t <= max_time; ++t) {
for (int c = 0; c < kLineWidth; ++c) {
line[c] = '.';
}
for (int i = 0; i < buffer_count_; ++i) {
BufferRequirements* requirements = &requirements_[i];
if ((t < requirements->first_time_used) ||
(t > requirements->last_time_used)) {
continue;
}
const int offset = buffer_offsets_[i];
if (offset == -1) {
continue;
}
const int size = requirements->size;
const int line_start = (offset * kLineWidth) / max_size;
const int line_end = ((offset + size) * kLineWidth) / max_size;
for (int n = line_start; n < line_end; ++n) {
if (line[n] == '.') {
char display;
if (i < 10) {
display = '0' + i;
} else if (i < 36) {
display = 'a' + (i - 10);
} else if (i < 62) {
display = 'A' + (i - 36);
} else {
display = '*';
}
line[n] = display;
} else {
line[n] = '!';
}
}
}
line[kLineWidth] = 0;
TF_LITE_REPORT_ERROR(error_reporter, "%s", (const char*)line);
}
}
int GreedyMemoryPlanner::GetBufferCount() { return buffer_count_; }
TfLiteStatus GreedyMemoryPlanner::GetOffsetForBuffer(
tflite::ErrorReporter* error_reporter, int buffer_index, int* offset) {
CalculateOffsetsIfNeeded();
if ((buffer_index < 0) || (buffer_index >= buffer_count_)) {
TF_LITE_REPORT_ERROR(error_reporter,
"buffer index %d is outside range 0 to %d",
buffer_index, buffer_count_);
return kTfLiteError;
}
*offset = buffer_offsets_[buffer_index];
return kTfLiteOk;
}
bool GreedyMemoryPlanner::DoAnyBuffersOverlap(ErrorReporter* error_reporter) {
CalculateOffsetsIfNeeded();
bool were_overlaps_found = false;
for (int i = 0; i < buffer_count_; ++i) {
BufferRequirements* a_requirements = &requirements_[i];
const int a_start_offset = buffer_offsets_[i];
const int a_first_time_used = a_requirements->first_time_used;
const int a_last_time_used = a_requirements->last_time_used;
const int a_end_offset = a_start_offset + a_requirements->size;
for (int j = 0; j < buffer_count_; ++j) {
if (i == j) {
continue;
}
BufferRequirements* b_requirements = &requirements_[j];
const int b_start_offset = buffer_offsets_[j];
const int b_first_time_used = b_requirements->first_time_used;
const int b_last_time_used = b_requirements->last_time_used;
const int b_end_offset = b_start_offset + b_requirements->size;
if ((a_first_time_used > b_last_time_used) ||
(b_first_time_used > a_last_time_used)) {
// Buffers don't overlap in time.
continue;
}
if ((a_start_offset >= b_end_offset) ||
(b_start_offset >= a_end_offset)) {
// No overlap in memory.
continue;
}
were_overlaps_found = true;
TF_LITE_REPORT_ERROR(
error_reporter, "Overlap: %d (%d=>%d, %d->%d) vs %d (%d=>%d, %d->%d)",
i, a_first_time_used, a_last_time_used, a_start_offset, a_end_offset,
j, b_first_time_used, b_last_time_used, b_start_offset, b_end_offset);
}
}
return were_overlaps_found;
}
} // namespace tflite

View File

@@ -0,0 +1,146 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_GREEDY_MEMORY_PLANNER_H_
#define TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_GREEDY_MEMORY_PLANNER_H_
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/memory_planner/memory_planner.h"
namespace tflite {
// A memory planner that uses a greedy algorithm to arrange buffers in memory
// to minimize the overall arena size needed.
//
// The algorithm works like this:
// - The client enters the buffer information through AddBuffer().
// - When a function like GetOffsetForBuffer() is called, the
// CalculateOffsetsIfNeeded() method is invoked.
// - If an up-to-date plan is not already present, one will be calculated.
// - The buffers are sorted in descending order of size.
// - The largest buffer is placed at offset zero.
// - The rest of the buffers are looped through in descending size order.
// - The other buffers that need to be in memory at the same time are found.
// - The first gap between simultaneously active buffers that the current
// buffer fits into will be used.
// - If no large-enough gap is found, the current buffer is placed after the
// last buffer that's simultaneously active.
// - This continues until all buffers are placed, and the offsets stored.
//
// This is not guaranteed to produce the best placement, since that's an
// NP-Complete problem, but in practice it should produce one that's decent.
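//
// A minimal usage sketch (buffer sizes and lifetimes here are illustrative):
//
//   unsigned char scratch[1024];
//   GreedyMemoryPlanner planner(scratch, sizeof(scratch));
//   planner.AddBuffer(error_reporter, /*size=*/256, /*first=*/0, /*last=*/1);
//   planner.AddBuffer(error_reporter, /*size=*/128, /*first=*/1, /*last=*/2);
//   int offset = -1;
//   planner.GetOffsetForBuffer(error_reporter, /*buffer_index=*/0, &offset);
//   // The largest buffer is placed first, so buffer 0 lands at offset 0.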
class GreedyMemoryPlanner : public MemoryPlanner {
public:
// You need to pass in an area of memory to be used for planning. This memory
// needs to have a lifetime as long as the planner, but isn't owned by this
// object, so management should be handled by the client. This is so it can be
// stack or globally allocated if necessary on devices without dynamic memory
// allocation. How many buffers can be planned for will depend on the size of
// this scratch memory, so you should enlarge it if you see an error when
// calling AddBuffer(). The memory can be reused once you're done with the
// planner, as long as you copy the calculated offsets to another location.
// Each buffer requires about 36 bytes of scratch.
GreedyMemoryPlanner(unsigned char* scratch_buffer, int scratch_buffer_size);
~GreedyMemoryPlanner() override;
// Record details of a buffer we want to place.
TfLiteStatus AddBuffer(ErrorReporter* error_reporter, int size,
int first_time_used, int last_time_used) override;
// Returns the high-water mark of used memory. This is the minimum size of a
// memory arena you'd need to allocate to hold these buffers.
size_t GetMaximumMemorySize() override;
// How many buffers have been recorded.
int GetBufferCount() override;
// Where a given buffer should be placed in the memory arena.
// This information is stored in the memory arena itself, so once the arena
// is used for inference, it will be overwritten.
TfLiteStatus GetOffsetForBuffer(ErrorReporter* error_reporter,
int buffer_index, int* offset) override;
// Prints an ascii-art diagram of the buffer layout plan.
void PrintMemoryPlan(ErrorReporter* error_reporter);
// Debug method to check whether any buffer allocations are overlapping. This
// is an O(N^2) complexity operation, so only use for testing.
bool DoAnyBuffersOverlap(ErrorReporter* error_reporter);
// Used to store a list of buffers ordered by their offset.
struct ListEntry {
int offset;
int requirements_index;
int next_entry_index;
};
// Number of bytes required in order to plan a buffer.
static size_t per_buffer_size() {
const int per_buffer_size =
sizeof(BufferRequirements) + // requirements_
sizeof(int) + // buffer_sizes_sorted_by_size_
sizeof(int) + // buffer_ids_sorted_by_size_
sizeof(ListEntry) + // buffers_sorted_by_offset_
sizeof(int); // buffer_offsets_;
return per_buffer_size;
}
private:
// Whether a buffer is active in a given time range.
bool DoesEntryOverlapInTime(const ListEntry* entry, const int first_time_used,
const int last_time_used) const;
// Walks the list to return the next buffer that is active in a given time
// range, or a null pointer if there are none.
ListEntry* NextSimultaneouslyActiveBuffer(const ListEntry* start,
const int first_time_used,
const int last_time_used);
  // If there isn't an up-to-date plan, calculate a new one.
void CalculateOffsetsIfNeeded();
// How many buffers we can plan for, based on the arena size we're given in
// the constructor.
int max_buffer_count_;
// The number of buffers added so far.
int buffer_count_;
// Records the client-provided information about each buffer.
struct BufferRequirements {
int size;
int first_time_used;
int last_time_used;
};
// Working arrays used during the layout algorithm.
BufferRequirements* requirements_;
int* buffer_sizes_sorted_by_size_;
int* buffer_ids_sorted_by_size_;
ListEntry* buffers_sorted_by_offset_;
int next_free_entry_;
// Stores the outcome of the plan, the location of each buffer in the arena.
int* buffer_offsets_;
// Whether buffers have been added since the last plan was calculated.
bool need_to_calculate_offsets_;
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_GREEDY_MEMORY_PLANNER_H_

View File

@@ -0,0 +1,54 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/memory_planner/linear_memory_planner.h"
namespace tflite {
LinearMemoryPlanner::LinearMemoryPlanner()
: current_buffer_count_(0), next_free_offset_(0) {}
LinearMemoryPlanner::~LinearMemoryPlanner() {}
TfLiteStatus LinearMemoryPlanner::AddBuffer(
tflite::ErrorReporter* error_reporter, int size, int first_time_used,
int last_time_used) {
if (current_buffer_count_ >= kMaxBufferCount) {
TF_LITE_REPORT_ERROR(error_reporter, "Too many buffers (max is %d)",
kMaxBufferCount);
return kTfLiteError;
}
buffer_offsets_[current_buffer_count_] = next_free_offset_;
next_free_offset_ += size;
++current_buffer_count_;
return kTfLiteOk;
}
size_t LinearMemoryPlanner::GetMaximumMemorySize() { return next_free_offset_; }
int LinearMemoryPlanner::GetBufferCount() { return current_buffer_count_; }
TfLiteStatus LinearMemoryPlanner::GetOffsetForBuffer(
tflite::ErrorReporter* error_reporter, int buffer_index, int* offset) {
if ((buffer_index < 0) || (buffer_index >= current_buffer_count_)) {
TF_LITE_REPORT_ERROR(error_reporter,
"buffer index %d is outside range 0 to %d",
buffer_index, current_buffer_count_);
return kTfLiteError;
}
*offset = buffer_offsets_[buffer_index];
return kTfLiteOk;
}
} // namespace tflite

View File

@@ -0,0 +1,50 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_LINEAR_MEMORY_PLANNER_H_
#define TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_LINEAR_MEMORY_PLANNER_H_
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/memory_planner/memory_planner.h"
namespace tflite {
// The simplest possible memory planner that just lays out all buffers at
// increasing offsets without trying to reuse memory.
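//
// For example (sizes illustrative): adding buffers of 100 and 50 bytes yields
// offsets 0 and 100, and GetMaximumMemorySize() returns 150, even when the two
// buffers are never active at the same time.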
class LinearMemoryPlanner : public MemoryPlanner {
public:
LinearMemoryPlanner();
~LinearMemoryPlanner() override;
TfLiteStatus AddBuffer(tflite::ErrorReporter* error_reporter, int size,
int first_time_used, int last_time_used) override;
size_t GetMaximumMemorySize() override;
int GetBufferCount() override;
TfLiteStatus GetOffsetForBuffer(tflite::ErrorReporter* error_reporter,
int buffer_index, int* offset) override;
private:
static constexpr int kMaxBufferCount = 1024;
size_t buffer_offsets_[kMaxBufferCount];
int current_buffer_count_;
size_t next_free_offset_;
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_LINEAR_MEMORY_PLANNER_H_

View File

@@ -0,0 +1,71 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_MEMORY_PLANNER_H_
#define TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_MEMORY_PLANNER_H_
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
namespace tflite {
// Interface class for planning the layout of memory buffers during the
// execution of a graph.
// It's designed to be used by a client that iterates in any order through the
// buffers it wants to lay out, and then calls the getter functions for
// information about the calculated layout. For example:
//
// SomeMemoryPlanner planner;
// planner.AddBuffer(reporter, 100, 0, 1); // Buffer 0
// planner.AddBuffer(reporter, 50, 2, 3); // Buffer 1
// planner.AddBuffer(reporter, 50, 2, 3); // Buffer 2
//
// int offset0;
// TF_EXPECT_OK(planner.GetOffsetForBuffer(reporter, 0, &offset0));
// int offset1;
// TF_EXPECT_OK(planner.GetOffsetForBuffer(reporter, 1, &offset1));
// int offset2;
// TF_EXPECT_OK(planner.GetOffsetForBuffer(reporter, 2, &offset2));
// const int arena_size_needed = planner.GetMaximumMemorySize();
//
// The goal is for applications to be able to experiment with different layout
// strategies without changing their client code, by swapping out classes that
// implement this interface.
class MemoryPlanner {
public:
MemoryPlanner() {}
virtual ~MemoryPlanner() {}
// Pass information about a buffer's size and lifetime to the layout
// algorithm. The order this is called implicitly assigns an index to the
// result, so the buffer information that's passed into the N-th call of
// this method will be used as the buffer_index argument to
// GetOffsetForBuffer().
virtual TfLiteStatus AddBuffer(tflite::ErrorReporter* error_reporter,
int size, int first_time_used,
int last_time_used) = 0;
// The largest contiguous block of memory that's needed to hold the layout.
virtual size_t GetMaximumMemorySize() = 0;
// How many buffers have been added to the planner.
virtual int GetBufferCount() = 0;
// Calculated layout offset for the N-th buffer added to the planner.
virtual TfLiteStatus GetOffsetForBuffer(tflite::ErrorReporter* error_reporter,
int buffer_index, int* offset) = 0;
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_MEMORY_PLANNER_H_

View File

@@ -0,0 +1,669 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/micro_allocator.h"
#include <cstddef>
#include <cstdint>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/core/api/op_resolver.h"
#include "tensorflow/lite/core/api/tensor_utils.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/memory_planner/greedy_memory_planner.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"
namespace tflite {
namespace {
// Used to hold information used during allocation calculations.
struct AllocationInfo {
size_t bytes;
int first_created;
int last_used;
bool needs_allocating;
void** output_ptr;
};
// We align tensor buffers to 16-byte boundaries, since this is a common
// requirement for SIMD extensions.
constexpr int kBufferAlignment = 16;
class MicroBuiltinDataAllocator : public BuiltinDataAllocator {
public:
explicit MicroBuiltinDataAllocator(SimpleMemoryAllocator* memory_allocator)
: memory_allocator_(memory_allocator) {}
void* Allocate(size_t size, size_t alignment_hint) override {
return memory_allocator_->AllocateFromTail(size, alignment_hint);
}
void Deallocate(void* data) override {
    // Do not deallocate; builtin data needs to be available for the lifetime
    // of the model.
}
private:
SimpleMemoryAllocator* memory_allocator_;
TF_LITE_REMOVE_VIRTUAL_DELETE
};
TfLiteStatus AllocateVariables(
const flatbuffers::Vector<flatbuffers::Offset<Tensor>>* flatbuffer_tensors,
TfLiteTensor* runtime_tensors, SimpleMemoryAllocator* allocator) {
for (size_t i = 0; i < flatbuffer_tensors->size(); ++i) {
if (flatbuffer_tensors->Get(i)->is_variable()) {
runtime_tensors[i].data.data = allocator->AllocateFromTail(
runtime_tensors[i].bytes, kBufferAlignment);
// Allocation failure.
if (runtime_tensors[i].data.data == nullptr) {
return kTfLiteError;
}
}
tflite::ResetVariableTensor(&(runtime_tensors[i]));
}
return kTfLiteOk;
}
// A helper class to construct the AllocationInfo array. This array contains
// the lifetime of tensors / scratch buffers and will be used to calculate the
// memory plan. Methods need to be called in order, from `Init` through `Add*`
// to `Finish`.
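//
// A typical call sequence (sketch; variable names are illustrative):
//
//   AllocationInfoBuilder builder(reporter, allocator);
//   builder.Init(tensor_count, scratch_buffer_count);
//   builder.AddTensors(subgraph, runtime_tensors);
//   builder.AddScratchBuffers(buffer_handles);
//   const AllocationInfo* info = builder.Finish();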
class AllocationInfoBuilder {
public:
AllocationInfoBuilder(ErrorReporter* reporter,
SimpleMemoryAllocator* allocator)
: reporter_(reporter), allocator_(allocator) {}
// Initializes the builder by allocating AllocationInfo array from the
// simple memory allocator.
TfLiteStatus Init(size_t tensor_count, size_t scratch_buffer_count) {
tensor_count_ = tensor_count;
buffer_count_ = scratch_buffer_count;
return Allocate();
}
  // Add allocation information for the tensors.
TfLiteStatus AddTensors(const SubGraph* subgraph,
TfLiteTensor* runtime_tensors);
// Add allocation information for the scratch buffers.
TfLiteStatus AddScratchBuffers(internal::ScratchBufferHandle* buffer_handles);
// Returns a pointer to the built AllocationInfo array.
const AllocationInfo* Finish() const { return info_; }
size_t Size() const { return tensor_count_ + buffer_count_; }
private:
  // Allocates the output AllocationInfo array from allocator_.
TfLiteStatus Allocate();
ErrorReporter* reporter_ = nullptr;
SimpleMemoryAllocator* allocator_ = nullptr;
size_t tensor_count_ = 0;
size_t buffer_count_ = 0;
AllocationInfo* info_ = nullptr;
};
TfLiteStatus AllocationInfoBuilder::Allocate() {
size_t bytes = sizeof(AllocationInfo) * Size();
info_ = reinterpret_cast<AllocationInfo*>(
allocator_->AllocateFromTail(bytes, alignof(AllocationInfo)));
if (info_ == nullptr) {
TF_LITE_REPORT_ERROR(
reporter_,
"Failed to allocate memory for allocation_info, %d bytes required",
bytes);
return kTfLiteError;
}
return kTfLiteOk;
}
TfLiteStatus AllocationInfoBuilder::AddTensors(const SubGraph* subgraph,
TfLiteTensor* runtime_tensors) {
// Set up allocation info for all tensors.
for (size_t i = 0; i < tensor_count_; ++i) {
AllocationInfo* current = &info_[i];
// TfLiteTensor.uint8 field is deprecated so use .data field instead.
current->output_ptr = &(runtime_tensors[i].data.data);
current->bytes = runtime_tensors[i].bytes;
current->first_created = -1;
current->last_used = -1;
current->needs_allocating = (runtime_tensors[i].data.data == nullptr) &&
(!subgraph->tensors()->Get(i)->is_variable());
}
for (size_t i = 0; i < subgraph->inputs()->size(); ++i) {
const int tensor_index = subgraph->inputs()->Get(i);
AllocationInfo* current = &info_[tensor_index];
current->first_created = 0;
}
// Mark all outputs as persistent to the end of the invocation.
for (size_t i = 0; i < subgraph->outputs()->size(); ++i) {
const int tensor_index = subgraph->outputs()->Get(i);
AllocationInfo* current = &info_[tensor_index];
current->last_used = subgraph->operators()->size() - 1;
}
// Figure out when the first and last use of each tensor is.
for (int i = (subgraph->operators()->size() - 1); i >= 0; --i) {
const auto* op = subgraph->operators()->Get(i);
for (size_t n = 0; n < op->inputs()->size(); ++n) {
const int tensor_index = op->inputs()->Get(n);
AllocationInfo* current = &info_[tensor_index];
if (((current->last_used == -1) || (current->last_used < i))) {
current->last_used = i;
}
}
for (size_t n = 0; n < op->outputs()->size(); ++n) {
const int tensor_index = op->outputs()->Get(n);
AllocationInfo* current = &info_[tensor_index];
if ((current->first_created == -1) || (current->first_created > i)) {
current->first_created = i;
}
}
}
// Work out which tensors need to be allocated.
for (size_t i = 0; i < tensor_count_; ++i) {
AllocationInfo* current = &info_[i];
const bool is_read_only =
(current->first_created == -1) && (current->last_used != -1);
if (is_read_only) {
current->needs_allocating = false;
}
const bool has_partial_lifetime =
!is_read_only &&
((current->first_created == -1) || (current->last_used == -1));
if (has_partial_lifetime && current->needs_allocating) {
TF_LITE_REPORT_ERROR(
reporter_,
"Logic error in memory planner, tensor %d has an invalid lifetime: "
"first_created: %d, last_used: %d",
i, current->first_created, current->last_used);
return kTfLiteError;
}
}
return kTfLiteOk;
}
TfLiteStatus AllocationInfoBuilder::AddScratchBuffers(
internal::ScratchBufferHandle* buffer_handles) {
// Set up allocation info for buffers.
for (size_t i = tensor_count_; i < tensor_count_ + buffer_count_; ++i) {
AllocationInfo* current = &info_[i];
internal::ScratchBufferHandle* handle =
&(buffer_handles[i - tensor_count_]);
current->output_ptr = reinterpret_cast<void**>(&handle->data);
current->bytes = handle->bytes;
current->first_created = handle->node_idx;
current->last_used = handle->node_idx;
current->needs_allocating = true;
}
return kTfLiteOk;
}
TfLiteStatus CreatePlan(ErrorReporter* error_reporter, MemoryPlanner* planner,
const AllocationInfo* allocation_info,
size_t allocation_info_size) {
// Add the tensors to our allocation plan.
for (size_t i = 0; i < allocation_info_size; ++i) {
const AllocationInfo* current = &allocation_info[i];
if (current->needs_allocating) {
size_t aligned_bytes_required =
AlignSizeUp(current->bytes, kBufferAlignment);
TF_LITE_ENSURE_STATUS(
planner->AddBuffer(error_reporter, aligned_bytes_required,
current->first_created, current->last_used));
}
}
return kTfLiteOk;
}
TfLiteStatus CommitPlan(ErrorReporter* error_reporter, MemoryPlanner* planner,
uint8_t* starting_point,
const AllocationInfo* allocation_info,
size_t allocation_info_size) {
// Figure out the actual memory addresses for each buffer, based on the plan.
int planner_index = 0;
for (size_t i = 0; i < allocation_info_size; ++i) {
const AllocationInfo* current = &allocation_info[i];
if (current->needs_allocating) {
int offset = -1;
TF_LITE_ENSURE_STATUS(
planner->GetOffsetForBuffer(error_reporter, planner_index, &offset));
*current->output_ptr = reinterpret_cast<void*>(starting_point + offset);
++planner_index;
}
}
return kTfLiteOk;
}
} // namespace
namespace internal {
TfLiteStatus InitializeRuntimeTensor(
SimpleMemoryAllocator* allocator, const tflite::Tensor& flatbuffer_tensor,
const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
ErrorReporter* error_reporter, TfLiteTensor* result) {
*result = {};
// Make sure the serialized type is one we know how to deal with, and convert
// it from a flatbuffer enum into a constant used by the kernel C API.
TF_LITE_ENSURE_STATUS(ConvertTensorType(flatbuffer_tensor.type(),
&result->type, error_reporter));
// Make sure we remember if the serialized tensor is designated as a variable.
result->is_variable = flatbuffer_tensor.is_variable();
// We need to figure out where the actual contents of this tensor are stored
// in memory. We'll check to see if there's a serialized buffer (pretty much
// the same as a constant op in TensorFlow) associated with this tensor first,
  // and if there is, update the runtime structure to point to its location in
// memory.
// First see if there's any buffer information in the serialized tensor.
if (auto* buffer = (*buffers)[flatbuffer_tensor.buffer()]) {
// If we've found a buffer, does it have any data?
if (auto* array = buffer->data()) {
// If it has any data, is the data size larger than zero?
if (array->size()) {
// We've found a buffer with valid data, so update the runtime tensor
// data structure to point to it.
result->data.data =
const_cast<void*>(static_cast<const void*>(array->data()));
        // We set the data from a serialized buffer, so record that.
result->allocation_type = kTfLiteMmapRo;
}
}
// TODO(petewarden): It's not clear in what circumstances we could have a
// buffer in the serialized tensor, but it doesn't have any data in it. Is
// that a validly-generated file, and if so what does it mean, or is it an
// error condition? It would be good to tighten up the specification to make
// it less ambiguous.
}
// TODO(petewarden): Some of these paths aren't getting enough testing
// coverage, so we should figure out some tests that exercise them.
if (result->data.data == nullptr) {
// The tensor contents haven't been set from a serialized buffer, so
// make a note that they will be allocated from memory. The actual
// allocation won't happen until later.
result->allocation_type = kTfLiteArenaRw;
}
// Figure out what the size in bytes of the buffer is and store it.
size_t type_size;
TF_LITE_ENSURE_STATUS(BytesRequiredForTensor(
flatbuffer_tensor, &result->bytes, &type_size, error_reporter));
  // TFLM doesn't allow reshaping the tensor, which would require dynamic
  // memory allocation, so it is safe to drop the const qualifier. In the
  // future, if we really want to update the tensor shape, we can always pass
  // in a new TfLiteIntArray - especially since we have to do so if the
  // dimension is changed.
result->dims = const_cast<TfLiteIntArray*>(
reinterpret_cast<const TfLiteIntArray*>(flatbuffer_tensor.shape()));
// Copy the quantization information from the serialized data.
const auto* src_quantization = flatbuffer_tensor.quantization();
if (src_quantization && src_quantization->scale() &&
(src_quantization->scale()->size() > 0) &&
src_quantization->zero_point() &&
(src_quantization->zero_point()->size() > 0)) {
// Always populate the TfLiteTensor.params field, even if there are
// per-channel quantization parameters.
result->params.scale = src_quantization->scale()->Get(0);
// Note that the zero_point field in the FlatBuffers schema is a 64-bit
// integer, but the zero_point field in the TfLiteQuantizationParams struct
// is a 32-bit integer.
result->params.zero_point =
static_cast<int32_t>(src_quantization->zero_point()->Get(0));
// Populate per-channel quantization params.
int channels = src_quantization->scale()->size();
TfLiteAffineQuantization* quantization =
reinterpret_cast<TfLiteAffineQuantization*>(
allocator->AllocateFromTail(sizeof(TfLiteAffineQuantization),
alignof(TfLiteAffineQuantization)));
if (quantization == nullptr) {
TF_LITE_REPORT_ERROR(error_reporter,
"Unable to allocate TfLiteAffineQuantization.\n");
return kTfLiteError;
}
quantization->zero_point =
reinterpret_cast<TfLiteIntArray*>(allocator->AllocateFromTail(
TfLiteIntArrayGetSizeInBytes(channels), alignof(TfLiteIntArray)));
if (quantization->zero_point == nullptr) {
TF_LITE_REPORT_ERROR(error_reporter,
"Unable to allocate quantization->zero_point.\n");
return kTfLiteError;
}
quantization->scale = reinterpret_cast<TfLiteFloatArray*>(
allocator->AllocateFromTail(TfLiteFloatArrayGetSizeInBytes(channels),
alignof(TfLiteFloatArray)));
if (quantization->scale == nullptr) {
TF_LITE_REPORT_ERROR(error_reporter,
"Unable to allocate quantization->scale.\n");
return kTfLiteError;
}
quantization->zero_point->size = channels;
quantization->scale->size = channels;
int* zero_point_data = quantization->zero_point->data;
float* scale_data = quantization->scale->data;
for (int i = 0; i < channels; i++) {
zero_point_data[i] = src_quantization->zero_point()->Get(i);
scale_data[i] = src_quantization->scale()->Get(i);
}
// TODO(rocky): Need to add a micro_allocator test case that fails when
// this is not copied:
quantization->quantized_dimension = src_quantization->quantized_dimension();
result->quantization = {kTfLiteAffineQuantization, quantization};
}
if (flatbuffer_tensor.name() != nullptr) {
result->name = flatbuffer_tensor.name()->c_str();
}
return kTfLiteOk;
}
} // namespace internal
TfLiteStatus MicroAllocator::Init() {
auto* subgraphs = model_->subgraphs();
if (subgraphs->size() != 1) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Only 1 subgraph is currently supported.\n");
return kTfLiteError;
}
subgraph_ = (*subgraphs)[0];
tensors_ = subgraph_->tensors();
operators_ = subgraph_->operators();
context_->tensors_size = tensors_->size();
context_->tensors =
reinterpret_cast<TfLiteTensor*>(memory_allocator_->AllocateFromTail(
sizeof(TfLiteTensor) * context_->tensors_size,
alignof(TfLiteTensor)));
if (context_->tensors == nullptr) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Failed to allocate memory for context->tensors, %d bytes required",
sizeof(TfLiteTensor) * context_->tensors_size);
return kTfLiteError;
}
// Initialize runtime tensors in context_ using the flatbuffer.
for (size_t i = 0; i < tensors_->size(); ++i) {
TfLiteStatus status = internal::InitializeRuntimeTensor(
memory_allocator_, *tensors_->Get(i), model_->buffers(),
error_reporter_, &context_->tensors[i]);
if (status != kTfLiteOk) {
TF_LITE_REPORT_ERROR(error_reporter_, "Failed to initialize tensor %d",
i);
return kTfLiteError;
}
}
return kTfLiteOk;
}
MicroAllocator::MicroAllocator(TfLiteContext* context, const Model* model,
uint8_t* tensor_arena, size_t arena_size,
ErrorReporter* error_reporter)
: model_(model), error_reporter_(error_reporter), context_(context) {
uint8_t* aligned_arena = AlignPointerUp(tensor_arena, kBufferAlignment);
if (aligned_arena != tensor_arena) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"%d bytes lost due to alignment. To avoid this loss, please make sure "
"the tensor_arena is 16 bytes aligned.",
aligned_arena - tensor_arena);
}
size_t aligned_arena_size = tensor_arena + arena_size - aligned_arena;
// Creates a root memory allocator managing the arena. The allocator itself
// also locates in the arena buffer. This allocator doesn't need to be
// destructed as it's the root allocator.
memory_allocator_ = CreateInPlaceSimpleMemoryAllocator(
error_reporter, aligned_arena, aligned_arena_size);
TfLiteStatus status = Init();
// TODO(b/147871299): Consider improving this code. A better way of handling
// failures in the constructor is to have a static function that returns a
// pointer to the class. If allocation failed, a nullptr will be returned.
if (status != kTfLiteOk) {
TF_LITE_REPORT_ERROR(error_reporter_,
"MicroAllocator: Failed to initialize.");
active_ = false;
} else {
active_ = true;
}
}
TfLiteStatus MicroAllocator::AllocateNodeAndRegistrations(
const OpResolver& op_resolver,
NodeAndRegistration** node_and_registrations) {
if (!active_) {
return kTfLiteError;
}
auto* output = reinterpret_cast<NodeAndRegistration*>(
memory_allocator_->AllocateFromTail(
sizeof(NodeAndRegistration) * operators_->size(),
alignof(NodeAndRegistration)));
if (output == nullptr) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Failed to allocate memory for node_and_registrations.");
return kTfLiteError;
}
TfLiteStatus status = kTfLiteOk;
auto* opcodes = model_->operator_codes();
MicroBuiltinDataAllocator builtin_data_allocator(memory_allocator_);
for (size_t i = 0; i < operators_->size(); ++i) {
const auto* op = operators_->Get(i);
size_t index = op->opcode_index();
if (index >= opcodes->size()) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Missing registration for opcode_index %d\n", index);
return kTfLiteError;
}
auto* opcode = (*opcodes)[index];
status = GetRegistrationFromOpCode(opcode, op_resolver, error_reporter_,
&(output[i].registration));
if (status != kTfLiteOk) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Failed to get registration from op code %s\n ",
EnumNameBuiltinOperator(opcode->builtin_code()));
return status;
}
const auto* registration = output[i].registration;
if (registration == nullptr) {
TF_LITE_REPORT_ERROR(error_reporter_, "Skipping op for opcode_index %d\n",
index);
return kTfLiteError;
}
BuiltinOperator op_type =
static_cast<BuiltinOperator>(registration->builtin_code);
if (op_type != BuiltinOperator_CUSTOM && op->custom_options()) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Unsupported behavior: found builtin operator %s with custom "
"options.\n",
EnumNameBuiltinOperator(op_type));
return kTfLiteError;
}
const char* custom_data = nullptr;
size_t custom_data_size = 0;
unsigned char* builtin_data = nullptr;
if (op->custom_options()) {
custom_data = reinterpret_cast<const char*>(op->custom_options()->data());
custom_data_size = op->custom_options()->size();
} else {
TF_LITE_ENSURE_STATUS(ParseOpData(op, op_type, error_reporter_,
&builtin_data_allocator,
(void**)(&builtin_data)));
}
    // Disregard the const qualifier to work around the existing API.
TfLiteIntArray* inputs_array = const_cast<TfLiteIntArray*>(
reinterpret_cast<const TfLiteIntArray*>(op->inputs()));
TfLiteIntArray* outputs_array = const_cast<TfLiteIntArray*>(
reinterpret_cast<const TfLiteIntArray*>(op->outputs()));
TfLiteNode* node = &(output[i].node);
*node = {};
node->inputs = inputs_array;
node->outputs = outputs_array;
node->builtin_data = reinterpret_cast<void*>(builtin_data);
node->custom_initial_data = custom_data;
node->custom_initial_data_size = custom_data_size;
}
*node_and_registrations = output;
return kTfLiteOk;
}
TfLiteStatus MicroAllocator::FinishTensorAllocation() {
if (!active_) {
return kTfLiteError;
}
// Create static memory plan
// 1. Calculate AllocationInfo to know the lifetime of each tensor/buffer.
// 2. Add them into the planner (such as the GreedyMemoryPlanner).
// 3. Static memory planning using the planner.
// 4. Set tensor/buffer pointers based on the offsets from the previous step.
// Note that AllocationInfo is only needed for creating the plan. It will be
// thrown away when the child allocator (tmp_allocator) goes out of scope.
{
SimpleMemoryAllocator tmp_allocator(error_reporter_,
memory_allocator_->GetHead(),
memory_allocator_->GetTail());
AllocationInfoBuilder builder(error_reporter_, &tmp_allocator);
TF_LITE_ENSURE_STATUS(
builder.Init(tensors_->size(), scratch_buffer_count_));
TF_LITE_ENSURE_STATUS(builder.AddTensors(subgraph_, context_->tensors));
TF_LITE_ENSURE_STATUS(builder.AddScratchBuffers(scratch_buffer_handles_));
const AllocationInfo* allocation_info = builder.Finish();
    // Remaining arena size that the memory planner can use for calculating
    // offsets.
size_t remaining_arena_size = tmp_allocator.GetAvailableMemory();
uint8_t* planner_arena =
tmp_allocator.AllocateFromHead(remaining_arena_size, /*alignment=*/1);
TF_LITE_ENSURE(error_reporter_, planner_arena != nullptr);
GreedyMemoryPlanner planner(planner_arena, remaining_arena_size);
TF_LITE_ENSURE_STATUS(
CreatePlan(error_reporter_, &planner, allocation_info, builder.Size()));
size_t actual_available_arena_size =
memory_allocator_->GetAvailableMemory();
// Make sure we have enough arena size.
if (planner.GetMaximumMemorySize() > actual_available_arena_size) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Arena size is too small for activation buffers. Needed %d but only "
"%d was available.",
planner.GetMaximumMemorySize(), actual_available_arena_size);
return kTfLiteError;
}
// Commit the plan.
TF_LITE_ENSURE_STATUS(CommitPlan(error_reporter_, &planner,
memory_allocator_->GetHead(),
allocation_info, builder.Size()));
// Allocate the planned area, so the allocator knows it's used.
uint8_t* allocated_tensor_memory =
memory_allocator_->AllocateFromHead(planner.GetMaximumMemorySize(),
/*alignment=*/1);
TF_LITE_ENSURE(error_reporter_, allocated_tensor_memory != nullptr);
}
  // Data in variable tensors needs to be kept for the next invocation, so
  // allocate them from the tail (the persistent area).
if (AllocateVariables(tensors_, context_->tensors, memory_allocator_) !=
kTfLiteOk) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Failed to allocate variables. Please increase arena size.");
return kTfLiteError;
}
active_ = false;
return kTfLiteOk;
}
TfLiteStatus MicroAllocator::AllocatePersistentBuffer(size_t bytes,
void** ptr) {
uint8_t* data = memory_allocator_->AllocateFromTail(bytes, kBufferAlignment);
if (data == nullptr) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Failed to allocate persistent buffer of size %d",
bytes);
return kTfLiteError;
}
(*ptr) = data;
return kTfLiteOk;
}
TfLiteStatus MicroAllocator::RequestScratchBufferInArena(int node_id,
size_t bytes,
int* buffer_idx) {
  // A sanity check to make sure scratch_buffer_handles_ is contiguous, i.e.
  // that scratch_buffer_handles_ points to the last allocation from the
  // memory allocator.
if (scratch_buffer_handles_ != nullptr &&
reinterpret_cast<uint8_t*>(scratch_buffer_handles_) !=
memory_allocator_->GetTail()) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Internal error: AllocateFromTail can not be called "
"between two RequestScratchBufferInArena calls.");
return kTfLiteError;
}
internal::ScratchBufferHandle* handle =
reinterpret_cast<internal::ScratchBufferHandle*>(
memory_allocator_->AllocateFromTail(
sizeof(internal::ScratchBufferHandle),
alignof(internal::ScratchBufferHandle)));
if (handle == nullptr) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Failed to register scratch buffer handle for node %s",
node_id);
return kTfLiteError;
}
*handle = {};
handle->bytes = bytes;
handle->node_idx = node_id;
*buffer_idx = scratch_buffer_count_;
scratch_buffer_count_ += 1;
// scratch_buffer_handles_ is in reverse order. The following code ensures
// that scratch_buffers[0] is pointing to the newly allocated handle.
scratch_buffer_handles_ = handle;
return kTfLiteOk;
}
void* MicroAllocator::GetScratchBuffer(int buffer_idx) const {
if (static_cast<size_t>(buffer_idx) >= scratch_buffer_count_) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Buffer %d not found. %d buffers available.",
buffer_idx, scratch_buffer_count_);
return nullptr;
}
// scratch_buffer_handles_ is in reverse order.
return scratch_buffer_handles_[scratch_buffer_count_ - buffer_idx - 1].data;
}
} // namespace tflite

View File

@@ -0,0 +1,148 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
// Namespace used for unittests.
namespace internal {
// Sets up all of the data structure members for a runtime tensor
// based on the contents of a serialized tensor.
TfLiteStatus InitializeRuntimeTensor(
SimpleMemoryAllocator* allocator, const tflite::Tensor& flatbuffer_tensor,
const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
ErrorReporter* error_reporter, TfLiteTensor* result);
// A handle tracking scratch buffer allocation. This handle is created by
// `RequestScratchBufferInArena`. `data` field is populated in
// `FinishTensorAllocation` after static memory planning.
// TODO(b/150257460) As a future optimization, this struct could be replaced by
// a union, since once `data` is populated, `bytes` and `node_idx` are not
// needed.
typedef struct {
// Pointer to the scratch buffer.
uint8_t* data;
// Number of bytes required by the buffer. The actual allocated size might be
// greater than `bytes` due to buffer alignment.
size_t bytes;
  // Node that the buffer is allocated for. This provides useful information to
// determine the lifetime of the buffer. In AllocationInfo, this buffer will
// have `before` = node_idx and `after` = node_idx.
int node_idx;
} ScratchBufferHandle;
} // namespace internal
typedef struct {
TfLiteNode node;
const TfLiteRegistration* registration;
} NodeAndRegistration;
// Allocator responsible for allocating memory for all intermediate tensors
// necessary to invoke a model.
// Memory layout to help understand how it works
// This information could change in the future version.
// ************** .memory_allocator->GetBuffer()
// Tensors/Scratch buffers (head)
// ************** .head_watermark
// unused memory
// ************** .memory_allocator->GetBuffer() + ->GetMaxBufferSize()
// - ->GetDataSize()
// persistent area (tail)
// ************** .memory_allocator->GetBuffer() + ->GetMaxBufferSize()
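//
// A rough usage sketch (arena size and variable names are illustrative):
//
//   uint8_t tensor_arena[16 * 1024];
//   MicroAllocator allocator(&context, model, tensor_arena,
//                            sizeof(tensor_arena), error_reporter);
//   NodeAndRegistration* node_and_registrations = nullptr;
//   allocator.AllocateNodeAndRegistrations(op_resolver,
//                                          &node_and_registrations);
//   allocator.FinishTensorAllocation();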
class MicroAllocator {
public:
// The lifetime of the model, tensor allocator and error reporter must be at
// least as long as that of the allocator object, since the allocator needs
// them to be accessible during its entire lifetime.
  // Note: Please use __declspec(align(16)) to make sure tensor_arena is
  // 16-byte aligned, otherwise some head room will be wasted.
MicroAllocator(TfLiteContext* context, const Model* model,
uint8_t* tensor_arena, size_t arena_size,
ErrorReporter* error_reporter);
// Runs through the model and allocates all necessary input, output and
// intermediate tensors.
// WARNING: doing any allocation after calling this method has the risk of
// corrupting tensor data so this method should be the last non-const method
// called in this class.
TfLiteStatus FinishTensorAllocation();
// Returns the arena usage in bytes, only available after
// `FinishTensorAllocation`. Otherwise, it will return 0.
size_t used_bytes() const {
if (active_) {
return 0;
}
return memory_allocator_->GetUsedBytes();
}
// Run through the model to allocate nodes and registrations. We need to keep
  // them for the entire lifetime of the model to allow persistent tensors.
// This method needs to be called before FinishTensorAllocation method.
TfLiteStatus AllocateNodeAndRegistrations(
const OpResolver& op_resolver,
NodeAndRegistration** node_and_registrations);
  // Allocates a persistent buffer which has the same lifetime as the
  // allocator.
// The memory is immediately available and is allocated from the tail of the
// arena.
TfLiteStatus AllocatePersistentBuffer(size_t bytes, void** ptr);
// Register a scratch buffer of size `bytes` for Node with `node_id`.
// This method only allocates a BufferHandle holding information for memory
// planning. The buffer ptr is ready after `FinishTensorAllocation` and can
// be retrieved by `GetScratchBuffer` method using the returned buffer_idx.
// Note that there should be no tail allocation between two consecutive
// `RequestScratchBufferInArena` calls.
TfLiteStatus RequestScratchBufferInArena(int node_id, size_t bytes,
int* buffer_idx);
// Returns the pointer to the planned scratch buffer.
void* GetScratchBuffer(int buffer_idx) const;
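  //
  // A rough flow for kernels that need scratch memory (sketch; names are
  // illustrative):
  //
  //   int buffer_idx = -1;
  //   allocator.RequestScratchBufferInArena(node_id, /*bytes=*/1024,
  //                                         &buffer_idx);
  //   allocator.FinishTensorAllocation();
  //   void* scratch = allocator.GetScratchBuffer(buffer_idx);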
private:
TfLiteStatus Init();
const Model* model_;
  // A simple memory allocator that always allocates from the arena tail.
SimpleMemoryAllocator* memory_allocator_;
ErrorReporter* error_reporter_;
TfLiteContext* context_;
// Indicating if the allocator is ready for allocation.
bool active_ = false;
// In reverse order for efficiency.
// i.e. scratch_buffer_handles_[0] is the handle for the last buffer,
// corresponding to the last RequestScratchBufferInArena call.
internal::ScratchBufferHandle* scratch_buffer_handles_ = nullptr;
// How many scratch buffers have been allocated.
size_t scratch_buffer_count_ = 0;
const SubGraph* subgraph_;
const flatbuffers::Vector<flatbuffers::Offset<Operator>>* operators_;
const flatbuffers::Vector<flatbuffers::Offset<Tensor>>* tensors_;
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_
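
A sketch of the documented call order, for orientation only: MicroAllocator is normally driven by MicroInterpreter rather than used directly, and the context, model, op_resolver and error_reporter below are assumed to already exist; the sizes are placeholders.

alignas(16) static uint8_t tensor_arena[8 * 1024];  // Placeholder arena size.
tflite::MicroAllocator allocator(&context, model, tensor_arena,
                                 sizeof(tensor_arena), error_reporter);
tflite::NodeAndRegistration* nodes = nullptr;
// Must be called before FinishTensorAllocation().
allocator.AllocateNodeAndRegistrations(op_resolver, &nodes);
void* persistent = nullptr;
allocator.AllocatePersistentBuffer(64, &persistent);  // Carved from the tail.
int scratch_idx = -1;
allocator.RequestScratchBufferInArena(/*node_id=*/0, 256, &scratch_idx);
allocator.FinishTensorAllocation();  // Last non-const call; plans the head.
void* scratch = allocator.GetScratchBuffer(scratch_idx);  // Valid from here on.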

View File

@@ -0,0 +1,38 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/micro_error_reporter.h"
#ifndef TF_LITE_STRIP_ERROR_STRINGS
#include "tensorflow/lite/micro/micro_string.h"
#endif
namespace tflite {
int MicroErrorReporter::Report(const char* format, va_list args) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
// Only pull in the implementation of this function for builds where we
// expect to use it, to be extra cautious about not increasing the code
// size.
static constexpr int kMaxLogLen = 256;
char log_buffer[kMaxLogLen];
MicroVsnprintf(log_buffer, kMaxLogLen, format, args);
DebugLog(log_buffer);
DebugLog("\r\n");
#endif
return 0;
}
} // namespace tflite

View File

@@ -0,0 +1,35 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_ERROR_REPORTER_H_
#define TENSORFLOW_LITE_MICRO_MICRO_ERROR_REPORTER_H_
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/debug_log.h"
namespace tflite {
class MicroErrorReporter : public ErrorReporter {
public:
~MicroErrorReporter() override {}
int Report(const char* format, va_list args) override;
private:
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MICRO_ERROR_REPORTER_H_
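
A minimal sketch of wiring up the reporter (the message text and byte count are illustrative); TF_LITE_REPORT_ERROR is the macro from tensorflow/lite/core/api/error_reporter.h used throughout this commit:

tflite::MicroErrorReporter micro_error_reporter;
tflite::ErrorReporter* error_reporter = &micro_error_reporter;
// Routes through MicroVsnprintf() and DebugLog(), so no printf is required.
TF_LITE_REPORT_ERROR(error_reporter, "arena too small: missing %d bytes", 128);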

View File

@@ -0,0 +1,309 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/core/api/tensor_utils.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/micro_optional_debug_tools.h"
namespace tflite {
namespace {
const char* OpNameFromRegistration(const TfLiteRegistration* registration) {
if (registration->builtin_code == BuiltinOperator_CUSTOM) {
return registration->custom_name;
} else {
return EnumNameBuiltinOperator(BuiltinOperator(registration->builtin_code));
}
}
} // namespace
namespace internal {
TfLiteStatus ContextHelper::AllocatePersistentBuffer(TfLiteContext* ctx,
size_t bytes, void** ptr) {
return reinterpret_cast<ContextHelper*>(ctx->impl_)
->allocator_->AllocatePersistentBuffer(bytes, ptr);
}
TfLiteStatus ContextHelper::RequestScratchBufferInArena(TfLiteContext* ctx,
size_t bytes,
int* buffer_idx) {
ContextHelper* helper = reinterpret_cast<ContextHelper*>(ctx->impl_);
return helper->allocator_->RequestScratchBufferInArena(
helper->current_node_idx_, bytes, buffer_idx);
}
void* ContextHelper::GetScratchBuffer(TfLiteContext* ctx, int buffer_idx) {
return reinterpret_cast<ContextHelper*>(ctx->impl_)
->allocator_->GetScratchBuffer(buffer_idx);
}
void ContextHelper::ReportOpError(struct TfLiteContext* context,
const char* format, ...) {
ContextHelper* helper = static_cast<ContextHelper*>(context->impl_);
va_list args;
va_start(args, format);
TF_LITE_REPORT_ERROR(helper->error_reporter_, format, args);
va_end(args);
}
} // namespace internal
MicroInterpreter::MicroInterpreter(const Model* model,
const OpResolver& op_resolver,
uint8_t* tensor_arena,
size_t tensor_arena_size,
ErrorReporter* error_reporter)
: model_(model),
op_resolver_(op_resolver),
error_reporter_(error_reporter),
allocator_(&context_, model_, tensor_arena, tensor_arena_size,
error_reporter_),
tensors_allocated_(false),
context_helper_(error_reporter_, &allocator_) {
const flatbuffers::Vector<flatbuffers::Offset<SubGraph>>* subgraphs =
model->subgraphs();
if (subgraphs->size() != 1) {
TF_LITE_REPORT_ERROR(error_reporter,
"Only 1 subgraph is currently supported.\n");
initialization_status_ = kTfLiteError;
return;
}
subgraph_ = (*subgraphs)[0];
tensors_ = subgraph_->tensors();
operators_ = subgraph_->operators();
context_.impl_ = static_cast<void*>(&context_helper_);
context_.ReportError = context_helper_.ReportOpError;
context_.recommended_num_threads = 1;
// If the system is big endian then convert weights from the flatbuffer from
// little to big endian on startup so that it does not need to be done during
// inference.
// NOTE: This requires that the flatbuffer is held in memory which can be
// modified by this process.
if (!FLATBUFFERS_LITTLEENDIAN) {
for (size_t t = 0; t < tensors_size(); ++t) {
TfLiteTensor* thisTensor = &context_.tensors[t];
if (thisTensor->allocation_type == kTfLiteMmapRo)
CorrectTensorEndianness(thisTensor);
}
}
initialization_status_ = kTfLiteOk;
}
MicroInterpreter::~MicroInterpreter() {
if (node_and_registrations_ != nullptr) {
for (size_t i = 0; i < operators_->size(); ++i) {
TfLiteNode* node = &(node_and_registrations_[i].node);
const TfLiteRegistration* registration =
node_and_registrations_[i].registration;
// registration is allocated outside the interpreter, so double check to
// make sure it's not nullptr.
if (registration != nullptr && registration->free != nullptr) {
registration->free(&context_, node->user_data);
}
}
}
}
void MicroInterpreter::CorrectTensorEndianness(TfLiteTensor* tensorCorr) {
int32_t tensorSize = 1;
for (int d = 0; d < tensorCorr->dims->size; ++d)
tensorSize *= reinterpret_cast<const int32_t*>(tensorCorr->dims->data)[d];
switch (tensorCorr->type) {
case TfLiteType::kTfLiteFloat32:
CorrectTensorDataEndianness(tensorCorr->data.f, tensorSize);
break;
case TfLiteType::kTfLiteFloat16:
CorrectTensorDataEndianness(tensorCorr->data.f16, tensorSize);
break;
case TfLiteType::kTfLiteInt64:
CorrectTensorDataEndianness(tensorCorr->data.i64, tensorSize);
break;
case TfLiteType::kTfLiteInt32:
CorrectTensorDataEndianness(tensorCorr->data.i32, tensorSize);
break;
case TfLiteType::kTfLiteInt16:
CorrectTensorDataEndianness(tensorCorr->data.i16, tensorSize);
break;
case TfLiteType::kTfLiteComplex64:
CorrectTensorDataEndianness(tensorCorr->data.c64, tensorSize);
break;
default:
// Do nothing for other data types.
break;
}
}
template <class T>
void MicroInterpreter::CorrectTensorDataEndianness(T* data, int32_t size) {
for (int32_t i = 0; i < size; ++i) {
data[i] = flatbuffers::EndianScalar(data[i]);
}
}
TfLiteStatus MicroInterpreter::AllocateTensors() {
TF_LITE_ENSURE_OK(&context_, allocator_.AllocateNodeAndRegistrations(
op_resolver_, &node_and_registrations_));
// Only allow AllocatePersistentBuffer in Init stage.
context_.AllocatePersistentBuffer = context_helper_.AllocatePersistentBuffer;
context_.RequestScratchBufferInArena = nullptr;
context_.GetScratchBuffer = nullptr;
for (size_t i = 0; i < operators_->size(); ++i) {
context_helper_.SetNodeIndex(i);
auto* node = &(node_and_registrations_[i].node);
auto* registration = node_and_registrations_[i].registration;
size_t init_data_size;
const char* init_data;
if (registration->builtin_code == BuiltinOperator_CUSTOM) {
init_data = reinterpret_cast<const char*>(node->custom_initial_data);
init_data_size = node->custom_initial_data_size;
} else {
init_data = reinterpret_cast<const char*>(node->builtin_data);
init_data_size = 0;
}
if (registration->init) {
node->user_data =
registration->init(&context_, init_data, init_data_size);
}
}
context_helper_.SetNodeIndex(-1);
// Both AllocatePersistentBuffer and RequestScratchBufferInArena is available
// in Prepare stage.
context_.RequestScratchBufferInArena =
context_helper_.RequestScratchBufferInArena;
for (size_t i = 0; i < operators_->size(); ++i) {
// Set node idx to annotate the lifetime for scratch buffers.
context_helper_.SetNodeIndex(i);
auto* node = &(node_and_registrations_[i].node);
auto* registration = node_and_registrations_[i].registration;
if (registration->prepare) {
TfLiteStatus prepare_status = registration->prepare(&context_, node);
if (prepare_status != kTfLiteOk) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Node %s (number %df) failed to prepare with status %d",
OpNameFromRegistration(registration), i, prepare_status);
return kTfLiteError;
}
}
}
context_helper_.SetNodeIndex(-1);
// Prepare is done, we're ready for Invoke. Memory allocation is no longer
// allowed. Kernels can only fetch scratch buffers via GetScratchBuffer.
context_.AllocatePersistentBuffer = nullptr;
context_.RequestScratchBufferInArena = nullptr;
context_.GetScratchBuffer = context_helper_.GetScratchBuffer;
TF_LITE_ENSURE_OK(&context_, allocator_.FinishTensorAllocation());
tensors_allocated_ = true;
return kTfLiteOk;
}
TfLiteStatus MicroInterpreter::Invoke() {
if (initialization_status_ != kTfLiteOk) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Invoke() called after initialization failed\n");
return kTfLiteError;
}
// Ensure tensors are allocated before the interpreter is invoked to avoid
// difficult to debug segfaults.
if (!tensors_allocated_) {
TF_LITE_ENSURE_OK(&context_, AllocateTensors());
}
for (size_t i = 0; i < operators_->size(); ++i) {
auto* node = &(node_and_registrations_[i].node);
auto* registration = node_and_registrations_[i].registration;
if (registration->invoke) {
TfLiteStatus invoke_status = registration->invoke(&context_, node);
if (invoke_status == kTfLiteError) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Node %s (number %d) failed to invoke with status %d",
OpNameFromRegistration(registration), i, invoke_status);
return kTfLiteError;
} else if (invoke_status != kTfLiteOk) {
return invoke_status;
}
}
}
return kTfLiteOk;
}
TfLiteTensor* MicroInterpreter::input(size_t index) {
const size_t length = inputs_size();
if (index >= length) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Input index %d out of range (length is %d)", index,
length);
return nullptr;
}
return &(context_.tensors[inputs().Get(index)]);
}
TfLiteTensor* MicroInterpreter::output(size_t index) {
const size_t length = outputs_size();
if (index >= length) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Output index %d out of range (length is %d)", index,
length);
return nullptr;
}
return &(context_.tensors[outputs().Get(index)]);
}
TfLiteTensor* MicroInterpreter::tensor(size_t index) {
const size_t length = tensors_size();
if (index >= length) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Tensor index %d out of range (length is %d)", index,
length);
return nullptr;
}
return &context_.tensors[index];
}
TfLiteStatus MicroInterpreter::ResetVariableTensors() {
const size_t length = tensors_size();
for (size_t i = 0; i < length; ++i) {
TfLiteTensor* cur_tensor = tensor(i);
if (cur_tensor->is_variable) {
TfLiteStatus status = tflite::ResetVariableTensor(cur_tensor);
if (status != kTfLiteOk) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Failed to reset variable tensor at index: %d", i);
return status;
}
}
}
return kTfLiteOk;
}
} // namespace tflite

View File

@@ -0,0 +1,175 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_INTERPRETER_H_
#define TENSORFLOW_LITE_MICRO_MICRO_INTERPRETER_H_
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/op_resolver.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/type_to_tflitetype.h"
namespace tflite {
namespace internal {
// A helper class to encapsulate the implementation of APIs in Context.
// context->impl_ points to an instance of this class.
// Check tensorflow/lite/c/common.h for detailed descriptions.
class ContextHelper {
public:
explicit ContextHelper(ErrorReporter* error_reporter,
MicroAllocator* allocator)
: allocator_(allocator), error_reporter_(error_reporter) {}
static TfLiteStatus AllocatePersistentBuffer(TfLiteContext* ctx, size_t bytes,
void** ptr);
static TfLiteStatus RequestScratchBufferInArena(TfLiteContext* ctx,
size_t bytes,
int* buffer_idx);
static void* GetScratchBuffer(TfLiteContext* ctx, int buffer_idx);
static void ReportOpError(struct TfLiteContext* context, const char* format,
...);
void SetNodeIndex(int idx) { current_node_idx_ = idx; }
private:
MicroAllocator* allocator_;
ErrorReporter* error_reporter_;
int current_node_idx_ = -1;
};
} // namespace internal
class MicroInterpreter {
public:
// The lifetime of the model, op resolver, tensor arena, and error reporter
// must be at least as long as that of the interpreter object, since the
// interpreter may need to access them at any time. This means that you should
// usually create them with the same scope as each other, for example having
// them all allocated on the stack as local variables through a top-level
// function.
// The interpreter doesn't do any deallocation of any of the pointed-to
// objects, ownership remains with the caller.
MicroInterpreter(const Model* model, const OpResolver& op_resolver,
uint8_t* tensor_arena, size_t tensor_arena_size,
ErrorReporter* error_reporter);
~MicroInterpreter();
// Runs through the model and allocates all necessary input, output and
// intermediate tensors.
TfLiteStatus AllocateTensors();
// In order to support partial graph runs for strided models, this can return
// values other than kTfLiteOk and kTfLiteError.
// TODO(b/149795762): Add this to the TfLiteStatus enum.
TfLiteStatus Invoke();
size_t tensors_size() const { return context_.tensors_size; }
TfLiteTensor* tensor(size_t tensor_index);
template <class T>
T* typed_tensor(int tensor_index) {
if (TfLiteTensor* tensor_ptr = tensor(tensor_index)) {
if (tensor_ptr->type == typeToTfLiteType<T>()) {
return GetTensorData<T>(tensor_ptr);
}
}
return nullptr;
}
TfLiteTensor* input(size_t index);
size_t inputs_size() const { return subgraph_->inputs()->Length(); }
const flatbuffers::Vector<int32_t>& inputs() const {
return *subgraph_->inputs();
}
TfLiteTensor* input_tensor(size_t index) { return input(index); }
template <class T>
T* typed_input_tensor(int tensor_index) {
if (TfLiteTensor* tensor_ptr = input_tensor(tensor_index)) {
if (tensor_ptr->type == typeToTfLiteType<T>()) {
return GetTensorData<T>(tensor_ptr);
}
}
return nullptr;
}
TfLiteTensor* output(size_t index);
size_t outputs_size() const { return subgraph_->outputs()->Length(); }
const flatbuffers::Vector<int32_t>& outputs() const {
return *subgraph_->outputs();
}
TfLiteTensor* output_tensor(size_t index) { return output(index); }
template <class T>
T* typed_output_tensor(int tensor_index) {
if (TfLiteTensor* tensor_ptr = output_tensor(tensor_index)) {
if (tensor_ptr->type == typeToTfLiteType<T>()) {
return GetTensorData<T>(tensor_ptr);
}
}
return nullptr;
}
// Reset all variable tensors to the default value.
TfLiteStatus ResetVariableTensors();
TfLiteStatus initialization_status() const { return initialization_status_; }
size_t operators_size() const { return operators_->size(); }
// For debugging only.
const NodeAndRegistration node_and_registration(int node_index) const {
return node_and_registrations_[node_index];
}
// For debugging only.
// Returns the actual used arena in bytes. This method gives the optimal arena
// size. It's only available after `AllocateTensors` has been called.
// Note that normally tensor_arena requires 16-byte alignment to fully
// utilize the space. If that is not the case, the optimal arena size would
// be arena_used_bytes() + 16.
size_t arena_used_bytes() const { return allocator_.used_bytes(); }
private:
void CorrectTensorEndianness(TfLiteTensor* tensorCorr);
template <class T>
void CorrectTensorDataEndianness(T* data, int32_t size);
NodeAndRegistration* node_and_registrations_ = nullptr;
const Model* model_;
const OpResolver& op_resolver_;
ErrorReporter* error_reporter_;
TfLiteContext context_ = {};
MicroAllocator allocator_;
bool tensors_allocated_;
TfLiteStatus initialization_status_;
const flatbuffers::Vector<flatbuffers::Offset<Tensor>>* tensors_;
const flatbuffers::Vector<flatbuffers::Offset<Operator>>* operators_;
const SubGraph* subgraph_;
internal::ContextHelper context_helper_;
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MICRO_INTERPRETER_H_
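
Putting the pieces together, a minimal end-to-end sketch of this API. The model bytes, arena size, and input value are placeholders, and Register_FULLY_CONNECTED() is assumed to be declared by this commit's micro kernels (micro_ops.h):

namespace {
alignas(16) uint8_t tensor_arena[16 * 1024];  // Placeholder size.
}  // namespace

TfLiteStatus RunOnce(const void* model_bytes) {
  static tflite::MicroErrorReporter error_reporter;
  const tflite::Model* model = tflite::GetModel(model_bytes);
  static tflite::MicroMutableOpResolver resolver;
  resolver.AddBuiltin(tflite::BuiltinOperator_FULLY_CONNECTED,
                      tflite::ops::micro::Register_FULLY_CONNECTED());
  tflite::MicroInterpreter interpreter(model, resolver, tensor_arena,
                                       sizeof(tensor_arena), &error_reporter);
  TF_LITE_ENSURE_STATUS(interpreter.AllocateTensors());
  interpreter.input(0)->data.f[0] = 0.5f;  // Placeholder input.
  TF_LITE_ENSURE_STATUS(interpreter.Invoke());
  const float result = interpreter.output(0)->data.f[0];
  (void)result;  // Consume the output here.
  return kTfLiteOk;
}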

View File

@@ -0,0 +1,125 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_MUTABLE_OP_RESOLVER_H_
#define TENSORFLOW_LITE_MICRO_MICRO_MUTABLE_OP_RESOLVER_H_
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/op_resolver.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/schema/schema_generated.h"
#ifndef TFLITE_REGISTRATIONS_MAX
#define TFLITE_REGISTRATIONS_MAX (128)
#endif
namespace tflite {
// Op versions discussed in this file are enumerated here:
// tensorflow/lite/tools/versioning/op_version.cc
inline int MicroOpResolverAnyVersion() { return 0; }
template <unsigned int tOpCount = TFLITE_REGISTRATIONS_MAX>
class MicroOpResolver : public OpResolver {
public:
const TfLiteRegistration* FindOp(tflite::BuiltinOperator op,
int version) const override {
for (unsigned int i = 0; i < registrations_len_; ++i) {
const TfLiteRegistration& registration = registrations_[i];
if ((registration.builtin_code == op) &&
(registration.version == MicroOpResolverAnyVersion() ||
registration.version == version)) {
return &registration;
}
}
return nullptr;
}
const TfLiteRegistration* FindOp(const char* op, int version) const override {
for (unsigned int i = 0; i < registrations_len_; ++i) {
const TfLiteRegistration& registration = registrations_[i];
if ((registration.builtin_code == BuiltinOperator_CUSTOM) &&
(strcmp(registration.custom_name, op) == 0) &&
(registration.version == MicroOpResolverAnyVersion() ||
registration.version == version)) {
return &registration;
}
}
return nullptr;
}
void AddBuiltin(tflite::BuiltinOperator op, TfLiteRegistration* registration,
int version = 1) {
if (registrations_len_ >= tOpCount) {
// TODO(b/147748244) - Add error reporting hooks so we can report this!
return;
}
TfLiteRegistration* new_registration = &registrations_[registrations_len_];
registrations_len_ += 1;
*new_registration = *registration;
new_registration->builtin_code = op;
new_registration->version = version;
}
void AddBuiltin(tflite::BuiltinOperator op, TfLiteRegistration* registration,
int min_version, int max_version) {
for (int version = min_version; version <= max_version; ++version) {
AddBuiltin(op, registration, version);
}
}
void AddCustom(const char* name, TfLiteRegistration* registration,
int version = 1) {
if (registrations_len_ >= tOpCount) {
// TODO(b/147748244) - Add error reporting hooks so we can report this!
return;
}
TfLiteRegistration* new_registration = &registrations_[registrations_len_];
registrations_len_ += 1;
*new_registration = *registration;
new_registration->builtin_code = BuiltinOperator_CUSTOM;
new_registration->custom_name = name;
new_registration->version = version;
}
void AddCustom(const char* name, TfLiteRegistration* registration,
int min_version, int max_version) {
for (int version = min_version; version <= max_version; ++version) {
AddCustom(name, registration, version);
}
}
unsigned int GetRegistrationLength() { return registrations_len_; }
private:
TfLiteRegistration registrations_[tOpCount];
unsigned int registrations_len_ = 0;
TF_LITE_REMOVE_VIRTUAL_DELETE
};
// TODO(b/147854028): Consider switching all uses of MicroMutableOpResolver to
// MicroOpResolver.
class MicroMutableOpResolver
: public MicroOpResolver<TFLITE_REGISTRATIONS_MAX> {
private:
TF_LITE_REMOVE_VIRTUAL_DELETE
};
}  // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MICRO_MUTABLE_OP_RESOLVER_H_
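
A short sketch of populating the resolver; Register_CONV_2D() is assumed from this commit's micro kernels, and my_custom_registration stands in for a hypothetical TfLiteRegistration you own:

tflite::MicroOpResolver<4> resolver;  // Static storage for up to 4 entries.
// Registers versions 1 through 3 of the builtin convolution kernel.
resolver.AddBuiltin(tflite::BuiltinOperator_CONV_2D,
                    tflite::ops::micro::Register_CONV_2D(),
                    /*min_version=*/1, /*max_version=*/3);
// Custom ops are looked up by name via FindOp(const char*, int).
resolver.AddCustom("my_custom_op", &my_custom_registration);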

View File

@@ -0,0 +1,144 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/micro_optional_debug_tools.h"
// `cinttypes` requires `__STDC_FORMAT_MACROS` to be defined to expose `PRId32`.
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <cinttypes>
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
namespace {
std::vector<int> flatbuffersVector2StdVector(
const flatbuffers::Vector<int32_t>& fVector) {
std::vector<int> stdVector;
stdVector.reserve(fVector.size());
for (size_t i = 0; i < fVector.size(); i++) {
stdVector.push_back(fVector.Get(i));
}
return stdVector;
}
void PrintIntVector(const std::vector<int>& v) {
for (const auto& it : v) {
printf(" %d", it);
}
printf("\n");
}
void PrintTfLiteIntVector(const TfLiteIntArray* v) {
if (!v) {
printf(" (null)\n");
return;
}
for (int k = 0; k < v->size; k++) {
printf(" %d", v->data[k]);
}
printf("\n");
}
const char* TensorTypeName(TfLiteType type) {
switch (type) {
case kTfLiteNoType:
return "kTfLiteNoType";
case kTfLiteFloat32:
return "kTfLiteFloat32";
case kTfLiteInt32:
return "kTfLiteInt32";
case kTfLiteUInt8:
return "kTfLiteUInt8";
case kTfLiteInt8:
return "kTfLiteInt8";
case kTfLiteInt64:
return "kTfLiteInt64";
case kTfLiteString:
return "kTfLiteString";
case kTfLiteBool:
return "kTfLiteBool";
case kTfLiteInt16:
return "kTfLiteInt16";
case kTfLiteComplex64:
return "kTfLiteComplex64";
case kTfLiteFloat16:
return "kTfLiteFloat16";
case kTfLiteFloat64:
return "kTfLiteFloat64";
}
return "(invalid)";
}
const char* AllocTypeName(TfLiteAllocationType type) {
switch (type) {
case kTfLiteMemNone:
return "kTfLiteMemNone";
case kTfLiteMmapRo:
return "kTfLiteMmapRo";
case kTfLiteDynamic:
return "kTfLiteDynamic";
case kTfLiteArenaRw:
return "kTfLiteArenaRw";
case kTfLiteArenaRwPersistent:
return "kTfLiteArenaRwPersistent";
}
return "(invalid)";
}
} // namespace
// Prints a dump of what tensors and what nodes are in the interpreter.
void PrintInterpreterState(MicroInterpreter* interpreter) {
printf("Interpreter has %zu tensors and %zu nodes\n",
interpreter->tensors_size(), interpreter->operators_size());
printf("Inputs:");
PrintIntVector(flatbuffersVector2StdVector(interpreter->inputs()));
printf("Outputs:");
PrintIntVector(flatbuffersVector2StdVector(interpreter->outputs()));
printf("\n");
for (size_t tensor_index = 0; tensor_index < interpreter->tensors_size();
tensor_index++) {
TfLiteTensor* tensor = interpreter->tensor(static_cast<int>(tensor_index));
printf("Tensor %3zu %-20s %10s %15s %10zu bytes (%4.1f MB) ", tensor_index,
tensor->name, TensorTypeName(tensor->type),
AllocTypeName(tensor->allocation_type), tensor->bytes,
static_cast<double>(tensor->bytes) / (1 << 20));
PrintTfLiteIntVector(tensor->dims);
}
printf("\n");
for (size_t node_index = 0; node_index < interpreter->operators_size();
node_index++) {
const NodeAndRegistration node_and_reg =
interpreter->node_and_registration(static_cast<int>(node_index));
const TfLiteNode& node = node_and_reg.node;
const TfLiteRegistration* reg = node_and_reg.registration;
if (reg->custom_name != nullptr) {
printf("Node %3zu Operator Custom Name %s\n", node_index,
reg->custom_name);
} else {
printf("Node %3zu Operator Builtin Code %3" PRId32 " %s\n", node_index,
reg->builtin_code, EnumNamesBuiltinOperator()[reg->builtin_code]);
}
printf(" Inputs:");
PrintTfLiteIntVector(node.inputs);
printf(" Outputs:");
PrintTfLiteIntVector(node.outputs);
}
}
} // namespace tflite

View File

@@ -0,0 +1,27 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Optional debugging functionality. For small binaries, these functions are
// not needed.
#ifndef TENSORFLOW_LITE_MICRO_MICRO_OPTIONAL_DEBUG_TOOLS_H_
#define TENSORFLOW_LITE_MICRO_MICRO_OPTIONAL_DEBUG_TOOLS_H_
#include "tensorflow/lite/micro/micro_interpreter.h"
namespace tflite {
// Prints a dump of what tensors and what nodes are in the interpreter.
void PrintInterpreterState(MicroInterpreter* interpreter);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MICRO_OPTIONAL_DEBUG_TOOLS_H_
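
Usage is a single call once an interpreter exists (see micro_interpreter.h above). Output goes to printf, so this is only suitable for builds that have one:

tflite::PrintInterpreterState(&interpreter);  // Dumps all tensors and nodes.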

View File

@@ -0,0 +1,265 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Implements debug logging for numbers by converting them into strings and then
// calling the main DebugLog(char*) function. These are separated into a
// different file so that platforms can just implement the string output version
// of DebugLog() and then get the numerical variations without requiring any
// more code.
#include "tensorflow/lite/micro/micro_string.h"
#include <cstdarg>
#include <cstdint>
#include <cstring>
namespace {
// Int formats can need up to 10 bytes for the value plus a single byte for the
// sign.
constexpr int kMaxIntCharsNeeded = 10 + 1;
// Hex formats can need up to 8 bytes for the value plus two bytes for the "0x".
constexpr int kMaxHexCharsNeeded = 8 + 2;
// Float formats can need up to 7 bytes for the fraction plus 3 bytes for "x2^"
// plus 3 bytes for the exponent and a single byte for the sign.
constexpr int kMaxFloatCharsNeeded = 7 + 3 + 3 + 1;
// All input buffers to the number conversion functions must be this long.
const int kFastToBufferSize = 48;
// Reverses a zero-terminated string in-place.
char* ReverseStringInPlace(char* start, char* end) {
char* p1 = start;
char* p2 = end - 1;
while (p1 < p2) {
char tmp = *p1;
*p1++ = *p2;
*p2-- = tmp;
}
return start;
}
// Appends a string to a string, in-place. You need to pass in the maximum
// string length as the second argument.
char* StrCatStr(char* main, int main_max_length, const char* to_append) {
char* current = main;
while (*current != 0) {
++current;
}
char* current_end = main + (main_max_length - 1);
while ((*to_append != 0) && (current < current_end)) {
*current = *to_append;
++current;
++to_append;
}
*current = 0;
return current;
}
// Populates the provided buffer with an ASCII representation of the number.
char* FastUInt32ToBufferLeft(uint32_t i, char* buffer, int base) {
char* start = buffer;
do {
int32_t digit = i % base;
char character;
if (digit < 10) {
character = '0' + digit;
} else {
character = 'a' + (digit - 10);
}
*buffer++ = character;
i /= base;
} while (i > 0);
*buffer = 0;
ReverseStringInPlace(start, buffer);
return buffer;
}
// Populates the provided buffer with an ASCII representation of the number.
char* FastInt32ToBufferLeft(int32_t i, char* buffer) {
uint32_t u = i;
if (i < 0) {
*buffer++ = '-';
u = -u;
}
return FastUInt32ToBufferLeft(u, buffer, 10);
}
// Converts a number to a string and appends it to another.
char* StrCatInt32(char* main, int main_max_length, int32_t number) {
char number_string[kFastToBufferSize];
FastInt32ToBufferLeft(number, number_string);
return StrCatStr(main, main_max_length, number_string);
}
// Converts a number to a string and appends it to another.
char* StrCatUInt32(char* main, int main_max_length, uint32_t number, int base) {
char number_string[kFastToBufferSize];
FastUInt32ToBufferLeft(number, number_string, base);
return StrCatStr(main, main_max_length, number_string);
}
// Populates the provided buffer with ASCII representation of the float number.
// Avoids the use of any floating point instructions (since these aren't
// supported on many microcontrollers) and as a consequence prints values with
// power-of-two exponents.
char* FastFloatToBufferLeft(float f, char* buffer) {
char* current = buffer;
char* current_end = buffer + (kFastToBufferSize - 1);
// Access the bit fields of the floating point value to avoid requiring any
// float instructions. These constants are derived from IEEE 754.
const uint32_t sign_mask = 0x80000000;
const uint32_t exponent_mask = 0x7f800000;
const int32_t exponent_shift = 23;
const int32_t exponent_bias = 127;
const uint32_t fraction_mask = 0x007fffff;
uint32_t u;
memcpy(&u, &f, sizeof(u));  // Bit-copy the float to avoid strict-aliasing issues.
const int32_t exponent =
((u & exponent_mask) >> exponent_shift) - exponent_bias;
const uint32_t fraction = (u & fraction_mask);
if (u & sign_mask) {
*current = '-';
current += 1;
}
*current = 0;
// These are special cases for infinities and not-a-numbers.
if (exponent == 128) {
if (fraction == 0) {
current = StrCatStr(current, (current_end - current), "Inf");
return current;
} else {
current = StrCatStr(current, (current_end - current), "NaN");
return current;
}
}
// 0x007fffff (8388607) represents 0.99... for the fraction, so to print the
// correct decimal digits we need to scale our value before passing it to the
// conversion function. This scale should be 10000000/8388608 = 1.1920928955.
// We can approximate this using multiply-adds and right-shifts using the
// values in this array. The 1. portion of the number string is printed out
// in a fixed way before the fraction, below.
const int32_t scale_shifts_size = 13;
const int8_t scale_shifts[13] = {3, 4, 8, 11, 13, 14, 17,
18, 19, 20, 21, 22, 23};
uint32_t scaled_fraction = fraction;
for (int i = 0; i < scale_shifts_size; ++i) {
scaled_fraction += (fraction >> scale_shifts[i]);
}
*current = '1';
current += 1;
*current = '.';
current += 1;
*current = 0;
current = StrCatUInt32(current, (current_end - current), scaled_fraction, 10);
current = StrCatStr(current, (current_end - current), "*2^");
current = StrCatInt32(current, (current_end - current), exponent);
return current;
}
int FormatInt32(char* output, int32_t i) {
return static_cast<int>(FastInt32ToBufferLeft(i, output) - output);
}
int FormatUInt32(char* output, uint32_t i) {
return static_cast<int>(FastUInt32ToBufferLeft(i, output, 10) - output);
}
int FormatHex(char* output, uint32_t i) {
return static_cast<int>(FastUInt32ToBufferLeft(i, output, 16) - output);
}
int FormatFloat(char* output, float i) {
return static_cast<int>(FastFloatToBufferLeft(i, output) - output);
}
} // namespace
extern "C" int MicroVsnprintf(char* output, int len, const char* format,
va_list args) {
int output_index = 0;
const char* current = format;
// One extra character must be left for the null terminator.
const int usable_length = len - 1;
while (*current != '\0' && output_index < usable_length) {
if (*current == '%') {
current++;
switch (*current) {
case 'd':
// Cut off log message if format could exceed log buffer length.
if (usable_length - output_index < kMaxIntCharsNeeded) {
output[output_index++] = '\0';
return output_index;
}
output_index +=
FormatInt32(&output[output_index], va_arg(args, int32_t));
current++;
break;
case 'u':
if (usable_length - output_index < kMaxIntCharsNeeded) {
output[output_index++] = '\0';
return output_index;
}
output_index +=
FormatUInt32(&output[output_index], va_arg(args, uint32_t));
current++;
break;
case 'x':
if (usable_length - output_index < kMaxHexCharsNeeded) {
output[output_index++] = '\0';
return output_index;
}
output[output_index++] = '0';
output[output_index++] = 'x';
output_index +=
FormatHex(&output[output_index], va_arg(args, uint32_t));
current++;
break;
case 'f':
if (usable_length - output_index < kMaxFloatCharsNeeded) {
output[output_index++] = '\0';
return output_index;
}
output_index +=
FormatFloat(&output[output_index], va_arg(args, double));
current++;
break;
case '%':
output[output_index++] = *current++;
break;
case 's': {
char* string = va_arg(args, char*);
int string_idx = 0;
while (string_idx + output_index < usable_length &&
string[string_idx] != '\0') {
output[output_index++] = string[string_idx++];
}
current++;
break;
}
}
} else {
output[output_index++] = *current++;
}
}
output[output_index++] = '\0';
return output_index;
}
extern "C" int MicroSnprintf(char* output, int len, const char* format, ...) {
va_list args;
va_start(args, format);
int bytes_written = MicroVsnprintf(output, len, format, args);
va_end(args);
return bytes_written;
}

View File

@@ -0,0 +1,33 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_STRING_H_
#define TENSORFLOW_LITE_MICRO_MICRO_STRING_H_
#include <cstdarg>
// Implements simple string formatting for numeric types. Returns the number of
// bytes written to output.
extern "C" {
// Functionally equivalent to vsnprintf, trimmed down for TFLite Micro.
// MicroSnprintf() is implemented using MicroVsnprintf().
int MicroVsnprintf(char* output, int len, const char* format, va_list args);
// Functionally equivalent to snprintf, trimmed down for TFLite Micro.
// For example, MicroSnprintf(buffer, 10, "int %d", 10) will put the string
// "int 10" in the buffer.
// Floating point values are logged in exponent notation (1.XXX*2^N).
int MicroSnprintf(char* output, int len, const char* format, ...);
}
#endif // TENSORFLOW_LITE_MICRO_MICRO_STRING_H_
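
A small sketch of the documented behavior; the buffer size is arbitrary:

char buf[32];
int n = MicroSnprintf(buf, sizeof(buf), "int %d hex %x", 10, 255);
// buf now holds "int 10 hex 0xff"; the returned n counts the trailing '\0'.
MicroSnprintf(buf, sizeof(buf), "float %f", 4.0f);
// Floats use power-of-two exponent notation: buf holds "float 1.0*2^2".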

View File

@@ -0,0 +1,44 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Reference implementation of timer functions. Platforms are not required to
// implement these timer methods, but they are required to enable profiling.
// On platforms that have a POSIX stack or C library, it can be written using
// methods from <sys/time.h> or clock() from <time.h>.
// To add an equivalent function for your own platform, create your own
// implementation file, and place it in a subfolder with named after the OS
// you're targeting. For example, see the Cortex M bare metal version in
// tensorflow/lite/micro/bluepill/micro_time.cc or the mbed one on
// tensorflow/lite/micro/mbed/micro_time.cc.
#include "tensorflow/lite/micro/micro_time.h"
namespace tflite {
// Reference implementation of the ticks_per_second() function that's required
// for a platform to support TensorFlow Lite for Microcontrollers profiling.
// This returns 0 by default because timing is an optional feature that builds
// without errors on platforms that do not need it.
int32_t ticks_per_second() { return 0; }
// Reference implementation of the GetCurrentTimeTicks() function that's
// required for a platform to support TensorFlow Lite for Microcontrollers
// profiling. This returns 0 by default because timing is an optional feature
// that builds without errors on platforms that do not need it.
int32_t GetCurrentTimeTicks() { return 0; }
} // namespace tflite
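
As the comments above suggest, a hosted-platform version is only a few lines using clock() from the C library; a sketch assuming CLOCKS_PER_SEC fits in an int32_t on the target:

#include <ctime>

#include "tensorflow/lite/micro/micro_time.h"

namespace tflite {
int32_t ticks_per_second() { return CLOCKS_PER_SEC; }
int32_t GetCurrentTimeTicks() { return static_cast<int32_t>(std::clock()); }
}  // namespace tflite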

View File

@@ -0,0 +1,31 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_TIME_H_
#define TENSORFLOW_LITE_MICRO_MICRO_TIME_H_
#include <stdint.h>
namespace tflite {
// These functions should be implemented by each target platform, and provide an
// accurate tick count along with how many ticks there are per second.
int32_t ticks_per_second();
// Return time in ticks. The meaning of a tick varies per platform.
int32_t GetCurrentTimeTicks();
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MICRO_TIME_H_

View File

@@ -0,0 +1,279 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/micro_utils.h"
#include <limits.h>
#include <math.h>
#include <stdint.h>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/op_macros.h"
namespace tflite {
namespace {
static const uint8_t kAsymmetricUInt8Min = 0;
static const uint8_t kAsymmetricUInt8Max = UINT8_MAX;
static const uint8_t kSymmetricUInt8Min = 1;
static const uint8_t kSymmetricUInt8Max = UINT8_MAX;
static const int8_t kAsymmetricInt8Min = INT8_MIN;
static const int8_t kAsymmetricInt8Max = INT8_MAX;
static const int kSymmetricInt8Scale = kAsymmetricInt8Max;
static const int16_t kAsymmetricInt16Min = INT16_MIN;
static const int16_t kAsymmetricInt16Max = INT16_MAX;
static const int kSymmetricInt16Scale = kAsymmetricInt16Max;
static const int32_t kAsymmetricInt32Max = INT32_MAX;
static const int kSymmetricInt32Scale = kAsymmetricInt32Max;
} // namespace
int ElementCount(const TfLiteIntArray& dims) {
int result = 1;
for (int i = 0; i < dims.size; ++i) {
result *= dims.data[i];
}
return result;
}
// Converts a float value into an unsigned eight-bit quantized value.
uint8_t FloatToAsymmetricQuantizedUInt8(const float value, const float scale,
const int zero_point) {
int32_t result = round(value / scale) + zero_point;
if (result < kAsymmetricUInt8Min) {
result = kAsymmetricUInt8Min;
}
if (result > kAsymmetricUInt8Max) {
result = kAsymmetricUInt8Max;
}
return result;
}
uint8_t FloatToSymmetricQuantizedUInt8(const float value, const float scale) {
int32_t result = round(value / scale);
if (result < kSymmetricUInt8Min) {
result = kSymmetricUInt8Min;
}
if (result > kSymmetricUInt8Max) {
result = kSymmetricUInt8Max;
}
return result;
}
int8_t FloatToAsymmetricQuantizedInt8(const float value, const float scale,
const int zero_point) {
int32_t result = round(value / scale) + zero_point;
if (result < kAsymmetricInt8Min) {
result = kAsymmetricInt8Min;
}
if (result > kAsymmetricInt8Max) {
result = kAsymmetricInt8Max;
}
return result;
}
int16_t FloatToAsymmetricQuantizedInt16(const float value, const float scale,
const int zero_point) {
int32_t result = round(value / scale) + zero_point;
if (result < kAsymmetricInt16Min) {
result = kAsymmetricInt16Min;
}
if (result > kAsymmetricInt16Max) {
result = kAsymmetricInt16Max;
}
return result;
}
int8_t FloatToSymmetricQuantizedInt8(const float value, const float scale) {
return FloatToAsymmetricQuantizedInt8(value, scale, 0);
}
int32_t FloatToSymmetricQuantizedInt32(const float value, const float scale) {
float quantized = round(value / scale);
if (quantized > static_cast<float>(INT_MAX)) {
quantized = static_cast<float>(INT_MAX);
} else if (quantized < INT_MIN) {
quantized = static_cast<float>(INT_MIN);
}
return static_cast<int>(quantized);
}
void AsymmetricQuantize(const float* input, int8_t* output, int num_elements,
float scale, int zero_point) {
for (int i = 0; i < num_elements; i++) {
output[i] = FloatToAsymmetricQuantizedInt8(input[i], scale, zero_point);
}
}
void AsymmetricQuantize(const float* input, uint8_t* output, int num_elements,
float scale, int zero_point) {
for (int i = 0; i < num_elements; i++) {
output[i] = FloatToAsymmetricQuantizedUInt8(input[i], scale, zero_point);
}
}
void AsymmetricQuantize(const float* input, int16_t* output, int num_elements,
float scale, int zero_point) {
for (int i = 0; i < num_elements; i++) {
output[i] = FloatToAsymmetricQuantizedInt16(input[i], scale, zero_point);
}
}
void SymmetricQuantize(const float* input, int32_t* output, int num_elements,
float scale) {
for (int i = 0; i < num_elements; i++) {
output[i] = FloatToSymmetricQuantizedInt32(input[i], scale);
}
}
void SymmetricPerChannelQuantize(const float* input, int32_t* output,
int num_elements, int num_channels,
float* scales) {
int elements_per_channel = num_elements / num_channels;
for (int i = 0; i < num_channels; i++) {
for (int j = 0; j < elements_per_channel; j++) {
output[i * elements_per_channel + j] = FloatToSymmetricQuantizedInt32(
input[i * elements_per_channel + j], scales[i]);
}
}
}
void SignedSymmetricPerChannelQuantize(const float* values,
TfLiteIntArray* dims,
int quantized_dimension,
int8_t* quantized_values,
float* scaling_factors) {
int input_size = ElementCount(*dims);
int channel_count = dims->data[quantized_dimension];
int per_channel_size = input_size / channel_count;
int stride;
int channel_stride;
if (quantized_dimension == 0) {
stride = 1;
channel_stride = per_channel_size;
} else if (quantized_dimension == 3) {
stride = channel_count;
channel_stride = 1;
} else {
TF_LITE_FATAL("quantized dimension must be 0 or 3");
}
// Calculate scales for each channel.
for (int channel = 0; channel < channel_count; channel++) {
float min = 0;
float max = 0;
for (int i = 0; i < per_channel_size; i++) {
int idx = channel * channel_stride + i * stride;
min = fminf(min, values[idx]);
max = fmaxf(max, values[idx]);
}
scaling_factors[channel] =
fmaxf(fabs(min), fabs(max)) / kSymmetricInt8Scale;
for (int i = 0; i < per_channel_size; i++) {
int idx = channel * channel_stride + i * stride;
const int32_t quantized_value =
static_cast<int32_t>(roundf(values[idx] / scaling_factors[channel]));
// Clamp: just in case some odd numeric offset.
quantized_values[idx] = fminf(
kSymmetricInt8Scale, fmaxf(-kSymmetricInt8Scale, quantized_value));
}
}
}
void SignedSymmetricQuantize(const float* values, TfLiteIntArray* dims,
int8_t* quantized_values, float* scaling_factor) {
int input_size = ElementCount(*dims);
float min = 0;
float max = 0;
for (int i = 0; i < input_size; i++) {
min = fminf(min, values[i]);
max = fmaxf(max, values[i]);
}
*scaling_factor = fmaxf(fabs(min), fabs(max)) / kSymmetricInt8Scale;
for (int i = 0; i < input_size; i++) {
const int32_t quantized_value =
static_cast<int32_t>(roundf(values[i] / *scaling_factor));
// Clamp: just in case some odd numeric offset.
quantized_values[i] = fminf(kSymmetricInt8Scale,
fmaxf(-kSymmetricInt8Scale, quantized_value));
}
}
void SignedSymmetricQuantize(const float* values, TfLiteIntArray* dims,
int16_t* quantized_values, float* scaling_factor) {
int input_size = ElementCount(*dims);
float min = 0;
float max = 0;
for (int i = 0; i < input_size; i++) {
min = fminf(min, values[i]);
max = fmaxf(max, values[i]);
}
*scaling_factor = fmaxf(fabs(min), fabs(max)) / kSymmetricInt16Scale;
for (int i = 0; i < input_size; i++) {
const int32_t quantized_value =
static_cast<int32_t>(roundf(values[i] / *scaling_factor));
// Clamp: just in case some odd numeric offset.
quantized_values[i] = fminf(kSymmetricInt16Scale,
fmaxf(-kSymmetricInt16Scale, quantized_value));
}
}
void SignedSymmetricQuantize(const float* values, TfLiteIntArray* dims,
int32_t* quantized_values, float* scaling_factor) {
int input_size = ElementCount(*dims);
float min = 0;
float max = 0;
for (int i = 0; i < input_size; i++) {
min = fminf(min, values[i]);
max = fmaxf(max, values[i]);
}
*scaling_factor =
fmaxf(fabs(min), fabs(max)) / static_cast<float>(kSymmetricInt32Scale);
for (int i = 0; i < input_size; i++) {
const int32_t quantized_value =
static_cast<int32_t>(roundf(values[i] / *scaling_factor));
// Clamp: just in case some odd numeric offset.
quantized_values[i] = fminf(
static_cast<float>(kSymmetricInt32Scale),
fmaxf(static_cast<float>(-kSymmetricInt32Scale), quantized_value));
}
}
void SymmetricQuantize(const float* values, TfLiteIntArray* dims,
uint8_t* quantized_values, float* scaling_factor) {
SignedSymmetricQuantize(values, dims,
reinterpret_cast<int8_t*>(quantized_values),
scaling_factor);
}
void SymmetricDequantize(const int8_t* values, const int size,
const float dequantization_scale,
float* dequantized_values) {
for (int i = 0; i < size; ++i) {
dequantized_values[i] = values[i] * dequantization_scale;
}
}
} // namespace tflite

View File

@@ -0,0 +1,99 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_UTILS_H_
#define TENSORFLOW_LITE_MICRO_MICRO_UTILS_H_
#include <stdint.h>
#include "tensorflow/lite/c/common.h"
namespace tflite {
// Returns number of elements in the shape array.
int ElementCount(const TfLiteIntArray& dims);
uint8_t FloatToAsymmetricQuantizedUInt8(const float value, const float scale,
const int zero_point);
uint8_t FloatToSymmetricQuantizedUInt8(const float value, const float scale);
int8_t FloatToAsymmetricQuantizedInt8(const float value, const float scale,
const int zero_point);
int16_t FloatToAsymmetricQuantizedInt16(const float value, const float scale,
const int zero_point);
int8_t FloatToSymmetricQuantizedInt8(const float value, const float scale);
// Converts a float value into a signed thirty-two-bit quantized value. Note
// that values close to max int and min int may see significant error due to
// a lack of floating point granularity for large values.
int32_t FloatToSymmetricQuantizedInt32(const float value, const float scale);
// Helper methods to quantize arrays of floats to the desired format.
//
// There are several key flavors of quantization in TfLite:
//       | asymmetric | symmetric | per channel |
// int8  |     X      |     X     |      X      |
// uint8 |     X      |     X     |             |
// int16 |     X      |           |             |
// int32 |            |     X     |      X      |
//
// The per-op quantization spec can be found here:
// https://www.tensorflow.org/lite/performance/quantization_spec
void AsymmetricQuantize(const float* input, int8_t* output, int num_elements,
float scale, int zero_point = 0);
void AsymmetricQuantize(const float* input, uint8_t* output, int num_elements,
float scale, int zero_point = 128);
void AsymmetricQuantize(const float* input, int16_t* output, int num_elements,
float scale, int zero_point = 0);
void SymmetricQuantize(const float* input, int32_t* output, int num_elements,
float scale);
void SymmetricPerChannelQuantize(const float* input, int32_t* output,
int num_elements, int num_channels,
float* scales);
void SignedSymmetricPerChannelQuantize(const float* values,
TfLiteIntArray* dims,
int quantized_dimension,
int8_t* quantized_values,
float* scaling_factor);
void SignedSymmetricQuantize(const float* values, TfLiteIntArray* dims,
int8_t* quantized_values, float* scaling_factor);
void SignedSymmetricQuantize(const float* values, TfLiteIntArray* dims,
int16_t* quantized_values, float* scaling_factor);
void SignedSymmetricQuantize(const float* values, TfLiteIntArray* dims,
int32_t* quantized_values, float* scaling_factor);
void SymmetricQuantize(const float* values, TfLiteIntArray* dims,
uint8_t* quantized_values, float* scaling_factor);
void SymmetricDequantize(const int8_t* values, const int size,
const float dequantization_scale,
float* dequantized_values);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MICRO_UTILS_H_
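
A numeric sketch of the asymmetric int8 flavor declared above: with scale 0.5 and zero_point -1, the value 2.0f maps to round(2.0 / 0.5) + (-1) = 3, and out-of-range results clamp to the int8 limits:

float input[2] = {2.0f, -64.5f};
int8_t output[2];
tflite::AsymmetricQuantize(input, output, /*num_elements=*/2,
                           /*scale=*/0.5f, /*zero_point=*/-1);
// output[0] == 3; output[1] would be -130, so it clamps to -128.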

View File

@@ -0,0 +1,66 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/simple_memory_allocator.h"
#include <cstddef>
#include <cstdint>
#include "tensorflow/lite/micro/memory_helpers.h"
namespace tflite {
SimpleMemoryAllocator* CreateInPlaceSimpleMemoryAllocator(
ErrorReporter* error_reporter, uint8_t* buffer, size_t buffer_size) {
SimpleMemoryAllocator tmp =
SimpleMemoryAllocator(error_reporter, buffer, buffer_size);
SimpleMemoryAllocator* in_place_allocator =
reinterpret_cast<SimpleMemoryAllocator*>(tmp.AllocateFromTail(
sizeof(SimpleMemoryAllocator), alignof(SimpleMemoryAllocator)));
*in_place_allocator = tmp;
return in_place_allocator;
}
uint8_t* SimpleMemoryAllocator::AllocateFromHead(size_t size,
size_t alignment) {
uint8_t* const aligned_result = AlignPointerUp(head_, alignment);
const size_t available_memory = tail_ - aligned_result;
if (available_memory < size) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Failed to allocate memory. Requested: %u, available %u, missing: %u",
size, available_memory, size - available_memory);
return nullptr;
}
head_ = aligned_result + size;
return aligned_result;
}
uint8_t* SimpleMemoryAllocator::AllocateFromTail(size_t size,
size_t alignment) {
uint8_t* const aligned_result = AlignPointerDown(tail_ - size, alignment);
if (aligned_result < head_) {
const size_t missing_memory = head_ - aligned_result;
TF_LITE_REPORT_ERROR(
error_reporter_,
"Failed to allocate memory. Requested: %u, available %u, missing: %u",
size, size - missing_memory, missing_memory);
return nullptr;
}
tail_ = aligned_result;
return aligned_result;
}
} // namespace tflite

View File

@@ -0,0 +1,71 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_SIMPLE_MEMORY_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_SIMPLE_MEMORY_ALLOCATOR_H_
#include <cstdint>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
namespace tflite {
// TODO(petewarden): This allocator never frees up or reuses any memory, even
// though we have enough information about lifetimes of the tensors to do so.
// This makes it pretty wasteful, so we should use a more intelligent method.
class SimpleMemoryAllocator {
public:
SimpleMemoryAllocator(ErrorReporter* error_reporter, uint8_t* buffer_head,
uint8_t* buffer_tail)
: error_reporter_(error_reporter),
buffer_head_(buffer_head),
buffer_tail_(buffer_tail),
head_(buffer_head),
tail_(buffer_tail) {}
SimpleMemoryAllocator(ErrorReporter* error_reporter, uint8_t* buffer,
size_t buffer_size)
: SimpleMemoryAllocator(error_reporter, buffer, buffer + buffer_size) {}
// Allocates memory starting at the head of the arena (lowest address and
// moving upwards).
uint8_t* AllocateFromHead(size_t size, size_t alignment);
// Allocates memory starting at the tail of the arena (highest address and
// moving downwards).
uint8_t* AllocateFromTail(size_t size, size_t alignment);
uint8_t* GetHead() const { return head_; }
uint8_t* GetTail() const { return tail_; }
size_t GetAvailableMemory() const { return tail_ - head_; }
size_t GetUsedBytes() const { return GetBufferSize() - GetAvailableMemory(); }
private:
size_t GetBufferSize() const { return buffer_tail_ - buffer_head_; }
ErrorReporter* error_reporter_;
uint8_t* buffer_head_;
uint8_t* buffer_tail_;
uint8_t* head_;
uint8_t* tail_;
};
// Constructs a SimpleMemoryAllocator in place at the tail of `buffer` and
// returns a pointer to it.
SimpleMemoryAllocator* CreateInPlaceSimpleMemoryAllocator(
ErrorReporter* error_reporter, uint8_t* buffer, size_t buffer_size);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_SIMPLE_MEMORY_ALLOCATOR_H_

View File

@@ -0,0 +1,789 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/test_helpers.h"
#include <initializer_list>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/tensor_utils.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/micro_utils.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
namespace testing {
namespace {
class StackAllocator : public flatbuffers::Allocator {
public:
StackAllocator() : data_(data_backing_), data_size_(0) {}
uint8_t* allocate(size_t size) override {
TFLITE_DCHECK((data_size_ + size) <= kStackAllocatorSize);
uint8_t* result = data_;
data_ += size;
data_size_ += size;
return result;
}
void deallocate(uint8_t* p, size_t) override {}
static StackAllocator& instance() {
// Avoid using true dynamic memory allocation to be portable to bare metal.
static char inst_memory[sizeof(StackAllocator)];
static StackAllocator* inst = new (inst_memory) StackAllocator;
return *inst;
}
static constexpr size_t kStackAllocatorSize = 4096;
private:
uint8_t data_backing_[kStackAllocatorSize];
uint8_t* data_;
size_t data_size_;
};
// Returns a statically allocated FlatBufferBuilder backed by StackAllocator,
// so that test models can be built without touching the heap.
flatbuffers::FlatBufferBuilder* BuilderInstance() {
static char inst_memory[sizeof(flatbuffers::FlatBufferBuilder)];
static flatbuffers::FlatBufferBuilder* inst =
new (inst_memory) flatbuffers::FlatBufferBuilder(
StackAllocator::kStackAllocatorSize, &StackAllocator::instance());
return inst;
}
// A wrapper around the FlatBuffers API to help build test models easily.
class ModelBuilder {
public:
typedef int32_t Tensor;
typedef int Operator;
typedef int Node;
// `builder` needs to be available until BuildModel is called.
explicit ModelBuilder(flatbuffers::FlatBufferBuilder* builder)
: builder_(builder) {}
// Registers an operator that will be used in the model.
Operator RegisterOp(BuiltinOperator op, const char* custom_code,
int32_t version);
// Adds a tensor to the model.
Tensor AddTensor(TensorType type, std::initializer_list<int32_t> shape) {
return AddTensorImpl(type, /* is_variable */ false, shape);
}
// Adds a variable tensor to the model.
Tensor AddVariableTensor(TensorType type,
std::initializer_list<int32_t> shape) {
return AddTensorImpl(type, /* is_variable */ true, shape);
}
// Adds a node to the model with given input and output Tensors.
Node AddNode(Operator op, std::initializer_list<Tensor> inputs,
std::initializer_list<Tensor> outputs);
// Constructs the flatbuffer model using `builder_` and returns a pointer to
// it. The returned model has the same lifetime as `builder_`.
const Model* BuildModel(std::initializer_list<Tensor> inputs,
std::initializer_list<Tensor> outputs);
private:
// Adds a tensor to the model.
Tensor AddTensorImpl(TensorType type, bool is_variable,
std::initializer_list<int32_t> shape);
flatbuffers::FlatBufferBuilder* builder_;
static constexpr int kMaxOperatorCodes = 10;
flatbuffers::Offset<tflite::OperatorCode> operator_codes_[kMaxOperatorCodes];
int next_operator_code_id_ = 0;
static constexpr int kMaxOperators = 50;
flatbuffers::Offset<tflite::Operator> operators_[kMaxOperators];
int next_operator_id_ = 0;
static constexpr int kMaxTensors = 50;
flatbuffers::Offset<tflite::Tensor> tensors_[kMaxTensors];
int next_tensor_id_ = 0;
};
ModelBuilder::Operator ModelBuilder::RegisterOp(BuiltinOperator op,
const char* custom_code,
int32_t version) {
TFLITE_DCHECK(next_operator_code_id_ < kMaxOperatorCodes);
operator_codes_[next_operator_code_id_] =
tflite::CreateOperatorCodeDirect(*builder_, op, custom_code, version);
next_operator_code_id_++;
return next_operator_code_id_ - 1;
}
ModelBuilder::Node ModelBuilder::AddNode(
ModelBuilder::Operator op,
std::initializer_list<ModelBuilder::Tensor> inputs,
std::initializer_list<ModelBuilder::Tensor> outputs) {
TFLITE_DCHECK(next_operator_id_ < kMaxOperators);
operators_[next_operator_id_] = tflite::CreateOperator(
*builder_, op, builder_->CreateVector(inputs.begin(), inputs.size()),
builder_->CreateVector(outputs.begin(), outputs.size()),
BuiltinOptions_NONE);
next_operator_id_++;
return next_operator_id_ - 1;
}
const Model* ModelBuilder::BuildModel(
std::initializer_list<ModelBuilder::Tensor> inputs,
std::initializer_list<ModelBuilder::Tensor> outputs) {
// Model schema requires an empty buffer at idx 0.
constexpr size_t kBufferSize = 1;
const flatbuffers::Offset<Buffer> buffers[kBufferSize] = {
tflite::CreateBuffer(*builder_)};
// TFLM only supports a single subgraph.
constexpr size_t subgraphs_size = 1;
const flatbuffers::Offset<SubGraph> subgraphs[subgraphs_size] = {
tflite::CreateSubGraph(
*builder_, builder_->CreateVector(tensors_, next_tensor_id_),
builder_->CreateVector(inputs.begin(), inputs.size()),
builder_->CreateVector(outputs.begin(), outputs.size()),
builder_->CreateVector(operators_, next_operator_id_),
builder_->CreateString("test_subgraph"))};
const flatbuffers::Offset<Model> model_offset = tflite::CreateModel(
*builder_, 0,
builder_->CreateVector(operator_codes_, next_operator_code_id_),
builder_->CreateVector(subgraphs, subgraphs_size),
builder_->CreateString("teset_model"),
builder_->CreateVector(buffers, kBufferSize));
tflite::FinishModelBuffer(*builder_, model_offset);
void* model_pointer = builder_->GetBufferPointer();
const Model* model = flatbuffers::GetRoot<Model>(model_pointer);
return model;
}
ModelBuilder::Tensor ModelBuilder::AddTensorImpl(
TensorType type, bool is_variable, std::initializer_list<int32_t> shape) {
TFLITE_DCHECK(next_tensor_id_ < kMaxTensors);
tensors_[next_tensor_id_] = tflite::CreateTensor(
*builder_, builder_->CreateVector(shape.begin(), shape.size()), type,
/* buffer */ 0, /* name */ 0, /* quantization */ 0,
/* is_variable */ is_variable,
/* sparsity */ 0);
next_tensor_id_++;
return next_tensor_id_ - 1;
}
const Model* BuildSimpleStatefulModel() {
using flatbuffers::Offset;
flatbuffers::FlatBufferBuilder* fb_builder = BuilderInstance();
ModelBuilder model_builder(fb_builder);
const int op_id =
model_builder.RegisterOp(BuiltinOperator_CUSTOM, "simple_stateful_op", 0);
const int input_tensor = model_builder.AddTensor(TensorType_UINT8, {3});
const int median_tensor = model_builder.AddTensor(TensorType_UINT8, {3});
const int invoke_count_tensor =
model_builder.AddTensor(TensorType_INT32, {1});
model_builder.AddNode(op_id, {input_tensor},
{median_tensor, invoke_count_tensor});
return model_builder.BuildModel({input_tensor},
{median_tensor, invoke_count_tensor});
}
const Model* BuildSimpleModelWithBranch() {
using flatbuffers::Offset;
flatbuffers::FlatBufferBuilder* fb_builder = BuilderInstance();
ModelBuilder model_builder(fb_builder);
  /* Model structure
            | t0
     +------|
     |      v
     |   +---------+
     |   |   n0    |
     |   |         |
     |   +---------+
     v      +
            |
   +---------+  | t1
   |   n1    |  |
   |         |  |
   +---------+  |
      |         |
   t2 |         v
      |   +---------+
      +-->|   n2    |
          |         |
          +-------|-+
                  | t3
                  v
  */
const int op_id =
model_builder.RegisterOp(BuiltinOperator_CUSTOM, "mock_custom",
/* version= */ 0);
const int t0 = model_builder.AddTensor(TensorType_FLOAT32, {2, 2, 3});
const int t1 = model_builder.AddTensor(TensorType_FLOAT32, {2, 2, 3});
const int t2 = model_builder.AddTensor(TensorType_FLOAT32, {2, 2, 3});
const int t3 = model_builder.AddTensor(TensorType_FLOAT32, {2, 2, 3});
model_builder.AddNode(op_id, {t0}, {t1}); // n0
model_builder.AddNode(op_id, {t0}, {t2}); // n1
model_builder.AddNode(op_id, {t1, t2}, {t3}); // n2
return model_builder.BuildModel({t0}, {t3});
}
const Model* BuildSimpleMockModel() {
using flatbuffers::Offset;
flatbuffers::FlatBufferBuilder* builder = BuilderInstance();
constexpr size_t buffer_data_size = 1;
const uint8_t buffer_data[buffer_data_size] = {21};
constexpr size_t buffers_size = 2;
const Offset<Buffer> buffers[buffers_size] = {
CreateBuffer(*builder),
CreateBuffer(*builder,
builder->CreateVector(buffer_data, buffer_data_size))};
constexpr size_t tensor_shape_size = 1;
const int32_t tensor_shape[tensor_shape_size] = {1};
constexpr size_t tensors_size = 4;
const Offset<Tensor> tensors[tensors_size] = {
CreateTensor(*builder,
builder->CreateVector(tensor_shape, tensor_shape_size),
TensorType_INT32, 0,
builder->CreateString("test_input_tensor"), 0, false),
CreateTensor(*builder,
builder->CreateVector(tensor_shape, tensor_shape_size),
TensorType_UINT8, 1,
builder->CreateString("test_weight_tensor"), 0, false),
CreateTensor(*builder,
builder->CreateVector(tensor_shape, tensor_shape_size),
TensorType_INT32, 0,
builder->CreateString("test_output_tensor"), 0, false),
CreateTensor(*builder,
builder->CreateVector(tensor_shape, tensor_shape_size),
TensorType_INT32, 0,
builder->CreateString("test_output2_tensor"), 0, false),
};
constexpr size_t inputs_size = 1;
const int32_t inputs[inputs_size] = {0};
constexpr size_t outputs_size = 2;
const int32_t outputs[outputs_size] = {2, 3};
constexpr size_t operator_inputs_size = 2;
const int32_t operator_inputs[operator_inputs_size] = {0, 1};
constexpr size_t operator_outputs_size = 1;
const int32_t operator_outputs[operator_outputs_size] = {2};
const int32_t operator2_outputs[operator_outputs_size] = {3};
constexpr size_t operators_size = 2;
const Offset<Operator> operators[operators_size] = {
CreateOperator(
*builder, 0,
builder->CreateVector(operator_inputs, operator_inputs_size),
builder->CreateVector(operator_outputs, operator_outputs_size),
BuiltinOptions_NONE),
CreateOperator(
*builder, 0,
builder->CreateVector(operator_inputs, operator_inputs_size),
builder->CreateVector(operator2_outputs, operator_outputs_size),
BuiltinOptions_NONE),
};
constexpr size_t subgraphs_size = 1;
const Offset<SubGraph> subgraphs[subgraphs_size] = {
CreateSubGraph(*builder, builder->CreateVector(tensors, tensors_size),
builder->CreateVector(inputs, inputs_size),
builder->CreateVector(outputs, outputs_size),
builder->CreateVector(operators, operators_size),
builder->CreateString("test_subgraph"))};
constexpr size_t operator_codes_size = 1;
const Offset<OperatorCode> operator_codes[operator_codes_size] = {
CreateOperatorCodeDirect(*builder, BuiltinOperator_CUSTOM, "mock_custom",
0)};
const Offset<Model> model_offset = CreateModel(
*builder, 0, builder->CreateVector(operator_codes, operator_codes_size),
builder->CreateVector(subgraphs, subgraphs_size),
builder->CreateString("test_model"),
builder->CreateVector(buffers, buffers_size));
FinishModelBuffer(*builder, model_offset);
void* model_pointer = builder->GetBufferPointer();
const Model* model = flatbuffers::GetRoot<Model>(model_pointer);
return model;
}
const Model* BuildComplexMockModel() {
using flatbuffers::Offset;
flatbuffers::FlatBufferBuilder* builder = BuilderInstance();
constexpr size_t buffer_data_size = 1;
const uint8_t buffer_data_1[buffer_data_size] = {21};
const uint8_t buffer_data_2[buffer_data_size] = {21};
const uint8_t buffer_data_3[buffer_data_size] = {21};
constexpr size_t buffers_size = 7;
const Offset<Buffer> buffers[buffers_size] = {
// Op 1 buffers:
CreateBuffer(*builder),
CreateBuffer(*builder),
CreateBuffer(*builder,
builder->CreateVector(buffer_data_1, buffer_data_size)),
// Op 2 buffers:
CreateBuffer(*builder),
CreateBuffer(*builder,
builder->CreateVector(buffer_data_2, buffer_data_size)),
// Op 3 buffers:
CreateBuffer(*builder),
CreateBuffer(*builder,
builder->CreateVector(buffer_data_3, buffer_data_size)),
};
constexpr size_t tensor_shape_size = 1;
const int32_t tensor_shape[tensor_shape_size] = {1};
constexpr size_t tensors_size = 10;
const Offset<Tensor> tensors[tensors_size] = {
// Op 1 inputs:
CreateTensor(
*builder, builder->CreateVector(tensor_shape, tensor_shape_size),
TensorType_INT32, 0, builder->CreateString("test_input_tensor_1"), 0,
false /* is_variable */),
CreateTensor(
*builder, builder->CreateVector(tensor_shape, tensor_shape_size),
TensorType_INT32, 1, builder->CreateString("test_variable_tensor_1"),
0, true /* is_variable */),
CreateTensor(
*builder, builder->CreateVector(tensor_shape, tensor_shape_size),
TensorType_UINT8, 2, builder->CreateString("test_weight_tensor_1"), 0,
false /* is_variable */),
// Op 1 output / Op 2 input:
CreateTensor(
*builder, builder->CreateVector(tensor_shape, tensor_shape_size),
TensorType_INT32, 0, builder->CreateString("test_output_tensor_1"), 0,
false /* is_variable */),
// Op 2 inputs:
CreateTensor(
*builder, builder->CreateVector(tensor_shape, tensor_shape_size),
TensorType_INT32, 1, builder->CreateString("test_variable_tensor_2"),
0, true /* is_variable */),
CreateTensor(
*builder, builder->CreateVector(tensor_shape, tensor_shape_size),
TensorType_UINT8, 2, builder->CreateString("test_weight_tensor_2"), 0,
false /* is_variable */),
// Op 2 output / Op 3 input:
CreateTensor(
*builder, builder->CreateVector(tensor_shape, tensor_shape_size),
TensorType_INT32, 0, builder->CreateString("test_output_tensor_2"), 0,
false /* is_variable */),
// Op 3 inputs:
CreateTensor(
*builder, builder->CreateVector(tensor_shape, tensor_shape_size),
TensorType_INT32, 1, builder->CreateString("test_variable_tensor_3"),
0, true /* is_variable */),
CreateTensor(
*builder, builder->CreateVector(tensor_shape, tensor_shape_size),
TensorType_UINT8, 2, builder->CreateString("test_weight_tensor_3"), 0,
false /* is_variable */),
// Op 3 output:
CreateTensor(
*builder, builder->CreateVector(tensor_shape, tensor_shape_size),
TensorType_INT32, 0, builder->CreateString("test_output_tensor_3"), 0,
false /* is_variable */),
};
constexpr size_t operators_size = 3;
Offset<Operator> operators[operators_size];
{
// Set Op 1 attributes:
constexpr size_t operator_inputs_size = 3;
const int32_t operator_inputs[operator_inputs_size] = {0, 1, 2};
constexpr size_t operator_outputs_size = 1;
const int32_t operator_outputs[operator_outputs_size] = {3};
operators[0] = {CreateOperator(
*builder, 0,
builder->CreateVector(operator_inputs, operator_inputs_size),
builder->CreateVector(operator_outputs, operator_outputs_size),
BuiltinOptions_NONE)};
}
{
// Set Op 2 attributes
constexpr size_t operator_inputs_size = 3;
const int32_t operator_inputs[operator_inputs_size] = {3, 4, 5};
constexpr size_t operator_outputs_size = 1;
const int32_t operator_outputs[operator_outputs_size] = {6};
operators[1] = {CreateOperator(
*builder, 0,
builder->CreateVector(operator_inputs, operator_inputs_size),
builder->CreateVector(operator_outputs, operator_outputs_size),
BuiltinOptions_NONE)};
}
{
// Set Op 3 attributes
constexpr size_t operator_inputs_size = 3;
const int32_t operator_inputs[operator_inputs_size] = {6, 7, 8};
constexpr size_t operator_outputs_size = 1;
const int32_t operator_outputs[operator_outputs_size] = {9};
operators[2] = {CreateOperator(
*builder, 0,
builder->CreateVector(operator_inputs, operator_inputs_size),
builder->CreateVector(operator_outputs, operator_outputs_size),
BuiltinOptions_NONE)};
}
constexpr size_t inputs_size = 1;
const int32_t inputs[inputs_size] = {0};
constexpr size_t outputs_size = 1;
const int32_t outputs[outputs_size] = {9};
constexpr size_t subgraphs_size = 1;
const Offset<SubGraph> subgraphs[subgraphs_size] = {
CreateSubGraph(*builder, builder->CreateVector(tensors, tensors_size),
builder->CreateVector(inputs, inputs_size),
builder->CreateVector(outputs, outputs_size),
builder->CreateVector(operators, operators_size),
builder->CreateString("test_subgraph"))};
constexpr size_t operator_codes_size = 1;
const Offset<OperatorCode> operator_codes[operator_codes_size] = {
CreateOperatorCodeDirect(*builder, BuiltinOperator_CUSTOM, "mock_custom",
0)};
const Offset<Model> model_offset = CreateModel(
*builder, 0, builder->CreateVector(operator_codes, operator_codes_size),
builder->CreateVector(subgraphs, subgraphs_size),
builder->CreateString("test_model"),
builder->CreateVector(buffers, buffers_size));
FinishModelBuffer(*builder, model_offset);
void* model_pointer = builder->GetBufferPointer();
const Model* model = flatbuffers::GetRoot<Model>(model_pointer);
return model;
}
} // namespace
const Model* GetSimpleMockModel() {
static Model* model = nullptr;
if (!model) {
model = const_cast<Model*>(BuildSimpleMockModel());
}
return model;
}
const Model* GetComplexMockModel() {
static Model* model = nullptr;
if (!model) {
model = const_cast<Model*>(BuildComplexMockModel());
}
return model;
}
const Model* GetSimpleModelWithBranch() {
static Model* model = nullptr;
if (!model) {
model = const_cast<Model*>(BuildSimpleModelWithBranch());
}
return model;
}
const Model* GetSimpleStatefulModel() {
static Model* model = nullptr;
if (!model) {
model = const_cast<Model*>(BuildSimpleStatefulModel());
}
return model;
}
const Tensor* Create1dFlatbufferTensor(int size, bool is_variable) {
using flatbuffers::Offset;
flatbuffers::FlatBufferBuilder* builder = BuilderInstance();
constexpr size_t tensor_shape_size = 1;
const int32_t tensor_shape[tensor_shape_size] = {size};
const Offset<Tensor> tensor_offset = CreateTensor(
*builder, builder->CreateVector(tensor_shape, tensor_shape_size),
TensorType_INT32, 0, builder->CreateString("test_tensor"), 0,
is_variable);
builder->Finish(tensor_offset);
void* tensor_pointer = builder->GetBufferPointer();
const Tensor* tensor = flatbuffers::GetRoot<Tensor>(tensor_pointer);
return tensor;
}
const Tensor* CreateQuantizedFlatbufferTensor(int size) {
using flatbuffers::Offset;
flatbuffers::FlatBufferBuilder* builder = BuilderInstance();
const Offset<QuantizationParameters> quant_params =
CreateQuantizationParameters(
*builder,
/*min=*/builder->CreateVector<float>({0.1f}),
/*max=*/builder->CreateVector<float>({0.2f}),
/*scale=*/builder->CreateVector<float>({0.3f}),
/*zero_point=*/builder->CreateVector<int64_t>({100ll}));
constexpr size_t tensor_shape_size = 1;
const int32_t tensor_shape[tensor_shape_size] = {size};
const Offset<Tensor> tensor_offset = CreateTensor(
*builder, builder->CreateVector(tensor_shape, tensor_shape_size),
TensorType_INT32, 0, builder->CreateString("test_tensor"), quant_params,
false);
builder->Finish(tensor_offset);
void* tensor_pointer = builder->GetBufferPointer();
const Tensor* tensor = flatbuffers::GetRoot<Tensor>(tensor_pointer);
return tensor;
}
const Tensor* CreateMissingQuantizationFlatbufferTensor(int size) {
using flatbuffers::Offset;
flatbuffers::FlatBufferBuilder* builder = BuilderInstance();
const Offset<QuantizationParameters> quant_params =
CreateQuantizationParameters(*builder, 0, 0, 0, 0,
QuantizationDetails_NONE, 0, 0);
constexpr size_t tensor_shape_size = 1;
const int32_t tensor_shape[tensor_shape_size] = {size};
const Offset<Tensor> tensor_offset = CreateTensor(
*builder, builder->CreateVector(tensor_shape, tensor_shape_size),
TensorType_INT32, 0, builder->CreateString("test_tensor"), quant_params,
false);
builder->Finish(tensor_offset);
void* tensor_pointer = builder->GetBufferPointer();
const Tensor* tensor = flatbuffers::GetRoot<Tensor>(tensor_pointer);
return tensor;
}
const flatbuffers::Vector<flatbuffers::Offset<Buffer>>*
CreateFlatbufferBuffers() {
using flatbuffers::Offset;
flatbuffers::FlatBufferBuilder* builder = BuilderInstance();
constexpr size_t buffers_size = 1;
const Offset<Buffer> buffers[buffers_size] = {
CreateBuffer(*builder),
};
const flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>>
buffers_offset = builder->CreateVector(buffers, buffers_size);
builder->Finish(buffers_offset);
void* buffers_pointer = builder->GetBufferPointer();
const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* result =
flatbuffers::GetRoot<flatbuffers::Vector<flatbuffers::Offset<Buffer>>>(
buffers_pointer);
return result;
}
int TestStrcmp(const char* a, const char* b) {
if ((a == nullptr) || (b == nullptr)) {
return -1;
}
while ((*a != 0) && (*a == *b)) {
a++;
b++;
}
return *reinterpret_cast<const unsigned char*>(a) -
*reinterpret_cast<const unsigned char*>(b);
}
// Wrapper to forward kernel errors to the interpreter's error reporter.
void ReportOpError(struct TfLiteContext* context, const char* format, ...) {
ErrorReporter* error_reporter = static_cast<ErrorReporter*>(context->impl_);
va_list args;
va_start(args, format);
  // Call the va_list overload of Report() directly; routing a va_list through
  // the variadic TF_LITE_REPORT_ERROR macro relies on overload resolution and
  // reads as if it forwarded ordinary format arguments.
  error_reporter->Report(format, args);
va_end(args);
}
// Create a TfLiteIntArray from an array of ints. The first element in the
// supplied array must be the size of the array expressed as an int.
TfLiteIntArray* IntArrayFromInts(const int* int_array) {
return const_cast<TfLiteIntArray*>(
reinterpret_cast<const TfLiteIntArray*>(int_array));
}
// Create a TfLiteFloatArray from an array of floats. The first element in the
// supplied array must be the size of the array expressed as a float.
TfLiteFloatArray* FloatArrayFromFloats(const float* floats) {
static_assert(sizeof(float) == sizeof(int),
"assumes sizeof(float) == sizeof(int) to perform casting");
int size = static_cast<int>(floats[0]);
  // Overwrite the float-typed length prefix in place with its integer bit
  // pattern, so the buffer can be reinterpreted as a TfLiteFloatArray, whose
  // first field is an int size.
  *reinterpret_cast<int32_t*>(const_cast<float*>(floats)) = size;
return reinterpret_cast<TfLiteFloatArray*>(const_cast<float*>(floats));
}
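// Illustrative sketch (not part of the original file) of the size-prefix
// convention the two helpers above expect: element 0 carries the length and
// the payload starts at element 1. Variable and function names here are
// hypothetical.
void SizePrefixSketch() {
  // The arrays must outlive the returned structs, which alias this storage;
  // FloatArrayFromFloats also rewrites its length prefix in place.
  static int dims_data[] = {2, 3, 4};  // length prefix 2, then a {3, 4} shape
  TfLiteIntArray* dims = IntArrayFromInts(dims_data);
  // dims->size == 2, dims->data[0] == 3, dims->data[1] == 4.
  static float scale_data[] = {1.0f, 0.5f};  // length prefix 1, one scale
  TfLiteFloatArray* scales = FloatArrayFromFloats(scale_data);
  // scales->size == 1, scales->data[0] == 0.5f.
  (void)dims;
  (void)scales;
}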
TfLiteTensor CreateTensor(TfLiteIntArray* dims, const char* name,
bool is_variable) {
TfLiteTensor result;
result.dims = dims;
result.name = name;
result.params = {};
result.quantization = {kTfLiteNoQuantization, nullptr};
result.is_variable = is_variable;
result.allocation_type = kTfLiteMemNone;
result.allocation = nullptr;
return result;
}
TfLiteTensor CreateFloatTensor(const float* data, TfLiteIntArray* dims,
const char* name, bool is_variable) {
TfLiteTensor result = CreateTensor(dims, name, is_variable);
result.type = kTfLiteFloat32;
result.data.f = const_cast<float*>(data);
result.bytes = ElementCount(*dims) * sizeof(float);
return result;
}
void PopulateFloatTensor(TfLiteTensor* tensor, float* begin, float* end) {
float* p = begin;
float* v = tensor->data.f;
while (p != end) {
*v++ = *p++;
}
}
TfLiteTensor CreateBoolTensor(const bool* data, TfLiteIntArray* dims,
const char* name, bool is_variable) {
TfLiteTensor result = CreateTensor(dims, name, is_variable);
result.type = kTfLiteBool;
result.data.b = const_cast<bool*>(data);
result.bytes = ElementCount(*dims) * sizeof(bool);
return result;
}
TfLiteTensor CreateInt32Tensor(const int32_t* data, TfLiteIntArray* dims,
const char* name, bool is_variable) {
TfLiteTensor result = CreateTensor(dims, name, is_variable);
result.type = kTfLiteInt32;
result.data.i32 = const_cast<int32_t*>(data);
result.bytes = ElementCount(*dims) * sizeof(int32_t);
return result;
}
TfLiteTensor CreateQuantizedTensor(const uint8_t* data, TfLiteIntArray* dims,
float scale, int zero_point,
const char* name, bool is_variable) {
TfLiteTensor result = CreateTensor(dims, name, is_variable);
result.type = kTfLiteUInt8;
result.data.uint8 = const_cast<uint8_t*>(data);
result.params = {scale, zero_point};
result.quantization = {kTfLiteAffineQuantization, nullptr};
result.bytes = ElementCount(*dims) * sizeof(uint8_t);
return result;
}
TfLiteTensor CreateQuantizedTensor(const int8_t* data, TfLiteIntArray* dims,
float scale, int zero_point,
const char* name, bool is_variable) {
TfLiteTensor result = CreateTensor(dims, name, is_variable);
result.type = kTfLiteInt8;
result.data.int8 = const_cast<int8_t*>(data);
result.params = {scale, zero_point};
result.quantization = {kTfLiteAffineQuantization, nullptr};
result.bytes = ElementCount(*dims) * sizeof(int8_t);
return result;
}
TfLiteTensor CreateQuantizedTensor(const int16_t* data, TfLiteIntArray* dims,
float scale, int zero_point,
const char* name, bool is_variable) {
TfLiteTensor result = CreateTensor(dims, name, is_variable);
result.type = kTfLiteInt16;
result.data.i16 = const_cast<int16_t*>(data);
result.params = {scale, zero_point};
result.quantization = {kTfLiteAffineQuantization, nullptr};
result.bytes = ElementCount(*dims) * sizeof(int16_t);
return result;
}
TfLiteTensor CreateQuantized32Tensor(const int32_t* data, TfLiteIntArray* dims,
float scale, const char* name,
bool is_variable) {
TfLiteTensor result = CreateTensor(dims, name, is_variable);
result.type = kTfLiteInt32;
result.data.i32 = const_cast<int32_t*>(data);
  // Quantized int32 tensors always have a zero point of 0, since the range of
  // int32 values is large, and because a nonzero zero point would cost extra
  // cycles during processing.
result.params = {scale, 0};
result.quantization = {kTfLiteAffineQuantization, nullptr};
result.bytes = ElementCount(*dims) * sizeof(int32_t);
return result;
}
TfLiteTensor CreateQuantizedBiasTensor(const float* data, int32_t* quantized,
TfLiteIntArray* dims, float input_scale,
float weights_scale, const char* name,
bool is_variable) {
float bias_scale = input_scale * weights_scale;
tflite::SymmetricQuantize(data, quantized, ElementCount(*dims), bias_scale);
return CreateQuantized32Tensor(quantized, dims, bias_scale, name,
is_variable);
}
// Quantizes int32 bias tensor with per-channel weights determined by input
// scale multiplied by weight scale for each channel.
TfLiteTensor CreatePerChannelQuantizedBiasTensor(
const float* input, int32_t* quantized, TfLiteIntArray* dims,
float input_scale, float* weight_scales, float* scales, int* zero_points,
TfLiteAffineQuantization* affine_quant, int quantized_dimension,
const char* name, bool is_variable) {
int input_size = ElementCount(*dims);
int num_channels = dims->data[quantized_dimension];
// First element is reserved for array length
zero_points[0] = num_channels;
scales[0] = static_cast<float>(num_channels);
float* scales_array = &scales[1];
for (int i = 0; i < num_channels; i++) {
scales_array[i] = input_scale * weight_scales[i];
zero_points[i + 1] = 0;
}
SymmetricPerChannelQuantize(input, quantized, input_size, num_channels,
scales_array);
affine_quant->scale = FloatArrayFromFloats(scales);
affine_quant->zero_point = IntArrayFromInts(zero_points);
affine_quant->quantized_dimension = quantized_dimension;
TfLiteTensor result = CreateTensor(dims, name, is_variable);
result.type = kTfLiteInt32;
result.data.i32 = const_cast<int32_t*>(quantized);
result.quantization = {kTfLiteAffineQuantization, affine_quant};
result.bytes = ElementCount(*dims) * sizeof(int32_t);
return result;
}
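// Illustrative sketch (not part of the original file) of the scales and
// zero_points layout the per-channel helpers expect: element 0 of each array
// holds the channel count and channel data starts at element 1. All names
// and values here are hypothetical.
void PerChannelBiasSketch(TfLiteAffineQuantization* affine_quant) {
  constexpr int kNumChannels = 2;
  static int dims_data[] = {2, kNumChannels, 4};  // shape {2, 4}, 2 channels
  TfLiteIntArray* dims = IntArrayFromInts(dims_data);
  static const float bias_values[] = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f};
  static int32_t quantized[8];
  static float weight_scales[kNumChannels] = {0.5f, 0.25f};
  // One extra slot at the front of each array for the length prefix.
  static float scales[kNumChannels + 1];
  static int zero_points[kNumChannels + 1];
  TfLiteTensor bias = CreatePerChannelQuantizedBiasTensor(
      bias_values, quantized, dims, /*input_scale=*/0.1f, weight_scales,
      scales, zero_points, affine_quant, /*quantized_dimension=*/0,
      "bias_tensor");
  (void)bias;
}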
TfLiteTensor CreateSymmetricPerChannelQuantizedTensor(
const float* input, int8_t* quantized, TfLiteIntArray* dims, float* scales,
int* zero_points, TfLiteAffineQuantization* affine_quant,
int quantized_dimension, const char* name, bool is_variable) {
int channel_count = dims->data[quantized_dimension];
scales[0] = static_cast<float>(channel_count);
zero_points[0] = channel_count;
SignedSymmetricPerChannelQuantize(input, dims, quantized_dimension, quantized,
&scales[1]);
for (int i = 0; i < channel_count; i++) {
zero_points[i + 1] = 0;
}
affine_quant->scale = FloatArrayFromFloats(scales);
affine_quant->zero_point = IntArrayFromInts(zero_points);
affine_quant->quantized_dimension = quantized_dimension;
TfLiteTensor result = CreateTensor(dims, name, is_variable);
result.type = kTfLiteInt8;
result.data.int8 = const_cast<int8_t*>(quantized);
result.quantization = {kTfLiteAffineQuantization, affine_quant};
result.bytes = ElementCount(*dims) * sizeof(int8_t);
return result;
}
} // namespace testing
} // namespace tflite

View File

@@ -0,0 +1,130 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_TEST_HELPERS_H_
#define TENSORFLOW_LITE_MICRO_TEST_HELPERS_H_
// Useful functions for writing tests.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/micro_utils.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
namespace testing {
// Returns a simple example flatbuffer TensorFlow Lite model. Contains 1 input
// tensor, 1 weight tensor, 2 output tensors, and 2 operators.
const Model* GetSimpleMockModel();
// Returns a flatbuffer TensorFlow Lite model with more inputs, variable
// tensors, and operators.
const Model* GetComplexMockModel();
// Returns a simple flatbuffer model with two branches.
const Model* GetSimpleModelWithBranch();
// Returns a flatbuffer model with `simple_stateful_op`
const Model* GetSimpleStatefulModel();
// Builds a one-dimensional flatbuffer tensor of the given size.
const Tensor* Create1dFlatbufferTensor(int size, bool is_variable = false);
// Builds a one-dimensional flatbuffer tensor of the given size with
// quantization metadata.
const Tensor* CreateQuantizedFlatbufferTensor(int size);
// Creates a one-dimensional tensor with no quantization metadata.
const Tensor* CreateMissingQuantizationFlatbufferTensor(int size);
// Creates a vector of flatbuffer buffers.
const flatbuffers::Vector<flatbuffers::Offset<Buffer>>*
CreateFlatbufferBuffers();
// Performs a simple string comparison without requiring standard C library.
int TestStrcmp(const char* a, const char* b);
// Wrapper to forward kernel errors to the interpreter's error reporter.
void ReportOpError(struct TfLiteContext* context, const char* format, ...);
void PopulateContext(TfLiteTensor* tensors, int tensors_size,
TfLiteContext* context);
// Create a TfLiteIntArray from an array of ints. The first element in the
// supplied array must be the size of the array expressed as an int.
TfLiteIntArray* IntArrayFromInts(const int* int_array);
// Create a TfLiteFloatArray from an array of floats. The first element in the
// supplied array must be the size of the array expressed as a float.
TfLiteFloatArray* FloatArrayFromFloats(const float* floats);
TfLiteTensor CreateFloatTensor(const float* data, TfLiteIntArray* dims,
const char* name, bool is_variable = false);
void PopulateFloatTensor(TfLiteTensor* tensor, float* begin, float* end);
TfLiteTensor CreateBoolTensor(const bool* data, TfLiteIntArray* dims,
const char* name, bool is_variable = false);
TfLiteTensor CreateInt32Tensor(const int32_t* data, TfLiteIntArray* dims,
const char* name, bool is_variable = false);
TfLiteTensor CreateQuantizedTensor(const uint8_t* data, TfLiteIntArray* dims,
float scale, int zero_point,
const char* name, bool is_variable = false);
TfLiteTensor CreateQuantizedTensor(const int8_t* data, TfLiteIntArray* dims,
float scale, int zero_point,
const char* name, bool is_variable = false);
TfLiteTensor CreateQuantizedTensor(const int16_t* data, TfLiteIntArray* dims,
float scale, int zero_point,
                                   const char* name, bool is_variable = false);
TfLiteTensor CreateQuantized32Tensor(const int32_t* data, TfLiteIntArray* dims,
                                     float scale, const char* name,
                                     bool is_variable = false);
template <typename T>
TfLiteTensor CreateQuantizedTensor(const float* input, T* quantized,
TfLiteIntArray* dims, float scale,
int zero_point, const char* name,
bool is_variable = false) {
int input_size = ElementCount(*dims);
tflite::AsymmetricQuantize(input, quantized, input_size, scale, zero_point);
return CreateQuantizedTensor(quantized, dims, scale, zero_point, name,
is_variable);
}
TfLiteTensor CreateQuantizedBiasTensor(const float* data, int32_t* quantized,
TfLiteIntArray* dims, float input_scale,
float weights_scale, const char* name,
bool is_variable = false);
// Quantizes int32 bias tensor with per-channel weights determined by input
// scale multiplied by weight scale for each channel.
TfLiteTensor CreatePerChannelQuantizedBiasTensor(
const float* input, int32_t* quantized, TfLiteIntArray* dims,
float input_scale, float* weight_scales, float* scales, int* zero_points,
TfLiteAffineQuantization* affine_quant, int quantized_dimension,
const char* name, bool is_variable = false);
TfLiteTensor CreateSymmetricPerChannelQuantizedTensor(
const float* input, int8_t* quantized, TfLiteIntArray* dims, float* scales,
int* zero_points, TfLiteAffineQuantization* affine_quant,
int quantized_dimension, const char* name, bool is_variable = false);
} // namespace testing
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_TEST_HELPERS_H_
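As a usage note, here is a minimal hypothetical test snippet built on the helpers declared above; the tensor values, scale, and function name are illustrative assumptions:

#include "tensorflow/lite/micro/test_helpers.h"

// Quantizes four floats into an int8 test tensor via the templated
// CreateQuantizedTensor overload declared above.
void QuantizedTensorSketch() {
  static int dims_data[] = {2, 1, 4};  // size-prefixed: a {1, 4} shape
  TfLiteIntArray* dims = tflite::testing::IntArrayFromInts(dims_data);
  static const float input[] = {-1.0f, 0.0f, 0.5f, 0.99f};
  static int8_t quantized[4];
  // A scale of 1/128 with zero point 0 maps [-1, 1) onto the int8 range.
  TfLiteTensor tensor = tflite::testing::CreateQuantizedTensor(
      input, quantized, dims, /*scale=*/1.0f / 128, /*zero_point=*/0,
      "test_tensor");
  (void)tensor;
}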