Mirror of https://github.com/jomjol/AI-on-the-edge-device.git
Synced 2025-12-12 14:37:06 +03:00

Commit: Update tflite

code/lib/tfmicro/tensorflow/lite/micro/all_ops_resolver.cc (new file, 94 lines added)
@@ -0,0 +1,94 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/micro/all_ops_resolver.h"

#include "tensorflow/lite/micro/kernels/micro_ops.h"

namespace tflite {
namespace ops {
namespace micro {
namespace custom {
TfLiteRegistration* Register_ETHOSU();
const char* GetString_ETHOSU();
}  // namespace custom
}  // namespace micro
}  // namespace ops

AllOpsResolver::AllOpsResolver() {
  // Please keep this list of Builtin Operators in alphabetical order.
  AddAbs();
  AddAdd();
  AddArgMax();
  AddArgMin();
  AddAveragePool2D();
  AddCeil();
  AddConcatenation();
  AddConv2D();
  AddCos();
  AddDepthwiseConv2D();
  AddDequantize();
  AddEqual();
  AddFloor();
  AddFullyConnected();
  AddGreater();
  AddGreaterEqual();
  AddHardSwish();
  AddL2Normalization();
  AddLess();
  AddLessEqual();
  AddLog();
  AddLogicalAnd();
  AddLogicalNot();
  AddLogicalOr();
  AddLogistic();
  AddMaximum();
  AddMaxPool2D();
  AddMean();
  AddMinimum();
  AddMul();
  AddNeg();
  AddNotEqual();
  AddPack();
  AddPad();
  AddPadV2();
  AddPrelu();
  AddQuantize();
  AddReduceMax();
  AddRelu();
  AddRelu6();
  AddReshape();
  AddResizeNearestNeighbor();
  AddRound();
  AddRsqrt();
  AddShape();
  AddSin();
  AddSoftmax();
  AddSplit();
  AddSplitV();
  AddSqrt();
  AddSquare();
  AddStridedSlice();
  AddSub();
  AddSvdf();
  AddTanh();
  AddUnpack();

  // TODO(b/159644355): Figure out if custom Ops belong in AllOpsResolver.
  TfLiteRegistration* registration =
      tflite::ops::micro::custom::Register_ETHOSU();
  if (registration) {
    AddCustom(tflite::ops::micro::custom::GetString_ETHOSU(), registration);
  }
}

}  // namespace tflite
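Note: a minimal sketch of how AllOpsResolver is consumed, based on the TFLM interpreter API of this era; `g_model_data` and the arena size are placeholders, not part of this commit.

```cpp
#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/schema/schema_generated.h"

extern const unsigned char g_model_data[];  // hypothetical model buffer

constexpr size_t kArenaSize = 16 * 1024;    // placeholder size
uint8_t tensor_arena[kArenaSize];

void RunOnce() {
  static tflite::MicroErrorReporter error_reporter;
  static tflite::AllOpsResolver resolver;  // registers every builtin above
  const tflite::Model* model = tflite::GetModel(g_model_data);
  tflite::MicroInterpreter interpreter(model, resolver, tensor_arena,
                                       kArenaSize, &error_reporter);
  interpreter.AllocateTensors();
  interpreter.Invoke();
}
```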
@@ -9,17 +9,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
-#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ALL_OPS_RESOLVER_H_
-#define TENSORFLOW_LITE_MICRO_KERNELS_ALL_OPS_RESOLVER_H_
+#ifndef TENSORFLOW_LITE_MICRO_ALL_OPS_RESOLVER_H_
+#define TENSORFLOW_LITE_MICRO_ALL_OPS_RESOLVER_H_

#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

namespace tflite {
-namespace ops {
-namespace micro {

-class AllOpsResolver : public MicroMutableOpResolver {
+// The magic number in the template parameter is the maximum number of ops that
+// can be added to AllOpsResolver. It can be increased if needed. And most
+// applications that care about the memory footprint will want to directly use
+// MicroMutableOpResolver and have an application specific template parameter.
+// The examples directory has sample code for this.
+class AllOpsResolver : public MicroMutableOpResolver<128> {
 public:
  AllOpsResolver();

@@ -27,8 +30,6 @@ class AllOpsResolver : public MicroMutableOpResolver {
  TF_LITE_REMOVE_VIRTUAL_DELETE
};

-}  // namespace micro
-}  // namespace ops
}  // namespace tflite

-#endif  // TENSORFLOW_LITE_MICRO_KERNELS_ALL_OPS_RESOLVER_H_
+#endif  // TENSORFLOW_LITE_MICRO_ALL_OPS_RESOLVER_H_
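Note: as the new header comment recommends, footprint-sensitive applications should size the resolver to the ops their model actually uses. A minimal sketch for a hypothetical four-op model (the op set is an assumption for illustration):

```cpp
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

// Registers only four ops, so the op table holds 4 entries instead of 128.
tflite::MicroMutableOpResolver<4> resolver;

void RegisterOps() {
  resolver.AddConv2D();
  resolver.AddDepthwiseConv2D();
  resolver.AddFullyConnected();
  resolver.AddSoftmax();
}
```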
(File diff suppressed because it is too large.)
@@ -0,0 +1,22 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_BENCHMARKS_KEYWORD_SCRAMBLED_MODEL_DATA_H_
#define TENSORFLOW_LITE_MICRO_BENCHMARKS_KEYWORD_SCRAMBLED_MODEL_DATA_H_

extern const unsigned char g_keyword_scrambled_model_data[];
extern const unsigned int g_keyword_scrambled_model_data_length;

#endif  // TENSORFLOW_LITE_MICRO_BENCHMARKS_KEYWORD_SCRAMBLED_MODEL_DATA_H_
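Note: this header only declares the benchmark model's symbols. A sketch of the matching definition the corresponding source file would provide (the byte values are hypothetical; the real array is generated from a model file):

```cpp
#include "tensorflow/lite/micro/benchmarks/keyword_scrambled_model_data.h"

const unsigned char g_keyword_scrambled_model_data[] = {
    0x1c, 0x00, 0x00, 0x00,  // hypothetical bytes; real data is generated
};
const unsigned int g_keyword_scrambled_model_data_length =
    sizeof(g_keyword_scrambled_model_data);
```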
@@ -1,4 +1,4 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -36,6 +36,15 @@ limitations under the License.

#include "tensorflow/lite/micro/debug_log.h"

+#ifndef TF_LITE_STRIP_ERROR_STRINGS
+#include <cstdio>
+#endif

-extern "C" void DebugLog(const char* s) { fprintf(stderr, "%s", s); }
+extern "C" void DebugLog(const char* s) {
+#ifndef TF_LITE_STRIP_ERROR_STRINGS
+  // Reusing TF_LITE_STRIP_ERROR_STRINGS to disable DebugLog completely to get
+  // maximum reduction in binary size. This is because we have DebugLog calls
+  // via TF_LITE_CHECK that are not stubbed out by TF_LITE_REPORT_ERROR.
+  fprintf(stderr, "%s", s);
+#endif
+}
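Note: debug_log.cc is the host/default implementation; each port overrides DebugLog for its platform, as the header below states. A hypothetical port (uart_write_string is an assumed platform routine, not a TFLM API):

```cpp
#include "tensorflow/lite/micro/debug_log.h"

extern "C" void DebugLog(const char* s) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
  uart_write_string(s);  // assumed platform UART routine
#endif
}
```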
@@ -15,9 +15,17 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_
#define TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_

+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus

// This function should be implemented by each target platform, and provide a
// way for strings to be output to some text stream. For more information, see
// tensorflow/lite/micro/debug_log.cc.
-extern "C" void DebugLog(const char* s);
+void DebugLog(const char* s);

+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus

#endif  // TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_
@@ -21,6 +21,8 @@ limitations under the License.

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
+#include "tensorflow/lite/kernels/internal/max.h"
+#include "tensorflow/lite/kernels/internal/min.h"

namespace tflite {
namespace ops {
@@ -32,11 +34,11 @@ inline float ActivationValFloat(TfLiteFusedActivation act, float a) {
    case kTfLiteActNone:
      return a;
    case kTfLiteActRelu:
-      return std::max(0.0f, a);
-    case kTfLiteActRelu1:
-      return std::max(-1.0f, std::min(a, 1.0f));
+      return TfLiteMax(0.0f, a);
+    case kTfLiteActReluN1To1:
+      return TfLiteMax(-1.0f, TfLiteMin(a, 1.0f));
    case kTfLiteActRelu6:
-      return std::max(0.0f, std::min(a, 6.0f));
+      return TfLiteMax(0.0f, TfLiteMin(a, 6.0f));
    case kTfLiteActTanh:
      return std::tanh(a);
    case kTfLiteActSignBit:
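Note: the behavior is unchanged by the TfLiteMax/TfLiteMin swap; only the helpers differ. Worked examples of the clamping (a sketch of expected values, not code from this commit):

```cpp
// ActivationValFloat(kTfLiteActRelu,     -3.0f)  -> 0.0f  (clamp below at 0)
// ActivationValFloat(kTfLiteActReluN1To1, 2.0f)  -> 1.0f  (clamp to [-1, 1])
// ActivationValFloat(kTfLiteActRelu6,     7.5f)  -> 6.0f  (clamp to [0, 6])
```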
@@ -18,30 +18,82 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"

namespace tflite {
namespace ops {
namespace micro {
namespace activations {
+namespace {
+
+struct ReluOpData {
+  ReluParams params;
+};
+
+struct Relu6OpData {
+  int8_t six_int8;
+  int8_t zero_int8;
+  uint8_t six_uint8;
+  uint8_t zero_uint8;
+};
+
+}  // namespace

constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;

-template <typename Q>
-inline void ReluQuantized(int32_t lower, const RuntimeShape& input_shape,
-                          const Q* input_data, const RuntimeShape& output_shape,
-                          Q* output_data) {
+template <typename T>
+inline void ReluQuantized(const ReluOpData& data,
+                          const RuntimeShape& input_shape,
+                          const RuntimeShape& output_shape, const T* input_data,
+                          T* output_data) {
  const int flat_size = MatchingFlatSize(input_shape, output_shape);
  for (int i = 0; i < flat_size; ++i) {
-    const Q val = input_data[i];
-    const Q clamped = val < lower ? lower : val;
-    output_data[i] = clamped;
+    const int32_t val = static_cast<int32_t>(input_data[i]);
+    int32_t clamped =
+        data.params.output_offset +
+        MultiplyByQuantizedMultiplier(val - data.params.input_offset,
+                                      data.params.output_multiplier,
+                                      data.params.output_shift);
+    clamped = std::max(data.params.quantized_activation_min, clamped);
+    clamped = std::min(data.params.quantized_activation_max, clamped);
+    output_data[i] = static_cast<T>(clamped);
  }
}

+template <typename T>
+inline void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
+                                ReluOpData* data) {
+  float act_min = 0.0;
+  float act_max = std::numeric_limits<float>::infinity();
+  double real_multiplier =
+      static_cast<double>(input->params.scale / output->params.scale);
+
+  const RuntimeShape input_shape = GetTensorShape(input);
+  const RuntimeShape output_shape = GetTensorShape(output);
+
+  QuantizeMultiplier(real_multiplier, &data->params.output_multiplier,
+                     &data->params.output_shift);
+
+  data->params.quantized_activation_min = std::max(
+      static_cast<int32_t>(std::numeric_limits<T>::min()),
+      output->params.zero_point +
+          static_cast<int32_t>(roundf(act_min / output->params.scale)));
+  data->params.quantized_activation_max =
+      act_max == std::numeric_limits<float>::infinity()
+          ? static_cast<int32_t>(std::numeric_limits<T>::max())
+          : std::min(static_cast<int32_t>(std::numeric_limits<T>::max()),
+                     output->params.zero_point +
+                         static_cast<int32_t>(
+                             roundf(act_max / output->params.scale)));
+  data->params.input_offset = input->params.zero_point;
+  data->params.output_offset = output->params.zero_point;
+}

inline void ReluFloat(const RuntimeShape& input_shape, const float* input_data,
                      const RuntimeShape& output_shape, float* output_data) {
  const int flat_size = MatchingFlatSize(input_shape, output_shape);
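Note: the new ReluQuantized no longer just clamps at the input zero point; it requantizes from the input scale to the output scale. A self-contained sketch of the per-element math above (QuantParams abbreviates the ReluParams fields actually used; MultiplyByQuantizedMultiplier is the fixed-point rounding multiply from tensorflow/lite/kernels/internal/common.h):

```cpp
struct QuantParams {  // abbreviated stand-in for ReluParams
  int32_t input_offset, output_offset;
  int32_t output_multiplier;
  int output_shift;
  int32_t quantized_activation_min, quantized_activation_max;
};

int32_t RequantizeAndClamp(int32_t x, const QuantParams& p) {
  // Rescale into the output quantization domain, then clamp to the fused
  // activation range (which encodes "max(0, .)" for RELU).
  int32_t y = p.output_offset +
              MultiplyByQuantizedMultiplier(x - p.input_offset,
                                            p.output_multiplier,
                                            p.output_shift);
  if (y < p.quantized_activation_min) y = p.quantized_activation_min;
  if (y > p.quantized_activation_max) y = p.quantized_activation_max;
  return y;
}
```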
@@ -77,33 +129,59 @@ inline void Relu6Quantized(Q lower, Q upper, const RuntimeShape& input_shape,
  }
}

+void* ReluInit(TfLiteContext* context, const char* buffer, size_t length) {
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  return context->AllocatePersistentBuffer(context, sizeof(ReluOpData));
+}
+
+TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->user_data != nullptr);
+  ReluOpData* data = static_cast<ReluOpData*>(node->user_data);
+
+  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
+
+  if (input->type == kTfLiteInt8) {
+    CalculateReluOpData<int8_t>(input, output, data);
+  } else if (input->type == kTfLiteUInt8) {
+    CalculateReluOpData<uint8_t>(input, output, data);
+  }
+
+  return kTfLiteOk;
+}

TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TFLITE_DCHECK(node->user_data != nullptr);
+  const ReluOpData& data = *(static_cast<const ReluOpData*>(node->user_data));
+
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kInputTensor);
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  switch (input->type) {
    case kTfLiteFloat32: {
-      ReluFloat(GetTensorShape(input), GetTensorData<float>(input),
-                GetTensorShape(output), GetTensorData<float>(output));
+      ReluFloat(tflite::micro::GetTensorShape(input),
+                tflite::micro::GetTensorData<float>(input),
+                tflite::micro::GetTensorShape(output),
+                tflite::micro::GetTensorData<float>(output));

      return kTfLiteOk;
    }
    case kTfLiteInt8: {
-      ReluQuantized<int8_t>(input->params.zero_point, GetTensorShape(input),
-                            GetTensorData<int8_t>(input),
-                            GetTensorShape(output),
-                            GetTensorData<int8_t>(output));
+      ReluQuantized<int8_t>(data, tflite::micro::GetTensorShape(input),
+                            tflite::micro::GetTensorShape(output),
+                            tflite::micro::GetTensorData<int8_t>(input),
+                            tflite::micro::GetTensorData<int8_t>(output));
      return kTfLiteOk;
    }
    case kTfLiteUInt8: {
-      ReluQuantized<uint8_t>(input->params.zero_point, GetTensorShape(input),
-                             GetTensorData<uint8_t>(input),
-                             GetTensorShape(output),
-                             GetTensorData<uint8_t>(output));
+      ReluQuantized<uint8_t>(data, tflite::micro::GetTensorShape(input),
+                             tflite::micro::GetTensorShape(output),
+                             tflite::micro::GetTensorData<uint8_t>(input),
+                             tflite::micro::GetTensorData<uint8_t>(output));
      return kTfLiteOk;
    }
    default: {
@@ -114,37 +192,63 @@ TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
  }
}

+void* Relu6Init(TfLiteContext* context, const char* buffer, size_t length) {
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  return context->AllocatePersistentBuffer(context, sizeof(Relu6OpData));
+}
+
+TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->user_data != nullptr);
+  Relu6OpData* data = static_cast<Relu6OpData*>(node->user_data);
+
+  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
+
+  if (input->type == kTfLiteInt8) {
+    data->six_int8 = FloatToQuantizedType<int8_t>(6.0f, input->params.scale,
+                                                  input->params.zero_point);
+    data->zero_int8 = input->params.zero_point;
+  } else if (input->type == kTfLiteUInt8) {
+    data->six_uint8 = FloatToQuantizedType<uint8_t>(6.0f, input->params.scale,
+                                                    input->params.zero_point);
+    data->zero_uint8 = input->params.zero_point;
+  }
+
+  return kTfLiteOk;
+}

TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TFLITE_DCHECK(node->user_data != nullptr);
+  const Relu6OpData& data = *(static_cast<const Relu6OpData*>(node->user_data));
+
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kInputTensor);
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  switch (input->type) {
    case kTfLiteFloat32: {
-      Relu6Float(GetTensorShape(input), GetTensorData<float>(input),
-                 GetTensorShape(output), GetTensorData<float>(output));
+      Relu6Float(tflite::micro::GetTensorShape(input),
+                 tflite::micro::GetTensorData<float>(input),
+                 tflite::micro::GetTensorShape(output),
+                 tflite::micro::GetTensorData<float>(output));

      return kTfLiteOk;
    }
    case kTfLiteInt8: {
-      const int8_t six = FloatToAsymmetricQuantizedInt8(
-          6.0f, input->params.scale, input->params.zero_point);
-      const int8_t zero = input->params.zero_point;
-      Relu6Quantized<int8_t>(
-          zero, six, GetTensorShape(input), GetTensorData<int8_t>(input),
-          GetTensorShape(output), GetTensorData<int8_t>(output));
+      Relu6Quantized<int8_t>(data.zero_int8, data.six_int8,
+                             tflite::micro::GetTensorShape(input),
+                             tflite::micro::GetTensorData<int8_t>(input),
+                             tflite::micro::GetTensorShape(output),
+                             tflite::micro::GetTensorData<int8_t>(output));
      return kTfLiteOk;
    }
    case kTfLiteUInt8: {
-      const uint8_t six = FloatToAsymmetricQuantizedUInt8(
-          6.0f, input->params.scale, input->params.zero_point);
-      const uint8_t zero = input->params.zero_point;
-      Relu6Quantized<uint8_t>(
-          zero, six, GetTensorShape(input), GetTensorData<uint8_t>(input),
-          GetTensorShape(output), GetTensorData<uint8_t>(output));
+      Relu6Quantized<uint8_t>(data.zero_uint8, data.six_uint8,
+                              tflite::micro::GetTensorShape(input),
+                              tflite::micro::GetTensorData<uint8_t>(input),
+                              tflite::micro::GetTensorShape(output),
+                              tflite::micro::GetTensorData<uint8_t>(output));
      return kTfLiteOk;
    }
    default: {
@@ -157,28 +261,26 @@ TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {

}  // namespace activations

-TfLiteRegistration* Register_RELU() {
-  static TfLiteRegistration r = {/*init=*/nullptr,
-                                 /*free=*/nullptr,
-                                 /*prepare=*/activations::ReluPrepare,
-                                 /*invoke=*/activations::ReluEval,
-                                 /*profiling_string=*/nullptr,
-                                 /*builtin_code=*/0,
-                                 /*custom_name=*/nullptr,
-                                 /*version=*/0};
-  return &r;
+TfLiteRegistration Register_RELU() {
+  return {/*init=*/activations::ReluInit,
+          /*free=*/nullptr,
+          /*prepare=*/activations::ReluPrepare,
+          /*invoke=*/activations::ReluEval,
+          /*profiling_string=*/nullptr,
+          /*builtin_code=*/0,
+          /*custom_name=*/nullptr,
+          /*version=*/0};
}

-TfLiteRegistration* Register_RELU6() {
-  static TfLiteRegistration r = {/*init=*/nullptr,
-                                 /*free=*/nullptr,
-                                 /*prepare=*/activations::Relu6Prepare,
-                                 /*invoke=*/activations::Relu6Eval,
-                                 /*profiling_string=*/nullptr,
-                                 /*builtin_code=*/0,
-                                 /*custom_name=*/nullptr,
-                                 /*version=*/0};
-  return &r;
+TfLiteRegistration Register_RELU6() {
+  return {/*init=*/activations::Relu6Init,
+          /*free=*/nullptr,
+          /*prepare=*/activations::Relu6Prepare,
+          /*invoke=*/activations::Relu6Eval,
+          /*profiling_string=*/nullptr,
+          /*builtin_code=*/0,
+          /*custom_name=*/nullptr,
+          /*version=*/0};
}

}  // namespace micro
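Note: throughout this commit, Register_* changes from returning a pointer to a function-local static to returning TfLiteRegistration by value, so resolvers can copy the registration into their own storage. A minimal sketch of the two shapes (Register_OLD/Register_NEW are hypothetical names for illustration):

```cpp
// Old shape: all callers share one mutable static.
TfLiteRegistration* Register_OLD() {
  static TfLiteRegistration r = {/*...*/};
  return &r;
}

// New shape (this commit): each caller receives its own copy.
TfLiteRegistration Register_NEW() {
  return {/*init=*/nullptr,   /*free=*/nullptr,
          /*prepare=*/nullptr, /*invoke=*/nullptr,
          /*profiling_string=*/nullptr, /*builtin_code=*/0,
          /*custom_name=*/nullptr, /*version=*/0};
}
```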
@@ -23,6 +23,8 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/memory_helpers.h"

namespace tflite {
namespace ops {
@@ -40,18 +42,22 @@ struct OpData {
  // and the special 16-bit -> 16bit quantized path
  int input1_shift;
  int input2_shift;
-  int32 output_activation_min;
-  int32 output_activation_max;
+  int32_t output_activation_min;
+  int32_t output_activation_max;

  // These fields are used only in the general 8-bit -> 8bit quantized path
-  int32 input1_multiplier;
-  int32 input2_multiplier;
-  int32 output_multiplier;
+  int32_t input1_multiplier;
+  int32_t input2_multiplier;
+  int32_t output_multiplier;
  int output_shift;
  int left_shift;
-  int32 input1_offset;
-  int32 input2_offset;
-  int32 output_offset;
+  int32_t input1_offset;
+  int32_t input2_offset;
+  int32_t output_offset;

+  // Used only for float evals:
+  float output_activation_min_f32;
+  float output_activation_max_f32;
};

TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params,
@@ -89,37 +95,44 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params,
    TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
        context, params->activation, output, &data->output_activation_min,
        &data->output_activation_max));
+  } else if (output->type == kTfLiteFloat32) {
+    CalculateActivationRange(params->activation,
+                             &data->output_activation_min_f32,
+                             &data->output_activation_max_f32);
  }

  return kTfLiteOk;
}

void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params,
-             const OpData* data, const TfLiteTensor* input1,
-             const TfLiteTensor* input2, TfLiteTensor* output) {
-  float output_activation_min, output_activation_max;
-  CalculateActivationRange(params->activation, &output_activation_min,
-                           &output_activation_max);
+             const OpData* data, const TfLiteEvalTensor* input1,
+             const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
  tflite::ArithmeticParams op_params;
-  SetActivationParams(output_activation_min, output_activation_max, &op_params);
-#define TF_LITE_ADD(opname)                                                   \
-  reference_ops::opname(op_params, GetTensorShape(input1),                    \
-                        GetTensorData<float>(input1), GetTensorShape(input2), \
-                        GetTensorData<float>(input2), GetTensorShape(output), \
-                        GetTensorData<float>(output))
+  SetActivationParams(data->output_activation_min_f32,
+                      data->output_activation_max_f32, &op_params);
  if (data->requires_broadcast) {
-    TF_LITE_ADD(BroadcastAdd4DSlow);
+    reference_ops::BroadcastAdd4DSlow(
+        op_params, tflite::micro::GetTensorShape(input1),
+        tflite::micro::GetTensorData<float>(input1),
+        tflite::micro::GetTensorShape(input2),
+        tflite::micro::GetTensorData<float>(input2),
+        tflite::micro::GetTensorShape(output),
+        tflite::micro::GetTensorData<float>(output));
  } else {
-    TF_LITE_ADD(Add);
+    reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1),
+                       tflite::micro::GetTensorData<float>(input1),
+                       tflite::micro::GetTensorShape(input2),
+                       tflite::micro::GetTensorData<float>(input2),
+                       tflite::micro::GetTensorShape(output),
+                       tflite::micro::GetTensorData<float>(output));
  }
-#undef TF_LITE_ADD
}

TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
                              TfLiteAddParams* params, const OpData* data,
-                              const TfLiteTensor* input1,
-                              const TfLiteTensor* input2,
-                              TfLiteTensor* output) {
+                              const TfLiteEvalTensor* input1,
+                              const TfLiteEvalTensor* input2,
+                              TfLiteEvalTensor* output) {
  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
    tflite::ArithmeticParams op_params;
    op_params.left_shift = data->left_shift;
@@ -135,46 +148,91 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
    SetActivationParams(data->output_activation_min,
                        data->output_activation_max, &op_params);
    bool need_broadcast = reference_ops::ProcessBroadcastShapes(
-        GetTensorShape(input1), GetTensorShape(input2), &op_params);
-#define TF_LITE_ADD(type, opname, dtype)                             \
-  type::opname(op_params, GetTensorShape(input1),                    \
-               GetTensorData<dtype>(input1), GetTensorShape(input2), \
-               GetTensorData<dtype>(input2), GetTensorShape(output), \
-               GetTensorData<dtype>(output));
+        tflite::micro::GetTensorShape(input1),
+        tflite::micro::GetTensorShape(input2), &op_params);
    if (output->type == kTfLiteInt8) {
      if (need_broadcast) {
-        TF_LITE_ADD(reference_integer_ops, BroadcastAdd4DSlow, int8_t);
+        reference_integer_ops::BroadcastAdd4DSlow(
+            op_params, tflite::micro::GetTensorShape(input1),
+            tflite::micro::GetTensorData<int8_t>(input1),
+            tflite::micro::GetTensorShape(input2),
+            tflite::micro::GetTensorData<int8_t>(input2),
+            tflite::micro::GetTensorShape(output),
+            tflite::micro::GetTensorData<int8_t>(output));
      } else {
-        TF_LITE_ADD(reference_integer_ops, Add, int8_t);
+        reference_integer_ops::Add(
+            op_params, tflite::micro::GetTensorShape(input1),
+            tflite::micro::GetTensorData<int8_t>(input1),
+            tflite::micro::GetTensorShape(input2),
+            tflite::micro::GetTensorData<int8_t>(input2),
+            tflite::micro::GetTensorShape(output),
+            tflite::micro::GetTensorData<int8_t>(output));
      }
    } else {
      if (need_broadcast) {
-        TF_LITE_ADD(reference_ops, BroadcastAdd4DSlow, uint8_t);
+        reference_ops::BroadcastAdd4DSlow(
+            op_params, tflite::micro::GetTensorShape(input1),
+            tflite::micro::GetTensorData<uint8_t>(input1),
+            tflite::micro::GetTensorShape(input2),
+            tflite::micro::GetTensorData<uint8_t>(input2),
+            tflite::micro::GetTensorShape(output),
+            tflite::micro::GetTensorData<uint8_t>(output));
      } else {
-        TF_LITE_ADD(reference_ops, Add, uint8_t);
+        reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1),
+                           tflite::micro::GetTensorData<uint8_t>(input1),
+                           tflite::micro::GetTensorShape(input2),
+                           tflite::micro::GetTensorData<uint8_t>(input2),
+                           tflite::micro::GetTensorShape(output),
+                           tflite::micro::GetTensorData<uint8_t>(output));
      }
    }
-#undef TF_LITE_ADD
  }

  return kTfLiteOk;
}

+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  return context->AllocatePersistentBuffer(context, sizeof(OpData));
+}
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->user_data != nullptr);
+  TFLITE_DCHECK(node->builtin_data != nullptr);
+
+  const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
+  TF_LITE_ENSURE(context, input1 != nullptr);
+  const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
+  TF_LITE_ENSURE(context, input2 != nullptr);
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
+
+  OpData* data = static_cast<OpData*>(node->user_data);
+  auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
+
+  TF_LITE_ENSURE_STATUS(
+      CalculateOpData(context, params, input1, input2, output, data));
+
+  return kTfLiteOk;
+}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);

-  const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
-  const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TFLITE_DCHECK(node->user_data != nullptr);
+  const OpData* data = static_cast<const OpData*>(node->user_data);

-  OpData data;
-  TF_LITE_ENSURE_STATUS(
-      CalculateOpData(context, params, input1, input2, output, &data));
+  const TfLiteEvalTensor* input1 =
+      tflite::micro::GetEvalInput(context, node, kInputTensor1);
+  const TfLiteEvalTensor* input2 =
+      tflite::micro::GetEvalInput(context, node, kInputTensor2);
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  if (output->type == kTfLiteFloat32) {
-    EvalAdd(context, node, params, &data, input1, input2, output);
+    EvalAdd(context, node, params, data, input1, input2, output);
  } else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
-    TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, &data,
+    TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, data,
                                                input1, input2, output));
  } else {
    TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@@ -187,16 +245,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {

}  // namespace add

-TfLiteRegistration* Register_ADD() {
-  static TfLiteRegistration r = {/*init=*/nullptr,
-                                 /*free=*/nullptr,
-                                 /*prepare=*/nullptr,
-                                 /*invoke=*/add::Eval,
-                                 /*profiling_string=*/nullptr,
-                                 /*builtin_code=*/0,
-                                 /*custom_name=*/nullptr,
-                                 /*version=*/0};
-  return &r;
+TfLiteRegistration Register_ADD() {
+  return {/*init=*/add::Init,
+          /*free=*/nullptr,
+          /*prepare=*/add::Prepare,
+          /*invoke=*/add::Eval,
+          /*profiling_string=*/nullptr,
+          /*builtin_code=*/0,
+          /*custom_name=*/nullptr,
+          /*version=*/0};
}

}  // namespace micro
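Note: the ADD kernel is the clearest example of the lifecycle this commit moves ops onto: per-node state is allocated once in Init from the persistent arena, computed in Prepare while full TfLiteTensor metadata is still available, and only read in Eval, which now sees lightweight TfLiteEvalTensor handles. A simplified sketch of that pattern (OpData stands in for whatever the kernel caches):

```cpp
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  // One OpData per node, allocated from the persistent arena.
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  OpData* data = static_cast<OpData*>(node->user_data);
  // Fill *data from tensor quantization params; full tensors are available
  // here but not (cheaply) during Eval.
  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const OpData* data = static_cast<const OpData*>(node->user_data);
  // Run the op using only TfLiteEvalTensor plus the precomputed *data,
  // instead of recomputing quantization parameters on every invocation.
  return kTfLiteOk;
}
```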
@@ -1,83 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/micro/kernels/all_ops_resolver.h"

#include "tensorflow/lite/micro/kernels/micro_ops.h"

namespace tflite {
namespace ops {
namespace micro {

// Register each supported op with:
// AddBuiltin(<operator ID>, <registration>, [min version], [max version])
AllOpsResolver::AllOpsResolver() {
  AddBuiltin(BuiltinOperator_FULLY_CONNECTED, Register_FULLY_CONNECTED(), 1, 4);
  AddBuiltin(BuiltinOperator_MAX_POOL_2D, Register_MAX_POOL_2D(), 1, 2);
  AddBuiltin(BuiltinOperator_SOFTMAX, Register_SOFTMAX(), 1, 2);
  AddBuiltin(BuiltinOperator_LOGISTIC, Register_LOGISTIC(), 1, 2);
  AddBuiltin(BuiltinOperator_SVDF, Register_SVDF(), 1, 3);
  AddBuiltin(BuiltinOperator_CONV_2D, Register_CONV_2D(), 1, 3);
  AddBuiltin(BuiltinOperator_CONCATENATION, Register_CONCATENATION(), 1, 3);
  AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D, Register_DEPTHWISE_CONV_2D(), 1,
             3);
  AddBuiltin(BuiltinOperator_AVERAGE_POOL_2D, Register_AVERAGE_POOL_2D(), 1, 2);
  AddBuiltin(BuiltinOperator_ABS, Register_ABS());
  AddBuiltin(BuiltinOperator_SIN, Register_SIN());
  AddBuiltin(BuiltinOperator_COS, Register_COS());
  AddBuiltin(BuiltinOperator_LOG, Register_LOG());
  AddBuiltin(BuiltinOperator_SQRT, Register_SQRT());
  AddBuiltin(BuiltinOperator_RSQRT, Register_RSQRT());
  AddBuiltin(BuiltinOperator_SQUARE, Register_SQUARE());
  AddBuiltin(BuiltinOperator_PRELU, Register_PRELU());
  AddBuiltin(BuiltinOperator_FLOOR, Register_FLOOR());
  AddBuiltin(BuiltinOperator_MAXIMUM, Register_MAXIMUM());
  AddBuiltin(BuiltinOperator_MINIMUM, Register_MINIMUM());
  AddBuiltin(BuiltinOperator_ARG_MAX, Register_ARG_MAX());
  AddBuiltin(BuiltinOperator_ARG_MIN, Register_ARG_MIN());
  AddBuiltin(BuiltinOperator_LOGICAL_OR, Register_LOGICAL_OR());
  AddBuiltin(BuiltinOperator_LOGICAL_AND, Register_LOGICAL_AND());
  AddBuiltin(BuiltinOperator_LOGICAL_NOT, Register_LOGICAL_NOT());
  AddBuiltin(BuiltinOperator_RESHAPE, Register_RESHAPE());
  AddBuiltin(BuiltinOperator_EQUAL, Register_EQUAL(), 1, 2);
  AddBuiltin(BuiltinOperator_NOT_EQUAL, Register_NOT_EQUAL(), 1, 2);
  AddBuiltin(BuiltinOperator_GREATER, Register_GREATER(), 1, 2);
  AddBuiltin(BuiltinOperator_GREATER_EQUAL, Register_GREATER_EQUAL(), 1, 2);
  AddBuiltin(BuiltinOperator_LESS, Register_LESS(), 1, 2);
  AddBuiltin(BuiltinOperator_LESS_EQUAL, Register_LESS_EQUAL(), 1, 2);
  AddBuiltin(BuiltinOperator_CEIL, Register_CEIL());
  AddBuiltin(BuiltinOperator_ROUND, Register_ROUND());
  AddBuiltin(BuiltinOperator_STRIDED_SLICE, Register_STRIDED_SLICE());
  AddBuiltin(BuiltinOperator_PACK, Register_PACK(), 1, 2);
  AddBuiltin(BuiltinOperator_PAD, Register_PAD(), 1, 2);
  AddBuiltin(BuiltinOperator_PADV2, Register_PADV2(), 1, 2);
  AddBuiltin(BuiltinOperator_SPLIT, Register_SPLIT(), 1, 3);
  AddBuiltin(BuiltinOperator_UNPACK, Register_UNPACK(), 1, 2);
  AddBuiltin(BuiltinOperator_NEG, Register_NEG());
  AddBuiltin(BuiltinOperator_ADD, Register_ADD(), 1, 2);
  AddBuiltin(BuiltinOperator_MUL, Register_MUL(), 1, 3);
  AddBuiltin(BuiltinOperator_SUB, Register_SUB(), 1, 2);
  AddBuiltin(BuiltinOperator_QUANTIZE, Register_QUANTIZE());
  AddBuiltin(BuiltinOperator_DEQUANTIZE, Register_DEQUANTIZE(), 1, 2);
  AddBuiltin(BuiltinOperator_RELU, Register_RELU());
  AddBuiltin(BuiltinOperator_RELU6, Register_RELU6());
  AddBuiltin(BuiltinOperator_MEAN, Register_MEAN());
  AddBuiltin(BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
             Register_RESIZE_NEAREST_NEIGHBOR(),
             /* min_version = */ 1,
             /* max_version = */ 2);
  AddBuiltin(BuiltinOperator_L2_NORMALIZATION, Register_L2_NORMALIZATION());
}

}  // namespace micro
}  // namespace ops
}  // namespace tflite
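Note: for contrast with the deleted file above, the replacement all_ops_resolver.cc (the first hunk in this commit) folds the builtin enum and the min/max version arguments into per-op helper methods; roughly:

```cpp
// Old style, from the deleted kernels/all_ops_resolver.cc:
AddBuiltin(BuiltinOperator_CONV_2D, Register_CONV_2D(), 1, 3);

// New style, from the new all_ops_resolver.cc:
AddConv2D();
```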
@@ -19,6 +19,7 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/micro_utils.h"

namespace tflite {
@@ -45,14 +46,20 @@ inline void ArgMinMaxHelper(const RuntimeShape& input1_shape,
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node, bool is_arg_max) {
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
-  const TfLiteTensor* axis = GetInput(context, node, kAxis);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kInputTensor);
+  const TfLiteEvalTensor* axis =
+      tflite::micro::GetEvalInput(context, node, kAxis);
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

-#define TF_LITE_ARG_MIN_MAX(data_type, axis_type, output_type)            \
-  ArgMinMaxHelper(GetTensorShape(input), GetTensorData<data_type>(input), \
-                  GetTensorData<axis_type>(axis), GetTensorShape(output), \
-                  GetTensorData<output_type>(output), is_arg_max)
+#define TF_LITE_ARG_MIN_MAX(data_type, axis_type, output_type)        \
+  ArgMinMaxHelper(tflite::micro::GetTensorShape(input),               \
+                  tflite::micro::GetTensorData<data_type>(input),     \
+                  tflite::micro::GetTensorData<axis_type>(axis),      \
+                  tflite::micro::GetTensorShape(output),              \
+                  tflite::micro::GetTensorData<output_type>(output),  \
+                  is_arg_max)
  if (axis->type == kTfLiteInt32) {
    if (output->type == kTfLiteInt32) {
      switch (input->type) {
@@ -67,18 +74,19 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node, bool is_arg_max) {
          break;
        default:
          TF_LITE_KERNEL_LOG(context,
-                             "Only float32, uint8 and int8 are "
+                             "Only float32, uint8_t and int8_t are "
                             "supported currently, got %s.",
                             TfLiteTypeGetName(input->type));
          return kTfLiteError;
      }
    } else {
-      TF_LITE_KERNEL_LOG(context, "Only int32 are supported currently, got %s.",
+      TF_LITE_KERNEL_LOG(context,
+                         "Only int32_t are supported currently, got %s.",
                         TfLiteTypeGetName(output->type));
      return kTfLiteError;
    }
  } else {
-    TF_LITE_KERNEL_LOG(context, "Only int32 are supported currently, got %s.",
+    TF_LITE_KERNEL_LOG(context, "Only int32_t are supported currently, got %s.",
                       TfLiteTypeGetName(axis->type));
    return kTfLiteError;
  }
@@ -98,28 +106,26 @@ TfLiteStatus ArgMaxEval(TfLiteContext* context, TfLiteNode* node) {

}  // namespace arg_min_max

-TfLiteRegistration* Register_ARG_MAX() {
-  static TfLiteRegistration r = {/*init=*/nullptr,
-                                 /*free=*/nullptr,
-                                 /*prepare=*/nullptr,
-                                 /*invoke=*/arg_min_max::ArgMaxEval,
-                                 /*profiling_string=*/nullptr,
-                                 /*builtin_code=*/0,
-                                 /*custom_name=*/nullptr,
-                                 /*version=*/0};
-  return &r;
+TfLiteRegistration Register_ARG_MAX() {
+  return {/*init=*/nullptr,
+          /*free=*/nullptr,
+          /*prepare=*/nullptr,
+          /*invoke=*/arg_min_max::ArgMaxEval,
+          /*profiling_string=*/nullptr,
+          /*builtin_code=*/0,
+          /*custom_name=*/nullptr,
+          /*version=*/0};
}

-TfLiteRegistration* Register_ARG_MIN() {
-  static TfLiteRegistration r = {/*init=*/nullptr,
-                                 /*free=*/nullptr,
-                                 /*prepare=*/nullptr,
-                                 /*invoke=*/arg_min_max::ArgMinEval,
-                                 /*profiling_string=*/nullptr,
-                                 /*builtin_code=*/0,
-                                 /*custom_name=*/nullptr,
-                                 /*version=*/0};
-  return &r;
+TfLiteRegistration Register_ARG_MIN() {
+  return {/*init=*/nullptr,
+          /*free=*/nullptr,
+          /*prepare=*/nullptr,
+          /*invoke=*/arg_min_max::ArgMinEval,
+          /*profiling_string=*/nullptr,
+          /*builtin_code=*/0,
+          /*custom_name=*/nullptr,
+          /*version=*/0};
}

}  // namespace micro
@@ -18,6 +18,7 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
@@ -29,11 +30,13 @@ constexpr int kOutputTensor = 0;

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
-  TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32);
-  TF_LITE_ENSURE_EQ(context, output->type, input->type);
+  TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
+  TF_LITE_ENSURE_TYPES_EQ(context, output->type, input->type);
  TF_LITE_ENSURE_EQ(context, output->bytes, input->bytes);
  TF_LITE_ENSURE_EQ(context, output->dims->size, input->dims->size);
  for (int i = 0; i < output->dims->size; ++i) {
@@ -43,26 +46,29 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kInputTensor);
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

-  reference_ops::Ceil(GetTensorShape(input), GetTensorData<float>(input),
-                      GetTensorShape(output), GetTensorData<float>(output));
+  reference_ops::Ceil(tflite::micro::GetTensorShape(input),
+                      tflite::micro::GetTensorData<float>(input),
+                      tflite::micro::GetTensorShape(output),
+                      tflite::micro::GetTensorData<float>(output));

  return kTfLiteOk;
}
}  // namespace ceil

-TfLiteRegistration* Register_CEIL() {
-  static TfLiteRegistration r = {/*init=*/nullptr,
-                                 /*free=*/nullptr,
-                                 /*prepare=*/ceil::Prepare,
-                                 /*invoke=*/ceil::Eval,
-                                 /*profiling_string=*/nullptr,
-                                 /*builtin_code=*/0,
-                                 /*custom_name=*/nullptr,
-                                 /*version=*/0};
-  return &r;
+TfLiteRegistration Register_CEIL() {
+  return {/*init=*/nullptr,
+          /*free=*/nullptr,
+          /*prepare=*/ceil::Prepare,
+          /*invoke=*/ceil::Eval,
+          /*profiling_string=*/nullptr,
+          /*builtin_code=*/0,
+          /*custom_name=*/nullptr,
+          /*version=*/0};
}

}  // namespace micro
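Note: ceil.cc (and circular_buffer.cc below) swap TF_LITE_ENSURE_EQ on tensor types for TF_LITE_ENSURE_TYPES_EQ. Both fail Prepare when the types differ; the dedicated macro exists so mismatches can be reported in terms of TfLiteType names rather than raw integers (an assumption about the macro's motivation, not stated in this diff):

```cpp
// Equivalent checks in Prepare; the TYPES_EQ form is the one this commit adopts.
TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
```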
@@ -17,11 +17,10 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
-#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h"
-#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"

/*
 * The circular buffer custom operator is used to implement strided streaming
@@ -78,7 +77,9 @@ void Free(TfLiteContext* context, void* buffer) { op_data_counter = 0; }

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);

  TF_LITE_ENSURE(context, input != nullptr);
  TF_LITE_ENSURE(context, output != nullptr);
@@ -89,10 +90,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE_EQ(context, 1, input->dims->data[2]);
  TF_LITE_ENSURE_EQ(context, output->dims->data[3], input->dims->data[3]);

-  TF_LITE_ENSURE_EQ(context, input->type, output->type);
+  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);

-  // The circular buffer custom operator currently only supports int8.
-  TF_LITE_ENSURE_EQ(context, input->type, kTfLiteInt8);
+  // The circular buffer custom operator currently only supports int8_t.
+  TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8);

  // TODO(b/132070898): Use statically slotted OpData structures until a
  // scratch memory API is ready.
@@ -121,8 +122,10 @@ void EvalInt8(const int8_t* input, int num_slots, int depth, int8_t* output) {
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kInputTensor);
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  OpData* data = reinterpret_cast<OpData*>(node->user_data);

@@ -130,8 +133,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  int depth = output->dims->data[3];

  if (input->type == kTfLiteInt8) {
-    EvalInt8(GetTensorData<int8_t>(input), num_slots, depth,
-             GetTensorData<int8_t>(output));
+    EvalInt8(tflite::micro::GetTensorData<int8_t>(input), num_slots, depth,
+             tflite::micro::GetTensorData<int8_t>(output));
  } else {
    TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                       TfLiteTypeGetName(input->type), input->type);
@@ -18,6 +18,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -25,103 +26,109 @@ namespace micro {
|
||||
namespace comparisons {
|
||||
namespace {
|
||||
|
||||
struct OpData {
|
||||
ComparisonParams params;
|
||||
};
|
||||
|
||||
constexpr int kInputTensor1 = 0;
|
||||
constexpr int kInputTensor2 = 1;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
// TODO(ruic): optimize macros below to using template functions.
|
||||
#define TF_LITE_QUANTIZE_COMPARISON(opname) \
|
||||
template <typename input_dtype> \
|
||||
void EvalQuantized##opname(TfLiteContext* context, TfLiteNode* node, \
|
||||
const TfLiteTensor* input1, \
|
||||
const TfLiteTensor* input2, TfLiteTensor* output, \
|
||||
bool requires_broadcast) { \
|
||||
if (input1->type == kTfLiteUInt8 || input1->type == kTfLiteInt8) { \
|
||||
auto input1_offset = -input1->params.zero_point; \
|
||||
auto input2_offset = -input2->params.zero_point; \
|
||||
const int left_shift = 8; \
|
||||
\
|
||||
int32 input1_multiplier; \
|
||||
int input1_shift; \
|
||||
QuantizeMultiplierSmallerThanOneExp( \
|
||||
static_cast<double>(input1->params.scale), &input1_multiplier, \
|
||||
&input1_shift); \
|
||||
int32 input2_multiplier; \
|
||||
int input2_shift; \
|
||||
QuantizeMultiplierSmallerThanOneExp( \
|
||||
static_cast<double>(input2->params.scale), &input2_multiplier, \
|
||||
&input2_shift); \
|
||||
\
|
||||
ComparisonParams op_params; \
|
||||
op_params.left_shift = left_shift; \
|
||||
op_params.input1_offset = input1_offset; \
|
||||
op_params.input1_multiplier = input1_multiplier; \
|
||||
op_params.input1_shift = input1_shift; \
|
||||
op_params.input2_offset = input2_offset; \
|
||||
op_params.input2_multiplier = input2_multiplier; \
|
||||
op_params.input2_shift = input2_shift; \
|
||||
if (requires_broadcast) { \
|
||||
reference_ops::Broadcast4DSlow##opname##WithScaling( \
|
||||
op_params, GetTensorShape(input1), \
|
||||
GetTensorData<input_dtype>(input1), GetTensorShape(input2), \
|
||||
GetTensorData<input_dtype>(input2), GetTensorShape(output), \
|
||||
GetTensorData<bool>(output)); \
|
||||
} else { \
|
||||
reference_ops::opname##WithScaling( \
|
||||
op_params, GetTensorShape(input1), \
|
||||
GetTensorData<input_dtype>(input1), GetTensorShape(input2), \
|
||||
GetTensorData<input_dtype>(input2), GetTensorShape(output), \
|
||||
GetTensorData<bool>(output)); \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
TF_LITE_QUANTIZE_COMPARISON(Equal);
|
||||
TF_LITE_QUANTIZE_COMPARISON(NotEqual);
|
||||
TF_LITE_QUANTIZE_COMPARISON(Greater);
|
||||
TF_LITE_QUANTIZE_COMPARISON(GreaterEqual);
|
||||
TF_LITE_QUANTIZE_COMPARISON(Less);
|
||||
TF_LITE_QUANTIZE_COMPARISON(LessEqual);
|
||||
#undef TF_LITE_QUANTIZE_COMPARISON
|
||||
|
||||
#define TF_LITE_COMPARISON(type, opname, requires_broadcast) \
|
||||
{ \
|
||||
ComparisonParams op_params; \
|
||||
requires_broadcast \
|
||||
? reference_ops::Broadcast4DSlow##opname##NoScaling( \
|
||||
op_params, GetTensorShape(input1), GetTensorData<type>(input1), \
|
||||
GetTensorShape(input2), GetTensorData<type>(input2), \
|
||||
GetTensorShape(output), GetTensorData<bool>(output)) \
|
||||
: reference_ops::opname##NoScaling( \
|
||||
op_params, GetTensorShape(input1), GetTensorData<type>(input1), \
|
||||
GetTensorShape(input2), GetTensorData<type>(input2), \
|
||||
GetTensorShape(output), GetTensorData<bool>(output)); \
|
||||
}
|
||||
|
||||
TfLiteStatus EqualEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
bool requires_broadcast = !HaveSameShapes(input1, input2);
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
const TfLiteEvalTensor* input1 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor1);
|
||||
const TfLiteEvalTensor* input2 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor2);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
|
||||
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
|
||||
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
|
||||
bool* output_data = tflite::micro::GetTensorData<bool>(output);
|
||||
|
||||
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
|
||||
switch (input1->type) {
|
||||
case kTfLiteBool:
|
||||
TF_LITE_COMPARISON(bool, Equal, requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<bool>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<bool>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::EqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<bool>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<bool>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteFloat32:
|
||||
TF_LITE_COMPARISON(float, Equal, requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<float>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<float>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::EqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<float>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<float>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt32:
|
||||
TF_LITE_COMPARISON(int32_t, Equal, requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::EqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt64:
|
||||
TF_LITE_COMPARISON(int64_t, Equal, requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::EqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
|
||||
output_data);
|
||||
      break;
    case kTfLiteUInt8:
      EvalQuantizedEqual<uint8_t>(context, node, input1, input2, output,
                                  requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowEqualWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
                output_data)
          : reference_ops::EqualWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
                output_data);
      break;
    case kTfLiteInt8:
      EvalQuantizedEqual<int8_t>(context, node, input1, input2, output,
                                 requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowEqualWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int8_t>(input2), output_shape,
                output_data)
          : reference_ops::EqualWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int8_t>(input2), output_shape,
                output_data);
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@@ -133,30 +140,100 @@ TfLiteStatus EqualEval(TfLiteContext* context, TfLiteNode* node) {

// TODO(renjieliu): Refactor the logic to avoid duplications.
TfLiteStatus NotEqualEval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
  const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  bool requires_broadcast = !HaveSameShapes(input1, input2);
  TFLITE_DCHECK(node->user_data != nullptr);
  const OpData* data = static_cast<const OpData*>(node->user_data);

  const TfLiteEvalTensor* input1 =
      tflite::micro::GetEvalInput(context, node, kInputTensor1);
  const TfLiteEvalTensor* input2 =
      tflite::micro::GetEvalInput(context, node, kInputTensor2);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
  RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
  RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
  bool* output_data = tflite::micro::GetTensorData<bool>(output);

  bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
  switch (input1->type) {
    case kTfLiteBool:
      TF_LITE_COMPARISON(bool, NotEqual, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowNotEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<bool>(input1), input2_shape,
                tflite::micro::GetTensorData<bool>(input2), output_shape,
                output_data)
          : reference_ops::NotEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<bool>(input1), input2_shape,
                tflite::micro::GetTensorData<bool>(input2), output_shape,
                output_data);
      break;
    case kTfLiteFloat32:
      TF_LITE_COMPARISON(float, NotEqual, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowNotEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<float>(input1), input2_shape,
                tflite::micro::GetTensorData<float>(input2), output_shape,
                output_data)
          : reference_ops::NotEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<float>(input1), input2_shape,
                tflite::micro::GetTensorData<float>(input2), output_shape,
                output_data);
      break;
    case kTfLiteInt32:
      TF_LITE_COMPARISON(int32_t, NotEqual, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowNotEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int32_t>(input2), output_shape,
                output_data)
          : reference_ops::NotEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int32_t>(input2), output_shape,
                output_data);
      break;
    case kTfLiteInt64:
      TF_LITE_COMPARISON(int64_t, NotEqual, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowNotEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int64_t>(input2), output_shape,
                output_data)
          : reference_ops::NotEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int64_t>(input2), output_shape,
                output_data);
      break;
    case kTfLiteUInt8:
      EvalQuantizedNotEqual<uint8_t>(context, node, input1, input2, output,
                                     requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowNotEqualWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
                output_data)
          : reference_ops::NotEqualWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
                output_data);
      break;
    case kTfLiteInt8:
      EvalQuantizedNotEqual<int8_t>(context, node, input1, input2, output,
                                    requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowNotEqualWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int8_t>(input2), output_shape,
                output_data)
          : reference_ops::NotEqualWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int8_t>(input2), output_shape,
                output_data);
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@@ -167,27 +244,87 @@ TfLiteStatus NotEqualEval(TfLiteContext* context, TfLiteNode* node) {
}
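The old TF_LITE_COMPARISON macro and EvalQuantized* helpers above are replaced by an explicit dispatch: same-shape inputs take the flat elementwise kernel, mismatched shapes fall back to the slow 4-D broadcasting kernel. For intuition, here is a minimal sketch of what the broadcast variant computes, reduced to two dimensions (illustrative only, not the TFLite reference implementation; the name is made up):

#include <cstddef>

// Sketch: elementwise NotEqual with NumPy-style broadcasting over a
// [rows1 x cols1] and [rows2 x cols2] pair, where each extent is either
// equal to the output extent or 1. The real Broadcast4DSlow* kernels do
// the same index arithmetic over four dimensions.
void BroadcastNotEqual2D(const float* in1, size_t rows1, size_t cols1,
                         const float* in2, size_t rows2, size_t cols2,
                         bool* out, size_t out_rows, size_t out_cols) {
  for (size_t r = 0; r < out_rows; ++r) {
    for (size_t c = 0; c < out_cols; ++c) {
      // A dimension of size 1 is "stretched": its index is clamped to 0.
      const float a = in1[(rows1 == 1 ? 0 : r) * cols1 + (cols1 == 1 ? 0 : c)];
      const float b = in2[(rows2 == 1 ? 0 : r) * cols2 + (cols2 == 1 ? 0 : c)];
      out[r * out_cols + c] = (a != b);
    }
  }
}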

TfLiteStatus GreaterEval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
  const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  bool requires_broadcast = !HaveSameShapes(input1, input2);
  TFLITE_DCHECK(node->user_data != nullptr);
  const OpData* data = static_cast<const OpData*>(node->user_data);

  const TfLiteEvalTensor* input1 =
      tflite::micro::GetEvalInput(context, node, kInputTensor1);
  const TfLiteEvalTensor* input2 =
      tflite::micro::GetEvalInput(context, node, kInputTensor2);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
  RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
  RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
  bool* output_data = tflite::micro::GetTensorData<bool>(output);

  bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
  switch (input1->type) {
    case kTfLiteFloat32:
      TF_LITE_COMPARISON(float, Greater, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowGreaterNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<float>(input1), input2_shape,
                tflite::micro::GetTensorData<float>(input2), output_shape,
                output_data)
          : reference_ops::GreaterNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<float>(input1), input2_shape,
                tflite::micro::GetTensorData<float>(input2), output_shape,
                output_data);
      break;
    case kTfLiteInt32:
      TF_LITE_COMPARISON(int32_t, Greater, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowGreaterNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int32_t>(input2), output_shape,
                output_data)
          : reference_ops::GreaterNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int32_t>(input2), output_shape,
                output_data);
      break;
    case kTfLiteInt64:
      TF_LITE_COMPARISON(int64_t, Greater, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowGreaterNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int64_t>(input2), output_shape,
                output_data)
          : reference_ops::GreaterNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int64_t>(input2), output_shape,
                output_data);
      break;
    case kTfLiteUInt8:
      EvalQuantizedGreater<uint8_t>(context, node, input1, input2, output,
                                    requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowGreaterWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
                output_data)
          : reference_ops::GreaterWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
                output_data);
      break;
    case kTfLiteInt8:
      EvalQuantizedGreater<int8_t>(context, node, input1, input2, output,
                                   requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowGreaterWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int8_t>(input2), output_shape,
                output_data)
          : reference_ops::GreaterWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int8_t>(input2), output_shape,
                output_data);
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@@ -198,27 +335,87 @@ TfLiteStatus GreaterEval(TfLiteContext* context, TfLiteNode* node) {
}

TfLiteStatus GreaterEqualEval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
  const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  bool requires_broadcast = !HaveSameShapes(input1, input2);
  TFLITE_DCHECK(node->user_data != nullptr);
  const OpData* data = static_cast<const OpData*>(node->user_data);

  const TfLiteEvalTensor* input1 =
      tflite::micro::GetEvalInput(context, node, kInputTensor1);
  const TfLiteEvalTensor* input2 =
      tflite::micro::GetEvalInput(context, node, kInputTensor2);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
  RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
  RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
  bool* output_data = tflite::micro::GetTensorData<bool>(output);

  bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
  switch (input1->type) {
    case kTfLiteFloat32:
      TF_LITE_COMPARISON(float, GreaterEqual, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<float>(input1), input2_shape,
                tflite::micro::GetTensorData<float>(input2), output_shape,
                output_data)
          : reference_ops::GreaterEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<float>(input1), input2_shape,
                tflite::micro::GetTensorData<float>(input2), output_shape,
                output_data);
      break;
    case kTfLiteInt32:
      TF_LITE_COMPARISON(int32_t, GreaterEqual, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int32_t>(input2), output_shape,
                output_data)
          : reference_ops::GreaterEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int32_t>(input2), output_shape,
                output_data);
      break;
    case kTfLiteInt64:
      TF_LITE_COMPARISON(int64_t, GreaterEqual, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int64_t>(input2), output_shape,
                output_data)
          : reference_ops::GreaterEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int64_t>(input2), output_shape,
                output_data);
      break;
    case kTfLiteUInt8:
      EvalQuantizedGreaterEqual<uint8_t>(context, node, input1, input2, output,
                                         requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowGreaterEqualWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
                output_data)
          : reference_ops::GreaterEqualWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
                output_data);
      break;
    case kTfLiteInt8:
      EvalQuantizedGreaterEqual<int8_t>(context, node, input1, input2, output,
                                        requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowGreaterEqualWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int8_t>(input2), output_shape,
                output_data)
          : reference_ops::GreaterEqualWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int8_t>(input2), output_shape,
                output_data);
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@@ -229,27 +426,87 @@ TfLiteStatus GreaterEqualEval(TfLiteContext* context, TfLiteNode* node) {
}

TfLiteStatus LessEval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
  const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  bool requires_broadcast = !HaveSameShapes(input1, input2);
  TFLITE_DCHECK(node->user_data != nullptr);
  const OpData* data = static_cast<const OpData*>(node->user_data);

  const TfLiteEvalTensor* input1 =
      tflite::micro::GetEvalInput(context, node, kInputTensor1);
  const TfLiteEvalTensor* input2 =
      tflite::micro::GetEvalInput(context, node, kInputTensor2);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
  RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
  RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
  bool* output_data = tflite::micro::GetTensorData<bool>(output);

  bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
  switch (input1->type) {
    case kTfLiteFloat32:
      TF_LITE_COMPARISON(float, Less, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowLessNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<float>(input1), input2_shape,
                tflite::micro::GetTensorData<float>(input2), output_shape,
                output_data)
          : reference_ops::LessNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<float>(input1), input2_shape,
                tflite::micro::GetTensorData<float>(input2), output_shape,
                output_data);
      break;
    case kTfLiteInt32:
      TF_LITE_COMPARISON(int32_t, Less, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowLessNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int32_t>(input2), output_shape,
                output_data)
          : reference_ops::LessNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int32_t>(input2), output_shape,
                output_data);
      break;
    case kTfLiteInt64:
      TF_LITE_COMPARISON(int64_t, Less, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowLessNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int64_t>(input2), output_shape,
                output_data)
          : reference_ops::LessNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int64_t>(input2), output_shape,
                output_data);
      break;
    case kTfLiteUInt8:
      EvalQuantizedLess<uint8_t>(context, node, input1, input2, output,
                                 requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowLessWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
                output_data)
          : reference_ops::LessWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
                output_data);
      break;
    case kTfLiteInt8:
      EvalQuantizedLess<int8_t>(context, node, input1, input2, output,
                                requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowLessWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int8_t>(input2), output_shape,
                output_data)
          : reference_ops::LessWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int8_t>(input2), output_shape,
                output_data);
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@@ -260,27 +517,87 @@ TfLiteStatus LessEval(TfLiteContext* context, TfLiteNode* node) {
}

TfLiteStatus LessEqualEval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
  const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  bool requires_broadcast = !HaveSameShapes(input1, input2);
  TFLITE_DCHECK(node->user_data != nullptr);
  const OpData* data = static_cast<const OpData*>(node->user_data);

  const TfLiteEvalTensor* input1 =
      tflite::micro::GetEvalInput(context, node, kInputTensor1);
  const TfLiteEvalTensor* input2 =
      tflite::micro::GetEvalInput(context, node, kInputTensor2);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
  RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
  RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
  bool* output_data = tflite::micro::GetTensorData<bool>(output);

  bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
  switch (input1->type) {
    case kTfLiteFloat32:
      TF_LITE_COMPARISON(float, LessEqual, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowLessEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<float>(input1), input2_shape,
                tflite::micro::GetTensorData<float>(input2), output_shape,
                output_data)
          : reference_ops::LessEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<float>(input1), input2_shape,
                tflite::micro::GetTensorData<float>(input2), output_shape,
                output_data);
      break;
    case kTfLiteInt32:
      TF_LITE_COMPARISON(int32_t, LessEqual, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowLessEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int32_t>(input2), output_shape,
                output_data)
          : reference_ops::LessEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int32_t>(input2), output_shape,
                output_data);
      break;
    case kTfLiteInt64:
      TF_LITE_COMPARISON(int64_t, LessEqual, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowLessEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int64_t>(input2), output_shape,
                output_data)
          : reference_ops::LessEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int64_t>(input2), output_shape,
                output_data);
      break;
    case kTfLiteUInt8:
      EvalQuantizedLessEqual<uint8_t>(context, node, input1, input2, output,
                                      requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowLessEqualWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
                output_data)
          : reference_ops::LessEqualWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
                output_data);
      break;
    case kTfLiteInt8:
      EvalQuantizedLessEqual<int8_t>(context, node, input1, input2, output,
                                     requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowLessEqualWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int8_t>(input2), output_shape,
                output_data)
          : reference_ops::LessEqualWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int8_t>(input2), output_shape,
                output_data);
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@@ -291,78 +608,115 @@ TfLiteStatus LessEqualEval(TfLiteContext* context, TfLiteNode* node) {
}

}  // namespace

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  OpData* data = static_cast<OpData*>(node->user_data);

  const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
  TF_LITE_ENSURE(context, input1 != nullptr);
  const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
  TF_LITE_ENSURE(context, input2 != nullptr);

  if (input1->type == kTfLiteUInt8 || input1->type == kTfLiteInt8) {
    auto input1_offset = -input1->params.zero_point;
    auto input2_offset = -input2->params.zero_point;
    const int kLeftShift = 8;

    int32_t input1_multiplier;
    int input1_shift;
    QuantizeMultiplierSmallerThanOneExp(
        static_cast<double>(input1->params.scale), &input1_multiplier,
        &input1_shift);
    int32_t input2_multiplier;
    int input2_shift;
    QuantizeMultiplierSmallerThanOneExp(
        static_cast<double>(input2->params.scale), &input2_multiplier,
        &input2_shift);

    data->params.left_shift = kLeftShift;
    data->params.input1_offset = input1_offset;
    data->params.input1_multiplier = input1_multiplier;
    data->params.input1_shift = input1_shift;
    data->params.input2_offset = input2_offset;
    data->params.input2_multiplier = input2_multiplier;
    data->params.input2_shift = input2_shift;
  }

  return kTfLiteOk;
}
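Prepare() now precomputes the quantized-comparison parameters once per node: each input's real scale is folded into a 32-bit fixed-point multiplier plus shift via QuantizeMultiplierSmallerThanOneExp, and the inputs are pre-shifted left by 8 bits (kLeftShift) so the integer comparison keeps precision after rescaling. Below is a minimal sketch of that multiplier/shift decomposition, assuming the usual frexp-based construction and omitting saturation handling (DecomposeMultiplier is a made-up name, not the library function):

#include <cmath>
#include <cstdint>

// A real multiplier m in (0, 1) is expressed as q * 2^shift, where q is a
// Q31 fixed-point value in [2^30, 2^31) and shift <= 0.
void DecomposeMultiplier(double m, int32_t* quantized, int* shift) {
  const double q = std::frexp(m, shift);  // m == q * 2^shift, q in [0.5, 1)
  auto q31 = static_cast<int64_t>(std::round(q * (1LL << 31)));
  if (q31 == (1LL << 31)) {  // rounding carried up to 1.0 * 2^31
    q31 /= 2;
    ++(*shift);
  }
  *quantized = static_cast<int32_t>(q31);
  // The runtime then applies roughly (x * q31) >> (31 - shift).
}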

}  // namespace comparisons

TfLiteRegistration* Register_EQUAL() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/comparisons::EqualEval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_EQUAL() {
  return {/*init=*/comparisons::Init,
          /*free=*/nullptr,
          /*prepare=*/comparisons::Prepare,
          /*invoke=*/comparisons::EqualEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

TfLiteRegistration* Register_NOT_EQUAL() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/comparisons::NotEqualEval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_NOT_EQUAL() {
  return {/*init=*/comparisons::Init,
          /*free=*/nullptr,
          /*prepare=*/comparisons::Prepare,
          /*invoke=*/comparisons::NotEqualEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

TfLiteRegistration* Register_GREATER() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/comparisons::GreaterEval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_GREATER() {
  return {/*init=*/comparisons::Init,
          /*free=*/nullptr,
          /*prepare=*/comparisons::Prepare,
          /*invoke=*/comparisons::GreaterEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

TfLiteRegistration* Register_GREATER_EQUAL() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/comparisons::GreaterEqualEval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_GREATER_EQUAL() {
  return {/*init=*/comparisons::Init,
          /*free=*/nullptr,
          /*prepare=*/comparisons::Prepare,
          /*invoke=*/comparisons::GreaterEqualEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

TfLiteRegistration* Register_LESS() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/comparisons::LessEval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_LESS() {
  return {/*init=*/comparisons::Init,
          /*free=*/nullptr,
          /*prepare=*/comparisons::Prepare,
          /*invoke=*/comparisons::LessEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

TfLiteRegistration* Register_LESS_EQUAL() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/comparisons::LessEqualEval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_LESS_EQUAL() {
  return {/*init=*/comparisons::Init,
          /*free=*/nullptr,
          /*prepare=*/comparisons::Prepare,
          /*invoke=*/comparisons::LessEqualEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}
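Note the registration change that runs through the rest of this diff: each Register_* now returns a TfLiteRegistration by value, with Init and Prepare wired in, instead of a pointer to a function-local static. A hypothetical caller-side sketch of what that implies (the function name and wiring here are assumptions for illustration, not part of this diff):

#include "tensorflow/lite/c/common.h"

namespace tflite {
namespace ops {
namespace micro {
TfLiteRegistration Register_EQUAL();  // defined above
}  // namespace micro
}  // namespace ops
}  // namespace tflite

void RegisterEqualSomewhere() {
  // With the by-value API the caller owns the registration storage,
  // so the kernel itself keeps no mutable static state.
  static const TfLiteRegistration kEqual = tflite::ops::micro::Register_EQUAL();
  // ... hand &kEqual to an op resolver's AddBuiltin/AddCustom hook ...
  (void)kEqual;
}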

}  // namespace micro

@@ -18,10 +18,11 @@ limitations under the License.

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor.h"
#include "tensorflow/lite/kernels/internal/portable_tensor.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
@@ -31,14 +32,116 @@ namespace concatenation {
constexpr int kMaxInputNum = 10;  // Maximum number of input tensors
constexpr int kOutputTensor = 0;

struct OpData {
  ConcatenationParams params;
};

// Handles negative axis index, coerces to positive index value.
inline int CalculatePositiveAxis(int axis, const TfLiteTensor* output_tensor) {
  if (axis >= 0) {
    return axis;
  } else {
    return NumDimensions(output_tensor) + axis;
  }
}
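A quick worked example of the axis coercion, assuming a 4-D output tensor:

// NumDimensions(output) == 4:
//   CalculatePositiveAxis(-1, output) == 4 + (-1) == 3   // last axis
//   CalculatePositiveAxis(-4, output) == 4 + (-4) == 0   // first axis
//   CalculatePositiveAxis( 2, output) == 2               // unchanged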

// The following functions are helpers to get tensor data in the format that the
// reference op implementation expects. They provide the same functionality as
// class VectorOfTensors and class VectorOfQuantizedTensors in TFLite.

// Gets shapes from a list of tensors.
inline void GetAllInputTensorShapes(const TfLiteContext* context,
                                    const TfLiteNode* node,
                                    RuntimeShape all_shapes[kMaxInputNum]) {
  TFLITE_DCHECK(context != nullptr);
  TFLITE_DCHECK(node != nullptr);
  for (int i = 0; i < node->inputs->size; ++i) {
    const TfLiteEvalTensor* t = tflite::micro::GetEvalInput(context, node, i);
    RuntimeShape shape = tflite::micro::GetTensorShape(t);
    all_shapes[i].ReplaceWith(shape.DimensionsCount(), shape.DimsData());
  }
}

// Get shape pointers from a list of shapes.
inline void GetShapesPointers(const RuntimeShape* shapes, size_t num,
                              const RuntimeShape* pointers[]) {
  for (size_t i = 0; i < num; ++i) {
    pointers[i] = &shapes[i];
  }
}

// Gets data pointers from a list of tensors.
template <typename T>
inline void GetAllInputTensorData(const TfLiteContext* context,
                                  const TfLiteNode* node,
                                  T* all_data[kMaxInputNum]) {
  TFLITE_DCHECK(context != nullptr);
  TFLITE_DCHECK(node != nullptr);
  for (int i = 0; i < node->inputs->size; ++i) {
    const TfLiteEvalTensor* t = tflite::micro::GetEvalInput(context, node, i);
    all_data[i] = tflite::micro::GetTensorData<T>(t);
  }
}

template <typename data_type>
void EvalUnquantized(TfLiteContext* context, TfLiteNode* node) {
  // Collect the shapes and data pointer of input tensors
  RuntimeShape inputs_shape[kMaxInputNum];
  const RuntimeShape* inputs_shape_ptr[kMaxInputNum];
  const data_type* inputs_data[kMaxInputNum];
  GetAllInputTensorShapes(context, node, inputs_shape);
  GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr);
  GetAllInputTensorData(context, node, inputs_data);

  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  TFLITE_DCHECK(node->user_data != nullptr);
  const OpData* data = static_cast<const OpData*>(node->user_data);

  reference_ops::Concatenation(data->params, inputs_shape_ptr, inputs_data,
                               tflite::micro::GetTensorShape(output),
                               tflite::micro::GetTensorData<data_type>(output));
}

void EvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node) {
  // Collect the shapes and data pointer of input tensors
  RuntimeShape inputs_shape[kMaxInputNum];
  const RuntimeShape* inputs_shape_ptr[kMaxInputNum];
  const uint8_t* inputs_data[kMaxInputNum];
  GetAllInputTensorShapes(context, node, inputs_shape);
  GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr);
  GetAllInputTensorData(context, node, inputs_data);

  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  TFLITE_DCHECK(node->user_data != nullptr);
  const OpData* data = static_cast<const OpData*>(node->user_data);

  reference_ops::ConcatenationWithScaling(
      data->params, inputs_shape_ptr, inputs_data,
      tflite::micro::GetTensorShape(output),
      tflite::micro::GetTensorData<uint8_t>(output));
}
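EvalQuantizedUInt8 cannot simply copy bytes, because each input may carry its own scale and zero point; ConcatenationWithScaling requantizes every element into the output's quantization using the input_scale/input_zeropoint arrays cached in OpData. Conceptually, the per-element step looks like this float-domain sketch (RequantizeToOutput is an illustrative helper, not the reference kernel's code):

#include <algorithm>
#include <cmath>
#include <cstdint>

uint8_t RequantizeToOutput(uint8_t v, float in_scale, int32_t in_zp,
                           float out_scale, int32_t out_zp) {
  // Recover the real value, then re-encode it in the output's quantization.
  const float real = in_scale * (static_cast<int32_t>(v) - in_zp);
  const int q = out_zp + static_cast<int>(std::round(real / out_scale));
  return static_cast<uint8_t>(std::min(255, std::max(0, q)));
}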

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  // This function only checks the types. Additional shape validations are
  // performed in the reference implementation called during Eval().
  const TfLiteConcatenationParams* params =
      reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data);

  TfLiteType input_type = GetInput(context, node, 0)->type;
  TfLiteType output_type = GetOutput(context, node, kOutputTensor)->type;
  const TfLiteTensor* input_tensor = GetInput(context, node, 0);
  TF_LITE_ENSURE(context, input_tensor != nullptr);
  TfLiteType input_type = input_tensor->type;
  const TfLiteTensor* output_tensor = GetOutput(context, node, kOutputTensor);
  TF_LITE_ENSURE(context, output_tensor != nullptr);
  TfLiteType output_type = output_tensor->type;

  // Check activation and input type
  TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone);
@@ -57,133 +160,76 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  // Shapes with dimensions >4 are not yet supported with static allocation.
  for (int i = 0; i < num_inputs; ++i) {
    const TfLiteTensor* input = GetInput(context, node, i);
    TF_LITE_ENSURE(context, input != nullptr);
    int num_dimensions = NumDimensions(input);

    if (num_dimensions > 4) {
      TF_LITE_KERNEL_LOG(
          context,
          "Op Concatenation does not currently support num dimensions >4 "
          "Tensor '%s' has %d dimensions.",
          input->name, num_dimensions);
          "Tensor has %d dimensions.",
          num_dimensions);
      return kTfLiteError;
    }
  }

  // Calculate OpData.
  TFLITE_DCHECK(node->user_data != nullptr);
  OpData* data = static_cast<OpData*>(node->user_data);

  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);

  switch (output_type) {  // Already know in/out types are the same.
    case kTfLiteFloat32:
    case kTfLiteInt32:
    case kTfLiteInt64: {
      data->params.axis = CalculatePositiveAxis(params->axis, output);
      data->params.inputs_count = node->inputs->size;
      break;
    }
    case kTfLiteUInt8:
    case kTfLiteInt8: {
      data->params.axis = CalculatePositiveAxis(params->axis, output);
      data->params.inputs_count = node->inputs->size;

      float* input_scales =
          reinterpret_cast<float*>(context->AllocatePersistentBuffer(
              context, node->inputs->size * sizeof(float)));

      int32_t* input_zero_points =
          reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
              context, node->inputs->size * sizeof(int32_t)));

      // Allocate persistent scale and zeropoint buffers.
      // Store input scale and zero point values in OpParams:
      for (int i = 0; i < node->inputs->size; ++i) {
        const TfLiteTensor* t = GetInput(context, node, i);
        TF_LITE_ENSURE(context, t != nullptr);
        input_scales[i] = t->params.scale;
        input_zero_points[i] = t->params.zero_point;
      }

      data->params.input_scale = input_scales;
      data->params.input_zeropoint = input_zero_points;
      data->params.output_zeropoint = output->params.zero_point;
      data->params.output_scale = output->params.scale;
      break;
    }
    default:
      TF_LITE_KERNEL_LOG(
          context, "Op Concatenation does not currently support Type '%s'.",
          TfLiteTypeGetName(output_type));
      return kTfLiteError;
  }

  return kTfLiteOk;
}

// Handles negative axis index, coerces to positive index value.
inline int CalculatePositiveAxis(int axis, const TfLiteTensor* output_tensor) {
  if (axis >= 0) {
    return axis;
  } else {
    return NumDimensions(output_tensor) + axis;
  }
}

// The following functions are helpers to get tensor data in the format that the
// reference op implementation expects. They provide the same functionality as
// class VectorOfTensors and class VectorOfQuantizedTensors in TFLite.

// Gets shapes from a list of tensors.
inline void GetAllTensorShapes(const TfLiteContext& context,
                               const TfLiteIntArray& tensor_list,
                               RuntimeShape all_shapes[kMaxInputNum]) {
  for (int i = 0; i < tensor_list.size; ++i) {
    const TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
    RuntimeShape shape = GetTensorShape(t);
    all_shapes[i].ReplaceWith(shape.DimensionsCount(), shape.DimsData());
  }
}

// Get shape pointers from a list of shapes.
inline void GetShapesPointers(const RuntimeShape* shapes, size_t num,
                              const RuntimeShape* pointers[]) {
  for (size_t i = 0; i < num; ++i) {
    pointers[i] = &shapes[i];
  }
}

// Gets data pointers from a list of tensors.
template <typename T>
inline void GetAllTensorData(const TfLiteContext& context,
                             const TfLiteIntArray& tensor_list,
                             T* all_data[kMaxInputNum]) {
  for (int i = 0; i < tensor_list.size; ++i) {
    const TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
    all_data[i] = GetTensorData<T>(t);
  }
}

// Gets scale and zero point from a list of tensors
inline void GetAllQuantizationParam(const TfLiteContext& context,
                                    const TfLiteIntArray& tensor_list,
                                    float scales[kMaxInputNum],
                                    int32 zero_points[kMaxInputNum]) {
  for (int i = 0; i < tensor_list.size; ++i) {
    const TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
    scales[i] = t->params.scale;
    zero_points[i] = t->params.zero_point;
  }
}

template <typename data_type>
void EvalUnquantized(TfLiteContext* context, TfLiteNode* node) {
  // Collect the shapes and data pointer of input tensors
  RuntimeShape inputs_shape[kMaxInputNum];
  const RuntimeShape* inputs_shape_ptr[kMaxInputNum];
  const data_type* inputs_data[kMaxInputNum];
  GetAllTensorShapes(*context, *node->inputs, inputs_shape);
  GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr);
  GetAllTensorData(*context, *node->inputs, inputs_data);

  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);

  const TfLiteConcatenationParams* params =
      reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data);

  ConcatenationParams op_params;
  op_params.axis = CalculatePositiveAxis(params->axis, output);
  op_params.inputs_count = NumInputs(node);

  reference_ops::Concatenation(op_params, inputs_shape_ptr, inputs_data,
                               GetTensorShape(output),
                               GetTensorData<data_type>(output));
}

void EvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node) {
  // Collect the shapes and data pointer of input tensors
  RuntimeShape inputs_shape[kMaxInputNum];
  const RuntimeShape* inputs_shape_ptr[kMaxInputNum];
  const uint8_t* inputs_data[kMaxInputNum];
  float inputs_scale[kMaxInputNum];
  int32 inputs_zero_point[kMaxInputNum];
  GetAllTensorShapes(*context, *node->inputs, inputs_shape);
  GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr);
  GetAllTensorData(*context, *node->inputs, inputs_data);
  GetAllQuantizationParam(*context, *node->inputs, inputs_scale,
                          inputs_zero_point);

  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);

  const TfLiteConcatenationParams* params =
      reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data);

  ConcatenationParams op_params;
  op_params.axis = CalculatePositiveAxis(params->axis, output);
  op_params.inputs_count = NumInputs(node);
  op_params.input_zeropoint = inputs_zero_point;
  op_params.input_scale = inputs_scale;
  op_params.output_zeropoint = output->params.zero_point;
  op_params.output_scale = output->params.scale;

  reference_ops::ConcatenationWithScaling(op_params, inputs_shape_ptr,
                                          inputs_data, GetTensorShape(output),
                                          GetTensorData<uint8>(output));
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  TfLiteType output_type = GetOutput(context, node, kOutputTensor)->type;
  const TfLiteTensor* output_tensor = GetOutput(context, node, kOutputTensor);
  TF_LITE_ENSURE(context, output_tensor != nullptr);
  TfLiteType output_type = output_tensor->type;

  switch (output_type) {  // Already know in/out types are the same.
    case kTfLiteFloat32:
@@ -214,16 +260,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {

}  // namespace concatenation

TfLiteRegistration* Register_CONCATENATION() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/concatenation::Prepare,
                                 /*invoke=*/concatenation::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_CONCATENATION() {
  return {/*init=*/concatenation::Init,
          /*free=*/nullptr,
          /*prepare=*/concatenation::Prepare,
          /*invoke=*/concatenation::Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

}  // namespace micro

@@ -1,279 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/reference/conv.h"

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/conv.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"

namespace tflite {
namespace ops {
namespace micro {
namespace conv {

constexpr int kInputTensor = 0;
constexpr int kFilterTensor = 1;
constexpr int kBiasTensor = 2;
constexpr int kOutputTensor = 0;
// Adjusted by jomjol, 2020-06-05
//constexpr int kMaxChannels = 1024;
constexpr int kMaxChannels = 4096;

// Conv is quantized along dimension 0:
// https://www.tensorflow.org/lite/performance/quantization_spec
constexpr int kConvQuantizedDimension = 0;

// This file has 2 implementations of Conv.

struct OpData {
  TfLitePaddingValues padding;
  // The scaling factor from input to output (aka the 'real multiplier') can
  // be represented as a fixed point multiplier plus a left shift.
  int32_t output_multiplier;
  int output_shift;

  // Per channel output multiplier and shift.
  // TODO(b/141139247): Allocate these dynamically when possible.
  int32_t per_channel_output_multiplier[kMaxChannels];
  int32_t per_channel_output_shift[kMaxChannels];

  // The range of the fused activation layer. For example for kNone and
  // uint8_t these would be 0 and 255.
  int32_t output_activation_min;
  int32_t output_activation_max;
};

inline PaddingType RuntimePaddingType(TfLitePadding padding) {
  switch (padding) {
    case TfLitePadding::kTfLitePaddingSame:
      return PaddingType::kSame;
    case TfLitePadding::kTfLitePaddingValid:
      return PaddingType::kValid;
    case TfLitePadding::kTfLitePaddingUnknown:
    default:
      return PaddingType::kNone;
  }
}

TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
                             TfLiteConvParams* params, int width, int height,
                             int filter_width, int filter_height, int out_width,
                             int out_height, const TfLiteType data_type,
                             OpData* data) {
  bool has_bias = node->inputs->size == 3;
  // Check number of inputs/outputs
  TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2);
  TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);

  // Matching GetWindowedOutputSize in TensorFlow.
  auto padding = params->padding;
  data->padding = ComputePaddingHeightWidth(
      params->stride_height, params->stride_width,
      params->dilation_height_factor, params->dilation_width_factor, height,
      width, filter_height, filter_width, padding, &out_height, &out_width);

  // Note that quantized inference requires that all tensors have their
  // parameters set. This is usually done during quantized training.
  if (data_type != kTfLiteFloat32) {
    const TfLiteTensor* input = GetInput(context, node, kInputTensor);
    const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
    const TfLiteTensor* bias =
        GetOptionalInputTensor(context, node, kBiasTensor);
    TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
    int output_channels = filter->dims->data[kConvQuantizedDimension];

    TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
        context, input, filter, bias, output, params->activation,
        &data->output_multiplier, &data->output_shift,
        &data->output_activation_min, &data->output_activation_max,
        data->per_channel_output_multiplier,
        reinterpret_cast<int*>(data->per_channel_output_shift),
        output_channels));
  }
  return kTfLiteOk;
}

void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
                   TfLiteConvParams* params, OpData* data,
                   const TfLiteTensor* input, const TfLiteTensor* filter,
                   const TfLiteTensor* bias, TfLiteTensor* im2col,
                   TfLiteTensor* hwcn_weights, TfLiteTensor* output) {
  const int32_t input_offset = -input->params.zero_point;
  const int32_t filter_offset = -filter->params.zero_point;
  const int32_t output_offset = output->params.zero_point;

  ConvParams op_params;
  op_params.padding_type = RuntimePaddingType(params->padding);
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.input_offset = input_offset;
  op_params.weights_offset = filter_offset;
  op_params.output_offset = output_offset;
  op_params.output_multiplier = data->output_multiplier;
  op_params.output_shift = -data->output_shift;
  op_params.quantized_activation_min = data->output_activation_min;
  op_params.quantized_activation_max = data->output_activation_max;
  reference_ops::Conv(op_params, GetTensorShape(input),
                      GetTensorData<uint8_t>(input), GetTensorShape(filter),
                      GetTensorData<uint8_t>(filter), GetTensorShape(bias),
                      GetTensorData<int32_t>(bias), GetTensorShape(output),
                      GetTensorData<uint8_t>(output), GetTensorShape(im2col),
                      GetTensorData<uint8_t>(im2col), nullptr);
}

void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
                             TfLiteConvParams* params, OpData* data,
                             const TfLiteTensor* input,
                             const TfLiteTensor* filter,
                             const TfLiteTensor* bias, TfLiteTensor* output,
                             TfLiteTensor* im2col) {
  ConvParams op_params;
  op_params.input_offset = -input->params.zero_point;
  op_params.output_offset = output->params.zero_point;
  op_params.stride_height = params->stride_height;
  op_params.stride_width = params->stride_width;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.padding_values.height = data->padding.height;
  op_params.padding_values.width = data->padding.width;
  op_params.quantized_activation_min = data->output_activation_min;
  op_params.quantized_activation_max = data->output_activation_max;

  reference_integer_ops::ConvPerChannel(
      op_params, data->per_channel_output_multiplier,
      data->per_channel_output_shift, GetTensorShape(input),
      GetTensorData<int8>(input), GetTensorShape(filter),
      GetTensorData<int8>(filter), GetTensorShape(bias),
      GetTensorData<int32>(bias), GetTensorShape(output),
      GetTensorData<int8>(output));
}

void EvalFloat(TfLiteContext* context, TfLiteNode* node,
               TfLiteConvParams* params, OpData* data,
               const TfLiteTensor* input, const TfLiteTensor* filter,
               const TfLiteTensor* bias, TfLiteTensor* im2col,
               TfLiteTensor* hwcn_weights, TfLiteTensor* output) {
  float output_activation_min, output_activation_max;
  CalculateActivationRange(params->activation, &output_activation_min,
                           &output_activation_max);

  ConvParams op_params;
  op_params.padding_type = RuntimePaddingType(params->padding);
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;

  reference_ops::Conv(op_params, GetTensorShape(input),
                      GetTensorData<float>(input), GetTensorShape(filter),
                      GetTensorData<float>(filter), GetTensorShape(bias),
                      GetTensorData<float>(bias), GetTensorShape(output),
                      GetTensorData<float>(output), GetTensorShape(im2col),
                      GetTensorData<float>(im2col));
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  auto* params = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);

  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
  const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);

  int input_width = input->dims->data[2];
  int input_height = input->dims->data[1];
  int filter_width = filter->dims->data[2];
  int filter_height = filter->dims->data[1];
  int output_width = output->dims->data[2];
  int output_height = output->dims->data[1];

  OpData data;

  // All per-channel quantized tensors need valid zero point and scale arrays.
  if (input->type == kTfLiteInt8) {
    TF_LITE_ENSURE_EQ(context, filter->quantization.type,
                      kTfLiteAffineQuantization);

    const auto* affine_quantization =
        reinterpret_cast<TfLiteAffineQuantization*>(
            filter->quantization.params);
    TF_LITE_ENSURE(context, affine_quantization);
    TF_LITE_ENSURE(context, affine_quantization->scale);
    TF_LITE_ENSURE(context, affine_quantization->zero_point);

    TF_LITE_ENSURE(context,
                   affine_quantization->scale->size == 1 ||
                       affine_quantization->scale->size ==
                           filter->dims->data[kConvQuantizedDimension]);
    TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size,
                      affine_quantization->zero_point->size);
  }

  TF_LITE_ENSURE_STATUS(CalculateOpData(
      context, node, params, input_width, input_height, filter_width,
      filter_height, output_width, output_height, input->type, &data));

  switch (input->type) {  // Already know in/out types are the same.
    case kTfLiteFloat32:
      EvalFloat(context, node, params, &data, input, filter, bias, nullptr,
                nullptr, output);
      break;
    case kTfLiteInt8:
      EvalQuantizedPerChannel(context, node, params, &data, input, filter, bias,
                              output, nullptr);
      break;
    case kTfLiteUInt8:
      EvalQuantized(context, node, params, &data, input, filter, bias, nullptr,
                    nullptr, output);
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                         TfLiteTypeGetName(input->type), input->type);
      return kTfLiteError;
  }
  return kTfLiteOk;
}

}  // namespace conv

TfLiteRegistration* Register_CONV_2D() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/conv::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
}

}  // namespace micro
}  // namespace ops
}  // namespace tflite
@@ -23,19 +23,15 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
namespace micro {
namespace conv {
namespace {

constexpr int kInputTensor = 0;
constexpr int kFilterTensor = 1;
constexpr int kBiasTensor = 2;
constexpr int kOutputTensor = 0;
// Adjusted by jomjol, 2020-06-05
//constexpr int kMaxChannels = 1024;
constexpr int kMaxChannels = 32384;

// Conv is quantized along dimension 0:
// https://www.tensorflow.org/lite/performance/quantization_spec
@@ -45,15 +41,20 @@ constexpr int kConvQuantizedDimension = 0;

struct OpData {
  TfLitePaddingValues padding;

  // Cached tensor zero point values for quantized operations.
  int32_t input_zero_point;
  int32_t filter_zero_point;
  int32_t output_zero_point;

  // The scaling factor from input to output (aka the 'real multiplier') can
  // be represented as a fixed point multiplier plus a left shift.
  int32_t output_multiplier;
  int output_shift;

  // Per channel output multiplier and shift.
  // TODO(b/141139247): Allocate these dynamically when possible.
  int32_t per_channel_output_multiplier[kMaxChannels];
  int32_t per_channel_output_shift[kMaxChannels];
  int32_t* per_channel_output_multiplier;
  int32_t* per_channel_output_shift;

  // The range of the fused activation layer. For example for kNone and
  // uint8_t these would be 0 and 255.
@@ -74,10 +75,10 @@ inline PaddingType RuntimePaddingType(TfLitePadding padding) {
}

TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
                             TfLiteConvParams* params, int width, int height,
                             int filter_width, int filter_height, int out_width,
                             int out_height, const TfLiteType data_type,
                             OpData* data) {
                             const TfLiteConvParams* params, int width,
                             int height, int filter_width, int filter_height,
                             int out_width, int out_height,
                             const TfLiteType data_type, OpData* data) {
  bool has_bias = node->inputs->size == 3;
  // Check number of inputs/outputs
  TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2);
@@ -94,10 +95,13 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
  // parameters set. This is usually done during quantized training.
  if (data_type != kTfLiteFloat32) {
    const TfLiteTensor* input = GetInput(context, node, kInputTensor);
    TF_LITE_ENSURE(context, input != nullptr);
    const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
    TF_LITE_ENSURE(context, filter != nullptr);
    const TfLiteTensor* bias =
        GetOptionalInputTensor(context, node, kBiasTensor);
    TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
    TF_LITE_ENSURE(context, output != nullptr);
    int output_channels = filter->dims->data[kConvQuantizedDimension];

    TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
@@ -111,100 +115,24 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
  return kTfLiteOk;
}

void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
                   TfLiteConvParams* params, OpData* data,
                   const TfLiteTensor* input, const TfLiteTensor* filter,
                   const TfLiteTensor* bias, TfLiteTensor* im2col,
                   TfLiteTensor* hwcn_weights, TfLiteTensor* output) {
  const int32_t input_offset = -input->params.zero_point;
  const int32_t filter_offset = -filter->params.zero_point;
  const int32_t output_offset = output->params.zero_point;

  ConvParams op_params;
  op_params.padding_type = RuntimePaddingType(params->padding);
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.input_offset = input_offset;
  op_params.weights_offset = filter_offset;
  op_params.output_offset = output_offset;
  op_params.output_multiplier = data->output_multiplier;
  op_params.output_shift = -data->output_shift;
  op_params.quantized_activation_min = data->output_activation_min;
  op_params.quantized_activation_max = data->output_activation_max;
  reference_ops::Conv(op_params, GetTensorShape(input),
                      GetTensorData<uint8_t>(input), GetTensorShape(filter),
                      GetTensorData<uint8_t>(filter), GetTensorShape(bias),
                      GetTensorData<int32_t>(bias), GetTensorShape(output),
                      GetTensorData<uint8_t>(output), GetTensorShape(im2col),
                      GetTensorData<uint8_t>(im2col), nullptr);
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
|
||||
void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteConvParams* params, OpData* data,
|
||||
const TfLiteTensor* input,
|
||||
const TfLiteTensor* filter,
|
||||
const TfLiteTensor* bias, TfLiteTensor* output,
|
||||
TfLiteTensor* im2col) {
|
||||
ConvParams op_params;
|
||||
op_params.input_offset = -input->params.zero_point;
|
||||
op_params.output_offset = output->params.zero_point;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.quantized_activation_min = data->output_activation_min;
|
||||
op_params.quantized_activation_max = data->output_activation_max;
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
|
||||
reference_integer_ops::ConvPerChannel(
|
||||
op_params, data->per_channel_output_multiplier,
|
||||
data->per_channel_output_shift, GetTensorShape(input),
|
||||
GetTensorData<int8>(input), GetTensorShape(filter),
|
||||
GetTensorData<int8>(filter), GetTensorShape(bias),
|
||||
GetTensorData<int32>(bias), GetTensorShape(output),
|
||||
GetTensorData<int8>(output));
|
||||
}
|
||||
|
||||
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteConvParams* params, OpData* data,
|
||||
const TfLiteTensor* input, const TfLiteTensor* filter,
|
||||
const TfLiteTensor* bias, TfLiteTensor* im2col,
|
||||
TfLiteTensor* hwcn_weights, TfLiteTensor* output) {
|
||||
float output_activation_min, output_activation_max;
|
||||
CalculateActivationRange(params->activation, &output_activation_min,
|
||||
&output_activation_max);
|
||||
|
||||
ConvParams op_params;
|
||||
op_params.padding_type = RuntimePaddingType(params->padding);
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.float_activation_min = output_activation_min;
|
||||
op_params.float_activation_max = output_activation_max;
|
||||
|
||||
reference_ops::Conv(op_params, GetTensorShape(input),
|
||||
GetTensorData<float>(input), GetTensorShape(filter),
|
||||
GetTensorData<float>(filter), GetTensorShape(bias),
|
||||
GetTensorData<float>(bias), GetTensorShape(output),
|
||||
GetTensorData<float>(output), GetTensorShape(im2col),
|
||||
GetTensorData<float>(im2col));
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
const auto params = static_cast<const TfLiteConvParams*>(node->builtin_data);
|
||||
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
|
||||
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
|
||||
TF_LITE_ENSURE(context, filter != nullptr);
|
||||
|
||||
int input_width = input->dims->data[2];
|
||||
int input_height = input->dims->data[1];
|
||||
@@ -212,9 +140,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
int filter_height = filter->dims->data[1];
|
||||
int output_width = output->dims->data[2];
|
||||
int output_height = output->dims->data[1];
|
||||
|
||||
|
||||
struct tflite::ops::micro::conv::OpData *data = (struct tflite::ops::micro::conv::OpData*) malloc(sizeof(struct tflite::ops::micro::conv::OpData));
|
||||
// Dynimically allocate per-channel quantization parameters.
|
||||
const int num_channels = filter->dims->data[kConvQuantizedDimension];
|
||||
data->per_channel_output_multiplier =
|
||||
static_cast<int32_t*>(context->AllocatePersistentBuffer(
|
||||
context, num_channels * sizeof(int32_t)));
|
||||
data->per_channel_output_shift =
|
||||
static_cast<int32_t*>(context->AllocatePersistentBuffer(
|
||||
context, num_channels * sizeof(int32_t)));
|
||||
|
||||
// All per-channel quantized tensors need valid zero point and scale arrays.
|
||||
if (input->type == kTfLiteInt8) {
|
||||
@@ -222,8 +156,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
kTfLiteAffineQuantization);
|
||||
|
||||
const auto* affine_quantization =
|
||||
reinterpret_cast<TfLiteAffineQuantization*>(
|
||||
filter->quantization.params);
|
||||
static_cast<TfLiteAffineQuantization*>(filter->quantization.params);
|
||||
TF_LITE_ENSURE(context, affine_quantization);
|
||||
TF_LITE_ENSURE(context, affine_quantization->scale);
|
||||
TF_LITE_ENSURE(context, affine_quantization->zero_point);
|
||||
@@ -240,6 +173,136 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
context, node, params, input_width, input_height, filter_width,
|
||||
filter_height, output_width, output_height, input->type, data));
|
||||
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
data->filter_zero_point = filter->params.zero_point;
|
||||
data->output_zero_point = output->params.zero_point;
|
||||
|
||||
return kTfLiteOk;
|
||||
} // namespace conv
|
||||
|
||||
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteConvParams* params, const OpData& data,
|
||||
const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias,
|
||||
TfLiteEvalTensor* im2col, TfLiteEvalTensor* hwcn_weights,
|
||||
TfLiteEvalTensor* output) {
|
||||
const int32_t input_offset = -data.input_zero_point;
|
||||
const int32_t filter_offset = -data.filter_zero_point;
|
||||
const int32_t output_offset = data.output_zero_point;
|
||||
|
||||
// TODO(b/154032858): Investigate removing extra copies.
|
||||
ConvParams op_params;
|
||||
op_params.padding_type = RuntimePaddingType(params->padding);
|
||||
op_params.padding_values.width = data.padding.width;
|
||||
op_params.padding_values.height = data.padding.height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.input_offset = input_offset;
|
||||
op_params.weights_offset = filter_offset;
|
||||
op_params.output_offset = output_offset;
|
||||
op_params.output_multiplier = data.output_multiplier;
|
||||
op_params.output_shift = -data.output_shift;
|
||||
op_params.quantized_activation_min = data.output_activation_min;
|
||||
op_params.quantized_activation_max = data.output_activation_max;
|
||||
reference_ops::Conv(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<uint8_t>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<int32_t>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output),
|
||||
tflite::micro::GetTensorShape(im2col),
|
||||
tflite::micro::GetTensorData<uint8_t>(im2col), nullptr);
|
||||
}
|
||||
|
||||
void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteConvParams* params, const OpData& data,
|
||||
const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter,
|
||||
const TfLiteEvalTensor* bias,
|
||||
TfLiteEvalTensor* output,
|
||||
TfLiteEvalTensor* im2col) {
|
||||
// TODO(b/154032858): Investigate removing extra copies.
|
||||
ConvParams op_params;
|
||||
op_params.input_offset = -data.input_zero_point;
|
||||
op_params.output_offset = data.output_zero_point;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.padding_values.height = data.padding.height;
|
||||
op_params.padding_values.width = data.padding.width;
|
||||
op_params.quantized_activation_min = data.output_activation_min;
|
||||
op_params.quantized_activation_max = data.output_activation_max;
|
||||
|
||||
reference_integer_ops::ConvPerChannel(
|
||||
op_params, data.per_channel_output_multiplier,
|
||||
data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<int8_t>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<int32_t>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
|
||||
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteConvParams* params, const OpData& data,
|
||||
const TfLiteEvalTensor* input, const TfLiteEvalTensor* filter,
|
||||
const TfLiteEvalTensor* bias, TfLiteEvalTensor* im2col,
|
||||
TfLiteEvalTensor* hwcn_weights, TfLiteEvalTensor* output) {
|
||||
float output_activation_min, output_activation_max;
|
||||
CalculateActivationRange(params->activation, &output_activation_min,
|
||||
&output_activation_max);
|
||||
// TODO(b/154032858): Investigate removing extra copies.
|
||||
ConvParams op_params;
|
||||
op_params.padding_type = RuntimePaddingType(params->padding);
|
||||
op_params.padding_values.width = data.padding.width;
|
||||
op_params.padding_values.height = data.padding.height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.float_activation_min = output_activation_min;
|
||||
op_params.float_activation_max = output_activation_max;
|
||||
|
||||
reference_ops::Conv(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<float>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<float>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output),
|
||||
tflite::micro::GetTensorShape(im2col),
|
||||
tflite::micro::GetTensorData<float>(im2col));
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
const TfLiteEvalTensor* filter =
|
||||
tflite::micro::GetEvalInput(context, node, kFilterTensor);
|
||||
const TfLiteEvalTensor* bias =
|
||||
(NumInputs(node) == 3)
|
||||
? tflite::micro::GetEvalInput(context, node, kBiasTensor)
|
||||
: nullptr;
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData& data = *(static_cast<const OpData*>(node->user_data));
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, input->type, output->type);
|
||||
TF_LITE_ENSURE_MSG(context, input->type == filter->type,
|
||||
"Hybrid models are not supported on TFLite Micro.");
|
||||
|
||||
switch (input->type) { // Already know in/out types are same.
|
||||
case kTfLiteFloat32:
|
||||
EvalFloat(context, node, params, data, input, filter, bias, nullptr,
|
||||
@@ -256,27 +319,22 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(input->type), input->type);
|
||||
free(data);
|
||||
return kTfLiteError;
|
||||
}
|
||||
free(data);
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace conv
|
||||
} // namespace
|
||||
|
||||
TfLiteRegistration* Register_CONV_2D() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/conv::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_CONV_2D() {
|
||||
return {/*init=*/Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/Prepare,
|
||||
/*invoke=*/Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
|
||||
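The conv diff above shows the pattern this whole update follows: kernels move from a stateless Eval (which in the patched variant even resorted to malloc/free on every invocation) to the Init/Prepare/Eval lifecycle, where Init reserves arena-backed storage once and Prepare fills it, so Eval allocates nothing. A minimal sketch of that lifecycle, with a hypothetical MyOpData payload standing in for the real OpData:

#include <cstdint>

#include "tensorflow/lite/c/common.h"

namespace {

struct MyOpData {
  int32_t output_multiplier;  // filled in Prepare, read in Eval
  int output_shift;
};

// Init runs once per node: reserve persistent storage for cached parameters.
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  return context->AllocatePersistentBuffer(context, sizeof(MyOpData));
}

// Prepare runs when tensor shapes are known: populate the cache.
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  auto* data = static_cast<MyOpData*>(node->user_data);
  data->output_multiplier = 0;  // real kernels derive this from tensor scales
  data->output_shift = 0;
  return kTfLiteOk;
}

// Eval runs per inference: no allocation, only the cached state.
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const auto* data = static_cast<const MyOpData*>(node->user_data);
  (void)data;  // a real kernel would dispatch on tensor type here
  return kTfLiteOk;
}

}  // namespace

TfLiteRegistration Register_MY_OP() {
  return {/*init=*/Init,
          /*free=*/nullptr,
          /*prepare=*/Prepare,
          /*invoke=*/Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}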
@@ -24,18 +24,15 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
namespace micro {
namespace depthwise_conv {
namespace {

constexpr int kInputTensor = 0;
constexpr int kFilterTensor = 1;
constexpr int kBiasTensor = 2;
constexpr int kOutputTensor = 0;
constexpr int kMaxChannels = 1024;

// Depthwise conv is quantized along dimension 3:
// https://www.tensorflow.org/lite/performance/quantization_spec
@@ -43,16 +40,20 @@ constexpr int kDepthwiseConvQuantizedDimension = 3;

struct OpData {
  TfLitePaddingValues padding;

  // Cached tensor zero point values for quantized operations.
  int32_t input_zero_point;
  int32_t filter_zero_point;
  int32_t output_zero_point;

  // The scaling factor from input to output (aka the 'real multiplier') can
  // be represented as a fixed point multiplier plus a left shift.
  int32_t output_multiplier;
  int output_shift;

  // Per channel output multiplier and shift.
  // TODO(b/141139247): Allocate these dynamically when possible.
  int32_t per_channel_output_multiplier[kMaxChannels];
  int32_t per_channel_output_shift[kMaxChannels];

  int32_t* per_channel_output_multiplier;
  int32_t* per_channel_output_shift;
  // The range of the fused activation layer. For example for kNone and
  // uint8_t these would be 0 and 255.
  int32_t output_activation_min;
@@ -78,125 +79,44 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
  // parameters set. This is usually done during quantized training.
  if (data_type != kTfLiteFloat32) {
    const TfLiteTensor* input = GetInput(context, node, kInputTensor);
    TF_LITE_ENSURE(context, input != nullptr);
    const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
    TF_LITE_ENSURE(context, filter != nullptr);
    const TfLiteTensor* bias =
        GetOptionalInputTensor(context, node, kBiasTensor);
    TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
    TF_LITE_ENSURE(context, output != nullptr);
    int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];

    TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
    return tflite::PopulateConvolutionQuantizationParams(
        context, input, filter, bias, output, params->activation,
        &data->output_multiplier, &data->output_shift,
        &data->output_activation_min, &data->output_activation_max,
        data->per_channel_output_multiplier,
        reinterpret_cast<int*>(data->per_channel_output_shift), num_channels));
        reinterpret_cast<int*>(data->per_channel_output_shift), num_channels);
  }
  return kTfLiteOk;
}

}  // namespace

void EvalFloat(TfLiteContext* context, TfLiteNode* node,
               TfLiteDepthwiseConvParams* params, OpData* data,
               const TfLiteTensor* input, const TfLiteTensor* filter,
               const TfLiteTensor* bias, TfLiteTensor* output) {
  float output_activation_min, output_activation_max;
  CalculateActivationRange(params->activation, &output_activation_min,
                           &output_activation_max);

  tflite::DepthwiseParams op_params;
  // Padding type is ignored, but still set.
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.depth_multiplier = params->depth_multiplier;
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;

  tflite::reference_ops::DepthwiseConv(
      op_params, GetTensorShape(input), GetTensorData<float>(input),
      GetTensorShape(filter), GetTensorData<float>(filter),
      GetTensorShape(bias), GetTensorData<float>(bias), GetTensorShape(output),
      GetTensorData<float>(output));
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
                             TfLiteDepthwiseConvParams* params, OpData* data,
                             const TfLiteTensor* input,
                             const TfLiteTensor* filter,
                             const TfLiteTensor* bias, TfLiteTensor* output) {
  DepthwiseParams op_params;
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.depth_multiplier = params->depth_multiplier;
  op_params.input_offset = -input->params.zero_point;
  op_params.weights_offset = 0;
  op_params.output_offset = output->params.zero_point;
  // TODO(b/130439627): Use calculated value for clamping.
  op_params.quantized_activation_min = std::numeric_limits<int8_t>::min();
  op_params.quantized_activation_max = std::numeric_limits<int8_t>::max();
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  TFLITE_DCHECK(node->builtin_data != nullptr);

  reference_integer_ops::DepthwiseConvPerChannel(
      op_params, data->per_channel_output_multiplier,
      data->per_channel_output_shift, GetTensorShape(input),
      GetTensorData<int8>(input), GetTensorShape(filter),
      GetTensorData<int8>(filter), GetTensorShape(bias),
      GetTensorData<int32>(bias), GetTensorShape(output),
      GetTensorData<int8>(output));
}

void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
                   TfLiteDepthwiseConvParams* params, OpData* data,
                   const TfLiteTensor* input, const TfLiteTensor* filter,
                   const TfLiteTensor* bias, TfLiteTensor* output) {
  const int32_t input_offset = -input->params.zero_point;
  const int32_t filter_offset = -filter->params.zero_point;
  const int32_t output_offset = output->params.zero_point;

  tflite::DepthwiseParams op_params;
  // Padding type is ignored, but still set.
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.depth_multiplier = params->depth_multiplier;
  op_params.quantized_activation_min = data->output_activation_min;
  op_params.quantized_activation_max = data->output_activation_max;
  op_params.input_offset = input_offset;
  op_params.weights_offset = filter_offset;
  op_params.output_offset = output_offset;
  op_params.output_multiplier = data->output_multiplier;
  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
  op_params.output_shift = -data->output_shift;

  tflite::reference_ops::DepthwiseConv(
      op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
      GetTensorShape(filter), GetTensorData<uint8_t>(filter),
      GetTensorShape(bias), GetTensorData<int32_t>(bias),
      GetTensorShape(output), GetTensorData<uint8_t>(output));
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  auto* params =
      reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
  OpData* data = static_cast<OpData*>(node->user_data);

  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
  const TfLiteTensor* bias =
      (NumInputs(node) == 3) ? GetInput(context, node, kBiasTensor) : nullptr;
  TF_LITE_ENSURE(context, filter != nullptr);

  const TfLiteType data_type = input->type;
  int width = SizeOfDimension(input, 2);
@@ -204,7 +124,16 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  int filter_width = SizeOfDimension(filter, 2);
  int filter_height = SizeOfDimension(filter, 1);

  OpData data;
  // Per channel quantization is only needed for int8_t inference. For other
  // quantized types, only a single scale and zero point is needed.
  const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
  // Dynamically allocate per-channel quantization parameters.
  data->per_channel_output_multiplier =
      reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
          context, num_channels * sizeof(int32_t)));
  data->per_channel_output_shift =
      reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
          context, num_channels * sizeof(int32_t)));

  // All per-channel quantized tensors need valid zero point and scale arrays.
  if (input->type == kTfLiteInt8) {
@@ -227,20 +156,151 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {

  TF_LITE_ENSURE_STATUS(CalculateOpData(context, node, params, width, height,
                                        filter_width, filter_height, data_type,
                                        &data));
                                        data));

  data->input_zero_point = input->params.zero_point;
  data->filter_zero_point = filter->params.zero_point;
  data->output_zero_point = output->params.zero_point;

  return kTfLiteOk;
}

void EvalFloat(TfLiteContext* context, TfLiteNode* node,
               TfLiteDepthwiseConvParams* params, const OpData& data,
               const TfLiteEvalTensor* input, const TfLiteEvalTensor* filter,
               const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) {
  float output_activation_min, output_activation_max;
  CalculateActivationRange(params->activation, &output_activation_min,
                           &output_activation_max);

  tflite::DepthwiseParams op_params;
  // Padding type is ignored, but still set.
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data.padding.width;
  op_params.padding_values.height = data.padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.depth_multiplier = params->depth_multiplier;
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;

  tflite::reference_ops::DepthwiseConv(
      op_params, tflite::micro::GetTensorShape(input),
      tflite::micro::GetTensorData<float>(input),
      tflite::micro::GetTensorShape(filter),
      tflite::micro::GetTensorData<float>(filter),
      tflite::micro::GetTensorShape(bias),
      tflite::micro::GetTensorData<float>(bias),
      tflite::micro::GetTensorShape(output),
      tflite::micro::GetTensorData<float>(output));
}

void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
                             TfLiteDepthwiseConvParams* params,
                             const OpData& data, const TfLiteEvalTensor* input,
                             const TfLiteEvalTensor* filter,
                             const TfLiteEvalTensor* bias,
                             TfLiteEvalTensor* output) {
  DepthwiseParams op_params;
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data.padding.width;
  op_params.padding_values.height = data.padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.depth_multiplier = params->depth_multiplier;
  op_params.input_offset = -data.input_zero_point;
  op_params.weights_offset = 0;
  op_params.output_offset = data.output_zero_point;
  // TODO(b/130439627): Use calculated value for clamping.
  op_params.quantized_activation_min = std::numeric_limits<int8_t>::min();
  op_params.quantized_activation_max = std::numeric_limits<int8_t>::max();

  reference_integer_ops::DepthwiseConvPerChannel(
      op_params, data.per_channel_output_multiplier,
      data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
      tflite::micro::GetTensorData<int8_t>(input),
      tflite::micro::GetTensorShape(filter),
      tflite::micro::GetTensorData<int8_t>(filter),
      tflite::micro::GetTensorShape(bias),
      tflite::micro::GetTensorData<int32_t>(bias),
      tflite::micro::GetTensorShape(output),
      tflite::micro::GetTensorData<int8_t>(output));
}

void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
                   TfLiteDepthwiseConvParams* params, const OpData& data,
                   const TfLiteEvalTensor* input,
                   const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias,
                   TfLiteEvalTensor* output) {
  const int32_t input_offset = -data.input_zero_point;
  const int32_t filter_offset = -data.filter_zero_point;
  const int32_t output_offset = data.output_zero_point;

  tflite::DepthwiseParams op_params;
  // Padding type is ignored, but still set.
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data.padding.width;
  op_params.padding_values.height = data.padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.depth_multiplier = params->depth_multiplier;
  op_params.quantized_activation_min = data.output_activation_min;
  op_params.quantized_activation_max = data.output_activation_max;
  op_params.input_offset = input_offset;
  op_params.weights_offset = filter_offset;
  op_params.output_offset = output_offset;
  op_params.output_multiplier = data.output_multiplier;
  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
  op_params.output_shift = -data.output_shift;

  tflite::reference_ops::DepthwiseConv(
      op_params, tflite::micro::GetTensorShape(input),
      tflite::micro::GetTensorData<uint8_t>(input),
      tflite::micro::GetTensorShape(filter),
      tflite::micro::GetTensorData<uint8_t>(filter),
      tflite::micro::GetTensorShape(bias),
      tflite::micro::GetTensorData<int32_t>(bias),
      tflite::micro::GetTensorShape(output),
      tflite::micro::GetTensorData<uint8_t>(output));
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  TFLITE_DCHECK(node->builtin_data != nullptr);

  auto* params =
      reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
  const OpData& data = *(static_cast<const OpData*>(node->user_data));

  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  const TfLiteEvalTensor* filter =
      tflite::micro::GetEvalInput(context, node, kFilterTensor);
  const TfLiteEvalTensor* bias =
      (NumInputs(node) == 3)
          ? tflite::micro::GetEvalInput(context, node, kBiasTensor)
          : nullptr;

  // TODO(aselle): Consider whether float conv and quantized conv should be
  // separate ops to avoid dispatch overhead here.
  switch (input->type) {  // Already know in/out types are same.
    case kTfLiteFloat32:
      EvalFloat(context, node, params, &data, input, filter, bias, output);
      EvalFloat(context, node, params, data, input, filter, bias, output);
      break;
    case kTfLiteInt8:
      EvalQuantizedPerChannel(context, node, params, &data, input, filter, bias,
      EvalQuantizedPerChannel(context, node, params, data, input, filter, bias,
                              output);
      break;
    case kTfLiteUInt8:
      EvalQuantized(context, node, params, &data, input, filter, bias, output);
      EvalQuantized(context, node, params, data, input, filter, bias, output);
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@@ -250,20 +310,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  return kTfLiteOk;
}

}  // namespace depthwise_conv
}  // namespace

TfLiteRegistration* Register_DEPTHWISE_CONV_2D() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/depthwise_conv::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_DEPTHWISE_CONV_2D() {
  return {/*init=*/Init,
          /*free=*/nullptr,
          /*prepare=*/Prepare,
          /*invoke=*/Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

}  // namespace micro
}  // namespace ops
}  // namespace tflite
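One point worth making explicit about the depthwise diff above: the old OpData embedded two fixed int32_t[kMaxChannels] arrays, costing 8 * kMaxChannels bytes per node whether or not that many channels exist; with the conv variant's kMaxChannels = 32384 that is roughly 253 KiB per node. The replacement allocates exactly num_channels entries per array from the persistent arena. The same step in isolation (a sketch; `context`, `filter` and `data` are the Prepare() locals from the diff):

// Dynamic per-channel allocation, sized to the actual channel count.
const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
data->per_channel_output_multiplier =
    static_cast<int32_t*>(context->AllocatePersistentBuffer(
        context, num_channels * sizeof(int32_t)));
data->per_channel_output_shift =
    static_cast<int32_t*>(context->AllocatePersistentBuffer(
        context, num_channels * sizeof(int32_t)));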
@@ -22,19 +22,39 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
namespace micro {
namespace dequantize {

struct OpData {
  tflite::DequantizationParams quantization_params;
  // The scaling factor from input to output (aka the 'real multiplier') can
  // be represented as a fixed point multiplier plus a left shift.
  int32_t output_multiplier;
  int output_shift;
  int32_t output_zero_point;
};

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  OpData* data = static_cast<OpData*>(node->user_data);

  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);

  // TODO(b/140515557): Add cached dequant to improve hybrid model performance.
  const TfLiteTensor* input = GetInput(context, node, 0);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* output = GetOutput(context, node, 0);
  TF_LITE_ENSURE(context, output != nullptr);

  TF_LITE_ENSURE(context, input->type == kTfLiteUInt8 ||
                              input->type == kTfLiteInt8 ||
@@ -42,32 +62,49 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE(
      context, output->type == kTfLiteFloat32 || output->type == kTfLiteInt32);

  if (output->type == kTfLiteInt32) {
    const double effective_output_scale =
        static_cast<double>(input->params.scale) /
        static_cast<double>(output->params.scale);
    QuantizeMultiplier(effective_output_scale, &data->output_multiplier,
                       &data->output_shift);
  }

  data->quantization_params.zero_point = input->params.zero_point;
  data->quantization_params.scale = static_cast<double>(input->params.scale);
  data->output_zero_point = output->params.zero_point;
  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input = GetInput(context, node, 0);
  TfLiteTensor* output = GetOutput(context, node, 0);
  TFLITE_DCHECK(node->user_data != nullptr);
  OpData* data = static_cast<OpData*>(node->user_data);

  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);

  if (output->type == kTfLiteFloat32) {
    tflite::DequantizationParams op_params;
    op_params.zero_point = input->params.zero_point;
    op_params.scale = static_cast<double>(input->params.scale);
    switch (input->type) {
      case kTfLiteUInt8:
        reference_ops::Dequantize(
            op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
            GetTensorShape(output), GetTensorData<float>(output));
        reference_ops::Dequantize(data->quantization_params,
                                  tflite::micro::GetTensorShape(input),
                                  tflite::micro::GetTensorData<uint8_t>(input),
                                  tflite::micro::GetTensorShape(output),
                                  tflite::micro::GetTensorData<float>(output));
        break;
      case kTfLiteInt8:
        reference_ops::Dequantize(
            op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
            GetTensorShape(output), GetTensorData<float>(output));
        reference_ops::Dequantize(data->quantization_params,
                                  tflite::micro::GetTensorShape(input),
                                  tflite::micro::GetTensorData<int8_t>(input),
                                  tflite::micro::GetTensorShape(output),
                                  tflite::micro::GetTensorData<float>(output));
        break;
      case kTfLiteInt16:
        reference_ops::Dequantize(
            op_params, GetTensorShape(input), GetTensorData<int16_t>(input),
            GetTensorShape(output), GetTensorData<float>(output));
        reference_ops::Dequantize(data->quantization_params,
                                  tflite::micro::GetTensorShape(input),
                                  tflite::micro::GetTensorData<int16_t>(input),
                                  tflite::micro::GetTensorShape(output),
                                  tflite::micro::GetTensorData<float>(output));
        break;
      default:
        TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
@@ -76,28 +113,23 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
        return kTfLiteError;
    }
  } else if (output->type == kTfLiteInt32) {
    int32_t output_multiplier;
    int output_shift;
    const double effective_output_scale =
        static_cast<double>(input->params.scale) /
        static_cast<double>(output->params.scale);
    QuantizeMultiplier(effective_output_scale, &output_multiplier,
                       &output_shift);
    int flat_size =
        MatchingFlatSize(GetTensorShape(input), GetTensorShape(output));
    int flat_size = MatchingFlatSize(tflite::micro::GetTensorShape(input),
                                     tflite::micro::GetTensorShape(output));
    switch (input->type) {
      case kTfLiteInt16: {
        reference_ops::Requantize(
            GetTensorData<int16_t>(input), flat_size, output_multiplier,
            output_shift, input->params.zero_point, output->params.zero_point,
            GetTensorData<int32_t>(output));
            tflite::micro::GetTensorData<int16_t>(input), flat_size,
            data->output_multiplier, data->output_shift,
            data->quantization_params.zero_point, data->output_zero_point,
            tflite::micro::GetTensorData<int32_t>(output));
        break;
      }
      case kTfLiteInt8: {
        reference_ops::Requantize(
            GetTensorData<int8_t>(input), flat_size, output_multiplier,
            output_shift, input->params.zero_point, output->params.zero_point,
            GetTensorData<int32_t>(output));
            tflite::micro::GetTensorData<int8_t>(input), flat_size,
            data->output_multiplier, data->output_shift,
            data->quantization_params.zero_point, data->output_zero_point,
            tflite::micro::GetTensorData<int32_t>(output));
        break;
      }
      default:
@@ -118,16 +150,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {

}  // namespace dequantize

TfLiteRegistration* Register_DEQUANTIZE() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/dequantize::Prepare,
                                 /*invoke=*/dequantize::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_DEQUANTIZE() {
  return {/*init=*/dequantize::Init,
          /*free=*/nullptr,
          /*prepare=*/dequantize::Prepare,
          /*invoke=*/dequantize::Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

}  // namespace micro
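The dequantize diff above moves the scale math from Eval into Prepare. The key step is QuantizeMultiplier (from kernels/internal/quantization_util.h), which factors a double scale ratio into a Q31 multiplier and a power-of-two shift so Eval only does integer work. A sketch of that factorization under the assumption that it matches the library's semantics; the function name here is illustrative, not the library's:

#include <cmath>
#include <cstdint>

// Sketch: represent scale as q * 2^shift with q in [0.5, 1), stored as
// round(q * 2^31). E.g. 0.75 -> multiplier 1610612736, shift 0.
void QuantizeMultiplierSketch(double scale, int32_t* multiplier, int* shift) {
  if (scale == 0.0) {
    *multiplier = 0;
    *shift = 0;
    return;
  }
  const double q = std::frexp(scale, shift);  // q in [0.5, 1)
  auto q31 = static_cast<int64_t>(std::round(q * (1LL << 31)));
  if (q31 == (1LL << 31)) {  // rounding pushed q to exactly 1.0
    q31 /= 2;
    ++*shift;
  }
  *multiplier = static_cast<int32_t>(q31);
}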
@@ -18,6 +18,8 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"

namespace tflite {
namespace ops {
@@ -39,8 +41,10 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
  const TfLiteTensor* input = GetInput(context, node, 0);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* output = GetOutput(context, node, 0);
  TF_LITE_ENSURE_EQ(context, input->type, output->type);
  TF_LITE_ENSURE(context, output != nullptr);
  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
  if (!IsSupportedType(input->type)) {
    TF_LITE_KERNEL_LOG(context, "Input data type %s (%d) is not supported.",
                       TfLiteTypeGetName(input->type), input->type);
@@ -52,13 +56,13 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
template <typename T>
inline TfLiteStatus EvalImpl(TfLiteContext* context, TfLiteNode* node,
                             T func(T), TfLiteType expected_type) {
  const TfLiteTensor* input = GetInput(context, node, 0);
  TfLiteTensor* output = GetOutput(context, node, 0);
  TF_LITE_ENSURE_EQ(context, input->type, expected_type);
  const int64_t num_elements = NumElements(input);
  const T* in_data = GetTensorData<T>(input);
  T* out_data = GetTensorData<T>(output);
  for (int64_t i = 0; i < num_elements; ++i) {
  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
  TF_LITE_ENSURE_TYPES_EQ(context, input->type, expected_type);
  const size_t num_elements = ElementCount(*input->dims);
  const T* in_data = tflite::micro::GetTensorData<T>(input);
  T* out_data = tflite::micro::GetTensorData<T>(output);
  for (size_t i = 0; i < num_elements; ++i) {
    out_data[i] = func(in_data[i]);
  }
  return kTfLiteOk;
@@ -109,116 +113,100 @@ TfLiteStatus LogicalNotEval(TfLiteContext* context, TfLiteNode* node) {
}  // namespace
}  // namespace elementwise

TfLiteRegistration* Register_ABS() {
  static TfLiteRegistration r = {
      /*init=*/nullptr,
      /*free=*/nullptr,
      /*prepare=*/
      elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
      /*invoke=*/elementwise::AbsEval,
      /*profiling_string=*/nullptr,
      /*builtin_code=*/0,
      /*custom_name=*/nullptr,
      /*version=*/0};
  return &r;
TfLiteRegistration Register_ABS() {
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/
          elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
          /*invoke=*/elementwise::AbsEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

TfLiteRegistration* Register_SIN() {
  static TfLiteRegistration r = {
      /*init=*/nullptr,
      /*free=*/nullptr,
      /*prepare=*/
      elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
      /*invoke=*/elementwise::SinEval,
      /*profiling_string=*/nullptr,
      /*builtin_code=*/0,
      /*custom_name=*/nullptr,
      /*version=*/0};
  return &r;
TfLiteRegistration Register_SIN() {
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/
          elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
          /*invoke=*/elementwise::SinEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

TfLiteRegistration* Register_COS() {
  static TfLiteRegistration r = {
      /*init=*/nullptr,
      /*free=*/nullptr,
      /*prepare=*/
      elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
      /*invoke=*/elementwise::CosEval,
      /*profiling_string=*/nullptr,
      /*builtin_code=*/0,
      /*custom_name=*/nullptr,
      /*version=*/0};
  return &r;
TfLiteRegistration Register_COS() {
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/
          elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
          /*invoke=*/elementwise::CosEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

TfLiteRegistration* Register_LOG() {
  static TfLiteRegistration r = {
      /*init=*/nullptr,
      /*free=*/nullptr,
      /*prepare=*/
      elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
      /*invoke=*/elementwise::LogEval,
      /*profiling_string=*/nullptr,
      /*builtin_code=*/0,
      /*custom_name=*/nullptr,
      /*version=*/0};
  return &r;
TfLiteRegistration Register_LOG() {
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/
          elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
          /*invoke=*/elementwise::LogEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

TfLiteRegistration* Register_SQRT() {
  static TfLiteRegistration r = {
      /*init=*/nullptr,
      /*free=*/nullptr,
      /*prepare=*/
      elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
      /*invoke=*/elementwise::SqrtEval,
      /*profiling_string=*/nullptr,
      /*builtin_code=*/0,
      /*custom_name=*/nullptr,
      /*version=*/0};
  return &r;
TfLiteRegistration Register_SQRT() {
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/
          elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
          /*invoke=*/elementwise::SqrtEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

TfLiteRegistration* Register_RSQRT() {
  static TfLiteRegistration r = {
      /*init=*/nullptr,
      /*free=*/nullptr,
      /*prepare=*/
      elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
      /*invoke=*/elementwise::RsqrtEval,
      /*profiling_string=*/nullptr,
      /*builtin_code=*/0,
      /*custom_name=*/nullptr,
      /*version=*/0};
  return &r;
TfLiteRegistration Register_RSQRT() {
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/
          elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
          /*invoke=*/elementwise::RsqrtEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

TfLiteRegistration* Register_SQUARE() {
  static TfLiteRegistration r = {
      /*init=*/nullptr,
      /*free=*/nullptr,
      /*prepare=*/
      elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
      /*invoke=*/elementwise::SquareEval,
      /*profiling_string=*/nullptr,
      /*builtin_code=*/0,
      /*custom_name=*/nullptr,
      /*version=*/0};
  return &r;
TfLiteRegistration Register_SQUARE() {
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/
          elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
          /*invoke=*/elementwise::SquareEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

TfLiteRegistration* Register_LOGICAL_NOT() {
  static TfLiteRegistration r = {
      /*init=*/nullptr,
      /*free=*/nullptr,
      /*prepare=*/
      elementwise::GenericPrepare<elementwise::IsLogicalSupportedType>,
      /*invoke=*/elementwise::LogicalNotEval,
      /*profiling_string=*/nullptr,
      /*builtin_code=*/0,
      /*custom_name=*/nullptr,
      /*version=*/0};
  return &r;
TfLiteRegistration Register_LOGICAL_NOT() {
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/
          elementwise::GenericPrepare<elementwise::IsLogicalSupportedType>,
          /*invoke=*/elementwise::LogicalNotEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

}  // namespace micro
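The elementwise kernels above all share one templated loop, EvalImpl, and differ only in the scalar function passed as the `T func(T)` parameter and the expected tensor type. The shape of that dispatch, reduced to plain arrays (names here are illustrative, not from the library):

#include <cstddef>

// Illustrative stand-in for EvalImpl: apply a scalar function elementwise.
template <typename T>
void ElementwiseApply(const T* in, T* out, size_t n, T func(T)) {
  for (size_t i = 0; i < n; ++i) {
    out[i] = func(in[i]);
  }
}

float Square(float x) { return x * x; }

void Demo(const float* in, float* out, size_t n) {
  ElementwiseApply(in, out, n, Square);  // mirrors how SquareEval dispatches
}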
@@ -1,4 +1,4 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -12,16 +12,21 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Optional debugging functionality. For small sized binaries, these are not
// needed.
#ifndef TENSORFLOW_LITE_MICRO_MICRO_OPTIONAL_DEBUG_TOOLS_H_
#define TENSORFLOW_LITE_MICRO_MICRO_OPTIONAL_DEBUG_TOOLS_H_

#include "tensorflow/lite/micro/micro_interpreter.h"
//
// This is a stub file for non-Ethos platforms
//
#include "tensorflow/lite/c/common.h"

namespace tflite {
// Prints a dump of what tensors and what nodes are in the interpreter.
void PrintInterpreterState(MicroInterpreter* interpreter);
}  // namespace tflite
namespace ops {
namespace micro {
namespace custom {
TfLiteRegistration* Register_ETHOSU() { return nullptr; }

#endif  // TENSORFLOW_LITE_MICRO_MICRO_OPTIONAL_DEBUG_TOOLS_H_
const char* GetString_ETHOSU() { return ""; }

}  // namespace custom
}  // namespace micro
}  // namespace ops
}  // namespace tflite
@@ -17,7 +17,7 @@ limitations under the License.

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
@@ -28,25 +28,28 @@ constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  reference_ops::Floor(GetTensorShape(input), GetTensorData<float>(input),
                       GetTensorShape(output), GetTensorData<float>(output));
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
  reference_ops::Floor(tflite::micro::GetTensorShape(input),
                       tflite::micro::GetTensorData<float>(input),
                       tflite::micro::GetTensorShape(output),
                       tflite::micro::GetTensorData<float>(output));
  return kTfLiteOk;
}
}  // namespace floor

TfLiteRegistration* Register_FLOOR() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/floor::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_FLOOR() {
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/nullptr,
          /*invoke=*/floor::Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

}  // namespace micro
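The floor diff is the smallest example of the TfLiteEvalTensor migration running through this update: Eval-time code now fetches tensors with tflite::micro::GetEvalInput/GetEvalOutput and the micro GetTensorShape/GetTensorData helpers rather than the full TfLiteTensor accessors, since after memory planning only the lightweight eval tensors are guaranteed to be available. A condensed sketch of the same pattern, assuming the micro kernel_util and micro_utils headers the diffs include:

#include <cmath>
#include <cstddef>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"

// Sketch: Eval-tensor access as used by the patched floor kernel.
TfLiteStatus FloorEvalSketch(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, /*index=*/0);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, /*index=*/0);
  const float* in = tflite::micro::GetTensorData<float>(input);
  float* out = tflite::micro::GetTensorData<float>(output);
  const size_t n = ElementCount(*input->dims);
  for (size_t i = 0; i < n; ++i) {
    out[i] = std::floor(in[i]);  // the math reference_ops::Floor applies
  }
  return kTfLiteOk;
}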
@@ -1,4 +1,4 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -13,20 +13,19 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
#include "tensorflow/lite/micro/kernels/fully_connected.h"

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
namespace micro {
namespace fully_connected {
namespace {

struct OpData {
@@ -40,6 +39,10 @@ struct OpData {
  int32_t output_activation_max;
  // The index of the temporary tensor where the quantized inputs are cached.
  int input_quantized_index;
  // Cached zero point values of tensors.
  int32_t input_zero_point;
  int32_t filter_zero_point;
  int32_t output_zero_point;
};

constexpr int kInputTensor = 0;
@@ -64,20 +67,17 @@ TfLiteStatus CalculateOpData(TfLiteContext* context,
    TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
        context, activation, output, &data->output_activation_min,
        &data->output_activation_max));

    data->input_zero_point = input->params.zero_point;
    data->filter_zero_point = filter->params.zero_point;
    data->output_zero_point = output->params.zero_point;
  }
  return status;
}

}  // namespace

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  void* data = nullptr;
  if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) ==
      kTfLiteError) {
    return nullptr;
  }
  return data;
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
@@ -89,11 +89,14 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
      static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);

  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor);
  TF_LITE_ENSURE(context, filter != nullptr);
  const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);

  TF_LITE_ENSURE_EQ(context, input->type, output->type);
  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
  TF_LITE_ENSURE_MSG(context, input->type == filter->type,
                     "Hybrid models are not supported on TFLite Micro.");

@@ -102,13 +105,15 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
}

TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
                               const OpData& data, const TfLiteTensor* input,
                               const TfLiteTensor* filter,
                               const TfLiteTensor* bias, TfLiteTensor* output) {
                               const OpData& data,
                               const TfLiteEvalTensor* input,
                               const TfLiteEvalTensor* filter,
                               const TfLiteEvalTensor* bias,
                               TfLiteEvalTensor* output) {
  tflite::FullyConnectedParams op_params;
  op_params.input_offset = -input->params.zero_point;
  op_params.weights_offset = -filter->params.zero_point;
  op_params.output_offset = output->params.zero_point;
  op_params.input_offset = -data.input_zero_point;
  op_params.weights_offset = -data.filter_zero_point;
  op_params.output_offset = data.output_zero_point;
  op_params.output_multiplier = data.output_multiplier;
  // TODO(b/138810107): Figure out whether output shift should be inverted
  op_params.output_shift = -data.output_shift;
@@ -116,20 +121,25 @@ TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
  op_params.quantized_activation_max = data.output_activation_max;

  reference_integer_ops::FullyConnected(
      op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
      GetTensorShape(filter), GetTensorData<int8_t>(filter),
      GetTensorShape(bias), GetTensorData<int32_t>(bias),
      GetTensorShape(output), GetTensorData<int8_t>(output));
      op_params, tflite::micro::GetTensorShape(input),
      tflite::micro::GetTensorData<int8_t>(input),
      tflite::micro::GetTensorShape(filter),
      tflite::micro::GetTensorData<int8_t>(filter),
      tflite::micro::GetTensorShape(bias),
      tflite::micro::GetTensorData<int32_t>(bias),
      tflite::micro::GetTensorShape(output),
      tflite::micro::GetTensorData<int8_t>(output));
  return kTfLiteOk;
}

TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
                           const OpData& data, const TfLiteTensor* input,
                           const TfLiteTensor* filter, const TfLiteTensor* bias,
                           TfLiteTensor* output) {
  const int32_t input_offset = -input->params.zero_point;
  const int32_t filter_offset = -filter->params.zero_point;
  const int32_t output_offset = output->params.zero_point;
                           const OpData& data, const TfLiteEvalTensor* input,
                           const TfLiteEvalTensor* filter,
                           const TfLiteEvalTensor* bias,
                           TfLiteEvalTensor* output) {
  const int32_t input_offset = -data.input_zero_point;
  const int32_t filter_offset = -data.filter_zero_point;
  const int32_t output_offset = data.output_zero_point;

  tflite::FullyConnectedParams op_params;
  op_params.input_offset = input_offset;
@@ -141,12 +151,16 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
  op_params.quantized_activation_min = data.output_activation_min;
  op_params.quantized_activation_max = data.output_activation_max;

#define TF_LITE_FULLY_CONNECTED(output_data_type)                      \
  reference_ops::FullyConnected(                                       \
      op_params, GetTensorShape(input), GetTensorData<uint8_t>(input), \
      GetTensorShape(filter), GetTensorData<uint8_t>(filter),          \
      GetTensorShape(bias), GetTensorData<int32_t>(bias),              \
      GetTensorShape(output), GetTensorData<output_data_type>(output))
#define TF_LITE_FULLY_CONNECTED(output_data_type)           \
  reference_ops::FullyConnected(                            \
      op_params, tflite::micro::GetTensorShape(input),      \
      tflite::micro::GetTensorData<uint8_t>(input),         \
      tflite::micro::GetTensorShape(filter),                \
      tflite::micro::GetTensorData<uint8_t>(filter),        \
      tflite::micro::GetTensorShape(bias),                  \
      tflite::micro::GetTensorData<int32_t>(bias),          \
      tflite::micro::GetTensorShape(output),                \
      tflite::micro::GetTensorData<output_data_type>(output))
  switch (output->type) {
    case kTfLiteUInt8:
      TF_LITE_FULLY_CONNECTED(uint8_t);
@@ -165,8 +179,9 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,

TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
                       TfLiteFusedActivation activation,
                       const TfLiteTensor* input, const TfLiteTensor* filter,
                       const TfLiteTensor* bias, TfLiteTensor* output) {
                       const TfLiteEvalTensor* input,
                       const TfLiteEvalTensor* filter,
                       const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) {
  float output_activation_min, output_activation_max;
  CalculateActivationRange(activation, &output_activation_min,
                           &output_activation_max);
@@ -174,10 +189,14 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;
  tflite::reference_ops::FullyConnected(
      op_params, GetTensorShape(input), GetTensorData<float>(input),
      GetTensorShape(filter), GetTensorData<float>(filter),
      GetTensorShape(bias), GetTensorData<float>(bias), GetTensorShape(output),
      GetTensorData<float>(output));
      op_params, tflite::micro::GetTensorShape(input),
      tflite::micro::GetTensorData<float>(input),
      tflite::micro::GetTensorShape(filter),
      tflite::micro::GetTensorData<float>(filter),
      tflite::micro::GetTensorShape(bias),
      tflite::micro::GetTensorData<float>(bias),
      tflite::micro::GetTensorShape(output),
      tflite::micro::GetTensorData<float>(output));
  return kTfLiteOk;
}

@@ -186,10 +205,14 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const auto* params =
      static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);

  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor);
  const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  const TfLiteEvalTensor* filter =
      tflite::micro::GetEvalInput(context, node, kWeightsTensor);
  const TfLiteEvalTensor* bias =
      tflite::micro::GetEvalInput(context, node, kBiasTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  TFLITE_DCHECK(node->user_data != nullptr);
  const OpData& data = *(static_cast<const OpData*>(node->user_data));
@@ -214,20 +237,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  return kTfLiteOk;
}

}  // namespace fully_connected
}  // namespace

TfLiteRegistration* Register_FULLY_CONNECTED() {
  static TfLiteRegistration r = {/*init=*/fully_connected::Init,
                                 /*free=*/nullptr,
                                 /*prepare=*/fully_connected::Prepare,
                                 /*invoke=*/fully_connected::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_FULLY_CONNECTED() {
  return {/*init=*/Init,
          /*free=*/nullptr,
          /*prepare=*/Prepare,
          /*invoke=*/Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

}  // namespace micro
}  // namespace ops
}  // namespace tflite

@@ -0,0 +1,50 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_FULLY_CONNECTED_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_FULLY_CONNECTED_H_

#include "tensorflow/lite/c/common.h"

namespace tflite {

// This is the most generic TfLiteRegistration. The actual supported types may
// still be target dependent. The only requirement is that every implementation
// (reference or optimized) must define this function.
TfLiteRegistration Register_FULLY_CONNECTED();

#if defined(CMSIS_NN) || defined(ARDUINO)
// The Arduino is a special case where we use the CMSIS kernels, but because of
// the current approach to building for Arduino, we do not support -DCMSIS_NN
// as part of the build. As a result, we use defined(ARDUINO) as a proxy for
// the CMSIS kernels for this one special case.

// Returns a TfLiteRegistration struct for the cmsis-nn kernel variant that
// only supports int8.
TfLiteRegistration Register_FULLY_CONNECTED_INT8();

#else
// Note that while this block gets used for both reference and optimized
// kernels that do not have any specialized implementations, the only goal here
// is to define a fallback implementation that allows reference kernels to
// still be used from applications that call a more specific kernel variant.

inline TfLiteRegistration Register_FULLY_CONNECTED_INT8() {
  return Register_FULLY_CONNECTED();
}

#endif
}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_KERNELS_FULLY_CONNECTED_H_
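The #else branch above is what keeps call sites portable: application code can name the int8-specialized variant unconditionally, and on targets without a specialized kernel it simply resolves to the reference registration. A minimal sketch of that call-site pattern (the variable name is illustrative, not part of this commit):

#include "tensorflow/lite/micro/kernels/fully_connected.h"

// On CMSIS-NN/Arduino builds this resolves to the int8-only kernel; on every
// other target the inline fallback above forwards to the reference
// Register_FULLY_CONNECTED(), so no #ifdef is needed at the call site.
const TfLiteRegistration fully_connected_registration =
    tflite::Register_FULLY_CONNECTED_INT8();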
142
code/lib/tfmicro/tensorflow/lite/micro/kernels/hard_swish.cc
Normal file
@@ -0,0 +1,142 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/reference/hard_swish.h"

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"

namespace tflite {
namespace ops {
namespace micro {
namespace hard_swish {

constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;

void* HardSwishInit(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(HardSwishParams));
}

TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);

  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);

  if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
    HardSwishParams* params = static_cast<HardSwishParams*>(node->user_data);

    params->input_zero_point = input->params.zero_point;
    params->output_zero_point = output->params.zero_point;

    const float input_scale = input->params.scale;
    const float hires_input_scale = (1.0f / 128.0f) * input_scale;
    const float reluish_scale = 3.0f / 32768.0f;
    const float output_scale = output->params.scale;

    const double output_multiplier =
        static_cast<double>(hires_input_scale / output_scale);
    int32_t output_multiplier_fixedpoint_int32;
    QuantizeMultiplier(output_multiplier, &output_multiplier_fixedpoint_int32,
                       &params->output_multiplier_exponent);
    DownScaleInt32ToInt16Multiplier(
        output_multiplier_fixedpoint_int32,
        &params->output_multiplier_fixedpoint_int16);

    TF_LITE_ENSURE(context, params->output_multiplier_exponent <= 0);

    const double reluish_multiplier =
        static_cast<double>(hires_input_scale / reluish_scale);
    int32_t reluish_multiplier_fixedpoint_int32;
    QuantizeMultiplier(reluish_multiplier, &reluish_multiplier_fixedpoint_int32,
                       &params->reluish_multiplier_exponent);
    DownScaleInt32ToInt16Multiplier(
        reluish_multiplier_fixedpoint_int32,
        &params->reluish_multiplier_fixedpoint_int16);
  }

  return kTfLiteOk;
}

TfLiteStatus HardSwishEval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
  HardSwishParams* params = static_cast<HardSwishParams*>(node->user_data);

  switch (input->type) {
    case kTfLiteFloat32: {
      tflite::reference_ops::HardSwish<float>(
          tflite::micro::GetTensorShape(input),
          tflite::micro::GetTensorData<float>(input),
          tflite::micro::GetTensorShape(output),
          tflite::micro::GetTensorData<float>(output));
    } break;
    case kTfLiteUInt8: {
      tflite::reference_ops::HardSwish<uint8_t>(
          *params, tflite::micro::GetTensorShape(input),
          tflite::micro::GetTensorData<uint8_t>(input),
          tflite::micro::GetTensorShape(output),
          tflite::micro::GetTensorData<uint8_t>(output));
    } break;
    case kTfLiteInt8: {
      tflite::reference_ops::HardSwish<int8_t>(
          *params, tflite::micro::GetTensorShape(input),
          tflite::micro::GetTensorData<int8_t>(input),
          tflite::micro::GetTensorShape(output),
          tflite::micro::GetTensorData<int8_t>(output));
    } break;
    default: {
      TF_LITE_KERNEL_LOG(
          context,
          "Only float32/int8_t/uint8_t are supported currently, got %s",
          TfLiteTypeGetName(input->type));
      return kTfLiteError;
    }
  }
  return kTfLiteOk;
}

}  // namespace hard_swish

TfLiteRegistration Register_HARD_SWISH() {
  return {/*init=*/hard_swish::HardSwishInit,
          /*free=*/nullptr,
          /*prepare=*/hard_swish::HardSwishPrepare,
          /*invoke=*/hard_swish::HardSwishEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

}  // namespace micro
}  // namespace ops
}  // namespace tflite
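HardSwishPrepare above folds the float quantization scales into fixed-point multipliers via QuantizeMultiplier. A self-contained sketch of that decomposition (DecomposeMultiplier and the example scales are hypothetical, chosen so the arithmetic is exact; the real helper lives in kernels/internal/quantization_util.h):

#include <cmath>
#include <cstdint>
#include <cstdio>

// Splits a positive real multiplier into a Q31 mantissa and a power-of-two
// exponent, so that multiplier ~= mantissa * 2^(exponent - 31).
void DecomposeMultiplier(double multiplier, int32_t* mantissa_q31,
                         int* exponent) {
  const double q = std::frexp(multiplier, exponent);  // q is in [0.5, 1)
  *mantissa_q31 = static_cast<int32_t>(std::round(q * (1ll << 31)));
}

int main() {
  // With an input scale of 3/128, hires_input_scale is (1/128) * (3/128);
  // an output scale of 3/256 then gives an output multiplier of exactly 1/64.
  const double hires_input_scale = (1.0 / 128.0) * (3.0 / 128.0);
  const double output_scale = 3.0 / 256.0;
  int32_t mantissa = 0;
  int exponent = 0;
  DecomposeMultiplier(hires_input_scale / output_scale, &mantissa, &exponent);
  // Prints mantissa=1073741824 exponent=-5, i.e. 2^30 * 2^(-5-31) = 1/64.
  // A non-positive exponent is what the TF_LITE_ENSURE in HardSwishPrepare
  // checks for.
  std::printf("mantissa=%ld exponent=%d\n", static_cast<long>(mantissa),
              exponent);
  return 0;
}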
165
code/lib/tfmicro/tensorflow/lite/micro/kernels/kernel_runner.cc
Normal file
@@ -0,0 +1,165 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/micro/kernels/kernel_runner.h"

namespace tflite {
namespace micro {

namespace {
constexpr size_t kBufferAlignment = 16;
}  // namespace

// TODO(b/161841696): Consider moving away from global arena buffers:
constexpr int KernelRunner::kNumScratchBuffers_;
constexpr int KernelRunner::kKernelRunnerBufferSize_;
uint8_t KernelRunner::kKernelRunnerBuffer_[];

KernelRunner::KernelRunner(const TfLiteRegistration& registration,
                           TfLiteTensor* tensors, int tensors_size,
                           TfLiteIntArray* inputs, TfLiteIntArray* outputs,
                           void* builtin_data, ErrorReporter* error_reporter)
    : allocator_(SimpleMemoryAllocator::Create(
          error_reporter, kKernelRunnerBuffer_, kKernelRunnerBufferSize_)),
      registration_(registration),
      tensors_(tensors),
      error_reporter_(error_reporter) {
  // Prepare TfLiteContext:
  context_.impl_ = static_cast<void*>(this);
  context_.ReportError = ReportOpError;
  context_.recommended_num_threads = 1;
  context_.GetTensor = GetTensor;
  context_.GetEvalTensor = GetEvalTensor;
  context_.AllocatePersistentBuffer = AllocatePersistentBuffer;
  context_.RequestScratchBufferInArena = RequestScratchBufferInArena;
  context_.GetScratchBuffer = GetScratchBuffer;

  // Prepare TfLiteNode:
  node_.inputs = inputs;
  node_.outputs = outputs;
  node_.builtin_data = builtin_data;
}

TfLiteStatus KernelRunner::InitAndPrepare(const char* init_data) {
  if (registration_.init) {
    node_.user_data = registration_.init(&context_, init_data, /*length=*/0);
  }
  if (registration_.prepare) {
    TF_LITE_ENSURE_STATUS(registration_.prepare(&context_, &node_));
  }
  return kTfLiteOk;
}

TfLiteStatus KernelRunner::Invoke() {
  if (registration_.invoke == nullptr) {
    TF_LITE_REPORT_ERROR(error_reporter_,
                         "TfLiteRegistration missing invoke function pointer!");
    return kTfLiteError;
  }
  return registration_.invoke(&context_, &node_);
}

TfLiteTensor* KernelRunner::GetTensor(const struct TfLiteContext* context,
                                      int tensor_index) {
  TFLITE_DCHECK(context != nullptr);
  KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
  TFLITE_DCHECK(runner != nullptr);

  return &runner->tensors_[tensor_index];
}

TfLiteEvalTensor* KernelRunner::GetEvalTensor(
    const struct TfLiteContext* context, int tensor_index) {
  TFLITE_DCHECK(context != nullptr);
  KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
  TFLITE_DCHECK(runner != nullptr);

  TfLiteEvalTensor* eval_tensor =
      reinterpret_cast<TfLiteEvalTensor*>(runner->allocator_->AllocateTemp(
          sizeof(TfLiteEvalTensor), alignof(TfLiteEvalTensor)));
  TFLITE_DCHECK(eval_tensor != nullptr);

  // In unit tests, the TfLiteTensor pointer contains the source of truth for
  // buffers and values:
  eval_tensor->data = runner->tensors_[tensor_index].data;
  eval_tensor->dims = runner->tensors_[tensor_index].dims;
  eval_tensor->type = runner->tensors_[tensor_index].type;
  return eval_tensor;
}

void* KernelRunner::AllocatePersistentBuffer(TfLiteContext* context,
                                             size_t bytes) {
  TFLITE_DCHECK(context != nullptr);
  KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
  TFLITE_DCHECK(runner != nullptr);

  return runner->allocator_->AllocateFromTail(bytes, kBufferAlignment);
}

TfLiteStatus KernelRunner::RequestScratchBufferInArena(TfLiteContext* context,
                                                       size_t bytes,
                                                       int* buffer_index) {
  TFLITE_DCHECK(context != nullptr);
  TFLITE_DCHECK(buffer_index != nullptr);

  KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
  TFLITE_DCHECK(runner != nullptr);

  if (runner->scratch_buffer_count_ == kNumScratchBuffers_) {
    TF_LITE_REPORT_ERROR(
        runner->error_reporter_,
        "Exceeded the maximum number of scratch tensors allowed (%d).",
        kNumScratchBuffers_);
    return kTfLiteError;
  }

  // For tests, we allocate scratch buffers from the tail and keep them around
  // for the lifetime of the model. This means that the arena size in the tests
  // will be more than what we would have if the scratch buffers could share
  // memory.
  runner->scratch_buffers_[runner->scratch_buffer_count_] =
      runner->allocator_->AllocateFromTail(bytes, kBufferAlignment);
  TFLITE_DCHECK(runner->scratch_buffers_[runner->scratch_buffer_count_] !=
                nullptr);

  *buffer_index = runner->scratch_buffer_count_++;
  return kTfLiteOk;
}

void* KernelRunner::GetScratchBuffer(TfLiteContext* context, int buffer_index) {
  TFLITE_DCHECK(context != nullptr);
  KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
  TFLITE_DCHECK(runner != nullptr);

  TFLITE_DCHECK(runner->scratch_buffer_count_ <= kNumScratchBuffers_);
  if (buffer_index >= runner->scratch_buffer_count_) {
    return nullptr;
  }
  return runner->scratch_buffers_[buffer_index];
}

void KernelRunner::ReportOpError(struct TfLiteContext* context,
                                 const char* format, ...) {
  TFLITE_DCHECK(context != nullptr);
  KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
  TFLITE_DCHECK(runner != nullptr);

  va_list args;
  va_start(args, format);
  TF_LITE_REPORT_ERROR(runner->error_reporter_, format, args);
  va_end(args);
}

}  // namespace micro
}  // namespace tflite
@@ -0,0 +1,83 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_RUNNER_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_RUNNER_H_

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"

namespace tflite {
namespace micro {

// Helper class to perform a simulated kernel (i.e. TfLiteRegistration)
// lifecycle (init, prepare, invoke). All internal allocations are handled by
// this class. Simply pass in the registration, the list of required tensors,
// the inputs array, the outputs array, and any builtin data. Calling Invoke()
// will automatically walk the kernel, and the outputs will be ready on the
// TfLiteTensor output provided during construction.
class KernelRunner {
 public:
  KernelRunner(const TfLiteRegistration& registration, TfLiteTensor* tensors,
               int tensors_size, TfLiteIntArray* inputs,
               TfLiteIntArray* outputs, void* builtin_data,
               ErrorReporter* error_reporter);

  // Calls init and prepare on the kernel (i.e. TfLiteRegistration) struct. Any
  // exceptions will be reported through the error_reporter and returned as a
  // status code here.
  TfLiteStatus InitAndPrepare(const char* init_data = nullptr);

  // Calls invoke on the given TfLiteRegistration. After a successful invoke,
  // results will be available in the output tensor as passed into the
  // constructor of this class.
  TfLiteStatus Invoke();

 protected:
  static TfLiteTensor* GetTensor(const struct TfLiteContext* context,
                                 int tensor_index);
  static TfLiteEvalTensor* GetEvalTensor(const struct TfLiteContext* context,
                                         int tensor_index);
  static void* AllocatePersistentBuffer(TfLiteContext* context, size_t bytes);
  static TfLiteStatus RequestScratchBufferInArena(TfLiteContext* context,
                                                  size_t bytes,
                                                  int* buffer_index);
  static void* GetScratchBuffer(TfLiteContext* context, int buffer_index);
  static void ReportOpError(struct TfLiteContext* context, const char* format,
                            ...);

 private:
  static constexpr int kNumScratchBuffers_ = 5;

  static constexpr int kKernelRunnerBufferSize_ = 10000;
  static uint8_t kKernelRunnerBuffer_[kKernelRunnerBufferSize_];

  SimpleMemoryAllocator* allocator_ = nullptr;
  const TfLiteRegistration& registration_;
  TfLiteTensor* tensors_ = nullptr;
  ErrorReporter* error_reporter_ = nullptr;

  TfLiteContext context_ = {};
  TfLiteNode node_ = {};

  int scratch_buffer_count_ = 0;
  uint8_t* scratch_buffers_[kNumScratchBuffers_];
};

}  // namespace micro
}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_RUNNER_H_
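A rough sketch of how a test drives this class end to end; the tensor setup is hand-rolled for brevity, and tflite::testing::IntArrayFromInts is assumed from micro/test_helpers.h (the real unit tests use richer helpers from that header):

#include "tensorflow/lite/micro/kernels/kernel_runner.h"
#include "tensorflow/lite/micro/kernels/micro_ops.h"
#include "tensorflow/lite/micro/test_helpers.h"

TfLiteStatus RunFloorOnce(tflite::ErrorReporter* error_reporter) {
  float input_data[2] = {1.7f, -0.3f};
  float output_data[2] = {};
  int dims_data[] = {1, 2};  // rank 1, two elements
  TfLiteIntArray* dims = tflite::testing::IntArrayFromInts(dims_data);

  TfLiteTensor tensors[2] = {};
  tensors[0].type = kTfLiteFloat32;
  tensors[0].data.f = input_data;
  tensors[0].dims = dims;
  tensors[0].bytes = sizeof(input_data);
  tensors[1] = tensors[0];
  tensors[1].data.f = output_data;

  int inputs_data[] = {1, 0};   // one input: tensor index 0
  int outputs_data[] = {1, 1};  // one output: tensor index 1
  const TfLiteRegistration registration = tflite::ops::micro::Register_FLOOR();
  tflite::micro::KernelRunner runner(
      registration, tensors, /*tensors_size=*/2,
      tflite::testing::IntArrayFromInts(inputs_data),
      tflite::testing::IntArrayFromInts(outputs_data),
      /*builtin_data=*/nullptr, error_reporter);

  TF_LITE_ENSURE_STATUS(runner.InitAndPrepare());
  return runner.Invoke();  // output_data now holds {1.0f, -1.0f}
}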
@@ -0,0 +1,41 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/micro/kernels/kernel_util.h"

#include "tensorflow/lite/c/common.h"

namespace tflite {
namespace micro {

bool HaveSameShapes(const TfLiteEvalTensor* input1,
                    const TfLiteEvalTensor* input2) {
  TFLITE_DCHECK(input1 != nullptr);
  TFLITE_DCHECK(input2 != nullptr);
  return TfLiteIntArrayEqual(input1->dims, input2->dims);
}

const RuntimeShape GetTensorShape(const TfLiteEvalTensor* tensor) {
  if (tensor == nullptr || tensor->dims == nullptr) {
    return RuntimeShape();
  }
  TfLiteIntArray* dims = tensor->dims;
  const int dims_size = dims->size;
  const int32_t* dims_data = reinterpret_cast<const int32_t*>(dims->data);
  return RuntimeShape(dims_size, dims_data);
}

}  // namespace micro
}  // namespace tflite
75
code/lib/tfmicro/tensorflow/lite/micro/kernels/kernel_util.h
Normal file
@@ -0,0 +1,75 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_UTIL_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_UTIL_H_

#include <cstdint>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {
namespace micro {

// Returns a mutable tensor for a given input index. is_variable must be
// checked during prepare when the full TfLiteTensor is available.
inline TfLiteEvalTensor* GetMutableEvalInput(const TfLiteContext* context,
                                             const TfLiteNode* node,
                                             int index) {
  TFLITE_DCHECK(context != nullptr);
  TFLITE_DCHECK(node != nullptr);
  return context->GetEvalTensor(context, node->inputs->data[index]);
}

// Returns the TfLiteEvalTensor struct for a given input index in a node.
inline const TfLiteEvalTensor* GetEvalInput(const TfLiteContext* context,
                                            const TfLiteNode* node,
                                            int index) {
  return GetMutableEvalInput(context, node, index);
}

// Returns the TfLiteEvalTensor struct for a given output index in a node.
inline TfLiteEvalTensor* GetEvalOutput(const TfLiteContext* context,
                                       const TfLiteNode* node, int index) {
  TFLITE_DCHECK(context != nullptr);
  TFLITE_DCHECK(node != nullptr);
  return context->GetEvalTensor(context, node->outputs->data[index]);
}

// Returns data for a TfLiteEvalTensor struct.
template <typename T>
T* GetTensorData(TfLiteEvalTensor* tensor) {
  return tensor != nullptr ? reinterpret_cast<T*>(tensor->data.raw) : nullptr;
}

// Returns const data for a TfLiteEvalTensor struct.
template <typename T>
const T* GetTensorData(const TfLiteEvalTensor* tensor) {
  TFLITE_DCHECK(tensor != nullptr);
  return reinterpret_cast<const T*>(tensor->data.raw);
}

// Returns the shape of a TfLiteEvalTensor struct.
const RuntimeShape GetTensorShape(const TfLiteEvalTensor* tensor);

// Returns true if the given tensors have the same shape.
bool HaveSameShapes(const TfLiteEvalTensor* input1,
                    const TfLiteEvalTensor* input2);

}  // namespace micro
}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_UTIL_H_
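Together these helpers give Eval bodies a uniform shape: fetch eval tensors, check shapes, then operate on raw data. A minimal sketch of the pattern (CopyEval is a hypothetical kernel body, not part of this commit):

TfLiteStatus CopyEval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, /*index=*/0);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, /*index=*/0);
  if (!tflite::micro::HaveSameShapes(input, output)) {
    return kTfLiteError;
  }
  const float* in = tflite::micro::GetTensorData<float>(input);
  float* out = tflite::micro::GetTensorData<float>(output);
  const int flat_size = tflite::micro::GetTensorShape(input).FlatSize();
  for (int i = 0; i < flat_size; ++i) {
    out[i] = in[i];  // identity copy; a real kernel transforms here
  }
  return kTfLiteOk;
}

Note that TfLiteEvalTensor carries no quantization params; as the fully_connected and logistic diffs in this commit show, zero points and multipliers must be cached into user_data during Prepare, while the full TfLiteTensor is still available.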
@@ -14,16 +14,19 @@ limitations under the License.
==============================================================================*/

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/portable_tensor.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h"
#include "tensorflow/lite/kernels/internal/reference/l2normalization.h"
#include "tensorflow/lite/kernels/internal/tensor.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
namespace micro {
namespace l2norm {

namespace {

// This file has two implementations of L2Norm.
enum KernelType {
  kReference,
@@ -33,44 +36,59 @@ enum KernelType {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;

}  // namespace

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
#if defined(DEBUG)
  TFLITE_DCHECK(node->user_data != nullptr);
  TFLITE_DCHECK(node->builtin_data != nullptr);

  auto* params = reinterpret_cast<TfLiteL2NormParams*>(node->builtin_data);
  L2NormalizationParams* data =
      static_cast<L2NormalizationParams*>(node->user_data);

  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);

  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);

  TF_LITE_ENSURE(context, NumDimensions(input) <= 4);

  TF_LITE_ENSURE(context, output->type == kTfLiteFloat32 ||
                              output->type == kTfLiteUInt8 ||
                              output->type == kTfLiteInt8);
  TF_LITE_ENSURE_EQ(context, input->type, output->type);
  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);

  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
    TF_LITE_ENSURE_EQ(context, output->params.scale, (1. / 128.));
    if (output->type == kTfLiteUInt8) {
      TF_LITE_ENSURE_EQ(context, output->params.zero_point, 128);
    }
    if (output->type == kTfLiteInt8) {
      TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
    }
    data->input_zero_point = input->params.zero_point;
  } else if (output->type == kTfLiteFloat32) {
    data->input_zero_point = 0;
  }

  // TODO(ahentz): For some reason our implementations don't support
  // activations.
  TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone);
#endif

  return kTfLiteOk;
}

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context,
                                           sizeof(L2NormalizationParams));
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  TFLITE_DCHECK(node->user_data != nullptr);
  const L2NormalizationParams& data =
      *(static_cast<const L2NormalizationParams*>(node->user_data));

  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  // TODO(b/143912164): instead of hardcoding the epsilon here, we should read
  // it from tensorflow, i.e., by adding a param.
@@ -87,39 +105,32 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  // So we don't even need to handle the epsilon for the quantized kernel case.
  const float epsilon = 1e-6f;
  if (output->type == kTfLiteFloat32) {
#define TF_LITE_L2NORM(type)                                                 \
  tflite::L2NormalizationParams op_params;                                   \
  op_params.input_zero_point = 0;                                            \
  type::L2Normalization(op_params, GetTensorShape(input),                    \
                        GetTensorData<float>(input), GetTensorShape(output), \
                        GetTensorData<float>(output), epsilon)

    TF_LITE_L2NORM(reference_ops);
#undef TF_LITE_L2NORM
    reference_ops::L2Normalization(data, tflite::micro::GetTensorShape(input),
                                   tflite::micro::GetTensorData<float>(input),
                                   tflite::micro::GetTensorShape(output),
                                   tflite::micro::GetTensorData<float>(output),
                                   epsilon);
  } else if (output->type == kTfLiteUInt8) {
#define TF_LITE_L2NORM(type)                                                 \
  tflite::L2NormalizationParams op_params;                                   \
  op_params.input_zero_point = input->params.zero_point;                     \
  type::L2Normalization(op_params, GetTensorShape(input),                    \
                        GetTensorData<uint8>(input), GetTensorShape(output), \
                        GetTensorData<uint8>(output))

    TF_LITE_L2NORM(reference_ops);
#undef TF_LITE_L2NORM
    reference_ops::L2Normalization(
        data, tflite::micro::GetTensorShape(input),
        tflite::micro::GetTensorData<uint8_t>(input),
        tflite::micro::GetTensorShape(output),
        tflite::micro::GetTensorData<uint8_t>(output));
  } else if (output->type == kTfLiteInt8) {
    const auto input_shape = GetTensorShape(input);
    const auto output_shape = GetTensorShape(output);
    const auto input_shape = tflite::micro::GetTensorShape(input);
    const auto output_shape = tflite::micro::GetTensorShape(output);
    const int trailing_dim = input_shape.DimensionsCount() - 1;
    const int depth =
        MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
    const int outer_size =
        MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
    reference_integer_ops::L2Normalization(input->params.zero_point, outer_size,
                                           depth, GetTensorData<int8>(input),
                                           GetTensorData<int8>(output));
    reference_integer_ops::L2Normalization(
        data.input_zero_point, outer_size, depth,
        tflite::micro::GetTensorData<int8_t>(input),
        tflite::micro::GetTensorData<int8_t>(output));
  } else {
    TF_LITE_KERNEL_LOG(context, "Output type is %d, requires float.",
                       output->type);
    TF_LITE_KERNEL_LOG(context, "Output type is %s, requires float.",
                       TfLiteTypeGetName(output->type));
    return kTfLiteError;
  }

@@ -128,22 +139,18 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {

}  // namespace l2norm

TfLiteRegistration* Register_L2NORM_REF() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/l2norm::Prepare,
                                 /*invoke=*/l2norm::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};

  return &r;
TfLiteRegistration Register_L2NORM_REF() {
  return {/*init=*/l2norm::Init,
          /*free=*/nullptr,
          /*prepare=*/l2norm::Prepare,
          /*invoke=*/l2norm::Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

TfLiteRegistration* Register_L2_NORMALIZATION() {
  return Register_L2NORM_REF();
}
TfLiteRegistration Register_L2_NORMALIZATION() { return Register_L2NORM_REF(); }

}  // namespace micro
}  // namespace ops

@@ -15,8 +15,8 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
@@ -31,20 +31,29 @@ constexpr int kOutputTensor = 0;

TfLiteStatus LogicalImpl(TfLiteContext* context, TfLiteNode* node,
                         bool (*func)(bool, bool)) {
  const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
  const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  const TfLiteEvalTensor* input1 =
      tflite::micro::GetEvalInput(context, node, kInputTensor1);
  const TfLiteEvalTensor* input2 =
      tflite::micro::GetEvalInput(context, node, kInputTensor2);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  if (HaveSameShapes(input1, input2)) {
  if (tflite::micro::HaveSameShapes(input1, input2)) {
    reference_ops::BinaryFunction<bool, bool, bool>(
        GetTensorShape(input1), GetTensorData<bool>(input1),
        GetTensorShape(input2), GetTensorData<bool>(input2),
        GetTensorShape(output), GetTensorData<bool>(output), func);
        tflite::micro::GetTensorShape(input1),
        tflite::micro::GetTensorData<bool>(input1),
        tflite::micro::GetTensorShape(input2),
        tflite::micro::GetTensorData<bool>(input2),
        tflite::micro::GetTensorShape(output),
        tflite::micro::GetTensorData<bool>(output), func);
  } else {
    reference_ops::BroadcastBinaryFunction4DSlow<bool, bool, bool>(
        GetTensorShape(input1), GetTensorData<bool>(input1),
        GetTensorShape(input2), GetTensorData<bool>(input2),
        GetTensorShape(output), GetTensorData<bool>(output), func);
        tflite::micro::GetTensorShape(input1),
        tflite::micro::GetTensorData<bool>(input1),
        tflite::micro::GetTensorShape(input2),
        tflite::micro::GetTensorData<bool>(input2),
        tflite::micro::GetTensorShape(output),
        tflite::micro::GetTensorData<bool>(output), func);
  }

  return kTfLiteOk;
@@ -65,32 +74,30 @@ TfLiteStatus LogicalAndEval(TfLiteContext* context, TfLiteNode* node) {
}  // namespace
}  // namespace logical

TfLiteRegistration* Register_LOGICAL_OR() {
TfLiteRegistration Register_LOGICAL_OR() {
  // Init, Free, Prepare, and Eval satisfy the interface required by
  // TfLiteRegistration.
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/logical::LogicalOrEval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/nullptr,
          /*invoke=*/logical::LogicalOrEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

TfLiteRegistration* Register_LOGICAL_AND() {
TfLiteRegistration Register_LOGICAL_AND() {
  // Init, Free, Prepare, and Eval satisfy the interface required by
  // TfLiteRegistration.
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/logical::LogicalAndEval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/nullptr,
          /*invoke=*/logical::LogicalAndEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

}  // namespace micro

@@ -23,6 +23,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
@@ -42,9 +43,11 @@ struct OpData {
TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
                                       OpData* data) {
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);

  TF_LITE_ENSURE_EQ(context, input->type, output->type);
  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
  if (input->type == kTfLiteInt8) {
    TF_LITE_ENSURE_EQ(context, output->params.zero_point,
                      std::numeric_limits<int8_t>::min());
@@ -54,6 +57,8 @@ TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
        static_cast<double>(input->params.scale) *
        static_cast<double>(1 << (31 - kInputIntegerBits));

    data->input_zero_point = input->params.zero_point;

    const double q = std::frexp(input_real_multiplier, &data->input_left_shift);
    data->input_multiplier = static_cast<int32_t>(TfLiteRound(q * (1ll << 31)));

@@ -64,18 +69,34 @@ TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
}
}  // namespace

void* LogisticInit(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus LogisticPrepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  OpData* data = static_cast<OpData*>(node->user_data);

  return CalculateArithmeticOpData(context, node, data);
}

TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  OpData data;
  CalculateArithmeticOpData(context, node, &data);
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  TFLITE_DCHECK(node->user_data != nullptr);
  OpData* data = static_cast<OpData*>(node->user_data);

  if (input->type == kTfLiteFloat32) {
    switch (output->type) {
      case kTfLiteFloat32: {
        reference_ops::Logistic(
            GetTensorShape(input), GetTensorData<float>(input),
            GetTensorShape(output), GetTensorData<float>(output));
        reference_ops::Logistic(tflite::micro::GetTensorShape(input),
                                tflite::micro::GetTensorData<float>(input),
                                tflite::micro::GetTensorShape(output),
                                tflite::micro::GetTensorData<float>(output));
        return kTfLiteOk;
      }
      default:
@@ -88,10 +109,11 @@ TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
    switch (output->type) {
      case kTfLiteInt8: {
        reference_integer_ops::Logistic(
            input->params.zero_point, data.input_range_radius,
            data.input_multiplier, data.input_left_shift,
            NumElements(input->dims), GetTensorData<int8_t>(input),
            GetTensorData<int8_t>(output));
            data->input_zero_point, data->input_range_radius,
            data->input_multiplier, data->input_left_shift,
            NumElements(input->dims),
            tflite::micro::GetTensorData<int8_t>(input),
            tflite::micro::GetTensorData<int8_t>(output));
        return kTfLiteOk;
      }
      default:
@@ -113,16 +135,15 @@ TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {

}  // namespace activations

TfLiteRegistration* Register_LOGISTIC() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/activations::LogisticEval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_LOGISTIC() {
  return {/*init=*/activations::LogisticInit,
          /*free=*/nullptr,
          /*prepare=*/activations::LogisticPrepare,
          /*invoke=*/activations::LogisticEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}
}  // namespace micro
}  // namespace ops

@@ -22,6 +22,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
@@ -40,13 +41,13 @@ constexpr int kOutputTensor = 0;

struct OpContext {
  OpContext(TfLiteContext* context, TfLiteNode* node) {
    input1 = GetInput(context, node, kInputTensor1);
    input2 = GetInput(context, node, kInputTensor2);
    output = GetOutput(context, node, kOutputTensor);
    input1 = tflite::micro::GetEvalInput(context, node, kInputTensor1);
    input2 = tflite::micro::GetEvalInput(context, node, kInputTensor2);
    output = tflite::micro::GetEvalOutput(context, node, kOutputTensor);
  }
  const TfLiteTensor* input1;
  const TfLiteTensor* input2;
  TfLiteTensor* output;
  const TfLiteEvalTensor* input1;
  const TfLiteEvalTensor* input2;
  TfLiteEvalTensor* output;
};

struct MaximumOp {
@@ -69,12 +70,12 @@ template <typename data_type, typename op_type>
void TFLiteOperation(TfLiteContext* context, TfLiteNode* node,
                     const OpContext& op_context) {
  reference_ops::MaximumMinimumBroadcastSlow(
      GetTensorShape(op_context.input1),
      GetTensorData<data_type>(op_context.input1),
      GetTensorShape(op_context.input2),
      GetTensorData<data_type>(op_context.input2),
      GetTensorShape(op_context.output),
      GetTensorData<data_type>(op_context.output),
      tflite::micro::GetTensorShape(op_context.input1),
      tflite::micro::GetTensorData<data_type>(op_context.input1),
      tflite::micro::GetTensorShape(op_context.input2),
      tflite::micro::GetTensorData<data_type>(op_context.input2),
      tflite::micro::GetTensorShape(op_context.output),
      tflite::micro::GetTensorData<data_type>(op_context.output),
      op_type::template op<data_type>);
}

@@ -116,34 +117,30 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {

}  // namespace maximum_minimum

TfLiteRegistration* Register_MAXIMUM() {
  static TfLiteRegistration r = {
      /*init=*/nullptr,
      /*free=*/nullptr,
      /*prepare=*/nullptr,
      /*invoke=*/
      maximum_minimum::Eval<maximum_minimum::kReference,
                            maximum_minimum::MaximumOp>,
      /*profiling_string=*/nullptr,
      /*builtin_code=*/0,
      /*custom_name=*/nullptr,
      /*version=*/0};
  return &r;
TfLiteRegistration Register_MAXIMUM() {
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/nullptr,
          /*invoke=*/
          maximum_minimum::Eval<maximum_minimum::kReference,
                                maximum_minimum::MaximumOp>,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

TfLiteRegistration* Register_MINIMUM() {
  static TfLiteRegistration r = {
      /*init=*/nullptr,
      /*free=*/nullptr,
      /*prepare=*/nullptr,
      /*invoke=*/
      maximum_minimum::Eval<maximum_minimum::kReference,
                            maximum_minimum::MinimumOp>,
      /*profiling_string=*/nullptr,
      /*builtin_code=*/0,
      /*custom_name=*/nullptr,
      /*version=*/0};
  return &r;
TfLiteRegistration Register_MINIMUM() {
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/nullptr,
          /*invoke=*/
          maximum_minimum::Eval<maximum_minimum::kReference,
                                maximum_minimum::MinimumOp>,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

}  // namespace micro

@@ -17,10 +17,6 @@ limitations under the License.
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
|
||||
// Forward declaration of all micro op kernel registration methods. These
|
||||
// registrations are included with the standard `BuiltinOpResolver`.
|
||||
//
|
||||
@@ -29,58 +25,73 @@ namespace micro {
|
||||
// their model requires, using a custom `(Micro)MutableOpResolver`. Selective
|
||||
// registration in turn allows the linker to strip unused kernels.
|
||||
|
||||
TfLiteRegistration* Register_ABS();
|
||||
TfLiteRegistration* Register_ADD();
|
||||
TfLiteRegistration* Register_ARG_MAX();
|
||||
TfLiteRegistration* Register_ARG_MIN();
|
||||
TfLiteRegistration* Register_AVERAGE_POOL_2D();
|
||||
TfLiteRegistration* Register_CEIL();
|
||||
namespace tflite {
|
||||
|
||||
// TFLM is incrementally moving towards a flat tflite namespace
|
||||
// (https://abseil.io/tips/130). Any new ops (or cleanup of existing ops should
|
||||
// have their Register function declarations in the tflite namespace.

TfLiteRegistration Register_CONV_2D();
TfLiteRegistration Register_DEPTHWISE_CONV_2D();
TfLiteRegistration Register_QUANTIZE();
TfLiteRegistration Register_SHAPE();
TfLiteRegistration Register_SOFTMAX();
TfLiteRegistration Register_SVDF();

namespace ops {
namespace micro {

TfLiteRegistration Register_ABS();
TfLiteRegistration Register_ADD();
TfLiteRegistration Register_ARG_MAX();
TfLiteRegistration Register_ARG_MIN();
TfLiteRegistration Register_AVERAGE_POOL_2D();
TfLiteRegistration Register_CEIL();
// TODO(b/160234179): Change custom OPs to also return by value.
TfLiteRegistration* Register_CIRCULAR_BUFFER();
TfLiteRegistration* Register_CONV_2D();
TfLiteRegistration* Register_CONCATENATION();
TfLiteRegistration* Register_COS();
TfLiteRegistration* Register_DEPTHWISE_CONV_2D();
TfLiteRegistration* Register_DEQUANTIZE();
TfLiteRegistration* Register_EQUAL();
TfLiteRegistration* Register_FLOOR();
TfLiteRegistration* Register_FULLY_CONNECTED();
TfLiteRegistration* Register_GREATER();
TfLiteRegistration* Register_GREATER_EQUAL();
TfLiteRegistration* Register_LESS();
TfLiteRegistration* Register_LESS_EQUAL();
TfLiteRegistration* Register_LOG();
TfLiteRegistration* Register_LOGICAL_AND();
TfLiteRegistration* Register_LOGICAL_NOT();
TfLiteRegistration* Register_LOGICAL_OR();
TfLiteRegistration* Register_LOGISTIC();
TfLiteRegistration* Register_MAXIMUM();
TfLiteRegistration* Register_MAX_POOL_2D();
TfLiteRegistration* Register_MEAN();
TfLiteRegistration* Register_MINIMUM();
TfLiteRegistration* Register_MUL();
TfLiteRegistration* Register_NEG();
TfLiteRegistration* Register_NOT_EQUAL();
TfLiteRegistration* Register_PACK();
TfLiteRegistration* Register_PAD();
TfLiteRegistration* Register_PADV2();
TfLiteRegistration* Register_PRELU();
TfLiteRegistration* Register_QUANTIZE();
TfLiteRegistration* Register_RELU();
TfLiteRegistration* Register_RELU6();
TfLiteRegistration* Register_RESHAPE();
TfLiteRegistration* Register_RESIZE_NEAREST_NEIGHBOR();
TfLiteRegistration* Register_ROUND();
TfLiteRegistration* Register_RSQRT();
TfLiteRegistration* Register_SIN();
TfLiteRegistration* Register_SOFTMAX();
TfLiteRegistration* Register_SPLIT();
TfLiteRegistration* Register_SQRT();
TfLiteRegistration* Register_SQUARE();
TfLiteRegistration* Register_STRIDED_SLICE();
TfLiteRegistration* Register_SUB();
TfLiteRegistration* Register_SVDF();
TfLiteRegistration* Register_UNPACK();
TfLiteRegistration* Register_L2_NORMALIZATION();
TfLiteRegistration Register_CONCATENATION();
TfLiteRegistration Register_COS();
TfLiteRegistration Register_DEQUANTIZE();
TfLiteRegistration Register_EQUAL();
TfLiteRegistration Register_FLOOR();
TfLiteRegistration Register_GREATER();
TfLiteRegistration Register_GREATER_EQUAL();
TfLiteRegistration Register_HARD_SWISH();
TfLiteRegistration Register_LESS();
TfLiteRegistration Register_LESS_EQUAL();
TfLiteRegistration Register_LOG();
TfLiteRegistration Register_LOGICAL_AND();
TfLiteRegistration Register_LOGICAL_NOT();
TfLiteRegistration Register_LOGICAL_OR();
TfLiteRegistration Register_LOGISTIC();
TfLiteRegistration Register_MAXIMUM();
TfLiteRegistration Register_MAX_POOL_2D();
TfLiteRegistration Register_MEAN();
TfLiteRegistration Register_MINIMUM();
TfLiteRegistration Register_MUL();
TfLiteRegistration Register_NEG();
TfLiteRegistration Register_NOT_EQUAL();
TfLiteRegistration Register_PACK();
TfLiteRegistration Register_PAD();
TfLiteRegistration Register_PADV2();
TfLiteRegistration Register_PRELU();
TfLiteRegistration Register_REDUCE_MAX();
TfLiteRegistration Register_RELU();
TfLiteRegistration Register_RELU6();
TfLiteRegistration Register_RESHAPE();
TfLiteRegistration Register_RESIZE_NEAREST_NEIGHBOR();
TfLiteRegistration Register_ROUND();
TfLiteRegistration Register_RSQRT();
TfLiteRegistration Register_SIN();
TfLiteRegistration Register_SPLIT();
TfLiteRegistration Register_SPLIT_V();
TfLiteRegistration Register_SQRT();
TfLiteRegistration Register_SQUARE();
TfLiteRegistration Register_STRIDED_SLICE();
TfLiteRegistration Register_SUB();
TfLiteRegistration Register_UNPACK();
TfLiteRegistration Register_L2_NORMALIZATION();
TfLiteRegistration Register_TANH();

} // namespace micro
} // namespace ops

@@ -21,132 +21,194 @@ limitations under the License.

#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"

namespace tflite {
namespace ops {
namespace micro {
namespace mul {
namespace {

constexpr int kInput1Tensor = 0;
constexpr int kInput2Tensor = 1;
constexpr int kOutputTensor = 0;

struct OpData {
  int32_t input1_zero_point;
  int32_t input2_zero_point;

  int32_t output_activation_min;
  int32_t output_activation_max;

  int32_t output_zero_point;
  int32_t output_multiplier;
  int output_shift;

  float output_activation_min_f32;
  float output_activation_max_f32;
};

TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
                             TfLiteMulParams* params, OpData* data) {
  const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor);
  TF_LITE_ENSURE(context, input1 != nullptr);
  const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor);
  TF_LITE_ENSURE(context, input2 != nullptr);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);

  TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);

  TF_LITE_ENSURE_EQ(context, input1->type, input2->type);

  TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
      context, params->activation, output, &data->output_activation_min,
      &data->output_activation_max));
  TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);

  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
    TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
        context, params->activation, output, &data->output_activation_min,
        &data->output_activation_max));

    double real_multiplier = static_cast<double>(input1->params.scale) *
                             static_cast<double>(input2->params.scale) /
                             static_cast<double>(output->params.scale);
    QuantizeMultiplier(real_multiplier, &data->output_multiplier,
                       &data->output_shift);

    data->input1_zero_point = input1->params.zero_point;
    data->input2_zero_point = input2->params.zero_point;
    data->output_zero_point = output->params.zero_point;
  } else {
    CalculateActivationRange(params->activation,
                             &data->output_activation_min_f32,
                             &data->output_activation_max_f32);
  }

  return kTfLiteOk;
}
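
// The quantized path above folds the three tensor scales into one constant:
// multiplying two quantized values scales the raw product by s1 * s2, and
// mapping it into the output scale s_out therefore needs
//
//   real_multiplier = (s1 * s2) / s_out
//
// QuantizeMultiplier() stores this as a Q31 fixed-point multiplier plus a
// power-of-two shift. Worked example with illustrative scales: s1 = s2 = 0.02
// and s_out = 0.04 give real_multiplier = 0.01 = 0.64 * 2^-6, so
// output_multiplier is 0.64 in Q31 (~= 1374389535) and output_shift is -6.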

void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
                   TfLiteMulParams* params, OpData* data,
                   const TfLiteTensor* input1, const TfLiteTensor* input2,
                   TfLiteTensor* output) {
  if (output->type == kTfLiteInt8 || output->type == kTfLiteUInt8) {
    tflite::ArithmeticParams op_params;
    SetActivationParams(data->output_activation_min,
                        data->output_activation_max, &op_params);
    op_params.input1_offset = -input1->params.zero_point;
    op_params.input2_offset = -input2->params.zero_point;
    op_params.output_offset = output->params.zero_point;
    op_params.output_multiplier = data->output_multiplier;
    op_params.output_shift = data->output_shift;
    bool need_broadcast = reference_ops::ProcessBroadcastShapes(
        GetTensorShape(input1), GetTensorShape(input2), &op_params);
} // namespace

#define TF_LITE_MUL(type, opname, dtype)                             \
  type::opname(op_params, GetTensorShape(input1),                    \
               GetTensorData<dtype>(input1), GetTensorShape(input2), \
               GetTensorData<dtype>(input2), GetTensorShape(output), \
               GetTensorData<dtype>(output));
void EvalQuantized(TfLiteContext* context, TfLiteNode* node, const OpData* data,
                   const TfLiteEvalTensor* input1,
                   const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
  tflite::ArithmeticParams op_params = {};
  op_params.quantized_activation_min = data->output_activation_min;
  op_params.quantized_activation_max = data->output_activation_max;
  op_params.float_activation_max = data->output_activation_max_f32;
  op_params.input1_offset = -data->input1_zero_point;
  op_params.input2_offset = -data->input2_zero_point;
  op_params.output_offset = data->output_zero_point;
  op_params.output_multiplier = data->output_multiplier;
  op_params.output_shift = data->output_shift;

    if (output->type == kTfLiteInt8) {
      if (need_broadcast) {
        TF_LITE_MUL(reference_integer_ops, BroadcastMul4DSlow, int8_t);
      } else {
        TF_LITE_MUL(reference_integer_ops, Mul, int8_t);
      }
    } else if (output->type == kTfLiteUInt8) {
      if (need_broadcast) {
        TF_LITE_MUL(reference_ops, BroadcastMul4DSlow, uint8_t);
      } else {
        TF_LITE_MUL(reference_ops, Mul, uint8_t);
      }
  bool need_broadcast = reference_ops::ProcessBroadcastShapes(
      tflite::micro::GetTensorShape(input1),
      tflite::micro::GetTensorShape(input2), &op_params);

  if (output->type == kTfLiteInt8) {
    if (need_broadcast) {
      reference_integer_ops::BroadcastMul4DSlow(
          op_params, tflite::micro::GetTensorShape(input1),
          tflite::micro::GetTensorData<int8_t>(input1),
          tflite::micro::GetTensorShape(input2),
          tflite::micro::GetTensorData<int8_t>(input2),
          tflite::micro::GetTensorShape(output),
          tflite::micro::GetTensorData<int8_t>(output));
    } else {
      reference_integer_ops::Mul(op_params,
                                 tflite::micro::GetTensorShape(input1),
                                 tflite::micro::GetTensorData<int8_t>(input1),
                                 tflite::micro::GetTensorShape(input2),
                                 tflite::micro::GetTensorData<int8_t>(input2),
                                 tflite::micro::GetTensorShape(output),
                                 tflite::micro::GetTensorData<int8_t>(output));
    }
  } else if (output->type == kTfLiteUInt8) {
    if (need_broadcast) {
      reference_integer_ops::BroadcastMul4DSlow(
          op_params, tflite::micro::GetTensorShape(input1),
          tflite::micro::GetTensorData<uint8_t>(input1),
          tflite::micro::GetTensorShape(input2),
          tflite::micro::GetTensorData<uint8_t>(input2),
          tflite::micro::GetTensorShape(output),
          tflite::micro::GetTensorData<uint8_t>(output));
    } else {
      reference_integer_ops::Mul(op_params,
                                 tflite::micro::GetTensorShape(input1),
                                 tflite::micro::GetTensorData<uint8_t>(input1),
                                 tflite::micro::GetTensorShape(input2),
                                 tflite::micro::GetTensorData<uint8_t>(input2),
                                 tflite::micro::GetTensorShape(output),
                                 tflite::micro::GetTensorData<uint8_t>(output));
    }
#undef TF_LITE_MUL
  }
}

void EvalFloat(TfLiteContext* context, TfLiteNode* node,
               TfLiteMulParams* params, OpData* data,
               const TfLiteTensor* input1, const TfLiteTensor* input2,
               TfLiteTensor* output) {
  float output_activation_min, output_activation_max;
  CalculateActivationRange(params->activation, &output_activation_min,
                           &output_activation_max);
  tflite::ArithmeticParams op_params;
  SetActivationParams(output_activation_min, output_activation_max, &op_params);
               TfLiteMulParams* params, const OpData* data,
               const TfLiteEvalTensor* input1, const TfLiteEvalTensor* input2,
               TfLiteEvalTensor* output) {
  tflite::ArithmeticParams op_params = {};
  op_params.float_activation_min = data->output_activation_min_f32;
  op_params.float_activation_max = data->output_activation_max_f32;

  bool need_broadcast = reference_ops::ProcessBroadcastShapes(
      GetTensorShape(input1), GetTensorShape(input2), &op_params);
#define TF_LITE_MUL(opname)                                                   \
  reference_ops::opname(op_params, GetTensorShape(input1),                    \
                        GetTensorData<float>(input1), GetTensorShape(input2), \
                        GetTensorData<float>(input2), GetTensorShape(output), \
                        GetTensorData<float>(output));
      tflite::micro::GetTensorShape(input1),
      tflite::micro::GetTensorShape(input2), &op_params);

  if (need_broadcast) {
    TF_LITE_MUL(BroadcastMul4DSlow);
    reference_ops::BroadcastMul4DSlow(
        op_params, tflite::micro::GetTensorShape(input1),
        tflite::micro::GetTensorData<float>(input1),
        tflite::micro::GetTensorShape(input2),
        tflite::micro::GetTensorData<float>(input2),
        tflite::micro::GetTensorShape(output),
        tflite::micro::GetTensorData<float>(output));
  } else {
    TF_LITE_MUL(Mul);
    reference_ops::Mul(op_params, tflite::micro::GetTensorShape(input1),
                       tflite::micro::GetTensorData<float>(input1),
                       tflite::micro::GetTensorShape(input2),
                       tflite::micro::GetTensorData<float>(input2),
                       tflite::micro::GetTensorShape(output),
                       tflite::micro::GetTensorData<float>(output));
  }
#undef TF_LITE_MUL
}

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->builtin_data != nullptr);
  auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);

  TFLITE_DCHECK(node->user_data != nullptr);
  OpData* data = static_cast<OpData*>(node->user_data);

  return CalculateOpData(context, node, params, data);
}
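
// TFLM kernel lifecycle, as wired by the interpreter: Init() runs once per
// node and reserves persistent storage from the arena, Prepare() fills that
// storage from static tensor metadata, and Eval() then runs repeatedly with
// no further allocation. Roughly (an illustrative sketch, not the actual
// interpreter code):
//
//   node.user_data = registration.init(context, buffer, length);
//   registration.prepare(context, &node);  // once, after tensor allocation
//   registration.invoke(context, &node);   // every inference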

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->builtin_data != nullptr);
  auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
  OpData data;

  const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor);
  const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  TFLITE_DCHECK(node->user_data != nullptr);
  const OpData* data = static_cast<const OpData*>(node->user_data);

  CalculateOpData(context, node, params, &data);
  const TfLiteEvalTensor* input1 =
      tflite::micro::GetEvalInput(context, node, kInput1Tensor);
  const TfLiteEvalTensor* input2 =
      tflite::micro::GetEvalInput(context, node, kInput2Tensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  switch (input1->type) {
    case kTfLiteUInt8:
    case kTfLiteInt8:
      EvalQuantized(context, node, params, &data, input1, input2, output);
      EvalQuantized(context, node, data, input1, input2, output);
      break;
    case kTfLiteFloat32:
      EvalFloat(context, node, params, &data, input1, input2, output);
      EvalFloat(context, node, params, data, input1, input2, output);
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@@ -158,16 +220,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
}
} // namespace mul

TfLiteRegistration* Register_MUL() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/mul::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_MUL() {
  return {/*init=*/mul::Init,
          /*free=*/nullptr,
          /*prepare=*/mul::Prepare,
          /*invoke=*/mul::Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

} // namespace micro

@@ -17,7 +17,7 @@ limitations under the License.

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
@@ -28,14 +28,17 @@ constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
  switch (input->type) {
    // TODO(wangtz): handle for kTfLiteInt8
    case kTfLiteFloat32:
      reference_ops::Negate(GetTensorShape(input), GetTensorData<float>(input),
                            GetTensorShape(output),
                            GetTensorData<float>(output));
      reference_ops::Negate(tflite::micro::GetTensorShape(input),
                            tflite::micro::GetTensorData<float>(input),
                            tflite::micro::GetTensorShape(output),
                            tflite::micro::GetTensorData<float>(output));
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@@ -47,16 +50,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {

} // namespace neg

TfLiteRegistration* Register_NEG() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/neg::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_NEG() {
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/nullptr,
          /*invoke=*/neg::Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

} // namespace micro

@@ -16,7 +16,7 @@ limitations under the License.
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
@@ -28,9 +28,11 @@ constexpr int kOutputTensor = 0;

template <typename T>
TfLiteStatus PackImpl(TfLiteContext* context, TfLiteNode* node,
                      TfLiteTensor* output, int values_count, int axis) {
                      TfLiteEvalTensor* output, int values_count, int axis) {
  const TfLiteEvalTensor* input0 =
      tflite::micro::GetEvalInput(context, node, 0);

  const int dimensions = output->dims->size;
  const TfLiteTensor* input0 = GetInput(context, node, 0);
  const TfLiteIntArray* input_dims = input0->dims;
  const TfLiteIntArray* output_dims = output->dims;

@@ -52,11 +54,11 @@ TfLiteStatus PackImpl(TfLiteContext* context, TfLiteNode* node,
  }
  TFLITE_DCHECK_EQ(input_size, copy_size * outer_size);

  T* output_data = GetTensorData<T>(output);
  T* output_data = tflite::micro::GetTensorData<T>(output);

  for (int i = 0; i < values_count; ++i) {
    const TfLiteTensor* t = GetInput(context, node, i);
    const T* input_data = GetTensorData<T>(t);
    const TfLiteEvalTensor* t = tflite::micro::GetEvalInput(context, node, i);
    const T* input_data = tflite::micro::GetTensorData<T>(t);
    for (int k = 0; k < outer_size; ++k) {
      const T* input_ptr = input_data + copy_size * k;
      int loc = k * values_count * copy_size + i * copy_size;
@@ -72,7 +74,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLitePackParams* data =
      reinterpret_cast<TfLitePackParams*>(node->builtin_data);

  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  switch (output->type) {
    case kTfLiteFloat32: {
@@ -108,16 +111,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
} // namespace
} // namespace pack

TfLiteRegistration* Register_PACK() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/pack::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_PACK() {
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/nullptr,
          /*invoke=*/pack::Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

} // namespace micro

@@ -16,189 +16,208 @@ limitations under the License.

#include <string.h>

#include "tensorflow/lite/kernels/internal/types.h"

#ifdef MEMORY_SANITIZER
#include <sanitizer/msan_interface.h>
#else
#define __msan_check_mem_is_initialized(ptr, size)
#endif

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor.h"
#include "tensorflow/lite/kernels/internal/portable_tensor.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
namespace micro {
namespace pad {
namespace {

struct PadContext {
  PadContext(TfLiteContext* context, TfLiteNode* node) {
    input = GetInput(context, node, 0);
    paddings = GetInput(context, node, 1);
    constant_values = nullptr;
    if (NumInputs(node) == 3) {
      constant_values = GetOptionalInputTensor(context, node, 2);
    } else {
      constant_values = nullptr;
    }
    output = GetOutput(context, node, 0);
    dims = NumDimensions(input);

    resizing_category = ResizingCategory::kGenericResize;
    const int paddings_total = GetTensorShape(paddings).FlatSize();
    const int32* paddings_data = GetTensorData<int32>(paddings);
    // Paddings will be a n,2 array, and we need to detect 4D arrays with the
    // pattern { {0,0}, {a, b}, {c, d}, {0,0} }.
    if (IsConstantTensor(paddings) && paddings_total == 8 &&
        (paddings_data[0] == 0 && paddings_data[1] == 0) &&
        (paddings_data[6] == 0 && paddings_data[7] == 0)) {
      resizing_category = ResizingCategory::kImageStyle;
    }
  }
  const TfLiteTensor* constant_values;
  const TfLiteTensor* input;
  const TfLiteTensor* paddings;
  TfLiteTensor* output;
  int dims;
  ResizingCategory resizing_category;
struct OpData {
  PadParams params;
  int32_t output_zero_point;
};

} // namespace

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  OpData* data = static_cast<OpData*>(node->user_data);

  TF_LITE_ENSURE(context, NumInputs(node) == 2 || NumInputs(node) == 3);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);

  PadContext op_context(context, node);
  TF_LITE_ENSURE_EQ(context, op_context.input->type, op_context.output->type);
  if (op_context.constant_values != nullptr) {
    TF_LITE_ENSURE_EQ(context, op_context.input->type,
                      op_context.constant_values->type);
  const TfLiteTensor* input = GetInput(context, node, /*index=*/0);
  TF_LITE_ENSURE(context, input != nullptr);
  const TfLiteTensor* paddings = GetInput(context, node, /*index=*/1);
  TF_LITE_ENSURE(context, paddings != nullptr);
  const TfLiteTensor* constant_values =
      NumInputs(node) == 3 ? GetInput(context, node, /*index=*/2) : nullptr;
  TfLiteTensor* output = GetOutput(context, node, /*index=*/0);
  TF_LITE_ENSURE(context, output != nullptr);

  TF_LITE_ENSURE_EQ(context, input->type, output->type);

  // Current implementations rely on the inputs being <= 4D.
  TF_LITE_ENSURE(context, NumDimensions(input) <=
                              reference_ops::PadKernelMaxDimensionCount());

  if (constant_values != nullptr) {
    TF_LITE_ENSURE_EQ(context, input->type, constant_values->type);
    // Ensure that constant_values is a scalar.
    TF_LITE_ENSURE_EQ(context, NumElements(constant_values), 1);
  }

  // There must be a pair of paddings for each output dimension.
  TF_LITE_ENSURE_EQ(context, GetTensorShape(op_context.paddings).FlatSize(),
                    op_context.output->dims->size * 2);
  TF_LITE_ENSURE_EQ(context, GetTensorShape(paddings).FlatSize(),
                    output->dims->size * 2);

  // On Micro, outputs must be properly sized by the converter.
  const int32* paddings_data = GetTensorData<int32>(op_context.paddings);
  for (int i = 0; i < op_context.output->dims->size; i++) {
    int output_dim = op_context.output->dims->data[i];
    int expected_dim = op_context.input->dims->data[i] + paddings_data[i * 2] +
                       paddings_data[i * 2 + 1];
  // NOTE: This data is only available because the paddings buffer is stored in
  // the flatbuffer:
  TF_LITE_ENSURE(context, IsConstantTensor(paddings));
  const int32_t* paddings_data = GetTensorData<int32_t>(paddings);
  for (int i = 0; i < output->dims->size; i++) {
    int output_dim = output->dims->data[i];
    int expected_dim =
        input->dims->data[i] + paddings_data[i * 2] + paddings_data[i * 2 + 1];
    TF_LITE_ENSURE_EQ(context, output_dim, expected_dim);
  }
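
  // For example, a dimension of size 28 padded by {2, 2} must already have
  // output_dim == 28 + 2 + 2 == 32 here; on Micro the kernel never resizes
  // the output itself.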

  // Current implementations rely on the inputs being <= 4D.
  TF_LITE_ENSURE(
      context, op_context.dims <= reference_ops::PadKernelMaxDimensionCount());
  TF_LITE_ENSURE(context, IsConstantTensor(op_context.paddings));
  // Calculate OpData:
  data->params.resizing_category = ResizingCategory::kGenericResize;
  const int paddings_total = GetTensorShape(paddings).FlatSize();
  if (paddings_total == 8 && (paddings_data[0] == 0 && paddings_data[1] == 0) &&
      (paddings_data[6] == 0 && paddings_data[7] == 0)) {
    data->params.resizing_category = ResizingCategory::kImageStyle;
  }
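
  // For instance, an NHWC image padded only along height and width has
  // paddings = { {0, 0}, {top, bottom}, {left, right}, {0, 0} }, i.e. eight
  // flat entries whose first and last pairs are zero, which is exactly the
  // kImageStyle pattern matched above.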

  const int num_input_dimensions = NumDimensions(input);
  data->params.left_padding_count = num_input_dimensions;
  data->params.right_padding_count = num_input_dimensions;

  for (int idx = num_input_dimensions - 1; idx >= 0; --idx) {
    data->params.left_padding[idx] = paddings_data[idx * 2];
    data->params.right_padding[idx] = paddings_data[idx * 2 + 1];
  }

  if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) {
    if (constant_values == nullptr) {
      // Quantized Pad requires that 0 is represented in the quantized
      // range.
      if (input->type == kTfLiteUInt8) {
        TF_LITE_ENSURE(context, output->params.zero_point >=
                                    std::numeric_limits<uint8_t>::min());
        TF_LITE_ENSURE(context, output->params.zero_point <=
                                    std::numeric_limits<uint8_t>::max());
      } else {
        TF_LITE_ENSURE(context, output->params.zero_point >=
                                    std::numeric_limits<int8_t>::min());
        TF_LITE_ENSURE(context, output->params.zero_point <=
                                    std::numeric_limits<int8_t>::max());
      }
    } else {
      // Quantized Pad requires that 'constant_values' is represented in the
      // same quantized range as the input and output tensors.
      TF_LITE_ENSURE_EQ(context, output->params.zero_point,
                        constant_values->params.zero_point);
      TF_LITE_ENSURE_EQ(context, static_cast<double>(output->params.scale),
                        static_cast<double>(constant_values->params.scale));
    }
    data->output_zero_point = output->params.zero_point;
  }

  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  PadContext op_context(context, node);
  TFLITE_DCHECK(node->user_data != nullptr);
  const OpData* data = static_cast<const OpData*>(node->user_data);

  if (op_context.constant_values != nullptr) {
    // Ensure that constant_values is a scalar.
    TF_LITE_ENSURE_EQ(context, NumElements(op_context.constant_values), 1);
  }
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, /*index=*/0);
  const TfLiteEvalTensor* constant_values =
      NumInputs(node) == 3
          ? tflite::micro::GetEvalInput(context, node, /*index=*/2)
          : nullptr;
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, /*index=*/0);

  // Create before and after padding arrays that are accepted by the kernel.
  const int32* paddings_data = GetTensorData<int32>(op_context.paddings);

  tflite::PadParams op_params;
  memset(&op_params, 0, sizeof(PadParams));
  op_params.left_padding_count = op_context.dims;
  op_params.right_padding_count = op_context.dims;

  for (int idx = op_context.dims - 1; idx >= 0; --idx) {
    op_params.left_padding[idx] = paddings_data[idx * 2];
    op_params.right_padding[idx] = paddings_data[idx * 2 + 1];
  }

#define TF_LITE_PAD(type, op_name, scalar, pad_value)                     \
  const scalar pad_value_copy = pad_value;                                \
                                                                          \
  type::op_name(op_params, GetTensorShape(op_context.input),              \
                GetTensorData<scalar>(op_context.input), &pad_value_copy, \
                GetTensorShape(op_context.output),                        \
                GetTensorData<scalar>(op_context.output))
  switch (op_context.input->type) {
  switch (input->type) {
    case kTfLiteFloat32: {
      float pad_value = op_context.constant_values == nullptr
                            ? 0.f
                            : *GetTensorData<float>(op_context.constant_values);
      if (op_context.resizing_category == ResizingCategory::kImageStyle) {
        TF_LITE_PAD(reference_ops, PadImageStyle, float, pad_value);
      float pad_value =
          constant_values == nullptr
              ? 0.f
              : *tflite::micro::GetTensorData<float>(constant_values);
      if (data->params.resizing_category == ResizingCategory::kImageStyle) {
        reference_ops::PadImageStyle(
            data->params, tflite::micro::GetTensorShape(input),
            tflite::micro::GetTensorData<float>(input), &pad_value,
            tflite::micro::GetTensorShape(output),
            tflite::micro::GetTensorData<float>(output));
      } else {
        TF_LITE_PAD(reference_ops, Pad, float, pad_value);
        reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
                           tflite::micro::GetTensorData<float>(input),
                           &pad_value, tflite::micro::GetTensorShape(output),
                           tflite::micro::GetTensorData<float>(output));
      }
    } break;
    case kTfLiteUInt8: {
      uint8_t pad_value;
      if (op_context.constant_values == nullptr) {
        // Quantized Pad requires that 0 is represented in the quantized
        // range.
        TF_LITE_ENSURE(context, op_context.output->params.zero_point >=
                                    std::numeric_limits<uint8_t>::min());
        TF_LITE_ENSURE(context, op_context.output->params.zero_point <=
                                    std::numeric_limits<uint8_t>::max());
        pad_value = static_cast<uint8_t>(op_context.output->params.zero_point);
      if (constant_values == nullptr) {
        pad_value = static_cast<uint8_t>(data->output_zero_point);
      } else {
        // Quantized Pad requires that 'constant_values' is represented in the
        // same quantized range as the input and output tensors.
        TF_LITE_ENSURE_EQ(context, op_context.output->params.zero_point,
                          op_context.constant_values->params.zero_point);
        TF_LITE_ENSURE_EQ(
            context, static_cast<double>(op_context.output->params.scale),
            static_cast<double>(op_context.constant_values->params.scale));
        pad_value = *GetTensorData<uint8_t>(op_context.constant_values);
        pad_value = *tflite::micro::GetTensorData<uint8_t>(constant_values);
      }
      if (op_context.resizing_category == ResizingCategory::kImageStyle) {
        TF_LITE_PAD(reference_ops, PadImageStyle, uint8_t, pad_value);
      if (data->params.resizing_category == ResizingCategory::kImageStyle) {
        reference_ops::PadImageStyle(
            data->params, tflite::micro::GetTensorShape(input),
            tflite::micro::GetTensorData<uint8_t>(input), &pad_value,
            tflite::micro::GetTensorShape(output),
            tflite::micro::GetTensorData<uint8_t>(output));
      } else {
        TF_LITE_PAD(reference_ops, Pad, uint8_t, pad_value);
        reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
                           tflite::micro::GetTensorData<uint8_t>(input),
                           &pad_value, tflite::micro::GetTensorShape(output),
                           tflite::micro::GetTensorData<uint8_t>(output));
      }
    } break;
    case kTfLiteInt8: {
      int8_t pad_value;
      if (op_context.constant_values == nullptr) {
        // Quantized Pad requires that 0 is represented in the quantized
        // range.
        TF_LITE_ENSURE(context, op_context.output->params.zero_point >=
                                    std::numeric_limits<int8_t>::min());
        TF_LITE_ENSURE(context, op_context.output->params.zero_point <=
                                    std::numeric_limits<int8_t>::max());
        pad_value = static_cast<int8_t>(op_context.output->params.zero_point);
      if (constant_values == nullptr) {
        pad_value = static_cast<int8_t>(data->output_zero_point);
      } else {
        // Quantized Pad requires that 'constant_values' is represented in the
        // same quantized range as the input and output tensors.
        TF_LITE_ENSURE_EQ(context, op_context.output->params.zero_point,
                          op_context.constant_values->params.zero_point);
        TF_LITE_ENSURE(context, op_context.output->params.scale ==
                                    op_context.constant_values->params.scale);
        pad_value = *GetTensorData<int8_t>(op_context.constant_values);
        pad_value = *tflite::micro::GetTensorData<int8_t>(constant_values);
      }
      if (op_context.resizing_category == ResizingCategory::kImageStyle) {
        TF_LITE_PAD(reference_ops, PadImageStyle, int8_t, pad_value);
      if (data->params.resizing_category == ResizingCategory::kImageStyle) {
        reference_ops::PadImageStyle(
            data->params, tflite::micro::GetTensorShape(input),
            tflite::micro::GetTensorData<int8_t>(input), &pad_value,
            tflite::micro::GetTensorShape(output),
            tflite::micro::GetTensorData<int8_t>(output));
      } else {
        TF_LITE_PAD(reference_ops, Pad, int8_t, pad_value);
        reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
                           tflite::micro::GetTensorData<int8_t>(input),
                           &pad_value, tflite::micro::GetTensorShape(output),
                           tflite::micro::GetTensorData<int8_t>(output));
      }
    } break;
    case kTfLiteInt32: {
      int32_t pad_value =
          op_context.constant_values == nullptr
          constant_values == nullptr
              ? 0
              : *GetTensorData<int32_t>(op_context.constant_values);
      TF_LITE_PAD(reference_ops, Pad, int32_t, pad_value);
              : *tflite::micro::GetTensorData<int32_t>(constant_values);
      reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
                         tflite::micro::GetTensorData<int32_t>(input),
                         &pad_value, tflite::micro::GetTensorShape(output),
                         tflite::micro::GetTensorData<int32_t>(output));
    } break;
    default:

      TF_LITE_KERNEL_LOG(context, "Type %s not currently supported by Pad.",
                         TfLiteTypeGetName(op_context.input->type));
                         TfLiteTypeGetName(input->type));
      return kTfLiteError;
  }
#undef TF_LITE_PAD
@@ -207,29 +226,27 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {

} // namespace pad

TfLiteRegistration* Register_PAD() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/pad::Prepare,
                                 /*invoke=*/pad::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_PAD() {
  return {/*init=*/pad::Init,
          /*free=*/nullptr,
          /*prepare=*/pad::Prepare,
          /*invoke=*/pad::Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

// Also register Pad as PadV2.
TfLiteRegistration* Register_PADV2() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/pad::Prepare,
                                 /*invoke=*/pad::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_PADV2() {
  return {/*init=*/pad::Init,
          /*free=*/nullptr,
          /*prepare=*/pad::Prepare,
          /*invoke=*/pad::Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

} // namespace micro

@@ -19,6 +19,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
@@ -32,6 +33,10 @@ constexpr int kOutputTensor = 0;

struct OpData {
  TfLitePaddingValues padding;
  int32_t activation_min;
  int32_t activation_max;
  float activation_min_f32;
  float activation_max_f32;
};

TfLiteStatus CalculateOpData(const TfLiteContext* context,
@@ -55,11 +60,7 @@ TfLiteStatus CalculateOpData(const TfLiteContext* context,

void AverageEvalFloat(const TfLiteContext* context, const TfLiteNode* node,
                      const TfLitePoolParams* params, const OpData* data,
                      const TfLiteTensor* input, TfLiteTensor* output) {
  float activation_min, activation_max;
  CalculateActivationRange(params->activation, &activation_min,
                           &activation_max);

                      const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
  PoolParams op_params;
  op_params.stride_height = params->stride_height;
  op_params.stride_width = params->stride_width;
@@ -67,20 +68,19 @@ void AverageEvalFloat(const TfLiteContext* context, const TfLiteNode* node,
  op_params.filter_width = params->filter_width;
  op_params.padding_values.height = data->padding.height;
  op_params.padding_values.width = data->padding.width;
  op_params.float_activation_min = activation_min;
  op_params.float_activation_max = activation_max;
  reference_ops::AveragePool(
      op_params, GetTensorShape(input), GetTensorData<float>(input),
      GetTensorShape(output), GetTensorData<float>(output));
  op_params.float_activation_min = data->activation_min_f32;
  op_params.float_activation_max = data->activation_max_f32;
  reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
                             tflite::micro::GetTensorData<float>(input),
                             tflite::micro::GetTensorShape(output),
                             tflite::micro::GetTensorData<float>(output));
}

void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
                          const TfLitePoolParams* params, const OpData* data,
                          const TfLiteTensor* input, TfLiteTensor* output) {
                          const TfLiteEvalTensor* input,
                          TfLiteEvalTensor* output) {
  TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8);
  int32_t activation_min, activation_max;
  (void)CalculateActivationRangeQuantized(context, params->activation, output,
                                          &activation_min, &activation_max);

  PoolParams op_params;
  op_params.stride_height = params->stride_height;
@@ -89,27 +89,26 @@ void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
  op_params.filter_width = params->filter_width;
  op_params.padding_values.height = data->padding.height;
  op_params.padding_values.width = data->padding.width;
  op_params.quantized_activation_min = activation_min;
  op_params.quantized_activation_max = activation_max;
  op_params.quantized_activation_min = data->activation_min;
  op_params.quantized_activation_max = data->activation_max;

  if (input->type == kTfLiteUInt8) {
    reference_ops::AveragePool(
        op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
        GetTensorShape(output), GetTensorData<uint8_t>(output));
    reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
                               tflite::micro::GetTensorData<uint8_t>(input),
                               tflite::micro::GetTensorShape(output),
                               tflite::micro::GetTensorData<uint8_t>(output));
  } else {
    reference_integer_ops::AveragePool(
        op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
        GetTensorShape(output), GetTensorData<int8_t>(output));
        op_params, tflite::micro::GetTensorShape(input),
        tflite::micro::GetTensorData<int8_t>(input),
        tflite::micro::GetTensorShape(output),
        tflite::micro::GetTensorData<int8_t>(output));
  }
}

void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node,
                  TfLitePoolParams* params, OpData* data,
                  const TfLiteTensor* input, TfLiteTensor* output) {
  float activation_min, activation_max;
  CalculateActivationRange(params->activation, &activation_min,
                           &activation_max);

                  TfLitePoolParams* params, const OpData* data,
                  const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
  tflite::PoolParams op_params;
  op_params.stride_height = params->stride_height;
  op_params.stride_width = params->stride_width;
@@ -117,22 +116,17 @@ void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node,
  op_params.filter_width = params->filter_width;
  op_params.padding_values.height = data->padding.height;
  op_params.padding_values.width = data->padding.width;
  op_params.float_activation_min = activation_min;
  op_params.float_activation_max = activation_max;
  reference_ops::MaxPool(op_params, GetTensorShape(input),
                         GetTensorData<float>(input), GetTensorShape(output),
                         GetTensorData<float>(output));
  op_params.float_activation_min = data->activation_min_f32;
  op_params.float_activation_max = data->activation_max_f32;
  reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
                         tflite::micro::GetTensorData<float>(input),
                         tflite::micro::GetTensorShape(output),
                         tflite::micro::GetTensorData<float>(output));
}

void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node,
                      TfLitePoolParams* params, OpData* data,
                      const TfLiteTensor* input, TfLiteTensor* output) {
  TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8);

  int32_t activation_min, activation_max;
  (void)CalculateActivationRangeQuantized(context, params->activation, output,
                                          &activation_min, &activation_max);

                      TfLitePoolParams* params, const OpData* data,
                      const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
  tflite::PoolParams op_params;
  op_params.stride_height = params->stride_height;
  op_params.stride_width = params->stride_width;
@@ -140,39 +134,44 @@ void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node,
  op_params.filter_width = params->filter_width;
  op_params.padding_values.height = data->padding.height;
  op_params.padding_values.width = data->padding.width;
  op_params.quantized_activation_min = activation_min;
  op_params.quantized_activation_max = activation_max;
  op_params.quantized_activation_min = data->activation_min;
  op_params.quantized_activation_max = data->activation_max;

  if (input->type == kTfLiteUInt8) {
    reference_ops::MaxPool(
        op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
        GetTensorShape(output), GetTensorData<uint8_t>(output));
    reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
                           tflite::micro::GetTensorData<uint8_t>(input),
                           tflite::micro::GetTensorShape(output),
                           tflite::micro::GetTensorData<uint8_t>(output));
  } else {
    reference_integer_ops::MaxPool(
        op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
        GetTensorShape(output), GetTensorData<int8_t>(output));
        op_params, tflite::micro::GetTensorShape(input),
        tflite::micro::GetTensorData<int8_t>(input),
        tflite::micro::GetTensorShape(output),
        tflite::micro::GetTensorData<int8_t>(output));
  }
}
} // namespace


TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->builtin_data != nullptr);
  auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
  OpData data;

  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  TFLITE_DCHECK(node->user_data != nullptr);
  const OpData* data = static_cast<const OpData*>(node->user_data);

  TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, &data));
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  // Inputs and outputs share the same type, guaranteed by the converter.
  switch (input->type) {
    case kTfLiteFloat32:
      AverageEvalFloat(context, node, params, &data, input, output);
      AverageEvalFloat(context, node, params, data, input, output);
      break;
    case kTfLiteUInt8:
    case kTfLiteInt8:
      AverageEvalQuantized(context, node, params, &data, input, output);
      AverageEvalQuantized(context, node, params, data, input, output);
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Input type %s is not currently supported",
@@ -183,21 +182,24 @@ TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
}

TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->builtin_data != nullptr);
  auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
  OpData data;

  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  TFLITE_DCHECK(node->user_data != nullptr);
  const OpData* data = static_cast<const OpData*>(node->user_data);

  TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, &data));
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  switch (input->type) {
    case kTfLiteFloat32:
      MaxEvalFloat(context, node, params, &data, input, output);
      MaxEvalFloat(context, node, params, data, input, output);
      break;
    case kTfLiteUInt8:
    case kTfLiteInt8:
      MaxEvalQuantized(context, node, params, &data, input, output);
      MaxEvalQuantized(context, node, params, data, input, output);
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.",
@@ -207,30 +209,59 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
  return kTfLiteOk;
}

} // namespace pooling

TfLiteRegistration* Register_AVERAGE_POOL_2D() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/pooling::AverageEval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteRegistration* Register_MAX_POOL_2D() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/pooling::MaxEval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->builtin_data != nullptr);
  auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);

  TFLITE_DCHECK(node->user_data != nullptr);
  OpData* data = static_cast<OpData*>(node->user_data);

  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);

  TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, data));

  if (input->type == kTfLiteFloat32) {
    CalculateActivationRange(params->activation, &data->activation_min_f32,
                             &data->activation_max_f32);
  } else if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) {
    CalculateActivationRangeQuantized(context, params->activation, output,
                                      &data->activation_min,
                                      &data->activation_max);
  }

  return kTfLiteOk;
}
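
// Folding the fused activation into the clamp here keeps Eval() allocation
// free. For example, kTfLiteActRelu6 on a float tensor yields
// activation_min_f32 == 0.0f and activation_max_f32 == 6.0f; the quantized
// branch maps the same [0, 6] interval through the output scale and zero
// point instead.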

} // namespace pooling

TfLiteRegistration Register_AVERAGE_POOL_2D() {
  return {/*init=*/pooling::Init,
          /*free=*/nullptr,
          /*prepare=*/pooling::Prepare,
          /*invoke=*/pooling::AverageEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

TfLiteRegistration Register_MAX_POOL_2D() {
  return {/*init=*/pooling::Init,
          /*free=*/nullptr,
          /*prepare=*/pooling::Prepare,
          /*invoke=*/pooling::MaxEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

} // namespace micro

@@ -15,20 +15,45 @@ limitations under the License.

#include "tensorflow/lite/kernels/internal/reference/prelu.h"

#include <cstdint>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
namespace micro {
namespace activations {
namespace {

TfLiteStatus CalculatePreluParams(const TfLiteTensor* input,
                                  const TfLiteTensor* alpha,
                                  TfLiteTensor* output, PreluParams* params) {
  if (output->type == kTfLiteInt8 || output->type == kTfLiteUInt8 ||
      output->type == kTfLiteInt16) {
    double real_multiplier_1 = static_cast<double>(input->params.scale) /
                               static_cast<double>(output->params.scale);
    double real_multiplier_2 = static_cast<double>(input->params.scale) *
                               static_cast<double>(alpha->params.scale) /
                               static_cast<double>(output->params.scale);
    QuantizeMultiplier(real_multiplier_1, &params->output_multiplier_1,
                       &params->output_shift_1);
    QuantizeMultiplier(real_multiplier_2, &params->output_multiplier_2,
                       &params->output_shift_2);

    params->input_offset = -input->params.zero_point;
    params->alpha_offset = -alpha->params.zero_point;
    params->output_offset = output->params.zero_point;
  }
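
// Two requantization constants are needed because the two halves of PRELU
// scale differently:
//
//   x >= 0: y = x          -> real_multiplier_1 = s_input / s_output
//   x <  0: y = alpha * x  -> real_multiplier_2 = s_input * s_alpha / s_output
//
// Each one is stored as a fixed-point multiplier plus shift via
// QuantizeMultiplier(), exactly as in the single-multiplier kernels above.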
|
||||
|
||||
TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) {
  return kTfLiteOk;
}

}  // namespace

inline void BroadcastPrelu4DSlowFloat(
    const RuntimeShape& unextended_input1_shape, const float* input1_data,
    const RuntimeShape& unextended_input2_shape, const float* input2_data,
@@ -60,43 +85,67 @@ inline void BroadcastPrelu4DSlowFloat(
  }
}

TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
void* PreluInit(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(PreluParams));
}

TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  PreluParams* params = static_cast<PreluParams*>(node->user_data);

  const TfLiteTensor* input = GetInput(context, node, 0);
  TF_LITE_ENSURE(context, input != nullptr);
  const TfLiteTensor* alpha = GetInput(context, node, 1);
  TF_LITE_ENSURE(context, alpha != nullptr);
  TfLiteTensor* output = GetOutput(context, node, 0);
  int32_t output_multiplier = 0;
  int output_shift = 0;
  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt16) {
    double real_multiplier = static_cast<double>(input->params.scale) *
                             static_cast<double>(alpha->params.scale) /
                             static_cast<double>(output->params.scale);
    QuantizeMultiplierSmallerThanOneExp(real_multiplier, &output_multiplier,
                                        &output_shift);
  }
  TF_LITE_ENSURE(context, output != nullptr);

  return CalculatePreluParams(input, alpha, output, params);
}

TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  const PreluParams& params =
      *(static_cast<const PreluParams*>(node->user_data));

  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
  const TfLiteEvalTensor* alpha = tflite::micro::GetEvalInput(context, node, 1);
  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);

  switch (input->type) {
    case kTfLiteFloat32: {
      BroadcastPrelu4DSlowFloat(
          GetTensorShape(input), GetTensorData<float>(input),
          GetTensorShape(alpha), GetTensorData<float>(alpha),
          GetTensorShape(output), GetTensorData<float>(output));
      BroadcastPrelu4DSlowFloat(tflite::micro::GetTensorShape(input),
                                tflite::micro::GetTensorData<float>(input),
                                tflite::micro::GetTensorShape(alpha),
                                tflite::micro::GetTensorData<float>(alpha),
                                tflite::micro::GetTensorShape(output),
                                tflite::micro::GetTensorData<float>(output));
      return kTfLiteOk;
    } break;
    case kTfLiteUInt8: {
      PreluParams op_params;
      op_params.input_offset = -input->params.zero_point;
      op_params.alpha_offset = -alpha->params.zero_point;
      op_params.output_offset = output->params.zero_point;
      op_params.output_multiplier = output_multiplier;
      op_params.output_shift = output_shift;
      reference_ops::BroadcastPrelu4DSlow(
          op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
          GetTensorShape(alpha), GetTensorData<uint8_t>(alpha),
          GetTensorShape(output), GetTensorData<uint8_t>(output));
          params, tflite::micro::GetTensorShape(input),
          tflite::micro::GetTensorData<uint8_t>(input),
          tflite::micro::GetTensorShape(alpha),
          tflite::micro::GetTensorData<uint8_t>(alpha),
          tflite::micro::GetTensorShape(output),
          tflite::micro::GetTensorData<uint8_t>(output));
      return kTfLiteOk;
    } break;
    case kTfLiteInt8: {
      reference_ops::BroadcastPrelu4DSlow(
          params, tflite::micro::GetTensorShape(input),
          tflite::micro::GetTensorData<int8_t>(input),
          tflite::micro::GetTensorShape(alpha),
          tflite::micro::GetTensorData<int8_t>(alpha),
          tflite::micro::GetTensorShape(output),
          tflite::micro::GetTensorData<int8_t>(output));
      return kTfLiteOk;
    } break;
    default:
      TF_LITE_KERNEL_LOG(
          context, "Only float32 and uint8 are supported currently, got %d.",
          context, "Only float32 and uint8_t are supported currently, got %d.",
          TfLiteTypeGetName(input->type));
      return kTfLiteError;
  }
@@ -104,16 +153,15 @@ TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {

}  // namespace activations

TfLiteRegistration* Register_PRELU() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/activations::PreluPrepare,
                                 /*invoke=*/activations::PreluEval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_PRELU() {
  return {/*init=*/activations::PreluInit,
          /*free=*/nullptr,
          /*prepare=*/activations::PreluPrepare,
          /*invoke=*/activations::PreluEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

}  // namespace micro
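For reference, the prelu hunks above follow the init/prepare/eval split that this update applies across all kernels. Below is a minimal sketch of that lifecycle, assuming only the TFLite Micro APIs already visible in the diff (AllocatePersistentBuffer, node->user_data, TfLiteEvalTensor); the op data struct and function names here are hypothetical.

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace {

// Hypothetical per-node state; real kernels store precomputed multipliers.
struct SketchOpData {
  int32_t output_multiplier;
  int output_shift;
};

// Init runs once per node and may only take arena-backed persistent memory.
void* SketchInit(TfLiteContext* context, const char* buffer, size_t length) {
  return context->AllocatePersistentBuffer(context, sizeof(SketchOpData));
}

// Prepare still sees full TfLiteTensor metadata (scales, zero points), so
// all quantization math is folded into user_data here, once.
TfLiteStatus SketchPrepare(TfLiteContext* context, TfLiteNode* node) {
  auto* data = static_cast<SketchOpData*>(node->user_data);
  data->output_multiplier = 0;
  data->output_shift = 0;
  return kTfLiteOk;
}

// Eval touches only lightweight TfLiteEvalTensor handles plus the
// precomputed state; nothing is recomputed per invocation.
TfLiteStatus SketchEval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, 0);
  return input != nullptr ? kTfLiteOk : kTfLiteError;
}

}  // namespace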
@@ -19,19 +19,38 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"

namespace tflite {
namespace ops {
namespace micro {
namespace quantize {
namespace {

struct OpData {
  tflite::QuantizationParams quantization_params;
  // The scaling factor from input to output (aka the 'real multiplier') can
  // be represented as a fixed point multiplier plus a left shift.
  int32_t output_multiplier;
  int output_shift;

  int32_t input_zero_point;
};

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  OpData* data = static_cast<OpData*>(node->user_data);

  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);

  const TfLiteTensor* input = GetInput(context, node, 0);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* output = GetOutput(context, node, 0);
  TF_LITE_ENSURE(context, output != nullptr);

  // TODO(b/128934713): Add support for fixed-point per-channel quantization.
  // Currently this only supports affine per-layer quantization.
@@ -43,34 +62,61 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE(context, affine_quantization->scale);
  TF_LITE_ENSURE(context, affine_quantization->scale->size == 1);

  TF_LITE_ENSURE(context,
                 input->type == kTfLiteFloat32 || input->type == kTfLiteInt16);
  TF_LITE_ENSURE(context,
                 output->type == kTfLiteUInt8 || output->type == kTfLiteInt8);
  TF_LITE_ENSURE(context, input->type == kTfLiteFloat32 ||
                              input->type == kTfLiteInt16 ||
                              input->type == kTfLiteInt8);
  TF_LITE_ENSURE(context, output->type == kTfLiteUInt8 ||
                              output->type == kTfLiteInt8 ||
                              output->type == kTfLiteInt16 ||
                              output->type == kTfLiteInt32);

  if (((input->type == kTfLiteInt16 || input->type == kTfLiteInt8) &&
       output->type == kTfLiteInt8) ||
      (input->type == kTfLiteInt16 && output->type == kTfLiteInt16)) {
    double effective_scale = static_cast<double>(input->params.scale) /
                             static_cast<double>(output->params.scale);

    QuantizeMultiplier(effective_scale, &data->output_multiplier,
                       &data->output_shift);
  }

  data->quantization_params.zero_point = output->params.zero_point;
  data->quantization_params.scale = static_cast<double>(output->params.scale);

  data->input_zero_point = input->params.zero_point;
  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input = GetInput(context, node, 0);
  TfLiteTensor* output = GetOutput(context, node, 0);
  TFLITE_DCHECK(node->user_data != nullptr);
  OpData* data = static_cast<OpData*>(node->user_data);

  tflite::QuantizationParams op_params;
  op_params.zero_point = output->params.zero_point;
  op_params.scale = static_cast<double>(output->params.scale);
  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);

  if (input->type == kTfLiteFloat32) {
    switch (output->type) {
      case kTfLiteInt8:
        reference_ops::AffineQuantize(
            op_params, GetTensorShape(input), GetTensorData<float>(input),
            GetTensorShape(output), GetTensorData<int8_t>(output));
            data->quantization_params, tflite::micro::GetTensorShape(input),
            tflite::micro::GetTensorData<float>(input),
            tflite::micro::GetTensorShape(output),
            tflite::micro::GetTensorData<int8_t>(output));
        break;
      case kTfLiteUInt8:
        reference_ops::AffineQuantize(
            op_params, GetTensorShape(input), GetTensorData<float>(input),
            GetTensorShape(output), GetTensorData<uint8_t>(output));
            data->quantization_params, tflite::micro::GetTensorShape(input),
            tflite::micro::GetTensorData<float>(input),
            tflite::micro::GetTensorShape(output),
            tflite::micro::GetTensorData<uint8_t>(output));
        break;
      case kTfLiteInt16:
        reference_ops::AffineQuantize(
            data->quantization_params, tflite::micro::GetTensorShape(input),
            tflite::micro::GetTensorData<float>(input),
            tflite::micro::GetTensorShape(output),
            tflite::micro::GetTensorData<int16_t>(output));
        return kTfLiteOk;
      default:
        TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
                           TfLiteTypeGetName(input->type),
@@ -79,17 +125,45 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
    }
  } else if (input->type == kTfLiteInt16) {
    size_t size = ElementCount(*input->dims);
    int32_t output_multiplier;
    int output_shift;
    double effective_scale =
        static_cast<double>(input->params.scale / output->params.scale);
    switch (output->type) {
      case kTfLiteInt8:
        QuantizeMultiplier(effective_scale, &output_multiplier, &output_shift);
        reference_ops::Requantize(tflite::micro::GetTensorData<int16_t>(input),
                                  size, data->output_multiplier,
                                  data->output_shift, data->input_zero_point,
                                  data->quantization_params.zero_point,
                                  tflite::micro::GetTensorData<int8_t>(output));
        break;
      case kTfLiteInt16:
        reference_ops::Requantize(
            GetTensorData<int16_t>(input), size, output_multiplier,
            output_shift, input->params.zero_point, output->params.zero_point,
            GetTensorData<int8_t>(output));
            tflite::micro::GetTensorData<int16_t>(input), size,
            data->output_multiplier, data->output_shift, data->input_zero_point,
            data->quantization_params.zero_point,
            tflite::micro::GetTensorData<int16_t>(output));
        return kTfLiteOk;
      case kTfLiteInt32:
        reference_ops::Requantize(
            tflite::micro::GetTensorData<int16_t>(input), size,
            data->output_multiplier, data->output_shift, data->input_zero_point,
            data->quantization_params.zero_point,
            tflite::micro::GetTensorData<int32_t>(output));
        return kTfLiteOk;
      default:
        TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
                           TfLiteTypeGetName(input->type),
                           TfLiteTypeGetName(output->type));
        return kTfLiteError;
    }
  } else if (input->type == kTfLiteInt8) {
    // Int8 to Int8 requantization, required if the input and output tensors
    // have different scales and/or zero points.
    size_t size = ElementCount(*input->dims);
    switch (output->type) {
      case kTfLiteInt8:
        reference_ops::Requantize(tflite::micro::GetTensorData<int8_t>(input),
                                  size, data->output_multiplier,
                                  data->output_shift, data->input_zero_point,
                                  data->quantization_params.zero_point,
                                  tflite::micro::GetTensorData<int8_t>(output));
        break;
      default:
        TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
@@ -107,23 +181,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  return kTfLiteOk;
}

}  // namespace quantize
}  // namespace

// This Op (QUANTIZE) quantizes the input and produces quantized output.
// AffineQuantize takes scale and zero point and quantizes the float value to
// quantized output, in int8 or uint8 format.
TfLiteRegistration* Register_QUANTIZE() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/quantize::Prepare,
                                 /*invoke=*/quantize::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_QUANTIZE() {
  return {/*init=*/Init,
          /*free=*/nullptr,
          /*prepare=*/Prepare,
          /*invoke=*/Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

}  // namespace micro
}  // namespace ops
}  // namespace tflite
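The OpData comment above ("fixed point multiplier plus a left shift") is easiest to see with numbers. Below is a hedged sketch of one-value requantization, assuming the QuantizeMultiplier and MultiplyByQuantizedMultiplier helpers from tensorflow/lite/kernels/internal behave as the reference ops use them; the scales in the comment are made-up examples.

#include <algorithm>
#include <cstdint>

#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"

// Requantize a single int16 value to int8 the way the kernel above does.
// Example: input_scale 0.05 and output_scale 0.1 give effective_scale 0.5,
// which QuantizeMultiplier encodes as multiplier = 1 << 30, shift = 0.
int8_t RequantizeOne(int16_t x, double input_scale, int32_t input_zp,
                     double output_scale, int32_t output_zp) {
  const double effective_scale = input_scale / output_scale;
  int32_t multiplier;
  int shift;
  tflite::QuantizeMultiplier(effective_scale, &multiplier, &shift);
  const int32_t scaled = tflite::MultiplyByQuantizedMultiplier(
      static_cast<int32_t>(x) - input_zp, multiplier, shift);
  return static_cast<int8_t>(
      std::min<int32_t>(127, std::max<int32_t>(-128, scaled + output_zp)));
}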
@@ -18,9 +18,12 @@ limitations under the License.
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/mean.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"

namespace tflite {
namespace ops {
@@ -30,10 +33,27 @@ namespace reduce {
constexpr int kMaxNumberOfAxis = 4;
constexpr int kMaxNumberOfReducedAxis = 2;

struct OpData {
  int32_t multiplier;
  int shift;
  int temp_buffer_idx;
  int resolved_axis_idx;
  int input_zp;
  float input_scale;
  int output_zp;
  float output_scale;
  int num_output_elements;
};

void* InitReduce(TfLiteContext* context, const char* buffer, size_t length) {
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus PrepareSimple(TfLiteContext* context, TfLiteNode* node) {
  // Inputs Tensor (dtype depends on quantization):
  // [0] = Input
  // [1] = Axis
  const TfLiteTensor* input = GetInput(context, node, 0);

  // Outputs Tensor (dtype depends on quantization):
  // [0] = Output
@@ -44,13 +64,63 @@ TfLiteStatus PrepareSimple(TfLiteContext* context, TfLiteNode* node) {

  // Validate axis type
  const TfLiteTensor* axis = GetInput(context, node, 1);
  TF_LITE_ENSURE(context, axis != nullptr);
  TF_LITE_ENSURE_TYPES_EQ(context, axis->type, kTfLiteInt32);

  if (input->type == kTfLiteInt8) {
    OpData* data = static_cast<OpData*>(node->user_data);
    const TfLiteTensor* output = GetOutput(context, node, 0);
    const double real_multiplier = static_cast<double>(input->params.scale) /
                                   static_cast<double>(output->params.scale);
    QuantizeMultiplier(real_multiplier, &data->multiplier, &data->shift);
  }

  return kTfLiteOk;
}

TfLiteStatus PrepareMax(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE_OK(context, PrepareSimple(context, node));

  OpData* op_data = static_cast<OpData*>(node->user_data);
  const TfLiteTensor* input = GetInput(context, node, 0);
  const TfLiteTensor* output = GetOutput(context, node, 0);
  const TfLiteTensor* axis = GetInput(context, node, 1);

  op_data->input_scale = input->params.scale;
  op_data->output_scale = output->params.scale;
  op_data->num_output_elements = NumElements(output);

  context->RequestScratchBufferInArena(context, sizeof(int) * input->dims->size,
                                       &op_data->temp_buffer_idx);
  context->RequestScratchBufferInArena(
      context, sizeof(int) * static_cast<int>(ElementCount(*axis->dims)),
      &op_data->resolved_axis_idx);

  return kTfLiteOk;
}

TfLiteStatus PrepareMeanOrSum(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input = GetInput(context, node, 0);
  OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
  const TfLiteTensor* output = GetOutput(context, node, 0);
  if (input->type == kTfLiteInt8) {
    const double real_multiplier = static_cast<double>(input->params.scale) /
                                   static_cast<double>(output->params.scale);
    QuantizeMultiplier(real_multiplier, &op_data->multiplier, &op_data->shift);
  }

  int output_size = NumElements(output);
  if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) {
    context->RequestScratchBufferInArena(context, output_size * sizeof(int32_t),
                                         &op_data->temp_buffer_idx);
    op_data->input_zp = input->params.zero_point;
    op_data->input_scale = input->params.scale;
    op_data->output_zp = output->params.zero_point;
    op_data->output_scale = output->params.scale;
  }

  TF_LITE_ENSURE_OK(context, PrepareSimple(context, node));
  // TODO(b/144955155): Support uint8(b/144955155) and int8(b/144955018)
  // TODO(b/144955155): Support uint8_t(b/144955155) and int8_t(b/144955018)
  return kTfLiteOk;
}

@@ -58,7 +128,7 @@ void ResolveAxis(const int* axis_data, int axis_count,
                 tflite::MeanParams* op_params) {
  int i = 0;
  for (; i < axis_count; ++i) {
    op_params->axis[i] = static_cast<int16>(axis_data[i]);
    op_params->axis[i] = static_cast<int16_t>(axis_data[i]);
  }
  for (; i < 4; ++i) {
    op_params->axis[i] = 1;
@@ -67,69 +137,206 @@ void ResolveAxis(const int* axis_data, int axis_count,
}

TfLiteStatus EvalMean(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input = GetInput(context, node, 0);
  const TfLiteTensor* axis = GetInput(context, node, 1);
  TfLiteTensor* output = GetOutput(context, node, 0);
  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
  const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 1);
  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
  TfLiteReducerParams* params =
      reinterpret_cast<TfLiteReducerParams*>(node->builtin_data);
  OpData* op_data = reinterpret_cast<OpData*>(node->user_data);

  int num_axis = static_cast<int>(NumElements(axis));
  int num_axis = static_cast<int>(ElementCount(*axis->dims));
  int temp_index[kMaxNumberOfAxis];
  int resolved_axis[kMaxNumberOfReducedAxis];

  tflite::MeanParams op_params;
  ResolveAxis(tflite::micro::GetTensorData<int>(axis), num_axis, &op_params);

  // Special case mean implementation exists for 4D mean across axes 1 and 2.
  bool special_case_4d_axes_1_and_2 =
      input->dims->size == 4 && op_params.axis_count == 2 &&
      ((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
       (op_params.axis[0] == 2 && op_params.axis[1] == 1));

  switch (input->type) {
    case kTfLiteFloat32: {
      tflite::MeanParams op_params;
      ResolveAxis(GetTensorData<int>(axis), num_axis, &op_params);
      // TODO(b/146571391): Support only 4D Input and 2D Axis for Mean until
      // scratch tensor allocation has been implemented in (b/132070898)
      bool is_valid_inputs =
          (NumDimensions(input) == 4 && op_params.axis_count == 2 &&
           ((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
            (op_params.axis[0] == 2 && op_params.axis[1] == 1)));
      TF_LITE_ENSURE_MSG(
          context, is_valid_inputs == true,
          "Number of Input "
          "dimensions != 4 OR the Axis is not either [1, 2] or [2, 1]");
      // TODO(b/139102329): Handle the below special case in the combined
      // reference method.
      // Defer to specialized implementation for 4D Mean across axes 1 & 2.
      if (params->keep_dims) {
        reference_ops::Mean(op_params, GetTensorShape(input),
                            GetTensorData<float>(input), GetTensorShape(output),
                            GetTensorData<float>(output));
      if (params->keep_dims && special_case_4d_axes_1_and_2) {
        reference_ops::Mean(op_params, tflite::micro::GetTensorShape(input),
                            tflite::micro::GetTensorData<float>(input),
                            tflite::micro::GetTensorShape(output),
                            tflite::micro::GetTensorData<float>(output));
      } else {
        TF_LITE_ENSURE(
            context,
            reference_ops::Mean(GetTensorData<float>(input), input->dims->data,
                                input->dims->size, GetTensorData<float>(output),
            reference_ops::Mean(
                tflite::micro::GetTensorData<float>(input), input->dims->data,
                input->dims->size, tflite::micro::GetTensorData<float>(output),
                output->dims->data, output->dims->size,
                tflite::micro::GetTensorData<int>(axis), num_axis,
                params->keep_dims, temp_index, resolved_axis,
                tflite::micro::GetTensorData<float>(output)));
      }
    } break;
    case kTfLiteInt8: {
      // Defer to specialized implementation for 4D Mean across axes 1 & 2.
      if (params->keep_dims && special_case_4d_axes_1_and_2) {
        reference_integer_ops::Mean(
            op_params, op_data->multiplier, op_data->shift,
            tflite::micro::GetTensorShape(input),
            tflite::micro::GetTensorData<int8_t>(input), op_data->input_zp,
            tflite::micro::GetTensorShape(output),
            tflite::micro::GetTensorData<int8_t>(output), op_data->output_zp);
      } else if (op_data->input_zp == op_data->output_zp &&
                 op_data->input_scale == op_data->output_scale) {
        int32_t* temp_buffer = static_cast<int32_t*>(
            context->GetScratchBuffer(context, op_data->temp_buffer_idx));
        TF_LITE_ENSURE(
            context,
            reference_ops::Mean(
                tflite::micro::GetTensorData<int8_t>(input), input->dims->data,
                input->dims->size, tflite::micro::GetTensorData<int8_t>(output),
                output->dims->data, output->dims->size,
                tflite::micro::GetTensorData<int>(axis), num_axis,
                params->keep_dims, temp_index, resolved_axis, temp_buffer));
      } else {
        int32_t* temp_buffer = static_cast<int32_t*>(
            context->GetScratchBuffer(context, op_data->temp_buffer_idx));
        TF_LITE_ENSURE(
            context,
            reference_ops::QuantizedMeanOrSum(
                tflite::micro::GetTensorData<int8_t>(input), op_data->input_zp,
                op_data->input_scale, input->dims->data, input->dims->size,
                tflite::micro::GetTensorData<int8_t>(output),
                op_data->output_zp, op_data->output_scale, output->dims->data,
                output->dims->size, tflite::micro::GetTensorData<int>(axis),
                num_axis, params->keep_dims, temp_index, resolved_axis,
                temp_buffer, false));
      }
    } break;
    case kTfLiteUInt8: {
      // Defer to specialized implementation for 4D Mean across axes 1 & 2.
      if (params->keep_dims && special_case_4d_axes_1_and_2) {
        reference_ops::Mean(op_params, tflite::micro::GetTensorShape(input),
                            tflite::micro::GetTensorData<uint8_t>(input),
                            op_data->input_zp, op_data->input_scale,
                            tflite::micro::GetTensorShape(output),
                            tflite::micro::GetTensorData<uint8_t>(output),
                            op_data->output_zp, op_data->output_scale);
      } else if (op_data->input_zp == op_data->output_zp &&
                 op_data->input_scale == op_data->output_scale) {
        uint32_t* temp_buffer = static_cast<uint32_t*>(
            context->GetScratchBuffer(context, op_data->temp_buffer_idx));
        TF_LITE_ENSURE(
            context,
            reference_ops::Mean(tflite::micro::GetTensorData<uint8_t>(input),
                                input->dims->data, input->dims->size,
                                tflite::micro::GetTensorData<uint8_t>(output),
                                output->dims->data, output->dims->size,
                                GetTensorData<int>(axis), num_axis,
                                params->keep_dims, temp_index, resolved_axis,
                                GetTensorData<float>(output)));
                                tflite::micro::GetTensorData<int>(axis),
                                num_axis, params->keep_dims, temp_index,
                                resolved_axis, temp_buffer));
      } else {
        uint32_t* temp_buffer = static_cast<uint32_t*>(
            context->GetScratchBuffer(context, op_data->temp_buffer_idx));
        TF_LITE_ENSURE(
            context,
            reference_ops::QuantizedMeanOrSum(
                tflite::micro::GetTensorData<uint8_t>(input), op_data->input_zp,
                op_data->input_scale, input->dims->data, input->dims->size,
                tflite::micro::GetTensorData<uint8_t>(output),
                op_data->output_zp, op_data->output_scale, output->dims->data,
                output->dims->size, tflite::micro::GetTensorData<int>(axis),
                num_axis, params->keep_dims, temp_index, resolved_axis,
                temp_buffer, false));
      }
    } break;
    default:
      // TODO(b/144955155): Support uint8(b/144955155) and int8(b/144955018)
      TF_LITE_ENSURE_MSG(context, false,
                         "Currently, only float32 input type "
                         "Currently, only float32, int8 or uint8 input type "
                         "is supported.");
  }
  return kTfLiteOk;
}

TfLiteStatus EvalMax(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
  const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 1);
  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
  TfLiteReducerParams* params =
      static_cast<TfLiteReducerParams*>(node->builtin_data);
  OpData* op_data = static_cast<OpData*>(node->user_data);

  // Interpret an axis tensor with null dimensions as a scalar
  int num_axis = static_cast<int>(ElementCount(*axis->dims));
  int* temp_buffer = static_cast<int*>(
      context->GetScratchBuffer(context, op_data->temp_buffer_idx));
  int* resolved_axis = static_cast<int*>(
      context->GetScratchBuffer(context, op_data->resolved_axis_idx));
  switch (input->type) {
    case kTfLiteFloat32:
      TF_LITE_ENSURE(
          context,
          reference_ops::ReduceGeneric<float>(
              tflite::micro::GetTensorData<float>(input), input->dims->data,
              input->dims->size, tflite::micro::GetTensorData<float>(output),
              output->dims->data, output->dims->size,
              tflite::micro::GetTensorData<int>(axis), num_axis,
              params->keep_dims, temp_buffer, resolved_axis,
              std::numeric_limits<float>::lowest(),
              [](const float current, const float in) -> float {
                return (in > current) ? in : current;
              }));
      break;
    case kTfLiteInt8:
      TF_LITE_ENSURE_EQ(context, static_cast<double>(op_data->input_scale),
                        static_cast<double>(op_data->output_scale));
      TF_LITE_ENSURE_EQ(context, op_data->input_zp, op_data->output_zp);
      TF_LITE_ENSURE(
          context,
          reference_ops::ReduceGeneric<int8_t>(
              tflite::micro::GetTensorData<int8_t>(input), input->dims->data,
              input->dims->size, tflite::micro::GetTensorData<int8_t>(output),
              output->dims->data, output->dims->size,
              tflite::micro::GetTensorData<int>(axis), num_axis,
              params->keep_dims, temp_buffer, resolved_axis,
              std::numeric_limits<int8_t>::lowest(),
              [](const int8_t current, const int8_t in) -> int8_t {
                return (in > current) ? in : current;
              }));
      break;
    default:
      TF_LITE_KERNEL_LOG(context,
                         "Only float32 and int8 types are supported.\n");
      return kTfLiteError;
  }
  return kTfLiteOk;
}

}  // namespace reduce

TfLiteRegistration* Register_MEAN() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/reduce::PrepareMeanOrSum,
                                 /*invoke=*/reduce::EvalMean,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_MEAN() {
  return {/*init=*/reduce::InitReduce,
          /*free=*/nullptr,
          /*prepare=*/reduce::PrepareMeanOrSum,
          /*invoke=*/reduce::EvalMean,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

TfLiteRegistration Register_REDUCE_MAX() {
  return {/*init=*/reduce::InitReduce,
          /*free=*/nullptr,
          /*prepare=*/reduce::PrepareMax,
          /*invoke=*/reduce::EvalMax,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

}  // namespace micro
}  // namespace ops
}  // namespace tflite
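The "special case mean implementation ... for 4D mean across axes 1 and 2" above is the NHWC spatial average. Below is a plain float sketch of what that path computes; the shapes are hypothetical and keep_dims simply collapses H and W to 1 in the output.

// Mean over axes {1, 2} of an NHWC tensor: one average per (batch, channel).
void MeanHW(const float* input, int n, int h, int w, int c, float* output) {
  for (int b = 0; b < n; ++b) {
    for (int ch = 0; ch < c; ++ch) {
      float sum = 0.0f;
      for (int y = 0; y < h; ++y) {
        for (int x = 0; x < w; ++x) {
          sum += input[((b * h + y) * w + x) * c + ch];
        }
      }
      output[b * c + ch] = sum / static_cast<float>(h * w);
    }
  }
}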
@@ -18,6 +18,9 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_utils.h"

namespace tflite {
namespace ops {
@@ -29,7 +32,9 @@ constexpr int kOutputTensor = 0;

TfLiteStatus ReshapeOutput(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);
  // Tensorflow's Reshape allows one of the shape components to have the
  // special -1 value, meaning it will be calculated automatically based on the
  // input. Here we calculate what that dimension should be so that the number
@@ -61,7 +66,7 @@ TfLiteStatus ReshapeOutput(TfLiteContext* context, TfLiteNode* node) {
    num_output_elements *= output_shape->data[stretch_dim];
  }

  TF_LITE_ENSURE_EQ(context, input->type, output->type);
  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
  TF_LITE_ENSURE_EQ(context, num_input_elements, num_output_elements);
  return kTfLiteOk;
}
@@ -74,13 +79,21 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  // TODO(b/162522304): storing input bytes in OpData increases some models
  // significantly, possibly due to alignment issues.
  size_t input_bytes;
  TF_LITE_ENSURE_STATUS(TfLiteTypeSizeOf(input->type, &input_bytes));
  input_bytes *= ElementCount(*input->dims);

  // Do nothing for in-place reshape.
  if (input->data.raw != output->data.raw) {
    // Otherwise perform reshape with copy.
    for (size_t i = 0; i < input->bytes; ++i) {
    for (size_t i = 0; i < input_bytes; ++i) {
      output->data.raw[i] = input->data.raw[i];
    }
  }
@@ -89,16 +102,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {

}  // namespace reshape

TfLiteRegistration* Register_RESHAPE() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/reshape::Prepare,
                                 /*invoke=*/reshape::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_RESHAPE() {
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/reshape::Prepare,
          /*invoke=*/reshape::Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

}  // namespace micro
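ReshapeOutput's "-1" handling, described in the comment above, reduces to a short rule: at most one output dimension may be -1, and it is inferred so that input and output element counts match. A standalone sketch under that assumption follows; the helper name and signature are hypothetical, not the kernel's code.

#include <cstdint>

bool InferStretchDim(const int32_t* requested, int rank,
                     int64_t num_input_elements, int32_t* resolved) {
  int stretch_dim = -1;
  int64_t num_output_elements = 1;
  for (int i = 0; i < rank; ++i) {
    resolved[i] = requested[i];
    if (requested[i] == -1) {
      if (stretch_dim != -1) return false;  // at most one -1 is allowed
      stretch_dim = i;
    } else {
      num_output_elements *= requested[i];
    }
  }
  if (stretch_dim != -1) {
    // e.g. a 2x3x4 input reshaped to {6, -1} resolves the -1 to 4.
    resolved[stretch_dim] =
        static_cast<int32_t>(num_input_elements / num_output_elements);
    num_output_elements *= resolved[stretch_dim];
  }
  return num_output_elements == num_input_elements;
}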
@@ -20,6 +20,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
@@ -31,7 +32,6 @@ constexpr int kSizeTensor = 1;
constexpr int kOutputTensor = 0;

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
#if defined(DEBUG)
  TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);

@@ -49,11 +49,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  output->type = input->type;

  if (!IsConstantTensor(size)) {
    TF_LITE_KERNEL_LOG(context,
                       "Dynamic tensors are unsupported in tfmicro.");
    TF_LITE_KERNEL_LOG(context, "Dynamic tensors are unsupported in tfmicro.");
    return kTfLiteError;
  }
#endif
  return kTfLiteOk;
}

@@ -61,9 +59,12 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  auto* params =
      reinterpret_cast<TfLiteResizeNearestNeighborParams*>(node->builtin_data);

  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  const TfLiteTensor* size = GetInput(context, node, kSizeTensor);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  const TfLiteEvalTensor* size =
      tflite::micro::GetEvalInput(context, node, kSizeTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  tflite::ResizeNearestNeighborParams op_params;
  op_params.align_corners = params->align_corners;
@@ -71,22 +72,31 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {

  if (output->type == kTfLiteFloat32) {
    reference_ops::ResizeNearestNeighbor(
        op_params, GetTensorShape(input), GetTensorData<int32>(input),
        GetTensorShape(size), GetTensorData<int32>(size),
        GetTensorShape(output), GetTensorData<int32>(output));
        op_params, tflite::micro::GetTensorShape(input),
        tflite::micro::GetTensorData<int32_t>(input),
        tflite::micro::GetTensorShape(size),
        tflite::micro::GetTensorData<int32_t>(size),
        tflite::micro::GetTensorShape(output),
        tflite::micro::GetTensorData<int32_t>(output));
  } else if (output->type == kTfLiteUInt8) {
    reference_ops::ResizeNearestNeighbor(
        op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
        GetTensorShape(size), GetTensorData<int32>(size),
        GetTensorShape(output), GetTensorData<uint8_t>(output));
        op_params, tflite::micro::GetTensorShape(input),
        tflite::micro::GetTensorData<uint8_t>(input),
        tflite::micro::GetTensorShape(size),
        tflite::micro::GetTensorData<int32_t>(size),
        tflite::micro::GetTensorShape(output),
        tflite::micro::GetTensorData<uint8_t>(output));
  } else if (output->type == kTfLiteInt8) {
    reference_ops::ResizeNearestNeighbor(
        op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
        GetTensorShape(size), GetTensorData<int32>(size),
        GetTensorShape(output), GetTensorData<int8_t>(output));
        op_params, tflite::micro::GetTensorShape(input),
        tflite::micro::GetTensorData<int8_t>(input),
        tflite::micro::GetTensorShape(size),
        tflite::micro::GetTensorData<int32_t>(size),
        tflite::micro::GetTensorShape(output),
        tflite::micro::GetTensorData<int8_t>(output));
  } else {
    TF_LITE_KERNEL_LOG(context,
                       "Output type is %d, requires float, uint8 or int8.",
                       "Output type is %d, requires float, uint8_t or int8_t.",
                       output->type);
    return kTfLiteError;
  }
@@ -95,16 +105,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
}
}  // namespace resize_nearest_neighbor

TfLiteRegistration* Register_RESIZE_NEAREST_NEIGHBOR() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/resize_nearest_neighbor::Prepare,
                                 /*invoke=*/resize_nearest_neighbor::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_RESIZE_NEAREST_NEIGHBOR() {
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/resize_nearest_neighbor::Prepare,
          /*invoke=*/resize_nearest_neighbor::Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

}  // namespace micro
@@ -18,6 +18,7 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
@@ -29,11 +30,13 @@ constexpr int kOutputTensor = 0;

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);
  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
  TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32);
  TF_LITE_ENSURE_EQ(context, output->type, input->type);
  TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
  TF_LITE_ENSURE_TYPES_EQ(context, output->type, input->type);
  TF_LITE_ENSURE_EQ(context, output->bytes, input->bytes);
  TF_LITE_ENSURE_EQ(context, output->dims->size, input->dims->size);
  for (int i = 0; i < output->dims->size; ++i) {
@@ -43,26 +46,29 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  reference_ops::Round(GetTensorShape(input), GetTensorData<float>(input),
                       GetTensorShape(output), GetTensorData<float>(output));
  reference_ops::Round(tflite::micro::GetTensorShape(input),
                       tflite::micro::GetTensorData<float>(input),
                       tflite::micro::GetTensorShape(output),
                       tflite::micro::GetTensorData<float>(output));

  return kTfLiteOk;
}
}  // namespace round

TfLiteRegistration* Register_ROUND() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/round::Prepare,
                                 /*invoke=*/round::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_ROUND() {
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/round::Prepare,
          /*invoke=*/round::Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

}  // namespace micro
73
code/lib/tfmicro/tensorflow/lite/micro/kernels/shape.cc
Normal file
@@ -0,0 +1,73 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_utils.h"

namespace tflite {

namespace {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;

void ExtractShape(const TfLiteEvalTensor* input, int32_t* output_data) {
  for (int i = 0; i < input->dims->size; ++i) {
    output_data[i] = input->dims->data[i];
  }
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);

  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
  if (output->type != kTfLiteInt32) {
    TF_LITE_KERNEL_LOG(context, "Output type %s (%d) not supported.",
                       TfLiteTypeGetName(output->type), output->type);
    return kTfLiteError;
  } else {
    ExtractShape(input, tflite::micro::GetTensorData<int32_t>(output));
  }

  return kTfLiteOk;
}

}  // namespace

TfLiteRegistration Register_SHAPE() {
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/Prepare,
          /*invoke=*/Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

}  // namespace tflite
@@ -22,29 +22,35 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
namespace micro {
namespace activations {
namespace {

// Softmax parameter data that persists in user_data
static constexpr int kInt16LUTArraySize = 513;

TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
                                    const TfLiteTensor* input,
                                    TfLiteTensor* output,
                                    const TfLiteSoftmaxParams* params,
                                    SoftmaxParams* op_data) {
  if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
  if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8 ||
      input->type == kTfLiteInt16) {
    if (input->type == kTfLiteUInt8) {
      TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteUInt8);
      TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
    } else {
    } else if (input->type == kTfLiteInt16) {
      TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
      TF_LITE_ENSURE_NEAR(context, output->params.scale, 1.f / 32768,
                          (0.001f * 1.f / 32768));
    } else {  // input->type == kTfLiteInt8
      TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8);
      if (output->type == kTfLiteInt16) {
        TF_LITE_ENSURE_EQ(context, output->params.zero_point, -32768);
        // NOTE: Current int16 softmax output does not require symmetric scaling
        // - so no need to verify scale here.
      } else {
        TF_LITE_ENSURE_NEAR(context, output->params.scale, 1.f / 65536,
                            (0.001f * 1.f / 65536));
      } else {  // output->type == kTfLiteInt8
        TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8);
        TF_LITE_ENSURE_EQ(context, output->params.zero_point, -128);
        TF_LITE_ENSURE(context, output->params.scale == 1.f / 256);
@@ -53,15 +59,28 @@ TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,

    static const int kScaledDiffIntegerBits = 5;

    int input_left_shift;
    tflite::PreprocessSoftmaxScaling(
        static_cast<double>(params->beta),
        static_cast<double>(input->params.scale), kScaledDiffIntegerBits,
        &op_data->input_multiplier, &input_left_shift);
    op_data->input_left_shift = input_left_shift;
    op_data->diff_min =
        -1.0 * tflite::CalculateInputRadius(kScaledDiffIntegerBits,
                                            op_data->input_left_shift);
    // Calculate input_multiplier and input_left_shift
    if (input->type == kTfLiteInt16) {
      int input_left_shift;
      double input_scale_beta_rescale =
          static_cast<double>(input->params.scale) *
          static_cast<double>(params->beta) /
          (10.0 / 65535.0);  // scale the input_diff such that [-65535, 0]
                             // correspond to [-10.0, 0.0]
      QuantizeMultiplier(input_scale_beta_rescale, &op_data->input_multiplier,
                         &input_left_shift);
      op_data->input_left_shift = input_left_shift;
    } else {
      int input_left_shift;
      tflite::PreprocessSoftmaxScaling(
          static_cast<double>(params->beta),
          static_cast<double>(input->params.scale), kScaledDiffIntegerBits,
          &op_data->input_multiplier, &input_left_shift);
      op_data->input_left_shift = input_left_shift;
      op_data->diff_min =
          -1.0 * tflite::CalculateInputRadius(kScaledDiffIntegerBits,
                                              op_data->input_left_shift);
    }
  } else {
    TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
    TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteFloat32);
@@ -70,53 +89,106 @@ TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
  return kTfLiteOk;
}

}  // namespace
// Takes a tensor and performs softmax along the last dimension.
void SoftmaxFloat(const TfLiteEvalTensor* input, TfLiteEvalTensor* output,
                  const SoftmaxParams& op_data) {
  tflite::reference_ops::Softmax(op_data, tflite::micro::GetTensorShape(input),
                                 tflite::micro::GetTensorData<float>(input),
                                 tflite::micro::GetTensorShape(output),
                                 tflite::micro::GetTensorData<float>(output));
}

void SoftmaxQuantized(const TfLiteEvalTensor* input, TfLiteEvalTensor* output,
                      const SoftmaxParams& op_data) {
  if (input->type == kTfLiteUInt8) {
    tflite::reference_ops::Softmax(
        op_data, tflite::micro::GetTensorShape(input),
        tflite::micro::GetTensorData<uint8_t>(input),
        tflite::micro::GetTensorShape(output),
        tflite::micro::GetTensorData<uint8_t>(output));
  } else if (input->type == kTfLiteInt8) {
    if (output->type == kTfLiteInt16) {
      tflite::reference_ops::Softmax(
          op_data, tflite::micro::GetTensorShape(input),
          tflite::micro::GetTensorData<int8_t>(input),
          tflite::micro::GetTensorShape(output),
          tflite::micro::GetTensorData<int16_t>(output));
    } else {
      tflite::reference_ops::Softmax(
          op_data, tflite::micro::GetTensorShape(input),
          tflite::micro::GetTensorData<int8_t>(input),
          tflite::micro::GetTensorShape(output),
          tflite::micro::GetTensorData<int8_t>(output));
    }
  } else {
    tflite::reference_ops::SoftmaxInt16(
        op_data, tflite::micro::GetTensorShape(input),
        tflite::micro::GetTensorData<int16_t>(input),
        tflite::micro::GetTensorShape(output),
        tflite::micro::GetTensorData<int16_t>(output));
  }
}

void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(SoftmaxParams));
}

TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
  const TfLiteTensor* input = GetInput(context, node, 0);
  TF_LITE_ENSURE(context, input != nullptr);
  TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
  TfLiteTensor* output = GetOutput(context, node, 0);
  TF_LITE_ENSURE(context, output != nullptr);

  return kTfLiteOk;
}

// Takes a tensor and performs softmax along the last dimension.
void SoftmaxFloat(const TfLiteTensor* input, TfLiteTensor* output,
                  const SoftmaxParams& op_data) {
  tflite::reference_ops::Softmax(
      op_data, GetTensorShape(input), GetTensorData<float>(input),
      GetTensorShape(output), GetTensorData<float>(output));
}

void SoftmaxQuantized(const TfLiteTensor* input, TfLiteTensor* output,
                      const SoftmaxParams& op_data) {
  if (input->type == kTfLiteUInt8) {
    tflite::reference_ops::Softmax(
        op_data, GetTensorShape(input), GetTensorData<uint8_t>(input),
        GetTensorShape(output), GetTensorData<uint8_t>(output));
  } else {
    if (output->type == kTfLiteInt16) {
      tflite::reference_ops::Softmax(
          op_data, GetTensorShape(input), GetTensorData<int8_t>(input),
          GetTensorShape(output), GetTensorData<int16_t>(output));
    } else {
      tflite::reference_ops::Softmax(
          op_data, GetTensorShape(input), GetTensorData<int8_t>(input),
          GetTensorShape(output), GetTensorData<int8_t>(output));
    }
  }
  TF_LITE_ENSURE(context, node->user_data != nullptr);
  SoftmaxParams* op_data = static_cast<SoftmaxParams*>(node->user_data);
  // Only allocate LUTs for kTfLiteInt16 data type
  if (input->type == kTfLiteInt16) {
    void* raw_exp_lut = context->AllocatePersistentBuffer(
        context, sizeof(int16_t) * kInt16LUTArraySize);
    TF_LITE_ENSURE(context, raw_exp_lut != nullptr);
    op_data->exp_lut = reinterpret_cast<int16_t*>(raw_exp_lut);
    void* one_over_one_plus_x_lut = context->AllocatePersistentBuffer(
        context, sizeof(int16_t) * kInt16LUTArraySize);
    TF_LITE_ENSURE(context, one_over_one_plus_x_lut != nullptr);
    op_data->one_over_one_plus_x_lut =
        reinterpret_cast<int16_t*>(one_over_one_plus_x_lut);
  }

  if (output->type == kTfLiteInt16) {
    TF_LITE_ENSURE(context, input->type == kTfLiteInt8 ||
                                input->type == kTfLiteUInt8 ||
                                input->type == kTfLiteInt16);
  } else {
    TF_LITE_ENSURE_EQ(context, input->type, output->type);
  }

  // Populate LUT if required
  if (input->type == kTfLiteInt16) {
    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
    // exp LUT only used on negative values
    // we consider exp(-10.0) is insignificant to accumulation
    gen_lut([](float value) { return std::exp(value); }, -10.0f, 0.0f,
            op_data->exp_lut, kInt16LUTArraySize);
    gen_lut([](float value) { return 1.0f / (1.0f + value); }, 0.0f, 1.0f,
            op_data->one_over_one_plus_x_lut, kInt16LUTArraySize);
    op_data->zero_point = output->params.zero_point;
    op_data->scale = output->params.scale;
  }

  auto* params = static_cast<TfLiteSoftmaxParams*>(node->builtin_data);
  return CalculateSoftmaxParams(context, input, output, params, op_data);
}

TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
  auto* params = static_cast<TfLiteSoftmaxParams*>(node->builtin_data);
  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);

  const TfLiteTensor* input = GetInput(context, node, 0);
  TfLiteTensor* output = GetOutput(context, node, 0);

  SoftmaxParams op_data;
  TF_LITE_ENSURE_STATUS(
      CalculateSoftmaxParams(context, input, output, params, &op_data));
  TFLITE_DCHECK(node->user_data != nullptr);
  SoftmaxParams op_data = *static_cast<SoftmaxParams*>(node->user_data);

  switch (input->type) {
    case kTfLiteFloat32: {
@@ -124,7 +196,8 @@ TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
      return kTfLiteOk;
    }
    case kTfLiteInt8:
    case kTfLiteUInt8: {
    case kTfLiteUInt8:
    case kTfLiteInt16: {
      SoftmaxQuantized(input, output, op_data);
      return kTfLiteOk;
    }
@@ -134,20 +207,17 @@ TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
      return kTfLiteError;
    }
  }
}  // namespace activations
}  // namespace

TfLiteRegistration* Register_SOFTMAX() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/activations::SoftmaxPrepare,
                                 /*invoke=*/activations::SoftmaxEval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_SOFTMAX() {
  return {/*init=*/SoftmaxInit,
          /*free=*/nullptr,
          /*prepare=*/SoftmaxPrepare,
          /*invoke=*/SoftmaxEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

}  // namespace micro
}  // namespace ops
}  // namespace tflite
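The LUT comments above ("exp LUT only used on negative values", and the rescale so that [-65535, 0] corresponds to [-10.0, 0.0]) rest on one observation: softmax evaluates exp(x - max(x)), so every argument is at most 0, and exp(-10) ~ 4.5e-5 is treated as negligible. Below is a float-only sketch of the 513-entry table the gen_lut calls build; the real kernel stores the values quantized to int16, so this is illustrative, not the library's code.

#include <cmath>

// Tabulate exp(x) for x in [-10, 0] at 513 evenly spaced points, matching
// kInt16LUTArraySize above. Values stay in float here for clarity.
void BuildExpTable(float table[513]) {
  for (int i = 0; i < 513; ++i) {
    const float x = -10.0f + 10.0f * static_cast<float>(i) / 512.0f;
    table[i] = std::exp(x);
  }
}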
@@ -17,6 +17,7 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
@@ -25,10 +26,11 @@ namespace split {

template <typename T>
TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node,
                       const TfLiteTensor* input, int axis_value) {
                       const TfLiteEvalTensor* input, int axis_value) {
  const int output_count = NumOutputs(node);
  const TfLiteIntArray* input_dims = input->dims;
  const TfLiteTensor* output0 = GetOutput(context, node, 0);
  const TfLiteEvalTensor* output0 =
      tflite::micro::GetEvalOutput(context, node, 0);
  const TfLiteIntArray* output_dims = output0->dims;

  const int split_dimensions = input_dims->size;
@@ -50,11 +52,11 @@ TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node,
    base_inner_size *= input_dims->data[i];
  }

  const T* input_ptr = GetTensorData<T>(input);
  const T* input_ptr = tflite::micro::GetTensorData<T>(input);
  for (int k = 0; k < outer_size; ++k) {
    for (int i = 0; i < output_count; ++i) {
      TfLiteTensor* t = GetOutput(context, node, i);
      T* output_data = GetTensorData<T>(t);
      TfLiteEvalTensor* t = tflite::micro::GetEvalOutput(context, node, i);
      T* output_data = tflite::micro::GetTensorData<T>(t);
      const int copy_size = output_dims->data[axis] * base_inner_size;
      T* output_ptr = output_data + k * copy_size;
      for (int j = 0; j < copy_size; ++j) output_ptr[j] = input_ptr[j];
@@ -65,23 +67,29 @@ TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node,
  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* axis = GetInput(context, node, 0);
  const TfLiteTensor* input = GetInput(context, node, 1);
  TF_LITE_ENSURE(context, axis != nullptr);

  // Dynamic output tensors are needed if axis tensor is not constant.
  // But Micro doesn't support dynamic memory allocation, so we only support
  // constant axis tensor for now.
  TF_LITE_ENSURE_MSG(context, IsConstantTensor(axis),
                     "Non constant axis tensor not supported");
  return kTfLiteOk;
}

  int axis_value = GetTensorData<int32_t>(axis)[0];
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 0);
  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 1);

  int axis_value = tflite::micro::GetTensorData<int32_t>(axis)[0];
  if (axis_value < 0) {
    axis_value += NumDimensions(input);
    axis_value += input->dims->size;
  }

  TF_LITE_ENSURE(context, axis_value >= 0);
  TF_LITE_ENSURE(context, axis_value < NumDimensions(input));
  TF_LITE_ENSURE(context, axis_value < input->dims->size);

  switch (input->type) {
    case kTfLiteFloat32: {
@@ -111,16 +119,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {

}  // namespace split

TfLiteRegistration* Register_SPLIT() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/split::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_SPLIT() {
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/split::Prepare,
          /*invoke=*/split::Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

}  // namespace micro
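The SplitImpl loops above (and the near-identical ones in split_v.cc below) are a flattened strided copy: outer_size counts the slices before the split axis, base_inner_size the contiguous run after it. Below is a float-only sketch for a split into equal parts; the raw-pointer outputs and helper name are hypothetical, chosen only to show the traversal order.

#include <vector>

void SplitEqual(const float* input, const int* dims, int rank, int axis,
                int count, const std::vector<float*>& outputs) {
  int outer_size = 1;
  for (int i = 0; i < axis; ++i) outer_size *= dims[i];
  int base_inner_size = 1;
  for (int i = axis + 1; i < rank; ++i) base_inner_size *= dims[i];
  // Each output slice is one contiguous run of this many elements per
  // outer step; the input pointer simply advances through them in order.
  const int copy_size = (dims[axis] / count) * base_inner_size;
  const float* input_ptr = input;
  for (int k = 0; k < outer_size; ++k) {
    for (int i = 0; i < count; ++i) {
      float* output_ptr = outputs[i] + k * copy_size;
      for (int j = 0; j < copy_size; ++j) output_ptr[j] = input_ptr[j];
      input_ptr += copy_size;
    }
  }
}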
135
code/lib/tfmicro/tensorflow/lite/micro/kernels/split_v.cc
Normal file
135
code/lib/tfmicro/tensorflow/lite/micro/kernels/split_v.cc
Normal file
@@ -0,0 +1,135 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
namespace micro {
namespace split_v {

template <typename T>
TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node,
                       const TfLiteEvalTensor* input, int axis_value) {
  const TfLiteIntArray* input_dims = input->dims;
  const TfLiteEvalTensor* output0 =
      tflite::micro::GetEvalOutput(context, node, 0);

  const int split_dimensions = input_dims->size;

  TFLITE_DCHECK_LT(axis_value, split_dimensions);
  TFLITE_DCHECK_EQ(output0->dims->size, split_dimensions);

  int64_t split_size = 0;
  const int output_count = NumOutputs(node);
  for (int i = 0; i < output_count; i++) {
    split_size +=
        tflite::micro::GetEvalOutput(context, node, i)->dims->data[axis_value];
  }
  TFLITE_DCHECK_EQ(split_size, input_dims->data[axis_value]);
  int64_t outer_size = 1;
  for (int i = 0; i < axis_value; ++i) {
    outer_size *= input_dims->data[i];
  }

  int64_t base_inner_size = 1;
  for (int i = axis_value + 1; i < split_dimensions; ++i) {
    base_inner_size *= input_dims->data[i];
  }

  const T* input_ptr = tflite::micro::GetTensorData<T>(input);
  for (int k = 0; k < outer_size; ++k) {
    for (int i = 0; i < output_count; ++i) {
      TfLiteEvalTensor* output_tensor =
          tflite::micro::GetEvalOutput(context, node, i);
      T* output_data = tflite::micro::GetTensorData<T>(output_tensor);
      const int copy_size =
          output_tensor->dims->data[axis_value] * base_inner_size;
      T* output_ptr = output_data + k * copy_size;
      for (int j = 0; j < copy_size; ++j) output_ptr[j] = input_ptr[j];
      input_ptr += copy_size;
    }
  }

  return kTfLiteOk;
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE_EQ(context, NumInputs(node), 3);

  // Dynamic output tensors are needed if axis tensor is not constant.
  // But Micro doesn't support dynamic memory allocation, so we only support
  // constant axis tensor for now.
  const TfLiteTensor* axis = GetInput(context, node, 2);
  TF_LITE_ENSURE_MSG(context, IsConstantTensor(axis),
                     "Non constant axis tensor not supported");

  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
  const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 2);

  int axis_value = tflite::micro::GetTensorData<int32_t>(axis)[0];
  if (axis_value < 0) {
    axis_value += input->dims->size;
  }

  TF_LITE_ENSURE(context, axis_value >= 0);
  TF_LITE_ENSURE(context, axis_value < input->dims->size);

  switch (input->type) {
    case kTfLiteFloat32: {
      return SplitImpl<float>(context, node, input, axis_value);
    }
    case kTfLiteInt8: {
      return SplitImpl<int8_t>(context, node, input, axis_value);
    }
    case kTfLiteInt16: {
      return SplitImpl<int16_t>(context, node, input, axis_value);
    }
    case kTfLiteInt32: {
      return SplitImpl<int32_t>(context, node, input, axis_value);
    }
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s currently not supported.",
                         TfLiteTypeGetName(input->type));
      return kTfLiteError;
  }
  return kTfLiteOk;
}

} // namespace split_v

TfLiteRegistration Register_SPLIT_V() {
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/split_v::Prepare,
          /*invoke=*/split_v::Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

} // namespace micro
} // namespace ops
} // namespace tflite
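SplitImpl above treats the input as outer_size contiguous blocks and hands each output a run of output_tensor->dims->data[axis_value] * base_inner_size elements per block. A small standalone sketch of that index arithmetic, with a hypothetical shape (not taken from the diff):

// Illustrative only: computing the copy geometry SplitImpl relies on.
#include <cstdio>

int main() {
  const int dims[] = {2, 6, 3};  // hypothetical input shape
  const int axis = 1;
  const int num_dims = 3;

  int outer_size = 1;  // product of dims before the split axis
  for (int i = 0; i < axis; ++i) outer_size *= dims[i];

  int base_inner_size = 1;  // product of dims after the split axis
  for (int i = axis + 1; i < num_dims; ++i) base_inner_size *= dims[i];

  // Splitting 6 along axis 1 into sizes {2, 4}: each outer block contributes
  // size_i * base_inner_size contiguous elements to output i.
  const int split_sizes[] = {2, 4};
  for (int i = 0; i < 2; ++i) {
    printf("output %d copies %d elements per outer block (%d blocks)\n", i,
           split_sizes[i] * base_inner_size, outer_size);
  }
  return 0;
}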
@@ -15,23 +15,20 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/reference/strided_slice.h"

#include <cmath>
#include <cstring>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
namespace micro {
namespace strided_slice {

enum KernelType {
  kReference,
  // TODO(soroosh): add kGenericOptimized
};

constexpr int kInputTensor = 0;
constexpr int kBeginTensor = 1;
constexpr int kEndTensor = 2;
@@ -120,64 +117,74 @@ TfLiteStatus CheckOutputSize(TfLiteContext* context,
  return kTfLiteOk;
}

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(StridedSliceParams));
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  StridedSliceParams* op_params =
      static_cast<StridedSliceParams*>(node->user_data);
  TF_LITE_ENSURE_EQ(context, NumInputs(node), 4);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
  StridedSliceContext op_context(context, node);
  TF_LITE_ENSURE_MSG(context, op_context.dims <= kMaxDim,
                     "input dim should not exceed 4");
  auto params = BuildStridedSliceParams(&op_context);
  memcpy(op_params, &params, sizeof(StridedSliceParams));
  return CheckOutputSize(context, &op_context);
}

template <KernelType kernel_type>
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  StridedSliceContext op_context(context, node);
  auto op_params = BuildStridedSliceParams(&op_context);
  TFLITE_DCHECK(node->user_data != nullptr);
  const StridedSliceParams& op_params =
      *(static_cast<const StridedSliceParams*>(node->user_data));

#define TF_LITE_STRIDED_SLICE(kernel_type, data_type)                       \
  kernel_type::StridedSlice(op_params, GetTensorShape(op_context.input),   \
                            GetTensorData<data_type>(op_context.input),    \
                            GetTensorShape(op_context.output),             \
                            GetTensorData<data_type>(op_context.output))

  switch (op_context.input->type) {
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
  switch (output->type) {
    case kTfLiteFloat32:
      if (kernel_type == kReference) {
        TF_LITE_STRIDED_SLICE(reference_ops, float);
      }
      reference_ops::StridedSlice(op_params,
                                  tflite::micro::GetTensorShape(input),
                                  tflite::micro::GetTensorData<float>(input),
                                  tflite::micro::GetTensorShape(output),
                                  tflite::micro::GetTensorData<float>(output));
      break;
    case kTfLiteUInt8:
      if (kernel_type == kReference) {
        TF_LITE_STRIDED_SLICE(reference_ops, uint8_t);
      }
      reference_ops::StridedSlice(
          op_params, tflite::micro::GetTensorShape(input),
          tflite::micro::GetTensorData<uint8_t>(input),
          tflite::micro::GetTensorShape(output),
          tflite::micro::GetTensorData<uint8_t>(output));
      break;
    case kTfLiteInt8:
      if (kernel_type == kReference) {
        TF_LITE_STRIDED_SLICE(reference_ops, int8_t);
      }
      reference_ops::StridedSlice(op_params,
                                  tflite::micro::GetTensorShape(input),
                                  tflite::micro::GetTensorData<int8_t>(input),
                                  tflite::micro::GetTensorShape(output),
                                  tflite::micro::GetTensorData<int8_t>(output));
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                         TfLiteTypeGetName(op_context.input->type),
                         op_context.input->type);
                         TfLiteTypeGetName(input->type), input->type);
      return kTfLiteError;
  }
#undef TF_LITE_STRIDED_SLICE
  return kTfLiteOk;
}
} // namespace strided_slice

TfLiteRegistration* Register_STRIDED_SLICE() {
  static TfLiteRegistration r = {
      /*init=*/nullptr,
      /*free=*/nullptr,
      /*prepare=*/strided_slice::Prepare,
      /*invoke=*/strided_slice::Eval<strided_slice::kReference>,
      /*profiling_string=*/nullptr,
      /*builtin_code=*/0,
      /*custom_name=*/nullptr,
      /*version=*/0};
  return &r;
TfLiteRegistration Register_STRIDED_SLICE() {
  return {/*init=*/strided_slice::Init,
          /*free=*/nullptr,
          /*prepare=*/strided_slice::Prepare,
          /*invoke=*/strided_slice::Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

} // namespace micro

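strided_slice now builds StridedSliceParams once: Init reserves node->user_data from the persistent arena, Prepare fills it via BuildStridedSliceParams, and Eval only reads it back. A generic sketch of this Init/Prepare/Eval hand-off follows; CachedParams is a stand-in, not the real StridedSliceParams.

// Sketch of the persistent user_data lifecycle used by the ported kernels.
#include "tensorflow/lite/c/common.h"

struct CachedParams {  // stand-in for StridedSliceParams
  int cached_value;
};

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  // Persistent allocations live as long as the interpreter, which makes
  // them a good home for node->user_data.
  return context->AllocatePersistentBuffer(context, sizeof(CachedParams));
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  auto* params = static_cast<CachedParams*>(node->user_data);
  params->cached_value = 42;  // computed once, at graph-preparation time
  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const auto& params = *static_cast<const CachedParams*>(node->user_data);
  (void)params.cached_value;  // reused on every invoke, no recomputation
  return kTfLiteOk;
}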
@@ -21,8 +21,10 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
@@ -40,18 +42,18 @@ struct OpData {
  // and the special 16-bit -> 16bit quantized path
  int input1_shift;
  int input2_shift;
  int32 output_activation_min;
  int32 output_activation_max;
  int32_t output_activation_min;
  int32_t output_activation_max;

  // These fields are used only in the general 8-bit -> 8bit quantized path
  int32 input1_multiplier;
  int32 input2_multiplier;
  int32 output_multiplier;
  int32_t input1_multiplier;
  int32_t input2_multiplier;
  int32_t output_multiplier;
  int output_shift;
  int left_shift;
  int32 input1_offset;
  int32 input2_offset;
  int32 output_offset;
  int32_t input1_offset;
  int32_t input2_offset;
  int32_t output_offset;
};

TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteSubParams* params,
@@ -93,31 +95,62 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteSubParams* params,
  return kTfLiteOk;
}

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  TFLITE_DCHECK(node->builtin_data != nullptr);

  OpData* data = static_cast<OpData*>(node->user_data);
  auto* params = reinterpret_cast<TfLiteSubParams*>(node->builtin_data);

  const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
  TF_LITE_ENSURE(context, input1 != nullptr);
  const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
  TF_LITE_ENSURE(context, input2 != nullptr);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);

  TF_LITE_ENSURE_STATUS(
      CalculateOpData(context, params, input1, input2, output, data));
  return kTfLiteOk;
}

void EvalSub(TfLiteContext* context, TfLiteNode* node, TfLiteSubParams* params,
             const OpData* data, const TfLiteTensor* input1,
             const TfLiteTensor* input2, TfLiteTensor* output) {
             const OpData* data, const TfLiteEvalTensor* input1,
             const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
  float output_activation_min, output_activation_max;
  CalculateActivationRange(params->activation, &output_activation_min,
                           &output_activation_max);
  tflite::ArithmeticParams op_params;
  SetActivationParams(output_activation_min, output_activation_max, &op_params);
#define TF_LITE_SUB(opname)                                                 \
  opname(op_params, GetTensorShape(input1), GetTensorData<float>(input1),  \
         GetTensorShape(input2), GetTensorData<float>(input2),             \
         GetTensorShape(output), GetTensorData<float>(output))
  if (data->requires_broadcast) {
    TF_LITE_SUB(tflite::reference_ops::BroadcastSubSlow);
    tflite::reference_ops::BroadcastSubSlow(
        op_params, tflite::micro::GetTensorShape(input1),
        tflite::micro::GetTensorData<float>(input1),
        tflite::micro::GetTensorShape(input2),
        tflite::micro::GetTensorData<float>(input2),
        tflite::micro::GetTensorShape(output),
        tflite::micro::GetTensorData<float>(output));
  } else {
    TF_LITE_SUB(tflite::reference_ops::SubWithActivation);
    tflite::reference_ops::SubWithActivation(
        op_params, tflite::micro::GetTensorShape(input1),
        tflite::micro::GetTensorData<float>(input1),
        tflite::micro::GetTensorShape(input2),
        tflite::micro::GetTensorData<float>(input2),
        tflite::micro::GetTensorShape(output),
        tflite::micro::GetTensorData<float>(output));
  }
#undef TF_LITE_SUB
}

TfLiteStatus EvalSubQuantized(TfLiteContext* context, TfLiteNode* node,
                              TfLiteSubParams* params, const OpData* data,
                              const TfLiteTensor* input1,
                              const TfLiteTensor* input2,
                              TfLiteTensor* output) {
                              const TfLiteEvalTensor* input1,
                              const TfLiteEvalTensor* input2,
                              TfLiteEvalTensor* output) {
  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
    tflite::ArithmeticParams op_params;
    op_params.left_shift = data->left_shift;
@@ -133,25 +166,46 @@ TfLiteStatus EvalSubQuantized(TfLiteContext* context, TfLiteNode* node,
    SetActivationParams(data->output_activation_min,
                        data->output_activation_max, &op_params);
    bool need_broadcast = reference_ops::ProcessBroadcastShapes(
        GetTensorShape(input1), GetTensorShape(input2), &op_params);
#define TF_LITE_SUB(opname, dtype)                                          \
  opname(op_params, GetTensorShape(input1), GetTensorData<dtype>(input1),  \
         GetTensorShape(input2), GetTensorData<dtype>(input2),             \
         GetTensorShape(output), GetTensorData<dtype>(output));
        tflite::micro::GetTensorShape(input1),
        tflite::micro::GetTensorShape(input2), &op_params);

    if (output->type == kTfLiteInt8) {
      if (need_broadcast) {
        TF_LITE_SUB(tflite::reference_ops::BroadcastSubSlow, int8_t);
        tflite::reference_ops::BroadcastSubSlow(
            op_params, tflite::micro::GetTensorShape(input1),
            tflite::micro::GetTensorData<int8_t>(input1),
            tflite::micro::GetTensorShape(input2),
            tflite::micro::GetTensorData<int8_t>(input2),
            tflite::micro::GetTensorShape(output),
            tflite::micro::GetTensorData<int8_t>(output));
      } else {
        TF_LITE_SUB(tflite::reference_ops::Sub, int8_t);
        tflite::reference_ops::Sub(
            op_params, tflite::micro::GetTensorShape(input1),
            tflite::micro::GetTensorData<int8_t>(input1),
            tflite::micro::GetTensorShape(input2),
            tflite::micro::GetTensorData<int8_t>(input2),
            tflite::micro::GetTensorShape(output),
            tflite::micro::GetTensorData<int8_t>(output));
      }
    } else {
      if (need_broadcast) {
        TF_LITE_SUB(tflite::reference_ops::BroadcastSubSlow, uint8_t);
        tflite::reference_ops::BroadcastSubSlow(
            op_params, tflite::micro::GetTensorShape(input1),
            tflite::micro::GetTensorData<uint8_t>(input1),
            tflite::micro::GetTensorShape(input2),
            tflite::micro::GetTensorData<uint8_t>(input2),
            tflite::micro::GetTensorShape(output),
            tflite::micro::GetTensorData<uint8_t>(output));
      } else {
        TF_LITE_SUB(tflite::reference_ops::Sub, uint8_t);
        tflite::reference_ops::Sub(
            op_params, tflite::micro::GetTensorShape(input1),
            tflite::micro::GetTensorData<uint8_t>(input1),
            tflite::micro::GetTensorShape(input2),
            tflite::micro::GetTensorData<uint8_t>(input2),
            tflite::micro::GetTensorShape(output),
            tflite::micro::GetTensorData<uint8_t>(output));
      }
    }
#undef TF_LITE_SUB
  }

  return kTfLiteOk;
@@ -160,13 +214,15 @@ TfLiteStatus EvalSubQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  auto* params = reinterpret_cast<TfLiteSubParams*>(node->builtin_data);

  const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
  const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  const TfLiteEvalTensor* input1 =
      tflite::micro::GetEvalInput(context, node, kInputTensor1);
  const TfLiteEvalTensor* input2 =
      tflite::micro::GetEvalInput(context, node, kInputTensor2);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  OpData data;
  TF_LITE_ENSURE_STATUS(
      CalculateOpData(context, params, input1, input2, output, &data));
  TFLITE_DCHECK(node->user_data != nullptr);
  const OpData& data = *(static_cast<const OpData*>(node->user_data));

  if (output->type == kTfLiteFloat32) {
    EvalSub(context, node, params, &data, input1, input2, output);
@@ -184,16 +240,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {

} // namespace sub

TfLiteRegistration* Register_SUB() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/sub::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_SUB() {
  return {/*init=*/sub::Init,
          /*free=*/nullptr,
          /*prepare=*/sub::Prepare,
          /*invoke=*/sub::Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

} // namespace micro

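With this change, Prepare precomputes the offsets, multipliers, and left_shift stored in OpData, so Eval no longer calls CalculateOpData on every invocation. A rough, self-contained sketch of the rescaling those fields drive follows; the constants are illustrative only, and the rescale helper is a simplified floating-point stand-in for MultiplyByQuantizedMultiplier.

#include <cstdint>

// Simplified stand-in for MultiplyByQuantizedMultiplier: the rescale is done
// in floating point purely for illustration.
int32_t RescaleSketch(int32_t v, double scale) {
  return static_cast<int32_t>(v * scale);
}

int8_t QuantizedSubSketch(int8_t a, int8_t b) {
  // Illustrative OpData values; the real ones are derived from the tensors'
  // quantization parameters in Prepare.
  const int left_shift = 20;
  const int32_t input1_offset = 1, input2_offset = -2, output_offset = 3;
  const double input1_scale = 0.5, input2_scale = 0.25, output_scale = 0.125;

  // Both inputs are offset-corrected, shifted left for headroom, rescaled,
  // subtracted, then rescaled to the output quantization and offset.
  const int32_t shifted_a = (a + input1_offset) * (1 << left_shift);
  const int32_t shifted_b = (b + input2_offset) * (1 << left_shift);
  const int32_t raw = RescaleSketch(shifted_a, input1_scale) -
                      RescaleSketch(shifted_b, input2_scale);
  int32_t out = RescaleSketch(raw, output_scale) + output_offset;
  // Clamp to the activation range, as SetActivationParams configures.
  if (out < -128) out = -128;
  if (out > 127) out = 127;
  return static_cast<int8_t>(out);
}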
@@ -23,25 +23,38 @@ limitations under the License.
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/activation_utils.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"

namespace tflite {
namespace ops {
namespace micro {
namespace svdf {
namespace {

struct OpData {
  int32 effective_scale_1_a;
  int32 effective_scale_2_a;
  int32_t effective_scale_1_a;
  int32_t effective_scale_2_a;
  // b versions of each scale are kept at int since the numbers are just the
  // shift value - typically between [-32, 32].
  int effective_scale_1_b;
  int effective_scale_2_b;
  int scratch_tensor_index;
  int scratch_output_tensor_index;

  // Cached tensor zero point values for quantized operations.
  int input_zero_point;
  int output_zero_point;
};

// Input tensors.
constexpr int kInputTensor = 0;
constexpr int kWeightsFeatureTensor = 1;
constexpr int kWeightsTimeTensor = 2;
constexpr int kBiasTensor = 3;
// This is a variable tensor, and will be modified by this op.
constexpr int kInputActivationStateTensor = 4;

// Output tensor.
constexpr int kOutputTensor = 0;

/**
 * This version of SVDF is specific to TFLite Micro. It contains the following
 * differences between the TFLite version:
@@ -107,18 +120,19 @@ static inline void ApplyTimeWeightsBiasAndActivation(
  for (int b = 0; b < batch_size; ++b) {
    float* output_ptr_batch = output_ptr + b * num_units;
    for (int i = 0; i < num_units; ++i) {
      *output_ptr_batch = ActivationValFloat(activation, *output_ptr_batch);
      *output_ptr_batch =
          tflite::ops::micro::ActivationValFloat(activation, *output_ptr_batch);
      ++output_ptr_batch;
    }
  }
}

inline void EvalFloatSVDF(
    TfLiteContext* context, TfLiteNode* node, const TfLiteTensor* input,
    const TfLiteTensor* weights_feature, const TfLiteTensor* weights_time,
    const TfLiteTensor* bias, const TfLiteSVDFParams* params,
    int scratch_tensor_index, TfLiteTensor* activation_state,
    TfLiteTensor* output) {
    TfLiteContext* context, TfLiteNode* node, const TfLiteEvalTensor* input,
    const TfLiteEvalTensor* weights_feature,
    const TfLiteEvalTensor* weights_time, const TfLiteEvalTensor* bias,
    const TfLiteSVDFParams* params, int scratch_tensor_index,
    TfLiteEvalTensor* activation_state, TfLiteEvalTensor* output) {
  const int rank = params->rank;
  const int batch_size = input->dims->data[0];
  const int input_size = input->dims->data[1];
@@ -126,12 +140,14 @@ inline void EvalFloatSVDF(
  const int num_units = num_filters / rank;
  const int memory_size = weights_time->dims->data[1];

  const float* weights_feature_ptr = GetTensorData<float>(weights_feature);
  const float* weights_time_ptr = GetTensorData<float>(weights_time);
  const float* bias_ptr = GetTensorData<float>(bias);
  const float* input_ptr = GetTensorData<float>(input);
  const float* weights_feature_ptr =
      tflite::micro::GetTensorData<float>(weights_feature);
  const float* weights_time_ptr =
      tflite::micro::GetTensorData<float>(weights_time);
  const float* bias_ptr = tflite::micro::GetTensorData<float>(bias);
  const float* input_ptr = tflite::micro::GetTensorData<float>(input);

  float* state_ptr = GetTensorData<float>(activation_state);
  float* state_ptr = tflite::micro::GetTensorData<float>(activation_state);

  TFLITE_DCHECK(context != nullptr);
  TFLITE_DCHECK(context->GetScratchBuffer != nullptr);
@@ -139,7 +155,7 @@ inline void EvalFloatSVDF(
  float* scratch_ptr = static_cast<float*>(
      context->GetScratchBuffer(context, scratch_tensor_index));

  float* output_ptr = GetTensorData<float>(output);
  float* output_ptr = tflite::micro::GetTensorData<float>(output);

  // Left shift the activation_state.
  {
@@ -185,14 +201,13 @@ inline void EvalFloatSVDF(
}

void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
                     const TfLiteTensor* input_tensor,
                     const TfLiteTensor* weights_feature_tensor,
                     const TfLiteTensor* weights_time_tensor,
                     const TfLiteTensor* bias_tensor,
                     const TfLiteEvalTensor* input_tensor,
                     const TfLiteEvalTensor* weights_feature_tensor,
                     const TfLiteEvalTensor* weights_time_tensor,
                     const TfLiteEvalTensor* bias_tensor,
                     const TfLiteSVDFParams* params,
                     TfLiteTensor* activation_state_tensor,
                     TfLiteTensor* output_tensor, const OpData& data,
                     int32_t input_zp, int32_t output_zp) {
                     TfLiteEvalTensor* activation_state_tensor,
                     TfLiteEvalTensor* output_tensor, const OpData& data) {
  const int n_rank = params->rank;
  const int n_batch = input_tensor->dims->data[0];
  const int n_input = input_tensor->dims->data[1];
@@ -209,7 +224,8 @@ void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
      context->GetScratchBuffer(context, data.scratch_output_tensor_index));

  // Shift states.
  int16_t* const state_ptr = GetTensorData<int16_t>(activation_state_tensor);
  int16_t* const state_ptr =
      tflite::micro::GetTensorData<int16_t>(activation_state_tensor);

  // Left shift the activation_state.
  {
@@ -225,10 +241,11 @@ void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,

  // Feature matmul.
  {
    int16_t* state = GetTensorData<int16_t>(activation_state_tensor);
    const int8_t* input = GetTensorData<int8_t>(input_tensor);
    int16_t* state =
        tflite::micro::GetTensorData<int16_t>(activation_state_tensor);
    const int8_t* input = tflite::micro::GetTensorData<int8_t>(input_tensor);
    const int8_t* weight_feature =
        GetTensorData<int8_t>(weights_feature_tensor);
        tflite::micro::GetTensorData<int8_t>(weights_feature_tensor);
    const int32_t output_max = std::numeric_limits<int16_t>::max();
    const int32_t output_min = std::numeric_limits<int16_t>::min();
    int16_t* result_in_batch = state + (n_memory - 1);
@@ -238,7 +255,8 @@ void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
        int32_t dot_prod = 0;
        const int8_t* vector_in_batch = input + b * n_input;
        for (int c = 0; c < n_input; c++) {
          dot_prod += *matrix_ptr++ * (*vector_in_batch++ - input_zp);
          dot_prod +=
              *matrix_ptr++ * (*vector_in_batch++ - data.input_zero_point);
        }
        dot_prod = MultiplyByQuantizedMultiplier(
            dot_prod, data.effective_scale_1_a, data.effective_scale_1_b);
@@ -261,9 +279,10 @@ void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
      int32_t* scratch_ptr_batch = scratch_tensor + b * n_filter;

      // Perform batched vector dot product:
      const int16_t* vector1_ptr = GetTensorData<int16_t>(weights_time_tensor);
      const int16_t* vector1_ptr =
          tflite::micro::GetTensorData<int16_t>(weights_time_tensor);
      const int16_t* vector2_ptr =
          GetTensorData<int16_t>(activation_state_tensor) +
          tflite::micro::GetTensorData<int16_t>(activation_state_tensor) +
          b * n_memory * n_filter;

      for (int i = 0; i < n_filter; i++) {
@@ -281,7 +300,8 @@ void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
    // Add bias.
    if (bias_tensor) {
      // Vector batch assign:
      const int32_t* bias_data = GetTensorData<int32_t>(bias_tensor);
      const int32_t* bias_data =
          tflite::micro::GetTensorData<int32_t>(bias_tensor);
      for (int i = 0; i < n_batch; ++i) {
        int32_t* output_ptr = scratch_output_tensor + i * n_unit;
        const int32_t* bias_ptr = bias_data;
@@ -316,34 +336,17 @@ void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
      int32_t x1 = scratch_output_tensor[i];
      int32_t x2 = MultiplyByQuantizedMultiplier(x1, data.effective_scale_2_a,
                                                 data.effective_scale_2_b);
      int32_t x3 = x2 + output_zp;
      int32_t x3 = x2 + data.output_zero_point;
      int32_t x4 = std::min(std::max(output_min, x3), output_max);
      GetTensorData<int8_t>(output_tensor)[i] = static_cast<int8_t>(x4);
      tflite::micro::GetTensorData<int8_t>(output_tensor)[i] =
          static_cast<int8_t>(x4);
    }
  }
}

} // namespace

// Input tensors.
constexpr int kInputTensor = 0;
constexpr int kWeightsFeatureTensor = 1;
constexpr int kWeightsTimeTensor = 2;
constexpr int kBiasTensor = 3;
// This is a variable tensor, and will be modified by this op.
constexpr int kInputActivationStateTensor = 4;

// Output tensor.
constexpr int kOutputTensor = 0;

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  void* data = nullptr;
  if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) ==
      kTfLiteError) {
    return nullptr;
  }
  return data;
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
@@ -359,13 +362,17 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  // [4] = Activation State (variable),
  //       {2, batch_size, memory_size * num_filters}
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  const TfLiteTensor* weights_feature =
      GetInput(context, node, kWeightsFeatureTensor);
  TF_LITE_ENSURE(context, weights_feature != nullptr);
  const TfLiteTensor* weights_time =
      GetInput(context, node, kWeightsTimeTensor);
  TF_LITE_ENSURE(context, weights_time != nullptr);
  const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
  const TfLiteTensor* activation_state =
      GetInput(context, node, kInputActivationStateTensor);
  TF_LITE_ENSURE(context, activation_state != nullptr);

  // Define input constants based on input tensor definition above:
  const int rank = params->rank;
@@ -382,9 +389,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE_EQ(context, NumDimensions(input), 2);

  // Validate Tensor Output:
  // [0] = float/int8, {2, batch_size, num_units}
  // [0] = float/int8_t, {2, batch_size, num_units}
  TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);
  TF_LITE_ENSURE_EQ(context, NumDimensions(output), 2);
  TF_LITE_ENSURE_EQ(context, output->dims->data[0], batch_size);
  TF_LITE_ENSURE_EQ(context, output->dims->data[1], num_units);
@@ -408,9 +416,14 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE_EQ(context, activation_state->dims->data[0], batch_size);
  TF_LITE_ENSURE_EQ(context, activation_state->dims->data[1],
                    memory_size * num_filters);
  // Since is_variable is not part of TFLiteEvalTensor, check is_variable here.
  TF_LITE_ENSURE_EQ(context, activation_state->is_variable, true);

  TF_LITE_ENSURE_EQ(context, node->inputs->size, 5);

  TFLITE_DCHECK(node->user_data != nullptr);
  OpData* data = static_cast<OpData*>(node->user_data);

  if (input->type == kTfLiteInt8) {
    TF_LITE_ENSURE_EQ(context, weights_feature->type, kTfLiteInt8);
    TF_LITE_ENSURE_EQ(context, weights_time->type, kTfLiteInt16);
@@ -419,35 +432,30 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
      TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteInt32);
    }

    TF_LITE_ENSURE_EQ(context, output->type, kTfLiteInt8);
    TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8);

    const auto* input_params =
        reinterpret_cast<TfLiteAffineQuantization*>(input->quantization.params);
    const auto* weights_feature_params =
        static_cast<const TfLiteAffineQuantization*>(
            weights_feature->quantization.params);
    const auto* state_params = static_cast<const TfLiteAffineQuantization*>(
        activation_state->quantization.params);
    const auto* weight_time_params =
        static_cast<const TfLiteAffineQuantization*>(
            weights_time->quantization.params);
    const auto* output_params = static_cast<const TfLiteAffineQuantization*>(
        output->quantization.params);
    const double effective_scale_1 = static_cast<double>(
        input_params->scale->data[0] * weights_feature_params->scale->data[0] /
        state_params->scale->data[0]);
    const double effective_scale_2 = static_cast<double>(
        state_params->scale->data[0] * weight_time_params->scale->data[0] /
        output_params->scale->data[0]);
        input->params.scale * weights_feature->params.scale /
        activation_state->params.scale);
    const double effective_scale_2 =
        static_cast<double>(activation_state->params.scale *
                            weights_time->params.scale / output->params.scale);

    TFLITE_DCHECK(node->user_data != nullptr);
    OpData* data = static_cast<OpData*>(node->user_data);
    // TODO(b/162018098): Use TF_LITE_ENSURE_NEAR when it is ready.
    TF_LITE_ENSURE(
        context,
        std::abs(static_cast<double>(bias->params.scale) -
                 static_cast<double>(activation_state->params.scale *
                                     weights_time->params.scale)) < 1e-5);

    QuantizeMultiplier(effective_scale_1, &(data->effective_scale_1_a),
                       &(data->effective_scale_1_b));
    QuantizeMultiplier(effective_scale_2, &(data->effective_scale_2_a),
                       &(data->effective_scale_2_b));

    data->input_zero_point = input->params.zero_point;
    data->output_zero_point = output->params.zero_point;

    TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);

    const TfLiteStatus scratch_status = context->RequestScratchBufferInArena(
@@ -467,10 +475,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
    if (bias != nullptr) {
      TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteFloat32);
    }
    TF_LITE_ENSURE_EQ(context, output->type, kTfLiteFloat32);

    TFLITE_DCHECK(node->user_data != nullptr);
    OpData* data = static_cast<OpData*>(node->user_data);
    TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteFloat32);

    TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
    const TfLiteStatus scratch_status = context->RequestScratchBufferInArena(
@@ -484,20 +489,24 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  auto* params = reinterpret_cast<TfLiteSVDFParams*>(node->builtin_data);

  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  const TfLiteTensor* weights_feature =
      GetInput(context, node, kWeightsFeatureTensor);
  const TfLiteTensor* weights_time =
      GetInput(context, node, kWeightsTimeTensor);
  const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
  TfLiteTensor* activation_state =
      GetVariableInput(context, node, kInputActivationStateTensor);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);

  TFLITE_DCHECK(node->user_data != nullptr);
  const OpData& data = *(static_cast<const OpData*>(node->user_data));

  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  const TfLiteEvalTensor* weights_feature =
      tflite::micro::GetEvalInput(context, node, kWeightsFeatureTensor);
  const TfLiteEvalTensor* weights_time =
      tflite::micro::GetEvalInput(context, node, kWeightsTimeTensor);
  const TfLiteEvalTensor* bias =
      (NumInputs(node) == 5)
          ? tflite::micro::GetEvalInput(context, node, kBiasTensor)
          : nullptr;
  TfLiteEvalTensor* activation_state = tflite::micro::GetMutableEvalInput(
      context, node, kInputActivationStateTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  switch (weights_feature->type) {
    case kTfLiteFloat32: {
      EvalFloatSVDF(context, node, input, weights_feature, weights_time, bias,
@@ -508,11 +517,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
    }

    case kTfLiteInt8: {
      TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActRelu);

      EvalIntegerSVDF(context, node, input, weights_feature, weights_time, bias,
                      params, activation_state, output, data,
                      input->params.zero_point, output->params.zero_point);
                      params, activation_state, output, data);
      return kTfLiteOk;
      break;
    }
@@ -525,20 +531,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  return kTfLiteOk;
}

} // namespace svdf
} // namespace

TfLiteRegistration* Register_SVDF() {
  static TfLiteRegistration r = {/*init=*/svdf::Init,
                                 /*free=*/nullptr,
                                 /*prepare=*/svdf::Prepare,
                                 /*invoke=*/svdf::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_SVDF() {
  return {/*init=*/Init,
          /*free=*/nullptr,
          /*prepare=*/Prepare,
          /*invoke=*/Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

} // namespace micro
} // namespace ops
} // namespace tflite

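The SVDF Prepare derives its two effective scales from the tensor scales and folds each into a 32-bit multiplier plus shift; the zero points are cached in OpData because TfLiteEvalTensor no longer exposes quantization params at Eval time. A sketch of that derivation follows, with illustrative scale values and a simplified stand-in for QuantizeMultiplier (the real one handles edge cases this one omits).

#include <cmath>
#include <cstdint>
#include <cstdio>

// Simplified stand-in for tflite's QuantizeMultiplier: express `value` as
// multiplier * 2^shift, with the multiplier in Q31.
void QuantizeMultiplierSketch(double value, int32_t* multiplier, int* shift) {
  const double q = std::frexp(value, shift);
  *multiplier = static_cast<int32_t>(std::round(q * (1ll << 31)));
}

int main() {
  // Illustrative tensor scales; real values come from the model.
  const double input_scale = 0.05, weights_feature_scale = 0.01;
  const double state_scale = 0.002, weights_time_scale = 0.004;
  const double output_scale = 0.1;

  const double effective_scale_1 =
      input_scale * weights_feature_scale / state_scale;
  const double effective_scale_2 =
      state_scale * weights_time_scale / output_scale;

  int32_t a1, a2;
  int b1, b2;
  QuantizeMultiplierSketch(effective_scale_1, &a1, &b1);
  QuantizeMultiplierSketch(effective_scale_2, &a2, &b2);
  printf("scale1 ~ %d * 2^%d, scale2 ~ %d * 2^%d\n", static_cast<int>(a1), b1,
         static_cast<int>(a2), b2);
  return 0;
}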
158
code/lib/tfmicro/tensorflow/lite/micro/kernels/tanh.cc
Normal file
@@ -0,0 +1,158 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h"

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/tanh.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"

namespace tflite {
namespace ops {
namespace micro {
namespace activations {
namespace {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;

struct OpData {
  int32_t input_zero_point;
  int32_t input_range_radius;
  int32_t input_multiplier;
  int input_left_shift;
};

void* TanhInit(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
                                       OpData* data) {
  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);

  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);

  if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
    static constexpr int kInputIntegerBits = 4;
    const double input_real_multiplier =
        static_cast<double>(input->params.scale) *
        static_cast<double>(1 << (31 - kInputIntegerBits));

    const double q = std::frexp(input_real_multiplier, &data->input_left_shift);
    data->input_multiplier = static_cast<int32_t>(TfLiteRound(q * (1ll << 31)));

    data->input_range_radius =
        CalculateInputRadius(kInputIntegerBits, data->input_left_shift, 31);
  }
  return kTfLiteOk;
}

TfLiteStatus TanhPrepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);

  OpData* data = static_cast<OpData*>(node->user_data);

  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  data->input_zero_point = input->params.zero_point;
  return CalculateArithmeticOpData(context, node, data);
}

} // namespace

TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  TFLITE_DCHECK(node->user_data != nullptr);
  const OpData& data = *(static_cast<const OpData*>(node->user_data));

  switch (input->type) {
    case kTfLiteFloat32: {
      reference_ops::Tanh(tflite::micro::GetTensorShape(input),
                          tflite::micro::GetTensorData<float>(input),
                          tflite::micro::GetTensorShape(output),
                          tflite::micro::GetTensorData<float>(output));
      return kTfLiteOk;
    } break;
    case kTfLiteInt16: {
      TanhParams params;
      params.input_left_shift = data.input_left_shift;
      reference_ops::Tanh(params, tflite::micro::GetTensorShape(input),
                          tflite::micro::GetTensorData<int16_t>(input),
                          tflite::micro::GetTensorShape(output),
                          tflite::micro::GetTensorData<int16_t>(output));
      return kTfLiteOk;
    } break;
    case kTfLiteUInt8: {
      TanhParams params;
      params.input_zero_point = data.input_zero_point;
      params.input_range_radius = data.input_range_radius;
      params.input_multiplier = data.input_multiplier;
      params.input_left_shift = data.input_left_shift;
      reference_ops::Tanh(params, tflite::micro::GetTensorShape(input),
                          tflite::micro::GetTensorData<uint8_t>(input),
                          tflite::micro::GetTensorShape(output),
                          tflite::micro::GetTensorData<uint8_t>(output));

      return kTfLiteOk;
    } break;
    case kTfLiteInt8: {
      reference_integer_ops::Tanh(
          data.input_zero_point, data.input_range_radius, data.input_multiplier,
          data.input_left_shift, tflite::micro::GetTensorShape(input),
          tflite::micro::GetTensorData<int8_t>(input),
          tflite::micro::GetTensorShape(output),
          tflite::micro::GetTensorData<int8_t>(output));
      return kTfLiteOk;
    } break;
    default:
      TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
                         TfLiteTypeGetName(input->type),
                         TfLiteTypeGetName(output->type));
      return kTfLiteError;
  }
}

} // namespace activations

TfLiteRegistration Register_TANH() {
  return {/*init=*/activations::TanhInit,
          /*free=*/nullptr,
          /*prepare=*/activations::TanhPrepare,
          /*invoke=*/activations::TanhEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite
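The int8 tanh path approximates tanh((x - zero_point) * scale) entirely in fixed point using the multiplier, shift, and range radius computed in CalculateArithmeticOpData. A tiny float-reference sketch of the mapping the kernel approximates, assuming a hypothetical input scale and the conventional 1/128 output scale for int8 TANH:

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <initializer_list>

int main() {
  // Hypothetical int8 input quantization covering roughly [-5, 5].
  const double input_scale = 0.0392;
  const int32_t input_zero_point = 0;

  for (int8_t x : {-128, -64, 0, 64, 127}) {
    const double real_input = (x - input_zero_point) * input_scale;
    // Assuming int8 TANH output quantization of scale 1/128 and zero point 0,
    // this is the value the fixed-point kernel approximates (the kernel also
    // clamps the result to [-128, 127]).
    const int q_out = static_cast<int>(std::round(std::tanh(real_input) * 128));
    printf("x=%4d -> tanh(%6.3f) ~ q=%d\n", x, real_input, q_out);
  }
  return 0;
}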
@@ -17,6 +17,7 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
@@ -28,14 +29,16 @@ constexpr int kInputTensor = 0;

template <typename T>
TfLiteStatus UnpackImpl(TfLiteContext* context, TfLiteNode* node,
                        const TfLiteTensor* input, int output_count, int axis) {
  const TfLiteTensor* output0 = GetOutput(context, node, 0);
                        const TfLiteEvalTensor* input, int output_count,
                        int axis) {
  const TfLiteEvalTensor* output0 =
      tflite::micro::GetEvalOutput(context, node, 0);
  const TfLiteIntArray* input_dims = input->dims;
  const TfLiteIntArray* output_dims = output0->dims;
  const int dimensions = input_dims->size;

  if (axis < 0) {
    axis += NumDimensions(input);
    axis += input->dims->size;
  }

  TFLITE_DCHECK_LT(axis, dimensions);
@@ -54,11 +57,11 @@ TfLiteStatus UnpackImpl(TfLiteContext* context, TfLiteNode* node,
  }
  TFLITE_DCHECK_EQ(output_size, copy_size * outer_size);

  const T* input_data = GetTensorData<T>(input);
  const T* input_data = tflite::micro::GetTensorData<T>(input);

  for (int i = 0; i < output_count; ++i) {
    TfLiteTensor* t = GetOutput(context, node, i);
    T* output_data = GetTensorData<T>(t);
    TfLiteEvalTensor* t = tflite::micro::GetEvalOutput(context, node, i);
    T* output_data = tflite::micro::GetTensorData<T>(t);
    for (int k = 0; k < outer_size; ++k) {
      T* output_ptr = output_data + copy_size * k;
      int loc = k * output_count * copy_size + i * copy_size;
@@ -74,7 +77,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  TfLiteUnpackParams* data =
      reinterpret_cast<TfLiteUnpackParams*>(node->builtin_data);

  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);

  switch (input->type) {
    case kTfLiteFloat32: {
@@ -101,16 +105,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
} // namespace
} // namespace unpack

TfLiteRegistration* Register_UNPACK() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/unpack::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_UNPACK() {
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/nullptr,
          /*invoke=*/unpack::Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

} // namespace micro

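UnpackImpl addresses the input as outer_size blocks of output_count interleaved slices, so output i takes the run starting at k * output_count * copy_size + i * copy_size within block k. A standalone sketch of that layout with hypothetical sizes (not from the diff):

// Illustrative only: the addressing used by UnpackImpl's copy loop.
#include <cstdio>

int main() {
  const int outer_size = 2;    // product of dims before the unpack axis
  const int output_count = 3;  // dims[axis]
  const int copy_size = 4;     // product of dims after the unpack axis

  // The input holds outer_size * output_count * copy_size elements.
  for (int i = 0; i < output_count; ++i) {
    for (int k = 0; k < outer_size; ++k) {
      const int loc = k * output_count * copy_size + i * copy_size;
      printf("output %d, block %d: copy %d elements from input[%d]\n", i, k,
             copy_size, loc);
    }
  }
  return 0;
}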
@@ -15,9 +15,15 @@ limitations under the License.

#include "tensorflow/lite/micro/memory_helpers.h"

#include <cstddef>
#include <cstdint>

#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/schema/schema_generated.h"

namespace tflite {

@@ -40,8 +46,7 @@ size_t AlignSizeUp(size_t size, size_t alignment) {
  return aligned_size;
}

TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size,
                              ErrorReporter* reporter) {
TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size) {
  switch (type) {
    case kTfLiteFloat32:
      *size = sizeof(float);
@@ -67,9 +72,10 @@ TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size,
    case kTfLiteComplex64:
      *size = sizeof(float) * 2;
      break;
    case kTfLiteComplex128:
      *size = sizeof(double) * 2;
      break;
    default:
      reporter->Report("Type %s (%d) not is not supported",
                       TfLiteTypeGetName(type), type);
      return kTfLiteError;
  }
  return kTfLiteOk;
@@ -79,17 +85,71 @@ TfLiteStatus BytesRequiredForTensor(const tflite::Tensor& flatbuffer_tensor,
                                    size_t* bytes, size_t* type_size,
                                    ErrorReporter* error_reporter) {
  int element_count = 1;
  for (size_t n = 0; n < flatbuffer_tensor.shape()->Length(); ++n) {
    element_count *= flatbuffer_tensor.shape()->Get(n);
  // If flatbuffer_tensor.shape == nullptr, then flatbuffer_tensor is a scalar
  // so has 1 element.
  if (flatbuffer_tensor.shape() != nullptr) {
    for (size_t n = 0; n < flatbuffer_tensor.shape()->Length(); ++n) {
      element_count *= flatbuffer_tensor.shape()->Get(n);
    }
  }

  TfLiteType tf_lite_type;
  TF_LITE_ENSURE_STATUS(ConvertTensorType(flatbuffer_tensor.type(),
                                          &tf_lite_type, error_reporter));
  TF_LITE_ENSURE_STATUS(
      TfLiteTypeSizeOf(tf_lite_type, type_size, error_reporter));
  TF_LITE_ENSURE_STATUS(TfLiteTypeSizeOf(tf_lite_type, type_size));
  *bytes = element_count * (*type_size);
  return kTfLiteOk;
}

TfLiteStatus TfLiteEvalTensorByteLength(const TfLiteEvalTensor* eval_tensor,
                                        size_t* out_bytes) {
  TFLITE_DCHECK(out_bytes != nullptr);

  int element_count = 1;
  // If eval_tensor->dims == nullptr, then tensor is a scalar so has 1 element.
  if (eval_tensor->dims != nullptr) {
    for (int n = 0; n < eval_tensor->dims->size; ++n) {
      element_count *= eval_tensor->dims->data[n];
    }
  }
  size_t type_size;
  TF_LITE_ENSURE_STATUS(TfLiteTypeSizeOf(eval_tensor->type, &type_size));
  *out_bytes = element_count * type_size;
  return kTfLiteOk;
}

TfLiteStatus AllocateOutputDimensionsFromInput(TfLiteContext* context,
                                               const TfLiteTensor* input1,
                                               const TfLiteTensor* input2,
                                               TfLiteTensor* output) {
  const TfLiteTensor* input = nullptr;

  TF_LITE_ENSURE(context, input1->dims != nullptr);
  TF_LITE_ENSURE(context, input2->dims != nullptr);
  TF_LITE_ENSURE(context, output->dims->size == 0);

  input = input1->dims->size > input2->dims->size ? input1 : input2;
  TF_LITE_ENSURE(context, output->type == input->type);

  size_t size = 0;
  TfLiteTypeSizeOf(input->type, &size);
  const int dimensions_count = tflite::GetTensorShape(input).DimensionsCount();
  for (int i = 0; i < dimensions_count; i++) {
    size *= input->dims->data[i];
  }

  output->bytes = size;

  output->dims =
      reinterpret_cast<TfLiteIntArray*>(context->AllocatePersistentBuffer(
          context, TfLiteIntArrayGetSizeInBytes(size)));

  output->dims->size = input->dims->size;
  for (int i = 0; i < dimensions_count; i++) {
    output->dims->data[i] = input->dims->data[i];
  }

  return kTfLiteOk;
}

} // namespace tflite

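With the ErrorReporter parameter dropped from TfLiteTypeSizeOf, byte counts can be computed without threading a reporter through, and the new TfLiteEvalTensorByteLength builds on it. A hedged usage sketch (the wrapper function below is hypothetical):

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/memory_helpers.h"

// Hypothetical helper: payload size of an eval tensor, or 0 for unknown types.
size_t EvalTensorBytesOrZero(const TfLiteEvalTensor* tensor) {
  size_t bytes = 0;
  if (tflite::TfLiteEvalTensorByteLength(tensor, &bytes) != kTfLiteOk) {
    return 0;
  }
  return bytes;  // element count times the element size of tensor->type
}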
@@ -15,6 +15,9 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_
#define TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_

#include <cstddef>
#include <cstdint>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/schema/schema_generated.h"
@@ -31,14 +34,26 @@ uint8_t* AlignPointerDown(uint8_t* data, size_t alignment);
size_t AlignSizeUp(size_t size, size_t alignment);

// Returns size in bytes for a given TfLiteType.
TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size,
                              ErrorReporter* reporter);
TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size);

// How many bytes are needed to hold a tensor's contents.
TfLiteStatus BytesRequiredForTensor(const tflite::Tensor& flatbuffer_tensor,
                                    size_t* bytes, size_t* type_size,
                                    ErrorReporter* error_reporter);

// How many bytes are used in a TfLiteEvalTensor instance. The byte length is
// returned in out_bytes.
TfLiteStatus TfLiteEvalTensorByteLength(const TfLiteEvalTensor* eval_tensor,
                                        size_t* out_bytes);

// Deduce output dimensions from input and allocate given size.
// Useful for operators with two inputs where the largest input should equal the
// output dimension.
TfLiteStatus AllocateOutputDimensionsFromInput(TfLiteContext* context,
                                               const TfLiteTensor* input1,
                                               const TfLiteTensor* input2,
                                               TfLiteTensor* output);

} // namespace tflite

#endif // TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_

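AllocateOutputDimensionsFromInput expects an output whose dims array is still empty and sizes it like the larger of the two inputs. A usage sketch inside a hypothetical two-input op's Prepare:

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"

// Hypothetical Prepare for an op with two inputs and one output.
TfLiteStatus PrepareTwoInputOp(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input1 = tflite::GetInput(context, node, 0);
  const TfLiteTensor* input2 = tflite::GetInput(context, node, 1);
  TfLiteTensor* output = tflite::GetOutput(context, node, 0);
  // Requires output->dims to still be empty; copies the larger input's dims
  // into the output and fills in output->bytes.
  return tflite::AllocateOutputDimensionsFromInput(context, input1, input2,
                                                   output);
}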
@@ -48,10 +48,10 @@ GreedyMemoryPlanner::GreedyMemoryPlanner(unsigned char* scratch_buffer,
  requirements_ = reinterpret_cast<BufferRequirements*>(next_free);
  next_free += sizeof(BufferRequirements) * max_buffer_count_;

  buffer_sizes_sorted_by_size_ = reinterpret_cast<int*>(next_free);
  buffer_sizes_sorted_ = reinterpret_cast<int*>(next_free);
  next_free += sizeof(int) * max_buffer_count_;

  buffer_ids_sorted_by_size_ = reinterpret_cast<int*>(next_free);
  buffer_ids_sorted_ = reinterpret_cast<int*>(next_free);
  next_free += sizeof(int) * max_buffer_count_;

  buffers_sorted_by_offset_ = reinterpret_cast<ListEntry*>(next_free);
@@ -76,11 +76,24 @@ TfLiteStatus GreedyMemoryPlanner::AddBuffer(
  current->size = size;
  current->first_time_used = first_time_used;
  current->last_time_used = last_time_used;
  current->offline_offset = kOnlinePlannedBuffer;
  ++buffer_count_;
  need_to_calculate_offsets_ = true;
  return kTfLiteOk;
}

TfLiteStatus GreedyMemoryPlanner::AddBuffer(
    tflite::ErrorReporter* error_reporter, int size, int first_time_used,
    int last_time_used, int offline_offset) {
  BufferRequirements* current = &requirements_[buffer_count_];
  if (AddBuffer(error_reporter, size, first_time_used, last_time_used) !=
      kTfLiteOk) {
    return kTfLiteError;
  }
  current->offline_offset = offline_offset;
  return kTfLiteOk;
}

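The new AddBuffer overload lets callers pin a buffer at a predetermined arena offset (offline planning), while the original overload leaves placement to the greedy pass. A usage sketch with hypothetical sizes and timesteps:

#include "tensorflow/lite/micro/memory_planner/greedy_memory_planner.h"

void PlanSketch(unsigned char* scratch_buffer, int scratch_buffer_size,
                tflite::ErrorReporter* error_reporter) {
  tflite::GreedyMemoryPlanner planner(scratch_buffer, scratch_buffer_size);
  // Online-planned buffer: the greedy pass chooses its offset.
  planner.AddBuffer(error_reporter, /*size=*/1024,
                    /*first_time_used=*/0, /*last_time_used=*/2);
  // Offline-planned buffer: its offset is fixed ahead of time (here arena
  // offset 0); the greedy pass places online buffers around it.
  planner.AddBuffer(error_reporter, /*size=*/512,
                    /*first_time_used=*/1, /*last_time_used=*/3,
                    /*offline_offset=*/0);
}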
bool GreedyMemoryPlanner::DoesEntryOverlapInTime(
    const GreedyMemoryPlanner::ListEntry* entry, const int first_time_used,
    const int last_time_used) const {
@@ -102,7 +115,7 @@ GreedyMemoryPlanner::NextSimultaneouslyActiveBuffer(
  ListEntry* result = nullptr;
  ListEntry* candidate_next_entry;
  if (start == nullptr) {
    candidate_next_entry = &buffers_sorted_by_offset_[0];
    candidate_next_entry = &buffers_sorted_by_offset_[first_entry_index_];
  } else {
    if (start->next_entry_index == -1) {
      return nullptr;
@@ -134,29 +147,51 @@ void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
  // This helps find a more compact layout. Intuitively, you can think
  // about putting the large buffers in place first, and then the
  // smaller buffers can fit in the gaps, rather than fragmenting the
  // gaps with small buffers at the beginning.
  // gaps with small buffers at the beginning. Add offline planned offsets
  // first in the list, since they have a predetermined offset.
  int idx_from_tail = buffer_count_;
  int idx_from_head = 0;
  for (int i = 0; i < buffer_count_; ++i) {
    buffer_sizes_sorted_by_size_[i] = requirements_[i].size;
    buffer_ids_sorted_by_size_[i] = i;
    buffer_offsets_[i] = -1;
    if (requirements_[i].offline_offset == kOnlinePlannedBuffer) {
      idx_from_tail--;
      buffer_sizes_sorted_[idx_from_tail] = requirements_[i].size;
      buffer_ids_sorted_[idx_from_tail] = i;
      buffer_offsets_[i] = -1;
    } else {
      buffer_sizes_sorted_[idx_from_head] = requirements_[i].size;
      buffer_ids_sorted_[idx_from_head] = i;
      buffer_offsets_[i] = requirements_[i].offline_offset;
      idx_from_head++;
    }
  }
  // This sorting algorithm is naive, and may end up taking a very long time
  // with hundreds of buffers.
  ReverseSortInPlace(buffer_sizes_sorted_by_size_, buffer_ids_sorted_by_size_,
                     buffer_count_);

  // Put the largest buffer at offset zero to start the process.
  ListEntry* first_entry = &buffers_sorted_by_offset_[0];
  first_entry->offset = 0;
  first_entry->requirements_index = buffer_ids_sorted_by_size_[0];
  first_entry->next_entry_index = -1;
  // This sorting algorithm is naive, and may end up taking a very long time
  // with hundreds of buffers. Do not sort the offline planned offsets.
  ReverseSortInPlace(&buffer_sizes_sorted_[idx_from_head],
                     &buffer_ids_sorted_[idx_from_head],
                     buffer_count_ - idx_from_head);

  // Initialize the first entry to the first buffer in
  // buffer_ids_sorted_.
  //   - If there are no offline planned offsets, the largest buffer will be
  //     first, and the buffers will be handled in size order.
  //   - If offline offsets are present, these will be handled first in order
  //     for the greedy algorithm to utilized gaps in the offline plan.
  first_entry_index_ = 0;
  next_free_entry_ = 1;
  buffer_offsets_[buffer_ids_sorted_by_size_[0]] = 0;
  ListEntry* first_entry = &buffers_sorted_by_offset_[first_entry_index_];
  first_entry->next_entry_index = -1;  // to mark the entry as end of list
  int buffer_id = buffer_ids_sorted_[0];
  first_entry->requirements_index = buffer_id;
  if (requirements_[buffer_id].offline_offset == kOnlinePlannedBuffer) {
    buffer_offsets_[buffer_id] = 0;
  }
  first_entry->offset = buffer_offsets_[buffer_id];

  // Work through the rest of the buffers to find a good gap to place each one.
  for (int i = 1; i < buffer_count_; ++i) {
    // The id is the order the buffer was originally added by the client.
    const int buffer_id = buffer_ids_sorted_by_size_[i];
    buffer_id = buffer_ids_sorted_[i];
    // Look at what size and time range the buffer needs to be active.
    BufferRequirements* wanted_requirements = &requirements_[buffer_id];
    const int wanted_size = wanted_requirements->size;
@@ -168,37 +203,43 @@ void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
    // so that it's easy to find the next buffer in memory, and so the gap.
    // The candidate_entry variable holds the buffer that we're considering
    // placing the current buffer after.
    ListEntry* prior_entry = nullptr;

    int candidate_offset = 0;
    // Loop through the offset-ordered list of buffers, looking for gaps.
    while (true) {
      // Find out what the next active buffer is.
      ListEntry* next_entry = NextSimultaneouslyActiveBuffer(
          prior_entry, wanted_first_time_used, wanted_last_time_used);
|
||||
if (wanted_requirements->offline_offset == kOnlinePlannedBuffer) {
|
||||
ListEntry* prior_entry = nullptr;
|
||||
while (true) {
|
||||
// Find out what the next active buffer is.
|
||||
ListEntry* next_entry = NextSimultaneouslyActiveBuffer(
|
||||
prior_entry, wanted_first_time_used, wanted_last_time_used);
|
||||
|
||||
if (prior_entry) {
|
||||
BufferRequirements* candidate_requirements =
|
||||
&requirements_[prior_entry->requirements_index];
|
||||
const int prior_entry_offset =
|
||||
prior_entry->offset + candidate_requirements->size;
|
||||
if (prior_entry_offset > candidate_offset) {
|
||||
candidate_offset = prior_entry_offset;
|
||||
if (prior_entry) {
|
||||
BufferRequirements* candidate_requirements =
|
||||
&requirements_[prior_entry->requirements_index];
|
||||
const int prior_entry_offset =
|
||||
prior_entry->offset + candidate_requirements->size;
|
||||
if (prior_entry_offset > candidate_offset) {
|
||||
candidate_offset = prior_entry_offset;
|
||||
}
|
||||
}
|
||||
if (next_entry == nullptr) {
|
||||
// We're at the end of the list, so we can always append the buffer
|
||||
// here.
|
||||
break;
|
||||
}
|
||||
// Find out how much space there is between us and the next buffer.
|
||||
const int gap = next_entry->offset - candidate_offset;
|
||||
if (gap >= wanted_size) {
|
||||
// This entry has a big enough gap between it and the next, so
|
||||
// use it!
|
||||
break;
|
||||
}
|
||||
// The gap wasn't big enough, so move on to another candidate.
|
||||
prior_entry = next_entry;
|
||||
}
|
||||
if (next_entry == nullptr) {
|
||||
// We're at the end of the list, so we can always append the buffer
|
||||
// here.
|
||||
break;
|
||||
}
|
||||
// Find out how much space there is between us and the next buffer.
|
||||
const int gap = next_entry->offset - candidate_offset;
|
||||
if (gap >= wanted_size) {
|
||||
// This entry has a big enough gap between it and the next, so
|
||||
// use it!
|
||||
break;
|
||||
}
|
||||
// The gap wasn't big enough, so move on to another candidate.
|
||||
prior_entry = next_entry;
|
||||
} else {
|
||||
// Offline planned offset are to be considered constant
|
||||
candidate_offset = wanted_requirements->offline_offset;
|
||||
}
|
||||
// At this point, we've either found a gap (possibly at the end of the
|
||||
// list) and want to place the buffer there, or there are no other active
|
||||
@@ -212,26 +253,36 @@ void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
|
||||
new_entry->requirements_index = buffer_id;
|
||||
const int new_entry_index = next_free_entry_;
|
||||
++next_free_entry_;
|
||||
ListEntry* current_entry = first_entry;
|
||||
// Make sure that we insert the buffer at the correct place in the ordered
|
||||
// list.
|
||||
while (true) {
|
||||
const int next_entry_index = current_entry->next_entry_index;
|
||||
if (next_entry_index == -1) {
|
||||
// We're at the end of the list, so just add the new entry here.
|
||||
current_entry->next_entry_index = new_entry_index;
|
||||
new_entry->next_entry_index = -1;
|
||||
break;
|
||||
|
||||
if (first_entry->offset > candidate_offset) {
|
||||
// The new entry offset is smaller than the first entry offset =>
|
||||
// replace the first entry
|
||||
first_entry = new_entry;
|
||||
first_entry->next_entry_index = first_entry_index_;
|
||||
first_entry_index_ = new_entry_index;
|
||||
} else {
|
||||
ListEntry* current_entry = first_entry;
|
||||
// Make sure that we insert the buffer at the correct place in the
|
||||
// buffer-offset-ordered list
|
||||
while (true) {
|
||||
const int next_entry_index = current_entry->next_entry_index;
|
||||
if (next_entry_index == -1) {
|
||||
// We're at the end of the list, so just add the new entry here.
|
||||
current_entry->next_entry_index = new_entry_index;
|
||||
new_entry->next_entry_index = -1;
|
||||
break;
|
||||
}
|
||||
// not at the end of the list -> take a look at next entry
|
||||
ListEntry* next_entry = &buffers_sorted_by_offset_[next_entry_index];
|
||||
if (next_entry->offset > candidate_offset) {
|
||||
// We're at the right spot to do an insertion and retain the sorting
|
||||
// order, so place the new entry here.
|
||||
new_entry->next_entry_index = current_entry->next_entry_index;
|
||||
current_entry->next_entry_index = new_entry_index;
|
||||
break;
|
||||
}
|
||||
current_entry = next_entry;
|
||||
}
|
||||
ListEntry* next_entry = &buffers_sorted_by_offset_[next_entry_index];
|
||||
if (next_entry->offset > candidate_offset) {
|
||||
// We're at the right spot to do an insertion and retain the sorting
|
||||
// order, so place the new entry here.
|
||||
new_entry->next_entry_index = current_entry->next_entry_index;
|
||||
current_entry->next_entry_index = new_entry_index;
|
||||
break;
|
||||
}
|
||||
current_entry = next_entry;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -241,7 +292,7 @@ size_t GreedyMemoryPlanner::GetMaximumMemorySize() {
|
||||
if (buffer_count_ == 0) {
|
||||
return 0;
|
||||
}
|
||||
ListEntry* entry = &buffers_sorted_by_offset_[0];
|
||||
ListEntry* entry = &buffers_sorted_by_offset_[first_entry_index_];
|
||||
size_t max_size = 0;
|
||||
while (entry) {
|
||||
BufferRequirements* requirements =
|
||||
|
||||
@@ -21,6 +21,8 @@ limitations under the License.

namespace tflite {

constexpr int kOnlinePlannedBuffer = -1;

// A memory planner that uses a greedy algorithm to arrange buffers in memory
// to minimize the overall arena size needed.
//
@@ -59,6 +61,12 @@ class GreedyMemoryPlanner : public MemoryPlanner {
  TfLiteStatus AddBuffer(ErrorReporter* error_reporter, int size,
                         int first_time_used, int last_time_used) override;

  // Record details of an offline planned buffer offset we want to place.
  // offline_offset is the buffer offset from the start of the arena.
  TfLiteStatus AddBuffer(ErrorReporter* error_reporter, int size,
                         int first_time_used, int last_time_used,
                         int offline_offset);

  // Returns the high-water mark of used memory. This is the minimum size of a
  // memory arena you'd need to allocate to hold these buffers.
  size_t GetMaximumMemorySize() override;
@@ -90,8 +98,8 @@ class GreedyMemoryPlanner : public MemoryPlanner {
  static size_t per_buffer_size() {
    const int per_buffer_size =
        sizeof(BufferRequirements) +  // requirements_
        sizeof(int) +                 // buffer_sizes_sorted_by_size_
        sizeof(int) +                 // buffer_ids_sorted_by_size_
        sizeof(int) +                 // buffer_sizes_sorted_
        sizeof(int) +                 // buffer_ids_sorted_
        sizeof(ListEntry) +           // buffers_sorted_by_offset_
        sizeof(int);                  // buffer_offsets_;
    return per_buffer_size;
@@ -121,16 +129,25 @@ class GreedyMemoryPlanner : public MemoryPlanner {
  // Records the client-provided information about each buffer.
  struct BufferRequirements {
    int size;
    int offline_offset;
    int first_time_used;
    int last_time_used;
  };

  // Working arrays used during the layout algorithm.
  BufferRequirements* requirements_;
  int* buffer_sizes_sorted_by_size_;
  int* buffer_ids_sorted_by_size_;
  // buffer_sizes_sorted_ and buffer_ids_sorted_ are sorted according to:
  //   {
  //     offline planned buffers,
  //     online planned buffers sorted by size
  //   }
  int* buffer_sizes_sorted_;
  int* buffer_ids_sorted_;
  ListEntry* buffers_sorted_by_offset_;
  int next_free_entry_;
  int next_free_entry_;    // Index of the next free entry of
                           // buffers_sorted_by_offset_
  int first_entry_index_;  // Index of the first entry (smallest offset) of
                           // buffers_sorted_by_offset_

  // Stores the outcome of the plan, the location of each buffer in the arena.
  int* buffer_offsets_;
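// A toy sketch (not from the diff) of driving the extended planner API above
// by hand; sizes, timesteps and the scratch capacity are made up, and the
// planner is normally fed by MicroAllocator rather than called directly.
#include "tensorflow/lite/micro/memory_planner/greedy_memory_planner.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"

void PlanThreeBuffers() {
  tflite::MicroErrorReporter error_reporter;
  unsigned char planner_scratch[1024];  // working space for planner bookkeeping
  tflite::GreedyMemoryPlanner planner(planner_scratch, sizeof(planner_scratch));
  // Online-planned buffers: the planner chooses their offsets.
  planner.AddBuffer(&error_reporter, /*size=*/256, /*first_time_used=*/0,
                    /*last_time_used=*/1);
  planner.AddBuffer(&error_reporter, /*size=*/128, /*first_time_used=*/1,
                    /*last_time_used=*/2);
  // Offline-planned buffer: the new overload pins it at a fixed offset, and
  // the greedy pass fits the online buffers into the remaining gaps.
  planner.AddBuffer(&error_reporter, /*size=*/64, /*first_time_used=*/0,
                    /*last_time_used=*/2, /*offline_offset=*/0);
  int offset = 0;
  planner.GetOffsetForBuffer(&error_reporter, /*buffer_index=*/1, &offset);
  size_t arena_bytes_needed = planner.GetMaximumMemorySize();
  (void)offset;
  (void)arena_bytes_needed;
}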
File diff suppressed because it is too large
@@ -1,4 +1,4 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -15,33 +15,42 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_

#include <cstddef>
#include <cstdint>

#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"
#include "tensorflow/lite/schema/schema_generated.h"

namespace tflite {

// Namespace used for unittests.
namespace internal {

// Sets up all of the data structure members for a runtime tensor
// based on the contents of a serialized tensor.
TfLiteStatus InitializeRuntimeTensor(
    SimpleMemoryAllocator* allocator, const tflite::Tensor& flatbuffer_tensor,
// Sets up all of the data structure members for a TfLiteTensor based on the
// contents of a serialized tensor in the flatbuffer.
// TODO(b/162311891): Drop this method when the interpreter has an API for
// returning buffers on TfLiteEvalTensor.
TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
    SimpleMemoryAllocator* allocator, bool allocate_temp,
    const tflite::Tensor& flatbuffer_tensor,
    const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
    ErrorReporter* error_reporter, TfLiteTensor* result);

// A handle tracking scratch buffer allocation. This handle is created by
// `RequestScratchBufferInArena`. `data` field is populated in
// `FinishTensorAllocation` after static memory planning.
// TODO(b/150257460) As a future optimization, this struct could be replaced by
// a union, since once `data` is populated, `bytes` and `node_idx` are no
// longer needed.
// Holds placeholder information for a scratch buffer request from a kernel.
// This struct is only used during the model prepare stage. Each request from a
// kernel is stored in the head section. During the prepare stage, the head
// section will hold at least kMaxScratchBuffersPerOp requests plus any
// requests from previous kernels.
//
// When the memory plan is finalized, these structs are no longer used in favor
// of a sequential array of ScratchBufferHandle allocations in the tail
// section. These allocations are indexed by the request API defined in the
// TfLiteContext struct.
typedef struct {
  // Pointer to the scratch buffer.
  uint8_t* data;
  // Number of bytes required by the buffer. The actual allocated size might be
  // greater than `bytes` due to buffer alignment.
  size_t bytes;
@@ -49,7 +58,8 @@ typedef struct {
  // determine the lifetime of the buffer. In AllocationInfo, this buffer will
  // have `before` = node_idx and `after` = node_idx.
  int node_idx;
} ScratchBufferHandle;
} ScratchBufferRequest;

}  // namespace internal

typedef struct {
@@ -57,9 +67,27 @@ typedef struct {
  const TfLiteRegistration* registration;
} NodeAndRegistration;

// Holds a pointer to a buffer for a scratch buffer requested by a kernel during
// the model prepare stage. This struct is allocated in-place and allows for
// quick pointer-indexed lookup for speed during model inference.
typedef struct {
  // Pointer to location of the scratch buffer:
  uint8_t* data;
} ScratchBufferHandle;

// Allocator responsible for allocating memory for all intermediate tensors
// necessary to invoke a model.
//
// The lifetime of the model, tensor arena and error reporter must be at
// least as long as that of the allocator object, since the allocator needs
// them to be accessible during its entire lifetime.
//
// The MicroAllocator simply plans out additional allocations that are required
// to stand up a model for inference in TF Micro. This class currently relies
// on an additional allocator - SimpleMemoryAllocator - for all allocations
// from an arena. These allocations are divided into head (non-persistent) and
// tail (persistent) regions:
//
// Memory layout to help understand how it works
// This information could change in the future version.
// ************** .memory_allocator->GetBuffer()
@@ -72,76 +100,179 @@ typedef struct {
// ************** .memory_allocator->GetBuffer() + ->GetMaxBufferSize()
class MicroAllocator {
 public:
  // The lifetime of the model, tensor allocator and error reporter must be at
  // least as long as that of the allocator object, since the allocator needs
  // them to be accessible during its entire lifetime.

  // Creates a MicroAllocator instance from a given tensor arena. This arena
  // will be managed by the created instance.
  // Note: Please use __declspec(align(16)) to make sure tensor_arena is 16
  // bytes aligned, otherwise some head room will be wasted.
  MicroAllocator(TfLiteContext* context, const Model* model,
                 uint8_t* tensor_arena, size_t arena_size,
                 ErrorReporter* error_reporter);
  // TODO(b/157615197): Cleanup constructor + factory usage.
  static MicroAllocator* Create(uint8_t* tensor_arena, size_t arena_size,
                                ErrorReporter* error_reporter);

  // Runs through the model and allocates all necessary input, output and
  // intermediate tensors.
  // WARNING: doing any allocation after calling this method has the risk of
  // corrupting tensor data so this method should be the last non-const method
  // called in this class.
  TfLiteStatus FinishTensorAllocation();
  // Creates a MicroAllocator instance using the provided SimpleMemoryAllocator
  // instance. This allocator instance will use the SimpleMemoryAllocator
  // instance to manage allocations internally.
  static MicroAllocator* Create(SimpleMemoryAllocator* memory_allocator,
                                ErrorReporter* error_reporter);

  // Returns the arena usage in bytes, only available after
  // `FinishTensorAllocation`. Otherwise, it will return 0.
  size_t used_bytes() const {
    if (active_) {
      return 0;
    }
    return memory_allocator_->GetUsedBytes();
  }
  // Begins allocating internal resources required for model inference.
  // This method will run through the flatbuffer data supplied in the model to
  // properly allocate tensor, node, and op registration data. This method is
  // expected to be followed with a call to FinishModelAllocation() before
  // resuming allocation with another model. All persistent tensor buffers are
  // stored in the out-param eval_tensors. This value is allocated from the
  // persistent memory arena and will be used to host runtime tensor buffers.
  TfLiteStatus StartModelAllocation(
      const Model* model, const MicroOpResolver& op_resolver,
      NodeAndRegistration** node_and_registrations,
      TfLiteEvalTensor** eval_tensors);

  // Runs through the model to allocate nodes and registrations. We need to
  // keep them for the entire lifetime of the model to allow persistent
  // tensors. This method needs to be called before FinishTensorAllocation.
  TfLiteStatus AllocateNodeAndRegistrations(
      const OpResolver& op_resolver,
      NodeAndRegistration** node_and_registrations);
  // Finishes allocating internal resources required for model inference.
  // This method will plan non-persistent buffers and commit a memory plan to
  // the 'head' section of the memory arena. All variable tensor data will also
  // be allocated. This method should be called after assigning model resources
  // in StartModelAllocation(). The eval_tensors pointer should be the value
  // passed into this class during StartModelAllocation(). Scratch buffer
  // handles are stored in the out-param `scratch_buffer_handles`. This value
  // will be used in the `GetScratchBuffer` call to retrieve scratch buffers.
  TfLiteStatus FinishModelAllocation(
      const Model* model, TfLiteEvalTensor* eval_tensors,
      ScratchBufferHandle** scratch_buffer_handles);

  // Allocates a TfLiteTensor struct and populates the returned value with
  // properties from the model flatbuffer. This struct is allocated from
  // persistent arena memory and is only guaranteed for the lifetime of the
  // application. The eval_tensors pointer should be the value passed into this
  // class during StartModelAllocation() and contains the source-of-truth for
  // buffers.
  virtual TfLiteTensor* AllocatePersistentTfLiteTensor(
      const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index);

  // Allocates a TfLiteTensor struct and populates the returned value with
  // properties from the model flatbuffer. This struct is allocated from
  // temporary arena memory and is only guaranteed until a call is made to
  // ResetTempAllocations(). The eval_tensors pointer should be the value passed
  // into this class during StartModelAllocation() and contains the
  // source-of-truth for buffers.
  virtual TfLiteTensor* AllocateTempTfLiteTensor(const Model* model,
                                                 TfLiteEvalTensor* eval_tensors,
                                                 int tensor_index);

  // Resets all temporary allocations. This method should be called after a
  // chain of temp allocations (e.g. chain of TfLiteTensor objects via
  // AllocateTfLiteTensor()).
  virtual void ResetTempAllocations();

  // Allocates a persistent buffer which has the same lifetime as the
  // allocator. The memory is immediately available and is allocated from the
  // tail of the arena.
  TfLiteStatus AllocatePersistentBuffer(size_t bytes, void** ptr);
  virtual void* AllocatePersistentBuffer(size_t bytes);

  // Registers a scratch buffer of size `bytes` for the Node with `node_id`.
  // This method only allocates a BufferHandle holding information for memory
  // planning. The buffer ptr is ready after `FinishTensorAllocation` and can
  // be retrieved by the `GetScratchBuffer` method using the returned
  // buffer_idx. Note that there should be no tail allocation between two
  // consecutive `RequestScratchBufferInArena` calls.
  TfLiteStatus RequestScratchBufferInArena(int node_id, size_t bytes,
                                           int* buffer_idx);
  // Returns the pointer to the planned scratch buffer.
  void* GetScratchBuffer(int buffer_idx) const;
  // This method only requests a buffer with a given size to be used after a
  // model has finished allocation via FinishModelAllocation(). All requested
  // buffers will be accessible by the out-param in that method.
  TfLiteStatus RequestScratchBufferInArena(size_t bytes, int* buffer_idx);

  // Finishes allocating a specific NodeAndRegistration prepare block (kernel
  // entry for a model) with a given node ID. This call ensures that any scratch
  // buffer requests and temporary allocations are handled and ready for the
  // next node prepare block.
  TfLiteStatus FinishPrepareNodeAllocations(int node_id);

  // Returns the arena usage in bytes, only available after
  // `FinishModelAllocation`. Otherwise, it will return 0.
  size_t used_bytes() const;

 protected:
  MicroAllocator(SimpleMemoryAllocator* memory_allocator,
                 ErrorReporter* error_reporter);
  virtual ~MicroAllocator();

  // Allocates an array in the arena to hold pointers to the node and
  // registration pointers required to represent the inference graph of the
  // model.
  virtual TfLiteStatus AllocateNodeAndRegistrations(
      const Model* model, NodeAndRegistration** node_and_registrations);

  // Populates node and registration pointers representing the inference graph
  // of the model from values inside the flatbuffer (loaded from the TfLiteModel
  // instance). Persistent data (e.g. operator data) is allocated from the
  // arena.
  virtual TfLiteStatus PrepareNodeAndRegistrationDataFromFlatbuffer(
      const Model* model, const MicroOpResolver& op_resolver,
      NodeAndRegistration* node_and_registrations);

  // Allocates the list of persistent TfLiteEvalTensors that are used for the
  // "eval" phase of model inference. These structs will be the source of truth
  // for all tensor buffers. Allocation results are stored in the out-param
  // eval_tensors.
  virtual TfLiteStatus AllocateTfLiteEvalTensors(
      const Model* model, TfLiteEvalTensor** eval_tensors);

  // Allocates persistent tensor buffers for variable tensors in the subgraph.
  virtual TfLiteStatus AllocateVariables(const SubGraph* subgraph,
                                         TfLiteEvalTensor* eval_tensors);

  // Allocates and returns a persistent TfLiteTensor.
  // TODO(b/162311891): Drop this method when the interpreter has an API for
  // accessing TfLiteEvalTensor structs.
  virtual TfLiteTensor* AllocatePersistentTfLiteTensorInternal(
      const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index);

  // Populates a TfLiteTensor struct with data from the model flatbuffer. Any
  // quantization data is allocated from either the tail (persistent) or temp
  // sections of the arena based on the allocation flag.
  virtual TfLiteStatus PopulateTfLiteTensorFromFlatbuffer(
      const Model* model, const SubGraph* subgraph, TfLiteTensor* tensor,
      int tensor_index, bool allocate_temp);

  ErrorReporter* error_reporter() const;

  // Returns the first subgraph from the model.
  const SubGraph* GetSubGraphFromModel(const Model* model);

 private:
  TfLiteStatus Init();
  // Commits a memory plan for all non-persistent buffer allocations in the
  // 'head' section of the memory arena. The eval_tensors pointer is the list of
  // pre-allocated TfLiteEvalTensor structs that will point to the buffers that
  // will be allocated into the head section in this function call. The
  // scratch_buffer_handles pointer is the array of pre-allocated
  // ScratchBufferHandle structs that will point to allocated buffers also in
  // the head section.
  virtual TfLiteStatus CommitStaticMemoryPlan(
      const Model* model, const SubGraph* subgraph,
      TfLiteEvalTensor* eval_tensors,
      ScratchBufferHandle* scratch_buffer_handles);

  const Model* model_;
  // A simple memory allocator that always allocates from the arena tail.
  // Allocates an array of ScratchBufferHandle structs in the tail section for a
  // given number of handles.
  virtual TfLiteStatus AllocateScratchBufferHandles(
      ScratchBufferHandle** scratch_buffer_handles, size_t handle_count);

  // Clears all internal scratch buffer request counts and resets the head to
  // prepare for kernels to request scratch buffer data when a model is
  // preparing.
  TfLiteStatus InitScratchBufferData();

  // Returns the pointer for the array of ScratchBufferRequest allocations in
  // the head section.
  internal::ScratchBufferRequest* GetScratchBufferRequests();

  // A simple memory allocator that always allocates from the arena tail or
  // head.
  SimpleMemoryAllocator* memory_allocator_;

  ErrorReporter* error_reporter_;
  TfLiteContext* context_;
  // Indicates if the allocator is ready for allocation.
  bool active_ = false;
  bool model_is_allocating_;

  // In reverse order for efficiency.
  // i.e. scratch_buffer_handles_[0] is the handle for the last buffer,
  // corresponding to the last RequestScratchBufferInArena call.
  internal::ScratchBufferHandle* scratch_buffer_handles_ = nullptr;
  // How many scratch buffers have been allocated.
  size_t scratch_buffer_count_ = 0;
  // Holds the number of ScratchBufferRequest instances stored in the head
  // section when a model is allocating.
  size_t scratch_buffer_request_count_ = 0;

  const SubGraph* subgraph_;
  const flatbuffers::Vector<flatbuffers::Offset<Operator>>* operators_;
  const flatbuffers::Vector<flatbuffers::Offset<Tensor>>* tensors_;
  // Holds the byte length of the memory plan with the largest head usage. Used
  // to ensure that multi-tenant allocations can share the head for buffers.
  size_t max_head_buffer_usage_ = 0;

  TF_LITE_REMOVE_VIRTUAL_DELETE
};

}  // namespace tflite
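// A minimal sketch (not from the diff) of the two-phase contract implied by
// the API above, driven by hand. This is normally MicroInterpreter's job; the
// arena size, model_data argument and the elided kernel prepare step are
// placeholders.
#include <cstdint>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/schema/schema_generated.h"

TfLiteStatus AllocateForModel(const unsigned char* model_data) {
  static tflite::MicroErrorReporter error_reporter;
  // Align the arena to 16 bytes, per the constructor note, so no head room
  // is wasted.
  alignas(16) static uint8_t tensor_arena[16 * 1024];
  const tflite::Model* model = tflite::GetModel(model_data);
  tflite::AllOpsResolver op_resolver;
  tflite::MicroAllocator* allocator = tflite::MicroAllocator::Create(
      tensor_arena, sizeof(tensor_arena), &error_reporter);
  tflite::NodeAndRegistration* node_and_registrations = nullptr;
  TfLiteEvalTensor* eval_tensors = nullptr;
  TF_LITE_ENSURE_STATUS(allocator->StartModelAllocation(
      model, op_resolver, &node_and_registrations, &eval_tensors));
  // ...init/prepare each kernel here; scratch buffer requests accumulate in
  // the head section until FinishPrepareNodeAllocations() is called per node...
  tflite::ScratchBufferHandle* scratch_buffer_handles = nullptr;
  TF_LITE_ENSURE_STATUS(allocator->FinishModelAllocation(
      model, eval_tensors, &scratch_buffer_handles));
  return kTfLiteOk;
}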
@@ -15,7 +15,10 @@ limitations under the License.

#include "tensorflow/lite/micro/micro_error_reporter.h"

#include <cstdarg>

#ifndef TF_LITE_STRIP_ERROR_STRINGS
#include "tensorflow/lite/micro/debug_log.h"
#include "tensorflow/lite/micro/micro_string.h"
#endif


@@ -15,9 +15,10 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_MICRO_MICRO_ERROR_REPORTER_H_
#define TENSORFLOW_LITE_MICRO_MICRO_ERROR_REPORTER_H_

#include <cstdarg>

#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/debug_log.h"

namespace tflite {

@@ -1,4 +1,4 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -14,16 +14,24 @@ limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/micro_interpreter.h"

#include <cstdarg>
#include <cstddef>
#include <cstdint>

#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/tensor_utils.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/micro_optional_debug_tools.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/micro/micro_profiler.h"
#include "tensorflow/lite/schema/schema_generated.h"

namespace tflite {
namespace {

#ifndef TF_LITE_STRIP_ERROR_STRINGS
const char* OpNameFromRegistration(const TfLiteRegistration* registration) {
  if (registration->builtin_code == BuiltinOperator_CUSTOM) {
    return registration->custom_name;
@@ -31,88 +39,111 @@ const char* OpNameFromRegistration(const TfLiteRegistration* registration) {
    return EnumNameBuiltinOperator(BuiltinOperator(registration->builtin_code));
  }
}
#endif  // !defined(TF_LITE_STRIP_ERROR_STRINGS)

}  // namespace

namespace internal {

TfLiteStatus ContextHelper::AllocatePersistentBuffer(TfLiteContext* ctx,
                                                     size_t bytes, void** ptr) {
ContextHelper::ContextHelper(ErrorReporter* error_reporter,
                             MicroAllocator* allocator, const Model* model)
    : allocator_(allocator), error_reporter_(error_reporter), model_(model) {}

void* ContextHelper::AllocatePersistentBuffer(TfLiteContext* ctx,
                                              size_t bytes) {
  return reinterpret_cast<ContextHelper*>(ctx->impl_)
      ->allocator_->AllocatePersistentBuffer(bytes, ptr);
      ->allocator_->AllocatePersistentBuffer(bytes);
}

TfLiteStatus ContextHelper::RequestScratchBufferInArena(TfLiteContext* ctx,
                                                        size_t bytes,
                                                        int* buffer_idx) {
  ContextHelper* helper = reinterpret_cast<ContextHelper*>(ctx->impl_);
  return helper->allocator_->RequestScratchBufferInArena(
      helper->current_node_idx_, bytes, buffer_idx);
  return helper->allocator_->RequestScratchBufferInArena(bytes, buffer_idx);
}

void* ContextHelper::GetScratchBuffer(TfLiteContext* ctx, int buffer_idx) {
  return reinterpret_cast<ContextHelper*>(ctx->impl_)
      ->allocator_->GetScratchBuffer(buffer_idx);
  ContextHelper* helper = reinterpret_cast<ContextHelper*>(ctx->impl_);
  ScratchBufferHandle* handle = helper->scratch_buffer_handles_ + buffer_idx;
  return handle->data;
}

void ContextHelper::ReportOpError(struct TfLiteContext* context,
                                  const char* format, ...) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
  ContextHelper* helper = static_cast<ContextHelper*>(context->impl_);
  va_list args;
  va_start(args, format);
  TF_LITE_REPORT_ERROR(helper->error_reporter_, format, args);
  va_end(args);
#endif
}

TfLiteTensor* ContextHelper::GetTensor(const struct TfLiteContext* context,
                                       int tensor_idx) {
  ContextHelper* helper = static_cast<ContextHelper*>(context->impl_);
  return helper->allocator_->AllocateTempTfLiteTensor(
      helper->model_, helper->eval_tensors_, tensor_idx);
}

TfLiteEvalTensor* ContextHelper::GetEvalTensor(
    const struct TfLiteContext* context, int tensor_idx) {
  ContextHelper* helper = reinterpret_cast<ContextHelper*>(context->impl_);
  return &helper->eval_tensors_[tensor_idx];
}

void ContextHelper::SetTfLiteEvalTensors(TfLiteEvalTensor* eval_tensors) {
  eval_tensors_ = eval_tensors;
}

void ContextHelper::SetScratchBufferHandles(
    ScratchBufferHandle* scratch_buffer_handles) {
  scratch_buffer_handles_ = scratch_buffer_handles;
}

}  // namespace internal
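// A minimal sketch (not from the diff) of the dispatch pattern used above:
// TfLiteContext exposes these helpers to kernels as plain function pointers,
// with the ContextHelper stored in context->impl_, so kernels never see the
// allocator directly. The kernel name is hypothetical.
#include "tensorflow/lite/c/common.h"

void* ExampleKernelInit(TfLiteContext* context, const char* buffer,
                        size_t length) {
  // This call lands in ContextHelper::AllocatePersistentBuffer, which
  // recovers the helper from context->impl_ and forwards to MicroAllocator.
  return context->AllocatePersistentBuffer(context, /*bytes=*/32);
}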
MicroInterpreter::MicroInterpreter(const Model* model,
                                   const OpResolver& op_resolver,
                                   const MicroOpResolver& op_resolver,
                                   uint8_t* tensor_arena,
                                   size_t tensor_arena_size,
                                   ErrorReporter* error_reporter)
                                   ErrorReporter* error_reporter,
                                   tflite::Profiler* profiler)
    : model_(model),
      op_resolver_(op_resolver),
      error_reporter_(error_reporter),
      allocator_(&context_, model_, tensor_arena, tensor_arena_size,
                 error_reporter_),
      allocator_(*MicroAllocator::Create(tensor_arena, tensor_arena_size,
                                         error_reporter)),
      tensors_allocated_(false),
      context_helper_(error_reporter_, &allocator_) {
  const flatbuffers::Vector<flatbuffers::Offset<SubGraph>>* subgraphs =
      model->subgraphs();
  if (subgraphs->size() != 1) {
    TF_LITE_REPORT_ERROR(error_reporter,
                         "Only 1 subgraph is currently supported.\n");
    initialization_status_ = kTfLiteError;
    return;
  }
  subgraph_ = (*subgraphs)[0];
  tensors_ = subgraph_->tensors();
  operators_ = subgraph_->operators();
      initialization_status_(kTfLiteError),
      eval_tensors_(nullptr),
      context_helper_(error_reporter_, &allocator_, model),
      input_tensor_(nullptr),
      output_tensor_(nullptr) {
  Init(profiler);
}

  context_.impl_ = static_cast<void*>(&context_helper_);
  context_.ReportError = context_helper_.ReportOpError;
  context_.recommended_num_threads = 1;

  // If the system is big endian then convert weights from the flatbuffer from
  // little to big endian on startup so that it does not need to be done during
  // inference.
  // NOTE: This requires that the flatbuffer is held in memory which can be
  // modified by this process.
  if (!FLATBUFFERS_LITTLEENDIAN) {
    for (size_t t = 0; t < tensors_size(); ++t) {
      TfLiteTensor* thisTensor = &context_.tensors[t];
      if (thisTensor->allocation_type == kTfLiteMmapRo)
        CorrectTensorEndianness(thisTensor);
    }
  }

  initialization_status_ = kTfLiteOk;
MicroInterpreter::MicroInterpreter(const Model* model,
                                   const MicroOpResolver& op_resolver,
                                   MicroAllocator* allocator,
                                   ErrorReporter* error_reporter,
                                   tflite::Profiler* profiler)
    : model_(model),
      op_resolver_(op_resolver),
      error_reporter_(error_reporter),
      allocator_(*allocator),
      tensors_allocated_(false),
      initialization_status_(kTfLiteError),
      eval_tensors_(nullptr),
      context_helper_(error_reporter_, &allocator_, model),
      input_tensor_(nullptr),
      output_tensor_(nullptr) {
  Init(profiler);
}

MicroInterpreter::~MicroInterpreter() {
  if (node_and_registrations_ != nullptr) {
    for (size_t i = 0; i < operators_->size(); ++i) {
    for (size_t i = 0; i < subgraph_->operators()->size(); ++i) {
      TfLiteNode* node = &(node_and_registrations_[i].node);
      const TfLiteRegistration* registration =
          node_and_registrations_[i].registration;
@@ -125,7 +156,28 @@ MicroInterpreter::~MicroInterpreter() {
  }
}

void MicroInterpreter::CorrectTensorEndianness(TfLiteTensor* tensorCorr) {
void MicroInterpreter::Init(tflite::Profiler* profiler) {
  const flatbuffers::Vector<flatbuffers::Offset<SubGraph>>* subgraphs =
      model_->subgraphs();
  if (subgraphs->size() != 1) {
    TF_LITE_REPORT_ERROR(error_reporter_,
                         "Only 1 subgraph is currently supported.\n");
    initialization_status_ = kTfLiteError;
    return;
  }
  subgraph_ = (*subgraphs)[0];

  context_.impl_ = static_cast<void*>(&context_helper_);
  context_.ReportError = context_helper_.ReportOpError;
  context_.GetTensor = context_helper_.GetTensor;
  context_.GetEvalTensor = context_helper_.GetEvalTensor;
  context_.recommended_num_threads = 1;
  context_.profiler = profiler;

  initialization_status_ = kTfLiteOk;
}

void MicroInterpreter::CorrectTensorEndianness(TfLiteEvalTensor* tensorCorr) {
  int32_t tensorSize = 1;
  for (int d = 0; d < tensorCorr->dims->size; ++d)
    tensorSize *= reinterpret_cast<const int32_t*>(tensorCorr->dims->data)[d];
@@ -149,6 +201,9 @@ void MicroInterpreter::CorrectTensorEndianness(TfLiteTensor* tensorCorr) {
    case TfLiteType::kTfLiteComplex64:
      CorrectTensorDataEndianness(tensorCorr->data.c64, tensorSize);
      break;
    case TfLiteType::kTfLiteComplex128:
      CorrectTensorDataEndianness(tensorCorr->data.c128, tensorSize);
      break;
    default:
      // Do nothing for other data types.
      break;
@@ -163,16 +218,50 @@ void MicroInterpreter::CorrectTensorDataEndianness(T* data, int32_t size) {
}
TfLiteStatus MicroInterpreter::AllocateTensors() {
  TF_LITE_ENSURE_OK(&context_, allocator_.AllocateNodeAndRegistrations(
                                   op_resolver_, &node_and_registrations_));
  if (allocator_.StartModelAllocation(model_, op_resolver_,
                                      &node_and_registrations_,
                                      &eval_tensors_) != kTfLiteOk) {
    TF_LITE_REPORT_ERROR(error_reporter_,
                         "Failed starting model allocation.\n");
    initialization_status_ = kTfLiteError;
    return kTfLiteError;
  }

  // Update the pointer now that TfLiteEvalTensor allocation has completed on
  // the context helper.
  // TODO(b/16157777): This call would not be needed if ContextHelper rolled
  // into the interpreter.
  context_helper_.SetTfLiteEvalTensors(eval_tensors_);
  context_.tensors_size = subgraph_->tensors()->size();

  // If the system is big endian then convert weights from the flatbuffer from
  // little to big endian on startup so that it does not need to be done during
  // inference.
  // NOTE: This requires that the flatbuffer is held in memory which can be
  // modified by this process.
  if (!FLATBUFFERS_LITTLEENDIAN) {
    for (size_t t = 0; t < subgraph_->tensors()->size(); ++t) {
      if (auto* buffer =
              (*model_->buffers())[subgraph_->tensors()->Get(t)->buffer()]) {
        // If we've found a buffer, does it have any data?
        if (auto* array = buffer->data()) {
          // If it has any data, is the data size larger than zero?
          if (array->size()) {
            // Update the endianness of the corresponding eval tensor since that
            // struct holds the buffer used at inference time.
            CorrectTensorEndianness(&eval_tensors_[t]);
          }
        }
      }
    }
  }

  // Only allow AllocatePersistentBuffer in the Init stage.
  context_.AllocatePersistentBuffer = context_helper_.AllocatePersistentBuffer;
  context_.RequestScratchBufferInArena = nullptr;
  context_.GetScratchBuffer = nullptr;

  for (size_t i = 0; i < operators_->size(); ++i) {
    context_helper_.SetNodeIndex(i);
  for (size_t i = 0; i < subgraph_->operators()->size(); ++i) {
    auto* node = &(node_and_registrations_[i].node);
    auto* registration = node_and_registrations_[i].registration;
    size_t init_data_size;
@@ -189,15 +278,12 @@ TfLiteStatus MicroInterpreter::AllocateTensors() {
      registration->init(&context_, init_data, init_data_size);
    }
  }
  context_helper_.SetNodeIndex(-1);

  // Both AllocatePersistentBuffer and RequestScratchBufferInArena are available
  // in the Prepare stage.
  // Both AllocatePersistentBuffer and RequestScratchBufferInArena are
  // available in the Prepare stage.
  context_.RequestScratchBufferInArena =
      context_helper_.RequestScratchBufferInArena;
  for (size_t i = 0; i < operators_->size(); ++i) {
    // Set node idx to annotate the lifetime for scratch buffers.
    context_helper_.SetNodeIndex(i);
  for (size_t i = 0; i < subgraph_->operators()->size(); ++i) {
    auto* node = &(node_and_registrations_[i].node);
    auto* registration = node_and_registrations_[i].registration;
    if (registration->prepare) {
@@ -210,8 +296,8 @@ TfLiteStatus MicroInterpreter::AllocateTensors() {
        return kTfLiteError;
      }
    }
    allocator_.FinishPrepareNodeAllocations(/*node_id=*/i);
  }
  context_helper_.SetNodeIndex(-1);

  // Prepare is done, we're ready for Invoke. Memory allocation is no longer
  // allowed. Kernels can only fetch scratch buffers via GetScratchBuffer.
@@ -219,7 +305,14 @@ TfLiteStatus MicroInterpreter::AllocateTensors() {
  context_.RequestScratchBufferInArena = nullptr;
  context_.GetScratchBuffer = context_helper_.GetScratchBuffer;

  TF_LITE_ENSURE_OK(&context_, allocator_.FinishTensorAllocation());
  TF_LITE_ENSURE_OK(&context_,
                    allocator_.FinishModelAllocation(model_, eval_tensors_,
                                                     &scratch_buffer_handles_));
  // TODO(b/16157777): Remove this when ContextHelper is rolled into this class.
  context_helper_.SetScratchBufferHandles(scratch_buffer_handles_);

  TF_LITE_ENSURE_STATUS(ResetVariableTensors());

  tensors_allocated_ = true;
  return kTfLiteOk;
}
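// A minimal sketch (not from the diff) of the scratch-buffer contract above
// from a kernel's point of view: request during Prepare, where
// RequestScratchBufferInArena is non-null, and fetch during Invoke via
// GetScratchBuffer. The kernel name and OpData layout are hypothetical.
#include "tensorflow/lite/c/common.h"

struct ExampleOpData {
  int scratch_index;  // filled in during Prepare
};

TfLiteStatus ExamplePrepare(TfLiteContext* context, TfLiteNode* node) {
  ExampleOpData* data = static_cast<ExampleOpData*>(node->user_data);
  // Only records the request; the buffer itself is planned later, inside
  // FinishModelAllocation().
  TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
      context, /*bytes=*/512, &data->scratch_index));
  return kTfLiteOk;
}

TfLiteStatus ExampleEval(TfLiteContext* context, TfLiteNode* node) {
  ExampleOpData* data = static_cast<ExampleOpData*>(node->user_data);
  // Resolved through the ScratchBufferHandle array committed above.
  void* scratch = context->GetScratchBuffer(context, data->scratch_index);
  TF_LITE_ENSURE(context, scratch != nullptr);
  return kTfLiteOk;
}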
@@ -237,12 +330,28 @@ TfLiteStatus MicroInterpreter::Invoke() {
    TF_LITE_ENSURE_OK(&context_, AllocateTensors());
  }

  for (size_t i = 0; i < operators_->size(); ++i) {
  for (size_t i = 0; i < subgraph_->operators()->size(); ++i) {
    auto* node = &(node_and_registrations_[i].node);
    auto* registration = node_and_registrations_[i].registration;

    if (registration->invoke) {
      TfLiteStatus invoke_status = registration->invoke(&context_, node);
      TfLiteStatus invoke_status;
#ifndef NDEBUG  // Omit profiler overhead from release builds.
      // The case where profiler == nullptr is handled by
      // ScopedOperatorProfile.
      tflite::Profiler* profiler =
          reinterpret_cast<tflite::Profiler*>(context_.profiler);
      ScopedOperatorProfile scoped_profiler(
          profiler, OpNameFromRegistration(registration), i);
#endif
      invoke_status = registration->invoke(&context_, node);

      // All TfLiteTensor structs used in the kernel are allocated from temp
      // memory in the allocator. This creates a chain of allocations in the
      // temp section. The call below resets the chain of allocations to
      // prepare for the next call.
      allocator_.ResetTempAllocations();

      if (invoke_status == kTfLiteError) {
        TF_LITE_REPORT_ERROR(
            error_reporter_,
@@ -259,50 +368,82 @@ TfLiteStatus MicroInterpreter::Invoke() {

TfLiteTensor* MicroInterpreter::input(size_t index) {
  const size_t length = inputs_size();
  if ((index < 0) || (index >= length)) {
  if (index >= length) {
    TF_LITE_REPORT_ERROR(error_reporter_,
                         "Input index %d out of range (length is %d)", index,
                         length);
    return nullptr;
  }
  return &(context_.tensors[inputs().Get(index)]);
  if (index != 0) {
    TF_LITE_REPORT_ERROR(
        error_reporter_,
        "Input tensors not at index 0 are allocated from the "
        "persistent memory arena. Repeat calls will cause excess "
        "allocation!");
    return allocator_.AllocatePersistentTfLiteTensor(model_, eval_tensors_,
                                                     inputs().Get(index));
  }
  if (input_tensor_ == nullptr) {
    input_tensor_ = allocator_.AllocatePersistentTfLiteTensor(
        model_, eval_tensors_, inputs().Get(index));
  }
  return input_tensor_;
}

TfLiteTensor* MicroInterpreter::output(size_t index) {
  const size_t length = outputs_size();
  if ((index < 0) || (index >= length)) {
  if (index >= length) {
    TF_LITE_REPORT_ERROR(error_reporter_,
                         "Output index %d out of range (length is %d)", index,
                         length);
    return nullptr;
  }
  return &(context_.tensors[outputs().Get(index)]);
  if (index != 0) {
    TF_LITE_REPORT_ERROR(
        error_reporter_,
        "Output tensors not at index 0 are allocated from the "
        "persistent memory arena. Repeat calls will cause excess "
        "allocation!");
    return allocator_.AllocatePersistentTfLiteTensor(model_, eval_tensors_,
                                                     outputs().Get(index));
  }
  if (output_tensor_ == nullptr) {
    // TODO(b/162311891): Drop these allocations when the interpreter supports
    // handling buffers from TfLiteEvalTensor.
    output_tensor_ = allocator_.AllocatePersistentTfLiteTensor(
        model_, eval_tensors_, outputs().Get(index));
  }
  return output_tensor_;
}

TfLiteTensor* MicroInterpreter::tensor(size_t index) {
  const size_t length = tensors_size();
  if ((index < 0) || (index >= length)) {
  if (index >= length) {
    TF_LITE_REPORT_ERROR(error_reporter_,
                         "Tensor index %d out of range (length is %d)", index,
                         length);
    return nullptr;
  }
  return &context_.tensors[index];
  return allocator_.AllocatePersistentTfLiteTensor(model_, eval_tensors_,
                                                   index);
}

TfLiteStatus MicroInterpreter::ResetVariableTensors() {
  const size_t length = tensors_size();
  for (size_t i = 0; i < length; ++i) {
    TfLiteTensor* cur_tensor = tensor(i);
    if (cur_tensor->is_variable) {
      TfLiteStatus status = tflite::ResetVariableTensor(cur_tensor);
      if (status != kTfLiteOk) {
        TF_LITE_REPORT_ERROR(error_reporter_,
                             "Failed to reset variable tensor at index: %d", i);
        return status;
  for (size_t i = 0; i < subgraph_->tensors()->size(); ++i) {
    auto* tensor = subgraph_->tensors()->Get(i);
    if (tensor->is_variable()) {
      size_t buffer_size;
      TF_LITE_ENSURE_STATUS(
          TfLiteEvalTensorByteLength(&eval_tensors_[i], &buffer_size));

      int value = 0;
      if (tensor->type() == tflite::TensorType_INT8) {
        value = tensor->quantization()->zero_point()->Get(0);
      }
      memset(eval_tensors_[i].data.raw, value, buffer_size);
    }
  }

  return kTfLiteOk;
}
@@ -1,4 +1,4 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -15,13 +15,18 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_MICRO_MICRO_INTERPRETER_H_
#define TENSORFLOW_LITE_MICRO_MICRO_INTERPRETER_H_

#include <cstddef>
#include <cstdint>

#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/op_resolver.h"
#include "tensorflow/lite/core/api/profiler.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/portable_type_to_tflitetype.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/type_to_tflitetype.h"

namespace tflite {

@@ -30,47 +35,63 @@ namespace internal {
// A helper class to encapsulate the implementation of APIs in Context.
// context->impl_ points to an instance of this class.
// Check tensorflow/lite/c/common.h for detailed descriptions.
// TODO(b/16157777): Consider rolling this class into MicroInterpreter.
class ContextHelper {
 public:
  explicit ContextHelper(ErrorReporter* error_reporter,
                         MicroAllocator* allocator)
      : allocator_(allocator), error_reporter_(error_reporter) {}

  static TfLiteStatus AllocatePersistentBuffer(TfLiteContext* ctx, size_t bytes,
                                               void** ptr);
                         MicroAllocator* allocator, const Model* model);

  // Functions that will be assigned to function pointers on TfLiteContext:
  static void* AllocatePersistentBuffer(TfLiteContext* ctx, size_t bytes);
  static TfLiteStatus RequestScratchBufferInArena(TfLiteContext* ctx,
                                                  size_t bytes,
                                                  int* buffer_idx);

  static void* GetScratchBuffer(TfLiteContext* ctx, int buffer_idx);

  static void ReportOpError(struct TfLiteContext* context, const char* format,
                            ...);
  static TfLiteTensor* GetTensor(const struct TfLiteContext* context,
                                 int tensor_idx);
  static TfLiteEvalTensor* GetEvalTensor(const struct TfLiteContext* context,
                                         int tensor_idx);

  void SetNodeIndex(int idx) { current_node_idx_ = idx; }
  // Sets the pointer to a list of TfLiteEvalTensor instances.
  void SetTfLiteEvalTensors(TfLiteEvalTensor* eval_tensors);

  // Sets the pointer to a list of ScratchBufferHandle instances.
  void SetScratchBufferHandles(ScratchBufferHandle* scratch_buffer_handles);

 private:
  MicroAllocator* allocator_;
  ErrorReporter* error_reporter_;
  int current_node_idx_ = -1;
  MicroAllocator* allocator_ = nullptr;
  ErrorReporter* error_reporter_ = nullptr;
  const Model* model_ = nullptr;
  TfLiteEvalTensor* eval_tensors_ = nullptr;
  ScratchBufferHandle* scratch_buffer_handles_ = nullptr;
};

}  // namespace internal

class MicroInterpreter {
|
||||
public:
|
||||
// The lifetime of the model, op resolver, tensor arena, and error reporter
|
||||
// must be at least as long as that of the interpreter object, since the
|
||||
// interpreter may need to access them at any time. This means that you should
|
||||
// usually create them with the same scope as each other, for example having
|
||||
// them all allocated on the stack as local variables through a top-level
|
||||
// function.
|
||||
// The interpreter doesn't do any deallocation of any of the pointed-to
|
||||
// objects, ownership remains with the caller.
|
||||
MicroInterpreter(const Model* model, const OpResolver& op_resolver,
|
||||
// The lifetime of the model, op resolver, tensor arena, error reporter and
// profiler must be at least as long as that of the interpreter object, since
// the interpreter may need to access them at any time. This means that you
// should usually create them with the same scope as each other, for example
// having them all allocated on the stack as local variables through a
// top-level function. The interpreter doesn't do any deallocation of any of
// the pointed-to objects, ownership remains with the caller.
MicroInterpreter(const Model* model, const MicroOpResolver& op_resolver,
uint8_t* tensor_arena, size_t tensor_arena_size,
ErrorReporter* error_reporter);
ErrorReporter* error_reporter,
tflite::Profiler* profiler = nullptr);

// Create an interpreter instance using an existing MicroAllocator instance.
// This constructor should be used when creating an allocator that needs to
// have allocation handled in more than one interpreter or for recording
// allocations inside the interpreter. The lifetime of the allocator must be
// as long as that of the interpreter object.
MicroInterpreter(const Model* model, const MicroOpResolver& op_resolver,
MicroAllocator* allocator, ErrorReporter* error_reporter,
tflite::Profiler* profiler = nullptr);

~MicroInterpreter();

@@ -132,7 +153,7 @@ class MicroInterpreter {

TfLiteStatus initialization_status() const { return initialization_status_; }

size_t operators_size() const { return operators_->size(); }
size_t operators_size() const { return subgraph_->operators()->size(); }

// For debugging only.
const NodeAndRegistration node_and_registration(int node_index) const {
@@ -147,8 +168,16 @@ class MicroInterpreter {
// arena_used_bytes() + 16.
size_t arena_used_bytes() const { return allocator_.used_bytes(); }

protected:
const MicroAllocator& allocator() const { return allocator_; }
const TfLiteContext& context() const { return context_; }

private:
void CorrectTensorEndianness(TfLiteTensor* tensorCorr);
// TODO(b/158263161): Consider switching to Create() function to enable better
// error reporting during initialization.
void Init(tflite::Profiler* profiler);

void CorrectTensorEndianness(TfLiteEvalTensor* tensorCorr);

template <class T>
void CorrectTensorDataEndianness(T* data, int32_t size);
@@ -156,18 +185,25 @@ class MicroInterpreter {
NodeAndRegistration* node_and_registrations_ = nullptr;

const Model* model_;
const OpResolver& op_resolver_;
const MicroOpResolver& op_resolver_;
ErrorReporter* error_reporter_;
TfLiteContext context_ = {};
MicroAllocator allocator_;
MicroAllocator& allocator_;
bool tensors_allocated_;

TfLiteStatus initialization_status_;
const flatbuffers::Vector<flatbuffers::Offset<Tensor>>* tensors_;
const flatbuffers::Vector<flatbuffers::Offset<Operator>>* operators_;

const SubGraph* subgraph_;
const SubGraph* subgraph_ = nullptr;
TfLiteEvalTensor* eval_tensors_ = nullptr;
ScratchBufferHandle* scratch_buffer_handles_ = nullptr;

// TODO(b/16157777): Drop this reference:
internal::ContextHelper context_helper_;

// TODO(b/162311891): Clean these pointers up when this class supports buffers
// from TfLiteEvalTensor.
TfLiteTensor* input_tensor_;
TfLiteTensor* output_tensor_;
};

} // namespace tflite

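A minimal usage sketch for the constructors above. The model symbol, arena size, and resolver choice below are illustrative assumptions, not part of this commit; the point is that the model, resolver, arena, error reporter and profiler all share one scope so they outlive the interpreter:

#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/micro/micro_profiler.h"
#include "tensorflow/lite/schema/schema_generated.h"

extern const unsigned char g_model_data[];  // hypothetical model flatbuffer

void RunModelOnce() {
  tflite::MicroErrorReporter error_reporter;
  tflite::MicroProfiler profiler(&error_reporter);  // optional last argument
  const tflite::Model* model = tflite::GetModel(g_model_data);
  tflite::AllOpsResolver resolver;
  constexpr size_t kArenaSize = 10 * 1024;  // illustrative; size to the model
  static uint8_t tensor_arena[kArenaSize];
  // All pointed-to objects live in this scope, satisfying the lifetime rule.
  tflite::MicroInterpreter interpreter(model, resolver, tensor_arena,
                                       kArenaSize, &error_reporter, &profiler);
  if (interpreter.AllocateTensors() != kTfLiteOk) return;
  interpreter.Invoke();
}
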
@@ -1,4 +1,4 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -15,109 +15,454 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_MICRO_MICRO_MUTABLE_OP_RESOLVER_H_
#define TENSORFLOW_LITE_MICRO_MICRO_MUTABLE_OP_RESOLVER_H_

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/op_resolver.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include <cstdio>
#include <cstring>

#ifndef TFLITE_REGISTRATIONS_MAX
#define TFLITE_REGISTRATIONS_MAX (128)
#endif
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/kernels/fully_connected.h"
#include "tensorflow/lite/micro/kernels/micro_ops.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/schema/schema_generated.h"

namespace tflite {

// Op versions discussed in this file are enumerated here:
// tensorflow/lite/tools/versioning/op_version.cc

inline int MicroOpResolverAnyVersion() { return 0; }

template <unsigned int tOpCount = TFLITE_REGISTRATIONS_MAX>
class MicroOpResolver : public OpResolver {
template <unsigned int tOpCount>
class MicroMutableOpResolver : public MicroOpResolver {
public:
const TfLiteRegistration* FindOp(tflite::BuiltinOperator op,
int version) const override {
explicit MicroMutableOpResolver(ErrorReporter* error_reporter = nullptr)
: error_reporter_(error_reporter) {}

const TfLiteRegistration* FindOp(tflite::BuiltinOperator op) const override {
if (op == BuiltinOperator_CUSTOM) return nullptr;

for (unsigned int i = 0; i < registrations_len_; ++i) {
const TfLiteRegistration& registration = registrations_[i];
if ((registration.builtin_code == op) &&
(registration.version == MicroOpResolverAnyVersion() ||
registration.version == version)) {
if (registration.builtin_code == op) {
return &registration;
}
}
return nullptr;
}

const TfLiteRegistration* FindOp(const char* op, int version) const override {
const TfLiteRegistration* FindOp(const char* op) const override {
for (unsigned int i = 0; i < registrations_len_; ++i) {
const TfLiteRegistration& registration = registrations_[i];
if ((registration.builtin_code == BuiltinOperator_CUSTOM) &&
(strcmp(registration.custom_name, op) == 0) &&
(registration.version == MicroOpResolverAnyVersion() ||
registration.version == version)) {
(strcmp(registration.custom_name, op) == 0)) {
return &registration;
}
}
return nullptr;
}

void AddBuiltin(tflite::BuiltinOperator op, TfLiteRegistration* registration,
int version = 1) {
if (registrations_len_ >= tOpCount) {
// TODO(b/147748244) - Add error reporting hooks so we can report this!
return;
MicroOpResolver::BuiltinParseFunction GetOpDataParser(
BuiltinOperator op) const override {
TFLITE_DCHECK(num_buitin_ops_ <= tOpCount);
for (unsigned int i = 0; i < num_buitin_ops_; ++i) {
if (builtin_codes_[i] == op) return builtin_parsers_[i];
}
TfLiteRegistration* new_registration = &registrations_[registrations_len_];
registrations_len_ += 1;

*new_registration = *registration;
new_registration->builtin_code = op;
new_registration->version = version;
return nullptr;
}

void AddBuiltin(tflite::BuiltinOperator op, TfLiteRegistration* registration,
int min_version, int max_version) {
for (int version = min_version; version <= max_version; ++version) {
AddBuiltin(op, registration, version);
}
}

void AddCustom(const char* name, TfLiteRegistration* registration,
int version = 1) {
// Registers a Custom Operator with the MicroOpResolver.
//
// Only the first call for a given name will be successful. i.e. if this
// function is called again for a previously added Custom Operator, the
// MicroOpResolver will be unchanged and this function will return
// kTfLiteError.
TfLiteStatus AddCustom(const char* name, TfLiteRegistration* registration) {
if (registrations_len_ >= tOpCount) {
// TODO(b/147748244) - Add error reporting hooks so we can report this!
return;
if (error_reporter_) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Couldn't register custom op '%s', resolver size is too small (%d)",
name, tOpCount);
}
return kTfLiteError;
}

if (FindOp(name) != nullptr) {
if (error_reporter_ != nullptr) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Calling AddCustom for the same op more than once "
"is not supported (Op: %s).",
name);
}
return kTfLiteError;
}

TfLiteRegistration* new_registration = &registrations_[registrations_len_];
registrations_len_ += 1;

*new_registration = *registration;
new_registration->builtin_code = BuiltinOperator_CUSTOM;
new_registration->custom_name = name;
new_registration->version = version;
return kTfLiteOk;
}

void AddCustom(const char* name, TfLiteRegistration* registration,
int min_version, int max_version) {
for (int version = min_version; version <= max_version; ++version) {
AddCustom(name, registration, version);
}
// The Add* functions below add the various Builtin operators to the
// MicroMutableOpResolver object.

TfLiteStatus AddAbs() {
return AddBuiltin(BuiltinOperator_ABS, tflite::ops::micro::Register_ABS(),
ParseAbs);
}

TfLiteStatus AddAdd() {
return AddBuiltin(BuiltinOperator_ADD, tflite::ops::micro::Register_ADD(),
ParseAdd);
}

TfLiteStatus AddArgMax() {
return AddBuiltin(BuiltinOperator_ARG_MAX,
tflite::ops::micro::Register_ARG_MAX(), ParseArgMax);
}

TfLiteStatus AddArgMin() {
return AddBuiltin(BuiltinOperator_ARG_MIN,
tflite::ops::micro::Register_ARG_MIN(), ParseArgMin);
}

TfLiteStatus AddAveragePool2D() {
return AddBuiltin(BuiltinOperator_AVERAGE_POOL_2D,
tflite::ops::micro::Register_AVERAGE_POOL_2D(),
ParsePool);
}

TfLiteStatus AddCeil() {
return AddBuiltin(BuiltinOperator_CEIL, tflite::ops::micro::Register_CEIL(),
ParseCeil);
}

TfLiteStatus AddCircularBuffer() {
return AddCustom("CIRCULAR_BUFFER",
tflite::ops::micro::Register_CIRCULAR_BUFFER());
}

TfLiteStatus AddConcatenation() {
return AddBuiltin(BuiltinOperator_CONCATENATION,
tflite::ops::micro::Register_CONCATENATION(),
ParseConcatenation);
}

TfLiteStatus AddConv2D() {
return AddBuiltin(BuiltinOperator_CONV_2D, Register_CONV_2D(), ParseConv2D);
}

TfLiteStatus AddCos() {
return AddBuiltin(BuiltinOperator_COS, tflite::ops::micro::Register_COS(),
ParseCos);
}

TfLiteStatus AddDepthwiseConv2D() {
return AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D,
Register_DEPTHWISE_CONV_2D(), ParseDepthwiseConv2D);
}

TfLiteStatus AddDequantize() {
return AddBuiltin(BuiltinOperator_DEQUANTIZE,
tflite::ops::micro::Register_DEQUANTIZE(),
ParseDequantize);
}

TfLiteStatus AddEqual() {
return AddBuiltin(BuiltinOperator_EQUAL,
tflite::ops::micro::Register_EQUAL(), ParseEqual);
}

TfLiteStatus AddFloor() {
return AddBuiltin(BuiltinOperator_FLOOR,
tflite::ops::micro::Register_FLOOR(), ParseFloor);
}

TfLiteStatus AddFullyConnected(
const TfLiteRegistration& registration = Register_FULLY_CONNECTED()) {
return AddBuiltin(BuiltinOperator_FULLY_CONNECTED, registration,
ParseFullyConnected);
}

TfLiteStatus AddGreater() {
return AddBuiltin(BuiltinOperator_GREATER,
tflite::ops::micro::Register_GREATER(), ParseGreater);
}

TfLiteStatus AddGreaterEqual() {
return AddBuiltin(BuiltinOperator_GREATER_EQUAL,
tflite::ops::micro::Register_GREATER_EQUAL(),
ParseGreaterEqual);
}

TfLiteStatus AddHardSwish() {
return AddBuiltin(BuiltinOperator_HARD_SWISH,
tflite::ops::micro::Register_HARD_SWISH(),
ParseHardSwish);
}

TfLiteStatus AddL2Normalization() {
return AddBuiltin(BuiltinOperator_L2_NORMALIZATION,
tflite::ops::micro::Register_L2_NORMALIZATION(),
ParseL2Normalization);
}

TfLiteStatus AddLess() {
return AddBuiltin(BuiltinOperator_LESS, tflite::ops::micro::Register_LESS(),
ParseLess);
}

TfLiteStatus AddLessEqual() {
return AddBuiltin(BuiltinOperator_LESS_EQUAL,
tflite::ops::micro::Register_LESS_EQUAL(),
ParseLessEqual);
}

TfLiteStatus AddLog() {
return AddBuiltin(BuiltinOperator_LOG, tflite::ops::micro::Register_LOG(),
ParseLog);
}

TfLiteStatus AddLogicalAnd() {
return AddBuiltin(BuiltinOperator_LOGICAL_AND,
tflite::ops::micro::Register_LOGICAL_AND(),
ParseLogicalAnd);
}

TfLiteStatus AddLogicalNot() {
return AddBuiltin(BuiltinOperator_LOGICAL_NOT,
tflite::ops::micro::Register_LOGICAL_NOT(),
ParseLogicalNot);
}

TfLiteStatus AddLogicalOr() {
return AddBuiltin(BuiltinOperator_LOGICAL_OR,
tflite::ops::micro::Register_LOGICAL_OR(),
ParseLogicalOr);
}

TfLiteStatus AddLogistic() {
return AddBuiltin(BuiltinOperator_LOGISTIC,
tflite::ops::micro::Register_LOGISTIC(), ParseLogistic);
}

TfLiteStatus AddMaximum() {
return AddBuiltin(BuiltinOperator_MAXIMUM,
tflite::ops::micro::Register_MAXIMUM(), ParseMaximum);
}

TfLiteStatus AddMaxPool2D() {
return AddBuiltin(BuiltinOperator_MAX_POOL_2D,
tflite::ops::micro::Register_MAX_POOL_2D(), ParsePool);
}

TfLiteStatus AddMean() {
return AddBuiltin(BuiltinOperator_MEAN, tflite::ops::micro::Register_MEAN(),
ParseReducer);
}

TfLiteStatus AddMinimum() {
return AddBuiltin(BuiltinOperator_MINIMUM,
tflite::ops::micro::Register_MINIMUM(), ParseMinimum);
}

TfLiteStatus AddMul() {
return AddBuiltin(BuiltinOperator_MUL, tflite::ops::micro::Register_MUL(),
ParseMul);
}

TfLiteStatus AddNeg() {
return AddBuiltin(BuiltinOperator_NEG, tflite::ops::micro::Register_NEG(),
ParseNeg);
}

TfLiteStatus AddNotEqual() {
return AddBuiltin(BuiltinOperator_NOT_EQUAL,
tflite::ops::micro::Register_NOT_EQUAL(), ParseNotEqual);
}

TfLiteStatus AddPack() {
return AddBuiltin(BuiltinOperator_PACK, tflite::ops::micro::Register_PACK(),
ParsePack);
}

TfLiteStatus AddPad() {
return AddBuiltin(BuiltinOperator_PAD, tflite::ops::micro::Register_PAD(),
ParsePad);
}

TfLiteStatus AddPadV2() {
return AddBuiltin(BuiltinOperator_PADV2,
tflite::ops::micro::Register_PADV2(), ParsePadV2);
}

TfLiteStatus AddPrelu() {
return AddBuiltin(BuiltinOperator_PRELU,
tflite::ops::micro::Register_PRELU(), ParsePrelu);
}

TfLiteStatus AddQuantize() {
return AddBuiltin(BuiltinOperator_QUANTIZE, Register_QUANTIZE(),
ParseQuantize);
}

TfLiteStatus AddReduceMax() {
return AddBuiltin(BuiltinOperator_REDUCE_MAX,
tflite::ops::micro::Register_REDUCE_MAX(), ParseReducer);
}

TfLiteStatus AddRelu() {
return AddBuiltin(BuiltinOperator_RELU, tflite::ops::micro::Register_RELU(),
ParseRelu);
}

TfLiteStatus AddRelu6() {
return AddBuiltin(BuiltinOperator_RELU6,
tflite::ops::micro::Register_RELU6(), ParseRelu6);
}

TfLiteStatus AddReshape() {
return AddBuiltin(BuiltinOperator_RESHAPE,
tflite::ops::micro::Register_RESHAPE(), ParseReshape);
}

TfLiteStatus AddResizeNearestNeighbor() {
return AddBuiltin(BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
tflite::ops::micro::Register_RESIZE_NEAREST_NEIGHBOR(),
ParseResizeNearestNeighbor);
}

TfLiteStatus AddRound() {
return AddBuiltin(BuiltinOperator_ROUND,
tflite::ops::micro::Register_ROUND(), ParseRound);
}

TfLiteStatus AddRsqrt() {
return AddBuiltin(BuiltinOperator_RSQRT,
tflite::ops::micro::Register_RSQRT(), ParseRsqrt);
}

TfLiteStatus AddShape() {
return AddBuiltin(BuiltinOperator_SHAPE, Register_SHAPE(), ParseShape);
}

TfLiteStatus AddSin() {
return AddBuiltin(BuiltinOperator_SIN, tflite::ops::micro::Register_SIN(),
ParseSin);
}

TfLiteStatus AddSoftmax() {
return AddBuiltin(BuiltinOperator_SOFTMAX, Register_SOFTMAX(),
ParseSoftmax);
}

TfLiteStatus AddSplit() {
return AddBuiltin(BuiltinOperator_SPLIT,
tflite::ops::micro::Register_SPLIT(), ParseSplit);
}

TfLiteStatus AddSplitV() {
return AddBuiltin(BuiltinOperator_SPLIT_V,
tflite::ops::micro::Register_SPLIT_V(), ParseSplitV);
}

TfLiteStatus AddSqrt() {
return AddBuiltin(BuiltinOperator_SQRT, tflite::ops::micro::Register_SQRT(),
ParseSqrt);
}

TfLiteStatus AddSquare() {
return AddBuiltin(BuiltinOperator_SQUARE,
tflite::ops::micro::Register_SQUARE(), ParseSquare);
}

TfLiteStatus AddStridedSlice() {
return AddBuiltin(BuiltinOperator_STRIDED_SLICE,
tflite::ops::micro::Register_STRIDED_SLICE(),
ParseStridedSlice);
}

TfLiteStatus AddSub() {
return AddBuiltin(BuiltinOperator_SUB, tflite::ops::micro::Register_SUB(),
ParseSub);
}

TfLiteStatus AddSvdf() {
return AddBuiltin(BuiltinOperator_SVDF, Register_SVDF(), ParseSvdf);
}

TfLiteStatus AddTanh() {
return AddBuiltin(BuiltinOperator_TANH, tflite::ops::micro::Register_TANH(),
ParseTanh);
}

TfLiteStatus AddUnpack() {
return AddBuiltin(BuiltinOperator_UNPACK,
tflite::ops::micro::Register_UNPACK(), ParseUnpack);
}

unsigned int GetRegistrationLength() { return registrations_len_; }

private:
TF_LITE_REMOVE_VIRTUAL_DELETE

TfLiteStatus AddBuiltin(tflite::BuiltinOperator op,
const TfLiteRegistration& registration,
MicroOpResolver::BuiltinParseFunction parser) {
if (op == BuiltinOperator_CUSTOM) {
if (error_reporter_ != nullptr) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Invalid parameter BuiltinOperator_CUSTOM to the "
"AddBuiltin function.");
}
return kTfLiteError;
}

if (FindOp(op) != nullptr) {
if (error_reporter_ != nullptr) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Calling AddBuiltin with the same op more than "
"once is not supported (Op: #%d).",
op);
}
return kTfLiteError;
}

if (registrations_len_ >= tOpCount) {
if (error_reporter_) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Couldn't register builtin op #%d, resolver size "
"is too small (%d).",
op, tOpCount);
}
return kTfLiteError;
}

registrations_[registrations_len_] = registration;
// Strictly speaking, the builtin_code is not necessary for TFLM but filling
// it in regardless.
registrations_[registrations_len_].builtin_code = op;
registrations_len_++;

builtin_codes_[num_buitin_ops_] = op;
builtin_parsers_[num_buitin_ops_] = parser;
num_buitin_ops_++;

return kTfLiteOk;
}

TfLiteRegistration registrations_[tOpCount];
unsigned int registrations_len_ = 0;

TF_LITE_REMOVE_VIRTUAL_DELETE
};
// Arrays (and counter) to store the builtin codes and their corresponding
// parse functions as these are registered with the Op Resolver.
BuiltinOperator builtin_codes_[tOpCount];
MicroOpResolver::BuiltinParseFunction builtin_parsers_[tOpCount];
unsigned int num_buitin_ops_ = 0;

// TODO(b/147854028): Consider switching all uses of MicroMutableOpResolver to
// MicroOpResolver.
class MicroMutableOpResolver
: public MicroOpResolver<TFLITE_REGISTRATIONS_MAX> {
private:
TF_LITE_REMOVE_VIRTUAL_DELETE
ErrorReporter* error_reporter_;
};

}; // namespace tflite

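A sketch of the new per-op registration pattern, under the assumption of a model that needs only four builtins. The capacity is the template argument, and each Add* call returns a TfLiteStatus worth checking since the resolver can run out of slots:

#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

void RegisterOps(tflite::ErrorReporter* error_reporter) {
  // Capacity 4: one slot per operator registered below.
  static tflite::MicroMutableOpResolver<4> resolver(error_reporter);
  if (resolver.AddConv2D() != kTfLiteOk) return;
  if (resolver.AddFullyConnected() != kTfLiteOk) return;
  if (resolver.AddSoftmax() != kTfLiteOk) return;
  // A fifth Add* call would return kTfLiteError and log via the reporter.
  resolver.AddReshape();
}

Registering only the ops a model actually uses is what lets TFLM drop the unused kernels at link time, which is the code-size motivation behind this change.
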
73
code/lib/tfmicro/tensorflow/lite/micro/micro_op_resolver.h
Normal file
@@ -0,0 +1,73 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_OP_RESOLVER_H_
#define TENSORFLOW_LITE_MICRO_MICRO_OP_RESOLVER_H_

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/core/api/op_resolver.h"
#include "tensorflow/lite/schema/schema_generated.h"

namespace tflite {

// This is an interface for the OpResolver for TFLiteMicro. The differences from
// the TFLite OpResolver base class are to:
// * explicitly remove support for Op versions
// * allow for finer grained registration of the Builtin Ops to reduce code
// size for TFLiteMicro.
//
// We need an interface class instead of directly using MicroMutableOpResolver
// because MicroMutableOpResolver is a class template with the number of
// registered Ops as the template parameter.
class MicroOpResolver : public OpResolver {
public:
typedef TfLiteStatus (*BuiltinParseFunction)(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);

// Returns the Op registration struct corresponding to the enum code from the
// flatbuffer schema. Returns nullptr if the op is not found or if op ==
// BuiltinOperator_CUSTOM.
virtual const TfLiteRegistration* FindOp(BuiltinOperator op) const = 0;

// Returns the Op registration struct corresponding to the custom operator by
// name.
virtual const TfLiteRegistration* FindOp(const char* op) const = 0;

// This implementation exists for compatibility with the OpResolver base class
// and disregards the version parameter.
const TfLiteRegistration* FindOp(BuiltinOperator op,
int version) const final {
return FindOp(op);
}

// This implementation exists for compatibility with the OpResolver base class
// and disregards the version parameter.
const TfLiteRegistration* FindOp(const char* op, int version) const final {
return FindOp(op);
}

// Returns the operator specific parsing function for the OpData for a
// BuiltinOperator (if registered), else nullptr.
virtual BuiltinParseFunction GetOpDataParser(BuiltinOperator op) const = 0;

~MicroOpResolver() override {}
};

} // namespace tflite

#endif // TENSORFLOW_LITE_MICRO_MICRO_OP_RESOLVER_H_
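The BuiltinParseFunction indirection lets the runtime parse op data only for registered ops. A simplified sketch of how a caller might use the interface above (the function name is illustrative; error reporting is elided):

TfLiteStatus ParseBuiltinOpData(const tflite::Operator* op,
                                tflite::BuiltinOperator builtin_code,
                                const tflite::MicroOpResolver& resolver,
                                tflite::ErrorReporter* error_reporter,
                                tflite::BuiltinDataAllocator* allocator,
                                void** builtin_data) {
  tflite::MicroOpResolver::BuiltinParseFunction parser =
      resolver.GetOpDataParser(builtin_code);
  if (parser == nullptr) {
    return kTfLiteError;  // The op was never registered with the resolver.
  }
  return parser(op, error_reporter, allocator, builtin_data);
}
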
@@ -1,144 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/micro_optional_debug_tools.h"

// `cinttypes` requires `__STDC_FORMAT_MACROS` to be defined to expose `PRId32`.
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif

#include <cinttypes>

#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
namespace {

std::vector<int> flatbuffersVector2StdVector(
const flatbuffers::Vector<int32_t>& fVector) {
std::vector<int> stdVector;
stdVector.reserve(fVector.size());
for (size_t i = 0; i < fVector.size(); i++) {
stdVector.push_back(fVector.Get(i));
}
return stdVector;
}

void PrintIntVector(const std::vector<int>& v) {
for (const auto& it : v) {
printf(" %d", it);
}
printf("\n");
}

void PrintTfLiteIntVector(const TfLiteIntArray* v) {
if (!v) {
printf(" (null)\n");
return;
}
for (int k = 0; k < v->size; k++) {
printf(" %d", v->data[k]);
}
printf("\n");
}

const char* TensorTypeName(TfLiteType type) {
switch (type) {
case kTfLiteNoType:
return "kTfLiteNoType";
case kTfLiteFloat32:
return "kTfLiteFloat32";
case kTfLiteInt32:
return "kTfLiteInt32";
case kTfLiteUInt8:
return "kTfLiteUInt8";
case kTfLiteInt8:
return "kTfLiteInt8";
case kTfLiteInt64:
return "kTfLiteInt64";
case kTfLiteString:
return "kTfLiteString";
case kTfLiteBool:
return "kTfLiteBool";
case kTfLiteInt16:
return "kTfLiteInt16";
case kTfLiteComplex64:
return "kTfLiteComplex64";
case kTfLiteFloat16:
return "kTfLiteFloat16";
case kTfLiteFloat64:
return "kTfLiteFloat64";
}
return "(invalid)";
}

const char* AllocTypeName(TfLiteAllocationType type) {
switch (type) {
case kTfLiteMemNone:
return "kTfLiteMemNone";
case kTfLiteMmapRo:
return "kTfLiteMmapRo";
case kTfLiteDynamic:
return "kTfLiteDynamic";
case kTfLiteArenaRw:
return "kTfLiteArenaRw";
case kTfLiteArenaRwPersistent:
return "kTfLiteArenaRwPersistent";
}
return "(invalid)";
}
} // namespace

// Prints a dump of what tensors and what nodes are in the interpreter.
void PrintInterpreterState(MicroInterpreter* interpreter) {
printf("Interpreter has %zu tensors and %zu nodes\n",
interpreter->tensors_size(), interpreter->operators_size());
printf("Inputs:");
PrintIntVector(flatbuffersVector2StdVector(interpreter->inputs()));
printf("Outputs:");
PrintIntVector(flatbuffersVector2StdVector(interpreter->outputs()));
printf("\n");

for (size_t tensor_index = 0; tensor_index < interpreter->tensors_size();
tensor_index++) {
TfLiteTensor* tensor = interpreter->tensor(static_cast<int>(tensor_index));
printf("Tensor %3zu %-20s %10s %15s %10zu bytes (%4.1f MB) ", tensor_index,
tensor->name, TensorTypeName(tensor->type),
AllocTypeName(tensor->allocation_type), tensor->bytes,
static_cast<double>(tensor->bytes / (1 << 20)));
PrintTfLiteIntVector(tensor->dims);
}
printf("\n");

for (size_t node_index = 0; node_index < interpreter->operators_size();
node_index++) {
const NodeAndRegistration node_and_reg =
interpreter->node_and_registration(static_cast<int>(node_index));
const TfLiteNode& node = node_and_reg.node;
const TfLiteRegistration* reg = node_and_reg.registration;
if (reg->custom_name != nullptr) {
printf("Node %3zu Operator Custom Name %s\n", node_index,
reg->custom_name);
} else {
printf("Node %3zu Operator Builtin Code %3" PRId32 " %s\n", node_index,
reg->builtin_code, EnumNamesBuiltinOperator()[reg->builtin_code]);
}
printf(" Inputs:");
PrintTfLiteIntVector(node.inputs);
printf(" Outputs:");
PrintTfLiteIntVector(node.outputs);
}
}

} // namespace tflite
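For reference, the removed PrintInterpreterState dumped output of roughly the following shape (the values are illustrative, derived only from the printf format strings above):

// Interpreter has 3 tensors and 1 nodes
// Inputs: 0
// Outputs: 2
// Tensor   0 input  kTfLiteInt8  kTfLiteArenaRw  784 bytes ( 0.0 MB)  1 28 28 1
// Node   0 Operator Builtin Code   3 CONV_2D
//  Inputs: 0 1
//  Outputs: 2
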
42
code/lib/tfmicro/tensorflow/lite/micro/micro_profiler.cc
Normal file
@@ -0,0 +1,42 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/micro/micro_profiler.h"

#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/micro_time.h"

namespace tflite {

MicroProfiler::MicroProfiler(tflite::ErrorReporter* reporter)
: reporter_(reporter) {}

uint32_t MicroProfiler::BeginEvent(const char* tag, EventType event_type,
int64_t event_metadata1,
int64_t event_metadata2) {
start_time_ = GetCurrentTimeTicks();
TFLITE_DCHECK(tag != nullptr);
event_tag_ = tag;
return 0;
}

void MicroProfiler::EndEvent(uint32_t event_handle) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
int32_t end_time = GetCurrentTimeTicks();
TF_LITE_REPORT_ERROR(reporter_, "%s took %d cycles\n", event_tag_,
end_time - start_time_);
#endif
}
} // namespace tflite
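Note that BeginEvent stores a single start_time_/event_tag_ pair, so events cannot nest: a second BeginEvent before EndEvent silently overwrites the first. A minimal sketch of the direct call pattern (the tag is illustrative):

tflite::MicroErrorReporter error_reporter;
tflite::MicroProfiler profiler(&error_reporter);

uint32_t handle = profiler.BeginEvent(
    "invoke", tflite::Profiler::EventType::DEFAULT, 0, 0);
// ... code to be timed ...
profiler.EndEvent(handle);  // logs "invoke took N cycles" via the reporter
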
71
code/lib/tfmicro/tensorflow/lite/micro/micro_profiler.h
Normal file
@@ -0,0 +1,71 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_
#define TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_

#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/profiler.h"
#include "tensorflow/lite/micro/compatibility.h"

namespace tflite {

// MicroProfiler creates a common way to gain fine-grained insight into runtime
// performance. Bottleneck operators can be identified along with slow code
// sections. This can be used in conjunction with running the relevant micro
// benchmark to evaluate end-to-end performance.
//
// Usage example:
// MicroProfiler profiler(error_reporter);
// {
// ScopedProfile scoped_profile(profiler, tag);
// work_to_profile();
// }
//
// This will call the following methods in order:
// int event_handle = profiler->BeginEvent(op_name, EventType::DEFAULT, 0)
// work_to_profile();
// profiler->EndEvent(event_handle)
class MicroProfiler : public tflite::Profiler {
public:
explicit MicroProfiler(tflite::ErrorReporter* reporter);
~MicroProfiler() override = default;

// AddEvent is unused for Tf Micro.
void AddEvent(const char* tag, EventType event_type, uint64_t start,
uint64_t end, int64_t event_metadata1,
int64_t event_metadata2) override{};

// BeginEvent followed by code followed by EndEvent will profile the code
// enclosed. Multiple concurrent events are unsupported, so the return value
// is always 0. Event_metadata1 and event_metadata2 are unused. The tag
// pointer must be valid until EndEvent is called.
uint32_t BeginEvent(const char* tag, EventType event_type,
int64_t event_metadata1,
int64_t event_metadata2) override;

// Event_handle is ignored since TF Micro does not support concurrent events.
void EndEvent(uint32_t event_handle) override;

private:
tflite::ErrorReporter* reporter_;
int32_t start_time_;
const char* event_tag_;
TF_LITE_REMOVE_VIRTUAL_DELETE
};

} // namespace tflite

#endif // TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_
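The RAII form from the usage comment, spelled out as a compilable sketch. ScopedProfile (declared in tensorflow/lite/core/api/profiler.h) calls BeginEvent on construction and EndEvent on destruction, so the block braces bound the measured region:

#include "tensorflow/lite/core/api/profiler.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_profiler.h"

void TimedWork() {
  tflite::MicroErrorReporter error_reporter;
  tflite::MicroProfiler profiler(&error_reporter);
  {
    tflite::ScopedProfile scoped_profile(&profiler, "work_to_profile");
    // ... work to profile ...
  }  // EndEvent fires here and reports the elapsed cycle count.
}
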
@@ -23,6 +23,7 @@ limitations under the License.

#include <cstdarg>
#include <cstdint>
#include <cstring>

namespace {

@@ -125,7 +126,8 @@ char* FastFloatToBufferLeft(float f, char* buffer) {
const int32_t exponent_shift = 23;
const int32_t exponent_bias = 127;
const uint32_t fraction_mask = 0x007fffff;
const uint32_t u = *reinterpret_cast<uint32_t*>(&f);
uint32_t u;
memcpy(&u, &f, sizeof(int32_t));
const int32_t exponent =
((u & exponent_mask) >> exponent_shift) - exponent_bias;
const uint32_t fraction = (u & fraction_mask);
@@ -163,7 +165,49 @@ char* FastFloatToBufferLeft(float f, char* buffer) {
*current = '.';
current += 1;
*current = 0;

// Prepend leading zeros to fill in all 7 bytes of the fraction. Truncate
// zeros off the end of the fraction. Every fractional value takes 7 bytes.
// For example, 2500 would be written into the buffer as 0002500 since it
// represents .00025.
constexpr int kMaxFractionalDigits = 7;

// Abort early if there is not enough space in the buffer.
if (current_end - current <= kMaxFractionalDigits) {
return current;
}

// Pre-fill buffer with zeros to ensure zero-truncation works properly.
for (int i = 1; i < kMaxFractionalDigits; i++) {
*(current + i) = '0';
}

// Track how large the fraction is to add leading zeros.
char* previous = current;
current = StrCatUInt32(current, (current_end - current), scaled_fraction, 10);
int fraction_digits = current - previous;
int leading_zeros = kMaxFractionalDigits - fraction_digits;

// Overwrite the null terminator from StrCatUInt32 to ensure zero-truncation
// works properly.
*current = '0';

// Shift fraction values and prepend zeros if necessary.
if (leading_zeros != 0) {
for (int i = 0; i < fraction_digits; i++) {
current--;
*(current + leading_zeros) = *current;
*current = '0';
}
current += kMaxFractionalDigits;
}

// Truncate trailing zeros for cleaner logs. Ensure we leave at least one
// fractional character for the case when scaled_fraction is 0.
while (*(current - 1) == '0' && (current - 1) > previous) {
current--;
}
*current = 0;
current = StrCatStr(current, (current_end - current), "*2^");
current = StrCatInt32(current, (current_end - current), exponent);
return current;

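A worked example of the padding logic above: with scaled_fraction = 2500, StrCatUInt32 writes "2500" (four digits), so leading_zeros = 3; the shift loop turns the buffer into "0002500", and trailing-zero truncation then leaves "00025", i.e. the fraction .00025. The memcpy also replaces a reinterpret_cast that violated strict aliasing; a standalone sketch of that bit extraction (the exponent_mask constant is assumed to be the standard IEEE-754 value defined earlier in the original function):

#include <cstdint>
#include <cstring>

// Extracts the unbiased exponent and fraction bits of an IEEE-754 float
// without violating strict aliasing, mirroring the memcpy fix above.
void FloatBits(float f, int32_t* exponent, uint32_t* fraction) {
  const uint32_t exponent_mask = 0x7f800000;  // assumed, standard IEEE-754
  const int32_t exponent_shift = 23;
  const int32_t exponent_bias = 127;
  const uint32_t fraction_mask = 0x007fffff;
  uint32_t u;
  memcpy(&u, &f, sizeof(u));  // bit-copy is legal for trivially copyable types
  *exponent = static_cast<int32_t>((u & exponent_mask) >> exponent_shift) -
              exponent_bias;
  *fraction = u & fraction_mask;
}
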
@@ -15,34 +15,15 @@ limitations under the License.

#include "tensorflow/lite/micro/micro_utils.h"

#include <limits.h>
#include <math.h>
#include <stdint.h>
#include <cmath>
#include <cstdint>
#include <limits>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/op_macros.h"

namespace tflite {

namespace {

static const uint8_t kAsymmetricUInt8Min = 0;
static const uint8_t kAsymmetricUInt8Max = UINT8_MAX;
static const uint8_t kSymmetricUInt8Min = 1;
static const uint8_t kSymmetricUInt8Max = UINT8_MAX;
static const int8_t kAsymmetricInt8Min = INT8_MIN;
static const int8_t kAsymmetricInt8Max = INT8_MAX;
static const int kSymmetricInt8Scale = kAsymmetricInt8Max;

static const int16_t kAsymmetricInt16Min = INT16_MIN;
static const int16_t kAsymmetricInt16Max = INT16_MAX;
static const int kSymmetricInt16Scale = kAsymmetricInt16Max;

static const int32_t kAsymmetricInt32Max = INT32_MAX;
static const int kSymmetricInt32Scale = kAsymmetricInt32Max;

} // namespace

int ElementCount(const TfLiteIntArray& dims) {
int result = 1;
for (int i = 0; i < dims.size; ++i) {
@@ -51,109 +32,6 @@ int ElementCount(const TfLiteIntArray& dims) {
return result;
}

// Converts a float value into an unsigned eight-bit quantized value.
uint8_t FloatToAsymmetricQuantizedUInt8(const float value, const float scale,
const int zero_point) {
int32_t result = round(value / scale) + zero_point;
if (result < kAsymmetricUInt8Min) {
result = kAsymmetricUInt8Min;
}
if (result > kAsymmetricUInt8Max) {
result = kAsymmetricUInt8Max;
}
return result;
}

uint8_t FloatToSymmetricQuantizedUInt8(const float value, const float scale) {
int32_t result = round(value / scale);
if (result < kSymmetricUInt8Min) {
result = kSymmetricUInt8Min;
}
if (result > kSymmetricUInt8Max) {
result = kSymmetricUInt8Max;
}
return result;
}

int8_t FloatToAsymmetricQuantizedInt8(const float value, const float scale,
const int zero_point) {
int32_t result = round(value / scale) + zero_point;
if (result < kAsymmetricInt8Min) {
result = kAsymmetricInt8Min;
}
if (result > kAsymmetricInt8Max) {
result = kAsymmetricInt8Max;
}
return result;
}

int16_t FloatToAsymmetricQuantizedInt16(const float value, const float scale,
const int zero_point) {
int32_t result = round(value / scale) + zero_point;
if (result < kAsymmetricInt16Min) {
result = kAsymmetricInt16Min;
}
if (result > kAsymmetricInt16Max) {
result = kAsymmetricInt16Max;
}
return result;
}

int8_t FloatToSymmetricQuantizedInt8(const float value, const float scale) {
return FloatToAsymmetricQuantizedInt8(value, scale, 0.0f);
}

int32_t FloatToSymmetricQuantizedInt32(const float value, const float scale) {
float quantized = round(value / scale);
if (static_cast<int>(quantized) > INT_MAX) {
quantized = static_cast<float>(INT_MAX);
} else if (quantized < INT_MIN) {
quantized = static_cast<float>(INT_MIN);
}

return static_cast<int>(quantized);
}

void AsymmetricQuantize(const float* input, int8_t* output, int num_elements,
float scale, int zero_point) {
for (int i = 0; i < num_elements; i++) {
output[i] = FloatToAsymmetricQuantizedInt8(input[i], scale, zero_point);
}
}

void AsymmetricQuantize(const float* input, uint8_t* output, int num_elements,
float scale, int zero_point) {
for (int i = 0; i < num_elements; i++) {
output[i] = FloatToAsymmetricQuantizedUInt8(input[i], scale, zero_point);
}
}

void AsymmetricQuantize(const float* input, int16_t* output, int num_elements,
float scale, int zero_point) {
for (int i = 0; i < num_elements; i++) {
output[i] = FloatToAsymmetricQuantizedInt16(input[i], scale, zero_point);
}
}

void SymmetricQuantize(const float* input, int32_t* output, int num_elements,
float scale) {
for (int i = 0; i < num_elements; i++) {
output[i] = FloatToSymmetricQuantizedInt32(input[i], scale);
}
}

void SymmetricPerChannelQuantize(const float* input, int32_t* output,
int num_elements, int num_channels,
float* scales) {
int elements_per_channel = num_elements / num_channels;
for (int i = 0; i < num_channels; i++) {
for (int j = 0; j < elements_per_channel; j++) {
output[i * elements_per_channel + j] = FloatToSymmetricQuantizedInt32(
input[i * elements_per_channel + j], scales[i]);
}
}
}

void SignedSymmetricPerChannelQuantize(const float* values,
TfLiteIntArray* dims,
int quantized_dimension,
@@ -186,94 +64,17 @@ void SignedSymmetricPerChannelQuantize(const float* values,
max = fmaxf(max, values[idx]);
}
scaling_factors[channel] =
fmaxf(fabs(min), fabs(max)) / kSymmetricInt8Scale;
fmaxf(fabs(min), fabs(max)) / std::numeric_limits<int8_t>::max();
for (int i = 0; i < per_channel_size; i++) {
int idx = channel * channel_stride + i * stride;
const int32_t quantized_value =
static_cast<int32_t>(roundf(values[idx] / scaling_factors[channel]));
// Clamp: just in case some odd numeric offset.
quantized_values[idx] = fminf(
kSymmetricInt8Scale, fmaxf(-kSymmetricInt8Scale, quantized_value));
quantized_values[idx] =
fminf(std::numeric_limits<int8_t>::max(),
fmaxf(std::numeric_limits<int8_t>::min() + 1, quantized_value));
}
}
}

void SignedSymmetricQuantize(const float* values, TfLiteIntArray* dims,
int8_t* quantized_values, float* scaling_factor) {
int input_size = ElementCount(*dims);

float min = 0;
float max = 0;
for (int i = 0; i < input_size; i++) {
min = fminf(min, values[i]);
max = fmaxf(max, values[i]);
}
*scaling_factor = fmaxf(fabs(min), fabs(max)) / kSymmetricInt8Scale;
for (int i = 0; i < input_size; i++) {
const int32_t quantized_value =
static_cast<int32_t>(roundf(values[i] / *scaling_factor));
// Clamp: just in case some odd numeric offset.
quantized_values[i] = fminf(kSymmetricInt8Scale,
fmaxf(-kSymmetricInt8Scale, quantized_value));
}
}

void SignedSymmetricQuantize(const float* values, TfLiteIntArray* dims,
int16_t* quantized_values, float* scaling_factor) {
int input_size = ElementCount(*dims);

float min = 0;
float max = 0;
for (int i = 0; i < input_size; i++) {
min = fminf(min, values[i]);
max = fmaxf(max, values[i]);
}
*scaling_factor = fmaxf(fabs(min), fabs(max)) / kSymmetricInt16Scale;
for (int i = 0; i < input_size; i++) {
const int32_t quantized_value =
static_cast<int32_t>(roundf(values[i] / *scaling_factor));
// Clamp: just in case some odd numeric offset.
quantized_values[i] = fminf(kSymmetricInt16Scale,
fmaxf(-kSymmetricInt16Scale, quantized_value));
}
}

void SignedSymmetricQuantize(const float* values, TfLiteIntArray* dims,
int32_t* quantized_values, float* scaling_factor) {
int input_size = ElementCount(*dims);

float min = 0;
float max = 0;
for (int i = 0; i < input_size; i++) {
min = fminf(min, values[i]);
max = fmaxf(max, values[i]);
}

*scaling_factor =
fmaxf(fabs(min), fabs(max)) / static_cast<float>(kSymmetricInt32Scale);
for (int i = 0; i < input_size; i++) {
const int32_t quantized_value =
static_cast<int32_t>(roundf(values[i] / *scaling_factor));
// Clamp: just in case some odd numeric offset.
quantized_values[i] = fminf(
static_cast<float>(kSymmetricInt32Scale),
fmaxf(static_cast<float>(-kSymmetricInt32Scale), quantized_value));
}
}

void SymmetricQuantize(const float* values, TfLiteIntArray* dims,
uint8_t* quantized_values, float* scaling_factor) {
SignedSymmetricQuantize(values, dims,
reinterpret_cast<int8_t*>(quantized_values),
scaling_factor);
}

void SymmetricDequantize(const int8_t* values, const int size,
const float dequantization_scale,
float* dequantized_values) {
for (int i = 0; i < size; ++i) {
dequantized_values[i] = values[i] * dequantization_scale;
}
}

} // namespace tflite

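A small round-trip through the asymmetric int8 helper above (scale and zero point are illustrative):

const float scale = 0.5f;
const int zero_point = -1;
// round(3.2f / 0.5f) + (-1) = 6 - 1 = 5
int8_t q = tflite::FloatToAsymmetricQuantizedInt8(3.2f, scale, zero_point);
// Dequantize: (5 - (-1)) * 0.5f = 3.0f, a quantization error of 0.2.
float back = (q - zero_point) * scale;
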
@@ -16,7 +16,9 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_MICRO_MICRO_UTILS_H_
#define TENSORFLOW_LITE_MICRO_MICRO_UTILS_H_

#include <stdint.h>
#include <algorithm>
#include <cmath>
#include <cstdint>

#include "tensorflow/lite/c/common.h"

@@ -26,51 +28,68 @@ namespace tflite {

int ElementCount(const TfLiteIntArray& dims);

uint8_t FloatToAsymmetricQuantizedUInt8(const float value, const float scale,
const int zero_point);
// Converts a float value into a quantized value. Note that large values (close
// to max int and min int) may see significant error due to a lack of floating
// point granularity for large values.
template <typename T>
T FloatToQuantizedType(const float value, const float scale, int zero_point) {
int32_t result = round(value / scale) + zero_point;
result =
std::max(static_cast<int32_t>(std::numeric_limits<T>::min()), result);
result =
std::min(static_cast<int32_t>(std::numeric_limits<T>::max()), result);
return result;
}

uint8_t FloatToSymmetricQuantizedUInt8(const float value, const float scale);

int8_t FloatToAsymmetricQuantizedInt8(const float value, const float scale,
const int zero_point);

int16_t FloatToAsymmetricQuantizedInt16(const float value, const float scale,
const int zero_point);

int8_t FloatToSymmetricQuantizedInt8(const float value, const float scale);

// Converts a float value into a signed thirty-two-bit quantized value. Note
// that values close to max int and min int may see significant error due to
// a lack of floating point granularity for large values.
int32_t FloatToSymmetricQuantizedInt32(const float value, const float scale);
template <typename T>
T FloatToSymmetricQuantizedType(const float value, const float scale) {
int32_t result = round(value / scale);
result =
std::max(static_cast<int32_t>(std::numeric_limits<T>::min() + 1), result);
result =
std::min(static_cast<int32_t>(std::numeric_limits<T>::max()), result);
return result;
}

// Helper methods to quantize arrays of floats to the desired format.
//
// There are several key flavors of quantization in TfLite:
// asymmetric symmetric per channel
// int8 | X | X | X |
// uint8 | X | X | |
// int16 | X | | |
// int32 | | X | X |
// int8_t | X | X | X |
// uint8_t | X | X | |
// int16_t | X | | |
// int32_t | | X | X |
//
// The per-op quantization spec can be found here:
// https://www.tensorflow.org/lite/performance/quantization_spec
template <typename T>
void Quantize(const float* input, T* output, int num_elements, float scale,
int zero_point) {
for (int i = 0; i < num_elements; i++) {
output[i] = FloatToQuantizedType<T>(input[i], scale, zero_point);
}
}

void AsymmetricQuantize(const float* input, int8_t* output, int num_elements,
float scale, int zero_point = 0);
template <typename T>
void SymmetricQuantize(const float* input, T* output, int num_elements,
float scale) {
for (int i = 0; i < num_elements; i++) {
output[i] = FloatToSymmetricQuantizedType<T>(input[i], scale);
}
}

void AsymmetricQuantize(const float* input, uint8_t* output, int num_elements,
float scale, int zero_point = 128);

void AsymmetricQuantize(const float* input, int16_t* output, int num_elements,
float scale, int zero_point = 0);

void SymmetricQuantize(const float* input, int32_t* output, int num_elements,
float scale);

void SymmetricPerChannelQuantize(const float* input, int32_t* output,
template <typename T>
void SymmetricPerChannelQuantize(const float* input, T* output,
int num_elements, int num_channels,
float* scales);
float* scales) {
int elements_per_channel = num_elements / num_channels;
for (int i = 0; i < num_channels; i++) {
for (int j = 0; j < elements_per_channel; j++) {
output[i * elements_per_channel + j] = FloatToSymmetricQuantizedType<T>(
input[i * elements_per_channel + j], scales[i]);
}
}
}

void SignedSymmetricPerChannelQuantize(const float* values,
TfLiteIntArray* dims,
@@ -78,21 +97,37 @@ void SignedSymmetricPerChannelQuantize(const float* values,
int8_t* quantized_values,
float* scaling_factor);

void SignedSymmetricQuantize(const float* values, TfLiteIntArray* dims,
int8_t* quantized_values, float* scaling_factor);
// Quantizes inputs based on the values provided, choosing the smallest range
// which includes all input values.
template <typename T>
void SymmetricQuantizeCalculateScales(const float* values, TfLiteIntArray* dims,
T* output, float* scale) {
int input_size = ElementCount(*dims);

void SignedSymmetricQuantize(const float* values, TfLiteIntArray* dims,
int16_t* quantized_values, float* scaling_factor);
float min = 0;
float max = 0;
for (int i = 0; i < input_size; i++) {
min = fminf(min, values[i]);
max = fmaxf(max, values[i]);
}
*scale = fmaxf(std::abs(min), std::abs(max)) / std::numeric_limits<T>::max();
for (int i = 0; i < input_size; i++) {
int32_t quantized_value =
static_cast<int32_t>(roundf(values[i] / *scale));
// Clamp: just in case some odd numeric offset.
quantized_value = fminf(std::numeric_limits<T>::max(), quantized_value);
quantized_value = fmaxf(std::numeric_limits<T>::min() + 1, quantized_value);
output[i] = quantized_value;
}
}

void SignedSymmetricQuantize(const float* values, TfLiteIntArray* dims,
int32_t* quantized_values, float* scaling_factor);

void SymmetricQuantize(const float* values, TfLiteIntArray* dims,
uint8_t* quantized_values, float* scaling_factor);

void SymmetricDequantize(const int8_t* values, const int size,
const float dequantization_scale,
float* dequantized_values);
template <typename T>
void Dequantize(const T* values, const int size, const float scale,
int zero_point, float* dequantized_values) {
for (int i = 0; i < size; ++i) {
dequantized_values[i] = (values[i] - zero_point) * scale;
}
}

} // namespace tflite

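The templated helpers collapse the per-type overloads into one code path. A usage sketch (buffer contents and scale are illustrative):

#include "tensorflow/lite/micro/micro_utils.h"

void QuantizeRoundTrip() {
  const float input[4] = {-1.0f, 0.0f, 0.5f, 0.99f};
  int8_t quantized[4];
  float restored[4];
  const float scale = 1.0f / 128.0f;

  // Asymmetric per-tensor quantization via the new templates; values that
  // would overflow int8_t (e.g. -1.0f / scale = -128) are clamped in range.
  tflite::Quantize<int8_t>(input, quantized, 4, scale, /*zero_point=*/0);
  tflite::Dequantize<int8_t>(quantized, 4, scale, /*zero_point=*/0, restored);
}
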
@@ -0,0 +1,244 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/micro/recording_micro_allocator.h"

#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/recording_simple_memory_allocator.h"

namespace tflite {

RecordingMicroAllocator::RecordingMicroAllocator(
RecordingSimpleMemoryAllocator* recording_memory_allocator,
ErrorReporter* error_reporter)
: MicroAllocator(recording_memory_allocator, error_reporter),
recording_memory_allocator_(recording_memory_allocator) {}

RecordingMicroAllocator* RecordingMicroAllocator::Create(
uint8_t* tensor_arena, size_t arena_size, ErrorReporter* error_reporter) {
TFLITE_DCHECK(error_reporter != nullptr);

RecordingSimpleMemoryAllocator* simple_memory_allocator =
RecordingSimpleMemoryAllocator::Create(error_reporter, tensor_arena,
arena_size);
TFLITE_DCHECK(simple_memory_allocator != nullptr);

uint8_t* allocator_buffer = simple_memory_allocator->AllocateFromTail(
sizeof(RecordingMicroAllocator), alignof(RecordingMicroAllocator));
RecordingMicroAllocator* allocator = new (allocator_buffer)
RecordingMicroAllocator(simple_memory_allocator, error_reporter);
return allocator;
}

RecordedAllocation RecordingMicroAllocator::GetRecordedAllocation(
RecordedAllocationType allocation_type) const {
switch (allocation_type) {
case RecordedAllocationType::kTfLiteEvalTensorData:
return recorded_tflite_eval_tensor_data_;
case RecordedAllocationType::kPersistentTfLiteTensorData:
return recorded_persistent_tflite_tensor_data_;
case RecordedAllocationType::kPersistentTfLiteTensorQuantizationData:
return recorded_persistent_tflite_tensor_quantization_data_;
case RecordedAllocationType::kPersistentBufferData:
return recorded_persistent_buffer_data_;
case RecordedAllocationType::kTfLiteTensorVariableBufferData:
return recorded_tflite_tensor_variable_buffer_data_;
case RecordedAllocationType::kNodeAndRegistrationArray:
return recorded_node_and_registration_array_data_;
case RecordedAllocationType::kOpData:
return recorded_op_data_;
}
TF_LITE_REPORT_ERROR(error_reporter(), "Invalid allocation type supplied: %d",
allocation_type);
return RecordedAllocation();
}

const RecordingSimpleMemoryAllocator*
RecordingMicroAllocator::GetSimpleMemoryAllocator() const {
return recording_memory_allocator_;
}

void RecordingMicroAllocator::PrintAllocations() const {
TF_LITE_REPORT_ERROR(
error_reporter(),
"[RecordingMicroAllocator] Arena allocation total %d bytes",
recording_memory_allocator_->GetUsedBytes());
TF_LITE_REPORT_ERROR(
error_reporter(),
"[RecordingMicroAllocator] Arena allocation head %d bytes",
recording_memory_allocator_->GetHeadUsedBytes());
TF_LITE_REPORT_ERROR(
error_reporter(),
"[RecordingMicroAllocator] Arena allocation tail %d bytes",
recording_memory_allocator_->GetTailUsedBytes());
PrintRecordedAllocation(RecordedAllocationType::kTfLiteEvalTensorData,
"TfLiteEvalTensor data", "allocations");
PrintRecordedAllocation(RecordedAllocationType::kPersistentTfLiteTensorData,
"Persistent TfLiteTensor data", "tensors");
PrintRecordedAllocation(
RecordedAllocationType::kPersistentTfLiteTensorQuantizationData,
"Persistent TfLiteTensor quantization data", "allocations");
PrintRecordedAllocation(RecordedAllocationType::kPersistentBufferData,
"Persistent buffer data", "allocations");
PrintRecordedAllocation(
RecordedAllocationType::kTfLiteTensorVariableBufferData,
"TfLiteTensor variable buffer data", "allocations");
PrintRecordedAllocation(RecordedAllocationType::kNodeAndRegistrationArray,
"NodeAndRegistration struct",
"NodeAndRegistration structs");
PrintRecordedAllocation(RecordedAllocationType::kOpData,
"Operator runtime data", "OpData structs");
}

void* RecordingMicroAllocator::AllocatePersistentBuffer(size_t bytes) {
RecordedAllocation allocations = SnapshotAllocationUsage();
void* buffer = MicroAllocator::AllocatePersistentBuffer(bytes);
RecordAllocationUsage(allocations, recorded_persistent_buffer_data_);

return buffer;
}

void RecordingMicroAllocator::PrintRecordedAllocation(
RecordedAllocationType allocation_type, const char* allocation_name,
const char* allocation_description) const {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
RecordedAllocation allocation = GetRecordedAllocation(allocation_type);
if (allocation.used_bytes > 0 || allocation.requested_bytes > 0) {
TF_LITE_REPORT_ERROR(
error_reporter(),
"[RecordingMicroAllocator] '%s' used %d bytes with alignment overhead "
"(requested %d bytes for %d %s)",
allocation_name, allocation.used_bytes, allocation.requested_bytes,
allocation.count, allocation_description);
}
#endif
}

TfLiteStatus RecordingMicroAllocator::AllocateNodeAndRegistrations(
const Model* model, NodeAndRegistration** node_and_registrations) {
RecordedAllocation allocations = SnapshotAllocationUsage();

TfLiteStatus status = MicroAllocator::AllocateNodeAndRegistrations(
model, node_and_registrations);

RecordAllocationUsage(allocations,
recorded_node_and_registration_array_data_);
// The allocation count in SimpleMemoryAllocator will only be 1. To provide
// better logging, decrement by 1 and add in the actual number of operators
// used in the graph:
// The allocation for this recording will always be 1. This is because the
// parent class mallocs one large allocation for the number of nodes in the
// graph (e.g. sizeof(NodeAndRegistration) * num_nodes).
// To prevent extra overhead and potential for fragmentation, manually adjust
// the accounting by decrementing by 1 and adding the actual number of nodes
// used in the graph:
recorded_node_and_registration_array_data_.count +=
GetSubGraphFromModel(model)->operators()->size() - 1;
return status;
}

TfLiteStatus
RecordingMicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer(
const Model* model, const MicroOpResolver& op_resolver,
NodeAndRegistration* node_and_registrations) {
RecordedAllocation allocations = SnapshotAllocationUsage();

TfLiteStatus status =
MicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer(
model, op_resolver, node_and_registrations);

RecordAllocationUsage(allocations, recorded_op_data_);
return status;
}

TfLiteStatus RecordingMicroAllocator::AllocateTfLiteEvalTensors(
const Model* model, TfLiteEvalTensor** eval_tensors) {
RecordedAllocation allocations = SnapshotAllocationUsage();

TfLiteStatus status =
MicroAllocator::AllocateTfLiteEvalTensors(model, eval_tensors);

RecordAllocationUsage(allocations, recorded_tflite_eval_tensor_data_);
// The allocation for this recording will always be 1. This is because the
// parent class mallocs one large allocation for the number of tensors in the
// graph (e.g. sizeof(TfLiteEvalTensor) * num_tensors).
// To prevent extra overhead and potential for fragmentation, manually adjust
// the accounting by decrementing by 1 and adding the actual number of tensors
// used in the graph:
recorded_tflite_eval_tensor_data_.count +=
GetSubGraphFromModel(model)->tensors()->size() - 1;
return status;
}

TfLiteStatus RecordingMicroAllocator::AllocateVariables(
const SubGraph* subgraph, TfLiteEvalTensor* eval_tensors) {
RecordedAllocation allocations = SnapshotAllocationUsage();

TfLiteStatus status =
MicroAllocator::AllocateVariables(subgraph, eval_tensors);

RecordAllocationUsage(allocations,
recorded_tflite_tensor_variable_buffer_data_);
return status;
}

TfLiteTensor* RecordingMicroAllocator::AllocatePersistentTfLiteTensorInternal(
const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index) {
RecordedAllocation allocations = SnapshotAllocationUsage();

TfLiteTensor* result = MicroAllocator::AllocatePersistentTfLiteTensorInternal(
|
||||
model, eval_tensors, tensor_index);
|
||||
|
||||
RecordAllocationUsage(allocations, recorded_persistent_tflite_tensor_data_);
|
||||
return result;
|
||||
}
|
||||
|
||||
TfLiteStatus RecordingMicroAllocator::PopulateTfLiteTensorFromFlatbuffer(
|
||||
const Model* model, const SubGraph* subgraph, TfLiteTensor* tensor,
|
||||
int tensor_index, bool allocate_temp) {
|
||||
RecordedAllocation allocations = SnapshotAllocationUsage();
|
||||
|
||||
TfLiteStatus status = MicroAllocator::PopulateTfLiteTensorFromFlatbuffer(
|
||||
model, subgraph, tensor, tensor_index, allocate_temp);
|
||||
|
||||
RecordAllocationUsage(allocations,
|
||||
recorded_persistent_tflite_tensor_quantization_data_);
|
||||
return status;
|
||||
}
|
||||
|
||||
RecordedAllocation RecordingMicroAllocator::SnapshotAllocationUsage() const {
|
||||
return {/*requested_bytes=*/recording_memory_allocator_->GetRequestedBytes(),
|
||||
/*used_bytes=*/recording_memory_allocator_->GetUsedBytes(),
|
||||
/*count=*/recording_memory_allocator_->GetAllocatedCount()};
|
||||
}
|
||||
|
||||
void RecordingMicroAllocator::RecordAllocationUsage(
|
||||
const RecordedAllocation& snapshotted_allocation,
|
||||
RecordedAllocation& recorded_allocation) {
|
||||
recorded_allocation.requested_bytes +=
|
||||
recording_memory_allocator_->GetRequestedBytes() -
|
||||
snapshotted_allocation.requested_bytes;
|
||||
recorded_allocation.used_bytes +=
|
||||
recording_memory_allocator_->GetUsedBytes() -
|
||||
snapshotted_allocation.used_bytes;
|
||||
recorded_allocation.count +=
|
||||
recording_memory_allocator_->GetAllocatedCount() -
|
||||
snapshotted_allocation.count;
|
||||
}
|
||||
|
||||
} // namespace tflite
|
||||
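// Every recording override above follows the same delta pattern: snapshot the
// allocator counters, delegate the real work to the parent MicroAllocator,
// then attribute the difference to one RecordedAllocation bucket. A minimal
// sketch of that pattern (illustrative only; DoWork and recorded_bucket_ are
// hypothetical names, not part of this file):
//
//   TfLiteStatus RecordingMicroAllocator::DoWork(const Model* model) {
//     RecordedAllocation before = SnapshotAllocationUsage();
//     TfLiteStatus status = MicroAllocator::DoWork(model);  // real allocation
//     RecordAllocationUsage(before, recorded_bucket_);      // store the delta
//     return status;
//   }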
@@ -0,0 +1,125 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_RECORDING_MICRO_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_RECORDING_MICRO_ALLOCATOR_H_

#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/recording_simple_memory_allocator.h"

namespace tflite {

// List of buckets currently recorded by this class. Each type keeps a list of
// allocated information during model initialization.
// TODO(b/169834511): Add tracking for scratch buffer allocations.
enum class RecordedAllocationType {
  kTfLiteEvalTensorData,
  kPersistentTfLiteTensorData,
  kPersistentTfLiteTensorQuantizationData,
  kPersistentBufferData,
  kTfLiteTensorVariableBufferData,
  kNodeAndRegistrationArray,
  kOpData,
};

// Container for holding information about allocation recordings by a given
// type. Each recording contains the number of bytes requested, the actual
// bytes allocated (which can differ from the requested size due to alignment),
// and the number of items allocated.
struct RecordedAllocation {
  size_t requested_bytes;
  size_t used_bytes;
  size_t count;
};

// Utility subclass of MicroAllocator that records all allocations
// inside the arena. A summary of allocations can be logged through the
// ErrorReporter by invoking PrintAllocations(). This special allocator
// requires an instance of RecordingSimpleMemoryAllocator to capture
// allocations in the head and tail. Arena allocation recording can be
// retrieved by type through the GetRecordedAllocation() function. This class
// should only be used for auditing memory usage or integration testing.
class RecordingMicroAllocator : public MicroAllocator {
 public:
  static RecordingMicroAllocator* Create(uint8_t* tensor_arena,
                                         size_t arena_size,
                                         ErrorReporter* error_reporter);

  // Returns the recorded allocation information for a given allocation type.
  RecordedAllocation GetRecordedAllocation(
      RecordedAllocationType allocation_type) const;

  const RecordingSimpleMemoryAllocator* GetSimpleMemoryAllocator() const;

  // Logs out through the ErrorReporter all allocation recordings by type
  // defined in RecordedAllocationType.
  void PrintAllocations() const;

  void* AllocatePersistentBuffer(size_t bytes) override;

 protected:
  TfLiteStatus AllocateNodeAndRegistrations(
      const Model* model,
      NodeAndRegistration** node_and_registrations) override;
  TfLiteStatus PrepareNodeAndRegistrationDataFromFlatbuffer(
      const Model* model, const MicroOpResolver& op_resolver,
      NodeAndRegistration* node_and_registrations) override;
  TfLiteStatus AllocateTfLiteEvalTensors(
      const Model* model, TfLiteEvalTensor** eval_tensors) override;
  TfLiteStatus AllocateVariables(const SubGraph* subgraph,
                                 TfLiteEvalTensor* eval_tensors) override;
  // TODO(b/162311891): Once all kernels have been updated to the new API drop
  // this method. It is only used to record TfLiteTensor persistent
  // allocations.
  TfLiteTensor* AllocatePersistentTfLiteTensorInternal(
      const Model* model, TfLiteEvalTensor* eval_tensors,
      int tensor_index) override;
  // TODO(b/162311891): Once all kernels have been updated to the new API drop
  // this function since all allocations for quantized data will take place in
  // the temp section.
  TfLiteStatus PopulateTfLiteTensorFromFlatbuffer(const Model* model,
                                                  const SubGraph* subgraph,
                                                  TfLiteTensor* tensor,
                                                  int tensor_index,
                                                  bool allocate_temp) override;

 private:
  RecordingMicroAllocator(RecordingSimpleMemoryAllocator* memory_allocator,
                          ErrorReporter* error_reporter);

  void PrintRecordedAllocation(RecordedAllocationType allocation_type,
                               const char* allocation_name,
                               const char* allocation_description) const;

  RecordedAllocation SnapshotAllocationUsage() const;
  void RecordAllocationUsage(const RecordedAllocation& snapshotted_allocation,
                             RecordedAllocation& recorded_allocation);

  const RecordingSimpleMemoryAllocator* recording_memory_allocator_;

  RecordedAllocation recorded_tflite_eval_tensor_data_ = {};
  RecordedAllocation recorded_persistent_tflite_tensor_data_ = {};
  RecordedAllocation recorded_persistent_tflite_tensor_quantization_data_ = {};
  RecordedAllocation recorded_persistent_buffer_data_ = {};
  RecordedAllocation recorded_tflite_tensor_variable_buffer_data_ = {};
  RecordedAllocation recorded_node_and_registration_array_data_ = {};
  RecordedAllocation recorded_op_data_ = {};

  TF_LITE_REMOVE_VIRTUAL_DELETE
};

}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_RECORDING_MICRO_ALLOCATOR_H_
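// A typical integration-test query against this class (a hedged sketch; the
// arena size, the error reporter, and the initialization step are assumptions
// for illustration, not defined in this header):
//
//   uint8_t arena[4096];
//   tflite::RecordingMicroAllocator* allocator =
//       tflite::RecordingMicroAllocator::Create(arena, sizeof(arena),
//                                               &error_reporter);
//   // ... run model initialization through an interpreter ...
//   tflite::RecordedAllocation stats = allocator->GetRecordedAllocation(
//       tflite::RecordedAllocationType::kTfLiteEvalTensorData);
//   // stats.used_bytes >= stats.requested_bytes due to alignment padding.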
@@ -0,0 +1,65 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_RECORDING_MICRO_INTERPRETER_H_
#define TENSORFLOW_LITE_MICRO_RECORDING_MICRO_INTERPRETER_H_

#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/micro/recording_micro_allocator.h"

namespace tflite {

// Utility subclass that enables internal recordings of the MicroInterpreter.
// This class should be used to audit and analyze memory arena usage for a
// given model and interpreter.
//
// After construction and the first Invoke() or AllocateTensors() call, the
// memory usage is recorded and available through the GetMicroAllocator()
// function. See RecordingMicroAllocator for more details on what is currently
// recorded from arena allocations.
//
// It is recommended that users increase the tensor arena size by at least 1 KB
// to ensure enough additional memory is available for internal recordings.
class RecordingMicroInterpreter : public MicroInterpreter {
 public:
  RecordingMicroInterpreter(const Model* model,
                            const MicroOpResolver& op_resolver,
                            uint8_t* tensor_arena, size_t tensor_arena_size,
                            ErrorReporter* error_reporter)
      : MicroInterpreter(model, op_resolver,
                         RecordingMicroAllocator::Create(
                             tensor_arena, tensor_arena_size, error_reporter),
                         error_reporter),
        recording_micro_allocator_(
            static_cast<const RecordingMicroAllocator&>(allocator())) {}

  RecordingMicroInterpreter(const Model* model,
                            const MicroOpResolver& op_resolver,
                            RecordingMicroAllocator* allocator,
                            ErrorReporter* error_reporter)
      : MicroInterpreter(model, op_resolver, allocator, error_reporter),
        recording_micro_allocator_(*allocator) {}

  const RecordingMicroAllocator& GetMicroAllocator() const {
    return recording_micro_allocator_;
  }

 private:
  const RecordingMicroAllocator& recording_micro_allocator_;
};

}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_RECORDING_MICRO_INTERPRETER_H_
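// Intended usage (a minimal sketch; kArenaSize, the model pointer, and the op
// resolver are assumptions for illustration, not defined in this header):
//
//   constexpr size_t kArenaSize = 16 * 1024;  // includes >= 1 KB headroom
//   uint8_t tensor_arena[kArenaSize];
//   tflite::RecordingMicroInterpreter interpreter(
//       model, op_resolver, tensor_arena, kArenaSize, &error_reporter);
//   interpreter.AllocateTensors();
//   // Recordings are now populated; log a per-bucket summary:
//   interpreter.GetMicroAllocator().PrintAllocations();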
@@ -0,0 +1,84 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/micro/recording_simple_memory_allocator.h"

#include <new>

#include "tensorflow/lite/kernels/internal/compatibility.h"

namespace tflite {

RecordingSimpleMemoryAllocator::RecordingSimpleMemoryAllocator(
    ErrorReporter* error_reporter, uint8_t* buffer_head, size_t buffer_size)
    : SimpleMemoryAllocator(error_reporter, buffer_head, buffer_size),
      requested_head_bytes_(0),
      requested_tail_bytes_(0),
      used_bytes_(0),
      alloc_count_(0) {}

RecordingSimpleMemoryAllocator::~RecordingSimpleMemoryAllocator() {}

RecordingSimpleMemoryAllocator* RecordingSimpleMemoryAllocator::Create(
    ErrorReporter* error_reporter, uint8_t* buffer_head, size_t buffer_size) {
  TFLITE_DCHECK(error_reporter != nullptr);
  TFLITE_DCHECK(buffer_head != nullptr);
  RecordingSimpleMemoryAllocator tmp =
      RecordingSimpleMemoryAllocator(error_reporter, buffer_head, buffer_size);

  uint8_t* allocator_buffer =
      tmp.AllocateFromTail(sizeof(RecordingSimpleMemoryAllocator),
                           alignof(RecordingSimpleMemoryAllocator));
  // Use the default copy constructor to populate internal states.
  return new (allocator_buffer) RecordingSimpleMemoryAllocator(tmp);
}

size_t RecordingSimpleMemoryAllocator::GetRequestedBytes() const {
  return requested_head_bytes_ + requested_tail_bytes_;
}

size_t RecordingSimpleMemoryAllocator::GetUsedBytes() const {
  return used_bytes_;
}

size_t RecordingSimpleMemoryAllocator::GetAllocatedCount() const {
  return alloc_count_;
}

TfLiteStatus RecordingSimpleMemoryAllocator::SetHeadBufferSize(
    size_t size, size_t alignment) {
  const uint8_t* previous_head = head();
  TfLiteStatus status =
      SimpleMemoryAllocator::SetHeadBufferSize(size, alignment);
  if (status == kTfLiteOk) {
    used_bytes_ += head() - previous_head;
    requested_head_bytes_ = size;
  }
  return status;
}

uint8_t* RecordingSimpleMemoryAllocator::AllocateFromTail(size_t size,
                                                          size_t alignment) {
  const uint8_t* previous_tail = tail();
  uint8_t* result = SimpleMemoryAllocator::AllocateFromTail(size, alignment);
  if (result != nullptr) {
    used_bytes_ += previous_tail - tail();
    requested_tail_bytes_ += size;
    alloc_count_++;
  }
  return result;
}

}  // namespace tflite
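// Note on the Create() pattern above: the allocator is bootstrapped inside the
// very arena it manages. A stack-local instance first carves out room for the
// object from the tail of the buffer, then placement-new copies that instance
// into the carved-out slot, so the returned allocator already accounts for its
// own storage. A condensed sketch of the idiom (illustrative only):
//
//   RecordingSimpleMemoryAllocator tmp(reporter, buffer, size);
//   uint8_t* slot = tmp.AllocateFromTail(sizeof(tmp), alignof(decltype(tmp)));
//   return new (slot) RecordingSimpleMemoryAllocator(tmp);  // copy into arena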
@@ -0,0 +1,64 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_RECORDING_SIMPLE_MEMORY_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_RECORDING_SIMPLE_MEMORY_ALLOCATOR_H_

#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"

namespace tflite {

// Utility class used to log allocations of a SimpleMemoryAllocator. Should
// only be used in debug/evaluation settings or unit tests to evaluate
// allocation usage.
class RecordingSimpleMemoryAllocator : public SimpleMemoryAllocator {
 public:
  RecordingSimpleMemoryAllocator(ErrorReporter* error_reporter,
                                 uint8_t* buffer_head, size_t buffer_size);
  // TODO(b/157615197): Cleanup constructors/destructor and use factory
  // functions.
  ~RecordingSimpleMemoryAllocator() override;

  static RecordingSimpleMemoryAllocator* Create(ErrorReporter* error_reporter,
                                                uint8_t* buffer_head,
                                                size_t buffer_size);

  // Returns the number of bytes requested from the head or tail.
  size_t GetRequestedBytes() const;

  // Returns the number of bytes actually allocated from the head or tail.
  // This value will be >= the number of requested bytes due to padding and
  // alignment.
  size_t GetUsedBytes() const;

  // Returns the number of alloc calls from the head or tail.
  size_t GetAllocatedCount() const;

  TfLiteStatus SetHeadBufferSize(size_t size, size_t alignment) override;
  uint8_t* AllocateFromTail(size_t size, size_t alignment) override;

 private:
  size_t requested_head_bytes_;
  size_t requested_tail_bytes_;
  size_t used_bytes_;
  size_t alloc_count_;

  TF_LITE_REMOVE_VIRTUAL_DELETE
};

}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_RECORDING_SIMPLE_MEMORY_ALLOCATOR_H_
@@ -1,4 +1,4 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -17,50 +17,133 @@ limitations under the License.

#include <cstddef>
#include <cstdint>
#include <new>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/memory_helpers.h"

namespace tflite {

SimpleMemoryAllocator* CreateInPlaceSimpleMemoryAllocator(
    ErrorReporter* error_reporter, uint8_t* buffer, size_t buffer_size) {
SimpleMemoryAllocator::SimpleMemoryAllocator(ErrorReporter* error_reporter,
                                             uint8_t* buffer_head,
                                             uint8_t* buffer_tail)
    : error_reporter_(error_reporter),
      buffer_head_(buffer_head),
      buffer_tail_(buffer_tail),
      head_(buffer_head),
      tail_(buffer_tail),
      temp_(buffer_head_) {}

SimpleMemoryAllocator::SimpleMemoryAllocator(ErrorReporter* error_reporter,
                                             uint8_t* buffer,
                                             size_t buffer_size)
    : SimpleMemoryAllocator(error_reporter, buffer, buffer + buffer_size) {}

/* static */
SimpleMemoryAllocator* SimpleMemoryAllocator::Create(
    ErrorReporter* error_reporter, uint8_t* buffer_head, size_t buffer_size) {
  TFLITE_DCHECK(error_reporter != nullptr);
  TFLITE_DCHECK(buffer_head != nullptr);
  SimpleMemoryAllocator tmp =
      SimpleMemoryAllocator(error_reporter, buffer, buffer_size);
  SimpleMemoryAllocator* in_place_allocator =
      reinterpret_cast<SimpleMemoryAllocator*>(tmp.AllocateFromTail(
          sizeof(SimpleMemoryAllocator), alignof(SimpleMemoryAllocator)));
  *in_place_allocator = tmp;
  return in_place_allocator;
      SimpleMemoryAllocator(error_reporter, buffer_head, buffer_size);

  // Allocate enough bytes from the buffer to create a SimpleMemoryAllocator.
  // The new instance will use the current adjusted tail buffer from the tmp
  // allocator instance.
  uint8_t* allocator_buffer = tmp.AllocateFromTail(
      sizeof(SimpleMemoryAllocator), alignof(SimpleMemoryAllocator));
  // Use the default copy constructor to populate internal states.
  return new (allocator_buffer) SimpleMemoryAllocator(tmp);
}

uint8_t* SimpleMemoryAllocator::AllocateFromHead(size_t size,
                                                 size_t alignment) {
  uint8_t* const aligned_result = AlignPointerUp(head_, alignment);
SimpleMemoryAllocator::~SimpleMemoryAllocator() {}

TfLiteStatus SimpleMemoryAllocator::SetHeadBufferSize(size_t size,
                                                      size_t alignment) {
  if (head_ != temp_) {
    TF_LITE_REPORT_ERROR(
        error_reporter_,
        "Internal error: SetHeadBufferSize() needs to be called "
        "after ResetTempAllocations().");
    return kTfLiteError;
  }

  uint8_t* const aligned_result = AlignPointerUp(buffer_head_, alignment);
  const size_t available_memory = tail_ - aligned_result;
  if (available_memory < size) {
    TF_LITE_REPORT_ERROR(
        error_reporter_,
        "Failed to allocate memory. Requested: %u, available %u, missing: %u",
        "Failed to set head size. Requested: %u, available %u, missing: %u",
        size, available_memory, size - available_memory);
    return nullptr;
    return kTfLiteError;
  }
  head_ = aligned_result + size;
  return aligned_result;
  temp_ = head_;

  return kTfLiteOk;
}

uint8_t* SimpleMemoryAllocator::AllocateFromTail(size_t size,
                                                 size_t alignment) {
  uint8_t* const aligned_result = AlignPointerDown(tail_ - size, alignment);
  if (aligned_result < head_) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
    const size_t missing_memory = head_ - aligned_result;
    TF_LITE_REPORT_ERROR(
        error_reporter_,
        "Failed to allocate memory. Requested: %u, available %u, missing: %u",
        size, size - missing_memory, missing_memory);
    TF_LITE_REPORT_ERROR(error_reporter_,
                         "Failed to allocate tail memory. Requested: %u, "
                         "available %u, missing: %u",
                         size, size - missing_memory, missing_memory);
#endif
    return nullptr;
  }
  tail_ = aligned_result;
  return aligned_result;
}

uint8_t* SimpleMemoryAllocator::AllocateTemp(size_t size, size_t alignment) {
  uint8_t* const aligned_result = AlignPointerUp(temp_, alignment);
  const size_t available_memory = tail_ - aligned_result;
  if (available_memory < size) {
    TF_LITE_REPORT_ERROR(error_reporter_,
                         "Failed to allocate temp memory. Requested: %u, "
                         "available %u, missing: %u",
                         size, available_memory, size - available_memory);
    return nullptr;
  }
  temp_ = aligned_result + size;
  return aligned_result;
}

void SimpleMemoryAllocator::ResetTempAllocations() { temp_ = head_; }

uint8_t* SimpleMemoryAllocator::GetHeadBuffer() const { return buffer_head_; }

size_t SimpleMemoryAllocator::GetHeadUsedBytes() const {
  return head_ - buffer_head_;
}

size_t SimpleMemoryAllocator::GetTailUsedBytes() const {
  return buffer_tail_ - tail_;
}

size_t SimpleMemoryAllocator::GetAvailableMemory(size_t alignment) const {
  uint8_t* const aligned_temp = AlignPointerUp(temp_, alignment);
  uint8_t* const aligned_tail = AlignPointerDown(tail_, alignment);
  return aligned_tail - aligned_temp;
}

size_t SimpleMemoryAllocator::GetUsedBytes() const {
  return GetBufferSize() - (tail_ - temp_);
}

size_t SimpleMemoryAllocator::GetBufferSize() const {
  return buffer_tail_ - buffer_head_;
}

uint8_t* SimpleMemoryAllocator::head() const { return head_; }

uint8_t* SimpleMemoryAllocator::tail() const { return tail_; }

}  // namespace tflite
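// The arena layout that the allocator above maintains, for reference (a
// descriptive sketch, not code): the head grows upward from buffer_head_, the
// tail grows downward from buffer_tail_, and temp_ tracks transient
// allocations stacked on top of the head section.
//
//   buffer_head_                                             buffer_tail_
//   |----- head -----|--- temp ---|------ free ------|------ tail ------|
//                  head_        temp_              tail_
//
// AllocateFromTail() moves tail_ left, AllocateTemp() moves temp_ right, and
// SetHeadBufferSize() repositions head_ (and temp_) relative to buffer_head_.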
@@ -1,4 +1,4 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -16,10 +16,12 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_MICRO_SIMPLE_MEMORY_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_SIMPLE_MEMORY_ALLOCATOR_H_

#include <cstddef>
#include <cstdint>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/micro/compatibility.h"

namespace tflite {

@@ -28,43 +30,82 @@ namespace tflite {
// This makes it pretty wasteful, so we should use a more intelligent method.
class SimpleMemoryAllocator {
 public:
  // TODO(b/157615197): Cleanup constructors/destructor and use factory
  // functions.
  SimpleMemoryAllocator(ErrorReporter* error_reporter, uint8_t* buffer_head,
                        uint8_t* buffer_tail)
      : error_reporter_(error_reporter),
        buffer_head_(buffer_head),
        buffer_tail_(buffer_tail),
        head_(buffer_head),
        tail_(buffer_tail) {}
                        uint8_t* buffer_tail);
  SimpleMemoryAllocator(ErrorReporter* error_reporter, uint8_t* buffer,
                        size_t buffer_size)
      : SimpleMemoryAllocator(error_reporter, buffer, buffer + buffer_size) {}
                        size_t buffer_size);
  virtual ~SimpleMemoryAllocator();

  // Creates a new SimpleMemoryAllocator from a given buffer head and size.
  static SimpleMemoryAllocator* Create(ErrorReporter* error_reporter,
                                       uint8_t* buffer_head,
                                       size_t buffer_size);

  // Adjusts the head (lowest address and moving upwards) memory allocation to
  // a given size. Calls to this method will also invalidate all temporary
  // allocation values (it sets the location of temp space at the end of the
  // head section). This call will fail if a chain of allocations through
  // AllocateTemp() have not been cleaned up with a call to
  // ResetTempAllocations().
  virtual TfLiteStatus SetHeadBufferSize(size_t size, size_t alignment);

  // Allocates memory starting at the head of the arena (lowest address and
  // moving upwards).
  uint8_t* AllocateFromHead(size_t size, size_t alignment);
  // Allocates memory starting at the tail of the arena (highest address and
  // moving downwards).
  uint8_t* AllocateFromTail(size_t size, size_t alignment);
  virtual uint8_t* AllocateFromTail(size_t size, size_t alignment);

  uint8_t* GetHead() const { return head_; }
  uint8_t* GetTail() const { return tail_; }
  size_t GetAvailableMemory() const { return tail_ - head_; }
  size_t GetUsedBytes() const { return GetBufferSize() - GetAvailableMemory(); }
  // Allocates a temporary buffer from the head of the arena (lowest address
  // and moving upwards) but does not update the actual head allocation size or
  // position. The returned buffer is guaranteed valid until either
  // ResetTempAllocations() is called or another call to AllocateFromHead().
  // Repeat calls to this function will create a chain of temp allocations. All
  // calls to AllocateTemp() must end with a call to ResetTempAllocations(). If
  // AllocateFromHead() is called before a call to ResetTempAllocations(), it
  // will fail with an error message.
  virtual uint8_t* AllocateTemp(size_t size, size_t alignment);

  // Resets a chain of temporary allocations back to the current head of the
  // arena (lowest address).
  virtual void ResetTempAllocations();

  // Returns a pointer to the buffer currently assigned to the head section.
  // This buffer is set by calling SetHeadBufferSize().
  uint8_t* GetHeadBuffer() const;

  // Returns the size of the head section in bytes.
  size_t GetHeadUsedBytes() const;

  // Returns the size of all allocations in the tail section in bytes.
  size_t GetTailUsedBytes() const;

  // Returns the number of bytes available with a given alignment. This number
  // takes into account any temporary allocations.
  size_t GetAvailableMemory(size_t alignment) const;

  // Returns the number of used bytes in the allocator. This number takes into
  // account any temporary allocations.
  size_t GetUsedBytes() const;

 protected:
  // Returns a pointer to the current end of the head buffer.
  uint8_t* head() const;

  // Returns a pointer to the current end of the tail buffer.
  uint8_t* tail() const;

 private:
  size_t GetBufferSize() const { return buffer_tail_ - buffer_head_; }
  size_t GetBufferSize() const;

  ErrorReporter* error_reporter_;
  uint8_t* buffer_head_;
  uint8_t* buffer_tail_;
  uint8_t* head_;
  uint8_t* tail_;
};
  uint8_t* temp_;

// Allocate a SimpleMemoryAllocator from the buffer and then return the pointer
// to this allocator.
SimpleMemoryAllocator* CreateInPlaceSimpleMemoryAllocator(
    ErrorReporter* error_reporter, uint8_t* buffer, size_t buffer_size);
  TF_LITE_REMOVE_VIRTUAL_DELETE
};

}  // namespace tflite
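// The documented ordering constraint, as a short sketch (illustrative only;
// `allocator` is assumed to be a SimpleMemoryAllocator created via Create()):
//
//   uint8_t* scratch = allocator->AllocateTemp(/*size=*/64, /*alignment=*/4);
//   // ... use scratch transiently ...
//   allocator->ResetTempAllocations();     // required before resizing the head
//   allocator->SetHeadBufferSize(256, 4);  // now succeeds (kTfLiteOk)
//
// Calling SetHeadBufferSize() while temp allocations are outstanding returns
// kTfLiteError, as enforced in the .cc file above.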
@@ -15,14 +15,24 @@ limitations under the License.

#include "tensorflow/lite/micro/test_helpers.h"

#include <cstdarg>
#include <cstddef>
#include <cstdint>
#include <initializer_list>
#include <new>

#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/tensor_utils.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/micro_utils.h"
#include "tensorflow/lite/schema/schema_generated.h"

// TODO(b/170464050): Use TFLM test only version of schema_utils.

namespace tflite {
namespace testing {
namespace {
@@ -48,7 +58,7 @@ class StackAllocator : public flatbuffers::Allocator {
    return *inst;
  }

  static constexpr size_t kStackAllocatorSize = 4096;
  static constexpr size_t kStackAllocatorSize = 8192;

 private:
  uint8_t data_backing_[kStackAllocatorSize];
@@ -76,8 +86,7 @@ class ModelBuilder {
      : builder_(builder) {}

  // Registers an operator that will be used in the model.
  Operator RegisterOp(BuiltinOperator op, const char* custom_code,
                      int32_t version);
  Operator RegisterOp(BuiltinOperator op, const char* custom_code);

  // Adds a tensor to the model.
  Tensor AddTensor(TensorType type, std::initializer_list<int32_t> shape) {
@@ -94,10 +103,16 @@ class ModelBuilder {
  Node AddNode(Operator op, std::initializer_list<Tensor> inputs,
               std::initializer_list<Tensor> outputs);

  void AddMetadata(const char* description_string,
                   const int32_t* metadata_buffer_data, size_t num_elements);

  // Constructs the flatbuffer model using `builder_` and returns a pointer to
  // it. The returned model has the same lifetime as `builder_`.
  // Note the default value of 0 for num_subgraph_inputs means all tensor
  // inputs are in the subgraph input list.
  const Model* BuildModel(std::initializer_list<Tensor> inputs,
                          std::initializer_list<Tensor> outputs);
                          std::initializer_list<Tensor> outputs,
                          size_t num_subgraph_inputs = 0);

 private:
  // Adds a tensor to the model.
@@ -116,15 +131,24 @@ class ModelBuilder {

  static constexpr int kMaxTensors = 50;
  flatbuffers::Offset<tflite::Tensor> tensors_[kMaxTensors];

  static constexpr int kMaxMetadataBuffers = 10;

  static constexpr int kMaxMetadatas = 10;
  flatbuffers::Offset<Metadata> metadata_[kMaxMetadatas];

  flatbuffers::Offset<Buffer> metadata_buffers_[kMaxMetadataBuffers];

  int nbr_of_metadata_buffers_ = 0;

  int next_tensor_id_ = 0;
};

ModelBuilder::Operator ModelBuilder::RegisterOp(BuiltinOperator op,
                                                const char* custom_code,
                                                int32_t version) {
                                                const char* custom_code) {
  TFLITE_DCHECK(next_operator_code_id_ <= kMaxOperatorCodes);
  operator_codes_[next_operator_code_id_] =
      tflite::CreateOperatorCodeDirect(*builder_, op, custom_code, version);
  operator_codes_[next_operator_code_id_] = tflite::CreateOperatorCodeDirect(
      *builder_, /*deprecated_builtin_code=*/0, custom_code, /*version=*/0, op);
  next_operator_code_id_++;
  return next_operator_code_id_ - 1;
}
@@ -142,29 +166,75 @@ ModelBuilder::Node ModelBuilder::AddNode(
  return next_operator_id_ - 1;
}

void ModelBuilder::AddMetadata(const char* description_string,
                               const int32_t* metadata_buffer_data,
                               size_t num_elements) {
  metadata_[ModelBuilder::nbr_of_metadata_buffers_] =
      CreateMetadata(*builder_, builder_->CreateString(description_string),
                     1 + ModelBuilder::nbr_of_metadata_buffers_);

  metadata_buffers_[nbr_of_metadata_buffers_] = tflite::CreateBuffer(
      *builder_, builder_->CreateVector((uint8_t*)metadata_buffer_data,
                                        sizeof(uint32_t) * num_elements));

  ModelBuilder::nbr_of_metadata_buffers_++;
}

const Model* ModelBuilder::BuildModel(
    std::initializer_list<ModelBuilder::Tensor> inputs,
    std::initializer_list<ModelBuilder::Tensor> outputs) {
    std::initializer_list<ModelBuilder::Tensor> outputs,
    size_t num_subgraph_inputs) {
  // Model schema requires an empty buffer at idx 0.
  constexpr size_t kBufferSize = 1;
  const flatbuffers::Offset<Buffer> buffers[kBufferSize] = {
      tflite::CreateBuffer(*builder_)};
  size_t buffer_size = 1 + ModelBuilder::nbr_of_metadata_buffers_;
  flatbuffers::Offset<Buffer> buffers[kMaxMetadataBuffers];
  buffers[0] = tflite::CreateBuffer(*builder_);

  // Place the metadata buffers first in the buffer list since the indices for
  // them have already been set in AddMetadata().
  for (int i = 1; i < ModelBuilder::nbr_of_metadata_buffers_ + 1; ++i) {
    buffers[i] = metadata_buffers_[i - 1];
  }

  // TFLM only supports a single subgraph.
  constexpr size_t subgraphs_size = 1;

  // Determine the number of subgraph inputs.
  if (num_subgraph_inputs == 0) {
    // This is the default case.
    num_subgraph_inputs = inputs.size();
  } else {
    // A non-zero value of num_subgraph_inputs means that some of
    // the operator input tensors are not subgraph inputs.
    TFLITE_DCHECK(num_subgraph_inputs <= inputs.size());
  }

  const flatbuffers::Offset<SubGraph> subgraphs[subgraphs_size] = {
      tflite::CreateSubGraph(
          *builder_, builder_->CreateVector(tensors_, next_tensor_id_),
          builder_->CreateVector(inputs.begin(), inputs.size()),
          builder_->CreateVector(inputs.begin(), num_subgraph_inputs),
          builder_->CreateVector(outputs.begin(), outputs.size()),
          builder_->CreateVector(operators_, next_operator_id_),
          builder_->CreateString("test_subgraph"))};
  const flatbuffers::Offset<Model> model_offset = tflite::CreateModel(
      *builder_, 0,
      builder_->CreateVector(operator_codes_, next_operator_code_id_),
      builder_->CreateVector(subgraphs, subgraphs_size),
      builder_->CreateString("test_model"),
      builder_->CreateVector(buffers, kBufferSize));

  flatbuffers::Offset<Model> model_offset;
  if (ModelBuilder::nbr_of_metadata_buffers_ > 0) {
    model_offset = tflite::CreateModel(
        *builder_, 0,
        builder_->CreateVector(operator_codes_, next_operator_code_id_),
        builder_->CreateVector(subgraphs, subgraphs_size),
        builder_->CreateString("test_model"),
        builder_->CreateVector(buffers, buffer_size), 0,
        builder_->CreateVector(metadata_,
                               ModelBuilder::nbr_of_metadata_buffers_));
  } else {
    model_offset = tflite::CreateModel(
        *builder_, 0,
        builder_->CreateVector(operator_codes_, next_operator_code_id_),
        builder_->CreateVector(subgraphs, subgraphs_size),
        builder_->CreateString("test_model"),
        builder_->CreateVector(buffers, buffer_size));
  }

  tflite::FinishModelBuffer(*builder_, model_offset);
  void* model_pointer = builder_->GetBufferPointer();
  const Model* model = flatbuffers::GetRoot<Model>(model_pointer);
@@ -190,7 +260,7 @@ const Model* BuildSimpleStatefulModel() {
  ModelBuilder model_builder(fb_builder);

  const int op_id =
      model_builder.RegisterOp(BuiltinOperator_CUSTOM, "simple_stateful_op", 0);
      model_builder.RegisterOp(BuiltinOperator_CUSTOM, "simple_stateful_op");
  const int input_tensor = model_builder.AddTensor(TensorType_UINT8, {3});
  const int median_tensor = model_builder.AddTensor(TensorType_UINT8, {3});
  const int invoke_count_tensor =
@@ -231,8 +301,7 @@ const Model* BuildSimpleModelWithBranch() {
       v
   */
  const int op_id =
      model_builder.RegisterOp(BuiltinOperator_CUSTOM, "mock_custom",
                               /* version= */ 0);
      model_builder.RegisterOp(BuiltinOperator_CUSTOM, "mock_custom");
  const int t0 = model_builder.AddTensor(TensorType_FLOAT32, {2, 2, 3});
  const int t1 = model_builder.AddTensor(TensorType_FLOAT32, {2, 2, 3});
  const int t2 = model_builder.AddTensor(TensorType_FLOAT32, {2, 2, 3});
@@ -243,6 +312,35 @@ const Model* BuildSimpleModelWithBranch() {
  return model_builder.BuildModel({t0}, {t3});
}

const Model* BuildModelWithOfflinePlanning(int number_of_tensors,
                                           const int32_t* metadata_buffer,
                                           NodeConnection* node_conn,
                                           int num_conns,
                                           int num_subgraph_inputs) {
  using flatbuffers::Offset;
  flatbuffers::FlatBufferBuilder* fb_builder = BuilderInstance();

  ModelBuilder model_builder(fb_builder);

  const int op_id =
      model_builder.RegisterOp(BuiltinOperator_CUSTOM, "mock_custom");

  for (int i = 0; i < number_of_tensors; ++i) {
    model_builder.AddTensor(TensorType_FLOAT32, {2, 2, 3});
  }

  for (int i = 0; i < num_conns; ++i) {
    model_builder.AddNode(op_id, node_conn[i].input, node_conn[i].output);
  }

  model_builder.AddMetadata(
      "OfflineMemoryAllocation", metadata_buffer,
      number_of_tensors + tflite::testing::kOfflinePlannerHeaderSize);

  return model_builder.BuildModel(
      node_conn[0].input, node_conn[num_conns - 1].output, num_subgraph_inputs);
}

|
||||
using flatbuffers::Offset;
|
||||
flatbuffers::FlatBufferBuilder* builder = BuilderInstance();
|
||||
@@ -306,8 +404,9 @@ const Model* BuildSimpleMockModel() {
|
||||
builder->CreateString("test_subgraph"))};
|
||||
constexpr size_t operator_codes_size = 1;
|
||||
const Offset<OperatorCode> operator_codes[operator_codes_size] = {
|
||||
CreateOperatorCodeDirect(*builder, BuiltinOperator_CUSTOM, "mock_custom",
|
||||
0)};
|
||||
CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0,
|
||||
"mock_custom",
|
||||
/*version=*/0, BuiltinOperator_CUSTOM)};
|
||||
const Offset<Model> model_offset = CreateModel(
|
||||
*builder, 0, builder->CreateVector(operator_codes, operator_codes_size),
|
||||
builder->CreateVector(subgraphs, subgraphs_size),
|
||||
@@ -455,8 +554,9 @@ const Model* BuildComplexMockModel() {
|
||||
|
||||
constexpr size_t operator_codes_size = 1;
|
||||
const Offset<OperatorCode> operator_codes[operator_codes_size] = {
|
||||
CreateOperatorCodeDirect(*builder, BuiltinOperator_CUSTOM, "mock_custom",
|
||||
0)};
|
||||
CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0,
|
||||
"mock_custom",
|
||||
/*version=*/0, BuiltinOperator_CUSTOM)};
|
||||
|
||||
const Offset<Model> model_offset = CreateModel(
|
||||
*builder, 0, builder->CreateVector(operator_codes, operator_codes_size),
|
||||
@@ -472,6 +572,147 @@ const Model* BuildComplexMockModel() {
|
||||
|
||||
} // namespace
|
||||
|
||||
const TfLiteRegistration* SimpleStatefulOp::getRegistration() {
|
||||
return GetMutableRegistration();
|
||||
}
|
||||
|
||||
TfLiteRegistration* SimpleStatefulOp::GetMutableRegistration() {
|
||||
static TfLiteRegistration r;
|
||||
r.init = Init;
|
||||
r.prepare = Prepare;
|
||||
r.invoke = Invoke;
|
||||
return &r;
|
||||
}
|
||||
|
||||
void* SimpleStatefulOp::Init(TfLiteContext* context, const char* buffer,
|
||||
size_t length) {
|
||||
TFLITE_DCHECK(context->AllocateBufferForEval == nullptr);
|
||||
TFLITE_DCHECK(context->GetScratchBuffer == nullptr);
|
||||
TFLITE_DCHECK(context->RequestScratchBufferInArena == nullptr);
|
||||
|
||||
void* raw = context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
OpData* data = reinterpret_cast<OpData*>(raw);
|
||||
*data = {};
|
||||
return raw;
|
||||
}
|
||||
|
||||
TfLiteStatus SimpleStatefulOp::Prepare(TfLiteContext* context,
|
||||
TfLiteNode* node) {
|
||||
OpData* data = reinterpret_cast<OpData*>(node->user_data);
|
||||
|
||||
// Make sure that the input is in uint8_t with at least 1 data entry.
|
||||
const TfLiteTensor* input;
|
||||
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
|
||||
if (input->type != kTfLiteUInt8) return kTfLiteError;
|
||||
if (NumElements(input->dims) == 0) return kTfLiteError;
|
||||
|
||||
// Allocate a temporary buffer with the same size of input for sorting.
|
||||
TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
|
||||
context, sizeof(uint8_t) * NumElements(input->dims),
|
||||
&data->sorting_buffer));
|
||||
// We can interleave scratch / persistent buffer allocation.
|
||||
data->invoke_count = reinterpret_cast<int*>(
|
||||
context->AllocatePersistentBuffer(context, sizeof(int)));
|
||||
*data->invoke_count = 0;
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus SimpleStatefulOp::Invoke(TfLiteContext* context,
|
||||
TfLiteNode* node) {
|
||||
OpData* data = reinterpret_cast<OpData*>(node->user_data);
|
||||
*data->invoke_count += 1;
|
||||
|
||||
const TfLiteTensor* input;
|
||||
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
|
||||
const uint8_t* input_data = GetTensorData<uint8_t>(input);
|
||||
int size = NumElements(input->dims);
|
||||
|
||||
uint8_t* sorting_buffer = reinterpret_cast<uint8_t*>(
|
||||
context->GetScratchBuffer(context, data->sorting_buffer));
|
||||
// Copy inputs data to the sorting buffer. We don't want to mutate the input
|
||||
// tensor as it might be used by a another node.
|
||||
for (int i = 0; i < size; i++) {
|
||||
sorting_buffer[i] = input_data[i];
|
||||
}
|
||||
|
||||
// In place insertion sort on `sorting_buffer`.
|
||||
for (int i = 1; i < size; i++) {
|
||||
for (int j = i; j > 0 && sorting_buffer[j] < sorting_buffer[j - 1]; j--) {
|
||||
std::swap(sorting_buffer[j], sorting_buffer[j - 1]);
|
||||
}
|
||||
}
|
||||
|
||||
TfLiteTensor* median;
|
||||
TF_LITE_ENSURE_OK(context,
|
||||
GetOutputSafe(context, node, kMedianTensor, &median));
|
||||
uint8_t* median_data = GetTensorData<uint8_t>(median);
|
||||
TfLiteTensor* invoke_count;
|
||||
TF_LITE_ENSURE_OK(context,
|
||||
GetOutputSafe(context, node, kInvokeCount, &invoke_count));
|
||||
int32_t* invoke_count_data = GetTensorData<int32_t>(invoke_count);
|
||||
|
||||
median_data[0] = sorting_buffer[size / 2];
|
||||
invoke_count_data[0] = *data->invoke_count;
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
const TfLiteRegistration* MockCustom::getRegistration() {
|
||||
return GetMutableRegistration();
|
||||
}
|
||||
|
||||
TfLiteRegistration* MockCustom::GetMutableRegistration() {
|
||||
static TfLiteRegistration r;
|
||||
r.init = Init;
|
||||
r.prepare = Prepare;
|
||||
r.invoke = Invoke;
|
||||
r.free = Free;
|
||||
return &r;
|
||||
}
|
||||
|
||||
void* MockCustom::Init(TfLiteContext* context, const char* buffer,
|
||||
size_t length) {
|
||||
// We don't support delegate in TFL micro. This is a weak check to test if
|
||||
// context struct being zero-initialized.
|
||||
TFLITE_DCHECK(context->ReplaceNodeSubsetsWithDelegateKernels == nullptr);
|
||||
freed_ = false;
|
||||
// Do nothing.
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void MockCustom::Free(TfLiteContext* context, void* buffer) { freed_ = true; }
|
||||
|
||||
TfLiteStatus MockCustom::Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus MockCustom::Invoke(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input;
|
||||
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
|
||||
const int32_t* input_data = input->data.i32;
|
||||
const TfLiteTensor* weight;
|
||||
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 1, &weight));
|
||||
const uint8_t* weight_data = weight->data.uint8;
|
||||
TfLiteTensor* output;
|
||||
TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
|
||||
int32_t* output_data = output->data.i32;
|
||||
output_data[0] =
|
||||
0; // Catch output tensor sharing memory with an input tensor
|
||||
output_data[0] = input_data[0] + weight_data[0];
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
bool MockCustom::freed_ = false;
|
||||
|
||||
AllOpsResolver GetOpResolver() {
|
||||
AllOpsResolver op_resolver;
|
||||
op_resolver.AddCustom("mock_custom", MockCustom::GetMutableRegistration());
|
||||
op_resolver.AddCustom("simple_stateful_op",
|
||||
SimpleStatefulOp::GetMutableRegistration());
|
||||
|
||||
return op_resolver;
|
||||
}
|
||||
|
||||
const Model* GetSimpleMockModel() {
|
||||
static Model* model = nullptr;
|
||||
if (!model) {
|
||||
@@ -496,6 +737,16 @@ const Model* GetSimpleModelWithBranch() {
|
||||
return model;
|
||||
}
|
||||
|
||||
const Model* GetModelWithOfflinePlanning(int num_tensors,
|
||||
const int32_t* metadata_buffer,
|
||||
NodeConnection* node_conn,
|
||||
int num_conns,
|
||||
int num_subgraph_inputs) {
|
||||
const Model* model = BuildModelWithOfflinePlanning(
|
||||
num_tensors, metadata_buffer, node_conn, num_conns, num_subgraph_inputs);
|
||||
return model;
|
||||
}
|
||||
|
||||
const Model* GetSimpleStatefulModel() {
|
||||
static Model* model = nullptr;
|
||||
if (!model) {
|
||||
@@ -592,11 +843,13 @@ int TestStrcmp(const char* a, const char* b) {
|
||||
|
||||
// Wrapper to forward kernel errors to the interpreter's error reporter.
|
||||
void ReportOpError(struct TfLiteContext* context, const char* format, ...) {
|
||||
#ifndef TF_LITE_STRIP_ERROR_STRINGS
|
||||
ErrorReporter* error_reporter = static_cast<ErrorReporter*>(context->impl_);
|
||||
va_list args;
|
||||
va_start(args, format);
|
||||
TF_LITE_REPORT_ERROR(error_reporter, format, args);
|
||||
va_end(args);
|
||||
#endif
|
||||
}
|
||||
|
||||
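// Scratch-buffer protocol demonstrated by SimpleStatefulOp above, distilled
// (a sketch; `ctx` and `idx` are placeholder names):
//
//   // Prepare: reserve arena space and remember the integer handle.
//   ctx->RequestScratchBufferInArena(ctx, num_bytes, &idx);
//   // Invoke: resolve the handle to a live pointer on each invocation.
//   void* scratch = ctx->GetScratchBuffer(ctx, idx);
//
// Only the integer handle is stable across calls; the pointer should be
// re-fetched in every Invoke() rather than cached.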
// Create a TfLiteIntArray from an array of ints. The first element in the
@@ -616,122 +869,27 @@ TfLiteFloatArray* FloatArrayFromFloats(const float* floats) {
  return reinterpret_cast<TfLiteFloatArray*>(const_cast<float*>(floats));
}

TfLiteTensor CreateTensor(TfLiteIntArray* dims, const char* name,
                          bool is_variable) {
  TfLiteTensor result;
  result.dims = dims;
  result.name = name;
  result.params = {};
  result.quantization = {kTfLiteNoQuantization, nullptr};
  result.is_variable = is_variable;
  result.allocation_type = kTfLiteMemNone;
  result.allocation = nullptr;
  return result;
}

TfLiteTensor CreateFloatTensor(const float* data, TfLiteIntArray* dims,
                               const char* name, bool is_variable) {
  TfLiteTensor result = CreateTensor(dims, name, is_variable);
  result.type = kTfLiteFloat32;
  result.data.f = const_cast<float*>(data);
  result.bytes = ElementCount(*dims) * sizeof(float);
  return result;
}

void PopulateFloatTensor(TfLiteTensor* tensor, float* begin, float* end) {
  float* p = begin;
  float* v = tensor->data.f;
  while (p != end) {
    *v++ = *p++;
  }
}

TfLiteTensor CreateBoolTensor(const bool* data, TfLiteIntArray* dims,
                              const char* name, bool is_variable) {
  TfLiteTensor result = CreateTensor(dims, name, is_variable);
  result.type = kTfLiteBool;
  result.data.b = const_cast<bool*>(data);
  result.bytes = ElementCount(*dims) * sizeof(bool);
  return result;
}

TfLiteTensor CreateInt32Tensor(const int32_t* data, TfLiteIntArray* dims,
                               const char* name, bool is_variable) {
  TfLiteTensor result = CreateTensor(dims, name, is_variable);
  result.type = kTfLiteInt32;
  result.data.i32 = const_cast<int32_t*>(data);
  result.bytes = ElementCount(*dims) * sizeof(int32_t);
  return result;
}

TfLiteTensor CreateQuantizedTensor(const uint8_t* data, TfLiteIntArray* dims,
                                   float scale, int zero_point,
                                   const char* name, bool is_variable) {
  TfLiteTensor result = CreateTensor(dims, name, is_variable);
  result.type = kTfLiteUInt8;
  result.data.uint8 = const_cast<uint8_t*>(data);
  result.params = {scale, zero_point};
  result.quantization = {kTfLiteAffineQuantization, nullptr};
  result.bytes = ElementCount(*dims) * sizeof(uint8_t);
  return result;
}

TfLiteTensor CreateQuantizedTensor(const int8_t* data, TfLiteIntArray* dims,
                                   float scale, int zero_point,
                                   const char* name, bool is_variable) {
  TfLiteTensor result = CreateTensor(dims, name, is_variable);
  result.type = kTfLiteInt8;
  result.data.int8 = const_cast<int8_t*>(data);
  result.params = {scale, zero_point};
  result.quantization = {kTfLiteAffineQuantization, nullptr};
  result.bytes = ElementCount(*dims) * sizeof(int8_t);
  return result;
}

TfLiteTensor CreateQuantizedTensor(const int16_t* data, TfLiteIntArray* dims,
                                   float scale, int zero_point,
                                   const char* name, bool is_variable) {
  TfLiteTensor result = CreateTensor(dims, name, is_variable);
  result.type = kTfLiteInt16;
  result.data.i16 = const_cast<int16_t*>(data);
  result.params = {scale, zero_point};
  result.quantization = {kTfLiteAffineQuantization, nullptr};
  result.bytes = ElementCount(*dims) * sizeof(int16_t);
  return result;
}
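// The affine quantization used by the helpers above maps real values to
// integers as q = round(real / scale) + zero_point. A worked example with
// illustrative numbers: with scale = 0.5 and zero_point = 128 on a uint8_t
// tensor, real 3.0 quantizes to q = 3.0 / 0.5 + 128 = 134, and dequantizes
// back via real = (q - zero_point) * scale = (134 - 128) * 0.5 = 3.0.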
TfLiteTensor CreateQuantized32Tensor(const int32_t* data, TfLiteIntArray* dims,
|
||||
float scale, const char* name,
|
||||
bool is_variable) {
|
||||
TfLiteTensor result = CreateTensor(dims, name, is_variable);
|
||||
result.type = kTfLiteInt32;
|
||||
result.data.i32 = const_cast<int32_t*>(data);
|
||||
// Quantized int32 tensors always have a zero point of 0, since the range of
|
||||
// int32 values is large, and because zero point costs extra cycles during
|
||||
// processing.
|
||||
result.params = {scale, 0};
|
||||
result.quantization = {kTfLiteAffineQuantization, nullptr};
|
||||
result.bytes = ElementCount(*dims) * sizeof(int32_t);
|
||||
return result;
|
||||
}
|
||||
|
||||
TfLiteTensor CreateQuantizedBiasTensor(const float* data, int32_t* quantized,
|
||||
TfLiteIntArray* dims, float input_scale,
|
||||
float weights_scale, const char* name,
|
||||
bool is_variable) {
|
||||
float weights_scale, bool is_variable) {
|
||||
float bias_scale = input_scale * weights_scale;
|
||||
tflite::SymmetricQuantize(data, quantized, ElementCount(*dims), bias_scale);
|
||||
return CreateQuantized32Tensor(quantized, dims, bias_scale, name,
|
||||
is_variable);
|
||||
|
||||
// Quantized int32_t tensors always have a zero point of 0, since the range of
|
||||
// int32_t values is large, and because zero point costs extra cycles during
|
||||
// processing.
|
||||
TfLiteTensor result =
|
||||
CreateQuantizedTensor(quantized, dims, bias_scale, 0, is_variable);
|
||||
return result;
|
||||
}
|
||||
|
||||
// Quantizes int32 bias tensor with per-channel weights determined by input
|
||||
// Quantizes int32_t bias tensor with per-channel weights determined by input
|
||||
// scale multiplied by weight scale for each channel.
|
||||
TfLiteTensor CreatePerChannelQuantizedBiasTensor(
|
||||
const float* input, int32_t* quantized, TfLiteIntArray* dims,
|
||||
float input_scale, float* weight_scales, float* scales, int* zero_points,
|
||||
TfLiteAffineQuantization* affine_quant, int quantized_dimension,
|
||||
const char* name, bool is_variable) {
|
||||
bool is_variable) {
|
||||
int input_size = ElementCount(*dims);
|
||||
int num_channels = dims->data[quantized_dimension];
|
||||
// First element is reserved for array length
|
||||
@@ -743,25 +901,22 @@ TfLiteTensor CreatePerChannelQuantizedBiasTensor(
    zero_points[i + 1] = 0;
  }

  SymmetricPerChannelQuantize<int32_t>(input, quantized, input_size,
                                       num_channels, scales_array);

  affine_quant->scale = FloatArrayFromFloats(scales);
  affine_quant->zero_point = IntArrayFromInts(zero_points);
  affine_quant->quantized_dimension = quantized_dimension;

  TfLiteTensor result = CreateTensor(quantized, dims, is_variable);
  result.quantization = {kTfLiteAffineQuantization, affine_quant};
  result.bytes = ElementCount(*dims) * sizeof(int32_t);
  return result;
}

TfLiteTensor CreateSymmetricPerChannelQuantizedTensor(
    const float* input, int8_t* quantized, TfLiteIntArray* dims, float* scales,
    int* zero_points, TfLiteAffineQuantization* affine_quant,
    int quantized_dimension, bool is_variable) {
  int channel_count = dims->data[quantized_dimension];
  scales[0] = static_cast<float>(channel_count);
  zero_points[0] = channel_count;
@@ -777,13 +932,18 @@ TfLiteTensor CreateSymmetricPerChannelQuantizedTensor(
  affine_quant->zero_point = IntArrayFromInts(zero_points);
  affine_quant->quantized_dimension = quantized_dimension;

  TfLiteTensor result = CreateTensor(quantized, dims, is_variable);
  result.quantization = {kTfLiteAffineQuantization, affine_quant};
  result.bytes = ElementCount(*dims) * sizeof(int8_t);
  return result;
}
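
// A hedged usage sketch, added for illustration and not part of this diff:
// the per-channel helpers expect length-prefixed scale/zero-point arrays, as
// the "first element is reserved for array length" convention above implies.
// Sizes and values are assumptions.
inline void PerChannelQuantizedTensorExampleSketch() {
  // Per-channel quantize a 2x2 filter along dimension 0 (two channels).
  const int filter_dims_data[] = {2, 2, 2};  // {rank, dim0, dim1}
  TfLiteIntArray* filter_dims = IntArrayFromInts(filter_dims_data);
  const float filter_values[] = {1.0f, -1.0f, 0.5f, -0.5f};
  int8_t filter_quantized[4];
  float filter_scales[3];     // {channel_count, scale_ch0, scale_ch1}
  int filter_zero_points[3];  // {channel_count, 0, 0}
  TfLiteAffineQuantization filter_quant;
  TfLiteTensor filter = CreateSymmetricPerChannelQuantizedTensor(
      filter_values, filter_quantized, filter_dims, filter_scales,
      filter_zero_points, &filter_quant, /*quantized_dimension=*/0);
  (void)filter;
}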
size_t GetModelTensorCount(const Model* model) {
  auto* subgraphs = model->subgraphs();
  if (subgraphs) {
    return (*subgraphs)[0]->tensors()->size();
  }
  return 0;
}

} // namespace testing
} // namespace tflite

@@ -18,15 +18,67 @@ limitations under the License.

// Useful functions for writing tests.

#include <cstdint>
#include <limits>

#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/micro_utils.h"
#include "tensorflow/lite/portable_type_to_tflitetype.h"
#include "tensorflow/lite/schema/schema_generated.h"

namespace tflite {
namespace testing {

constexpr int kOfflinePlannerHeaderSize = 3;

struct NodeConnection_ {
  std::initializer_list<int32_t> input;
  std::initializer_list<int32_t> output;
};
typedef struct NodeConnection_ NodeConnection;

// A simple operator that returns the median of the input along with the
// number of times the kernel was invoked. The implementation below is
// deliberately complicated, just to demonstrate how kernel memory planning
// works.
class SimpleStatefulOp {
  static constexpr int kBufferNotAllocated = 0;
  // Inputs:
  static constexpr int kInputTensor = 0;
  // Outputs:
  static constexpr int kMedianTensor = 0;
  static constexpr int kInvokeCount = 1;
  struct OpData {
    int* invoke_count = nullptr;
    int sorting_buffer = kBufferNotAllocated;
  };

 public:
  static const TfLiteRegistration* getRegistration();
  static TfLiteRegistration* GetMutableRegistration();
  static void* Init(TfLiteContext* context, const char* buffer, size_t length);
  static TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node);
  static TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node);
};
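
// Hedged sketch, added for illustration and not part of this diff: the
// typical TFLM pattern an op like the one above uses to reserve a scratch
// buffer while the memory planner runs, and to fetch it at invoke time.
// SketchOpData, SketchPrepare and SketchInvoke are hypothetical names.
struct SketchOpData {
  int sorting_buffer = 0;
};

inline TfLiteStatus SketchPrepare(TfLiteContext* context, TfLiteNode* node) {
  auto* data = static_cast<SketchOpData*>(node->user_data);
  const TfLiteTensor* input = &context->tensors[node->inputs->data[0]];
  // The planner assigns an arena offset for this request during Prepare.
  return context->RequestScratchBufferInArena(context, input->bytes,
                                              &data->sorting_buffer);
}

inline TfLiteStatus SketchInvoke(TfLiteContext* context, TfLiteNode* node) {
  auto* data = static_cast<SketchOpData*>(node->user_data);
  // The scratch pointer is only valid for the duration of this invocation.
  void* scratch = context->GetScratchBuffer(context, data->sorting_buffer);
  (void)scratch;  // ... sort the input here and write the median output ...
  return kTfLiteOk;
}
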
class MockCustom {
 public:
  static const TfLiteRegistration* getRegistration();
  static TfLiteRegistration* GetMutableRegistration();
  static void* Init(TfLiteContext* context, const char* buffer, size_t length);
  static void Free(TfLiteContext* context, void* buffer);
  static TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node);
  static TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node);

  static bool freed_;
};

// Returns an Op Resolver that can be used in the testing code.
AllOpsResolver GetOpResolver();

// Returns a simple example flatbuffer TensorFlow Lite model. Contains 1 input,
// 1 layer of weights, 1 output Tensor, and 1 operator.
const Model* GetSimpleMockModel();
@@ -38,6 +90,24 @@ const Model* GetComplexMockModel();

// Returns a simple flatbuffer model with two branches.
const Model* GetSimpleModelWithBranch();

// Returns a simple flatbuffer model with offline planned tensors.
// @param[in] num_tensors Number of tensors in the model.
// @param[in] metadata_buffer Metadata for offline planner.
// @param[in] node_conn List of connections, i.e. operators
//                      in the model.
// @param[in] num_conns Number of connections.
// @param[in] num_subgraph_inputs How many of the input tensors are in
//                                the subgraph inputs. The default value
//                                of 0 means all of the input tensors
//                                are in the subgraph input list. There
//                                must be at least 1 input tensor in the
//                                subgraph input list.
const Model* GetModelWithOfflinePlanning(int num_tensors,
                                         const int32_t* metadata_buffer,
                                         NodeConnection* node_conn,
                                         int num_conns,
                                         int num_subgraph_inputs = 0);
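
// A hedged usage sketch, added for illustration and not part of this diff.
// The metadata buffer begins with kOfflinePlannerHeaderSize entries; the
// layout shown (version, subgraph index, offset count) and all offset values
// are assumptions.
inline const Model* OfflinePlanningExampleSketch() {
  constexpr int kTensors = 4;
  const int32_t metadata[kOfflinePlannerHeaderSize + kTensors] = {
      /*version=*/1, /*subgraph=*/0, /*offset count=*/kTensors,
      /*arena offsets=*/0, 48, 96, -1};  // -1: leave to the online planner
  NodeConnection node_list[] = {
      {/*input=*/{0, 1}, /*output=*/{2}},  // op 0 reads t0, t1; writes t2
      {/*input=*/{2}, /*output=*/{3}},     // op 1 reads t2; writes t3
  };
  return GetModelWithOfflinePlanning(kTensors, metadata, node_list,
                                     /*num_conns=*/2);
}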

// Returns a flatbuffer model with `simple_stateful_op`
const Model* GetSimpleStatefulModel();

@@ -72,57 +142,80 @@ TfLiteIntArray* IntArrayFromInts(const int* int_array);
// supplied array must be the size of the array expressed as a float.
TfLiteFloatArray* FloatArrayFromFloats(const float* floats);

template <typename T>
TfLiteTensor CreateTensor(const T* data, TfLiteIntArray* dims,
                          const bool is_variable = false) {
  TfLiteTensor result;
  result.dims = dims;
  result.params = {};
  result.quantization = {kTfLiteNoQuantization, nullptr};
  result.is_variable = is_variable;
  result.allocation_type = kTfLiteMemNone;
  result.type = typeToTfLiteType<T>();
  // Const cast is used to allow passing in const and non-const arrays within a
  // single CreateTensor method. A Const array should be used for immutable
  // input tensors and non-const array should be used for mutable and output
  // tensors.
  result.data.data = const_cast<T*>(data);
  result.bytes = ElementCount(*dims) * sizeof(T);
  return result;
}
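
// A hedged example, added for illustration and not part of this diff: the
// template deduces T and sets result.type via typeToTfLiteType<T>(). Names
// and values are assumptions.
inline TfLiteTensor CreateTensorExampleSketch() {
  static const int dims_data[] = {2, 1, 3};  // {rank, dim0, dim1}
  TfLiteIntArray* dims = IntArrayFromInts(dims_data);
  static const int32_t values[] = {1, 2, 3};  // outlives the returned tensor
  return CreateTensor(values, dims);  // result.type == kTfLiteInt32
}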

void PopulateFloatTensor(TfLiteTensor* tensor, float* begin, float* end);

template <typename T>
TfLiteTensor CreateQuantizedTensor(const T* data, TfLiteIntArray* dims,
                                   const float scale, const int zero_point = 0,
                                   const bool is_variable = false) {
  TfLiteTensor result = CreateTensor(data, dims, is_variable);
  result.params = {scale, zero_point};
  result.quantization = {kTfLiteAffineQuantization, nullptr};
  return result;
}

template <typename T>
TfLiteTensor CreateQuantizedTensor(const float* input, T* quantized,
                                   TfLiteIntArray* dims, float scale,
                                   int zero_point, bool is_variable = false) {
  int input_size = ElementCount(*dims);
  tflite::Quantize(input, quantized, input_size, scale, zero_point);
  return CreateQuantizedTensor(quantized, dims, scale, zero_point,
                               is_variable);
}
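
// A hedged example, added for illustration and not part of this diff:
// quantize floats and wrap them in one call. Scale and zero point are
// assumptions; with scale = 1/128, 0.5f maps to 64 and -0.25f to -32.
inline void QuantizeAndWrapExampleSketch() {
  const int in_dims_data[] = {1, 4};  // {rank, dim0}
  TfLiteIntArray* in_dims = IntArrayFromInts(in_dims_data);
  const float in_values[] = {0.5f, -0.25f, 0.75f, -1.0f};
  int8_t in_quantized[4];
  TfLiteTensor input = CreateQuantizedTensor(in_values, in_quantized, in_dims,
                                             /*scale=*/1.0f / 128,
                                             /*zero_point=*/0);
  (void)input;
}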

TfLiteTensor CreateQuantizedBiasTensor(const float* data, int32_t* quantized,
                                       TfLiteIntArray* dims, float input_scale,
                                       float weights_scale,
                                       bool is_variable = false);

// Quantizes int32_t bias tensor with per-channel weights determined by input
// scale multiplied by weight scale for each channel.
TfLiteTensor CreatePerChannelQuantizedBiasTensor(
    const float* input, int32_t* quantized, TfLiteIntArray* dims,
    float input_scale, float* weight_scales, float* scales, int* zero_points,
    TfLiteAffineQuantization* affine_quant, int quantized_dimension,
    bool is_variable = false);

TfLiteTensor CreateSymmetricPerChannelQuantizedTensor(
    const float* input, int8_t* quantized, TfLiteIntArray* dims, float* scales,
    int* zero_points, TfLiteAffineQuantization* affine_quant,
    int quantized_dimension, bool is_variable = false);

// Returns the number of tensors in the default subgraph for a tflite::Model.
size_t GetModelTensorCount(const Model* model);

// Derives the quantization scaling factor from a min and max range.
template <typename T>
inline float ScaleFromMinMax(const float min, const float max) {
  return (max - min) /
         static_cast<float>((std::numeric_limits<T>::max() * 1.0) -
                            std::numeric_limits<T>::min());
}

// Derives the quantization zero point from a min and max range.
template <typename T>
inline int ZeroPointFromMinMax(const float min, const float max) {
  return static_cast<int>(std::numeric_limits<T>::min()) +
         static_cast<int>(-min / ScaleFromMinMax<T>(min, max) + 0.5f);
}
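
// A worked example, added for illustration and not part of this diff: for an
// int8_t range of [-1.0f, 1.0f],
//   scale      = (1 - (-1)) / (127 - (-128)) = 2 / 255 ≈ 0.00784f
//   zero_point = -128 + (int)(1.0f / 0.00784f + 0.5f) = -128 + 128 = 0
inline void MinMaxHelpersExampleSketch() {
  const float scale = ScaleFromMinMax<int8_t>(-1.0f, 1.0f);
  const int zero_point = ZeroPointFromMinMax<int8_t>(-1.0f, 1.0f);
  (void)scale;
  (void)zero_point;
}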

} // namespace testing
} // namespace tflite