mirror of
https://github.com/jomjol/AI-on-the-edge-device.git
synced 2025-12-12 14:37:06 +03:00
Update tflite
This commit is contained in:
@@ -21,6 +21,8 @@ limitations under the License.
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/kernels/internal/cppmath.h"
|
||||
#include "tensorflow/lite/kernels/internal/max.h"
|
||||
#include "tensorflow/lite/kernels/internal/min.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -32,11 +34,11 @@ inline float ActivationValFloat(TfLiteFusedActivation act, float a) {
|
||||
case kTfLiteActNone:
|
||||
return a;
|
||||
case kTfLiteActRelu:
|
||||
return std::max(0.0f, a);
|
||||
case kTfLiteActRelu1:
|
||||
return std::max(-1.0f, std::min(a, 1.0f));
|
||||
return TfLiteMax(0.0f, a);
|
||||
case kTfLiteActReluN1To1:
|
||||
return TfLiteMax(-1.0f, TfLiteMin(a, 1.0f));
|
||||
case kTfLiteActRelu6:
|
||||
return std::max(0.0f, std::min(a, 6.0f));
|
||||
return TfLiteMax(0.0f, TfLiteMin(a, 6.0f));
|
||||
case kTfLiteActTanh:
|
||||
return std::tanh(a);
|
||||
case kTfLiteActSignBit:
|
||||
|
||||
@@ -18,30 +18,82 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/micro_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace activations {
|
||||
namespace {
|
||||
|
||||
struct ReluOpData {
|
||||
ReluParams params;
|
||||
};
|
||||
|
||||
struct Relu6OpData {
|
||||
int8_t six_int8;
|
||||
int8_t zero_int8;
|
||||
uint8_t six_uint8;
|
||||
uint8_t zero_uint8;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
template <typename Q>
|
||||
inline void ReluQuantized(int32_t lower, const RuntimeShape& input_shape,
|
||||
const Q* input_data, const RuntimeShape& output_shape,
|
||||
Q* output_data) {
|
||||
template <typename T>
|
||||
inline void ReluQuantized(const ReluOpData& data,
|
||||
const RuntimeShape& input_shape,
|
||||
const RuntimeShape& output_shape, const T* input_data,
|
||||
T* output_data) {
|
||||
const int flat_size = MatchingFlatSize(input_shape, output_shape);
|
||||
for (int i = 0; i < flat_size; ++i) {
|
||||
const Q val = input_data[i];
|
||||
const Q clamped = val < lower ? lower : val;
|
||||
output_data[i] = clamped;
|
||||
const int32_t val = static_cast<int32_t>(input_data[i]);
|
||||
int32_t clamped =
|
||||
data.params.output_offset +
|
||||
MultiplyByQuantizedMultiplier(val - data.params.input_offset,
|
||||
data.params.output_multiplier,
|
||||
data.params.output_shift);
|
||||
clamped = std::max(data.params.quantized_activation_min, clamped);
|
||||
clamped = std::min(data.params.quantized_activation_max, clamped);
|
||||
output_data[i] = static_cast<T>(clamped);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
|
||||
ReluOpData* data) {
|
||||
float act_min = 0.0;
|
||||
float act_max = std::numeric_limits<float>::infinity();
|
||||
double real_multiplier =
|
||||
static_cast<double>(input->params.scale / output->params.scale);
|
||||
|
||||
const RuntimeShape input_shape = GetTensorShape(input);
|
||||
const RuntimeShape output_shape = GetTensorShape(output);
|
||||
|
||||
QuantizeMultiplier(real_multiplier, &data->params.output_multiplier,
|
||||
&data->params.output_shift);
|
||||
|
||||
data->params.quantized_activation_min = std::max(
|
||||
static_cast<int32_t>(std::numeric_limits<T>::min()),
|
||||
output->params.zero_point +
|
||||
static_cast<int32_t>(roundf(act_min / output->params.scale)));
|
||||
data->params.quantized_activation_max =
|
||||
act_max == std::numeric_limits<float>::infinity()
|
||||
? static_cast<int32_t>(std::numeric_limits<T>::max())
|
||||
: std::min(static_cast<int32_t>(std::numeric_limits<T>::max()),
|
||||
output->params.zero_point +
|
||||
static_cast<int32_t>(
|
||||
roundf(act_max / output->params.scale)));
|
||||
data->params.input_offset = input->params.zero_point;
|
||||
data->params.output_offset = output->params.zero_point;
|
||||
}
|
||||
|
||||
inline void ReluFloat(const RuntimeShape& input_shape, const float* input_data,
|
||||
const RuntimeShape& output_shape, float* output_data) {
|
||||
const int flat_size = MatchingFlatSize(input_shape, output_shape);
|
||||
@@ -77,33 +129,59 @@ inline void Relu6Quantized(Q lower, Q upper, const RuntimeShape& input_shape,
|
||||
}
|
||||
}
|
||||
|
||||
void* ReluInit(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(ReluOpData));
|
||||
}
|
||||
|
||||
TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
ReluOpData* data = static_cast<ReluOpData*>(node->user_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
|
||||
if (input->type == kTfLiteInt8) {
|
||||
CalculateReluOpData<int8_t>(input, output, data);
|
||||
} else if (input->type == kTfLiteUInt8) {
|
||||
CalculateReluOpData<uint8_t>(input, output, data);
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const ReluOpData& data = *(static_cast<const ReluOpData*>(node->user_data));
|
||||
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
ReluFloat(GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(output), GetTensorData<float>(output));
|
||||
ReluFloat(tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case kTfLiteInt8: {
|
||||
ReluQuantized<int8_t>(input->params.zero_point, GetTensorShape(input),
|
||||
GetTensorData<int8_t>(input),
|
||||
GetTensorShape(output),
|
||||
GetTensorData<int8_t>(output));
|
||||
ReluQuantized<int8_t>(data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case kTfLiteUInt8: {
|
||||
ReluQuantized<uint8_t>(input->params.zero_point, GetTensorShape(input),
|
||||
GetTensorData<uint8_t>(input),
|
||||
GetTensorShape(output),
|
||||
GetTensorData<uint8_t>(output));
|
||||
ReluQuantized<uint8_t>(data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
default: {
|
||||
@@ -114,37 +192,63 @@ TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
}
|
||||
}
|
||||
|
||||
void* Relu6Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(Relu6OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
Relu6OpData* data = static_cast<Relu6OpData*>(node->user_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
|
||||
if (input->type == kTfLiteInt8) {
|
||||
data->six_int8 = FloatToQuantizedType<int8_t>(6.0f, input->params.scale,
|
||||
input->params.zero_point);
|
||||
data->zero_int8 = input->params.zero_point;
|
||||
} else if (input->type == kTfLiteUInt8) {
|
||||
data->six_uint8 = FloatToQuantizedType<uint8_t>(6.0f, input->params.scale,
|
||||
input->params.zero_point);
|
||||
data->zero_uint8 = input->params.zero_point;
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const Relu6OpData& data = *(static_cast<const Relu6OpData*>(node->user_data));
|
||||
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
Relu6Float(GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(output), GetTensorData<float>(output));
|
||||
Relu6Float(tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case kTfLiteInt8: {
|
||||
const int8_t six = FloatToAsymmetricQuantizedInt8(
|
||||
6.0f, input->params.scale, input->params.zero_point);
|
||||
const int8_t zero = input->params.zero_point;
|
||||
Relu6Quantized<int8_t>(
|
||||
zero, six, GetTensorShape(input), GetTensorData<int8_t>(input),
|
||||
GetTensorShape(output), GetTensorData<int8_t>(output));
|
||||
Relu6Quantized<int8_t>(data.zero_int8, data.six_int8,
|
||||
tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case kTfLiteUInt8: {
|
||||
const uint8_t six = FloatToAsymmetricQuantizedUInt8(
|
||||
6.0f, input->params.scale, input->params.zero_point);
|
||||
const uint8_t zero = input->params.zero_point;
|
||||
Relu6Quantized<uint8_t>(
|
||||
zero, six, GetTensorShape(input), GetTensorData<uint8_t>(input),
|
||||
GetTensorShape(output), GetTensorData<uint8_t>(output));
|
||||
Relu6Quantized<uint8_t>(data.zero_uint8, data.six_uint8,
|
||||
tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
default: {
|
||||
@@ -157,28 +261,26 @@ TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
} // namespace activations
|
||||
|
||||
TfLiteRegistration* Register_RELU() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/activations::ReluPrepare,
|
||||
/*invoke=*/activations::ReluEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_RELU() {
|
||||
return {/*init=*/activations::ReluInit,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/activations::ReluPrepare,
|
||||
/*invoke=*/activations::ReluEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_RELU6() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/activations::Relu6Prepare,
|
||||
/*invoke=*/activations::Relu6Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_RELU6() {
|
||||
return {/*init=*/activations::Relu6Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/activations::Relu6Prepare,
|
||||
/*invoke=*/activations::Relu6Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@@ -23,6 +23,8 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/memory_helpers.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -40,18 +42,22 @@ struct OpData {
|
||||
// and the special 16-bit -> 16bit quantized path
|
||||
int input1_shift;
|
||||
int input2_shift;
|
||||
int32 output_activation_min;
|
||||
int32 output_activation_max;
|
||||
int32_t output_activation_min;
|
||||
int32_t output_activation_max;
|
||||
|
||||
// These fields are used only in the general 8-bit -> 8bit quantized path
|
||||
int32 input1_multiplier;
|
||||
int32 input2_multiplier;
|
||||
int32 output_multiplier;
|
||||
int32_t input1_multiplier;
|
||||
int32_t input2_multiplier;
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
int left_shift;
|
||||
int32 input1_offset;
|
||||
int32 input2_offset;
|
||||
int32 output_offset;
|
||||
int32_t input1_offset;
|
||||
int32_t input2_offset;
|
||||
int32_t output_offset;
|
||||
|
||||
// Used only for float evals:
|
||||
float output_activation_min_f32;
|
||||
float output_activation_max_f32;
|
||||
};
|
||||
|
||||
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params,
|
||||
@@ -89,37 +95,44 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params,
|
||||
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
|
||||
context, params->activation, output, &data->output_activation_min,
|
||||
&data->output_activation_max));
|
||||
} else if (output->type == kTfLiteFloat32) {
|
||||
CalculateActivationRange(params->activation,
|
||||
&data->output_activation_min_f32,
|
||||
&data->output_activation_max_f32);
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params,
|
||||
const OpData* data, const TfLiteTensor* input1,
|
||||
const TfLiteTensor* input2, TfLiteTensor* output) {
|
||||
float output_activation_min, output_activation_max;
|
||||
CalculateActivationRange(params->activation, &output_activation_min,
|
||||
&output_activation_max);
|
||||
const OpData* data, const TfLiteEvalTensor* input1,
|
||||
const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
|
||||
tflite::ArithmeticParams op_params;
|
||||
SetActivationParams(output_activation_min, output_activation_max, &op_params);
|
||||
#define TF_LITE_ADD(opname) \
|
||||
reference_ops::opname(op_params, GetTensorShape(input1), \
|
||||
GetTensorData<float>(input1), GetTensorShape(input2), \
|
||||
GetTensorData<float>(input2), GetTensorShape(output), \
|
||||
GetTensorData<float>(output))
|
||||
SetActivationParams(data->output_activation_min_f32,
|
||||
data->output_activation_max_f32, &op_params);
|
||||
if (data->requires_broadcast) {
|
||||
TF_LITE_ADD(BroadcastAdd4DSlow);
|
||||
reference_ops::BroadcastAdd4DSlow(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<float>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<float>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
} else {
|
||||
TF_LITE_ADD(Add);
|
||||
reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<float>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<float>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
}
|
||||
#undef TF_LITE_ADD
|
||||
}
|
||||
|
||||
TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteAddParams* params, const OpData* data,
|
||||
const TfLiteTensor* input1,
|
||||
const TfLiteTensor* input2,
|
||||
TfLiteTensor* output) {
|
||||
const TfLiteEvalTensor* input1,
|
||||
const TfLiteEvalTensor* input2,
|
||||
TfLiteEvalTensor* output) {
|
||||
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
|
||||
tflite::ArithmeticParams op_params;
|
||||
op_params.left_shift = data->left_shift;
|
||||
@@ -135,46 +148,91 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
SetActivationParams(data->output_activation_min,
|
||||
data->output_activation_max, &op_params);
|
||||
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
|
||||
GetTensorShape(input1), GetTensorShape(input2), &op_params);
|
||||
#define TF_LITE_ADD(type, opname, dtype) \
|
||||
type::opname(op_params, GetTensorShape(input1), \
|
||||
GetTensorData<dtype>(input1), GetTensorShape(input2), \
|
||||
GetTensorData<dtype>(input2), GetTensorShape(output), \
|
||||
GetTensorData<dtype>(output));
|
||||
tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorShape(input2), &op_params);
|
||||
if (output->type == kTfLiteInt8) {
|
||||
if (need_broadcast) {
|
||||
TF_LITE_ADD(reference_integer_ops, BroadcastAdd4DSlow, int8_t);
|
||||
reference_integer_ops::BroadcastAdd4DSlow(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<int8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<int8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
} else {
|
||||
TF_LITE_ADD(reference_integer_ops, Add, int8_t);
|
||||
reference_integer_ops::Add(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<int8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<int8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
} else {
|
||||
if (need_broadcast) {
|
||||
TF_LITE_ADD(reference_ops, BroadcastAdd4DSlow, uint8_t);
|
||||
reference_ops::BroadcastAdd4DSlow(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<uint8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<uint8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} else {
|
||||
TF_LITE_ADD(reference_ops, Add, uint8_t);
|
||||
reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<uint8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<uint8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
}
|
||||
}
|
||||
#undef TF_LITE_ADD
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
|
||||
TF_LITE_ENSURE(context, input1 != nullptr);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
|
||||
TF_LITE_ENSURE(context, input2 != nullptr);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
|
||||
|
||||
TF_LITE_ENSURE_STATUS(
|
||||
CalculateOpData(context, params, input1, input2, output, data));
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
OpData data;
|
||||
TF_LITE_ENSURE_STATUS(
|
||||
CalculateOpData(context, params, input1, input2, output, &data));
|
||||
const TfLiteEvalTensor* input1 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor1);
|
||||
const TfLiteEvalTensor* input2 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor2);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
if (output->type == kTfLiteFloat32) {
|
||||
EvalAdd(context, node, params, &data, input1, input2, output);
|
||||
EvalAdd(context, node, params, data, input1, input2, output);
|
||||
} else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
|
||||
TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, &data,
|
||||
TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, data,
|
||||
input1, input2, output));
|
||||
} else {
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
@@ -187,16 +245,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
} // namespace add
|
||||
|
||||
TfLiteRegistration* Register_ADD() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/add::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_ADD() {
|
||||
return {/*init=*/add::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/add::Prepare,
|
||||
/*invoke=*/add::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@@ -1,83 +0,0 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/micro/kernels/all_ops_resolver.h"
|
||||
|
||||
#include "tensorflow/lite/micro/kernels/micro_ops.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
|
||||
// Register each supported op with:
|
||||
// AddBuiltin(<operator ID>, <registration>, [min version], [max version])
|
||||
AllOpsResolver::AllOpsResolver() {
|
||||
AddBuiltin(BuiltinOperator_FULLY_CONNECTED, Register_FULLY_CONNECTED(), 1, 4);
|
||||
AddBuiltin(BuiltinOperator_MAX_POOL_2D, Register_MAX_POOL_2D(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_SOFTMAX, Register_SOFTMAX(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_LOGISTIC, Register_LOGISTIC(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_SVDF, Register_SVDF(), 1, 3);
|
||||
AddBuiltin(BuiltinOperator_CONV_2D, Register_CONV_2D(), 1, 3);
|
||||
AddBuiltin(BuiltinOperator_CONCATENATION, Register_CONCATENATION(), 1, 3);
|
||||
AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D, Register_DEPTHWISE_CONV_2D(), 1,
|
||||
3);
|
||||
AddBuiltin(BuiltinOperator_AVERAGE_POOL_2D, Register_AVERAGE_POOL_2D(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_ABS, Register_ABS());
|
||||
AddBuiltin(BuiltinOperator_SIN, Register_SIN());
|
||||
AddBuiltin(BuiltinOperator_COS, Register_COS());
|
||||
AddBuiltin(BuiltinOperator_LOG, Register_LOG());
|
||||
AddBuiltin(BuiltinOperator_SQRT, Register_SQRT());
|
||||
AddBuiltin(BuiltinOperator_RSQRT, Register_RSQRT());
|
||||
AddBuiltin(BuiltinOperator_SQUARE, Register_SQUARE());
|
||||
AddBuiltin(BuiltinOperator_PRELU, Register_PRELU());
|
||||
AddBuiltin(BuiltinOperator_FLOOR, Register_FLOOR());
|
||||
AddBuiltin(BuiltinOperator_MAXIMUM, Register_MAXIMUM());
|
||||
AddBuiltin(BuiltinOperator_MINIMUM, Register_MINIMUM());
|
||||
AddBuiltin(BuiltinOperator_ARG_MAX, Register_ARG_MAX());
|
||||
AddBuiltin(BuiltinOperator_ARG_MIN, Register_ARG_MIN());
|
||||
AddBuiltin(BuiltinOperator_LOGICAL_OR, Register_LOGICAL_OR());
|
||||
AddBuiltin(BuiltinOperator_LOGICAL_AND, Register_LOGICAL_AND());
|
||||
AddBuiltin(BuiltinOperator_LOGICAL_NOT, Register_LOGICAL_NOT());
|
||||
AddBuiltin(BuiltinOperator_RESHAPE, Register_RESHAPE());
|
||||
AddBuiltin(BuiltinOperator_EQUAL, Register_EQUAL(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_NOT_EQUAL, Register_NOT_EQUAL(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_GREATER, Register_GREATER(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_GREATER_EQUAL, Register_GREATER_EQUAL(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_LESS, Register_LESS(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_LESS_EQUAL, Register_LESS_EQUAL(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_CEIL, Register_CEIL());
|
||||
AddBuiltin(BuiltinOperator_ROUND, Register_ROUND());
|
||||
AddBuiltin(BuiltinOperator_STRIDED_SLICE, Register_STRIDED_SLICE());
|
||||
AddBuiltin(BuiltinOperator_PACK, Register_PACK(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_PAD, Register_PAD(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_PADV2, Register_PADV2(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_SPLIT, Register_SPLIT(), 1, 3);
|
||||
AddBuiltin(BuiltinOperator_UNPACK, Register_UNPACK(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_NEG, Register_NEG());
|
||||
AddBuiltin(BuiltinOperator_ADD, Register_ADD(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_MUL, Register_MUL(), 1, 3);
|
||||
AddBuiltin(BuiltinOperator_SUB, Register_SUB(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_QUANTIZE, Register_QUANTIZE());
|
||||
AddBuiltin(BuiltinOperator_DEQUANTIZE, Register_DEQUANTIZE(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_RELU, Register_RELU());
|
||||
AddBuiltin(BuiltinOperator_RELU6, Register_RELU6());
|
||||
AddBuiltin(BuiltinOperator_MEAN, Register_MEAN());
|
||||
AddBuiltin(BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
|
||||
Register_RESIZE_NEAREST_NEIGHBOR(),
|
||||
/* min_version = */ 1,
|
||||
/* max_version = */ 2);
|
||||
AddBuiltin(BuiltinOperator_L2_NORMALIZATION, Register_L2_NORMALIZATION());
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
@@ -19,6 +19,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/micro_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
@@ -45,14 +46,20 @@ inline void ArgMinMaxHelper(const RuntimeShape& input1_shape,
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node, bool is_arg_max) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
const TfLiteTensor* axis = GetInput(context, node, kAxis);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
const TfLiteEvalTensor* axis =
|
||||
tflite::micro::GetEvalInput(context, node, kAxis);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
#define TF_LITE_ARG_MIN_MAX(data_type, axis_type, output_type) \
|
||||
ArgMinMaxHelper(GetTensorShape(input), GetTensorData<data_type>(input), \
|
||||
GetTensorData<axis_type>(axis), GetTensorShape(output), \
|
||||
GetTensorData<output_type>(output), is_arg_max)
|
||||
#define TF_LITE_ARG_MIN_MAX(data_type, axis_type, output_type) \
|
||||
ArgMinMaxHelper(tflite::micro::GetTensorShape(input), \
|
||||
tflite::micro::GetTensorData<data_type>(input), \
|
||||
tflite::micro::GetTensorData<axis_type>(axis), \
|
||||
tflite::micro::GetTensorShape(output), \
|
||||
tflite::micro::GetTensorData<output_type>(output), \
|
||||
is_arg_max)
|
||||
if (axis->type == kTfLiteInt32) {
|
||||
if (output->type == kTfLiteInt32) {
|
||||
switch (input->type) {
|
||||
@@ -67,18 +74,19 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node, bool is_arg_max) {
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context,
|
||||
"Only float32, uint8 and int8 are "
|
||||
"Only float32, uint8_t and int8_t are "
|
||||
"supported currently, got %s.",
|
||||
TfLiteTypeGetName(input->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else {
|
||||
TF_LITE_KERNEL_LOG(context, "Only int32 are supported currently, got %s.",
|
||||
TF_LITE_KERNEL_LOG(context,
|
||||
"Only int32_t are supported currently, got %s.",
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else {
|
||||
TF_LITE_KERNEL_LOG(context, "Only int32 are supported currently, got %s.",
|
||||
TF_LITE_KERNEL_LOG(context, "Only int32_t are supported currently, got %s.",
|
||||
TfLiteTypeGetName(axis->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
@@ -98,28 +106,26 @@ TfLiteStatus ArgMaxEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
} // namespace arg_min_max
|
||||
|
||||
TfLiteRegistration* Register_ARG_MAX() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/arg_min_max::ArgMaxEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_ARG_MAX() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/arg_min_max::ArgMaxEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_ARG_MIN() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/arg_min_max::ArgMinEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_ARG_MIN() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/arg_min_max::ArgMinEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@@ -18,6 +18,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -29,11 +30,13 @@ constexpr int kOutputTensor = 0;
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32);
|
||||
TF_LITE_ENSURE_EQ(context, output->type, input->type);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, output->type, input->type);
|
||||
TF_LITE_ENSURE_EQ(context, output->bytes, input->bytes);
|
||||
TF_LITE_ENSURE_EQ(context, output->dims->size, input->dims->size);
|
||||
for (int i = 0; i < output->dims->size; ++i) {
|
||||
@@ -43,26 +46,29 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
reference_ops::Ceil(GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(output), GetTensorData<float>(output));
|
||||
reference_ops::Ceil(tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
} // namespace ceil
|
||||
|
||||
TfLiteRegistration* Register_CEIL() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/ceil::Prepare,
|
||||
/*invoke=*/ceil::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_CEIL() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/ceil::Prepare,
|
||||
/*invoke=*/ceil::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@@ -17,11 +17,10 @@ limitations under the License.
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/compatibility.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
/*
|
||||
* The circular buffer custom operator is used to implement strided streaming
|
||||
@@ -78,7 +77,9 @@ void Free(TfLiteContext* context, void* buffer) { op_data_counter = 0; }
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
@@ -89,10 +90,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TF_LITE_ENSURE_EQ(context, 1, input->dims->data[2]);
|
||||
TF_LITE_ENSURE_EQ(context, output->dims->data[3], input->dims->data[3]);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, input->type, output->type);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
|
||||
|
||||
// The circular buffer custom operator currently only supports int8.
|
||||
TF_LITE_ENSURE_EQ(context, input->type, kTfLiteInt8);
|
||||
// The circular buffer custom operator currently only supports int8_t.
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8);
|
||||
|
||||
// TODO(b/132070898): Use statically slotted OpData structures until a
|
||||
// scratch memory API is ready.
|
||||
@@ -121,8 +122,10 @@ void EvalInt8(const int8_t* input, int num_slots, int depth, int8_t* output) {
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
OpData* data = reinterpret_cast<OpData*>(node->user_data);
|
||||
|
||||
@@ -130,8 +133,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
int depth = output->dims->data[3];
|
||||
|
||||
if (input->type == kTfLiteInt8) {
|
||||
EvalInt8(GetTensorData<int8_t>(input), num_slots, depth,
|
||||
GetTensorData<int8_t>(output));
|
||||
EvalInt8(tflite::micro::GetTensorData<int8_t>(input), num_slots, depth,
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
} else {
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(input->type), input->type);
|
||||
|
||||
@@ -18,6 +18,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -25,103 +26,109 @@ namespace micro {
|
||||
namespace comparisons {
|
||||
namespace {
|
||||
|
||||
struct OpData {
|
||||
ComparisonParams params;
|
||||
};
|
||||
|
||||
constexpr int kInputTensor1 = 0;
|
||||
constexpr int kInputTensor2 = 1;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
// TODO(ruic): optimize macros below to using template functions.
|
||||
#define TF_LITE_QUANTIZE_COMPARISON(opname) \
|
||||
template <typename input_dtype> \
|
||||
void EvalQuantized##opname(TfLiteContext* context, TfLiteNode* node, \
|
||||
const TfLiteTensor* input1, \
|
||||
const TfLiteTensor* input2, TfLiteTensor* output, \
|
||||
bool requires_broadcast) { \
|
||||
if (input1->type == kTfLiteUInt8 || input1->type == kTfLiteInt8) { \
|
||||
auto input1_offset = -input1->params.zero_point; \
|
||||
auto input2_offset = -input2->params.zero_point; \
|
||||
const int left_shift = 8; \
|
||||
\
|
||||
int32 input1_multiplier; \
|
||||
int input1_shift; \
|
||||
QuantizeMultiplierSmallerThanOneExp( \
|
||||
static_cast<double>(input1->params.scale), &input1_multiplier, \
|
||||
&input1_shift); \
|
||||
int32 input2_multiplier; \
|
||||
int input2_shift; \
|
||||
QuantizeMultiplierSmallerThanOneExp( \
|
||||
static_cast<double>(input2->params.scale), &input2_multiplier, \
|
||||
&input2_shift); \
|
||||
\
|
||||
ComparisonParams op_params; \
|
||||
op_params.left_shift = left_shift; \
|
||||
op_params.input1_offset = input1_offset; \
|
||||
op_params.input1_multiplier = input1_multiplier; \
|
||||
op_params.input1_shift = input1_shift; \
|
||||
op_params.input2_offset = input2_offset; \
|
||||
op_params.input2_multiplier = input2_multiplier; \
|
||||
op_params.input2_shift = input2_shift; \
|
||||
if (requires_broadcast) { \
|
||||
reference_ops::Broadcast4DSlow##opname##WithScaling( \
|
||||
op_params, GetTensorShape(input1), \
|
||||
GetTensorData<input_dtype>(input1), GetTensorShape(input2), \
|
||||
GetTensorData<input_dtype>(input2), GetTensorShape(output), \
|
||||
GetTensorData<bool>(output)); \
|
||||
} else { \
|
||||
reference_ops::opname##WithScaling( \
|
||||
op_params, GetTensorShape(input1), \
|
||||
GetTensorData<input_dtype>(input1), GetTensorShape(input2), \
|
||||
GetTensorData<input_dtype>(input2), GetTensorShape(output), \
|
||||
GetTensorData<bool>(output)); \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
TF_LITE_QUANTIZE_COMPARISON(Equal);
|
||||
TF_LITE_QUANTIZE_COMPARISON(NotEqual);
|
||||
TF_LITE_QUANTIZE_COMPARISON(Greater);
|
||||
TF_LITE_QUANTIZE_COMPARISON(GreaterEqual);
|
||||
TF_LITE_QUANTIZE_COMPARISON(Less);
|
||||
TF_LITE_QUANTIZE_COMPARISON(LessEqual);
|
||||
#undef TF_LITE_QUANTIZE_COMPARISON
|
||||
|
||||
#define TF_LITE_COMPARISON(type, opname, requires_broadcast) \
|
||||
{ \
|
||||
ComparisonParams op_params; \
|
||||
requires_broadcast \
|
||||
? reference_ops::Broadcast4DSlow##opname##NoScaling( \
|
||||
op_params, GetTensorShape(input1), GetTensorData<type>(input1), \
|
||||
GetTensorShape(input2), GetTensorData<type>(input2), \
|
||||
GetTensorShape(output), GetTensorData<bool>(output)) \
|
||||
: reference_ops::opname##NoScaling( \
|
||||
op_params, GetTensorShape(input1), GetTensorData<type>(input1), \
|
||||
GetTensorShape(input2), GetTensorData<type>(input2), \
|
||||
GetTensorShape(output), GetTensorData<bool>(output)); \
|
||||
}
|
||||
|
||||
TfLiteStatus EqualEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
bool requires_broadcast = !HaveSameShapes(input1, input2);
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
const TfLiteEvalTensor* input1 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor1);
|
||||
const TfLiteEvalTensor* input2 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor2);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
|
||||
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
|
||||
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
|
||||
bool* output_data = tflite::micro::GetTensorData<bool>(output);
|
||||
|
||||
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
|
||||
switch (input1->type) {
|
||||
case kTfLiteBool:
|
||||
TF_LITE_COMPARISON(bool, Equal, requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<bool>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<bool>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::EqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<bool>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<bool>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteFloat32:
|
||||
TF_LITE_COMPARISON(float, Equal, requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<float>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<float>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::EqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<float>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<float>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt32:
|
||||
TF_LITE_COMPARISON(int32_t, Equal, requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::EqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt64:
|
||||
TF_LITE_COMPARISON(int64_t, Equal, requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::EqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
EvalQuantizedEqual<uint8_t>(context, node, input1, input2, output,
|
||||
requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowEqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::EqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
EvalQuantizedEqual<int8_t>(context, node, input1, input2, output,
|
||||
requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowEqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::EqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
@@ -133,30 +140,100 @@ TfLiteStatus EqualEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
// TODO(renjieliu): Refactor the logic to avoid duplications.
|
||||
TfLiteStatus NotEqualEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
bool requires_broadcast = !HaveSameShapes(input1, input2);
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
const TfLiteEvalTensor* input1 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor1);
|
||||
const TfLiteEvalTensor* input2 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor2);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
|
||||
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
|
||||
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
|
||||
bool* output_data = tflite::micro::GetTensorData<bool>(output);
|
||||
|
||||
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
|
||||
switch (input1->type) {
|
||||
case kTfLiteBool:
|
||||
TF_LITE_COMPARISON(bool, NotEqual, requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowNotEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<bool>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<bool>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::NotEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<bool>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<bool>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteFloat32:
|
||||
TF_LITE_COMPARISON(float, NotEqual, requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowNotEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<float>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<float>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::NotEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<float>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<float>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt32:
|
||||
TF_LITE_COMPARISON(int32_t, NotEqual, requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowNotEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::NotEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt64:
|
||||
TF_LITE_COMPARISON(int64_t, NotEqual, requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowNotEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::NotEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
EvalQuantizedNotEqual<uint8_t>(context, node, input1, input2, output,
|
||||
requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowNotEqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::NotEqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
EvalQuantizedNotEqual<int8_t>(context, node, input1, input2, output,
|
||||
requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowNotEqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::NotEqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
@@ -167,27 +244,87 @@ TfLiteStatus NotEqualEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
}
|
||||
|
||||
TfLiteStatus GreaterEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
bool requires_broadcast = !HaveSameShapes(input1, input2);
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
const TfLiteEvalTensor* input1 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor1);
|
||||
const TfLiteEvalTensor* input2 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor2);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
|
||||
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
|
||||
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
|
||||
bool* output_data = tflite::micro::GetTensorData<bool>(output);
|
||||
|
||||
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
|
||||
switch (input1->type) {
|
||||
case kTfLiteFloat32:
|
||||
TF_LITE_COMPARISON(float, Greater, requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowGreaterNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<float>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<float>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::GreaterNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<float>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<float>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt32:
|
||||
TF_LITE_COMPARISON(int32_t, Greater, requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowGreaterNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::GreaterNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt64:
|
||||
TF_LITE_COMPARISON(int64_t, Greater, requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowGreaterNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::GreaterNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
EvalQuantizedGreater<uint8_t>(context, node, input1, input2, output,
|
||||
requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowGreaterWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::GreaterWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
EvalQuantizedGreater<int8_t>(context, node, input1, input2, output,
|
||||
requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowGreaterWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::GreaterWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
@@ -198,27 +335,87 @@ TfLiteStatus GreaterEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
}
|
||||
|
||||
TfLiteStatus GreaterEqualEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
bool requires_broadcast = !HaveSameShapes(input1, input2);
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
const TfLiteEvalTensor* input1 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor1);
|
||||
const TfLiteEvalTensor* input2 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor2);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
|
||||
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
|
||||
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
|
||||
bool* output_data = tflite::micro::GetTensorData<bool>(output);
|
||||
|
||||
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
|
||||
switch (input1->type) {
|
||||
case kTfLiteFloat32:
|
||||
TF_LITE_COMPARISON(float, GreaterEqual, requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<float>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<float>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::GreaterEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<float>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<float>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt32:
|
||||
TF_LITE_COMPARISON(int32_t, GreaterEqual, requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::GreaterEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt64:
|
||||
TF_LITE_COMPARISON(int64_t, GreaterEqual, requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::GreaterEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
EvalQuantizedGreaterEqual<uint8_t>(context, node, input1, input2, output,
|
||||
requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowGreaterEqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::GreaterEqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
EvalQuantizedGreaterEqual<int8_t>(context, node, input1, input2, output,
|
||||
requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowGreaterEqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::GreaterEqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
@@ -229,27 +426,87 @@ TfLiteStatus GreaterEqualEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
}
|
||||
|
||||
TfLiteStatus LessEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
bool requires_broadcast = !HaveSameShapes(input1, input2);
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
const TfLiteEvalTensor* input1 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor1);
|
||||
const TfLiteEvalTensor* input2 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor2);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
|
||||
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
|
||||
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
|
||||
bool* output_data = tflite::micro::GetTensorData<bool>(output);
|
||||
|
||||
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
|
||||
switch (input1->type) {
|
||||
case kTfLiteFloat32:
|
||||
TF_LITE_COMPARISON(float, Less, requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowLessNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<float>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<float>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::LessNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<float>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<float>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt32:
|
||||
TF_LITE_COMPARISON(int32_t, Less, requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowLessNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::LessNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt64:
|
||||
TF_LITE_COMPARISON(int64_t, Less, requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowLessNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::LessNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
EvalQuantizedLess<uint8_t>(context, node, input1, input2, output,
|
||||
requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowLessWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::LessWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
EvalQuantizedLess<int8_t>(context, node, input1, input2, output,
|
||||
requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowLessWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::LessWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
@@ -260,27 +517,87 @@ TfLiteStatus LessEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
}
|
||||
|
||||
TfLiteStatus LessEqualEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
bool requires_broadcast = !HaveSameShapes(input1, input2);
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
const TfLiteEvalTensor* input1 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor1);
|
||||
const TfLiteEvalTensor* input2 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor2);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
|
||||
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
|
||||
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
|
||||
bool* output_data = tflite::micro::GetTensorData<bool>(output);
|
||||
|
||||
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
|
||||
switch (input1->type) {
|
||||
case kTfLiteFloat32:
|
||||
TF_LITE_COMPARISON(float, LessEqual, requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowLessEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<float>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<float>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::LessEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<float>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<float>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt32:
|
||||
TF_LITE_COMPARISON(int32_t, LessEqual, requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowLessEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::LessEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt64:
|
||||
TF_LITE_COMPARISON(int64_t, LessEqual, requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowLessEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::LessEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
EvalQuantizedLessEqual<uint8_t>(context, node, input1, input2, output,
|
||||
requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowLessEqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::LessEqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
EvalQuantizedLessEqual<int8_t>(context, node, input1, input2, output,
|
||||
requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowLessEqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::LessEqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
@@ -291,78 +608,115 @@ TfLiteStatus LessEqualEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
|
||||
TF_LITE_ENSURE(context, input1 != nullptr);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
|
||||
TF_LITE_ENSURE(context, input2 != nullptr);
|
||||
|
||||
if (input1->type == kTfLiteUInt8 || input1->type == kTfLiteInt8) {
|
||||
auto input1_offset = -input1->params.zero_point;
|
||||
auto input2_offset = -input2->params.zero_point;
|
||||
const int kLeftShift = 8;
|
||||
|
||||
int32_t input1_multiplier;
|
||||
int input1_shift;
|
||||
QuantizeMultiplierSmallerThanOneExp(
|
||||
static_cast<double>(input1->params.scale), &input1_multiplier,
|
||||
&input1_shift);
|
||||
int32_t input2_multiplier;
|
||||
int input2_shift;
|
||||
QuantizeMultiplierSmallerThanOneExp(
|
||||
static_cast<double>(input2->params.scale), &input2_multiplier,
|
||||
&input2_shift);
|
||||
|
||||
data->params.left_shift = kLeftShift;
|
||||
data->params.input1_offset = input1_offset;
|
||||
data->params.input1_multiplier = input1_multiplier;
|
||||
data->params.input1_shift = input1_shift;
|
||||
data->params.input2_offset = input2_offset;
|
||||
data->params.input2_multiplier = input2_multiplier;
|
||||
data->params.input2_shift = input2_shift;
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace comparisons
|
||||
|
||||
TfLiteRegistration* Register_EQUAL() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/comparisons::EqualEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_EQUAL() {
|
||||
return {/*init=*/comparisons::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/comparisons::Prepare,
|
||||
/*invoke=*/comparisons::EqualEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_NOT_EQUAL() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/comparisons::NotEqualEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_NOT_EQUAL() {
|
||||
return {/*init=*/comparisons::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/comparisons::Prepare,
|
||||
/*invoke=*/comparisons::NotEqualEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_GREATER() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/comparisons::GreaterEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_GREATER() {
|
||||
return {/*init=*/comparisons::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/comparisons::Prepare,
|
||||
/*invoke=*/comparisons::GreaterEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_GREATER_EQUAL() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/comparisons::GreaterEqualEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_GREATER_EQUAL() {
|
||||
return {/*init=*/comparisons::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/comparisons::Prepare,
|
||||
/*invoke=*/comparisons::GreaterEqualEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_LESS() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/comparisons::LessEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_LESS() {
|
||||
return {/*init=*/comparisons::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/comparisons::Prepare,
|
||||
/*invoke=*/comparisons::LessEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_LESS_EQUAL() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/comparisons::LessEqualEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_LESS_EQUAL() {
|
||||
return {/*init=*/comparisons::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/comparisons::Prepare,
|
||||
/*invoke=*/comparisons::LessEqualEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@@ -18,10 +18,11 @@ limitations under the License.
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor.h"
|
||||
#include "tensorflow/lite/kernels/internal/portable_tensor.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -31,14 +32,116 @@ namespace concatenation {
|
||||
constexpr int kMaxInputNum = 10; // Maximum number of input tensors
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
struct OpData {
|
||||
ConcatenationParams params;
|
||||
};
|
||||
|
||||
// Handles negative axis index, coerces to positive index value.
|
||||
inline int CalculatePositiveAxis(int axis, const TfLiteTensor* output_tensor) {
|
||||
if (axis >= 0) {
|
||||
return axis;
|
||||
} else {
|
||||
return NumDimensions(output_tensor) + axis;
|
||||
}
|
||||
}
|
||||
|
||||
// The following functions are helpers to get tensor data in the format that the
|
||||
// reference op implementation expects. They provide the same functionality as
|
||||
// class VectorOfTensors and class VectorOfQuantizedTensors in TFLite.
|
||||
|
||||
// Gets shapes from a list of tensors.
|
||||
inline void GetAllInputTensorShapes(const TfLiteContext* context,
|
||||
const TfLiteNode* node,
|
||||
RuntimeShape all_shapes[kMaxInputNum]) {
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
TFLITE_DCHECK(node != nullptr);
|
||||
for (int i = 0; i < node->inputs->size; ++i) {
|
||||
const TfLiteEvalTensor* t = tflite::micro::GetEvalInput(context, node, i);
|
||||
RuntimeShape shape = tflite::micro::GetTensorShape(t);
|
||||
all_shapes[i].ReplaceWith(shape.DimensionsCount(), shape.DimsData());
|
||||
}
|
||||
}
|
||||
|
||||
// Get shape pointers from a list of shapes.
|
||||
inline void GetShapesPointers(const RuntimeShape* shapes, size_t num,
|
||||
const RuntimeShape* pointers[]) {
|
||||
for (size_t i = 0; i < num; ++i) {
|
||||
pointers[i] = &shapes[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Gets data pointers from a list of tensors.
|
||||
template <typename T>
|
||||
inline void GetAllInputTensorData(const TfLiteContext* context,
|
||||
const TfLiteNode* node,
|
||||
T* all_data[kMaxInputNum]) {
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
TFLITE_DCHECK(node != nullptr);
|
||||
for (int i = 0; i < node->inputs->size; ++i) {
|
||||
const TfLiteEvalTensor* t = tflite::micro::GetEvalInput(context, node, i);
|
||||
all_data[i] = tflite::micro::GetTensorData<T>(t);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename data_type>
|
||||
void EvalUnquantized(TfLiteContext* context, TfLiteNode* node) {
|
||||
// Collect the shapes and data pointer of input tensors
|
||||
RuntimeShape inputs_shape[kMaxInputNum];
|
||||
const RuntimeShape* inputs_shape_ptr[kMaxInputNum];
|
||||
const data_type* inputs_data[kMaxInputNum];
|
||||
GetAllInputTensorShapes(context, node, inputs_shape);
|
||||
GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr);
|
||||
GetAllInputTensorData(context, node, inputs_data);
|
||||
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
reference_ops::Concatenation(data->params, inputs_shape_ptr, inputs_data,
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<data_type>(output));
|
||||
}
|
||||
|
||||
void EvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node) {
|
||||
// Collect the shapes and data pointer of input tensors
|
||||
RuntimeShape inputs_shape[kMaxInputNum];
|
||||
const RuntimeShape* inputs_shape_ptr[kMaxInputNum];
|
||||
const uint8_t* inputs_data[kMaxInputNum];
|
||||
GetAllInputTensorShapes(context, node, inputs_shape);
|
||||
GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr);
|
||||
GetAllInputTensorData(context, node, inputs_data);
|
||||
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
reference_ops::ConcatenationWithScaling(
|
||||
data->params, inputs_shape_ptr, inputs_data,
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
}
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
// This function only checks the types. Additional shape validations are
|
||||
// performed in the reference implementation called during Eval().
|
||||
const TfLiteConcatenationParams* params =
|
||||
reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data);
|
||||
|
||||
TfLiteType input_type = GetInput(context, node, 0)->type;
|
||||
TfLiteType output_type = GetOutput(context, node, kOutputTensor)->type;
|
||||
const TfLiteTensor* input_tensor = GetInput(context, node, 0);
|
||||
TF_LITE_ENSURE(context, input_tensor != nullptr);
|
||||
TfLiteType input_type = input_tensor->type;
|
||||
const TfLiteTensor* output_tensor = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output_tensor != nullptr);
|
||||
TfLiteType output_type = output_tensor->type;
|
||||
|
||||
// Check activation and input type
|
||||
TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone);
|
||||
@@ -57,133 +160,76 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
// Shapes with dimensions >4 are not yet supported with static allocation.
|
||||
for (int i = 0; i < num_inputs; ++i) {
|
||||
const TfLiteTensor* input = GetInput(context, node, i);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
int num_dimensions = NumDimensions(input);
|
||||
|
||||
if (num_dimensions > 4) {
|
||||
TF_LITE_KERNEL_LOG(
|
||||
context,
|
||||
"Op Concatenation does not currently support num dimensions >4 "
|
||||
"Tensor '%s' has %d dimensions.",
|
||||
input->name, num_dimensions);
|
||||
"Tensor has %d dimensions.",
|
||||
num_dimensions);
|
||||
return kTfLiteError;
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate OpData.
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
|
||||
switch (output_type) { // Already know in/outtypes are same.
|
||||
case kTfLiteFloat32:
|
||||
case kTfLiteInt32:
|
||||
case kTfLiteInt64: {
|
||||
data->params.axis = CalculatePositiveAxis(params->axis, output);
|
||||
data->params.inputs_count = node->inputs->size;
|
||||
break;
|
||||
}
|
||||
case kTfLiteUInt8:
|
||||
case kTfLiteInt8: {
|
||||
data->params.axis = CalculatePositiveAxis(params->axis, output);
|
||||
data->params.inputs_count = node->inputs->size;
|
||||
|
||||
float* input_scales =
|
||||
reinterpret_cast<float*>(context->AllocatePersistentBuffer(
|
||||
context, node->inputs->size * sizeof(float)));
|
||||
|
||||
int32_t* input_zero_points =
|
||||
reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
|
||||
context, node->inputs->size * sizeof(int32_t)));
|
||||
|
||||
// Allocate persistent scale and zeropoint buffers.
|
||||
// Store input scale and zero point values in OpParams:
|
||||
for (int i = 0; i < node->inputs->size; ++i) {
|
||||
const TfLiteTensor* t = GetInput(context, node, i);
|
||||
TF_LITE_ENSURE(context, t != nullptr);
|
||||
input_scales[i] = t->params.scale;
|
||||
input_zero_points[i] = t->params.zero_point;
|
||||
}
|
||||
|
||||
data->params.input_scale = input_scales;
|
||||
data->params.input_zeropoint = input_zero_points;
|
||||
data->params.output_zeropoint = output->params.zero_point;
|
||||
data->params.output_scale = output->params.scale;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(
|
||||
context, "Op Concatenation does not currently support Type '%s'.",
|
||||
TfLiteTypeGetName(output_type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
// Handles negative axis index, coerces to positive index value.
|
||||
inline int CalculatePositiveAxis(int axis, const TfLiteTensor* output_tensor) {
|
||||
if (axis >= 0) {
|
||||
return axis;
|
||||
} else {
|
||||
return NumDimensions(output_tensor) + axis;
|
||||
}
|
||||
}
|
||||
|
||||
// The following functions are helpers to get tensor data in the format that the
|
||||
// reference op implementation expects. They provide the same functionality as
|
||||
// class VectorOfTensors and class VectorOfQuantizedTensors in TFLite.
|
||||
|
||||
// Gets shapes from a list of tensors.
|
||||
inline void GetAllTensorShapes(const TfLiteContext& context,
|
||||
const TfLiteIntArray& tensor_list,
|
||||
RuntimeShape all_shapes[kMaxInputNum]) {
|
||||
for (int i = 0; i < tensor_list.size; ++i) {
|
||||
const TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
|
||||
RuntimeShape shape = GetTensorShape(t);
|
||||
all_shapes[i].ReplaceWith(shape.DimensionsCount(), shape.DimsData());
|
||||
}
|
||||
}
|
||||
|
||||
// Get shape pointers from a list of shapes.
|
||||
inline void GetShapesPointers(const RuntimeShape* shapes, size_t num,
|
||||
const RuntimeShape* pointers[]) {
|
||||
for (size_t i = 0; i < num; ++i) {
|
||||
pointers[i] = &shapes[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Gets data pointers from a list of tensors.
|
||||
template <typename T>
|
||||
inline void GetAllTensorData(const TfLiteContext& context,
|
||||
const TfLiteIntArray& tensor_list,
|
||||
T* all_data[kMaxInputNum]) {
|
||||
for (int i = 0; i < tensor_list.size; ++i) {
|
||||
const TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
|
||||
all_data[i] = GetTensorData<T>(t);
|
||||
}
|
||||
}
|
||||
|
||||
// Gets scale and zero point from a list of tensors
|
||||
inline void GetAllQuantizationParam(const TfLiteContext& context,
|
||||
const TfLiteIntArray& tensor_list,
|
||||
float scales[kMaxInputNum],
|
||||
int32 zero_points[kMaxInputNum]) {
|
||||
for (int i = 0; i < tensor_list.size; ++i) {
|
||||
const TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
|
||||
scales[i] = t->params.scale;
|
||||
zero_points[i] = t->params.zero_point;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename data_type>
|
||||
void EvalUnquantized(TfLiteContext* context, TfLiteNode* node) {
|
||||
// Collect the shapes and data pointer of input tensors
|
||||
RuntimeShape inputs_shape[kMaxInputNum];
|
||||
const RuntimeShape* inputs_shape_ptr[kMaxInputNum];
|
||||
const data_type* inputs_data[kMaxInputNum];
|
||||
GetAllTensorShapes(*context, *node->inputs, inputs_shape);
|
||||
GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr);
|
||||
GetAllTensorData(*context, *node->inputs, inputs_data);
|
||||
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
const TfLiteConcatenationParams* params =
|
||||
reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data);
|
||||
|
||||
ConcatenationParams op_params;
|
||||
op_params.axis = CalculatePositiveAxis(params->axis, output);
|
||||
op_params.inputs_count = NumInputs(node);
|
||||
|
||||
reference_ops::Concatenation(op_params, inputs_shape_ptr, inputs_data,
|
||||
GetTensorShape(output),
|
||||
GetTensorData<data_type>(output));
|
||||
}
|
||||
|
||||
void EvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node) {
|
||||
// Collect the shapes and data pointer of input tensors
|
||||
RuntimeShape inputs_shape[kMaxInputNum];
|
||||
const RuntimeShape* inputs_shape_ptr[kMaxInputNum];
|
||||
const uint8_t* inputs_data[kMaxInputNum];
|
||||
float inputs_scale[kMaxInputNum];
|
||||
int32 inputs_zero_point[kMaxInputNum];
|
||||
GetAllTensorShapes(*context, *node->inputs, inputs_shape);
|
||||
GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr);
|
||||
GetAllTensorData(*context, *node->inputs, inputs_data);
|
||||
GetAllQuantizationParam(*context, *node->inputs, inputs_scale,
|
||||
inputs_zero_point);
|
||||
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
const TfLiteConcatenationParams* params =
|
||||
reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data);
|
||||
|
||||
ConcatenationParams op_params;
|
||||
op_params.axis = CalculatePositiveAxis(params->axis, output);
|
||||
op_params.inputs_count = NumInputs(node);
|
||||
op_params.input_zeropoint = inputs_zero_point;
|
||||
op_params.input_scale = inputs_scale;
|
||||
op_params.output_zeropoint = output->params.zero_point;
|
||||
op_params.output_scale = output->params.scale;
|
||||
|
||||
reference_ops::ConcatenationWithScaling(op_params, inputs_shape_ptr,
|
||||
inputs_data, GetTensorShape(output),
|
||||
GetTensorData<uint8>(output));
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TfLiteType output_type = GetOutput(context, node, kOutputTensor)->type;
|
||||
const TfLiteTensor* output_tensor = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output_tensor != nullptr);
|
||||
TfLiteType output_type = output_tensor->type;
|
||||
|
||||
switch (output_type) { // Already know in/outtypes are same.
|
||||
case kTfLiteFloat32:
|
||||
@@ -214,16 +260,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
} // namespace concatenation
|
||||
|
||||
TfLiteRegistration* Register_CONCATENATION() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/concatenation::Prepare,
|
||||
/*invoke=*/concatenation::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_CONCATENATION() {
|
||||
return {/*init=*/concatenation::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/concatenation::Prepare,
|
||||
/*invoke=*/concatenation::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@@ -1,279 +0,0 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/conv.h"
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/conv.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/padding.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace conv {
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kFilterTensor = 1;
|
||||
constexpr int kBiasTensor = 2;
|
||||
constexpr int kOutputTensor = 0;
|
||||
// Angepasst jomjol 05.06.20
|
||||
//constexpr int kMaxChannels = 1024;
|
||||
constexpr int kMaxChannels = 4096;
|
||||
|
||||
// Conv is quantized along dimension 0:
|
||||
// https://www.tensorflow.org/lite/performance/quantization_spec
|
||||
constexpr int kConvQuantizedDimension = 0;
|
||||
|
||||
// This file has 2 implementation of Conv.
|
||||
|
||||
struct OpData {
|
||||
TfLitePaddingValues padding;
|
||||
// The scaling factor from input to output (aka the 'real multiplier') can
|
||||
// be represented as a fixed point multiplier plus a left shift.
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
|
||||
// Per channel output multiplier and shift.
|
||||
// TODO(b/141139247): Allocate these dynamically when possible.
|
||||
int32_t per_channel_output_multiplier[kMaxChannels];
|
||||
int32_t per_channel_output_shift[kMaxChannels];
|
||||
|
||||
// The range of the fused activation layer. For example for kNone and
|
||||
// uint8_t these would be 0 and 255.
|
||||
int32_t output_activation_min;
|
||||
int32_t output_activation_max;
|
||||
};
|
||||
|
||||
inline PaddingType RuntimePaddingType(TfLitePadding padding) {
|
||||
switch (padding) {
|
||||
case TfLitePadding::kTfLitePaddingSame:
|
||||
return PaddingType::kSame;
|
||||
case TfLitePadding::kTfLitePaddingValid:
|
||||
return PaddingType::kValid;
|
||||
case TfLitePadding::kTfLitePaddingUnknown:
|
||||
default:
|
||||
return PaddingType::kNone;
|
||||
}
|
||||
}
|
||||
|
||||
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteConvParams* params, int width, int height,
|
||||
int filter_width, int filter_height, int out_width,
|
||||
int out_height, const TfLiteType data_type,
|
||||
OpData* data) {
|
||||
bool has_bias = node->inputs->size == 3;
|
||||
// Check number of inputs/outputs
|
||||
TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2);
|
||||
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
|
||||
|
||||
// Matching GetWindowedOutputSize in TensorFlow.
|
||||
auto padding = params->padding;
|
||||
data->padding = ComputePaddingHeightWidth(
|
||||
params->stride_height, params->stride_width,
|
||||
params->dilation_height_factor, params->dilation_width_factor, height,
|
||||
width, filter_height, filter_width, padding, &out_height, &out_width);
|
||||
|
||||
// Note that quantized inference requires that all tensors have their
|
||||
// parameters set. This is usually done during quantized training.
|
||||
if (data_type != kTfLiteFloat32) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
|
||||
const TfLiteTensor* bias =
|
||||
GetOptionalInputTensor(context, node, kBiasTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
int output_channels = filter->dims->data[kConvQuantizedDimension];
|
||||
|
||||
TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
|
||||
context, input, filter, bias, output, params->activation,
|
||||
&data->output_multiplier, &data->output_shift,
|
||||
&data->output_activation_min, &data->output_activation_max,
|
||||
data->per_channel_output_multiplier,
|
||||
reinterpret_cast<int*>(data->per_channel_output_shift),
|
||||
output_channels));
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteConvParams* params, OpData* data,
|
||||
const TfLiteTensor* input, const TfLiteTensor* filter,
|
||||
const TfLiteTensor* bias, TfLiteTensor* im2col,
|
||||
TfLiteTensor* hwcn_weights, TfLiteTensor* output) {
|
||||
const int32_t input_offset = -input->params.zero_point;
|
||||
const int32_t filter_offset = -filter->params.zero_point;
|
||||
const int32_t output_offset = output->params.zero_point;
|
||||
|
||||
ConvParams op_params;
|
||||
op_params.padding_type = RuntimePaddingType(params->padding);
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.input_offset = input_offset;
|
||||
op_params.weights_offset = filter_offset;
|
||||
op_params.output_offset = output_offset;
|
||||
op_params.output_multiplier = data->output_multiplier;
|
||||
op_params.output_shift = -data->output_shift;
|
||||
op_params.quantized_activation_min = data->output_activation_min;
|
||||
op_params.quantized_activation_max = data->output_activation_max;
|
||||
reference_ops::Conv(op_params, GetTensorShape(input),
|
||||
GetTensorData<uint8_t>(input), GetTensorShape(filter),
|
||||
GetTensorData<uint8_t>(filter), GetTensorShape(bias),
|
||||
GetTensorData<int32_t>(bias), GetTensorShape(output),
|
||||
GetTensorData<uint8_t>(output), GetTensorShape(im2col),
|
||||
GetTensorData<uint8_t>(im2col), nullptr);
|
||||
}
|
||||
|
||||
void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteConvParams* params, OpData* data,
|
||||
const TfLiteTensor* input,
|
||||
const TfLiteTensor* filter,
|
||||
const TfLiteTensor* bias, TfLiteTensor* output,
|
||||
TfLiteTensor* im2col) {
|
||||
ConvParams op_params;
|
||||
op_params.input_offset = -input->params.zero_point;
|
||||
op_params.output_offset = output->params.zero_point;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.quantized_activation_min = data->output_activation_min;
|
||||
op_params.quantized_activation_max = data->output_activation_max;
|
||||
|
||||
reference_integer_ops::ConvPerChannel(
|
||||
op_params, data->per_channel_output_multiplier,
|
||||
data->per_channel_output_shift, GetTensorShape(input),
|
||||
GetTensorData<int8>(input), GetTensorShape(filter),
|
||||
GetTensorData<int8>(filter), GetTensorShape(bias),
|
||||
GetTensorData<int32>(bias), GetTensorShape(output),
|
||||
GetTensorData<int8>(output));
|
||||
}
|
||||
|
||||
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteConvParams* params, OpData* data,
|
||||
const TfLiteTensor* input, const TfLiteTensor* filter,
|
||||
const TfLiteTensor* bias, TfLiteTensor* im2col,
|
||||
TfLiteTensor* hwcn_weights, TfLiteTensor* output) {
|
||||
float output_activation_min, output_activation_max;
|
||||
CalculateActivationRange(params->activation, &output_activation_min,
|
||||
&output_activation_max);
|
||||
|
||||
ConvParams op_params;
|
||||
op_params.padding_type = RuntimePaddingType(params->padding);
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.float_activation_min = output_activation_min;
|
||||
op_params.float_activation_max = output_activation_max;
|
||||
|
||||
reference_ops::Conv(op_params, GetTensorShape(input),
|
||||
GetTensorData<float>(input), GetTensorShape(filter),
|
||||
GetTensorData<float>(filter), GetTensorShape(bias),
|
||||
GetTensorData<float>(bias), GetTensorShape(output),
|
||||
GetTensorData<float>(output), GetTensorShape(im2col),
|
||||
GetTensorData<float>(im2col));
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);
|
||||
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
|
||||
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
|
||||
|
||||
int input_width = input->dims->data[2];
|
||||
int input_height = input->dims->data[1];
|
||||
int filter_width = filter->dims->data[2];
|
||||
int filter_height = filter->dims->data[1];
|
||||
int output_width = output->dims->data[2];
|
||||
int output_height = output->dims->data[1];
|
||||
|
||||
OpData data;
|
||||
|
||||
// All per-channel quantized tensors need valid zero point and scale arrays.
|
||||
if (input->type == kTfLiteInt8) {
|
||||
TF_LITE_ENSURE_EQ(context, filter->quantization.type,
|
||||
kTfLiteAffineQuantization);
|
||||
|
||||
const auto* affine_quantization =
|
||||
reinterpret_cast<TfLiteAffineQuantization*>(
|
||||
filter->quantization.params);
|
||||
TF_LITE_ENSURE(context, affine_quantization);
|
||||
TF_LITE_ENSURE(context, affine_quantization->scale);
|
||||
TF_LITE_ENSURE(context, affine_quantization->zero_point);
|
||||
|
||||
TF_LITE_ENSURE(context,
|
||||
affine_quantization->scale->size == 1 ||
|
||||
affine_quantization->scale->size ==
|
||||
filter->dims->data[kConvQuantizedDimension]);
|
||||
TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size,
|
||||
affine_quantization->zero_point->size);
|
||||
}
|
||||
|
||||
TF_LITE_ENSURE_STATUS(CalculateOpData(
|
||||
context, node, params, input_width, input_height, filter_width,
|
||||
filter_height, output_width, output_height, input->type, &data));
|
||||
|
||||
switch (input->type) { // Already know in/out types are same.
|
||||
case kTfLiteFloat32:
|
||||
EvalFloat(context, node, params, &data, input, filter, bias, nullptr,
|
||||
nullptr, output);
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
EvalQuantizedPerChannel(context, node, params, &data, input, filter, bias,
|
||||
output, nullptr);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
EvalQuantized(context, node, params, &data, input, filter, bias, nullptr,
|
||||
nullptr, output);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(input->type), input->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace conv
|
||||
|
||||
TfLiteRegistration* Register_CONV_2D() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/conv::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
@@ -23,19 +23,15 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/padding.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace conv {
|
||||
namespace {
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kFilterTensor = 1;
|
||||
constexpr int kBiasTensor = 2;
|
||||
constexpr int kOutputTensor = 0;
|
||||
// Angepasst jomjol 05.06.20
|
||||
//constexpr int kMaxChannels = 1024;
|
||||
constexpr int kMaxChannels = 32384;
|
||||
|
||||
// Conv is quantized along dimension 0:
|
||||
// https://www.tensorflow.org/lite/performance/quantization_spec
|
||||
@@ -45,15 +41,20 @@ constexpr int kConvQuantizedDimension = 0;
|
||||
|
||||
struct OpData {
|
||||
TfLitePaddingValues padding;
|
||||
|
||||
// Cached tensor zero point values for quantized operations.
|
||||
int32_t input_zero_point;
|
||||
int32_t filter_zero_point;
|
||||
int32_t output_zero_point;
|
||||
|
||||
// The scaling factor from input to output (aka the 'real multiplier') can
|
||||
// be represented as a fixed point multiplier plus a left shift.
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
|
||||
// Per channel output multiplier and shift.
|
||||
// TODO(b/141139247): Allocate these dynamically when possible.
|
||||
int32_t per_channel_output_multiplier[kMaxChannels];
|
||||
int32_t per_channel_output_shift[kMaxChannels];
|
||||
int32_t* per_channel_output_multiplier;
|
||||
int32_t* per_channel_output_shift;
|
||||
|
||||
// The range of the fused activation layer. For example for kNone and
|
||||
// uint8_t these would be 0 and 255.
|
||||
@@ -74,10 +75,10 @@ inline PaddingType RuntimePaddingType(TfLitePadding padding) {
|
||||
}
|
||||
|
||||
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteConvParams* params, int width, int height,
|
||||
int filter_width, int filter_height, int out_width,
|
||||
int out_height, const TfLiteType data_type,
|
||||
OpData* data) {
|
||||
const TfLiteConvParams* params, int width,
|
||||
int height, int filter_width, int filter_height,
|
||||
int out_width, int out_height,
|
||||
const TfLiteType data_type, OpData* data) {
|
||||
bool has_bias = node->inputs->size == 3;
|
||||
// Check number of inputs/outputs
|
||||
TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2);
|
||||
@@ -94,10 +95,13 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
|
||||
// parameters set. This is usually done during quantized training.
|
||||
if (data_type != kTfLiteFloat32) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
|
||||
TF_LITE_ENSURE(context, filter != nullptr);
|
||||
const TfLiteTensor* bias =
|
||||
GetOptionalInputTensor(context, node, kBiasTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
int output_channels = filter->dims->data[kConvQuantizedDimension];
|
||||
|
||||
TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
|
||||
@@ -111,100 +115,24 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteConvParams* params, OpData* data,
|
||||
const TfLiteTensor* input, const TfLiteTensor* filter,
|
||||
const TfLiteTensor* bias, TfLiteTensor* im2col,
|
||||
TfLiteTensor* hwcn_weights, TfLiteTensor* output) {
|
||||
const int32_t input_offset = -input->params.zero_point;
|
||||
const int32_t filter_offset = -filter->params.zero_point;
|
||||
const int32_t output_offset = output->params.zero_point;
|
||||
|
||||
ConvParams op_params;
|
||||
op_params.padding_type = RuntimePaddingType(params->padding);
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.input_offset = input_offset;
|
||||
op_params.weights_offset = filter_offset;
|
||||
op_params.output_offset = output_offset;
|
||||
op_params.output_multiplier = data->output_multiplier;
|
||||
op_params.output_shift = -data->output_shift;
|
||||
op_params.quantized_activation_min = data->output_activation_min;
|
||||
op_params.quantized_activation_max = data->output_activation_max;
|
||||
reference_ops::Conv(op_params, GetTensorShape(input),
|
||||
GetTensorData<uint8_t>(input), GetTensorShape(filter),
|
||||
GetTensorData<uint8_t>(filter), GetTensorShape(bias),
|
||||
GetTensorData<int32_t>(bias), GetTensorShape(output),
|
||||
GetTensorData<uint8_t>(output), GetTensorShape(im2col),
|
||||
GetTensorData<uint8_t>(im2col), nullptr);
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteConvParams* params, OpData* data,
|
||||
const TfLiteTensor* input,
|
||||
const TfLiteTensor* filter,
|
||||
const TfLiteTensor* bias, TfLiteTensor* output,
|
||||
TfLiteTensor* im2col) {
|
||||
ConvParams op_params;
|
||||
op_params.input_offset = -input->params.zero_point;
|
||||
op_params.output_offset = output->params.zero_point;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.quantized_activation_min = data->output_activation_min;
|
||||
op_params.quantized_activation_max = data->output_activation_max;
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
|
||||
reference_integer_ops::ConvPerChannel(
|
||||
op_params, data->per_channel_output_multiplier,
|
||||
data->per_channel_output_shift, GetTensorShape(input),
|
||||
GetTensorData<int8>(input), GetTensorShape(filter),
|
||||
GetTensorData<int8>(filter), GetTensorShape(bias),
|
||||
GetTensorData<int32>(bias), GetTensorShape(output),
|
||||
GetTensorData<int8>(output));
|
||||
}
|
||||
|
||||
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteConvParams* params, OpData* data,
|
||||
const TfLiteTensor* input, const TfLiteTensor* filter,
|
||||
const TfLiteTensor* bias, TfLiteTensor* im2col,
|
||||
TfLiteTensor* hwcn_weights, TfLiteTensor* output) {
|
||||
float output_activation_min, output_activation_max;
|
||||
CalculateActivationRange(params->activation, &output_activation_min,
|
||||
&output_activation_max);
|
||||
|
||||
ConvParams op_params;
|
||||
op_params.padding_type = RuntimePaddingType(params->padding);
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.float_activation_min = output_activation_min;
|
||||
op_params.float_activation_max = output_activation_max;
|
||||
|
||||
reference_ops::Conv(op_params, GetTensorShape(input),
|
||||
GetTensorData<float>(input), GetTensorShape(filter),
|
||||
GetTensorData<float>(filter), GetTensorShape(bias),
|
||||
GetTensorData<float>(bias), GetTensorShape(output),
|
||||
GetTensorData<float>(output), GetTensorShape(im2col),
|
||||
GetTensorData<float>(im2col));
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
const auto params = static_cast<const TfLiteConvParams*>(node->builtin_data);
|
||||
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
|
||||
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
|
||||
TF_LITE_ENSURE(context, filter != nullptr);
|
||||
|
||||
int input_width = input->dims->data[2];
|
||||
int input_height = input->dims->data[1];
|
||||
@@ -212,9 +140,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
int filter_height = filter->dims->data[1];
|
||||
int output_width = output->dims->data[2];
|
||||
int output_height = output->dims->data[1];
|
||||
|
||||
|
||||
struct tflite::ops::micro::conv::OpData *data = (struct tflite::ops::micro::conv::OpData*) malloc(sizeof(struct tflite::ops::micro::conv::OpData));
|
||||
// Dynimically allocate per-channel quantization parameters.
|
||||
const int num_channels = filter->dims->data[kConvQuantizedDimension];
|
||||
data->per_channel_output_multiplier =
|
||||
static_cast<int32_t*>(context->AllocatePersistentBuffer(
|
||||
context, num_channels * sizeof(int32_t)));
|
||||
data->per_channel_output_shift =
|
||||
static_cast<int32_t*>(context->AllocatePersistentBuffer(
|
||||
context, num_channels * sizeof(int32_t)));
|
||||
|
||||
// All per-channel quantized tensors need valid zero point and scale arrays.
|
||||
if (input->type == kTfLiteInt8) {
|
||||
@@ -222,8 +156,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
kTfLiteAffineQuantization);
|
||||
|
||||
const auto* affine_quantization =
|
||||
reinterpret_cast<TfLiteAffineQuantization*>(
|
||||
filter->quantization.params);
|
||||
static_cast<TfLiteAffineQuantization*>(filter->quantization.params);
|
||||
TF_LITE_ENSURE(context, affine_quantization);
|
||||
TF_LITE_ENSURE(context, affine_quantization->scale);
|
||||
TF_LITE_ENSURE(context, affine_quantization->zero_point);
|
||||
@@ -240,6 +173,136 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
context, node, params, input_width, input_height, filter_width,
|
||||
filter_height, output_width, output_height, input->type, data));
|
||||
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
data->filter_zero_point = filter->params.zero_point;
|
||||
data->output_zero_point = output->params.zero_point;
|
||||
|
||||
return kTfLiteOk;
|
||||
} // namespace conv
|
||||
|
||||
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteConvParams* params, const OpData& data,
|
||||
const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias,
|
||||
TfLiteEvalTensor* im2col, TfLiteEvalTensor* hwcn_weights,
|
||||
TfLiteEvalTensor* output) {
|
||||
const int32_t input_offset = -data.input_zero_point;
|
||||
const int32_t filter_offset = -data.filter_zero_point;
|
||||
const int32_t output_offset = data.output_zero_point;
|
||||
|
||||
// TODO(b/154032858): Investigate removing extra copies.
|
||||
ConvParams op_params;
|
||||
op_params.padding_type = RuntimePaddingType(params->padding);
|
||||
op_params.padding_values.width = data.padding.width;
|
||||
op_params.padding_values.height = data.padding.height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.input_offset = input_offset;
|
||||
op_params.weights_offset = filter_offset;
|
||||
op_params.output_offset = output_offset;
|
||||
op_params.output_multiplier = data.output_multiplier;
|
||||
op_params.output_shift = -data.output_shift;
|
||||
op_params.quantized_activation_min = data.output_activation_min;
|
||||
op_params.quantized_activation_max = data.output_activation_max;
|
||||
reference_ops::Conv(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<uint8_t>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<int32_t>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output),
|
||||
tflite::micro::GetTensorShape(im2col),
|
||||
tflite::micro::GetTensorData<uint8_t>(im2col), nullptr);
|
||||
}
|
||||
|
||||
void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteConvParams* params, const OpData& data,
|
||||
const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter,
|
||||
const TfLiteEvalTensor* bias,
|
||||
TfLiteEvalTensor* output,
|
||||
TfLiteEvalTensor* im2col) {
|
||||
// TODO(b/154032858): Investigate removing extra copies.
|
||||
ConvParams op_params;
|
||||
op_params.input_offset = -data.input_zero_point;
|
||||
op_params.output_offset = data.output_zero_point;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.padding_values.height = data.padding.height;
|
||||
op_params.padding_values.width = data.padding.width;
|
||||
op_params.quantized_activation_min = data.output_activation_min;
|
||||
op_params.quantized_activation_max = data.output_activation_max;
|
||||
|
||||
reference_integer_ops::ConvPerChannel(
|
||||
op_params, data.per_channel_output_multiplier,
|
||||
data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<int8_t>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<int32_t>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
|
||||
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteConvParams* params, const OpData& data,
|
||||
const TfLiteEvalTensor* input, const TfLiteEvalTensor* filter,
|
||||
const TfLiteEvalTensor* bias, TfLiteEvalTensor* im2col,
|
||||
TfLiteEvalTensor* hwcn_weights, TfLiteEvalTensor* output) {
|
||||
float output_activation_min, output_activation_max;
|
||||
CalculateActivationRange(params->activation, &output_activation_min,
|
||||
&output_activation_max);
|
||||
// TODO(b/154032858): Investigate removing extra copies.
|
||||
ConvParams op_params;
|
||||
op_params.padding_type = RuntimePaddingType(params->padding);
|
||||
op_params.padding_values.width = data.padding.width;
|
||||
op_params.padding_values.height = data.padding.height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.float_activation_min = output_activation_min;
|
||||
op_params.float_activation_max = output_activation_max;
|
||||
|
||||
reference_ops::Conv(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<float>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<float>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output),
|
||||
tflite::micro::GetTensorShape(im2col),
|
||||
tflite::micro::GetTensorData<float>(im2col));
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
const TfLiteEvalTensor* filter =
|
||||
tflite::micro::GetEvalInput(context, node, kFilterTensor);
|
||||
const TfLiteEvalTensor* bias =
|
||||
(NumInputs(node) == 3)
|
||||
? tflite::micro::GetEvalInput(context, node, kBiasTensor)
|
||||
: nullptr;
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData& data = *(static_cast<const OpData*>(node->user_data));
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, input->type, output->type);
|
||||
TF_LITE_ENSURE_MSG(context, input->type == filter->type,
|
||||
"Hybrid models are not supported on TFLite Micro.");
|
||||
|
||||
switch (input->type) { // Already know in/out types are same.
|
||||
case kTfLiteFloat32:
|
||||
EvalFloat(context, node, params, data, input, filter, bias, nullptr,
|
||||
@@ -256,27 +319,22 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(input->type), input->type);
|
||||
free(data);
|
||||
return kTfLiteError;
|
||||
}
|
||||
free(data);
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace conv
|
||||
} // namespace
|
||||
|
||||
TfLiteRegistration* Register_CONV_2D() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/conv::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_CONV_2D() {
|
||||
return {/*init=*/Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/Prepare,
|
||||
/*invoke=*/Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
|
||||
@@ -24,18 +24,15 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/padding.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace depthwise_conv {
|
||||
namespace {
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kFilterTensor = 1;
|
||||
constexpr int kBiasTensor = 2;
|
||||
constexpr int kOutputTensor = 0;
|
||||
constexpr int kMaxChannels = 1024;
|
||||
|
||||
// Depthwise conv is quantized along dimension 3:
|
||||
// https://www.tensorflow.org/lite/performance/quantization_spec
|
||||
@@ -43,16 +40,20 @@ constexpr int kDepthwiseConvQuantizedDimension = 3;
|
||||
|
||||
struct OpData {
|
||||
TfLitePaddingValues padding;
|
||||
|
||||
// Cached tensor zero point values for quantized operations.
|
||||
int32_t input_zero_point;
|
||||
int32_t filter_zero_point;
|
||||
int32_t output_zero_point;
|
||||
|
||||
// The scaling factor from input to output (aka the 'real multiplier') can
|
||||
// be represented as a fixed point multiplier plus a left shift.
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
|
||||
// Per channel output multiplier and shift.
|
||||
// TODO(b/141139247): Allocate these dynamically when possible.
|
||||
int32_t per_channel_output_multiplier[kMaxChannels];
|
||||
int32_t per_channel_output_shift[kMaxChannels];
|
||||
|
||||
int32_t* per_channel_output_multiplier;
|
||||
int32_t* per_channel_output_shift;
|
||||
// The range of the fused activation layer. For example for kNone and
|
||||
// uint8_t these would be 0 and 255.
|
||||
int32_t output_activation_min;
|
||||
@@ -78,125 +79,44 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
|
||||
// parameters set. This is usually done during quantized training.
|
||||
if (data_type != kTfLiteFloat32) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
|
||||
TF_LITE_ENSURE(context, filter != nullptr);
|
||||
const TfLiteTensor* bias =
|
||||
GetOptionalInputTensor(context, node, kBiasTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
|
||||
|
||||
TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
|
||||
return tflite::PopulateConvolutionQuantizationParams(
|
||||
context, input, filter, bias, output, params->activation,
|
||||
&data->output_multiplier, &data->output_shift,
|
||||
&data->output_activation_min, &data->output_activation_max,
|
||||
data->per_channel_output_multiplier,
|
||||
reinterpret_cast<int*>(data->per_channel_output_shift), num_channels));
|
||||
reinterpret_cast<int*>(data->per_channel_output_shift), num_channels);
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteDepthwiseConvParams* params, OpData* data,
|
||||
const TfLiteTensor* input, const TfLiteTensor* filter,
|
||||
const TfLiteTensor* bias, TfLiteTensor* output) {
|
||||
float output_activation_min, output_activation_max;
|
||||
CalculateActivationRange(params->activation, &output_activation_min,
|
||||
&output_activation_max);
|
||||
|
||||
tflite::DepthwiseParams op_params;
|
||||
// Padding type is ignored, but still set.
|
||||
op_params.padding_type = PaddingType::kSame;
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.depth_multiplier = params->depth_multiplier;
|
||||
op_params.float_activation_min = output_activation_min;
|
||||
op_params.float_activation_max = output_activation_max;
|
||||
|
||||
tflite::reference_ops::DepthwiseConv(
|
||||
op_params, GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(filter), GetTensorData<float>(filter),
|
||||
GetTensorShape(bias), GetTensorData<float>(bias), GetTensorShape(output),
|
||||
GetTensorData<float>(output));
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteDepthwiseConvParams* params, OpData* data,
|
||||
const TfLiteTensor* input,
|
||||
const TfLiteTensor* filter,
|
||||
const TfLiteTensor* bias, TfLiteTensor* output) {
|
||||
DepthwiseParams op_params;
|
||||
op_params.padding_type = PaddingType::kSame;
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.depth_multiplier = params->depth_multiplier;
|
||||
op_params.input_offset = -input->params.zero_point;
|
||||
op_params.weights_offset = 0;
|
||||
op_params.output_offset = output->params.zero_point;
|
||||
// TODO(b/130439627): Use calculated value for clamping.
|
||||
op_params.quantized_activation_min = std::numeric_limits<int8_t>::min();
|
||||
op_params.quantized_activation_max = std::numeric_limits<int8_t>::max();
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
|
||||
reference_integer_ops::DepthwiseConvPerChannel(
|
||||
op_params, data->per_channel_output_multiplier,
|
||||
data->per_channel_output_shift, GetTensorShape(input),
|
||||
GetTensorData<int8>(input), GetTensorShape(filter),
|
||||
GetTensorData<int8>(filter), GetTensorShape(bias),
|
||||
GetTensorData<int32>(bias), GetTensorShape(output),
|
||||
GetTensorData<int8>(output));
|
||||
}
|
||||
|
||||
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteDepthwiseConvParams* params, OpData* data,
|
||||
const TfLiteTensor* input, const TfLiteTensor* filter,
|
||||
const TfLiteTensor* bias, TfLiteTensor* output) {
|
||||
const int32_t input_offset = -input->params.zero_point;
|
||||
const int32_t filter_offset = -filter->params.zero_point;
|
||||
const int32_t output_offset = output->params.zero_point;
|
||||
|
||||
tflite::DepthwiseParams op_params;
|
||||
// Padding type is ignored, but still set.
|
||||
op_params.padding_type = PaddingType::kSame;
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.depth_multiplier = params->depth_multiplier;
|
||||
op_params.quantized_activation_min = data->output_activation_min;
|
||||
op_params.quantized_activation_max = data->output_activation_max;
|
||||
op_params.input_offset = input_offset;
|
||||
op_params.weights_offset = filter_offset;
|
||||
op_params.output_offset = output_offset;
|
||||
op_params.output_multiplier = data->output_multiplier;
|
||||
// Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
|
||||
op_params.output_shift = -data->output_shift;
|
||||
|
||||
tflite::reference_ops::DepthwiseConv(
|
||||
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
|
||||
GetTensorShape(filter), GetTensorData<uint8_t>(filter),
|
||||
GetTensorShape(bias), GetTensorData<int32_t>(bias),
|
||||
GetTensorShape(output), GetTensorData<uint8_t>(output));
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params =
|
||||
reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
|
||||
const TfLiteTensor* bias =
|
||||
(NumInputs(node) == 3) ? GetInput(context, node, kBiasTensor) : nullptr;
|
||||
TF_LITE_ENSURE(context, filter != nullptr);
|
||||
|
||||
const TfLiteType data_type = input->type;
|
||||
int width = SizeOfDimension(input, 2);
|
||||
@@ -204,7 +124,16 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
int filter_width = SizeOfDimension(filter, 2);
|
||||
int filter_height = SizeOfDimension(filter, 1);
|
||||
|
||||
OpData data;
|
||||
// Per channel quantization is only needed for int8_t inference. For other
|
||||
// quantized types, only a single scale and zero point is needed.
|
||||
const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
|
||||
// Dynimically allocate per-channel quantization parameters.
|
||||
data->per_channel_output_multiplier =
|
||||
reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
|
||||
context, num_channels * sizeof(int32_t)));
|
||||
data->per_channel_output_shift =
|
||||
reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
|
||||
context, num_channels * sizeof(int32_t)));
|
||||
|
||||
// All per-channel quantized tensors need valid zero point and scale arrays.
|
||||
if (input->type == kTfLiteInt8) {
|
||||
@@ -227,20 +156,151 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
TF_LITE_ENSURE_STATUS(CalculateOpData(context, node, params, width, height,
|
||||
filter_width, filter_height, data_type,
|
||||
&data));
|
||||
data));
|
||||
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
data->filter_zero_point = filter->params.zero_point;
|
||||
data->output_zero_point = output->params.zero_point;
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteDepthwiseConvParams* params, const OpData& data,
|
||||
const TfLiteEvalTensor* input, const TfLiteEvalTensor* filter,
|
||||
const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) {
|
||||
float output_activation_min, output_activation_max;
|
||||
CalculateActivationRange(params->activation, &output_activation_min,
|
||||
&output_activation_max);
|
||||
|
||||
tflite::DepthwiseParams op_params;
|
||||
// Padding type is ignored, but still set.
|
||||
op_params.padding_type = PaddingType::kSame;
|
||||
op_params.padding_values.width = data.padding.width;
|
||||
op_params.padding_values.height = data.padding.height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.depth_multiplier = params->depth_multiplier;
|
||||
op_params.float_activation_min = output_activation_min;
|
||||
op_params.float_activation_max = output_activation_max;
|
||||
|
||||
tflite::reference_ops::DepthwiseConv(
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<float>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<float>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
}
|
||||
|
||||
void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteDepthwiseConvParams* params,
|
||||
const OpData& data, const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter,
|
||||
const TfLiteEvalTensor* bias,
|
||||
TfLiteEvalTensor* output) {
|
||||
DepthwiseParams op_params;
|
||||
op_params.padding_type = PaddingType::kSame;
|
||||
op_params.padding_values.width = data.padding.width;
|
||||
op_params.padding_values.height = data.padding.height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.depth_multiplier = params->depth_multiplier;
|
||||
op_params.input_offset = -data.input_zero_point;
|
||||
op_params.weights_offset = 0;
|
||||
op_params.output_offset = data.output_zero_point;
|
||||
// TODO(b/130439627): Use calculated value for clamping.
|
||||
op_params.quantized_activation_min = std::numeric_limits<int8_t>::min();
|
||||
op_params.quantized_activation_max = std::numeric_limits<int8_t>::max();
|
||||
|
||||
reference_integer_ops::DepthwiseConvPerChannel(
|
||||
op_params, data.per_channel_output_multiplier,
|
||||
data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<int8_t>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<int32_t>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
|
||||
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteDepthwiseConvParams* params, const OpData& data,
|
||||
const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias,
|
||||
TfLiteEvalTensor* output) {
|
||||
const int32_t input_offset = -data.input_zero_point;
|
||||
const int32_t filter_offset = -data.filter_zero_point;
|
||||
const int32_t output_offset = data.output_zero_point;
|
||||
|
||||
tflite::DepthwiseParams op_params;
|
||||
// Padding type is ignored, but still set.
|
||||
op_params.padding_type = PaddingType::kSame;
|
||||
op_params.padding_values.width = data.padding.width;
|
||||
op_params.padding_values.height = data.padding.height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.depth_multiplier = params->depth_multiplier;
|
||||
op_params.quantized_activation_min = data.output_activation_min;
|
||||
op_params.quantized_activation_max = data.output_activation_max;
|
||||
op_params.input_offset = input_offset;
|
||||
op_params.weights_offset = filter_offset;
|
||||
op_params.output_offset = output_offset;
|
||||
op_params.output_multiplier = data.output_multiplier;
|
||||
// Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
|
||||
op_params.output_shift = -data.output_shift;
|
||||
|
||||
tflite::reference_ops::DepthwiseConv(
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<uint8_t>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<int32_t>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
|
||||
auto* params =
|
||||
reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
|
||||
const OpData& data = *(static_cast<const OpData*>(node->user_data));
|
||||
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
const TfLiteEvalTensor* filter =
|
||||
tflite::micro::GetEvalInput(context, node, kFilterTensor);
|
||||
const TfLiteEvalTensor* bias =
|
||||
(NumInputs(node) == 3)
|
||||
? tflite::micro::GetEvalInput(context, node, kBiasTensor)
|
||||
: nullptr;
|
||||
|
||||
// TODO(aselle): Consider whether float conv and quantized conv should be
|
||||
// separate ops to avoid dispatch overhead here.
|
||||
switch (input->type) { // Already know in/out types are same.
|
||||
case kTfLiteFloat32:
|
||||
EvalFloat(context, node, params, &data, input, filter, bias, output);
|
||||
EvalFloat(context, node, params, data, input, filter, bias, output);
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
EvalQuantizedPerChannel(context, node, params, &data, input, filter, bias,
|
||||
EvalQuantizedPerChannel(context, node, params, data, input, filter, bias,
|
||||
output);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
EvalQuantized(context, node, params, &data, input, filter, bias, output);
|
||||
EvalQuantized(context, node, params, data, input, filter, bias, output);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
@@ -250,20 +310,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace depthwise_conv
|
||||
} // namespace
|
||||
|
||||
TfLiteRegistration* Register_DEPTHWISE_CONV_2D() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/depthwise_conv::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_DEPTHWISE_CONV_2D() {
|
||||
return {/*init=*/Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/Prepare,
|
||||
/*invoke=*/Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
|
||||
@@ -22,19 +22,39 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace dequantize {
|
||||
|
||||
struct OpData {
|
||||
tflite::DequantizationParams quantization_params;
|
||||
// The scaling factor from input to output (aka the 'real multiplier') can
|
||||
// be represented as a fixed point multiplier plus a left shift.
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
int32_t output_zero_point;
|
||||
};
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
|
||||
// TODO(b/140515557): Add cached dequant to improve hybrid model performance.
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
|
||||
TF_LITE_ENSURE(context, input->type == kTfLiteUInt8 ||
|
||||
input->type == kTfLiteInt8 ||
|
||||
@@ -42,32 +62,49 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TF_LITE_ENSURE(
|
||||
context, output->type == kTfLiteFloat32 || output->type == kTfLiteInt32);
|
||||
|
||||
if (output->type == kTfLiteInt32) {
|
||||
const double effective_output_scale =
|
||||
static_cast<double>(input->params.scale) /
|
||||
static_cast<double>(output->params.scale);
|
||||
QuantizeMultiplier(effective_output_scale, &data->output_multiplier,
|
||||
&data->output_shift);
|
||||
}
|
||||
|
||||
data->quantization_params.zero_point = input->params.zero_point;
|
||||
data->quantization_params.scale = static_cast<double>(input->params.scale);
|
||||
data->output_zero_point = output->params.zero_point;
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
|
||||
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
|
||||
|
||||
if (output->type == kTfLiteFloat32) {
|
||||
tflite::DequantizationParams op_params;
|
||||
op_params.zero_point = input->params.zero_point;
|
||||
op_params.scale = static_cast<double>(input->params.scale);
|
||||
switch (input->type) {
|
||||
case kTfLiteUInt8:
|
||||
reference_ops::Dequantize(
|
||||
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
|
||||
GetTensorShape(output), GetTensorData<float>(output));
|
||||
reference_ops::Dequantize(data->quantization_params,
|
||||
tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
reference_ops::Dequantize(
|
||||
op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
|
||||
GetTensorShape(output), GetTensorData<float>(output));
|
||||
reference_ops::Dequantize(data->quantization_params,
|
||||
tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
break;
|
||||
case kTfLiteInt16:
|
||||
reference_ops::Dequantize(
|
||||
op_params, GetTensorShape(input), GetTensorData<int16_t>(input),
|
||||
GetTensorShape(output), GetTensorData<float>(output));
|
||||
reference_ops::Dequantize(data->quantization_params,
|
||||
tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int16_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
@@ -76,28 +113,23 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else if (output->type == kTfLiteInt32) {
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
const double effective_output_scale =
|
||||
static_cast<double>(input->params.scale) /
|
||||
static_cast<double>(output->params.scale);
|
||||
QuantizeMultiplier(effective_output_scale, &output_multiplier,
|
||||
&output_shift);
|
||||
int flat_size =
|
||||
MatchingFlatSize(GetTensorShape(input), GetTensorShape(output));
|
||||
int flat_size = MatchingFlatSize(tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorShape(output));
|
||||
switch (input->type) {
|
||||
case kTfLiteInt16: {
|
||||
reference_ops::Requantize(
|
||||
GetTensorData<int16_t>(input), flat_size, output_multiplier,
|
||||
output_shift, input->params.zero_point, output->params.zero_point,
|
||||
GetTensorData<int32_t>(output));
|
||||
tflite::micro::GetTensorData<int16_t>(input), flat_size,
|
||||
data->output_multiplier, data->output_shift,
|
||||
data->quantization_params.zero_point, data->output_zero_point,
|
||||
tflite::micro::GetTensorData<int32_t>(output));
|
||||
break;
|
||||
}
|
||||
case kTfLiteInt8: {
|
||||
reference_ops::Requantize(
|
||||
GetTensorData<int8_t>(input), flat_size, output_multiplier,
|
||||
output_shift, input->params.zero_point, output->params.zero_point,
|
||||
GetTensorData<int32_t>(output));
|
||||
tflite::micro::GetTensorData<int8_t>(input), flat_size,
|
||||
data->output_multiplier, data->output_shift,
|
||||
data->quantization_params.zero_point, data->output_zero_point,
|
||||
tflite::micro::GetTensorData<int32_t>(output));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@@ -118,16 +150,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
} // namespace dequantize
|
||||
|
||||
TfLiteRegistration* Register_DEQUANTIZE() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/dequantize::Prepare,
|
||||
/*invoke=*/dequantize::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_DEQUANTIZE() {
|
||||
return {/*init=*/dequantize::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/dequantize::Prepare,
|
||||
/*invoke=*/dequantize::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@@ -18,6 +18,8 @@ limitations under the License.
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/micro_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -39,8 +41,10 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
TF_LITE_ENSURE_EQ(context, input->type, output->type);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
|
||||
if (!IsSupportedType(input->type)) {
|
||||
TF_LITE_KERNEL_LOG(context, "Input data type %s (%d) is not supported.",
|
||||
TfLiteTypeGetName(input->type), input->type);
|
||||
@@ -52,13 +56,13 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
template <typename T>
|
||||
inline TfLiteStatus EvalImpl(TfLiteContext* context, TfLiteNode* node,
|
||||
T func(T), TfLiteType expected_type) {
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
TF_LITE_ENSURE_EQ(context, input->type, expected_type);
|
||||
const int64_t num_elements = NumElements(input);
|
||||
const T* in_data = GetTensorData<T>(input);
|
||||
T* out_data = GetTensorData<T>(output);
|
||||
for (int64_t i = 0; i < num_elements; ++i) {
|
||||
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
|
||||
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, expected_type);
|
||||
const size_t num_elements = ElementCount(*input->dims);
|
||||
const T* in_data = tflite::micro::GetTensorData<T>(input);
|
||||
T* out_data = tflite::micro::GetTensorData<T>(output);
|
||||
for (size_t i = 0; i < num_elements; ++i) {
|
||||
out_data[i] = func(in_data[i]);
|
||||
}
|
||||
return kTfLiteOk;
|
||||
@@ -109,116 +113,100 @@ TfLiteStatus LogicalNotEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
} // namespace
|
||||
} // namespace elementwise
|
||||
|
||||
TfLiteRegistration* Register_ABS() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::AbsEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_ABS() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::AbsEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_SIN() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::SinEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_SIN() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::SinEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_COS() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::CosEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_COS() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::CosEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_LOG() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::LogEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_LOG() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::LogEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_SQRT() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::SqrtEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_SQRT() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::SqrtEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_RSQRT() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::RsqrtEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_RSQRT() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::RsqrtEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_SQUARE() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::SquareEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_SQUARE() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::SquareEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_LOGICAL_NOT() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsLogicalSupportedType>,
|
||||
/*invoke=*/elementwise::LogicalNotEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_LOGICAL_NOT() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsLogicalSupportedType>,
|
||||
/*invoke=*/elementwise::LogicalNotEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@@ -1,34 +1,32 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ALL_OPS_RESOLVER_H_
|
||||
#define TENSORFLOW_LITE_MICRO_KERNELS_ALL_OPS_RESOLVER_H_
|
||||
|
||||
#include "tensorflow/lite/micro/compatibility.h"
|
||||
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
|
||||
//
|
||||
// This is a stub file for non-Ethos platforms
|
||||
//
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace custom {
|
||||
TfLiteRegistration* Register_ETHOSU() { return nullptr; }
|
||||
|
||||
class AllOpsResolver : public MicroMutableOpResolver {
|
||||
public:
|
||||
AllOpsResolver();
|
||||
|
||||
private:
|
||||
TF_LITE_REMOVE_VIRTUAL_DELETE
|
||||
};
|
||||
const char* GetString_ETHOSU() { return ""; }
|
||||
|
||||
} // namespace custom
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_KERNELS_ALL_OPS_RESOLVER_H_
|
||||
@@ -17,7 +17,7 @@ limitations under the License.
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -28,25 +28,28 @@ constexpr int kInputTensor = 0;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
reference_ops::Floor(GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(output), GetTensorData<float>(output));
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
reference_ops::Floor(tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
} // namespace floor
|
||||
|
||||
TfLiteRegistration* Register_FLOOR() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/floor::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_FLOOR() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/floor::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
@@ -13,20 +13,19 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
|
||||
#include "tensorflow/lite/micro/kernels/fully_connected.h"
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace fully_connected {
|
||||
namespace {
|
||||
|
||||
struct OpData {
|
||||
@@ -40,6 +39,10 @@ struct OpData {
|
||||
int32_t output_activation_max;
|
||||
// The index of the temporary tensor where the quantized inputs are cached.
|
||||
int input_quantized_index;
|
||||
// Cached zero point values of tensors.
|
||||
int32_t input_zero_point;
|
||||
int32_t filter_zero_point;
|
||||
int32_t output_zero_point;
|
||||
};
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
@@ -64,20 +67,17 @@ TfLiteStatus CalculateOpData(TfLiteContext* context,
|
||||
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
|
||||
context, activation, output, &data->output_activation_min,
|
||||
&data->output_activation_max));
|
||||
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
data->filter_zero_point = filter->params.zero_point;
|
||||
data->output_zero_point = output->params.zero_point;
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
void* data = nullptr;
|
||||
if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) ==
|
||||
kTfLiteError) {
|
||||
return nullptr;
|
||||
}
|
||||
return data;
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
@@ -89,11 +89,14 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor);
|
||||
TF_LITE_ENSURE(context, filter != nullptr);
|
||||
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, input->type, output->type);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
|
||||
TF_LITE_ENSURE_MSG(context, input->type == filter->type,
|
||||
"Hybrid models are not supported on TFLite Micro.");
|
||||
|
||||
@@ -102,13 +105,15 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
}
|
||||
|
||||
TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
|
||||
const OpData& data, const TfLiteTensor* input,
|
||||
const TfLiteTensor* filter,
|
||||
const TfLiteTensor* bias, TfLiteTensor* output) {
|
||||
const OpData& data,
|
||||
const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter,
|
||||
const TfLiteEvalTensor* bias,
|
||||
TfLiteEvalTensor* output) {
|
||||
tflite::FullyConnectedParams op_params;
|
||||
op_params.input_offset = -input->params.zero_point;
|
||||
op_params.weights_offset = -filter->params.zero_point;
|
||||
op_params.output_offset = output->params.zero_point;
|
||||
op_params.input_offset = -data.input_zero_point;
|
||||
op_params.weights_offset = -data.filter_zero_point;
|
||||
op_params.output_offset = data.output_zero_point;
|
||||
op_params.output_multiplier = data.output_multiplier;
|
||||
// TODO(b/138810107): Figure out whether output shift should be inverted
|
||||
op_params.output_shift = -data.output_shift;
|
||||
@@ -116,20 +121,25 @@ TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
|
||||
op_params.quantized_activation_max = data.output_activation_max;
|
||||
|
||||
reference_integer_ops::FullyConnected(
|
||||
op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
|
||||
GetTensorShape(filter), GetTensorData<int8_t>(filter),
|
||||
GetTensorShape(bias), GetTensorData<int32_t>(bias),
|
||||
GetTensorShape(output), GetTensorData<int8_t>(output));
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<int8_t>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<int32_t>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
const OpData& data, const TfLiteTensor* input,
|
||||
const TfLiteTensor* filter, const TfLiteTensor* bias,
|
||||
TfLiteTensor* output) {
|
||||
const int32_t input_offset = -input->params.zero_point;
|
||||
const int32_t filter_offset = -filter->params.zero_point;
|
||||
const int32_t output_offset = output->params.zero_point;
|
||||
const OpData& data, const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter,
|
||||
const TfLiteEvalTensor* bias,
|
||||
TfLiteEvalTensor* output) {
|
||||
const int32_t input_offset = -data.input_zero_point;
|
||||
const int32_t filter_offset = -data.filter_zero_point;
|
||||
const int32_t output_offset = data.output_zero_point;
|
||||
|
||||
tflite::FullyConnectedParams op_params;
|
||||
op_params.input_offset = input_offset;
|
||||
@@ -141,12 +151,16 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
op_params.quantized_activation_min = data.output_activation_min;
|
||||
op_params.quantized_activation_max = data.output_activation_max;
|
||||
|
||||
#define TF_LITE_FULLY_CONNECTED(output_data_type) \
|
||||
reference_ops::FullyConnected( \
|
||||
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input), \
|
||||
GetTensorShape(filter), GetTensorData<uint8_t>(filter), \
|
||||
GetTensorShape(bias), GetTensorData<int32_t>(bias), \
|
||||
GetTensorShape(output), GetTensorData<output_data_type>(output))
|
||||
#define TF_LITE_FULLY_CONNECTED(output_data_type) \
|
||||
reference_ops::FullyConnected( \
|
||||
op_params, tflite::micro::GetTensorShape(input), \
|
||||
tflite::micro::GetTensorData<uint8_t>(input), \
|
||||
tflite::micro::GetTensorShape(filter), \
|
||||
tflite::micro::GetTensorData<uint8_t>(filter), \
|
||||
tflite::micro::GetTensorShape(bias), \
|
||||
tflite::micro::GetTensorData<int32_t>(bias), \
|
||||
tflite::micro::GetTensorShape(output), \
|
||||
tflite::micro::GetTensorData<output_data_type>(output))
|
||||
switch (output->type) {
|
||||
case kTfLiteUInt8:
|
||||
TF_LITE_FULLY_CONNECTED(uint8_t);
|
||||
@@ -165,8 +179,9 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
|
||||
TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteFusedActivation activation,
|
||||
const TfLiteTensor* input, const TfLiteTensor* filter,
|
||||
const TfLiteTensor* bias, TfLiteTensor* output) {
|
||||
const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter,
|
||||
const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) {
|
||||
float output_activation_min, output_activation_max;
|
||||
CalculateActivationRange(activation, &output_activation_min,
|
||||
&output_activation_max);
|
||||
@@ -174,10 +189,14 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
op_params.float_activation_min = output_activation_min;
|
||||
op_params.float_activation_max = output_activation_max;
|
||||
tflite::reference_ops::FullyConnected(
|
||||
op_params, GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(filter), GetTensorData<float>(filter),
|
||||
GetTensorShape(bias), GetTensorData<float>(bias), GetTensorShape(output),
|
||||
GetTensorData<float>(output));
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<float>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<float>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
@@ -186,10 +205,14 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const auto* params =
|
||||
static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor);
|
||||
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
const TfLiteEvalTensor* filter =
|
||||
tflite::micro::GetEvalInput(context, node, kWeightsTensor);
|
||||
const TfLiteEvalTensor* bias =
|
||||
tflite::micro::GetEvalInput(context, node, kBiasTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData& data = *(static_cast<const OpData*>(node->user_data));
|
||||
@@ -214,20 +237,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace fully_connected
|
||||
} // namespace
|
||||
|
||||
TfLiteRegistration* Register_FULLY_CONNECTED() {
|
||||
static TfLiteRegistration r = {/*init=*/fully_connected::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/fully_connected::Prepare,
|
||||
/*invoke=*/fully_connected::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_FULLY_CONNECTED() {
|
||||
return {/*init=*/Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/Prepare,
|
||||
/*invoke=*/Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_FULLY_CONNECTED_H_
|
||||
#define TENSORFLOW_LITE_MICRO_KERNELS_FULLY_CONNECTED_H_
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
// This is the most generic TfLiteRegistration. The actual supported types may
|
||||
// still be target dependent. The only requirement is that every implementation
|
||||
// (reference or optimized) must define this function.
|
||||
TfLiteRegistration Register_FULLY_CONNECTED();
|
||||
|
||||
#if defined(CMSIS_NN) || defined(ARDUINO)
|
||||
// The Arduino is a special case where we use the CMSIS kernels, but because of
|
||||
// the current approach to building for Arduino, we do not support -DCMSIS_NN as
|
||||
// part of the build. As a result, we use defined(ARDUINO) as proxy for the
|
||||
// CMSIS kernels for this one special case.
|
||||
|
||||
// Returns a TfLiteRegistration struct for cmsis-nn kernel variant that only
|
||||
// supports int8.
|
||||
TfLiteRegistration Register_FULLY_CONNECTED_INT8();
|
||||
|
||||
#else
|
||||
// Note that while this block gets used for both reference and optimized kernels
|
||||
// that do not have any specialized implementations, the only goal here is to
|
||||
// define fallback implementation that allow reference kernels to still be used
|
||||
// from applications that call a more specific kernel variant.
|
||||
|
||||
inline TfLiteRegistration Register_FULLY_CONNECTED_INT8() {
|
||||
return Register_FULLY_CONNECTED();
|
||||
}
|
||||
|
||||
#endif
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_KERNELS_FULLY_CONNECTED_H_
|
||||
142
code/lib/tfmicro/tensorflow/lite/micro/kernels/hard_swish.cc
Normal file
142
code/lib/tfmicro/tensorflow/lite/micro/kernels/hard_swish.cc
Normal file
@@ -0,0 +1,142 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/hard_swish.h"
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/micro_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace hard_swish {
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
void* HardSwishInit(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(HardSwishParams));
|
||||
}
|
||||
|
||||
TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
|
||||
if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
|
||||
HardSwishParams* params = static_cast<HardSwishParams*>(node->user_data);
|
||||
|
||||
params->input_zero_point = input->params.zero_point;
|
||||
params->output_zero_point = output->params.zero_point;
|
||||
|
||||
const float input_scale = input->params.scale;
|
||||
const float hires_input_scale = (1.0f / 128.0f) * input_scale;
|
||||
const float reluish_scale = 3.0f / 32768.0f;
|
||||
const float output_scale = output->params.scale;
|
||||
|
||||
const double output_multiplier =
|
||||
static_cast<double>(hires_input_scale / output_scale);
|
||||
int32_t output_multiplier_fixedpoint_int32;
|
||||
QuantizeMultiplier(output_multiplier, &output_multiplier_fixedpoint_int32,
|
||||
¶ms->output_multiplier_exponent);
|
||||
DownScaleInt32ToInt16Multiplier(
|
||||
output_multiplier_fixedpoint_int32,
|
||||
¶ms->output_multiplier_fixedpoint_int16);
|
||||
|
||||
TF_LITE_ENSURE(context, params->output_multiplier_exponent <= 0);
|
||||
|
||||
const double reluish_multiplier =
|
||||
static_cast<double>(hires_input_scale / reluish_scale);
|
||||
int32_t reluish_multiplier_fixedpoint_int32;
|
||||
QuantizeMultiplier(reluish_multiplier, &reluish_multiplier_fixedpoint_int32,
|
||||
¶ms->reluish_multiplier_exponent);
|
||||
DownScaleInt32ToInt16Multiplier(
|
||||
reluish_multiplier_fixedpoint_int32,
|
||||
¶ms->reluish_multiplier_fixedpoint_int16);
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus HardSwishEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
HardSwishParams* params = static_cast<HardSwishParams*>(node->user_data);
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
tflite::reference_ops::HardSwish<float>(
|
||||
tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
} break;
|
||||
case kTfLiteUInt8: {
|
||||
tflite::reference_ops::HardSwish<uint8_t>(
|
||||
*params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} break;
|
||||
case kTfLiteInt8: {
|
||||
tflite::reference_ops::HardSwish<int8_t>(
|
||||
*params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
} break;
|
||||
default: {
|
||||
TF_LITE_KERNEL_LOG(
|
||||
context,
|
||||
"Only float32/int8_t/uint8_t are supported currently, got %s",
|
||||
TfLiteTypeGetName(input->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace hard_swish
|
||||
|
||||
TfLiteRegistration Register_HARD_SWISH() {
|
||||
return {/*init=*/hard_swish::HardSwishInit,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/hard_swish::HardSwishPrepare,
|
||||
/*invoke=*/hard_swish::HardSwishEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
165
code/lib/tfmicro/tensorflow/lite/micro/kernels/kernel_runner.cc
Normal file
165
code/lib/tfmicro/tensorflow/lite/micro/kernels/kernel_runner.cc
Normal file
@@ -0,0 +1,165 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/micro/kernels/kernel_runner.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace micro {
|
||||
|
||||
namespace {
|
||||
constexpr size_t kBufferAlignment = 16;
|
||||
} // namespace
|
||||
|
||||
// TODO(b/161841696): Consider moving away from global arena buffers:
|
||||
constexpr int KernelRunner::kNumScratchBuffers_;
|
||||
constexpr int KernelRunner::kKernelRunnerBufferSize_;
|
||||
uint8_t KernelRunner::kKernelRunnerBuffer_[];
|
||||
|
||||
KernelRunner::KernelRunner(const TfLiteRegistration& registration,
|
||||
TfLiteTensor* tensors, int tensors_size,
|
||||
TfLiteIntArray* inputs, TfLiteIntArray* outputs,
|
||||
void* builtin_data, ErrorReporter* error_reporter)
|
||||
: allocator_(SimpleMemoryAllocator::Create(
|
||||
error_reporter, kKernelRunnerBuffer_, kKernelRunnerBufferSize_)),
|
||||
registration_(registration),
|
||||
tensors_(tensors),
|
||||
error_reporter_(error_reporter) {
|
||||
// Prepare TfLiteContext:
|
||||
context_.impl_ = static_cast<void*>(this);
|
||||
context_.ReportError = ReportOpError;
|
||||
context_.recommended_num_threads = 1;
|
||||
context_.GetTensor = GetTensor;
|
||||
context_.GetEvalTensor = GetEvalTensor;
|
||||
context_.AllocatePersistentBuffer = AllocatePersistentBuffer;
|
||||
context_.RequestScratchBufferInArena = RequestScratchBufferInArena;
|
||||
context_.GetScratchBuffer = GetScratchBuffer;
|
||||
|
||||
// Prepare TfLiteNode:
|
||||
node_.inputs = inputs;
|
||||
node_.outputs = outputs;
|
||||
node_.builtin_data = builtin_data;
|
||||
}
|
||||
|
||||
TfLiteStatus KernelRunner::InitAndPrepare(const char* init_data) {
|
||||
if (registration_.init) {
|
||||
node_.user_data = registration_.init(&context_, init_data, /*length=*/0);
|
||||
}
|
||||
if (registration_.prepare) {
|
||||
TF_LITE_ENSURE_STATUS(registration_.prepare(&context_, &node_));
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus KernelRunner::Invoke() {
|
||||
if (registration_.invoke == nullptr) {
|
||||
TF_LITE_REPORT_ERROR(error_reporter_,
|
||||
"TfLiteRegistration missing invoke function pointer!");
|
||||
return kTfLiteError;
|
||||
}
|
||||
return registration_.invoke(&context_, &node_);
|
||||
}
|
||||
|
||||
TfLiteTensor* KernelRunner::GetTensor(const struct TfLiteContext* context,
|
||||
int tensor_index) {
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
|
||||
TFLITE_DCHECK(runner != nullptr);
|
||||
|
||||
return &runner->tensors_[tensor_index];
|
||||
}
|
||||
|
||||
TfLiteEvalTensor* KernelRunner::GetEvalTensor(
|
||||
const struct TfLiteContext* context, int tensor_index) {
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
|
||||
TFLITE_DCHECK(runner != nullptr);
|
||||
|
||||
TfLiteEvalTensor* eval_tensor =
|
||||
reinterpret_cast<TfLiteEvalTensor*>(runner->allocator_->AllocateTemp(
|
||||
sizeof(TfLiteEvalTensor), alignof(TfLiteEvalTensor)));
|
||||
TFLITE_DCHECK(eval_tensor != nullptr);
|
||||
|
||||
// In unit tests, the TfLiteTensor pointer contains the source of truth for
|
||||
// buffers and values:
|
||||
eval_tensor->data = runner->tensors_[tensor_index].data;
|
||||
eval_tensor->dims = runner->tensors_[tensor_index].dims;
|
||||
eval_tensor->type = runner->tensors_[tensor_index].type;
|
||||
return eval_tensor;
|
||||
}
|
||||
|
||||
void* KernelRunner::AllocatePersistentBuffer(TfLiteContext* context,
|
||||
size_t bytes) {
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
|
||||
TFLITE_DCHECK(runner != nullptr);
|
||||
|
||||
return runner->allocator_->AllocateFromTail(bytes, kBufferAlignment);
|
||||
}
|
||||
|
||||
TfLiteStatus KernelRunner::RequestScratchBufferInArena(TfLiteContext* context,
|
||||
size_t bytes,
|
||||
int* buffer_index) {
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
TFLITE_DCHECK(buffer_index != nullptr);
|
||||
|
||||
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
|
||||
TFLITE_DCHECK(runner != nullptr);
|
||||
|
||||
if (runner->scratch_buffer_count_ == kNumScratchBuffers_) {
|
||||
TF_LITE_REPORT_ERROR(
|
||||
runner->error_reporter_,
|
||||
"Exceeded the maximum number of scratch tensors allowed (%d).",
|
||||
kNumScratchBuffers_);
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
// For tests, we allocate scratch buffers from the tail and keep them around
|
||||
// for the lifetime of model. This means that the arena size in the tests will
|
||||
// be more than what we would have if the scratch buffers could share memory.
|
||||
runner->scratch_buffers_[runner->scratch_buffer_count_] =
|
||||
runner->allocator_->AllocateFromTail(bytes, kBufferAlignment);
|
||||
TFLITE_DCHECK(runner->scratch_buffers_[runner->scratch_buffer_count_] !=
|
||||
nullptr);
|
||||
|
||||
*buffer_index = runner->scratch_buffer_count_++;
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void* KernelRunner::GetScratchBuffer(TfLiteContext* context, int buffer_index) {
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
|
||||
TFLITE_DCHECK(runner != nullptr);
|
||||
|
||||
TFLITE_DCHECK(runner->scratch_buffer_count_ <= kNumScratchBuffers_);
|
||||
if (buffer_index >= runner->scratch_buffer_count_) {
|
||||
return nullptr;
|
||||
}
|
||||
return runner->scratch_buffers_[buffer_index];
|
||||
}
|
||||
|
||||
void KernelRunner::ReportOpError(struct TfLiteContext* context,
|
||||
const char* format, ...) {
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
|
||||
TFLITE_DCHECK(runner != nullptr);
|
||||
|
||||
va_list args;
|
||||
va_start(args, format);
|
||||
TF_LITE_REPORT_ERROR(runner->error_reporter_, format, args);
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace tflite
|
||||
@@ -0,0 +1,83 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_RUNNER_H_
|
||||
#define TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_RUNNER_H_
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/compatibility.h"
|
||||
#include "tensorflow/lite/micro/simple_memory_allocator.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace micro {
|
||||
|
||||
// Helper class to perform a simulated kernel (i.e. TfLiteRegistration) lifecyle
|
||||
// (init, prepare, invoke). All internal allocations are handled by this class.
|
||||
// Simply pass in the registration, list of required tensors, inputs array,
|
||||
// outputs array, and any pre-builtin data. Calling Invoke() will automatically
|
||||
// walk the kernl and outputs will be ready on the the TfLiteTensor output
|
||||
// provided during construction.
|
||||
class KernelRunner {
|
||||
public:
|
||||
KernelRunner(const TfLiteRegistration& registration, TfLiteTensor* tensors,
|
||||
int tensors_size, TfLiteIntArray* inputs,
|
||||
TfLiteIntArray* outputs, void* builtin_data,
|
||||
ErrorReporter* error_reporter);
|
||||
|
||||
// Calls init and prepare on the kernel (i.e. TfLiteRegistration) struct. Any
|
||||
// exceptions will be reported through the error_reporter and returned as a
|
||||
// status code here.
|
||||
TfLiteStatus InitAndPrepare(const char* init_data = nullptr);
|
||||
|
||||
// Calls init, prepare, and invoke on a given TfLiteRegistration pointer.
|
||||
// After successful invoke, results will be available in the output tensor as
|
||||
// passed into the constructor of this class.
|
||||
TfLiteStatus Invoke();
|
||||
|
||||
protected:
|
||||
static TfLiteTensor* GetTensor(const struct TfLiteContext* context,
|
||||
int tensor_index);
|
||||
static TfLiteEvalTensor* GetEvalTensor(const struct TfLiteContext* context,
|
||||
int tensor_index);
|
||||
static void* AllocatePersistentBuffer(TfLiteContext* context, size_t bytes);
|
||||
static TfLiteStatus RequestScratchBufferInArena(TfLiteContext* context,
|
||||
size_t bytes,
|
||||
int* buffer_index);
|
||||
static void* GetScratchBuffer(TfLiteContext* context, int buffer_index);
|
||||
static void ReportOpError(struct TfLiteContext* context, const char* format,
|
||||
...);
|
||||
|
||||
private:
|
||||
static constexpr int kNumScratchBuffers_ = 5;
|
||||
|
||||
static constexpr int kKernelRunnerBufferSize_ = 10000;
|
||||
static uint8_t kKernelRunnerBuffer_[kKernelRunnerBufferSize_];
|
||||
|
||||
SimpleMemoryAllocator* allocator_ = nullptr;
|
||||
const TfLiteRegistration& registration_;
|
||||
TfLiteTensor* tensors_ = nullptr;
|
||||
ErrorReporter* error_reporter_ = nullptr;
|
||||
|
||||
TfLiteContext context_ = {};
|
||||
TfLiteNode node_ = {};
|
||||
|
||||
int scratch_buffer_count_ = 0;
|
||||
uint8_t* scratch_buffers_[kNumScratchBuffers_];
|
||||
};
|
||||
|
||||
} // namespace micro
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_RUNNER_H_
|
||||
@@ -0,0 +1,41 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace micro {
|
||||
|
||||
bool HaveSameShapes(const TfLiteEvalTensor* input1,
|
||||
const TfLiteEvalTensor* input2) {
|
||||
TFLITE_DCHECK(input1 != nullptr);
|
||||
TFLITE_DCHECK(input2 != nullptr);
|
||||
return TfLiteIntArrayEqual(input1->dims, input2->dims);
|
||||
}
|
||||
|
||||
const RuntimeShape GetTensorShape(const TfLiteEvalTensor* tensor) {
|
||||
if (tensor == nullptr || tensor->dims == nullptr) {
|
||||
return RuntimeShape();
|
||||
}
|
||||
TfLiteIntArray* dims = tensor->dims;
|
||||
const int dims_size = dims->size;
|
||||
const int32_t* dims_data = reinterpret_cast<const int32_t*>(dims->data);
|
||||
return RuntimeShape(dims_size, dims_data);
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace tflite
|
||||
75
code/lib/tfmicro/tensorflow/lite/micro/kernels/kernel_util.h
Normal file
75
code/lib/tfmicro/tensorflow/lite/micro/kernels/kernel_util.h
Normal file
@@ -0,0 +1,75 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_UTIL_H_
|
||||
#define TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_UTIL_H_
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/compatibility.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace micro {
|
||||
|
||||
// Returns a mutable tensor for a given input index. is_variable must be checked
|
||||
// during prepare when the full TfLiteTensor is available.
|
||||
inline TfLiteEvalTensor* GetMutableEvalInput(const TfLiteContext* context,
|
||||
const TfLiteNode* node,
|
||||
int index) {
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
TFLITE_DCHECK(node != nullptr);
|
||||
return context->GetEvalTensor(context, node->inputs->data[index]);
|
||||
}
|
||||
|
||||
// Returns the TfLiteEvalTensor struct for a given input index in a node.
|
||||
inline const TfLiteEvalTensor* GetEvalInput(const TfLiteContext* context,
|
||||
const TfLiteNode* node, int index) {
|
||||
return GetMutableEvalInput(context, node, index);
|
||||
}
|
||||
|
||||
// Returns the TfLiteEvalTensor struct for a given output index in a node.
|
||||
inline TfLiteEvalTensor* GetEvalOutput(const TfLiteContext* context,
|
||||
const TfLiteNode* node, int index) {
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
TFLITE_DCHECK(node != nullptr);
|
||||
return context->GetEvalTensor(context, node->outputs->data[index]);
|
||||
}
|
||||
|
||||
// Returns data for a TfLiteEvalTensor struct.
|
||||
template <typename T>
|
||||
T* GetTensorData(TfLiteEvalTensor* tensor) {
|
||||
return tensor != nullptr ? reinterpret_cast<T*>(tensor->data.raw) : nullptr;
|
||||
}
|
||||
|
||||
// Returns const data for a TfLiteEvalTensor struct.
|
||||
template <typename T>
|
||||
const T* GetTensorData(const TfLiteEvalTensor* tensor) {
|
||||
TFLITE_DCHECK(tensor != nullptr);
|
||||
return reinterpret_cast<const T*>(tensor->data.raw);
|
||||
}
|
||||
|
||||
// Returns the shape of a TfLiteEvalTensor struct.
|
||||
const RuntimeShape GetTensorShape(const TfLiteEvalTensor* tensor);
|
||||
|
||||
// Return true if the given tensors have the same shape.
|
||||
bool HaveSameShapes(const TfLiteEvalTensor* input1,
|
||||
const TfLiteEvalTensor* input2);
|
||||
|
||||
} // namespace micro
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_UTIL_H_
|
||||
@@ -14,16 +14,19 @@ limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/portable_tensor.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/l2normalization.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace l2norm {
|
||||
|
||||
namespace {
|
||||
|
||||
// This file has two implementation of L2Norm.
|
||||
enum KernelType {
|
||||
kReference,
|
||||
@@ -33,44 +36,59 @@ enum KernelType {
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
} // namespace
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
#if defined(DEBUG)
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
|
||||
auto* params = reinterpret_cast<TfLiteL2NormParams*>(node->builtin_data);
|
||||
L2NormalizationParams* data =
|
||||
static_cast<L2NormalizationParams*>(node->user_data);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
|
||||
TF_LITE_ENSURE(context, NumDimensions(input) <= 4);
|
||||
|
||||
TF_LITE_ENSURE(context, output->type == kTfLiteFloat32 ||
|
||||
output->type == kTfLiteUInt8 ||
|
||||
output->type == kTfLiteInt8);
|
||||
TF_LITE_ENSURE_EQ(context, input->type, output->type);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
|
||||
|
||||
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
|
||||
TF_LITE_ENSURE_EQ(context, output->params.scale, (1. / 128.));
|
||||
if (output->type == kTfLiteUInt8) {
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 128);
|
||||
}
|
||||
if (output->type == kTfLiteInt8) {
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
|
||||
}
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
} else if (output->type == kTfLiteFloat32) {
|
||||
data->input_zero_point = 0;
|
||||
}
|
||||
|
||||
// TODO(ahentz): For some reason our implementations don't support
|
||||
// activations.
|
||||
TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone);
|
||||
#endif
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context,
|
||||
sizeof(L2NormalizationParams));
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const L2NormalizationParams& data =
|
||||
*(static_cast<const L2NormalizationParams*>(node->user_data));
|
||||
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
// TODO(b/143912164): instead of hardcode the epsilon here, we should read it
|
||||
// from tensorflow, i.e., adding a params.
|
||||
@@ -87,39 +105,32 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
// So we don't even need to do handle the epsilon for quantized kernel case.
|
||||
const float epsilon = 1e-6f;
|
||||
if (output->type == kTfLiteFloat32) {
|
||||
#define TF_LITE_L2NORM(type) \
|
||||
tflite::L2NormalizationParams op_params; \
|
||||
op_params.input_zero_point = 0; \
|
||||
type::L2Normalization(op_params, GetTensorShape(input), \
|
||||
GetTensorData<float>(input), GetTensorShape(output), \
|
||||
GetTensorData<float>(output), epsilon)
|
||||
|
||||
TF_LITE_L2NORM(reference_ops);
|
||||
#undef TF_LITE_L2NORM
|
||||
reference_ops::L2Normalization(data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output),
|
||||
epsilon);
|
||||
} else if (output->type == kTfLiteUInt8) {
|
||||
#define TF_LITE_L2NORM(type) \
|
||||
tflite::L2NormalizationParams op_params; \
|
||||
op_params.input_zero_point = input->params.zero_point; \
|
||||
type::L2Normalization(op_params, GetTensorShape(input), \
|
||||
GetTensorData<uint8>(input), GetTensorShape(output), \
|
||||
GetTensorData<uint8>(output))
|
||||
|
||||
TF_LITE_L2NORM(reference_ops);
|
||||
#undef TF_LITE_L2NORM
|
||||
reference_ops::L2Normalization(
|
||||
data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} else if (output->type == kTfLiteInt8) {
|
||||
const auto input_shape = GetTensorShape(input);
|
||||
const auto output_shape = GetTensorShape(output);
|
||||
const auto input_shape = tflite::micro::GetTensorShape(input);
|
||||
const auto output_shape = tflite::micro::GetTensorShape(output);
|
||||
const int trailing_dim = input_shape.DimensionsCount() - 1;
|
||||
const int depth =
|
||||
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
|
||||
const int outer_size =
|
||||
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
|
||||
reference_integer_ops::L2Normalization(input->params.zero_point, outer_size,
|
||||
depth, GetTensorData<int8>(input),
|
||||
GetTensorData<int8>(output));
|
||||
reference_integer_ops::L2Normalization(
|
||||
data.input_zero_point, outer_size, depth,
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
} else {
|
||||
TF_LITE_KERNEL_LOG(context, "Output type is %d, requires float.",
|
||||
output->type);
|
||||
TF_LITE_KERNEL_LOG(context, "Output type is %s, requires float.",
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
@@ -128,22 +139,18 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
} // namespace l2norm
|
||||
|
||||
TfLiteRegistration* Register_L2NORM_REF() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/l2norm::Prepare,
|
||||
/*invoke=*/l2norm::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
|
||||
return &r;
|
||||
TfLiteRegistration Register_L2NORM_REF() {
|
||||
return {/*init=*/l2norm::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/l2norm::Prepare,
|
||||
/*invoke=*/l2norm::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_L2_NORMALIZATION() {
|
||||
return Register_L2NORM_REF();
|
||||
}
|
||||
TfLiteRegistration Register_L2_NORMALIZATION() { return Register_L2NORM_REF(); }
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
|
||||
@@ -15,8 +15,8 @@ limitations under the License.
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -31,20 +31,29 @@ constexpr int kOutputTensor = 0;
|
||||
|
||||
TfLiteStatus LogicalImpl(TfLiteContext* context, TfLiteNode* node,
|
||||
bool (*func)(bool, bool)) {
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
const TfLiteEvalTensor* input1 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor1);
|
||||
const TfLiteEvalTensor* input2 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor2);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
if (HaveSameShapes(input1, input2)) {
|
||||
if (tflite::micro::HaveSameShapes(input1, input2)) {
|
||||
reference_ops::BinaryFunction<bool, bool, bool>(
|
||||
GetTensorShape(input1), GetTensorData<bool>(input1),
|
||||
GetTensorShape(input2), GetTensorData<bool>(input2),
|
||||
GetTensorShape(output), GetTensorData<bool>(output), func);
|
||||
tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<bool>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<bool>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<bool>(output), func);
|
||||
} else {
|
||||
reference_ops::BroadcastBinaryFunction4DSlow<bool, bool, bool>(
|
||||
GetTensorShape(input1), GetTensorData<bool>(input1),
|
||||
GetTensorShape(input2), GetTensorData<bool>(input2),
|
||||
GetTensorShape(output), GetTensorData<bool>(output), func);
|
||||
tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<bool>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<bool>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<bool>(output), func);
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
@@ -65,32 +74,30 @@ TfLiteStatus LogicalAndEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
} // namespace
|
||||
} // namespace logical
|
||||
|
||||
TfLiteRegistration* Register_LOGICAL_OR() {
|
||||
TfLiteRegistration Register_LOGICAL_OR() {
|
||||
// Init, Free, Prepare, Eval are satisfying the Interface required by
|
||||
// TfLiteRegistration.
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/logical::LogicalOrEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/logical::LogicalOrEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_LOGICAL_AND() {
|
||||
TfLiteRegistration Register_LOGICAL_AND() {
|
||||
// Init, Free, Prepare, Eval are satisfying the Interface required by
|
||||
// TfLiteRegistration.
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/logical::LogicalAndEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/logical::LogicalAndEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@@ -23,6 +23,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -42,9 +43,11 @@ struct OpData {
|
||||
TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
|
||||
OpData* data) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, input->type, output->type);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
|
||||
if (input->type == kTfLiteInt8) {
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point,
|
||||
std::numeric_limits<int8_t>::min());
|
||||
@@ -54,6 +57,8 @@ TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
|
||||
static_cast<double>(input->params.scale) *
|
||||
static_cast<double>(1 << (31 - kInputIntegerBits));
|
||||
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
|
||||
const double q = std::frexp(input_real_multiplier, &data->input_left_shift);
|
||||
data->input_multiplier = static_cast<int32_t>(TfLiteRound(q * (1ll << 31)));
|
||||
|
||||
@@ -64,18 +69,34 @@ TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
|
||||
}
|
||||
} // namespace
|
||||
|
||||
void* LogisticInit(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus LogisticPrepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
return CalculateArithmeticOpData(context, node, data);
|
||||
}
|
||||
|
||||
TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
OpData data;
|
||||
CalculateArithmeticOpData(context, node, &data);
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
if (input->type == kTfLiteFloat32) {
|
||||
switch (output->type) {
|
||||
case kTfLiteFloat32: {
|
||||
reference_ops::Logistic(
|
||||
GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(output), GetTensorData<float>(output));
|
||||
reference_ops::Logistic(tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
default:
|
||||
@@ -88,10 +109,11 @@ TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
switch (output->type) {
|
||||
case kTfLiteInt8: {
|
||||
reference_integer_ops::Logistic(
|
||||
input->params.zero_point, data.input_range_radius,
|
||||
data.input_multiplier, data.input_left_shift,
|
||||
NumElements(input->dims), GetTensorData<int8_t>(input),
|
||||
GetTensorData<int8_t>(output));
|
||||
data->input_zero_point, data->input_range_radius,
|
||||
data->input_multiplier, data->input_left_shift,
|
||||
NumElements(input->dims),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
default:
|
||||
@@ -113,16 +135,15 @@ TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
} // namespace activations
|
||||
|
||||
TfLiteRegistration* Register_LOGISTIC() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/activations::LogisticEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_LOGISTIC() {
|
||||
return {/*init=*/activations::LogisticInit,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/activations::LogisticPrepare,
|
||||
/*invoke=*/activations::LogisticEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
|
||||
@@ -22,6 +22,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -40,13 +41,13 @@ constexpr int kOutputTensor = 0;
|
||||
|
||||
struct OpContext {
|
||||
OpContext(TfLiteContext* context, TfLiteNode* node) {
|
||||
input1 = GetInput(context, node, kInputTensor1);
|
||||
input2 = GetInput(context, node, kInputTensor2);
|
||||
output = GetOutput(context, node, kOutputTensor);
|
||||
input1 = tflite::micro::GetEvalInput(context, node, kInputTensor1);
|
||||
input2 = tflite::micro::GetEvalInput(context, node, kInputTensor2);
|
||||
output = tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
}
|
||||
const TfLiteTensor* input1;
|
||||
const TfLiteTensor* input2;
|
||||
TfLiteTensor* output;
|
||||
const TfLiteEvalTensor* input1;
|
||||
const TfLiteEvalTensor* input2;
|
||||
TfLiteEvalTensor* output;
|
||||
};
|
||||
|
||||
struct MaximumOp {
|
||||
@@ -69,12 +70,12 @@ template <typename data_type, typename op_type>
|
||||
void TFLiteOperation(TfLiteContext* context, TfLiteNode* node,
|
||||
const OpContext& op_context) {
|
||||
reference_ops::MaximumMinimumBroadcastSlow(
|
||||
GetTensorShape(op_context.input1),
|
||||
GetTensorData<data_type>(op_context.input1),
|
||||
GetTensorShape(op_context.input2),
|
||||
GetTensorData<data_type>(op_context.input2),
|
||||
GetTensorShape(op_context.output),
|
||||
GetTensorData<data_type>(op_context.output),
|
||||
tflite::micro::GetTensorShape(op_context.input1),
|
||||
tflite::micro::GetTensorData<data_type>(op_context.input1),
|
||||
tflite::micro::GetTensorShape(op_context.input2),
|
||||
tflite::micro::GetTensorData<data_type>(op_context.input2),
|
||||
tflite::micro::GetTensorShape(op_context.output),
|
||||
tflite::micro::GetTensorData<data_type>(op_context.output),
|
||||
op_type::template op<data_type>);
|
||||
}
|
||||
|
||||
@@ -116,34 +117,30 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
} // namespace maximum_minimum
|
||||
|
||||
TfLiteRegistration* Register_MAXIMUM() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/
|
||||
maximum_minimum::Eval<maximum_minimum::kReference,
|
||||
maximum_minimum::MaximumOp>,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_MAXIMUM() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/
|
||||
maximum_minimum::Eval<maximum_minimum::kReference,
|
||||
maximum_minimum::MaximumOp>,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_MINIMUM() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/
|
||||
maximum_minimum::Eval<maximum_minimum::kReference,
|
||||
maximum_minimum::MinimumOp>,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_MINIMUM() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/
|
||||
maximum_minimum::Eval<maximum_minimum::kReference,
|
||||
maximum_minimum::MinimumOp>,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@@ -17,10 +17,6 @@ limitations under the License.
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
|
||||
// Forward declaration of all micro op kernel registration methods. These
|
||||
// registrations are included with the standard `BuiltinOpResolver`.
|
||||
//
|
||||
@@ -29,58 +25,73 @@ namespace micro {
|
||||
// their model requires, using a custom `(Micro)MutableOpResolver`. Selective
|
||||
// registration in turn allows the linker to strip unused kernels.
|
||||
|
||||
TfLiteRegistration* Register_ABS();
|
||||
TfLiteRegistration* Register_ADD();
|
||||
TfLiteRegistration* Register_ARG_MAX();
|
||||
TfLiteRegistration* Register_ARG_MIN();
|
||||
TfLiteRegistration* Register_AVERAGE_POOL_2D();
|
||||
TfLiteRegistration* Register_CEIL();
|
||||
namespace tflite {
|
||||
|
||||
// TFLM is incrementally moving towards a flat tflite namespace
|
||||
// (https://abseil.io/tips/130). Any new ops (or cleanup of existing ops should
|
||||
// have their Register function declarations in the tflite namespace.
|
||||
|
||||
TfLiteRegistration Register_CONV_2D();
|
||||
TfLiteRegistration Register_DEPTHWISE_CONV_2D();
|
||||
TfLiteRegistration Register_QUANTIZE();
|
||||
TfLiteRegistration Register_SHAPE();
|
||||
TfLiteRegistration Register_SOFTMAX();
|
||||
TfLiteRegistration Register_SVDF();
|
||||
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
|
||||
TfLiteRegistration Register_ABS();
|
||||
TfLiteRegistration Register_ADD();
|
||||
TfLiteRegistration Register_ARG_MAX();
|
||||
TfLiteRegistration Register_ARG_MIN();
|
||||
TfLiteRegistration Register_AVERAGE_POOL_2D();
|
||||
TfLiteRegistration Register_CEIL();
|
||||
// TODO(b/160234179): Change custom OPs to also return by value.
|
||||
TfLiteRegistration* Register_CIRCULAR_BUFFER();
|
||||
TfLiteRegistration* Register_CONV_2D();
|
||||
TfLiteRegistration* Register_CONCATENATION();
|
||||
TfLiteRegistration* Register_COS();
|
||||
TfLiteRegistration* Register_DEPTHWISE_CONV_2D();
|
||||
TfLiteRegistration* Register_DEQUANTIZE();
|
||||
TfLiteRegistration* Register_EQUAL();
|
||||
TfLiteRegistration* Register_FLOOR();
|
||||
TfLiteRegistration* Register_FULLY_CONNECTED();
|
||||
TfLiteRegistration* Register_GREATER();
|
||||
TfLiteRegistration* Register_GREATER_EQUAL();
|
||||
TfLiteRegistration* Register_LESS();
|
||||
TfLiteRegistration* Register_LESS_EQUAL();
|
||||
TfLiteRegistration* Register_LOG();
|
||||
TfLiteRegistration* Register_LOGICAL_AND();
|
||||
TfLiteRegistration* Register_LOGICAL_NOT();
|
||||
TfLiteRegistration* Register_LOGICAL_OR();
|
||||
TfLiteRegistration* Register_LOGISTIC();
|
||||
TfLiteRegistration* Register_MAXIMUM();
|
||||
TfLiteRegistration* Register_MAX_POOL_2D();
|
||||
TfLiteRegistration* Register_MEAN();
|
||||
TfLiteRegistration* Register_MINIMUM();
|
||||
TfLiteRegistration* Register_MUL();
|
||||
TfLiteRegistration* Register_NEG();
|
||||
TfLiteRegistration* Register_NOT_EQUAL();
|
||||
TfLiteRegistration* Register_PACK();
|
||||
TfLiteRegistration* Register_PAD();
|
||||
TfLiteRegistration* Register_PADV2();
|
||||
TfLiteRegistration* Register_PRELU();
|
||||
TfLiteRegistration* Register_QUANTIZE();
|
||||
TfLiteRegistration* Register_RELU();
|
||||
TfLiteRegistration* Register_RELU6();
|
||||
TfLiteRegistration* Register_RESHAPE();
|
||||
TfLiteRegistration* Register_RESIZE_NEAREST_NEIGHBOR();
|
||||
TfLiteRegistration* Register_ROUND();
|
||||
TfLiteRegistration* Register_RSQRT();
|
||||
TfLiteRegistration* Register_SIN();
|
||||
TfLiteRegistration* Register_SOFTMAX();
|
||||
TfLiteRegistration* Register_SPLIT();
|
||||
TfLiteRegistration* Register_SQRT();
|
||||
TfLiteRegistration* Register_SQUARE();
|
||||
TfLiteRegistration* Register_STRIDED_SLICE();
|
||||
TfLiteRegistration* Register_SUB();
|
||||
TfLiteRegistration* Register_SVDF();
|
||||
TfLiteRegistration* Register_UNPACK();
|
||||
TfLiteRegistration* Register_L2_NORMALIZATION();
|
||||
TfLiteRegistration Register_CONCATENATION();
|
||||
TfLiteRegistration Register_COS();
|
||||
TfLiteRegistration Register_DEQUANTIZE();
|
||||
TfLiteRegistration Register_EQUAL();
|
||||
TfLiteRegistration Register_FLOOR();
|
||||
TfLiteRegistration Register_GREATER();
|
||||
TfLiteRegistration Register_GREATER_EQUAL();
|
||||
TfLiteRegistration Register_HARD_SWISH();
|
||||
TfLiteRegistration Register_LESS();
|
||||
TfLiteRegistration Register_LESS_EQUAL();
|
||||
TfLiteRegistration Register_LOG();
|
||||
TfLiteRegistration Register_LOGICAL_AND();
|
||||
TfLiteRegistration Register_LOGICAL_NOT();
|
||||
TfLiteRegistration Register_LOGICAL_OR();
|
||||
TfLiteRegistration Register_LOGISTIC();
|
||||
TfLiteRegistration Register_MAXIMUM();
|
||||
TfLiteRegistration Register_MAX_POOL_2D();
|
||||
TfLiteRegistration Register_MEAN();
|
||||
TfLiteRegistration Register_MINIMUM();
|
||||
TfLiteRegistration Register_MUL();
|
||||
TfLiteRegistration Register_NEG();
|
||||
TfLiteRegistration Register_NOT_EQUAL();
|
||||
TfLiteRegistration Register_PACK();
|
||||
TfLiteRegistration Register_PAD();
|
||||
TfLiteRegistration Register_PADV2();
|
||||
TfLiteRegistration Register_PRELU();
|
||||
TfLiteRegistration Register_REDUCE_MAX();
|
||||
TfLiteRegistration Register_RELU();
|
||||
TfLiteRegistration Register_RELU6();
|
||||
TfLiteRegistration Register_RESHAPE();
|
||||
TfLiteRegistration Register_RESIZE_NEAREST_NEIGHBOR();
|
||||
TfLiteRegistration Register_ROUND();
|
||||
TfLiteRegistration Register_RSQRT();
|
||||
TfLiteRegistration Register_SIN();
|
||||
TfLiteRegistration Register_SPLIT();
|
||||
TfLiteRegistration Register_SPLIT_V();
|
||||
TfLiteRegistration Register_SQRT();
|
||||
TfLiteRegistration Register_SQUARE();
|
||||
TfLiteRegistration Register_STRIDED_SLICE();
|
||||
TfLiteRegistration Register_SUB();
|
||||
TfLiteRegistration Register_UNPACK();
|
||||
TfLiteRegistration Register_L2_NORMALIZATION();
|
||||
TfLiteRegistration Register_TANH();
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
|
||||
@@ -21,132 +21,194 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/memory_helpers.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace mul {
|
||||
namespace {
|
||||
|
||||
constexpr int kInput1Tensor = 0;
|
||||
constexpr int kInput2Tensor = 1;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
struct OpData {
|
||||
int32_t input1_zero_point;
|
||||
int32_t input2_zero_point;
|
||||
|
||||
int32_t output_activation_min;
|
||||
int32_t output_activation_max;
|
||||
|
||||
int32_t output_zero_point;
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
|
||||
float output_activation_min_f32;
|
||||
float output_activation_max_f32;
|
||||
};
|
||||
|
||||
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteMulParams* params, OpData* data) {
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor);
|
||||
TF_LITE_ENSURE(context, input1 != nullptr);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor);
|
||||
TF_LITE_ENSURE(context, input2 != nullptr);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, input1->type, input2->type);
|
||||
|
||||
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
|
||||
context, params->activation, output, &data->output_activation_min,
|
||||
&data->output_activation_max));
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
|
||||
|
||||
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
|
||||
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
|
||||
context, params->activation, output, &data->output_activation_min,
|
||||
&data->output_activation_max));
|
||||
|
||||
double real_multiplier = static_cast<double>(input1->params.scale) *
|
||||
static_cast<double>(input2->params.scale) /
|
||||
static_cast<double>(output->params.scale);
|
||||
QuantizeMultiplier(real_multiplier, &data->output_multiplier,
|
||||
&data->output_shift);
|
||||
|
||||
data->input1_zero_point = input1->params.zero_point;
|
||||
data->input2_zero_point = input2->params.zero_point;
|
||||
data->output_zero_point = output->params.zero_point;
|
||||
} else {
|
||||
CalculateActivationRange(params->activation,
|
||||
&data->output_activation_min_f32,
|
||||
&data->output_activation_max_f32);
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteMulParams* params, OpData* data,
|
||||
const TfLiteTensor* input1, const TfLiteTensor* input2,
|
||||
TfLiteTensor* output) {
|
||||
if (output->type == kTfLiteInt8 || output->type == kTfLiteUInt8) {
|
||||
tflite::ArithmeticParams op_params;
|
||||
SetActivationParams(data->output_activation_min,
|
||||
data->output_activation_max, &op_params);
|
||||
op_params.input1_offset = -input1->params.zero_point;
|
||||
op_params.input2_offset = -input2->params.zero_point;
|
||||
op_params.output_offset = output->params.zero_point;
|
||||
op_params.output_multiplier = data->output_multiplier;
|
||||
op_params.output_shift = data->output_shift;
|
||||
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
|
||||
GetTensorShape(input1), GetTensorShape(input2), &op_params);
|
||||
} // namespace
|
||||
|
||||
#define TF_LITE_MUL(type, opname, dtype) \
|
||||
type::opname(op_params, GetTensorShape(input1), \
|
||||
GetTensorData<dtype>(input1), GetTensorShape(input2), \
|
||||
GetTensorData<dtype>(input2), GetTensorShape(output), \
|
||||
GetTensorData<dtype>(output));
|
||||
void EvalQuantized(TfLiteContext* context, TfLiteNode* node, const OpData* data,
|
||||
const TfLiteEvalTensor* input1,
|
||||
const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
|
||||
tflite::ArithmeticParams op_params = {};
|
||||
op_params.quantized_activation_min = data->output_activation_min;
|
||||
op_params.quantized_activation_max = data->output_activation_max;
|
||||
op_params.float_activation_max = data->output_activation_max_f32;
|
||||
op_params.input1_offset = -data->input1_zero_point;
|
||||
op_params.input2_offset = -data->input2_zero_point;
|
||||
op_params.output_offset = data->output_zero_point;
|
||||
op_params.output_multiplier = data->output_multiplier;
|
||||
op_params.output_shift = data->output_shift;
|
||||
|
||||
if (output->type == kTfLiteInt8) {
|
||||
if (need_broadcast) {
|
||||
TF_LITE_MUL(reference_integer_ops, BroadcastMul4DSlow, int8_t);
|
||||
} else {
|
||||
TF_LITE_MUL(reference_integer_ops, Mul, int8_t);
|
||||
}
|
||||
} else if (output->type == kTfLiteUInt8) {
|
||||
if (need_broadcast) {
|
||||
TF_LITE_MUL(reference_ops, BroadcastMul4DSlow, uint8_t);
|
||||
} else {
|
||||
TF_LITE_MUL(reference_ops, Mul, uint8_t);
|
||||
}
|
||||
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
|
||||
tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorShape(input2), &op_params);
|
||||
|
||||
if (output->type == kTfLiteInt8) {
|
||||
if (need_broadcast) {
|
||||
reference_integer_ops::BroadcastMul4DSlow(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<int8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<int8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
} else {
|
||||
reference_integer_ops::Mul(op_params,
|
||||
tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<int8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<int8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
} else if (output->type == kTfLiteUInt8) {
|
||||
if (need_broadcast) {
|
||||
reference_integer_ops::BroadcastMul4DSlow(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<uint8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<uint8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} else {
|
||||
reference_integer_ops::Mul(op_params,
|
||||
tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<uint8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<uint8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
}
|
||||
#undef TF_LITE_MUL
|
||||
}
|
||||
}
|
||||
|
||||
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteMulParams* params, OpData* data,
|
||||
const TfLiteTensor* input1, const TfLiteTensor* input2,
|
||||
TfLiteTensor* output) {
|
||||
float output_activation_min, output_activation_max;
|
||||
CalculateActivationRange(params->activation, &output_activation_min,
|
||||
&output_activation_max);
|
||||
tflite::ArithmeticParams op_params;
|
||||
SetActivationParams(output_activation_min, output_activation_max, &op_params);
|
||||
TfLiteMulParams* params, const OpData* data,
|
||||
const TfLiteEvalTensor* input1, const TfLiteEvalTensor* input2,
|
||||
TfLiteEvalTensor* output) {
|
||||
tflite::ArithmeticParams op_params = {};
|
||||
op_params.float_activation_min = data->output_activation_min_f32;
|
||||
op_params.float_activation_max = data->output_activation_max_f32;
|
||||
|
||||
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
|
||||
GetTensorShape(input1), GetTensorShape(input2), &op_params);
|
||||
#define TF_LITE_MUL(opname) \
|
||||
reference_ops::opname(op_params, GetTensorShape(input1), \
|
||||
GetTensorData<float>(input1), GetTensorShape(input2), \
|
||||
GetTensorData<float>(input2), GetTensorShape(output), \
|
||||
GetTensorData<float>(output));
|
||||
tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorShape(input2), &op_params);
|
||||
|
||||
if (need_broadcast) {
|
||||
TF_LITE_MUL(BroadcastMul4DSlow);
|
||||
reference_ops::BroadcastMul4DSlow(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<float>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<float>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
} else {
|
||||
TF_LITE_MUL(Mul);
|
||||
reference_ops::Mul(op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<float>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<float>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
}
|
||||
#undef TF_LITE_MUL
|
||||
}
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
return CalculateOpData(context, node, params, data);
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
|
||||
OpData data;
|
||||
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
CalculateOpData(context, node, params, &data);
|
||||
const TfLiteEvalTensor* input1 =
|
||||
tflite::micro::GetEvalInput(context, node, kInput1Tensor);
|
||||
const TfLiteEvalTensor* input2 =
|
||||
tflite::micro::GetEvalInput(context, node, kInput2Tensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
switch (input1->type) {
|
||||
case kTfLiteUInt8:
|
||||
case kTfLiteInt8:
|
||||
EvalQuantized(context, node, params, &data, input1, input2, output);
|
||||
EvalQuantized(context, node, data, input1, input2, output);
|
||||
break;
|
||||
case kTfLiteFloat32:
|
||||
EvalFloat(context, node, params, &data, input1, input2, output);
|
||||
EvalFloat(context, node, params, data, input1, input2, output);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
@@ -158,16 +220,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
}
|
||||
} // namespace mul
|
||||
|
||||
TfLiteRegistration* Register_MUL() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/mul::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_MUL() {
|
||||
return {/*init=*/mul::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/mul::Prepare,
|
||||
/*invoke=*/mul::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@@ -17,7 +17,7 @@ limitations under the License.
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -28,14 +28,17 @@ constexpr int kInputTensor = 0;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
switch (input->type) {
|
||||
// TODO(wangtz): handle for kTfLiteInt8
|
||||
case kTfLiteFloat32:
|
||||
reference_ops::Negate(GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(output),
|
||||
GetTensorData<float>(output));
|
||||
reference_ops::Negate(tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
@@ -47,16 +50,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
} // namespace neg
|
||||
|
||||
TfLiteRegistration* Register_NEG() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/neg::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_NEG() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/neg::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@@ -16,7 +16,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -28,9 +28,11 @@ constexpr int kOutputTensor = 0;
|
||||
|
||||
template <typename T>
|
||||
TfLiteStatus PackImpl(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteTensor* output, int values_count, int axis) {
|
||||
TfLiteEvalTensor* output, int values_count, int axis) {
|
||||
const TfLiteEvalTensor* input0 =
|
||||
tflite::micro::GetEvalInput(context, node, 0);
|
||||
|
||||
const int dimensions = output->dims->size;
|
||||
const TfLiteTensor* input0 = GetInput(context, node, 0);
|
||||
const TfLiteIntArray* input_dims = input0->dims;
|
||||
const TfLiteIntArray* output_dims = output->dims;
|
||||
|
||||
@@ -52,11 +54,11 @@ TfLiteStatus PackImpl(TfLiteContext* context, TfLiteNode* node,
|
||||
}
|
||||
TFLITE_DCHECK_EQ(input_size, copy_size * outer_size);
|
||||
|
||||
T* output_data = GetTensorData<T>(output);
|
||||
T* output_data = tflite::micro::GetTensorData<T>(output);
|
||||
|
||||
for (int i = 0; i < values_count; ++i) {
|
||||
const TfLiteTensor* t = GetInput(context, node, i);
|
||||
const T* input_data = GetTensorData<T>(t);
|
||||
const TfLiteEvalTensor* t = tflite::micro::GetEvalInput(context, node, i);
|
||||
const T* input_data = tflite::micro::GetTensorData<T>(t);
|
||||
for (int k = 0; k < outer_size; ++k) {
|
||||
const T* input_ptr = input_data + copy_size * k;
|
||||
int loc = k * values_count * copy_size + i * copy_size;
|
||||
@@ -72,7 +74,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLitePackParams* data =
|
||||
reinterpret_cast<TfLitePackParams*>(node->builtin_data);
|
||||
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
switch (output->type) {
|
||||
case kTfLiteFloat32: {
|
||||
@@ -108,16 +111,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
} // namespace
|
||||
} // namespace pack
|
||||
|
||||
TfLiteRegistration* Register_PACK() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/pack::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_PACK() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/pack::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@@ -16,189 +16,208 @@ limitations under the License.
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
|
||||
#ifdef MEMORY_SANITIZER
|
||||
#include <sanitizer/msan_interface.h>
|
||||
#else
|
||||
#define __msan_check_mem_is_initialized(ptr, size)
|
||||
#endif
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor.h"
|
||||
#include "tensorflow/lite/kernels/internal/portable_tensor.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace pad {
|
||||
namespace {
|
||||
|
||||
struct PadContext {
|
||||
PadContext(TfLiteContext* context, TfLiteNode* node) {
|
||||
input = GetInput(context, node, 0);
|
||||
paddings = GetInput(context, node, 1);
|
||||
constant_values = nullptr;
|
||||
if (NumInputs(node) == 3) {
|
||||
constant_values = GetOptionalInputTensor(context, node, 2);
|
||||
} else {
|
||||
constant_values = nullptr;
|
||||
}
|
||||
output = GetOutput(context, node, 0);
|
||||
dims = NumDimensions(input);
|
||||
|
||||
resizing_category = ResizingCategory::kGenericResize;
|
||||
const int paddings_total = GetTensorShape(paddings).FlatSize();
|
||||
const int32* paddings_data = GetTensorData<int32>(paddings);
|
||||
// Paddings will be a n,2 array, and we need to detect 4D arrays with the
|
||||
// pattern { {0,0}, {a, b}, {c, d}, {0,0} }.
|
||||
if (IsConstantTensor(paddings) && paddings_total == 8 &&
|
||||
(paddings_data[0] == 0 && paddings_data[1] == 0) &&
|
||||
(paddings_data[6] == 0 && paddings_data[7] == 0)) {
|
||||
resizing_category = ResizingCategory::kImageStyle;
|
||||
}
|
||||
}
|
||||
const TfLiteTensor* constant_values;
|
||||
const TfLiteTensor* input;
|
||||
const TfLiteTensor* paddings;
|
||||
TfLiteTensor* output;
|
||||
int dims;
|
||||
ResizingCategory resizing_category;
|
||||
struct OpData {
|
||||
PadParams params;
|
||||
int32_t output_zero_point;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
TF_LITE_ENSURE(context, NumInputs(node) == 2 || NumInputs(node) == 3);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
|
||||
PadContext op_context(context, node);
|
||||
TF_LITE_ENSURE_EQ(context, op_context.input->type, op_context.output->type);
|
||||
if (op_context.constant_values != nullptr) {
|
||||
TF_LITE_ENSURE_EQ(context, op_context.input->type,
|
||||
op_context.constant_values->type);
|
||||
const TfLiteTensor* input = GetInput(context, node, /*index=*/0);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
const TfLiteTensor* paddings = GetInput(context, node, /*index=*/1);
|
||||
TF_LITE_ENSURE(context, paddings != nullptr);
|
||||
const TfLiteTensor* constant_values =
|
||||
NumInputs(node) == 3 ? GetInput(context, node, /*index=*/2) : nullptr;
|
||||
TfLiteTensor* output = GetOutput(context, node, /*index=*/0);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, input->type, output->type);
|
||||
|
||||
// Current implementations rely on the inputs being <= 4D.
|
||||
TF_LITE_ENSURE(context, NumDimensions(input) <=
|
||||
reference_ops::PadKernelMaxDimensionCount());
|
||||
|
||||
if (constant_values != nullptr) {
|
||||
TF_LITE_ENSURE_EQ(context, input->type, constant_values->type);
|
||||
// Ensure that constant_values is a scalar.
|
||||
TF_LITE_ENSURE_EQ(context, NumElements(constant_values), 1);
|
||||
}
|
||||
|
||||
// There must be a pair of paddings for each output dimension.
|
||||
TF_LITE_ENSURE_EQ(context, GetTensorShape(op_context.paddings).FlatSize(),
|
||||
op_context.output->dims->size * 2);
|
||||
TF_LITE_ENSURE_EQ(context, GetTensorShape(paddings).FlatSize(),
|
||||
output->dims->size * 2);
|
||||
|
||||
// On Micro, outputs must be properly sized by the converter.
|
||||
const int32* paddings_data = GetTensorData<int32>(op_context.paddings);
|
||||
for (int i = 0; i < op_context.output->dims->size; i++) {
|
||||
int output_dim = op_context.output->dims->data[i];
|
||||
int expected_dim = op_context.input->dims->data[i] + paddings_data[i * 2] +
|
||||
paddings_data[i * 2 + 1];
|
||||
// NOTE: This data is only available because the paddings buffer is stored in
|
||||
// the flatbuffer:
|
||||
TF_LITE_ENSURE(context, IsConstantTensor(paddings));
|
||||
const int32_t* paddings_data = GetTensorData<int32_t>(paddings);
|
||||
for (int i = 0; i < output->dims->size; i++) {
|
||||
int output_dim = output->dims->data[i];
|
||||
int expected_dim =
|
||||
input->dims->data[i] + paddings_data[i * 2] + paddings_data[i * 2 + 1];
|
||||
TF_LITE_ENSURE_EQ(context, output_dim, expected_dim);
|
||||
}
|
||||
|
||||
// Current implementations rely on the inputs being <= 4D.
|
||||
TF_LITE_ENSURE(
|
||||
context, op_context.dims <= reference_ops::PadKernelMaxDimensionCount());
|
||||
TF_LITE_ENSURE(context, IsConstantTensor(op_context.paddings));
|
||||
// Calculate OpData:
|
||||
data->params.resizing_category = ResizingCategory::kGenericResize;
|
||||
const int paddings_total = GetTensorShape(paddings).FlatSize();
|
||||
if (paddings_total == 8 && (paddings_data[0] == 0 && paddings_data[1] == 0) &&
|
||||
(paddings_data[6] == 0 && paddings_data[7] == 0)) {
|
||||
data->params.resizing_category = ResizingCategory::kImageStyle;
|
||||
}
|
||||
|
||||
const int num_input_dimensions = NumDimensions(input);
|
||||
data->params.left_padding_count = num_input_dimensions;
|
||||
data->params.right_padding_count = num_input_dimensions;
|
||||
|
||||
for (int idx = num_input_dimensions - 1; idx >= 0; --idx) {
|
||||
data->params.left_padding[idx] = paddings_data[idx * 2];
|
||||
data->params.right_padding[idx] = paddings_data[idx * 2 + 1];
|
||||
}
|
||||
|
||||
if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) {
|
||||
if (constant_values == nullptr) {
|
||||
// Quantized Pad requires that 0 is represented in the quantized
|
||||
// range.
|
||||
if (input->type == kTfLiteUInt8) {
|
||||
TF_LITE_ENSURE(context, output->params.zero_point >=
|
||||
std::numeric_limits<uint8_t>::min());
|
||||
TF_LITE_ENSURE(context, output->params.zero_point <=
|
||||
std::numeric_limits<uint8_t>::max());
|
||||
} else {
|
||||
TF_LITE_ENSURE(context, output->params.zero_point >=
|
||||
std::numeric_limits<int8_t>::min());
|
||||
TF_LITE_ENSURE(context, output->params.zero_point <=
|
||||
std::numeric_limits<int8_t>::max());
|
||||
}
|
||||
} else {
|
||||
// Quantized Pad requires that 'constant_values' is represented in the
|
||||
// same quantized range as the input and output tensors.
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point,
|
||||
constant_values->params.zero_point);
|
||||
TF_LITE_ENSURE_EQ(context, static_cast<double>(output->params.scale),
|
||||
static_cast<double>(constant_values->params.scale));
|
||||
}
|
||||
data->output_zero_point = output->params.zero_point;
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
PadContext op_context(context, node);
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
if (op_context.constant_values != nullptr) {
|
||||
// Ensure that constant_values is a scalar.
|
||||
TF_LITE_ENSURE_EQ(context, NumElements(op_context.constant_values), 1);
|
||||
}
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, /*index=*/0);
|
||||
const TfLiteEvalTensor* constant_values =
|
||||
NumInputs(node) == 3
|
||||
? tflite::micro::GetEvalInput(context, node, /*index=*/2)
|
||||
: nullptr;
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, /*index=*/0);
|
||||
|
||||
// Create before and after padding arrays that are accepted by the kernel.
|
||||
const int32* paddings_data = GetTensorData<int32>(op_context.paddings);
|
||||
|
||||
tflite::PadParams op_params;
|
||||
memset(&op_params, 0, sizeof(PadParams));
|
||||
op_params.left_padding_count = op_context.dims;
|
||||
op_params.right_padding_count = op_context.dims;
|
||||
|
||||
for (int idx = op_context.dims - 1; idx >= 0; --idx) {
|
||||
op_params.left_padding[idx] = paddings_data[idx * 2];
|
||||
op_params.right_padding[idx] = paddings_data[idx * 2 + 1];
|
||||
}
|
||||
|
||||
#define TF_LITE_PAD(type, op_name, scalar, pad_value) \
|
||||
const scalar pad_value_copy = pad_value; \
|
||||
\
|
||||
type::op_name(op_params, GetTensorShape(op_context.input), \
|
||||
GetTensorData<scalar>(op_context.input), &pad_value_copy, \
|
||||
GetTensorShape(op_context.output), \
|
||||
GetTensorData<scalar>(op_context.output))
|
||||
switch (op_context.input->type) {
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
float pad_value = op_context.constant_values == nullptr
|
||||
? 0.f
|
||||
: *GetTensorData<float>(op_context.constant_values);
|
||||
if (op_context.resizing_category == ResizingCategory::kImageStyle) {
|
||||
TF_LITE_PAD(reference_ops, PadImageStyle, float, pad_value);
|
||||
float pad_value =
|
||||
constant_values == nullptr
|
||||
? 0.f
|
||||
: *tflite::micro::GetTensorData<float>(constant_values);
|
||||
if (data->params.resizing_category == ResizingCategory::kImageStyle) {
|
||||
reference_ops::PadImageStyle(
|
||||
data->params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input), &pad_value,
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
} else {
|
||||
TF_LITE_PAD(reference_ops, Pad, float, pad_value);
|
||||
reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
&pad_value, tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
}
|
||||
} break;
|
||||
case kTfLiteUInt8: {
|
||||
uint8_t pad_value;
|
||||
if (op_context.constant_values == nullptr) {
|
||||
// Quantized Pad requires that 0 is represented in the quantized
|
||||
// range.
|
||||
TF_LITE_ENSURE(context, op_context.output->params.zero_point >=
|
||||
std::numeric_limits<uint8_t>::min());
|
||||
TF_LITE_ENSURE(context, op_context.output->params.zero_point <=
|
||||
std::numeric_limits<uint8_t>::max());
|
||||
pad_value = static_cast<uint8_t>(op_context.output->params.zero_point);
|
||||
if (constant_values == nullptr) {
|
||||
pad_value = static_cast<uint8_t>(data->output_zero_point);
|
||||
} else {
|
||||
// Quantized Pad requires that 'constant_values' is represented in the
|
||||
// same quantized range as the input and output tensors.
|
||||
TF_LITE_ENSURE_EQ(context, op_context.output->params.zero_point,
|
||||
op_context.constant_values->params.zero_point);
|
||||
TF_LITE_ENSURE_EQ(
|
||||
context, static_cast<double>(op_context.output->params.scale),
|
||||
static_cast<double>(op_context.constant_values->params.scale));
|
||||
pad_value = *GetTensorData<uint8_t>(op_context.constant_values);
|
||||
pad_value = *tflite::micro::GetTensorData<uint8_t>(constant_values);
|
||||
}
|
||||
if (op_context.resizing_category == ResizingCategory::kImageStyle) {
|
||||
TF_LITE_PAD(reference_ops, PadImageStyle, uint8_t, pad_value);
|
||||
if (data->params.resizing_category == ResizingCategory::kImageStyle) {
|
||||
reference_ops::PadImageStyle(
|
||||
data->params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input), &pad_value,
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} else {
|
||||
TF_LITE_PAD(reference_ops, Pad, uint8_t, pad_value);
|
||||
reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
&pad_value, tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
}
|
||||
} break;
|
||||
case kTfLiteInt8: {
|
||||
int8_t pad_value;
|
||||
if (op_context.constant_values == nullptr) {
|
||||
// Quantized Pad requires that 0 is represented in the quantized
|
||||
// range.
|
||||
TF_LITE_ENSURE(context, op_context.output->params.zero_point >=
|
||||
std::numeric_limits<int8_t>::min());
|
||||
TF_LITE_ENSURE(context, op_context.output->params.zero_point <=
|
||||
std::numeric_limits<int8_t>::max());
|
||||
pad_value = static_cast<int8_t>(op_context.output->params.zero_point);
|
||||
if (constant_values == nullptr) {
|
||||
pad_value = static_cast<uint8_t>(data->output_zero_point);
|
||||
} else {
|
||||
// Quantized Pad requires that 'constant_values' is represented in the
|
||||
// same quantized range as the input and output tensors.
|
||||
TF_LITE_ENSURE_EQ(context, op_context.output->params.zero_point,
|
||||
op_context.constant_values->params.zero_point);
|
||||
TF_LITE_ENSURE(context, op_context.output->params.scale ==
|
||||
op_context.constant_values->params.scale);
|
||||
pad_value = *GetTensorData<int8_t>(op_context.constant_values);
|
||||
pad_value = *tflite::micro::GetTensorData<int8_t>(constant_values);
|
||||
}
|
||||
if (op_context.resizing_category == ResizingCategory::kImageStyle) {
|
||||
TF_LITE_PAD(reference_ops, PadImageStyle, int8_t, pad_value);
|
||||
if (data->params.resizing_category == ResizingCategory::kImageStyle) {
|
||||
reference_ops::PadImageStyle(
|
||||
data->params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input), &pad_value,
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
} else {
|
||||
TF_LITE_PAD(reference_ops, Pad, int8_t, pad_value);
|
||||
reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
&pad_value, tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
} break;
|
||||
case kTfLiteInt32: {
|
||||
int32_t pad_value =
|
||||
op_context.constant_values == nullptr
|
||||
constant_values == nullptr
|
||||
? 0
|
||||
: *GetTensorData<int32_t>(op_context.constant_values);
|
||||
TF_LITE_PAD(reference_ops, Pad, int32_t, pad_value);
|
||||
: *tflite::micro::GetTensorData<int32_t>(constant_values);
|
||||
reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int32_t>(input),
|
||||
&pad_value, tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int32_t>(output));
|
||||
} break;
|
||||
default:
|
||||
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s not currently supported by Pad.",
|
||||
TfLiteTypeGetName(op_context.input->type));
|
||||
TfLiteTypeGetName(input->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
#undef TF_LITE_PAD
|
||||
@@ -207,29 +226,27 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
} // namespace pad
|
||||
|
||||
TfLiteRegistration* Register_PAD() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/pad::Prepare,
|
||||
/*invoke=*/pad::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_PAD() {
|
||||
return {/*init=*/pad::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/pad::Prepare,
|
||||
/*invoke=*/pad::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
// Also register Pad as PadV2.
|
||||
TfLiteRegistration* Register_PADV2() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/pad::Prepare,
|
||||
/*invoke=*/pad::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_PADV2() {
|
||||
return {/*init=*/pad::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/pad::Prepare,
|
||||
/*invoke=*/pad::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@@ -19,6 +19,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/padding.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -32,6 +33,10 @@ constexpr int kOutputTensor = 0;
|
||||
|
||||
struct OpData {
|
||||
TfLitePaddingValues padding;
|
||||
int32_t activation_min;
|
||||
int32_t activation_max;
|
||||
float activation_min_f32;
|
||||
float activation_max_f32;
|
||||
};
|
||||
|
||||
TfLiteStatus CalculateOpData(const TfLiteContext* context,
|
||||
@@ -55,11 +60,7 @@ TfLiteStatus CalculateOpData(const TfLiteContext* context,
|
||||
|
||||
void AverageEvalFloat(const TfLiteContext* context, const TfLiteNode* node,
|
||||
const TfLitePoolParams* params, const OpData* data,
|
||||
const TfLiteTensor* input, TfLiteTensor* output) {
|
||||
float activation_min, activation_max;
|
||||
CalculateActivationRange(params->activation, &activation_min,
|
||||
&activation_max);
|
||||
|
||||
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
|
||||
PoolParams op_params;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
@@ -67,20 +68,19 @@ void AverageEvalFloat(const TfLiteContext* context, const TfLiteNode* node,
|
||||
op_params.filter_width = params->filter_width;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.float_activation_min = activation_min;
|
||||
op_params.float_activation_max = activation_max;
|
||||
reference_ops::AveragePool(
|
||||
op_params, GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(output), GetTensorData<float>(output));
|
||||
op_params.float_activation_min = data->activation_min_f32;
|
||||
op_params.float_activation_max = data->activation_max_f32;
|
||||
reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
}
|
||||
|
||||
void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
|
||||
const TfLitePoolParams* params, const OpData* data,
|
||||
const TfLiteTensor* input, TfLiteTensor* output) {
|
||||
const TfLiteEvalTensor* input,
|
||||
TfLiteEvalTensor* output) {
|
||||
TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8);
|
||||
int32_t activation_min, activation_max;
|
||||
(void)CalculateActivationRangeQuantized(context, params->activation, output,
|
||||
&activation_min, &activation_max);
|
||||
|
||||
PoolParams op_params;
|
||||
op_params.stride_height = params->stride_height;
|
||||
@@ -89,27 +89,26 @@ void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
|
||||
op_params.filter_width = params->filter_width;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.quantized_activation_min = activation_min;
|
||||
op_params.quantized_activation_max = activation_max;
|
||||
op_params.quantized_activation_min = data->activation_min;
|
||||
op_params.quantized_activation_max = data->activation_max;
|
||||
|
||||
if (input->type == kTfLiteUInt8) {
|
||||
reference_ops::AveragePool(
|
||||
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
|
||||
GetTensorShape(output), GetTensorData<uint8_t>(output));
|
||||
reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} else {
|
||||
reference_integer_ops::AveragePool(
|
||||
op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
|
||||
GetTensorShape(output), GetTensorData<int8_t>(output));
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
}
|
||||
|
||||
void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLitePoolParams* params, OpData* data,
|
||||
const TfLiteTensor* input, TfLiteTensor* output) {
|
||||
float activation_min, activation_max;
|
||||
CalculateActivationRange(params->activation, &activation_min,
|
||||
&activation_max);
|
||||
|
||||
TfLitePoolParams* params, const OpData* data,
|
||||
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
|
||||
tflite::PoolParams op_params;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
@@ -117,22 +116,17 @@ void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
op_params.filter_width = params->filter_width;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.float_activation_min = activation_min;
|
||||
op_params.float_activation_max = activation_max;
|
||||
reference_ops::MaxPool(op_params, GetTensorShape(input),
|
||||
GetTensorData<float>(input), GetTensorShape(output),
|
||||
GetTensorData<float>(output));
|
||||
op_params.float_activation_min = data->activation_min_f32;
|
||||
op_params.float_activation_max = data->activation_max_f32;
|
||||
reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
}
|
||||
|
||||
void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLitePoolParams* params, OpData* data,
|
||||
const TfLiteTensor* input, TfLiteTensor* output) {
|
||||
TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8);
|
||||
|
||||
int32_t activation_min, activation_max;
|
||||
(void)CalculateActivationRangeQuantized(context, params->activation, output,
|
||||
&activation_min, &activation_max);
|
||||
|
||||
TfLitePoolParams* params, const OpData* data,
|
||||
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
|
||||
tflite::PoolParams op_params;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
@@ -140,39 +134,44 @@ void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
op_params.filter_width = params->filter_width;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.quantized_activation_min = activation_min;
|
||||
op_params.quantized_activation_max = activation_max;
|
||||
op_params.quantized_activation_min = data->activation_min;
|
||||
op_params.quantized_activation_max = data->activation_max;
|
||||
|
||||
if (input->type == kTfLiteUInt8) {
|
||||
reference_ops::MaxPool(
|
||||
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
|
||||
GetTensorShape(output), GetTensorData<uint8_t>(output));
|
||||
reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} else {
|
||||
reference_integer_ops::MaxPool(
|
||||
op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
|
||||
GetTensorShape(output), GetTensorData<int8_t>(output));
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
|
||||
TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
|
||||
OpData data;
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, &data));
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
// Inputs and outputs share the same type, guaranteed by the converter.
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32:
|
||||
AverageEvalFloat(context, node, params, &data, input, output);
|
||||
AverageEvalFloat(context, node, params, data, input, output);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
case kTfLiteInt8:
|
||||
AverageEvalQuantized(context, node, params, &data, input, output);
|
||||
AverageEvalQuantized(context, node, params, data, input, output);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Input type %s is not currently supported",
|
||||
@@ -183,21 +182,24 @@ TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
}
|
||||
|
||||
TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
|
||||
OpData data;
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, &data));
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32:
|
||||
MaxEvalFloat(context, node, params, &data, input, output);
|
||||
MaxEvalFloat(context, node, params, data, input, output);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
case kTfLiteInt8:
|
||||
MaxEvalQuantized(context, node, params, &data, input, output);
|
||||
MaxEvalQuantized(context, node, params, data, input, output);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.",
|
||||
@@ -207,30 +209,59 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace pooling
|
||||
|
||||
TfLiteRegistration* Register_AVERAGE_POOL_2D() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/pooling::AverageEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_MAX_POOL_2D() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/pooling::MaxEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
|
||||
TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, data));
|
||||
|
||||
if (input->type == kTfLiteFloat32) {
|
||||
CalculateActivationRange(params->activation, &data->activation_min_f32,
|
||||
&data->activation_max_f32);
|
||||
} else if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) {
|
||||
CalculateActivationRangeQuantized(context, params->activation, output,
|
||||
&data->activation_min,
|
||||
&data->activation_max);
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace pooling
|
||||
|
||||
TfLiteRegistration Register_AVERAGE_POOL_2D() {
|
||||
return {/*init=*/pooling::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/pooling::Prepare,
|
||||
/*invoke=*/pooling::AverageEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration Register_MAX_POOL_2D() {
|
||||
return {/*init=*/pooling::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/pooling::Prepare,
|
||||
/*invoke=*/pooling::MaxEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@@ -15,20 +15,45 @@ limitations under the License.
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/prelu.h"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace activations {
|
||||
namespace {
|
||||
|
||||
TfLiteStatus CalculatePreluParams(const TfLiteTensor* input,
|
||||
const TfLiteTensor* alpha,
|
||||
TfLiteTensor* output, PreluParams* params) {
|
||||
if (output->type == kTfLiteInt8 || output->type == kTfLiteUInt8 ||
|
||||
output->type == kTfLiteInt16) {
|
||||
double real_multiplier_1 = static_cast<double>(input->params.scale) /
|
||||
static_cast<double>(output->params.scale);
|
||||
double real_multiplier_2 = static_cast<double>(input->params.scale) *
|
||||
static_cast<double>(alpha->params.scale) /
|
||||
static_cast<double>(output->params.scale);
|
||||
QuantizeMultiplier(real_multiplier_1, ¶ms->output_multiplier_1,
|
||||
¶ms->output_shift_1);
|
||||
QuantizeMultiplier(real_multiplier_2, ¶ms->output_multiplier_2,
|
||||
¶ms->output_shift_2);
|
||||
|
||||
params->input_offset = -input->params.zero_point;
|
||||
params->alpha_offset = -alpha->params.zero_point;
|
||||
params->output_offset = output->params.zero_point;
|
||||
}
|
||||
|
||||
TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
inline void BroadcastPrelu4DSlowFloat(
|
||||
const RuntimeShape& unextended_input1_shape, const float* input1_data,
|
||||
const RuntimeShape& unextended_input2_shape, const float* input2_data,
|
||||
@@ -60,43 +85,67 @@ inline void BroadcastPrelu4DSlowFloat(
|
||||
}
|
||||
}
|
||||
|
||||
TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
void* PreluInit(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(PreluParams));
|
||||
}
|
||||
|
||||
TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
PreluParams* params = static_cast<PreluParams*>(node->user_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
const TfLiteTensor* alpha = GetInput(context, node, 1);
|
||||
TF_LITE_ENSURE(context, alpha != nullptr);
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
int32_t output_multiplier = 0;
|
||||
int output_shift = 0;
|
||||
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt16) {
|
||||
double real_multiplier = static_cast<double>(input->params.scale) *
|
||||
static_cast<double>(alpha->params.scale) /
|
||||
static_cast<double>(output->params.scale);
|
||||
QuantizeMultiplierSmallerThanOneExp(real_multiplier, &output_multiplier,
|
||||
&output_shift);
|
||||
}
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
|
||||
return CalculatePreluParams(input, alpha, output, params);
|
||||
}
|
||||
|
||||
TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const PreluParams& params =
|
||||
*(static_cast<const PreluParams*>(node->user_data));
|
||||
|
||||
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
|
||||
const TfLiteEvalTensor* alpha = tflite::micro::GetEvalInput(context, node, 1);
|
||||
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
BroadcastPrelu4DSlowFloat(
|
||||
GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(alpha), GetTensorData<float>(alpha),
|
||||
GetTensorShape(output), GetTensorData<float>(output));
|
||||
BroadcastPrelu4DSlowFloat(tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(alpha),
|
||||
tflite::micro::GetTensorData<float>(alpha),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
return kTfLiteOk;
|
||||
} break;
|
||||
case kTfLiteUInt8: {
|
||||
PreluParams op_params;
|
||||
op_params.input_offset = -input->params.zero_point;
|
||||
op_params.alpha_offset = -alpha->params.zero_point;
|
||||
op_params.output_offset = output->params.zero_point;
|
||||
op_params.output_multiplier = output_multiplier;
|
||||
op_params.output_shift = output_shift;
|
||||
reference_ops::BroadcastPrelu4DSlow(
|
||||
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
|
||||
GetTensorShape(alpha), GetTensorData<uint8_t>(alpha),
|
||||
GetTensorShape(output), GetTensorData<uint8_t>(output));
|
||||
params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(alpha),
|
||||
tflite::micro::GetTensorData<uint8_t>(alpha),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
return kTfLiteOk;
|
||||
} break;
|
||||
case kTfLiteInt8: {
|
||||
reference_ops::BroadcastPrelu4DSlow(
|
||||
params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(alpha),
|
||||
tflite::micro::GetTensorData<int8_t>(alpha),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
return kTfLiteOk;
|
||||
} break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(
|
||||
context, "Only float32 and uint8 are supported currently, got %d.",
|
||||
context, "Only float32 and uint8_t are supported currently, got %d.",
|
||||
TfLiteTypeGetName(input->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
@@ -104,16 +153,15 @@ TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
} // namespace activations
|
||||
|
||||
TfLiteRegistration* Register_PRELU() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/activations::PreluPrepare,
|
||||
/*invoke=*/activations::PreluEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_PRELU() {
|
||||
return {/*init=*/activations::PreluInit,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/activations::PreluPrepare,
|
||||
/*invoke=*/activations::PreluEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@@ -19,19 +19,38 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/micro_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace quantize {
|
||||
namespace {
|
||||
|
||||
struct OpData {
|
||||
tflite::QuantizationParams quantization_params;
|
||||
// The scaling factor from input to output (aka the 'real multiplier') can
|
||||
// be represented as a fixed point multiplier plus a left shift.
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
|
||||
int32_t input_zero_point;
|
||||
};
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
|
||||
// TODO(b/128934713): Add support for fixed-point per-channel quantization.
|
||||
// Currently this only support affine per-layer quantization.
|
||||
@@ -43,34 +62,61 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TF_LITE_ENSURE(context, affine_quantization->scale);
|
||||
TF_LITE_ENSURE(context, affine_quantization->scale->size == 1);
|
||||
|
||||
TF_LITE_ENSURE(context,
|
||||
input->type == kTfLiteFloat32 || input->type == kTfLiteInt16);
|
||||
TF_LITE_ENSURE(context,
|
||||
output->type == kTfLiteUInt8 || output->type == kTfLiteInt8);
|
||||
TF_LITE_ENSURE(context, input->type == kTfLiteFloat32 ||
|
||||
input->type == kTfLiteInt16 ||
|
||||
input->type == kTfLiteInt8);
|
||||
TF_LITE_ENSURE(context, output->type == kTfLiteUInt8 ||
|
||||
output->type == kTfLiteInt8 ||
|
||||
output->type == kTfLiteInt16 ||
|
||||
output->type == kTfLiteInt32);
|
||||
|
||||
if (((input->type == kTfLiteInt16 || input->type == kTfLiteInt8) &&
|
||||
output->type == kTfLiteInt8) ||
|
||||
(input->type == kTfLiteInt16 && output->type == kTfLiteInt16)) {
|
||||
double effective_scale = static_cast<double>(input->params.scale) /
|
||||
static_cast<double>(output->params.scale);
|
||||
|
||||
QuantizeMultiplier(effective_scale, &data->output_multiplier,
|
||||
&data->output_shift);
|
||||
}
|
||||
|
||||
data->quantization_params.zero_point = output->params.zero_point;
|
||||
data->quantization_params.scale = static_cast<double>(output->params.scale);
|
||||
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
tflite::QuantizationParams op_params;
|
||||
op_params.zero_point = output->params.zero_point;
|
||||
op_params.scale = static_cast<double>(output->params.scale);
|
||||
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
|
||||
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
|
||||
|
||||
if (input->type == kTfLiteFloat32) {
|
||||
switch (output->type) {
|
||||
case kTfLiteInt8:
|
||||
reference_ops::AffineQuantize(
|
||||
op_params, GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(output), GetTensorData<int8_t>(output));
|
||||
data->quantization_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
reference_ops::AffineQuantize(
|
||||
op_params, GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(output), GetTensorData<uint8_t>(output));
|
||||
data->quantization_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
break;
|
||||
case kTfLiteInt16:
|
||||
reference_ops::AffineQuantize(
|
||||
data->quantization_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int16_t>(output));
|
||||
return kTfLiteOk;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
TfLiteTypeGetName(input->type),
|
||||
@@ -79,17 +125,45 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
}
|
||||
} else if (input->type == kTfLiteInt16) {
|
||||
size_t size = ElementCount(*input->dims);
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
double effective_scale =
|
||||
static_cast<double>(input->params.scale / output->params.scale);
|
||||
switch (output->type) {
|
||||
case kTfLiteInt8:
|
||||
QuantizeMultiplier(effective_scale, &output_multiplier, &output_shift);
|
||||
reference_ops::Requantize(tflite::micro::GetTensorData<int16_t>(input),
|
||||
size, data->output_multiplier,
|
||||
data->output_shift, data->input_zero_point,
|
||||
data->quantization_params.zero_point,
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
break;
|
||||
case kTfLiteInt16:
|
||||
reference_ops::Requantize(
|
||||
GetTensorData<int16_t>(input), size, output_multiplier,
|
||||
output_shift, input->params.zero_point, output->params.zero_point,
|
||||
GetTensorData<int8_t>(output));
|
||||
tflite::micro::GetTensorData<int16_t>(input), size,
|
||||
data->output_multiplier, data->output_shift, data->input_zero_point,
|
||||
data->quantization_params.zero_point,
|
||||
tflite::micro::GetTensorData<int16_t>(output));
|
||||
return kTfLiteOk;
|
||||
case kTfLiteInt32:
|
||||
reference_ops::Requantize(
|
||||
tflite::micro::GetTensorData<int16_t>(input), size,
|
||||
data->output_multiplier, data->output_shift, data->input_zero_point,
|
||||
data->quantization_params.zero_point,
|
||||
tflite::micro::GetTensorData<int32_t>(output));
|
||||
return kTfLiteOk;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
TfLiteTypeGetName(input->type),
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else if (input->type == kTfLiteInt8) {
|
||||
// Int8 to Int8 requantization, required if the input and output tensors
|
||||
// have different scales and/or zero points.
|
||||
size_t size = ElementCount(*input->dims);
|
||||
switch (output->type) {
|
||||
case kTfLiteInt8:
|
||||
reference_ops::Requantize(tflite::micro::GetTensorData<int8_t>(input),
|
||||
size, data->output_multiplier,
|
||||
data->output_shift, data->input_zero_point,
|
||||
data->quantization_params.zero_point,
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
@@ -107,23 +181,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace quantize
|
||||
} // namespace
|
||||
|
||||
// This Op (QUANTIZE) quantizes the input and produces quantized output.
|
||||
// AffineQuantize takes scale and zero point and quantizes the float value to
|
||||
// quantized output, in int8 or uint8 format.
|
||||
TfLiteRegistration* Register_QUANTIZE() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/quantize::Prepare,
|
||||
/*invoke=*/quantize::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_QUANTIZE() {
|
||||
return {/*init=*/Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/Prepare,
|
||||
/*invoke=*/Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
|
||||
@@ -18,9 +18,12 @@ limitations under the License.
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/mean.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/micro_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -30,10 +33,27 @@ namespace reduce {
|
||||
constexpr int kMaxNumberOfAxis = 4;
|
||||
constexpr int kMaxNumberOfReducedAxis = 2;
|
||||
|
||||
struct OpData {
|
||||
int32_t multiplier;
|
||||
int shift;
|
||||
int temp_buffer_idx;
|
||||
int resolved_axis_idx;
|
||||
int input_zp;
|
||||
float input_scale;
|
||||
int output_zp;
|
||||
float output_scale;
|
||||
int num_output_elements;
|
||||
};
|
||||
|
||||
void* InitReduce(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus PrepareSimple(TfLiteContext* context, TfLiteNode* node) {
|
||||
// Inputs Tensor (dtype depends on quantization):
|
||||
// [0] = Input
|
||||
// [1] = Axis
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
|
||||
// Outputs Tensor (dtype depends on quantization):
|
||||
// [0] = Output
|
||||
@@ -44,13 +64,63 @@ TfLiteStatus PrepareSimple(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
// Validate axis type
|
||||
const TfLiteTensor* axis = GetInput(context, node, 1);
|
||||
TF_LITE_ENSURE(context, axis != nullptr);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, axis->type, kTfLiteInt32);
|
||||
|
||||
if (input->type == kTfLiteInt8) {
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
const TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
const double real_multiplier = static_cast<double>(input->params.scale) /
|
||||
static_cast<double>(output->params.scale);
|
||||
QuantizeMultiplier(real_multiplier, &data->multiplier, &data->shift);
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus PrepareMax(TfLiteContext* context, TfLiteNode* node) {
|
||||
TF_LITE_ENSURE_OK(context, PrepareSimple(context, node));
|
||||
|
||||
OpData* op_data = static_cast<OpData*>(node->user_data);
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
const TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
const TfLiteTensor* axis = GetInput(context, node, 1);
|
||||
|
||||
op_data->input_scale = input->params.scale;
|
||||
op_data->output_scale = output->params.scale;
|
||||
op_data->num_output_elements = NumElements(output);
|
||||
|
||||
context->RequestScratchBufferInArena(context, sizeof(int) * input->dims->size,
|
||||
&op_data->temp_buffer_idx);
|
||||
context->RequestScratchBufferInArena(
|
||||
context, sizeof(int) * static_cast<int>(ElementCount(*axis->dims)),
|
||||
&op_data->resolved_axis_idx);
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus PrepareMeanOrSum(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
|
||||
const TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
if (input->type == kTfLiteInt8) {
|
||||
const double real_multiplier = static_cast<double>(input->params.scale) /
|
||||
static_cast<double>(output->params.scale);
|
||||
QuantizeMultiplier(real_multiplier, &op_data->multiplier, &op_data->shift);
|
||||
}
|
||||
|
||||
int output_size = NumElements(output);
|
||||
if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) {
|
||||
context->RequestScratchBufferInArena(context, output_size * sizeof(int32_t),
|
||||
&op_data->temp_buffer_idx);
|
||||
op_data->input_zp = input->params.zero_point;
|
||||
op_data->input_scale = input->params.scale;
|
||||
op_data->output_zp = output->params.zero_point;
|
||||
op_data->output_scale = output->params.scale;
|
||||
}
|
||||
|
||||
TF_LITE_ENSURE_OK(context, PrepareSimple(context, node));
|
||||
// TODO(b/144955155): Support uint8(b/144955155) and int8(b/144955018)
|
||||
// TODO(b/144955155): Support uint8_t(b/144955155) and int8_t(b/144955018)
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
@@ -58,7 +128,7 @@ void ResolveAxis(const int* axis_data, int axis_count,
|
||||
tflite::MeanParams* op_params) {
|
||||
int i = 0;
|
||||
for (; i < axis_count; ++i) {
|
||||
op_params->axis[i] = static_cast<int16>(axis_data[i]);
|
||||
op_params->axis[i] = static_cast<int16_t>(axis_data[i]);
|
||||
}
|
||||
for (; i < 4; ++i) {
|
||||
op_params->axis[i] = 1;
|
||||
@@ -67,69 +137,206 @@ void ResolveAxis(const int* axis_data, int axis_count,
|
||||
}
|
||||
|
||||
TfLiteStatus EvalMean(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
const TfLiteTensor* axis = GetInput(context, node, 1);
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
|
||||
const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 1);
|
||||
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
|
||||
TfLiteReducerParams* params =
|
||||
reinterpret_cast<TfLiteReducerParams*>(node->builtin_data);
|
||||
OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
|
||||
|
||||
int num_axis = static_cast<int>(NumElements(axis));
|
||||
int num_axis = static_cast<int>(ElementCount(*axis->dims));
|
||||
int temp_index[kMaxNumberOfAxis];
|
||||
int resolved_axis[kMaxNumberOfReducedAxis];
|
||||
|
||||
tflite::MeanParams op_params;
|
||||
ResolveAxis(tflite::micro::GetTensorData<int>(axis), num_axis, &op_params);
|
||||
|
||||
// Special case mean implementation exists for 4D mean across axes 1 and 2.
|
||||
bool special_case_4d_axes_1_and_2 =
|
||||
input->dims->size == 4 && op_params.axis_count == 2 &&
|
||||
((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
|
||||
(op_params.axis[0] == 2 && op_params.axis[1] == 1));
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
tflite::MeanParams op_params;
|
||||
ResolveAxis(GetTensorData<int>(axis), num_axis, &op_params);
|
||||
// TODO(b/146571391): Support only 4D Input and 2D Axis for Mean until
|
||||
// scratch tensor allocation has been implemented in (b/132070898)
|
||||
bool is_valid_inputs =
|
||||
(NumDimensions(input) == 4 && op_params.axis_count == 2 &&
|
||||
((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
|
||||
(op_params.axis[0] == 2 && op_params.axis[1] == 1)));
|
||||
TF_LITE_ENSURE_MSG(
|
||||
context, is_valid_inputs == true,
|
||||
"Number of Input "
|
||||
"dimensions != 4 OR the Axis is not either [1, 2] or [2, 1]");
|
||||
// TODO(b/139102329): Handle the below special case in the combined
|
||||
// reference method.
|
||||
// Defer to specialized implementation for 4D Mean across axes 1 & 2.
|
||||
if (params->keep_dims) {
|
||||
reference_ops::Mean(op_params, GetTensorShape(input),
|
||||
GetTensorData<float>(input), GetTensorShape(output),
|
||||
GetTensorData<float>(output));
|
||||
if (params->keep_dims && special_case_4d_axes_1_and_2) {
|
||||
reference_ops::Mean(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
} else {
|
||||
TF_LITE_ENSURE(
|
||||
context,
|
||||
reference_ops::Mean(GetTensorData<float>(input), input->dims->data,
|
||||
input->dims->size, GetTensorData<float>(output),
|
||||
reference_ops::Mean(
|
||||
tflite::micro::GetTensorData<float>(input), input->dims->data,
|
||||
input->dims->size, tflite::micro::GetTensorData<float>(output),
|
||||
output->dims->data, output->dims->size,
|
||||
tflite::micro::GetTensorData<int>(axis), num_axis,
|
||||
params->keep_dims, temp_index, resolved_axis,
|
||||
tflite::micro::GetTensorData<float>(output)));
|
||||
}
|
||||
} break;
|
||||
case kTfLiteInt8: {
|
||||
// Defer to specialized implementation for 4D Mean across axes 1 & 2.
|
||||
if (params->keep_dims && special_case_4d_axes_1_and_2) {
|
||||
reference_integer_ops::Mean(
|
||||
op_params, op_data->multiplier, op_data->shift,
|
||||
tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input), op_data->input_zp,
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output), op_data->output_zp);
|
||||
} else if (op_data->input_zp == op_data->output_zp &&
|
||||
op_data->input_scale == op_data->output_scale) {
|
||||
int32_t* temp_buffer = static_cast<int32_t*>(
|
||||
context->GetScratchBuffer(context, op_data->temp_buffer_idx));
|
||||
TF_LITE_ENSURE(
|
||||
context,
|
||||
reference_ops::Mean(
|
||||
tflite::micro::GetTensorData<int8_t>(input), input->dims->data,
|
||||
input->dims->size, tflite::micro::GetTensorData<int8_t>(output),
|
||||
output->dims->data, output->dims->size,
|
||||
tflite::micro::GetTensorData<int>(axis), num_axis,
|
||||
params->keep_dims, temp_index, resolved_axis, temp_buffer));
|
||||
} else {
|
||||
int32_t* temp_buffer = static_cast<int32_t*>(
|
||||
context->GetScratchBuffer(context, op_data->temp_buffer_idx));
|
||||
TF_LITE_ENSURE(
|
||||
context,
|
||||
reference_ops::QuantizedMeanOrSum(
|
||||
tflite::micro::GetTensorData<int8_t>(input), op_data->input_zp,
|
||||
op_data->input_scale, input->dims->data, input->dims->size,
|
||||
tflite::micro::GetTensorData<int8_t>(output),
|
||||
op_data->output_zp, op_data->output_scale, output->dims->data,
|
||||
output->dims->size, tflite::micro::GetTensorData<int>(axis),
|
||||
num_axis, params->keep_dims, temp_index, resolved_axis,
|
||||
temp_buffer, false));
|
||||
}
|
||||
} break;
|
||||
case kTfLiteUInt8: {
|
||||
// Defer to specialized implementation for 4D Mean across axes 1 & 2.
|
||||
if (params->keep_dims && special_case_4d_axes_1_and_2) {
|
||||
reference_ops::Mean(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
op_data->input_zp, op_data->input_scale,
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output),
|
||||
op_data->output_zp, op_data->output_scale);
|
||||
} else if (op_data->input_zp == op_data->output_zp &&
|
||||
op_data->input_scale == op_data->output_scale) {
|
||||
uint32_t* temp_buffer = static_cast<uint32_t*>(
|
||||
context->GetScratchBuffer(context, op_data->temp_buffer_idx));
|
||||
TF_LITE_ENSURE(
|
||||
context,
|
||||
reference_ops::Mean(tflite::micro::GetTensorData<uint8_t>(input),
|
||||
input->dims->data, input->dims->size,
|
||||
tflite::micro::GetTensorData<uint8_t>(output),
|
||||
output->dims->data, output->dims->size,
|
||||
GetTensorData<int>(axis), num_axis,
|
||||
params->keep_dims, temp_index, resolved_axis,
|
||||
GetTensorData<float>(output)));
|
||||
tflite::micro::GetTensorData<int>(axis),
|
||||
num_axis, params->keep_dims, temp_index,
|
||||
resolved_axis, temp_buffer));
|
||||
} else {
|
||||
uint32_t* temp_buffer = static_cast<uint32_t*>(
|
||||
context->GetScratchBuffer(context, op_data->temp_buffer_idx));
|
||||
TF_LITE_ENSURE(
|
||||
context,
|
||||
reference_ops::QuantizedMeanOrSum(
|
||||
tflite::micro::GetTensorData<uint8_t>(input), op_data->input_zp,
|
||||
op_data->input_scale, input->dims->data, input->dims->size,
|
||||
tflite::micro::GetTensorData<uint8_t>(output),
|
||||
op_data->output_zp, op_data->output_scale, output->dims->data,
|
||||
output->dims->size, tflite::micro::GetTensorData<int>(axis),
|
||||
num_axis, params->keep_dims, temp_index, resolved_axis,
|
||||
temp_buffer, false));
|
||||
}
|
||||
} break;
|
||||
default:
|
||||
// TODO(b/144955155): Support uint8(b/144955155) and int8(b/144955018)
|
||||
TF_LITE_ENSURE_MSG(context, false,
|
||||
"Currently, only float32 input type "
|
||||
"Currently, only float32, int8 or uint8 input type "
|
||||
"is supported.");
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus EvalMax(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
|
||||
const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 1);
|
||||
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
|
||||
TfLiteReducerParams* params =
|
||||
static_cast<TfLiteReducerParams*>(node->builtin_data);
|
||||
OpData* op_data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
// Interpret an axis tensor with null dimensions as a scalar
|
||||
int num_axis = static_cast<int>(ElementCount(*axis->dims));
|
||||
int* temp_buffer = static_cast<int*>(
|
||||
context->GetScratchBuffer(context, op_data->temp_buffer_idx));
|
||||
int* resolved_axis = static_cast<int*>(
|
||||
context->GetScratchBuffer(context, op_data->resolved_axis_idx));
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32:
|
||||
TF_LITE_ENSURE(
|
||||
context,
|
||||
reference_ops::ReduceGeneric<float>(
|
||||
tflite::micro::GetTensorData<float>(input), input->dims->data,
|
||||
input->dims->size, tflite::micro::GetTensorData<float>(output),
|
||||
output->dims->data, output->dims->size,
|
||||
tflite::micro::GetTensorData<int>(axis), num_axis,
|
||||
params->keep_dims, temp_buffer, resolved_axis,
|
||||
std::numeric_limits<float>::lowest(),
|
||||
[](const float current, const float in) -> float {
|
||||
return (in > current) ? in : current;
|
||||
}));
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
TF_LITE_ENSURE_EQ(context, static_cast<double>(op_data->input_scale),
|
||||
static_cast<double>(op_data->output_scale));
|
||||
TF_LITE_ENSURE_EQ(context, op_data->input_zp, op_data->output_zp);
|
||||
TF_LITE_ENSURE(
|
||||
context,
|
||||
reference_ops::ReduceGeneric<int8_t>(
|
||||
tflite::micro::GetTensorData<int8_t>(input), input->dims->data,
|
||||
input->dims->size, tflite::micro::GetTensorData<int8_t>(output),
|
||||
output->dims->data, output->dims->size,
|
||||
tflite::micro::GetTensorData<int>(axis), num_axis,
|
||||
params->keep_dims, temp_buffer, resolved_axis,
|
||||
std::numeric_limits<int8_t>::lowest(),
|
||||
[](const int8_t current, const int8_t in) -> int8_t {
|
||||
return (in > current) ? in : current;
|
||||
}));
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context,
|
||||
"Only float32 and int8 types are supported.\n");
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace reduce
|
||||
|
||||
TfLiteRegistration* Register_MEAN() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/reduce::PrepareMeanOrSum,
|
||||
/*invoke=*/reduce::EvalMean,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_MEAN() {
|
||||
return {/*init=*/reduce::InitReduce,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/reduce::PrepareMeanOrSum,
|
||||
/*invoke=*/reduce::EvalMean,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration Register_REDUCE_MAX() {
|
||||
return {/*init=*/reduce::InitReduce,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/reduce::PrepareMax,
|
||||
/*invoke=*/reduce::EvalMax,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
|
||||
@@ -18,6 +18,9 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/memory_helpers.h"
|
||||
#include "tensorflow/lite/micro/micro_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -29,7 +32,9 @@ constexpr int kOutputTensor = 0;
|
||||
|
||||
TfLiteStatus ReshapeOutput(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
// Tensorflow's Reshape allows one of the shape components to have the
|
||||
// special -1 value, meaning it will be calculated automatically based on the
|
||||
// input. Here we calculate what that dimension should be so that the number
|
||||
@@ -61,7 +66,7 @@ TfLiteStatus ReshapeOutput(TfLiteContext* context, TfLiteNode* node) {
|
||||
num_output_elements *= output_shape->data[stretch_dim];
|
||||
}
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, input->type, output->type);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
|
||||
TF_LITE_ENSURE_EQ(context, num_input_elements, num_output_elements);
|
||||
return kTfLiteOk;
|
||||
}
|
||||
@@ -74,13 +79,21 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
// TODO(b/162522304): storing input bytes in OpData increases some models
|
||||
// significantly, possibly due to alignment issues.
|
||||
size_t input_bytes;
|
||||
TF_LITE_ENSURE_STATUS(TfLiteTypeSizeOf(input->type, &input_bytes));
|
||||
input_bytes *= ElementCount(*input->dims);
|
||||
|
||||
// Do nothing for in-place reshape.
|
||||
if (input->data.raw != output->data.raw) {
|
||||
// Otherwise perform reshape with copy.
|
||||
for (size_t i = 0; i < input->bytes; ++i) {
|
||||
for (size_t i = 0; i < input_bytes; ++i) {
|
||||
output->data.raw[i] = input->data.raw[i];
|
||||
}
|
||||
}
|
||||
@@ -89,16 +102,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
} // namespace reshape
|
||||
|
||||
TfLiteRegistration* Register_RESHAPE() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/reshape::Prepare,
|
||||
/*invoke=*/reshape::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_RESHAPE() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/reshape::Prepare,
|
||||
/*invoke=*/reshape::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@@ -20,6 +20,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -31,7 +32,6 @@ constexpr int kSizeTensor = 1;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
#if defined(DEBUG)
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
|
||||
@@ -49,11 +49,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
output->type = input->type;
|
||||
|
||||
if (!IsConstantTensor(size)) {
|
||||
TF_LITE_KERNEL_LOG(context,
|
||||
"Dynamic tensors are unsupported in tfmicro.");
|
||||
TF_LITE_KERNEL_LOG(context, "Dynamic tensors are unsupported in tfmicro.");
|
||||
return kTfLiteError;
|
||||
}
|
||||
#endif
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
@@ -61,9 +59,12 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params =
|
||||
reinterpret_cast<TfLiteResizeNearestNeighborParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
const TfLiteTensor* size = GetInput(context, node, kSizeTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
const TfLiteEvalTensor* size =
|
||||
tflite::micro::GetEvalInput(context, node, kSizeTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
tflite::ResizeNearestNeighborParams op_params;
|
||||
op_params.align_corners = params->align_corners;
|
||||
@@ -71,22 +72,31 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
if (output->type == kTfLiteFloat32) {
|
||||
reference_ops::ResizeNearestNeighbor(
|
||||
op_params, GetTensorShape(input), GetTensorData<int32>(input),
|
||||
GetTensorShape(size), GetTensorData<int32>(size),
|
||||
GetTensorShape(output), GetTensorData<int32>(output));
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int32_t>(input),
|
||||
tflite::micro::GetTensorShape(size),
|
||||
tflite::micro::GetTensorData<int32_t>(size),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int32_t>(output));
|
||||
} else if (output->type == kTfLiteUInt8) {
|
||||
reference_ops::ResizeNearestNeighbor(
|
||||
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
|
||||
GetTensorShape(size), GetTensorData<int32>(size),
|
||||
GetTensorShape(output), GetTensorData<uint8_t>(output));
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(size),
|
||||
tflite::micro::GetTensorData<int32_t>(size),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} else if (output->type == kTfLiteInt8) {
|
||||
reference_ops::ResizeNearestNeighbor(
|
||||
op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
|
||||
GetTensorShape(size), GetTensorData<int32>(size),
|
||||
GetTensorShape(output), GetTensorData<int8_t>(output));
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(size),
|
||||
tflite::micro::GetTensorData<int32_t>(size),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
} else {
|
||||
TF_LITE_KERNEL_LOG(context,
|
||||
"Output type is %d, requires float, uint8 or int8.",
|
||||
"Output type is %d, requires float, uint8_t or int8_t.",
|
||||
output->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
@@ -95,16 +105,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
}
|
||||
} // namespace resize_nearest_neighbor
|
||||
|
||||
TfLiteRegistration* Register_RESIZE_NEAREST_NEIGHBOR() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/resize_nearest_neighbor::Prepare,
|
||||
/*invoke=*/resize_nearest_neighbor::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_RESIZE_NEAREST_NEIGHBOR() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/resize_nearest_neighbor::Prepare,
|
||||
/*invoke=*/resize_nearest_neighbor::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@@ -18,6 +18,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -29,11 +30,13 @@ constexpr int kOutputTensor = 0;
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32);
|
||||
TF_LITE_ENSURE_EQ(context, output->type, input->type);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, output->type, input->type);
|
||||
TF_LITE_ENSURE_EQ(context, output->bytes, input->bytes);
|
||||
TF_LITE_ENSURE_EQ(context, output->dims->size, input->dims->size);
|
||||
for (int i = 0; i < output->dims->size; ++i) {
|
||||
@@ -43,26 +46,29 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
reference_ops::Round(GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(output), GetTensorData<float>(output));
|
||||
reference_ops::Round(tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
} // namespace round
|
||||
|
||||
TfLiteRegistration* Register_ROUND() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/round::Prepare,
|
||||
/*invoke=*/round::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_ROUND() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/round::Prepare,
|
||||
/*invoke=*/round::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
73
code/lib/tfmicro/tensorflow/lite/micro/kernels/shape.cc
Normal file
73
code/lib/tfmicro/tensorflow/lite/micro/kernels/shape.cc
Normal file
@@ -0,0 +1,73 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/memory_helpers.h"
|
||||
#include "tensorflow/lite/micro/micro_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
namespace {
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
void ExtractShape(const TfLiteEvalTensor* input, int32_t* output_data) {
|
||||
for (int i = 0; i < input->dims->size; ++i) {
|
||||
output_data[i] = input->dims->data[i];
|
||||
}
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
if (output->type != kTfLiteInt32) {
|
||||
TF_LITE_KERNEL_LOG(context, "Output type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(output->type), output->type);
|
||||
return kTfLiteError;
|
||||
} else {
|
||||
ExtractShape(input, tflite::micro::GetTensorData<int32_t>(output));
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
TfLiteRegistration Register_SHAPE() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/Prepare,
|
||||
/*invoke=*/Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace tflite
|
||||
@@ -22,29 +22,35 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace activations {
|
||||
namespace {
|
||||
|
||||
// Softmax parameter data that persists in user_data
|
||||
static constexpr int kInt16LUTArraySize = 513;
|
||||
|
||||
TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
|
||||
const TfLiteTensor* input,
|
||||
TfLiteTensor* output,
|
||||
const TfLiteSoftmaxParams* params,
|
||||
SoftmaxParams* op_data) {
|
||||
if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
|
||||
if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8 ||
|
||||
input->type == kTfLiteInt16) {
|
||||
if (input->type == kTfLiteUInt8) {
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteUInt8);
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
|
||||
} else {
|
||||
} else if (input->type == kTfLiteInt16) {
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
|
||||
TF_LITE_ENSURE_NEAR(context, output->params.scale, 1.f / 32768,
|
||||
(0.001f * 1.f / 32768));
|
||||
} else { // input->type == kTfLiteInt8
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8);
|
||||
if (output->type == kTfLiteInt16) {
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point, -32768);
|
||||
// NOTE: Current int16 softmax output does not require symmetric scaling
|
||||
// - so no need to verify scale here.
|
||||
} else {
|
||||
TF_LITE_ENSURE_NEAR(context, output->params.scale, 1.f / 65536,
|
||||
(0.001f * 1.f / 65536));
|
||||
} else { // output->type == kTfLiteint8
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8);
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point, -128);
|
||||
TF_LITE_ENSURE(context, output->params.scale == 1.f / 256);
|
||||
@@ -53,15 +59,28 @@ TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
|
||||
|
||||
static const int kScaledDiffIntegerBits = 5;
|
||||
|
||||
int input_left_shift;
|
||||
tflite::PreprocessSoftmaxScaling(
|
||||
static_cast<double>(params->beta),
|
||||
static_cast<double>(input->params.scale), kScaledDiffIntegerBits,
|
||||
&op_data->input_multiplier, &input_left_shift);
|
||||
op_data->input_left_shift = input_left_shift;
|
||||
op_data->diff_min =
|
||||
-1.0 * tflite::CalculateInputRadius(kScaledDiffIntegerBits,
|
||||
op_data->input_left_shift);
|
||||
// Calculate input_multiplier and input_left_shift
|
||||
if (input->type == kTfLiteInt16) {
|
||||
int input_left_shift;
|
||||
double input_scale_beta_rescale =
|
||||
static_cast<double>(input->params.scale) *
|
||||
static_cast<double>(params->beta) /
|
||||
(10.0 / 65535.0); // scale the input_diff such that [-65535, 0]
|
||||
// correspond to [-10.0, 0.0]
|
||||
QuantizeMultiplier(input_scale_beta_rescale, &op_data->input_multiplier,
|
||||
&input_left_shift);
|
||||
op_data->input_left_shift = input_left_shift;
|
||||
} else {
|
||||
int input_left_shift;
|
||||
tflite::PreprocessSoftmaxScaling(
|
||||
static_cast<double>(params->beta),
|
||||
static_cast<double>(input->params.scale), kScaledDiffIntegerBits,
|
||||
&op_data->input_multiplier, &input_left_shift);
|
||||
op_data->input_left_shift = input_left_shift;
|
||||
op_data->diff_min =
|
||||
-1.0 * tflite::CalculateInputRadius(kScaledDiffIntegerBits,
|
||||
op_data->input_left_shift);
|
||||
}
|
||||
} else {
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteFloat32);
|
||||
@@ -70,53 +89,106 @@ TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
// Takes a tensor and performs softmax along the last dimension.
|
||||
void SoftmaxFloat(const TfLiteEvalTensor* input, TfLiteEvalTensor* output,
|
||||
const SoftmaxParams& op_data) {
|
||||
tflite::reference_ops::Softmax(op_data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
}
|
||||
|
||||
void SoftmaxQuantized(const TfLiteEvalTensor* input, TfLiteEvalTensor* output,
|
||||
const SoftmaxParams& op_data) {
|
||||
if (input->type == kTfLiteUInt8) {
|
||||
tflite::reference_ops::Softmax(
|
||||
op_data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} else if (input->type == kTfLiteInt8) {
|
||||
if (output->type == kTfLiteInt16) {
|
||||
tflite::reference_ops::Softmax(
|
||||
op_data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int16_t>(output));
|
||||
} else {
|
||||
tflite::reference_ops::Softmax(
|
||||
op_data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
} else {
|
||||
tflite::reference_ops::SoftmaxInt16(
|
||||
op_data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int16_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int16_t>(output));
|
||||
}
|
||||
}
|
||||
|
||||
void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(SoftmaxParams));
|
||||
}
|
||||
|
||||
TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
// Takes a tensor and performs softmax along the last dimension.
|
||||
void SoftmaxFloat(const TfLiteTensor* input, TfLiteTensor* output,
|
||||
const SoftmaxParams& op_data) {
|
||||
tflite::reference_ops::Softmax(
|
||||
op_data, GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(output), GetTensorData<float>(output));
|
||||
}
|
||||
|
||||
void SoftmaxQuantized(const TfLiteTensor* input, TfLiteTensor* output,
|
||||
const SoftmaxParams& op_data) {
|
||||
if (input->type == kTfLiteUInt8) {
|
||||
tflite::reference_ops::Softmax(
|
||||
op_data, GetTensorShape(input), GetTensorData<uint8_t>(input),
|
||||
GetTensorShape(output), GetTensorData<uint8_t>(output));
|
||||
} else {
|
||||
if (output->type == kTfLiteInt16) {
|
||||
tflite::reference_ops::Softmax(
|
||||
op_data, GetTensorShape(input), GetTensorData<int8_t>(input),
|
||||
GetTensorShape(output), GetTensorData<int16_t>(output));
|
||||
} else {
|
||||
tflite::reference_ops::Softmax(
|
||||
op_data, GetTensorShape(input), GetTensorData<int8_t>(input),
|
||||
GetTensorShape(output), GetTensorData<int8_t>(output));
|
||||
}
|
||||
TF_LITE_ENSURE(context, node->user_data != nullptr);
|
||||
SoftmaxParams* op_data = static_cast<SoftmaxParams*>(node->user_data);
|
||||
// Only allocate LUTs for KTfLiteInt16 data type
|
||||
if (input->type == kTfLiteInt16) {
|
||||
void* raw_exp_lut = context->AllocatePersistentBuffer(
|
||||
context, sizeof(int16_t) * kInt16LUTArraySize);
|
||||
TF_LITE_ENSURE(context, raw_exp_lut != nullptr);
|
||||
op_data->exp_lut = reinterpret_cast<int16_t*>(raw_exp_lut);
|
||||
void* one_over_one_plus_x_lut = context->AllocatePersistentBuffer(
|
||||
context, sizeof(int16_t) * kInt16LUTArraySize);
|
||||
TF_LITE_ENSURE(context, one_over_one_plus_x_lut != nullptr);
|
||||
op_data->one_over_one_plus_x_lut =
|
||||
reinterpret_cast<int16_t*>(one_over_one_plus_x_lut);
|
||||
}
|
||||
|
||||
if (output->type == kTfLiteInt16) {
|
||||
TF_LITE_ENSURE(context, input->type == kTfLiteInt8 ||
|
||||
input->type == kTfLiteUInt8 ||
|
||||
input->type == kTfLiteInt16);
|
||||
} else {
|
||||
TF_LITE_ENSURE_EQ(context, input->type, output->type);
|
||||
}
|
||||
|
||||
// Populate LUT if required
|
||||
if (input->type == kTfLiteInt16) {
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
|
||||
// exp LUT only used on negative values
|
||||
// we consider exp(-10.0) is insignificant to accumulation
|
||||
gen_lut([](float value) { return std::exp(value); }, -10.0f, 0.0f,
|
||||
op_data->exp_lut, kInt16LUTArraySize);
|
||||
gen_lut([](float value) { return 1.0f / (1.0f + value); }, 0.0f, 1.0f,
|
||||
op_data->one_over_one_plus_x_lut, kInt16LUTArraySize);
|
||||
op_data->zero_point = output->params.zero_point;
|
||||
op_data->scale = output->params.scale;
|
||||
}
|
||||
|
||||
auto* params = static_cast<TfLiteSoftmaxParams*>(node->builtin_data);
|
||||
return CalculateSoftmaxParams(context, input, output, params, op_data);
|
||||
}
|
||||
|
||||
TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params = static_cast<TfLiteSoftmaxParams*>(node->builtin_data);
|
||||
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
|
||||
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
|
||||
SoftmaxParams op_data;
|
||||
TF_LITE_ENSURE_STATUS(
|
||||
CalculateSoftmaxParams(context, input, output, params, &op_data));
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
SoftmaxParams op_data = *static_cast<SoftmaxParams*>(node->user_data);
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
@@ -124,7 +196,8 @@ TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case kTfLiteInt8:
|
||||
case kTfLiteUInt8: {
|
||||
case kTfLiteUInt8:
|
||||
case kTfLiteInt16: {
|
||||
SoftmaxQuantized(input, output, op_data);
|
||||
return kTfLiteOk;
|
||||
}
|
||||
@@ -134,20 +207,17 @@ TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return kTfLiteError;
|
||||
}
|
||||
}
|
||||
} // namespace activations
|
||||
} // namespace
|
||||
|
||||
TfLiteRegistration* Register_SOFTMAX() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/activations::SoftmaxPrepare,
|
||||
/*invoke=*/activations::SoftmaxEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_SOFTMAX() {
|
||||
return {/*init=*/SoftmaxInit,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/SoftmaxPrepare,
|
||||
/*invoke=*/SoftmaxEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
|
||||
@@ -17,6 +17,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -25,10 +26,11 @@ namespace split {
|
||||
|
||||
template <typename T>
|
||||
TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node,
|
||||
const TfLiteTensor* input, int axis_value) {
|
||||
const TfLiteEvalTensor* input, int axis_value) {
|
||||
const int output_count = NumOutputs(node);
|
||||
const TfLiteIntArray* input_dims = input->dims;
|
||||
const TfLiteTensor* output0 = GetOutput(context, node, 0);
|
||||
const TfLiteEvalTensor* output0 =
|
||||
tflite::micro::GetEvalOutput(context, node, 0);
|
||||
const TfLiteIntArray* output_dims = output0->dims;
|
||||
|
||||
const int split_dimensions = input_dims->size;
|
||||
@@ -50,11 +52,11 @@ TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node,
|
||||
base_inner_size *= input_dims->data[i];
|
||||
}
|
||||
|
||||
const T* input_ptr = GetTensorData<T>(input);
|
||||
const T* input_ptr = tflite::micro::GetTensorData<T>(input);
|
||||
for (int k = 0; k < outer_size; ++k) {
|
||||
for (int i = 0; i < output_count; ++i) {
|
||||
TfLiteTensor* t = GetOutput(context, node, i);
|
||||
T* output_data = GetTensorData<T>(t);
|
||||
TfLiteEvalTensor* t = tflite::micro::GetEvalOutput(context, node, i);
|
||||
T* output_data = tflite::micro::GetTensorData<T>(t);
|
||||
const int copy_size = output_dims->data[axis] * base_inner_size;
|
||||
T* output_ptr = output_data + k * copy_size;
|
||||
for (int j = 0; j < copy_size; ++j) output_ptr[j] = input_ptr[j];
|
||||
@@ -65,23 +67,29 @@ TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node,
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* axis = GetInput(context, node, 0);
|
||||
const TfLiteTensor* input = GetInput(context, node, 1);
|
||||
TF_LITE_ENSURE(context, axis != nullptr);
|
||||
|
||||
// Dynamic output tensors are needed if axis tensor is not constant.
|
||||
// But Micro doesn't support dynamic memory allocation, so we only support
|
||||
// constant axis tensor for now.
|
||||
TF_LITE_ENSURE_MSG(context, IsConstantTensor(axis),
|
||||
"Non constant axis tensor not supported");
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
int axis_value = GetTensorData<int32_t>(axis)[0];
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 0);
|
||||
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 1);
|
||||
|
||||
int axis_value = tflite::micro::GetTensorData<int32_t>(axis)[0];
|
||||
if (axis_value < 0) {
|
||||
axis_value += NumDimensions(input);
|
||||
axis_value += input->dims->size;
|
||||
}
|
||||
|
||||
TF_LITE_ENSURE(context, axis_value >= 0);
|
||||
TF_LITE_ENSURE(context, axis_value < NumDimensions(input));
|
||||
TF_LITE_ENSURE(context, axis_value < input->dims->size);
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
@@ -111,16 +119,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
} // namespace split
|
||||
|
||||
TfLiteRegistration* Register_SPLIT() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/split::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_SPLIT() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/split::Prepare,
|
||||
/*invoke=*/split::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
135
code/lib/tfmicro/tensorflow/lite/micro/kernels/split_v.cc
Normal file
135
code/lib/tfmicro/tensorflow/lite/micro/kernels/split_v.cc
Normal file
@@ -0,0 +1,135 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace split_v {
|
||||
|
||||
template <typename T>
|
||||
TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node,
|
||||
const TfLiteEvalTensor* input, int axis_value) {
|
||||
const TfLiteIntArray* input_dims = input->dims;
|
||||
const TfLiteEvalTensor* output0 =
|
||||
tflite::micro::GetEvalOutput(context, node, 0);
|
||||
|
||||
const int split_dimensions = input_dims->size;
|
||||
|
||||
TFLITE_DCHECK_LT(axis_value, split_dimensions);
|
||||
TFLITE_DCHECK_EQ(output0->dims->size, split_dimensions);
|
||||
|
||||
int64_t split_size = 0;
|
||||
const int output_count = NumOutputs(node);
|
||||
for (int i = 0; i < output_count; i++) {
|
||||
split_size +=
|
||||
tflite::micro::GetEvalOutput(context, node, i)->dims->data[axis_value];
|
||||
}
|
||||
TFLITE_DCHECK_EQ(split_size, input_dims->data[axis_value]);
|
||||
int64_t outer_size = 1;
|
||||
for (int i = 0; i < axis_value; ++i) {
|
||||
outer_size *= input_dims->data[i];
|
||||
}
|
||||
|
||||
int64_t base_inner_size = 1;
|
||||
for (int i = axis_value + 1; i < split_dimensions; ++i) {
|
||||
base_inner_size *= input_dims->data[i];
|
||||
}
|
||||
|
||||
const T* input_ptr = tflite::micro::GetTensorData<T>(input);
|
||||
for (int k = 0; k < outer_size; ++k) {
|
||||
for (int i = 0; i < output_count; ++i) {
|
||||
TfLiteEvalTensor* output_tensor =
|
||||
tflite::micro::GetEvalOutput(context, node, i);
|
||||
T* output_data = tflite::micro::GetTensorData<T>(output_tensor);
|
||||
const int copy_size =
|
||||
output_tensor->dims->data[axis_value] * base_inner_size;
|
||||
T* output_ptr = output_data + k * copy_size;
|
||||
for (int j = 0; j < copy_size; ++j) output_ptr[j] = input_ptr[j];
|
||||
input_ptr += copy_size;
|
||||
}
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 3);
|
||||
|
||||
// Dynamic output tensors are needed if axis tensor is not constant.
|
||||
// But Micro doesn't support dynamic memory allocation, so we only support
|
||||
// constant axis tensor for now.
|
||||
const TfLiteTensor* axis = GetInput(context, node, 2);
|
||||
TF_LITE_ENSURE_MSG(context, IsConstantTensor(axis),
|
||||
"Non constant axis tensor not supported");
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
|
||||
const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 2);
|
||||
|
||||
int axis_value = tflite::micro::GetTensorData<int32_t>(axis)[0];
|
||||
if (axis_value < 0) {
|
||||
axis_value += input->dims->size;
|
||||
}
|
||||
|
||||
TF_LITE_ENSURE(context, axis_value >= 0);
|
||||
TF_LITE_ENSURE(context, axis_value < input->dims->size);
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
return SplitImpl<float>(context, node, input, axis_value);
|
||||
}
|
||||
case kTfLiteInt8: {
|
||||
return SplitImpl<int8_t>(context, node, input, axis_value);
|
||||
}
|
||||
case kTfLiteInt16: {
|
||||
return SplitImpl<int16_t>(context, node, input, axis_value);
|
||||
}
|
||||
case kTfLiteInt32: {
|
||||
return SplitImpl<int32_t>(context, node, input, axis_value);
|
||||
}
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s currently not supported.",
|
||||
TfLiteTypeGetName(input->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace split_v
|
||||
|
||||
TfLiteRegistration Register_SPLIT_V() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/split_v::Prepare,
|
||||
/*invoke=*/split_v::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
@@ -15,23 +15,20 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/reference/strided_slice.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace strided_slice {
|
||||
|
||||
enum KernelType {
|
||||
kReference,
|
||||
// TODO(soroosh): add kGenericOptimized
|
||||
};
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kBeginTensor = 1;
|
||||
constexpr int kEndTensor = 2;
|
||||
@@ -120,64 +117,74 @@ TfLiteStatus CheckOutputSize(TfLiteContext* context,
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(StridedSliceParams));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
StridedSliceParams* op_params =
|
||||
static_cast<StridedSliceParams*>(node->user_data);
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 4);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
StridedSliceContext op_context(context, node);
|
||||
TF_LITE_ENSURE_MSG(context, op_context.dims <= kMaxDim,
|
||||
"input dim should not exceed 4");
|
||||
auto params = BuildStridedSliceParams(&op_context);
|
||||
memcpy(op_params, ¶ms, sizeof(StridedSliceParams));
|
||||
return CheckOutputSize(context, &op_context);
|
||||
}
|
||||
|
||||
template <KernelType kernel_type>
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
StridedSliceContext op_context(context, node);
|
||||
auto op_params = BuildStridedSliceParams(&op_context);
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const StridedSliceParams& op_params =
|
||||
*(static_cast<const StridedSliceParams*>(node->user_data));
|
||||
|
||||
#define TF_LITE_STRIDED_SLICE(kernel_type, data_type) \
|
||||
kernel_type::StridedSlice(op_params, GetTensorShape(op_context.input), \
|
||||
GetTensorData<data_type>(op_context.input), \
|
||||
GetTensorShape(op_context.output), \
|
||||
GetTensorData<data_type>(op_context.output))
|
||||
|
||||
switch (op_context.input->type) {
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
switch (output->type) {
|
||||
case kTfLiteFloat32:
|
||||
if (kernel_type == kReference) {
|
||||
TF_LITE_STRIDED_SLICE(reference_ops, float);
|
||||
}
|
||||
reference_ops::StridedSlice(op_params,
|
||||
tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
if (kernel_type == kReference) {
|
||||
TF_LITE_STRIDED_SLICE(reference_ops, uint8_t);
|
||||
}
|
||||
reference_ops::StridedSlice(
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
if (kernel_type == kReference) {
|
||||
TF_LITE_STRIDED_SLICE(reference_ops, int8_t);
|
||||
}
|
||||
reference_ops::StridedSlice(op_params,
|
||||
tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(op_context.input->type),
|
||||
op_context.input->type);
|
||||
TfLiteTypeGetName(input->type), input->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
#undef TF_LITE_STRIDED_SLICE
|
||||
return kTfLiteOk;
|
||||
}
|
||||
} // namespace strided_slice
|
||||
|
||||
TfLiteRegistration* Register_STRIDED_SLICE() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/strided_slice::Prepare,
|
||||
/*invoke=*/strided_slice::Eval<strided_slice::kReference>,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_STRIDED_SLICE() {
|
||||
return {/*init=*/strided_slice::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/strided_slice::Prepare,
|
||||
/*invoke=*/strided_slice::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@@ -21,8 +21,10 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -40,18 +42,18 @@ struct OpData {
|
||||
// and the special 16-bit -> 16bit quantized path
|
||||
int input1_shift;
|
||||
int input2_shift;
|
||||
int32 output_activation_min;
|
||||
int32 output_activation_max;
|
||||
int32_t output_activation_min;
|
||||
int32_t output_activation_max;
|
||||
|
||||
// These fields are used only in the general 8-bit -> 8bit quantized path
|
||||
int32 input1_multiplier;
|
||||
int32 input2_multiplier;
|
||||
int32 output_multiplier;
|
||||
int32_t input1_multiplier;
|
||||
int32_t input2_multiplier;
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
int left_shift;
|
||||
int32 input1_offset;
|
||||
int32 input2_offset;
|
||||
int32 output_offset;
|
||||
int32_t input1_offset;
|
||||
int32_t input2_offset;
|
||||
int32_t output_offset;
|
||||
};
|
||||
|
||||
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteSubParams* params,
|
||||
@@ -93,31 +95,62 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteSubParams* params,
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
auto* params = reinterpret_cast<TfLiteSubParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
|
||||
TF_LITE_ENSURE(context, input1 != nullptr);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
|
||||
TF_LITE_ENSURE(context, input2 != nullptr);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
|
||||
TF_LITE_ENSURE_STATUS(
|
||||
CalculateOpData(context, params, input1, input2, output, data));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void EvalSub(TfLiteContext* context, TfLiteNode* node, TfLiteSubParams* params,
|
||||
const OpData* data, const TfLiteTensor* input1,
|
||||
const TfLiteTensor* input2, TfLiteTensor* output) {
|
||||
const OpData* data, const TfLiteEvalTensor* input1,
|
||||
const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
|
||||
float output_activation_min, output_activation_max;
|
||||
CalculateActivationRange(params->activation, &output_activation_min,
|
||||
&output_activation_max);
|
||||
tflite::ArithmeticParams op_params;
|
||||
SetActivationParams(output_activation_min, output_activation_max, &op_params);
|
||||
#define TF_LITE_SUB(opname) \
|
||||
opname(op_params, GetTensorShape(input1), GetTensorData<float>(input1), \
|
||||
GetTensorShape(input2), GetTensorData<float>(input2), \
|
||||
GetTensorShape(output), GetTensorData<float>(output))
|
||||
if (data->requires_broadcast) {
|
||||
TF_LITE_SUB(tflite::reference_ops::BroadcastSubSlow);
|
||||
tflite::reference_ops::BroadcastSubSlow(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<float>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<float>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
} else {
|
||||
TF_LITE_SUB(tflite::reference_ops::SubWithActivation);
|
||||
tflite::reference_ops::SubWithActivation(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<float>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<float>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
}
|
||||
#undef TF_LITE_SUB
|
||||
}
|
||||
|
||||
TfLiteStatus EvalSubQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteSubParams* params, const OpData* data,
|
||||
const TfLiteTensor* input1,
|
||||
const TfLiteTensor* input2,
|
||||
TfLiteTensor* output) {
|
||||
const TfLiteEvalTensor* input1,
|
||||
const TfLiteEvalTensor* input2,
|
||||
TfLiteEvalTensor* output) {
|
||||
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
|
||||
tflite::ArithmeticParams op_params;
|
||||
op_params.left_shift = data->left_shift;
|
||||
@@ -133,25 +166,46 @@ TfLiteStatus EvalSubQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
SetActivationParams(data->output_activation_min,
|
||||
data->output_activation_max, &op_params);
|
||||
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
|
||||
GetTensorShape(input1), GetTensorShape(input2), &op_params);
|
||||
#define TF_LITE_SUB(opname, dtype) \
|
||||
opname(op_params, GetTensorShape(input1), GetTensorData<dtype>(input1), \
|
||||
GetTensorShape(input2), GetTensorData<dtype>(input2), \
|
||||
GetTensorShape(output), GetTensorData<dtype>(output));
|
||||
tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorShape(input2), &op_params);
|
||||
|
||||
if (output->type == kTfLiteInt8) {
|
||||
if (need_broadcast) {
|
||||
TF_LITE_SUB(tflite::reference_ops::BroadcastSubSlow, int8_t);
|
||||
tflite::reference_ops::BroadcastSubSlow(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<int8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<int8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
} else {
|
||||
TF_LITE_SUB(tflite::reference_ops::Sub, int8_t);
|
||||
tflite::reference_ops::Sub(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<int8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<int8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
} else {
|
||||
if (need_broadcast) {
|
||||
TF_LITE_SUB(tflite::reference_ops::BroadcastSubSlow, uint8_t);
|
||||
tflite::reference_ops::BroadcastSubSlow(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<uint8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<uint8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} else {
|
||||
TF_LITE_SUB(tflite::reference_ops::Sub, uint8_t);
|
||||
tflite::reference_ops::Sub(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<uint8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<uint8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
}
|
||||
}
|
||||
#undef TF_LITE_SUB
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
@@ -160,13 +214,15 @@ TfLiteStatus EvalSubQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params = reinterpret_cast<TfLiteSubParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
const TfLiteEvalTensor* input1 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor1);
|
||||
const TfLiteEvalTensor* input2 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor2);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
OpData data;
|
||||
TF_LITE_ENSURE_STATUS(
|
||||
CalculateOpData(context, params, input1, input2, output, &data));
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData& data = *(static_cast<const OpData*>(node->user_data));
|
||||
|
||||
if (output->type == kTfLiteFloat32) {
|
||||
EvalSub(context, node, params, &data, input1, input2, output);
|
||||
@@ -184,16 +240,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
} // namespace sub
|
||||
|
||||
TfLiteRegistration* Register_SUB() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/sub::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_SUB() {
|
||||
return {/*init=*/sub::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/sub::Prepare,
|
||||
/*invoke=*/sub::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@@ -23,25 +23,38 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/activation_utils.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/micro_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace svdf {
|
||||
namespace {
|
||||
|
||||
struct OpData {
|
||||
int32 effective_scale_1_a;
|
||||
int32 effective_scale_2_a;
|
||||
int32_t effective_scale_1_a;
|
||||
int32_t effective_scale_2_a;
|
||||
// b versions of each scale are kept at int since the numbers are just the
|
||||
// shift value - typically between [-32, 32].
|
||||
int effective_scale_1_b;
|
||||
int effective_scale_2_b;
|
||||
int scratch_tensor_index;
|
||||
int scratch_output_tensor_index;
|
||||
|
||||
// Cached tensor zero point values for quantized operations.
|
||||
int input_zero_point;
|
||||
int output_zero_point;
|
||||
};
|
||||
|
||||
// Input tensors.
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kWeightsFeatureTensor = 1;
|
||||
constexpr int kWeightsTimeTensor = 2;
|
||||
constexpr int kBiasTensor = 3;
|
||||
// This is a variable tensor, and will be modified by this op.
|
||||
constexpr int kInputActivationStateTensor = 4;
|
||||
|
||||
// Output tensor.
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
/**
|
||||
* This version of SVDF is specific to TFLite Micro. It contains the following
|
||||
* differences between the TFLite version:
|
||||
@@ -107,18 +120,19 @@ static inline void ApplyTimeWeightsBiasAndActivation(
|
||||
for (int b = 0; b < batch_size; ++b) {
|
||||
float* output_ptr_batch = output_ptr + b * num_units;
|
||||
for (int i = 0; i < num_units; ++i) {
|
||||
*output_ptr_batch = ActivationValFloat(activation, *output_ptr_batch);
|
||||
*output_ptr_batch =
|
||||
tflite::ops::micro::ActivationValFloat(activation, *output_ptr_batch);
|
||||
++output_ptr_batch;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline void EvalFloatSVDF(
|
||||
TfLiteContext* context, TfLiteNode* node, const TfLiteTensor* input,
|
||||
const TfLiteTensor* weights_feature, const TfLiteTensor* weights_time,
|
||||
const TfLiteTensor* bias, const TfLiteSVDFParams* params,
|
||||
int scratch_tensor_index, TfLiteTensor* activation_state,
|
||||
TfLiteTensor* output) {
|
||||
TfLiteContext* context, TfLiteNode* node, const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* weights_feature,
|
||||
const TfLiteEvalTensor* weights_time, const TfLiteEvalTensor* bias,
|
||||
const TfLiteSVDFParams* params, int scratch_tensor_index,
|
||||
TfLiteEvalTensor* activation_state, TfLiteEvalTensor* output) {
|
||||
const int rank = params->rank;
|
||||
const int batch_size = input->dims->data[0];
|
||||
const int input_size = input->dims->data[1];
|
||||
@@ -126,12 +140,14 @@ inline void EvalFloatSVDF(
|
||||
const int num_units = num_filters / rank;
|
||||
const int memory_size = weights_time->dims->data[1];
|
||||
|
||||
const float* weights_feature_ptr = GetTensorData<float>(weights_feature);
|
||||
const float* weights_time_ptr = GetTensorData<float>(weights_time);
|
||||
const float* bias_ptr = GetTensorData<float>(bias);
|
||||
const float* input_ptr = GetTensorData<float>(input);
|
||||
const float* weights_feature_ptr =
|
||||
tflite::micro::GetTensorData<float>(weights_feature);
|
||||
const float* weights_time_ptr =
|
||||
tflite::micro::GetTensorData<float>(weights_time);
|
||||
const float* bias_ptr = tflite::micro::GetTensorData<float>(bias);
|
||||
const float* input_ptr = tflite::micro::GetTensorData<float>(input);
|
||||
|
||||
float* state_ptr = GetTensorData<float>(activation_state);
|
||||
float* state_ptr = tflite::micro::GetTensorData<float>(activation_state);
|
||||
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
TFLITE_DCHECK(context->GetScratchBuffer != nullptr);
|
||||
@@ -139,7 +155,7 @@ inline void EvalFloatSVDF(
|
||||
float* scratch_ptr = static_cast<float*>(
|
||||
context->GetScratchBuffer(context, scratch_tensor_index));
|
||||
|
||||
float* output_ptr = GetTensorData<float>(output);
|
||||
float* output_ptr = tflite::micro::GetTensorData<float>(output);
|
||||
|
||||
// Left shift the activation_state.
|
||||
{
|
||||
@@ -185,14 +201,13 @@ inline void EvalFloatSVDF(
|
||||
}
|
||||
|
||||
void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
|
||||
const TfLiteTensor* input_tensor,
|
||||
const TfLiteTensor* weights_feature_tensor,
|
||||
const TfLiteTensor* weights_time_tensor,
|
||||
const TfLiteTensor* bias_tensor,
|
||||
const TfLiteEvalTensor* input_tensor,
|
||||
const TfLiteEvalTensor* weights_feature_tensor,
|
||||
const TfLiteEvalTensor* weights_time_tensor,
|
||||
const TfLiteEvalTensor* bias_tensor,
|
||||
const TfLiteSVDFParams* params,
|
||||
TfLiteTensor* activation_state_tensor,
|
||||
TfLiteTensor* output_tensor, const OpData& data,
|
||||
int32_t input_zp, int32_t output_zp) {
|
||||
TfLiteEvalTensor* activation_state_tensor,
|
||||
TfLiteEvalTensor* output_tensor, const OpData& data) {
|
||||
const int n_rank = params->rank;
|
||||
const int n_batch = input_tensor->dims->data[0];
|
||||
const int n_input = input_tensor->dims->data[1];
|
||||
@@ -209,7 +224,8 @@ void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
|
||||
context->GetScratchBuffer(context, data.scratch_output_tensor_index));
|
||||
|
||||
// Shift states.
|
||||
int16_t* const state_ptr = GetTensorData<int16_t>(activation_state_tensor);
|
||||
int16_t* const state_ptr =
|
||||
tflite::micro::GetTensorData<int16_t>(activation_state_tensor);
|
||||
|
||||
// Left shift the activation_state.
|
||||
{
|
||||
@@ -225,10 +241,11 @@ void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
|
||||
|
||||
// Feature matmul.
|
||||
{
|
||||
int16_t* state = GetTensorData<int16_t>(activation_state_tensor);
|
||||
const int8_t* input = GetTensorData<int8_t>(input_tensor);
|
||||
int16_t* state =
|
||||
tflite::micro::GetTensorData<int16_t>(activation_state_tensor);
|
||||
const int8_t* input = tflite::micro::GetTensorData<int8_t>(input_tensor);
|
||||
const int8_t* weight_feature =
|
||||
GetTensorData<int8_t>(weights_feature_tensor);
|
||||
tflite::micro::GetTensorData<int8_t>(weights_feature_tensor);
|
||||
const int32_t output_max = std::numeric_limits<int16_t>::max();
|
||||
const int32_t output_min = std::numeric_limits<int16_t>::min();
|
||||
int16_t* result_in_batch = state + (n_memory - 1);
|
||||
@@ -238,7 +255,8 @@ void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
|
||||
int32_t dot_prod = 0;
|
||||
const int8_t* vector_in_batch = input + b * n_input;
|
||||
for (int c = 0; c < n_input; c++) {
|
||||
dot_prod += *matrix_ptr++ * (*vector_in_batch++ - input_zp);
|
||||
dot_prod +=
|
||||
*matrix_ptr++ * (*vector_in_batch++ - data.input_zero_point);
|
||||
}
|
||||
dot_prod = MultiplyByQuantizedMultiplier(
|
||||
dot_prod, data.effective_scale_1_a, data.effective_scale_1_b);
|
||||
@@ -261,9 +279,10 @@ void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
|
||||
int32_t* scratch_ptr_batch = scratch_tensor + b * n_filter;
|
||||
|
||||
// Perform batched vector dot product:
|
||||
const int16_t* vector1_ptr = GetTensorData<int16_t>(weights_time_tensor);
|
||||
const int16_t* vector1_ptr =
|
||||
tflite::micro::GetTensorData<int16_t>(weights_time_tensor);
|
||||
const int16_t* vector2_ptr =
|
||||
GetTensorData<int16_t>(activation_state_tensor) +
|
||||
tflite::micro::GetTensorData<int16_t>(activation_state_tensor) +
|
||||
b * n_memory * n_filter;
|
||||
|
||||
for (int i = 0; i < n_filter; i++) {
|
||||
@@ -281,7 +300,8 @@ void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
|
||||
// Add bias.
|
||||
if (bias_tensor) {
|
||||
// Vector batch assign:
|
||||
const int32_t* bias_data = GetTensorData<int32_t>(bias_tensor);
|
||||
const int32_t* bias_data =
|
||||
tflite::micro::GetTensorData<int32_t>(bias_tensor);
|
||||
for (int i = 0; i < n_batch; ++i) {
|
||||
int32_t* output_ptr = scratch_output_tensor + i * n_unit;
|
||||
const int32_t* bias_ptr = bias_data;
|
||||
@@ -316,34 +336,17 @@ void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
|
||||
int32_t x1 = scratch_output_tensor[i];
|
||||
int32_t x2 = MultiplyByQuantizedMultiplier(x1, data.effective_scale_2_a,
|
||||
data.effective_scale_2_b);
|
||||
int32_t x3 = x2 + output_zp;
|
||||
int32_t x3 = x2 + data.output_zero_point;
|
||||
int32_t x4 = std::min(std::max(output_min, x3), output_max);
|
||||
GetTensorData<int8_t>(output_tensor)[i] = static_cast<int8_t>(x4);
|
||||
tflite::micro::GetTensorData<int8_t>(output_tensor)[i] =
|
||||
static_cast<int8_t>(x4);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Input tensors.
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kWeightsFeatureTensor = 1;
|
||||
constexpr int kWeightsTimeTensor = 2;
|
||||
constexpr int kBiasTensor = 3;
|
||||
// This is a variable tensor, and will be modified by this op.
|
||||
constexpr int kInputActivationStateTensor = 4;
|
||||
|
||||
// Output tensor.
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
void* data = nullptr;
|
||||
if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) ==
|
||||
kTfLiteError) {
|
||||
return nullptr;
|
||||
}
|
||||
return data;
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
@@ -359,13 +362,17 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
// [4] = Activation State (variable),
|
||||
// {2, batch_size, memory_size * num_filters}
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
const TfLiteTensor* weights_feature =
|
||||
GetInput(context, node, kWeightsFeatureTensor);
|
||||
TF_LITE_ENSURE(context, weights_feature != nullptr);
|
||||
const TfLiteTensor* weights_time =
|
||||
GetInput(context, node, kWeightsTimeTensor);
|
||||
TF_LITE_ENSURE(context, weights_time != nullptr);
|
||||
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
|
||||
const TfLiteTensor* activation_state =
|
||||
GetInput(context, node, kInputActivationStateTensor);
|
||||
TF_LITE_ENSURE(context, activation_state != nullptr);
|
||||
|
||||
// Define input constants based on input tensor definition above:
|
||||
const int rank = params->rank;
|
||||
@@ -382,9 +389,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TF_LITE_ENSURE_EQ(context, NumDimensions(input), 2);
|
||||
|
||||
// Validate Tensor Output:
|
||||
// [0] = float/int8, {2, batch_size, num_units}
|
||||
// [0] = float/int8_t, {2, batch_size, num_units}
|
||||
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
TF_LITE_ENSURE_EQ(context, NumDimensions(output), 2);
|
||||
TF_LITE_ENSURE_EQ(context, output->dims->data[0], batch_size);
|
||||
TF_LITE_ENSURE_EQ(context, output->dims->data[1], num_units);
|
||||
@@ -408,9 +416,14 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TF_LITE_ENSURE_EQ(context, activation_state->dims->data[0], batch_size);
|
||||
TF_LITE_ENSURE_EQ(context, activation_state->dims->data[1],
|
||||
memory_size * num_filters);
|
||||
// Since is_variable is not part of TFLiteEvalTensor, check is_variable here.
|
||||
TF_LITE_ENSURE_EQ(context, activation_state->is_variable, true);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, node->inputs->size, 5);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
if (input->type == kTfLiteInt8) {
|
||||
TF_LITE_ENSURE_EQ(context, weights_feature->type, kTfLiteInt8);
|
||||
TF_LITE_ENSURE_EQ(context, weights_time->type, kTfLiteInt16);
|
||||
@@ -419,35 +432,30 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteInt32);
|
||||
}
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, output->type, kTfLiteInt8);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8);
|
||||
|
||||
const auto* input_params =
|
||||
reinterpret_cast<TfLiteAffineQuantization*>(input->quantization.params);
|
||||
const auto* weights_feature_params =
|
||||
static_cast<const TfLiteAffineQuantization*>(
|
||||
weights_feature->quantization.params);
|
||||
const auto* state_params = static_cast<const TfLiteAffineQuantization*>(
|
||||
activation_state->quantization.params);
|
||||
const auto* weight_time_params =
|
||||
static_cast<const TfLiteAffineQuantization*>(
|
||||
weights_time->quantization.params);
|
||||
const auto* output_params = static_cast<const TfLiteAffineQuantization*>(
|
||||
output->quantization.params);
|
||||
const double effective_scale_1 = static_cast<double>(
|
||||
input_params->scale->data[0] * weights_feature_params->scale->data[0] /
|
||||
state_params->scale->data[0]);
|
||||
const double effective_scale_2 = static_cast<double>(
|
||||
state_params->scale->data[0] * weight_time_params->scale->data[0] /
|
||||
output_params->scale->data[0]);
|
||||
input->params.scale * weights_feature->params.scale /
|
||||
activation_state->params.scale);
|
||||
const double effective_scale_2 =
|
||||
static_cast<double>(activation_state->params.scale *
|
||||
weights_time->params.scale / output->params.scale);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
// TODO(b/162018098): Use TF_LITE_ENSURE_NEAR when it is ready.
|
||||
TF_LITE_ENSURE(
|
||||
context,
|
||||
std::abs(static_cast<double>(bias->params.scale) -
|
||||
static_cast<double>(activation_state->params.scale *
|
||||
weights_time->params.scale)) < 1e-5);
|
||||
|
||||
QuantizeMultiplier(effective_scale_1, &(data->effective_scale_1_a),
|
||||
&(data->effective_scale_1_b));
|
||||
QuantizeMultiplier(effective_scale_2, &(data->effective_scale_2_a),
|
||||
&(data->effective_scale_2_b));
|
||||
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
data->output_zero_point = output->params.zero_point;
|
||||
|
||||
TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
|
||||
|
||||
const TfLiteStatus scratch_status = context->RequestScratchBufferInArena(
|
||||
@@ -467,10 +475,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
if (bias != nullptr) {
|
||||
TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteFloat32);
|
||||
}
|
||||
TF_LITE_ENSURE_EQ(context, output->type, kTfLiteFloat32);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteFloat32);
|
||||
|
||||
TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
|
||||
const TfLiteStatus scratch_status = context->RequestScratchBufferInArena(
|
||||
@@ -484,20 +489,24 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params = reinterpret_cast<TfLiteSVDFParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
const TfLiteTensor* weights_feature =
|
||||
GetInput(context, node, kWeightsFeatureTensor);
|
||||
const TfLiteTensor* weights_time =
|
||||
GetInput(context, node, kWeightsTimeTensor);
|
||||
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
|
||||
TfLiteTensor* activation_state =
|
||||
GetVariableInput(context, node, kInputActivationStateTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData& data = *(static_cast<const OpData*>(node->user_data));
|
||||
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
const TfLiteEvalTensor* weights_feature =
|
||||
tflite::micro::GetEvalInput(context, node, kWeightsFeatureTensor);
|
||||
const TfLiteEvalTensor* weights_time =
|
||||
tflite::micro::GetEvalInput(context, node, kWeightsTimeTensor);
|
||||
const TfLiteEvalTensor* bias =
|
||||
(NumInputs(node) == 5)
|
||||
? tflite::micro::GetEvalInput(context, node, kBiasTensor)
|
||||
: nullptr;
|
||||
TfLiteEvalTensor* activation_state = tflite::micro::GetMutableEvalInput(
|
||||
context, node, kInputActivationStateTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
switch (weights_feature->type) {
|
||||
case kTfLiteFloat32: {
|
||||
EvalFloatSVDF(context, node, input, weights_feature, weights_time, bias,
|
||||
@@ -508,11 +517,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
}
|
||||
|
||||
case kTfLiteInt8: {
|
||||
TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActRelu);
|
||||
|
||||
EvalIntegerSVDF(context, node, input, weights_feature, weights_time, bias,
|
||||
params, activation_state, output, data,
|
||||
input->params.zero_point, output->params.zero_point);
|
||||
params, activation_state, output, data);
|
||||
return kTfLiteOk;
|
||||
break;
|
||||
}
|
||||
@@ -525,20 +531,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace svdf
|
||||
} // namespace
|
||||
|
||||
TfLiteRegistration* Register_SVDF() {
|
||||
static TfLiteRegistration r = {/*init=*/svdf::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/svdf::Prepare,
|
||||
/*invoke=*/svdf::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_SVDF() {
|
||||
return {/*init=*/Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/Prepare,
|
||||
/*invoke=*/Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
|
||||
158
code/lib/tfmicro/tensorflow/lite/micro/kernels/tanh.cc
Normal file
158
code/lib/tfmicro/tensorflow/lite/micro/kernels/tanh.cc
Normal file
@@ -0,0 +1,158 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h"
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/tanh.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/micro_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace activations {
|
||||
namespace {
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
struct OpData {
|
||||
int32_t input_zero_point;
|
||||
int32_t input_range_radius;
|
||||
int32_t input_multiplier;
|
||||
int input_left_shift;
|
||||
};
|
||||
|
||||
void* TanhInit(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
|
||||
OpData* data) {
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
|
||||
|
||||
if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
|
||||
static constexpr int kInputIntegerBits = 4;
|
||||
const double input_real_multiplier =
|
||||
static_cast<double>(input->params.scale) *
|
||||
static_cast<double>(1 << (31 - kInputIntegerBits));
|
||||
|
||||
const double q = std::frexp(input_real_multiplier, &data->input_left_shift);
|
||||
data->input_multiplier = static_cast<int32_t>(TfLiteRound(q * (1ll << 31)));
|
||||
|
||||
data->input_range_radius =
|
||||
CalculateInputRadius(kInputIntegerBits, data->input_left_shift, 31);
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus TanhPrepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
return CalculateArithmeticOpData(context, node, data);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData& data = *(static_cast<const OpData*>(node->user_data));
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
reference_ops::Tanh(tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
return kTfLiteOk;
|
||||
} break;
|
||||
case kTfLiteInt16: {
|
||||
TanhParams params;
|
||||
params.input_left_shift = data.input_left_shift;
|
||||
reference_ops::Tanh(params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int16_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int16_t>(output));
|
||||
return kTfLiteOk;
|
||||
} break;
|
||||
case kTfLiteUInt8: {
|
||||
TanhParams params;
|
||||
params.input_zero_point = data.input_zero_point;
|
||||
params.input_range_radius = data.input_range_radius;
|
||||
params.input_multiplier = data.input_multiplier;
|
||||
params.input_left_shift = data.input_left_shift;
|
||||
reference_ops::Tanh(params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
|
||||
return kTfLiteOk;
|
||||
} break;
|
||||
case kTfLiteInt8: {
|
||||
reference_integer_ops::Tanh(
|
||||
data.input_zero_point, data.input_range_radius, data.input_multiplier,
|
||||
data.input_left_shift, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
return kTfLiteOk;
|
||||
} break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
TfLiteTypeGetName(input->type),
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace activations
|
||||
|
||||
TfLiteRegistration Register_TANH() {
|
||||
return {/*init=*/activations::TanhInit,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/activations::TanhPrepare,
|
||||
/*invoke=*/activations::TanhEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
@@ -17,6 +17,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -28,14 +29,16 @@ constexpr int kInputTensor = 0;
|
||||
|
||||
template <typename T>
|
||||
TfLiteStatus UnpackImpl(TfLiteContext* context, TfLiteNode* node,
|
||||
const TfLiteTensor* input, int output_count, int axis) {
|
||||
const TfLiteTensor* output0 = GetOutput(context, node, 0);
|
||||
const TfLiteEvalTensor* input, int output_count,
|
||||
int axis) {
|
||||
const TfLiteEvalTensor* output0 =
|
||||
tflite::micro::GetEvalOutput(context, node, 0);
|
||||
const TfLiteIntArray* input_dims = input->dims;
|
||||
const TfLiteIntArray* output_dims = output0->dims;
|
||||
const int dimensions = input_dims->size;
|
||||
|
||||
if (axis < 0) {
|
||||
axis += NumDimensions(input);
|
||||
axis += input->dims->size;
|
||||
}
|
||||
|
||||
TFLITE_DCHECK_LT(axis, dimensions);
|
||||
@@ -54,11 +57,11 @@ TfLiteStatus UnpackImpl(TfLiteContext* context, TfLiteNode* node,
|
||||
}
|
||||
TFLITE_DCHECK_EQ(output_size, copy_size * outer_size);
|
||||
|
||||
const T* input_data = GetTensorData<T>(input);
|
||||
const T* input_data = tflite::micro::GetTensorData<T>(input);
|
||||
|
||||
for (int i = 0; i < output_count; ++i) {
|
||||
TfLiteTensor* t = GetOutput(context, node, i);
|
||||
T* output_data = GetTensorData<T>(t);
|
||||
TfLiteEvalTensor* t = tflite::micro::GetEvalOutput(context, node, i);
|
||||
T* output_data = tflite::micro::GetTensorData<T>(t);
|
||||
for (int k = 0; k < outer_size; ++k) {
|
||||
T* output_ptr = output_data + copy_size * k;
|
||||
int loc = k * output_count * copy_size + i * copy_size;
|
||||
@@ -74,7 +77,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TfLiteUnpackParams* data =
|
||||
reinterpret_cast<TfLiteUnpackParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
@@ -101,16 +105,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
} // namespace
|
||||
} // namespace unpack
|
||||
|
||||
TfLiteRegistration* Register_UNPACK() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/unpack::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_UNPACK() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/unpack::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
Reference in New Issue
Block a user