mirror of https://github.com/jomjol/AI-on-the-edge-device.git
synced 2025-12-09 21:17:06 +03:00

Rolling 20210420

119  code/components/tfmicro/tensorflow/lite/micro/kernels/add_n.cc  Normal file
@@ -0,0 +1,119 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/reference/add_n.h"

#include <cstdint>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace {

constexpr int kInputTensor0 = 0;
constexpr int kOutputTensor = 0;

TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
  int num_inputs = NumInputs(node);
  TF_LITE_ENSURE(context, num_inputs >= 2);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);

  const TfLiteTensor* input_tensor_first;
  TF_LITE_ENSURE_OK(
      context, GetInputSafe(context, node, kInputTensor0, &input_tensor_first));
  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context,
                    GetOutputSafe(context, node, kOutputTensor, &output));

  // Check that all tensors have the same shape and type.
  TF_LITE_ENSURE_TYPES_EQ(context, output->type, input_tensor_first->type);
  for (int i = kInputTensor0 + 1; i < num_inputs; ++i) {
    const TfLiteTensor* input;
    TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, i, &input));
    TF_LITE_ENSURE(context, HaveSameShapes(input_tensor_first, input));
    TF_LITE_ENSURE_TYPES_EQ(context, input_tensor_first->type, input->type);
  }

  // Allocate scratch buffer space for pointer to each tensor's data
  // and store the scratch buffer index in the node's user_data
  if (output->type == kTfLiteFloat32) {
    int scratch_index;
    size_t scratch_size = sizeof(float*) * num_inputs;
    TF_LITE_ENSURE_OK(context, context->RequestScratchBufferInArena(
                                   context, scratch_size, &scratch_index));
    node->user_data =
        reinterpret_cast<decltype(node->user_data)>(scratch_index);
  } else {
    TF_LITE_KERNEL_LOG(context, "ADD_N only supports FLOAT32, got %s.",
                       TfLiteTypeGetName(output->type));
    return kTfLiteError;
  }

  return kTfLiteOk;
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  return CalculateOpData(context, node);
}

template <typename T>
void EvalAddN(TfLiteContext* context, TfLiteNode* node,
              TfLiteEvalTensor* output) {
  int num_inputs = NumInputs(node);

  int scratch_index =
      static_cast<int>(reinterpret_cast<intptr_t>(node->user_data));
  void* scratch_buffer = context->GetScratchBuffer(context, scratch_index);
  const T** all_inputs = static_cast<decltype(all_inputs)>(scratch_buffer);
  for (int i = 0; i < num_inputs; i++) {
    const TfLiteEvalTensor* next_input =
        tflite::micro::GetEvalInput(context, node, kInputTensor0 + i);
    all_inputs[i] = tflite::micro::GetTensorData<T>(next_input);
  }

  reference_ops::AddN<T>(tflite::micro::GetTensorShape(output), num_inputs,
                         all_inputs, tflite::micro::GetTensorData<T>(output));
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
  if (output->type == kTfLiteFloat32) {
    EvalAddN<float>(context, node, output);
  } else {
    TF_LITE_KERNEL_LOG(context, "ADD_N only supports FLOAT32, got %s.",
                       TfLiteTypeGetName(output->type));
    return kTfLiteError;
  }
  return kTfLiteOk;
}

}  // namespace

TfLiteRegistration Register_ADD_N() {
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/Prepare,
          /*invoke=*/Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

}  // namespace tflite
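The kernel above only collects the input data pointers into a scratch buffer; the arithmetic itself is delegated to reference_ops::AddN, which is an elementwise sum over all inputs. A minimal standalone sketch of that semantic (illustrative only, not the TFLite reference implementation):

#include <cstddef>

// Elementwise sum of num_inputs equally shaped buffers, mirroring what the
// ADD_N kernel above delegates to reference_ops::AddN (float-only in TFLM).
template <typename T>
void AddNSketch(size_t num_elements, int num_inputs, const T* const* inputs,
                T* output) {
  for (size_t i = 0; i < num_elements; ++i) {
    T sum = T(0);
    for (int n = 0; n < num_inputs; ++n) {
      sum += inputs[n][i];
    }
    output[i] = sum;
  }
}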
@@ -0,0 +1,111 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h"

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"

namespace tflite {

namespace {

constexpr int kInputTensor = 0;
constexpr int kBlockShapeTensor = 1;
constexpr int kCropsTensor = 2;
constexpr int kOutputTensor = 0;

// Currently, only 3D NHC and 4D NHWC input/output op_context are supported.
// In case of 3D input, it will be extended to 3D NHWC by adding W=1.
// The 4D array need to have exactly 2 spatial dimensions.
// TODO(b/149952582): Support arbitrary dimension in SpaceToBatchND.
const int kInputOutputMinDimensionNum = 3;
const int kInputOutputMaxDimensionNum = 4;

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE_EQ(context, NumInputs(node), 3);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);

  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  TF_LITE_ENSURE(context, input != nullptr && output != nullptr);

  TF_LITE_ENSURE(context, NumDimensions(input) >= kInputOutputMinDimensionNum);
  TF_LITE_ENSURE(context, NumDimensions(output) >= kInputOutputMinDimensionNum);
  TF_LITE_ENSURE(context, NumDimensions(input) <= kInputOutputMaxDimensionNum);
  TF_LITE_ENSURE(context, NumDimensions(output) <= kInputOutputMaxDimensionNum);
  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);

  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  const TfLiteEvalTensor* block_shape =
      tflite::micro::GetEvalInput(context, node, kBlockShapeTensor);
  const TfLiteEvalTensor* crops =
      tflite::micro::GetEvalInput(context, node, kCropsTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  switch (input->type) {  // Already know in/out types are same.
    case kTfLiteFloat32:
      reference_ops::BatchToSpaceND(
          tflite::micro::GetTensorShape(input),
          tflite::micro::GetTensorData<float>(input),
          tflite::micro::GetTensorShape(block_shape),
          tflite::micro::GetTensorData<int32_t>(block_shape),
          tflite::micro::GetTensorShape(crops),
          tflite::micro::GetTensorData<int32_t>(crops),
          tflite::micro::GetTensorShape(output),
          tflite::micro::GetTensorData<float>(output));
      break;
    case kTfLiteInt8:
      reference_ops::BatchToSpaceND(
          tflite::micro::GetTensorShape(input),
          tflite::micro::GetTensorData<int8_t>(input),
          tflite::micro::GetTensorShape(block_shape),
          tflite::micro::GetTensorData<int32_t>(block_shape),
          tflite::micro::GetTensorShape(crops),
          tflite::micro::GetTensorData<int32_t>(crops),
          tflite::micro::GetTensorShape(output),
          tflite::micro::GetTensorData<int8_t>(output));
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                         TfLiteTypeGetName(input->type), input->type);
      return kTfLiteError;
  }
  return kTfLiteOk;
}

}  // namespace.

TfLiteRegistration Register_BATCH_TO_SPACE_ND() {
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/Prepare,
          /*invoke=*/Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

}  // namespace tflite
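For orientation, BATCH_TO_SPACE_ND moves blocks from the batch dimension back into the spatial dimensions: the output batch equals the input batch divided by the product of block_shape, and each spatial dimension grows by its block factor before the crops are removed. A rough sketch of the 4D NHWC shape arithmetic, based on the standard op definition rather than on code in this commit:

// Hypothetical helper: expected output dims of a 4D NHWC BatchToSpaceND,
// with block = {block_h, block_w} and crops = {top, bottom, left, right}.
struct BtsShape { int batch; int height; int width; int channels; };

inline BtsShape BatchToSpaceOutputShape(const BtsShape& in, const int block[2],
                                        const int crops[4]) {
  BtsShape out;
  out.batch = in.batch / (block[0] * block[1]);
  out.height = in.height * block[0] - crops[0] - crops[1];
  out.width = in.width * block[1] - crops[2] - crops[3];
  out.channels = in.channels;
  return out;
}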
@@ -0,0 +1,96 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace {

constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);

  return kTfLiteOk;
}

template <typename FromT, typename ToT>
void copyCast(const FromT* in, ToT* out, int num_elements) {
  std::transform(in, in + num_elements, out,
                 [](FromT a) { return static_cast<ToT>(a); });
}

template <typename FromT>
TfLiteStatus copyToTensor(TfLiteContext* context, const FromT* in,
                          TfLiteEvalTensor* out, int num_elements) {
  switch (out->type) {
    case kTfLiteInt8:
      copyCast(in, out->data.int8, num_elements);
      break;
    case kTfLiteFloat32:
      copyCast(in, tflite::micro::GetTensorData<float>(out), num_elements);
      break;
    default:
      // Unsupported type.
      TF_LITE_KERNEL_LOG(context, "Output type %s (%d) not supported.",
                         TfLiteTypeGetName(out->type), out->type);
  }
  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
  int num_elements = MatchingFlatSize(tflite::micro::GetTensorShape(input),
                                      tflite::micro::GetTensorShape(output));

  switch (input->type) {
    case kTfLiteInt8:
      return copyToTensor(context, input->data.int8, output, num_elements);
    case kTfLiteFloat32:
      return copyToTensor(context, tflite::micro::GetTensorData<float>(input),
                          output, num_elements);
    default:
      // Unsupported type.
      TF_LITE_KERNEL_LOG(context, "Input type %s (%d) not supported.",
                         TfLiteTypeGetName(input->type), input->type);
  }
  return kTfLiteOk;
}
}  // namespace

TfLiteRegistration Register_CAST() {
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/Prepare,
          /*invoke=*/Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

}  // namespace tflite
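The copyCast helper above is nothing more than an elementwise static_cast; for example, the int8-to-float case reduces to the following (a trivial illustration, not part of the kernel itself):

#include <algorithm>
#include <cstdint>

// Same idea as copyCast<int8_t, float>: each element is static_cast to float.
inline void Int8ToFloat(const int8_t* in, float* out, int num_elements) {
  std::transform(in, in + num_elements, out,
                 [](int8_t a) { return static_cast<float>(a); });
}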
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#define FLATBUFFERS_LOCALE_INDEPENDENT 0
#include "flatbuffers/flexbuffers.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
@@ -55,7 +57,7 @@ constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;

// TODO(b/149795762): Add this to TfLiteStatus enum.
constexpr int kTfLiteAbort = -9;
constexpr TfLiteStatus kTfLiteAbort = static_cast<TfLiteStatus>(-9);

// These fields control the stride period of a strided streaming model. This op
// returns kTfLiteAbort until cycles_until_run-- is zero. At this time,
@@ -65,47 +67,64 @@ struct OpData {
  int cycles_max;
};

// These constants represent constants specific to the music detect model.
// They exist until (b/132070898) is fixed.
constexpr int kMaxOpDataSize = 7;
int op_data_counter = 0;
OpData op_data_array[kMaxOpDataSize];

}  // namespace

void Free(TfLiteContext* context, void* buffer) { op_data_counter = 0; }
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  OpData* op_data = static_cast<OpData*>(
      context->AllocatePersistentBuffer(context, sizeof(OpData)));

  if (buffer != nullptr && length > 0) {
    const uint8_t* buffer_t = reinterpret_cast<const uint8_t*>(buffer);
    const flexbuffers::Map& m = flexbuffers::GetRoot(buffer_t, length).AsMap();
    op_data->cycles_max = m["cycles_max"].AsInt32();
  } else {
    op_data->cycles_max = 0;
  }

  return op_data;
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);

  TFLITE_DCHECK(node->user_data != nullptr);
  OpData* op_data = static_cast<OpData*>(node->user_data);

  TF_LITE_ENSURE(context, input != nullptr);
  TF_LITE_ENSURE(context, output != nullptr);
  TF_LITE_ENSURE_EQ(context, 1, output->dims->data[0]);
  TF_LITE_ENSURE_EQ(context, 1, input->dims->data[0]);
  TF_LITE_ENSURE_EQ(context, input->dims->data[0], output->dims->data[0]);
  TF_LITE_ENSURE_EQ(context, 1, input->dims->data[1]);
  TF_LITE_ENSURE_EQ(context, 1, output->dims->data[2]);
  TF_LITE_ENSURE_EQ(context, 1, input->dims->data[2]);
  TF_LITE_ENSURE_EQ(context, input->dims->data[2], output->dims->data[2]);
  TF_LITE_ENSURE_EQ(context, output->dims->data[3], input->dims->data[3]);

  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);

  // The circular buffer custom operator currently only supports int8_t.
  // The circular buffer custom operator currently only supports int8.
  TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8);

  // TODO(b/132070898): Use statically slotted OpData structures until a
  // scratch memory API is ready.
  TFLITE_DCHECK_LE(op_data_counter, kMaxOpDataSize);
  OpData* op_data = &op_data_array[op_data_counter++];
  // The last circular buffer layer (length 5) simply accumulates outputs, and
  // does not run periodically.
  // TODO(b/150001379): Move this special case logic to the tflite flatbuffer.
  if (output->dims->data[1] == 5) {
    op_data->cycles_max = 1;
  } else {
    op_data->cycles_max = 2;
  if (op_data->cycles_max <= 0) {
    // The last circular buffer layer simply accumulates outputs, and does not
    // run periodically.
    // TODO(b/150001379): Move this special case logic to the tflite flatbuffer.
    static int cb_prepare_count = 0;
    cb_prepare_count++;
    // These checks specifically work for the only two streaming models
    // supported on TFLM. They use the shape of the output tensor along with the
    // layer number to determine if the circular buffer period should be 1 or 2.

    // These models are outlined int the following documents:
    // https://docs.google.com/document/d/1lc_G2ZFhjiKFo02UHjBaljye1xsL0EkfybkaVELEE3Q/edit?usp=sharing
    // https://docs.google.com/document/d/1pGc42PuWyrk-Jy1-9qeqtggvsmHr1ifz8Lmqfpr2rKA/edit?usp=sharing
    if (output->dims->data[1] == 5 || output->dims->data[1] == 13 ||
        (cb_prepare_count == 5 && output->dims->data[2] == 2 &&
         output->dims->data[3] == 96)) {
      op_data->cycles_max = 1;
      cb_prepare_count = 0;
    } else {
      op_data->cycles_max = 2;
    }
  }
  op_data->cycles_until_run = op_data->cycles_max;
  node->user_data = op_data;
@@ -127,10 +146,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  TFLITE_DCHECK(node->user_data != nullptr);
  OpData* data = reinterpret_cast<OpData*>(node->user_data);

  int num_slots = output->dims->data[1];
  int depth = output->dims->data[3];
  int depth = output->dims->data[2] * output->dims->data[3];

  if (input->type == kTfLiteInt8) {
    EvalInt8(tflite::micro::GetTensorData<int8_t>(input), num_slots, depth,
@@ -148,12 +168,6 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
    return static_cast<TfLiteStatus>(kTfLiteAbort);
  }

  // If prepare is ever called more than one time (for example, when testing the
  // ambient model, the interpreter is created a few times), this op data
  // counter needs to be reset so that future instances do not overrun this op
  // data array.
  op_data_counter = 0;

  data->cycles_until_run = data->cycles_max;

  return kTfLiteOk;
@@ -162,8 +176,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
}  // namespace circular_buffer

TfLiteRegistration* Register_CIRCULAR_BUFFER() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/circular_buffer::Free,
  static TfLiteRegistration r = {/*init=*/circular_buffer::Init,
                                 /*free=*/nullptr,
                                 /*prepare=*/circular_buffer::Prepare,
                                 /*invoke=*/circular_buffer::Eval,
                                 /*profiling_string=*/nullptr,
@@ -0,0 +1,22 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_KERNELS_FLEXBUFFERS_GENERATED_DATA_H
#define TENSORFLOW_LITE_MICRO_KERNELS_FLEXBUFFERS_GENERATED_DATA_H

extern const int g_gen_data_size_circular_buffer_config;
extern const unsigned char g_gen_data_circular_buffer_config[];

#endif
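The circular-buffer Init() shown earlier reads its single option, cycles_max, from a FlexBuffers map passed in as the op's custom options; the g_gen_data_circular_buffer_config array declared above holds such a serialized map. A hedged sketch of how an equivalent blob could be built with the flexbuffers::Builder API from the bundled flatbuffers library (the helper name and exact usage are illustrative assumptions):

#include <cstdint>
#include <vector>
#include "flatbuffers/flexbuffers.h"

// Builds a FlexBuffers map of the form {"cycles_max": <value>}, which is the
// shape the circular buffer's Init() expects in its buffer argument.
inline std::vector<uint8_t> BuildCircularBufferConfig(int cycles_max) {
  flexbuffers::Builder fbb;
  fbb.Map([&]() { fbb.Int("cycles_max", cycles_max); });
  fbb.Finish();
  return fbb.GetBuffer();
}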
@@ -13,12 +13,13 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/reference/conv.h"
#include "tensorflow/lite/micro/kernels/conv.h"

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/conv.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/conv.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
@@ -28,294 +29,60 @@ limitations under the License.
namespace tflite {
namespace {

constexpr int kInputTensor = 0;
constexpr int kFilterTensor = 1;
constexpr int kBiasTensor = 2;
constexpr int kOutputTensor = 0;

// Conv is quantized along dimension 0:
// https://www.tensorflow.org/lite/performance/quantization_spec
constexpr int kConvQuantizedDimension = 0;

// This file has 2 implementation of Conv.

struct OpData {
  TfLitePaddingValues padding;

  // Cached tensor zero point values for quantized operations.
  int32_t input_zero_point;
  int32_t filter_zero_point;
  int32_t output_zero_point;

  // The scaling factor from input to output (aka the 'real multiplier') can
  // be represented as a fixed point multiplier plus a left shift.
  int32_t output_multiplier;
  int output_shift;

  // Per channel output multiplier and shift.
  int32_t* per_channel_output_multiplier;
  int32_t* per_channel_output_shift;

  // The range of the fused activation layer. For example for kNone and
  // uint8_t these would be 0 and 255.
  int32_t output_activation_min;
  int32_t output_activation_max;
};

inline PaddingType RuntimePaddingType(TfLitePadding padding) {
  switch (padding) {
    case TfLitePadding::kTfLitePaddingSame:
      return PaddingType::kSame;
    case TfLitePadding::kTfLitePaddingValid:
      return PaddingType::kValid;
    case TfLitePadding::kTfLitePaddingUnknown:
    default:
      return PaddingType::kNone;
  }
}

TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
                             const TfLiteConvParams* params, int width,
                             int height, int filter_width, int filter_height,
                             int out_width, int out_height,
                             const TfLiteType data_type, OpData* data) {
  bool has_bias = node->inputs->size == 3;
  // Check number of inputs/outputs
  TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2);
  TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);

  // Matching GetWindowedOutputSize in TensorFlow.
  auto padding = params->padding;
  data->padding = ComputePaddingHeightWidth(
      params->stride_height, params->stride_width,
      params->dilation_height_factor, params->dilation_width_factor, height,
      width, filter_height, filter_width, padding, &out_height, &out_width);

  // Note that quantized inference requires that all tensors have their
  // parameters set. This is usually done during quantized training.
  if (data_type != kTfLiteFloat32) {
    const TfLiteTensor* input = GetInput(context, node, kInputTensor);
    TF_LITE_ENSURE(context, input != nullptr);
    const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
    TF_LITE_ENSURE(context, filter != nullptr);
    const TfLiteTensor* bias =
        GetOptionalInputTensor(context, node, kBiasTensor);
    TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
    TF_LITE_ENSURE(context, output != nullptr);
    int output_channels = filter->dims->data[kConvQuantizedDimension];

    TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
        context, input, filter, bias, output, params->activation,
        &data->output_multiplier, &data->output_shift,
        &data->output_activation_min, &data->output_activation_max,
        data->per_channel_output_multiplier,
        reinterpret_cast<int*>(data->per_channel_output_shift),
        output_channels));
  }
  return kTfLiteOk;
}

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  TFLITE_DCHECK(node->builtin_data != nullptr);

  OpData* data = static_cast<OpData*>(node->user_data);
  const auto params = static_cast<const TfLiteConvParams*>(node->builtin_data);

  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
  TF_LITE_ENSURE(context, filter != nullptr);

  int input_width = input->dims->data[2];
  int input_height = input->dims->data[1];
  int filter_width = filter->dims->data[2];
  int filter_height = filter->dims->data[1];
  int output_width = output->dims->data[2];
  int output_height = output->dims->data[1];

  // Dynimically allocate per-channel quantization parameters.
  const int num_channels = filter->dims->data[kConvQuantizedDimension];
  data->per_channel_output_multiplier =
      static_cast<int32_t*>(context->AllocatePersistentBuffer(
          context, num_channels * sizeof(int32_t)));
  data->per_channel_output_shift =
      static_cast<int32_t*>(context->AllocatePersistentBuffer(
          context, num_channels * sizeof(int32_t)));

  // All per-channel quantized tensors need valid zero point and scale arrays.
  if (input->type == kTfLiteInt8) {
    TF_LITE_ENSURE_EQ(context, filter->quantization.type,
                      kTfLiteAffineQuantization);

    const auto* affine_quantization =
        static_cast<TfLiteAffineQuantization*>(filter->quantization.params);
    TF_LITE_ENSURE(context, affine_quantization);
    TF_LITE_ENSURE(context, affine_quantization->scale);
    TF_LITE_ENSURE(context, affine_quantization->zero_point);

    TF_LITE_ENSURE(context,
                   affine_quantization->scale->size == 1 ||
                       affine_quantization->scale->size ==
                           filter->dims->data[kConvQuantizedDimension]);
    TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size,
                      affine_quantization->zero_point->size);
  }

  TF_LITE_ENSURE_STATUS(CalculateOpData(
      context, node, params, input_width, input_height, filter_width,
      filter_height, output_width, output_height, input->type, data));

  data->input_zero_point = input->params.zero_point;
  data->filter_zero_point = filter->params.zero_point;
  data->output_zero_point = output->params.zero_point;

  return kTfLiteOk;
}  // namespace conv

void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
                   TfLiteConvParams* params, const OpData& data,
                   const TfLiteEvalTensor* input,
                   const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias,
                   TfLiteEvalTensor* im2col, TfLiteEvalTensor* hwcn_weights,
                   TfLiteEvalTensor* output) {
  const int32_t input_offset = -data.input_zero_point;
  const int32_t filter_offset = -data.filter_zero_point;
  const int32_t output_offset = data.output_zero_point;

  // TODO(b/154032858): Investigate removing extra copies.
  ConvParams op_params;
  op_params.padding_type = RuntimePaddingType(params->padding);
  op_params.padding_values.width = data.padding.width;
  op_params.padding_values.height = data.padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.input_offset = input_offset;
  op_params.weights_offset = filter_offset;
  op_params.output_offset = output_offset;
  op_params.output_multiplier = data.output_multiplier;
  op_params.output_shift = -data.output_shift;
  op_params.quantized_activation_min = data.output_activation_min;
  op_params.quantized_activation_max = data.output_activation_max;
  reference_ops::Conv(op_params, tflite::micro::GetTensorShape(input),
                      tflite::micro::GetTensorData<uint8_t>(input),
                      tflite::micro::GetTensorShape(filter),
                      tflite::micro::GetTensorData<uint8_t>(filter),
                      tflite::micro::GetTensorShape(bias),
                      tflite::micro::GetTensorData<int32_t>(bias),
                      tflite::micro::GetTensorShape(output),
                      tflite::micro::GetTensorData<uint8_t>(output),
                      tflite::micro::GetTensorShape(im2col),
                      tflite::micro::GetTensorData<uint8_t>(im2col), nullptr);
}

void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
                             TfLiteConvParams* params, const OpData& data,
                             const TfLiteEvalTensor* input,
                             const TfLiteEvalTensor* filter,
                             const TfLiteEvalTensor* bias,
                             TfLiteEvalTensor* output,
                             TfLiteEvalTensor* im2col) {
  // TODO(b/154032858): Investigate removing extra copies.
  ConvParams op_params;
  op_params.input_offset = -data.input_zero_point;
  op_params.output_offset = data.output_zero_point;
  op_params.stride_height = params->stride_height;
  op_params.stride_width = params->stride_width;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.padding_values.height = data.padding.height;
  op_params.padding_values.width = data.padding.width;
  op_params.quantized_activation_min = data.output_activation_min;
  op_params.quantized_activation_max = data.output_activation_max;

  reference_integer_ops::ConvPerChannel(
      op_params, data.per_channel_output_multiplier,
      data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
      tflite::micro::GetTensorData<int8_t>(input),
      tflite::micro::GetTensorShape(filter),
      tflite::micro::GetTensorData<int8_t>(filter),
      tflite::micro::GetTensorShape(bias),
      tflite::micro::GetTensorData<int32_t>(bias),
      tflite::micro::GetTensorShape(output),
      tflite::micro::GetTensorData<int8_t>(output));
}

void EvalFloat(TfLiteContext* context, TfLiteNode* node,
               TfLiteConvParams* params, const OpData& data,
               const TfLiteEvalTensor* input, const TfLiteEvalTensor* filter,
               const TfLiteEvalTensor* bias, TfLiteEvalTensor* im2col,
               TfLiteEvalTensor* hwcn_weights, TfLiteEvalTensor* output) {
  float output_activation_min, output_activation_max;
  CalculateActivationRange(params->activation, &output_activation_min,
                           &output_activation_max);
  // TODO(b/154032858): Investigate removing extra copies.
  ConvParams op_params;
  op_params.padding_type = RuntimePaddingType(params->padding);
  op_params.padding_values.width = data.padding.width;
  op_params.padding_values.height = data.padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;

  reference_ops::Conv(op_params, tflite::micro::GetTensorShape(input),
                      tflite::micro::GetTensorData<float>(input),
                      tflite::micro::GetTensorShape(filter),
                      tflite::micro::GetTensorData<float>(filter),
                      tflite::micro::GetTensorShape(bias),
                      tflite::micro::GetTensorData<float>(bias),
                      tflite::micro::GetTensorShape(output),
                      tflite::micro::GetTensorData<float>(output),
                      tflite::micro::GetTensorShape(im2col),
                      tflite::micro::GetTensorData<float>(im2col));
  return context->AllocatePersistentBuffer(context, sizeof(OpDataConv));
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  auto* params = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);

  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
      tflite::micro::GetEvalInput(context, node, kConvInputTensor);
  const TfLiteEvalTensor* filter =
      tflite::micro::GetEvalInput(context, node, kFilterTensor);
      tflite::micro::GetEvalInput(context, node, kConvWeightsTensor);
  const TfLiteEvalTensor* bias =
      (NumInputs(node) == 3)
          ? tflite::micro::GetEvalInput(context, node, kBiasTensor)
          ? tflite::micro::GetEvalInput(context, node, kConvBiasTensor)
          : nullptr;
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
      tflite::micro::GetEvalOutput(context, node, kConvOutputTensor);

  TFLITE_DCHECK(node->builtin_data != nullptr);
  const auto& params =
      *(reinterpret_cast<TfLiteConvParams*>(node->builtin_data));
  TFLITE_DCHECK(node->user_data != nullptr);
  const OpData& data = *(static_cast<const OpData*>(node->user_data));
  const auto& data = *(static_cast<const OpDataConv*>(node->user_data));

  TF_LITE_ENSURE_EQ(context, input->type, output->type);
  TF_LITE_ENSURE_MSG(context, input->type == filter->type,
                     "Hybrid models are not supported on TFLite Micro.");

  switch (input->type) {  // Already know in/out types are same.
    case kTfLiteFloat32:
      EvalFloat(context, node, params, data, input, filter, bias, nullptr,
                nullptr, output);
    case kTfLiteFloat32: {
      tflite::reference_ops::Conv(
          ConvParamsFloat(params, data), tflite::micro::GetTensorShape(input),
          tflite::micro::GetTensorData<float>(input),
          tflite::micro::GetTensorShape(filter),
          tflite::micro::GetTensorData<float>(filter),
          tflite::micro::GetTensorShape(bias),
          tflite::micro::GetTensorData<float>(bias),
          tflite::micro::GetTensorShape(output),
          tflite::micro::GetTensorData<float>(output),
          tflite::micro::GetTensorShape(nullptr), nullptr);
      break;
    case kTfLiteInt8:
      EvalQuantizedPerChannel(context, node, params, data, input, filter, bias,
                              output, nullptr);
      break;
    case kTfLiteUInt8:
      EvalQuantized(context, node, params, data, input, filter, bias, nullptr,
                    nullptr, output);
    }
    case kTfLiteInt8: {
      reference_integer_ops::ConvPerChannel(
          ConvParamsQuantized(params, data), data.per_channel_output_multiplier,
          data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
          tflite::micro::GetTensorData<int8_t>(input),
          tflite::micro::GetTensorShape(filter),
          tflite::micro::GetTensorData<int8_t>(filter),
          tflite::micro::GetTensorShape(bias),
          tflite::micro::GetTensorData<int32_t>(bias),
          tflite::micro::GetTensorShape(output),
          tflite::micro::GetTensorData<int8_t>(output));
      break;
    }
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                         TfLiteTypeGetName(input->type), input->type);
@@ -329,7 +96,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TfLiteRegistration Register_CONV_2D() {
  return {/*init=*/Init,
          /*free=*/nullptr,
          /*prepare=*/Prepare,
          /*prepare=*/ConvPrepare,
          /*invoke=*/Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
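The output_multiplier / output_shift pair cached in OpData (and in OpDataConv below) encodes the real-valued rescale factor input_scale * filter_scale / output_scale as a Q31 fixed-point multiplier plus a shift. A hedged sketch of that decomposition, written as a simplified stand-in for the QuantizeMultiplier helper in quantization_util.h rather than a copy of it:

#include <cmath>
#include <cstdint>

// Simplified stand-in: expresses a positive real multiplier as
// quantized_multiplier * 2^shift, with quantized_multiplier in [2^30, 2^31).
inline void QuantizeMultiplierSketch(double real_multiplier,
                                     int32_t* quantized_multiplier,
                                     int* shift) {
  if (real_multiplier == 0.0) {
    *quantized_multiplier = 0;
    *shift = 0;
    return;
  }
  const double q = std::frexp(real_multiplier, shift);  // q is in [0.5, 1)
  int64_t q_fixed = static_cast<int64_t>(std::round(q * (1LL << 31)));
  if (q_fixed == (1LL << 31)) {  // rounding pushed q up to exactly 1.0
    q_fixed /= 2;
    ++*shift;
  }
  *quantized_multiplier = static_cast<int32_t>(q_fixed);
}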
77  code/components/tfmicro/tensorflow/lite/micro/kernels/conv.h  Normal file
@@ -0,0 +1,77 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_

#include <cstdint>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {

struct OpDataConv {
  TfLitePaddingValues padding;

  // Cached tensor zero point values for quantized operations.
  int32_t input_zero_point;
  int32_t filter_zero_point;
  int32_t output_zero_point;

  // The scaling factor from input to output (aka the 'real multiplier') can
  // be represented as a fixed point multiplier plus a left shift.
  int32_t output_multiplier;
  int output_shift;

  // Per channel output multiplier and shift.
  int32_t* per_channel_output_multiplier;
  int32_t* per_channel_output_shift;

  // The range of the fused activation layer. For example for kNone and
  // uint8_t these would be 0 and 255.
  int32_t output_activation_min;
  int32_t output_activation_max;
};

extern const int kConvInputTensor;
extern const int kConvWeightsTensor;
extern const int kConvBiasTensor;
extern const int kConvOutputTensor;
extern const int kConvQuantizedDimension;

// Returns a ConvParams struct with all the parameters needed for a
// float computation.
ConvParams ConvParamsFloat(const TfLiteConvParams& params,
                           const OpDataConv& data);

// Returns a ConvParams struct with all the parameters needed for a
// quantized computation.
ConvParams ConvParamsQuantized(const TfLiteConvParams& params,
                               const OpDataConv& data);

TfLiteStatus CalculateOpDataConv(TfLiteContext* context, TfLiteNode* node,
                                 const TfLiteConvParams& params, int width,
                                 int height, int filter_width,
                                 int filter_height, int out_width,
                                 int out_height, const TfLiteType data_type,
                                 OpDataConv* data);

TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node);

}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
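A model that uses this kernel still pulls it in through an op resolver; a minimal usage sketch, assuming the MicroMutableOpResolver API from the same TFLM snapshot:

#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

// Registers the CONV_2D kernel declared above before building a
// MicroInterpreter; the <1> capacity leaves room for exactly one op.
inline void RegisterConvOp(tflite::MicroMutableOpResolver<1>& resolver) {
  resolver.AddConv2D();
}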
@@ -0,0 +1,182 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/conv.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/conv.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/conv.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {

const int kConvInputTensor = 0;
const int kConvWeightsTensor = 1;
const int kConvBiasTensor = 2;
const int kConvOutputTensor = 0;

// Conv is quantized along dimension 0:
// https://www.tensorflow.org/lite/performance/quantization_spec
const int kConvQuantizedDimension = 0;

// Returns a ConvParams struct with all the parameters needed for a
// float computation.
ConvParams ConvParamsFloat(const TfLiteConvParams& params,
                           const OpDataConv& data) {
  ConvParams op_params;
  CalculateActivationRange(params.activation, &op_params.float_activation_min,
                           &op_params.float_activation_max);
  op_params.padding_type = tflite::micro::RuntimePaddingType(params.padding);
  op_params.padding_values.width = data.padding.width;
  op_params.padding_values.height = data.padding.height;
  op_params.stride_width = params.stride_width;
  op_params.stride_height = params.stride_height;
  op_params.dilation_width_factor = params.dilation_width_factor;
  op_params.dilation_height_factor = params.dilation_height_factor;
  return op_params;
}

// Returns a ConvParams struct with all the parameters needed for a
// quantized computation.
ConvParams ConvParamsQuantized(const TfLiteConvParams& params,
                               const OpDataConv& data) {
  ConvParams op_params;
  op_params.input_offset = -data.input_zero_point;
  op_params.weights_offset = -data.filter_zero_point;
  op_params.output_offset = data.output_zero_point;
  op_params.output_multiplier = data.output_multiplier;
  op_params.output_shift = -data.output_shift;
  op_params.padding_type = tflite::micro::RuntimePaddingType(params.padding);
  op_params.padding_values.height = data.padding.height;
  op_params.padding_values.width = data.padding.width;
  op_params.stride_height = params.stride_height;
  op_params.stride_width = params.stride_width;
  op_params.dilation_height_factor = params.dilation_height_factor;
  op_params.dilation_width_factor = params.dilation_width_factor;
  op_params.quantized_activation_min = data.output_activation_min;
  op_params.quantized_activation_max = data.output_activation_max;
  return op_params;
}

TfLiteStatus CalculateOpDataConv(TfLiteContext* context, TfLiteNode* node,
                                 const TfLiteConvParams& params, int width,
                                 int height, int filter_width,
                                 int filter_height, int out_width,
                                 int out_height, const TfLiteType data_type,
                                 OpDataConv* data) {
  bool has_bias = node->inputs->size == 3;
  // Check number of inputs/outputs
  TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2);
  TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);

  // Matching GetWindowedOutputSize in TensorFlow.
  auto padding = params.padding;
  data->padding = ComputePaddingHeightWidth(
      params.stride_height, params.stride_width, params.dilation_height_factor,
      params.dilation_width_factor, height, width, filter_height, filter_width,
      padding, &out_height, &out_width);

  const TfLiteTensor* input = GetInput(context, node, kConvInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  const TfLiteTensor* filter = GetInput(context, node, kConvWeightsTensor);
  TF_LITE_ENSURE(context, filter != nullptr);
  const TfLiteTensor* bias =
      GetOptionalInputTensor(context, node, kConvBiasTensor);
  TfLiteTensor* output = GetOutput(context, node, kConvOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);

  // Note that quantized inference requires that all tensors have their
  // parameters set. This is usually done during quantized training.
  if (data_type != kTfLiteFloat32) {
    int output_channels = filter->dims->data[kConvQuantizedDimension];

    TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
        context, input, filter, bias, output, params.activation,
        &data->output_multiplier, &data->output_shift,
        &data->output_activation_min, &data->output_activation_max,
        data->per_channel_output_multiplier,
        reinterpret_cast<int*>(data->per_channel_output_shift),
        output_channels));
  }

  data->input_zero_point = input->params.zero_point;
  data->filter_zero_point = filter->params.zero_point;
  data->output_zero_point = output->params.zero_point;

  return kTfLiteOk;
}

TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  TFLITE_DCHECK(node->builtin_data != nullptr);

  OpDataConv* data = static_cast<OpDataConv*>(node->user_data);
  const auto& params =
      *(static_cast<const TfLiteConvParams*>(node->builtin_data));

  TfLiteTensor* output = GetOutput(context, node, kConvOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);
  const TfLiteTensor* input = GetInput(context, node, kConvInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  const TfLiteTensor* filter = GetInput(context, node, kConvWeightsTensor);
  TF_LITE_ENSURE(context, filter != nullptr);

  const int input_width = input->dims->data[2];
  const int input_height = input->dims->data[1];
  const int filter_width = filter->dims->data[2];
  const int filter_height = filter->dims->data[1];
  const int output_width = output->dims->data[2];
  const int output_height = output->dims->data[1];

  // Dynamically allocate per-channel quantization parameters.
  const int num_channels = filter->dims->data[kConvQuantizedDimension];
  data->per_channel_output_multiplier =
      static_cast<int32_t*>(context->AllocatePersistentBuffer(
          context, num_channels * sizeof(int32_t)));
  data->per_channel_output_shift =
      static_cast<int32_t*>(context->AllocatePersistentBuffer(
          context, num_channels * sizeof(int32_t)));

  // All per-channel quantized tensors need valid zero point and scale arrays.
  if (input->type == kTfLiteInt8) {
    TF_LITE_ENSURE_EQ(context, filter->quantization.type,
                      kTfLiteAffineQuantization);

    const auto* affine_quantization =
        static_cast<TfLiteAffineQuantization*>(filter->quantization.params);
    TFLITE_DCHECK(affine_quantization != nullptr);
    TFLITE_DCHECK(affine_quantization->scale != nullptr);
    TFLITE_DCHECK(affine_quantization->zero_point != nullptr);

    TF_LITE_ENSURE(context,
                   affine_quantization->scale->size == 1 ||
                       affine_quantization->scale->size ==
                           filter->dims->data[kConvQuantizedDimension]);
    TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size,
                      affine_quantization->zero_point->size);
  }

  TF_LITE_ENSURE_STATUS(CalculateOpDataConv(
      context, node, params, input_width, input_height, filter_width,
      filter_height, output_width, output_height, input->type, data));

  return kTfLiteOk;
}
}  // namespace tflite
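ComputePaddingHeightWidth, called from CalculateOpDataConv above, also derives the convolution's output size. The standard arithmetic behind it is shown here as a hedged sketch of the SAME/VALID formulas with dilation folded into an effective filter size (the real logic lives in tensorflow/lite/kernels/padding.h):

// Illustrative output-size arithmetic for one spatial dimension.
inline int ConvOutputSizeSketch(int in, int filter, int stride, int dilation,
                                bool same_padding) {
  const int effective_filter = (filter - 1) * dilation + 1;
  if (same_padding) {
    return (in + stride - 1) / stride;               // ceil(in / stride)
  }
  return (in - effective_filter + stride) / stride;  // VALID padding
}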
@@ -0,0 +1,94 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/kernels/kernel_runner.h"
#include "tensorflow/lite/micro/kernels/micro_ops.h"
#include "tensorflow/lite/micro/test_helpers.h"
#include "tensorflow/lite/micro/testing/micro_test.h"

namespace tflite {
namespace testing {

TfLiteStatus InvokeConv(TfLiteTensor* tensors, int tensors_size,
                        int output_length, TfLiteConvParams* conv_params,
                        TfLiteRegistration registration, float* output_data);

TfLiteStatus InvokeConv(TfLiteTensor* tensors, int tensors_size,
                        int output_length, TfLiteConvParams* conv_params,
                        TfLiteRegistration registration, int8_t* output_data);

TfLiteStatus InvokeConv(TfLiteTensor* tensors, int tensors_size,
                        int output_length, TfLiteConvParams* conv_params,
                        TfLiteRegistration registration, uint8_t* output_data);

TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size,
                                 const float* expected_output_data,
                                 int output_length,
                                 TfLiteConvParams* conv_params,
                                 TfLiteRegistration registration,
                                 float* output_data, float tolerance = 1e-5);

TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size,
                                 const int8_t* expected_output_data,
                                 int output_length,
                                 TfLiteConvParams* conv_params,
                                 TfLiteRegistration registration,
                                 int8_t* output_data, float tolerance = 1e-5);

TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size,
                                 const uint8_t* expected_output_data,
                                 int output_length,
                                 TfLiteConvParams* conv_params,
                                 TfLiteRegistration registration,
                                 uint8_t* output_data, float tolerance = 1e-5);

TfLiteStatus TestConvFloat(const int* input_dims_data, const float* input_data,
                           const int* filter_dims_data,
                           const float* filter_data, const int* bias_dims_data,
                           const float* bias_data, const int* output_dims_data,
                           const float* expected_output_data,
                           TfLiteConvParams* conv_params,
                           TfLiteRegistration registration, float* output_data);

TfLiteStatus TestConvQuantizedPerLayer(
    const int* input_dims_data, const float* input_data,
    uint8_t* input_quantized, float input_scale, const int* filter_dims_data,
    const float* filter_data, uint8_t* filter_quantized, float filter_scale,
    const int* bias_dims_data, const float* bias_data, int32_t* bias_quantized,
    const int* output_dims_data, const float* expected_output_data,
    uint8_t* expected_output_quantized, float output_scale,
    TfLiteConvParams* conv_params, TfLiteRegistration registration,
    uint8_t* output_data);

TfLiteStatus TestConvQuantizedPerChannel(
    const int* input_dims_data, const float* input_data,
    int8_t* input_quantized, float input_scale, int input_zero_point,
    const int* filter_dims_data, const float* filter_data,
    int8_t* filter_data_quantized, const int* bias_dims_data,
    const float* bias_data, int32_t* bias_data_quantized, float* bias_scales,
    int* bias_zero_points, const int* output_dims_data,
    const float* expected_output_data, int8_t* expected_output_data_quantized,
    float output_scale, int output_zero_point, TfLiteConvParams* conv_params,
    TfLiteRegistration registration, int8_t* output_data);

}  // namespace testing
}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h"
|
||||
#include "tensorflow/lite/micro/kernels/depthwise_conv.h"
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
@@ -21,6 +21,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/padding.h"
|
||||
@@ -29,279 +30,58 @@ limitations under the License.
|
||||
namespace tflite {
|
||||
namespace {
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kFilterTensor = 1;
|
||||
constexpr int kBiasTensor = 2;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
// Depthwise conv is quantized along dimension 3:
|
||||
// https://www.tensorflow.org/lite/performance/quantization_spec
|
||||
constexpr int kDepthwiseConvQuantizedDimension = 3;
|
||||
|
||||
struct OpData {
|
||||
TfLitePaddingValues padding;
|
||||
|
||||
// Cached tensor zero point values for quantized operations.
|
||||
int32_t input_zero_point;
|
||||
int32_t filter_zero_point;
|
||||
int32_t output_zero_point;
|
||||
|
||||
// The scaling factor from input to output (aka the 'real multiplier') can
|
||||
// be represented as a fixed point multiplier plus a left shift.
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
|
||||
// Per channel output multiplier and shift.
|
||||
int32_t* per_channel_output_multiplier;
|
||||
int32_t* per_channel_output_shift;
|
||||
// The range of the fused activation layer. For example for kNone and
|
||||
// uint8_t these would be 0 and 255.
|
||||
int32_t output_activation_min;
|
||||
int32_t output_activation_max;
|
||||
};
|
||||
|
||||
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteDepthwiseConvParams* params, int width,
|
||||
int height, int filter_width, int filter_height,
|
||||
const TfLiteType data_type, OpData* data) {
|
||||
bool has_bias = node->inputs->size == 3;
|
||||
// Check number of inputs/outputs
|
||||
TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2);
|
||||
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
|
||||
|
||||
int unused_output_height, unused_output_width;
|
||||
data->padding = ComputePaddingHeightWidth(
|
||||
params->stride_height, params->stride_width, 1, 1, height, width,
|
||||
filter_height, filter_width, params->padding, &unused_output_height,
|
||||
&unused_output_width);
|
||||
|
||||
// Note that quantized inference requires that all tensors have their
|
||||
// parameters set. This is usually done during quantized training.
|
||||
if (data_type != kTfLiteFloat32) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
|
||||
TF_LITE_ENSURE(context, filter != nullptr);
|
||||
const TfLiteTensor* bias =
|
||||
GetOptionalInputTensor(context, node, kBiasTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
|
||||
|
||||
return tflite::PopulateConvolutionQuantizationParams(
|
||||
context, input, filter, bias, output, params->activation,
|
||||
&data->output_multiplier, &data->output_shift,
|
||||
&data->output_activation_min, &data->output_activation_max,
|
||||
data->per_channel_output_multiplier,
|
||||
reinterpret_cast<int*>(data->per_channel_output_shift), num_channels);
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
|
||||
auto* params =
|
||||
reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
|
||||
TF_LITE_ENSURE(context, filter != nullptr);
|
||||
|
||||
const TfLiteType data_type = input->type;
|
||||
int width = SizeOfDimension(input, 2);
|
||||
int height = SizeOfDimension(input, 1);
|
||||
int filter_width = SizeOfDimension(filter, 2);
|
||||
int filter_height = SizeOfDimension(filter, 1);
|
||||
|
||||
// Per channel quantization is only needed for int8_t inference. For other
|
||||
// quantized types, only a single scale and zero point is needed.
|
||||
const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
|
||||
// Dynamically allocate per-channel quantization parameters.
|
||||
data->per_channel_output_multiplier =
|
||||
reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
|
||||
context, num_channels * sizeof(int32_t)));
|
||||
data->per_channel_output_shift =
|
||||
reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
|
||||
context, num_channels * sizeof(int32_t)));
|
||||
|
||||
// All per-channel quantized tensors need valid zero point and scale arrays.
|
||||
if (input->type == kTfLiteInt8) {
|
||||
TF_LITE_ENSURE_EQ(context, filter->quantization.type,
|
||||
kTfLiteAffineQuantization);
|
||||
|
||||
const auto* affine_quantization =
|
||||
reinterpret_cast<TfLiteAffineQuantization*>(
|
||||
filter->quantization.params);
|
||||
TF_LITE_ENSURE(context, affine_quantization);
|
||||
TF_LITE_ENSURE(context, affine_quantization->scale);
|
||||
TF_LITE_ENSURE(context, affine_quantization->zero_point);
|
||||
TF_LITE_ENSURE(
|
||||
context, affine_quantization->scale->size == 1 ||
|
||||
affine_quantization->scale->size ==
|
||||
filter->dims->data[kDepthwiseConvQuantizedDimension]);
|
||||
TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size,
|
||||
affine_quantization->zero_point->size);
|
||||
}
|
||||
|
||||
TF_LITE_ENSURE_STATUS(CalculateOpData(context, node, params, width, height,
|
||||
filter_width, filter_height, data_type,
|
||||
data));
|
||||
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
data->filter_zero_point = filter->params.zero_point;
|
||||
data->output_zero_point = output->params.zero_point;
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteDepthwiseConvParams* params, const OpData& data,
|
||||
const TfLiteEvalTensor* input, const TfLiteEvalTensor* filter,
|
||||
const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) {
|
||||
float output_activation_min, output_activation_max;
|
||||
CalculateActivationRange(params->activation, &output_activation_min,
|
||||
&output_activation_max);
|
||||
|
||||
tflite::DepthwiseParams op_params;
|
||||
// Padding type is ignored, but still set.
|
||||
op_params.padding_type = PaddingType::kSame;
|
||||
op_params.padding_values.width = data.padding.width;
|
||||
op_params.padding_values.height = data.padding.height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.depth_multiplier = params->depth_multiplier;
|
||||
op_params.float_activation_min = output_activation_min;
|
||||
op_params.float_activation_max = output_activation_max;
|
||||
|
||||
tflite::reference_ops::DepthwiseConv(
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<float>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<float>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
}
|
||||
|
||||
void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteDepthwiseConvParams* params,
|
||||
const OpData& data, const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter,
|
||||
const TfLiteEvalTensor* bias,
|
||||
TfLiteEvalTensor* output) {
|
||||
DepthwiseParams op_params;
|
||||
op_params.padding_type = PaddingType::kSame;
|
||||
op_params.padding_values.width = data.padding.width;
|
||||
op_params.padding_values.height = data.padding.height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.depth_multiplier = params->depth_multiplier;
|
||||
op_params.input_offset = -data.input_zero_point;
|
||||
op_params.weights_offset = 0;
|
||||
op_params.output_offset = data.output_zero_point;
|
||||
// TODO(b/130439627): Use calculated value for clamping.
|
||||
op_params.quantized_activation_min = std::numeric_limits<int8_t>::min();
|
||||
op_params.quantized_activation_max = std::numeric_limits<int8_t>::max();
|
||||
|
||||
reference_integer_ops::DepthwiseConvPerChannel(
|
||||
op_params, data.per_channel_output_multiplier,
|
||||
data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<int8_t>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<int32_t>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
|
||||
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteDepthwiseConvParams* params, const OpData& data,
|
||||
const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias,
|
||||
TfLiteEvalTensor* output) {
|
||||
const int32_t input_offset = -data.input_zero_point;
|
||||
const int32_t filter_offset = -data.filter_zero_point;
|
||||
const int32_t output_offset = data.output_zero_point;
|
||||
|
||||
tflite::DepthwiseParams op_params;
|
||||
// Padding type is ignored, but still set.
|
||||
op_params.padding_type = PaddingType::kSame;
|
||||
op_params.padding_values.width = data.padding.width;
|
||||
op_params.padding_values.height = data.padding.height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.depth_multiplier = params->depth_multiplier;
|
||||
op_params.quantized_activation_min = data.output_activation_min;
|
||||
op_params.quantized_activation_max = data.output_activation_max;
|
||||
op_params.input_offset = input_offset;
|
||||
op_params.weights_offset = filter_offset;
|
||||
op_params.output_offset = output_offset;
|
||||
op_params.output_multiplier = data.output_multiplier;
|
||||
// Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
|
||||
op_params.output_shift = -data.output_shift;
|
||||
|
||||
tflite::reference_ops::DepthwiseConv(
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<uint8_t>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<int32_t>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpDataConv));
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
|
||||
auto* params =
|
||||
reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
|
||||
const OpData& data = *(static_cast<const OpData*>(node->user_data));
|
||||
auto& params =
|
||||
*(reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data));
|
||||
const OpDataConv& data = *(static_cast<const OpDataConv*>(node->user_data));
|
||||
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
tflite::micro::GetEvalOutput(context, node, kDepthwiseConvOutputTensor);
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
tflite::micro::GetEvalInput(context, node, kDepthwiseConvInputTensor);
|
||||
const TfLiteEvalTensor* filter =
|
||||
tflite::micro::GetEvalInput(context, node, kFilterTensor);
|
||||
tflite::micro::GetEvalInput(context, node, kDepthwiseConvWeightsTensor);
|
||||
const TfLiteEvalTensor* bias =
|
||||
(NumInputs(node) == 3)
|
||||
? tflite::micro::GetEvalInput(context, node, kBiasTensor)
|
||||
? tflite::micro::GetEvalInput(context, node, kDepthwiseConvBiasTensor)
|
||||
: nullptr;
|
||||
|
||||
// TODO(aselle): Consider whether float conv and quantized conv should be
|
||||
// separate ops to avoid dispatch overhead here.
|
||||
switch (input->type) { // Already know in/out types are same.
|
||||
case kTfLiteFloat32:
|
||||
EvalFloat(context, node, params, data, input, filter, bias, output);
|
||||
case kTfLiteFloat32: {
|
||||
tflite::reference_ops::DepthwiseConv(
|
||||
DepthwiseConvParamsFloat(params, data),
|
||||
tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<float>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<float>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
EvalQuantizedPerChannel(context, node, params, data, input, filter, bias,
|
||||
output);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
EvalQuantized(context, node, params, data, input, filter, bias, output);
|
||||
}
|
||||
case kTfLiteInt8: {
|
||||
reference_integer_ops::DepthwiseConvPerChannel(
|
||||
DepthwiseConvParamsQuantized(params, data),
|
||||
data.per_channel_output_multiplier, data.per_channel_output_shift,
|
||||
tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<int8_t>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<int32_t>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(input->type), input->type);
|
||||
@@ -315,7 +95,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TfLiteRegistration Register_DEPTHWISE_CONV_2D() {
|
||||
return {/*init=*/Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/Prepare,
|
||||
/*prepare=*/DepthwiseConvPrepare,
|
||||
/*invoke=*/Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
|
||||
@@ -0,0 +1,54 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_KERNELS_DEPTHWISE_CONV_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_DEPTHWISE_CONV_H_

#include <cstdint>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/micro/kernels/conv.h"

namespace tflite {

extern const int kDepthwiseConvInputTensor;
extern const int kDepthwiseConvWeightsTensor;
extern const int kDepthwiseConvBiasTensor;
extern const int kDepthwiseConvOutputTensor;
extern const int kDepthwiseConvQuantizedDimension;

// Returns a DepthwiseParams struct with all the parameters needed for a
// float computation.
DepthwiseParams DepthwiseConvParamsFloat(
    const TfLiteDepthwiseConvParams& params, const OpDataConv& data);

// Returns a DepthwiseParams struct with all the parameters needed for a
// quantized computation.
DepthwiseParams DepthwiseConvParamsQuantized(
    const TfLiteDepthwiseConvParams& params, const OpDataConv& data);

TfLiteStatus CalculateOpDataDepthwiseConv(
    TfLiteContext* context, TfLiteNode* node,
    const TfLiteDepthwiseConvParams& params, int width, int height,
    int filter_width, int filter_height, int out_width, int out_height,
    const TfLiteType data_type, OpDataConv* data);

TfLiteStatus DepthwiseConvPrepare(TfLiteContext* context, TfLiteNode* node);

} // namespace tflite

#endif // TENSORFLOW_LITE_MICRO_KERNELS_DEPTHWISE_CONV_H_
|
||||
@@ -0,0 +1,188 @@
|
||||
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/padding.h"
|
||||
#include "tensorflow/lite/micro/kernels/depthwise_conv.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
const int kDepthwiseConvInputTensor = 0;
|
||||
const int kDepthwiseConvWeightsTensor = 1;
|
||||
const int kDepthwiseConvBiasTensor = 2;
|
||||
const int kDepthwiseConvOutputTensor = 0;
|
||||
|
||||
// DepthwiseConv is quantized along dimension 3:
|
||||
// https://www.tensorflow.org/lite/performance/quantization_spec
|
||||
const int kDepthwiseConvQuantizedDimension = 3;
|
||||
|
||||
// Returns a DepthwiseParams struct with all the parameters needed for a
|
||||
// float computation.
|
||||
DepthwiseParams DepthwiseConvParamsFloat(
|
||||
const TfLiteDepthwiseConvParams& params, const OpDataConv& data) {
|
||||
DepthwiseParams op_params;
|
||||
CalculateActivationRange(params.activation, &op_params.float_activation_min,
|
||||
&op_params.float_activation_max);
|
||||
op_params.padding_type = tflite::micro::RuntimePaddingType(params.padding);
|
||||
op_params.padding_values.width = data.padding.width;
|
||||
op_params.padding_values.height = data.padding.height;
|
||||
op_params.stride_width = params.stride_width;
|
||||
op_params.stride_height = params.stride_height;
|
||||
op_params.dilation_width_factor = params.dilation_width_factor;
|
||||
op_params.dilation_height_factor = params.dilation_height_factor;
|
||||
op_params.depth_multiplier = params.depth_multiplier;
|
||||
return op_params;
|
||||
}
|
||||
|
||||
// Returns a DepthwiseParams struct with all the parameters needed for a
|
||||
// quantized computation.
|
||||
DepthwiseParams DepthwiseConvParamsQuantized(
|
||||
const TfLiteDepthwiseConvParams& params, const OpDataConv& data) {
|
||||
DepthwiseParams op_params;
|
||||
op_params.input_offset = -data.input_zero_point;
|
||||
op_params.weights_offset = -data.filter_zero_point;
|
||||
op_params.output_offset = data.output_zero_point;
|
||||
op_params.output_multiplier = data.output_multiplier;
|
||||
op_params.output_shift = -data.output_shift;
|
||||
op_params.padding_type = tflite::micro::RuntimePaddingType(params.padding);
|
||||
op_params.padding_values.height = data.padding.height;
|
||||
op_params.padding_values.width = data.padding.width;
|
||||
op_params.stride_height = params.stride_height;
|
||||
op_params.stride_width = params.stride_width;
|
||||
op_params.dilation_height_factor = params.dilation_height_factor;
|
||||
op_params.dilation_width_factor = params.dilation_width_factor;
|
||||
op_params.depth_multiplier = params.depth_multiplier;
|
||||
op_params.quantized_activation_min = data.output_activation_min;
|
||||
op_params.quantized_activation_max = data.output_activation_max;
|
||||
return op_params;
|
||||
}
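// Note (added for clarity; the example value is an assumption, not from the
// original source): DepthwiseConvParamsQuantized() above simply repacks the
// cached OpDataConv fields into the DepthwiseParams layout consumed by the
// reference kernels. Zero points become negated offsets on the input side,
// e.g. an int8 input with zero point -128 yields op_params.input_offset = 128,
// while the output zero point is passed through with its sign unchanged.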
|
||||
|
||||
TfLiteStatus CalculateOpDataDepthwiseConv(
|
||||
TfLiteContext* context, TfLiteNode* node,
|
||||
const TfLiteDepthwiseConvParams& params, int width, int height,
|
||||
int filter_width, int filter_height, int out_width, int out_height,
|
||||
const TfLiteType data_type, OpDataConv* data) {
|
||||
bool has_bias = node->inputs->size == 3;
|
||||
// Check number of inputs/outputs
|
||||
TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2);
|
||||
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
|
||||
|
||||
// Matching GetWindowedOutputSize in TensorFlow.
|
||||
auto padding = params.padding;
|
||||
data->padding = ComputePaddingHeightWidth(
|
||||
params.stride_height, params.stride_width, params.dilation_height_factor,
|
||||
params.dilation_width_factor, height, width, filter_height, filter_width,
|
||||
padding, &out_height, &out_width);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kConvInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
const TfLiteTensor* filter = GetInput(context, node, kConvWeightsTensor);
|
||||
TF_LITE_ENSURE(context, filter != nullptr);
|
||||
const TfLiteTensor* bias =
|
||||
GetOptionalInputTensor(context, node, kConvBiasTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kConvOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
|
||||
// Note that quantized inference requires that all tensors have their
|
||||
// parameters set. This is usually done during quantized training.
|
||||
if (data_type != kTfLiteFloat32) {
|
||||
int output_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
|
||||
|
||||
TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
|
||||
context, input, filter, bias, output, params.activation,
|
||||
&data->output_multiplier, &data->output_shift,
|
||||
&data->output_activation_min, &data->output_activation_max,
|
||||
data->per_channel_output_multiplier,
|
||||
reinterpret_cast<int*>(data->per_channel_output_shift),
|
||||
output_channels));
|
||||
}
|
||||
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
data->filter_zero_point = filter->params.zero_point;
|
||||
data->output_zero_point = output->params.zero_point;
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus DepthwiseConvPrepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
|
||||
OpDataConv* data = static_cast<OpDataConv*>(node->user_data);
|
||||
const auto& params =
|
||||
*(static_cast<const TfLiteDepthwiseConvParams*>(node->builtin_data));
|
||||
|
||||
TfLiteTensor* output = GetOutput(context, node, kDepthwiseConvOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
const TfLiteTensor* input =
|
||||
GetInput(context, node, kDepthwiseConvInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
const TfLiteTensor* filter =
|
||||
GetInput(context, node, kDepthwiseConvWeightsTensor);
|
||||
TF_LITE_ENSURE(context, filter != nullptr);
|
||||
|
||||
const int input_width = input->dims->data[2];
|
||||
const int input_height = input->dims->data[1];
|
||||
const int filter_width = filter->dims->data[2];
|
||||
const int filter_height = filter->dims->data[1];
|
||||
const int output_width = output->dims->data[2];
|
||||
const int output_height = output->dims->data[1];
|
||||
|
||||
// Dynamically allocate per-channel quantization parameters.
|
||||
const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
|
||||
data->per_channel_output_multiplier =
|
||||
static_cast<int32_t*>(context->AllocatePersistentBuffer(
|
||||
context, num_channels * sizeof(int32_t)));
|
||||
data->per_channel_output_shift =
|
||||
static_cast<int32_t*>(context->AllocatePersistentBuffer(
|
||||
context, num_channels * sizeof(int32_t)));
|
||||
|
||||
// All per-channel quantized tensors need valid zero point and scale arrays.
|
||||
if (input->type == kTfLiteInt8) {
|
||||
TF_LITE_ENSURE_EQ(context, filter->quantization.type,
|
||||
kTfLiteAffineQuantization);
|
||||
|
||||
const auto* affine_quantization =
|
||||
static_cast<TfLiteAffineQuantization*>(filter->quantization.params);
|
||||
TFLITE_DCHECK(affine_quantization != nullptr);
|
||||
TFLITE_DCHECK(affine_quantization->scale != nullptr);
|
||||
TFLITE_DCHECK(affine_quantization->zero_point != nullptr);
|
||||
|
||||
TF_LITE_ENSURE(
|
||||
context, affine_quantization->scale->size == 1 ||
|
||||
affine_quantization->scale->size ==
|
||||
filter->dims->data[kDepthwiseConvQuantizedDimension]);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size,
|
||||
affine_quantization->zero_point->size);
|
||||
}
|
||||
|
||||
TF_LITE_ENSURE_STATUS(CalculateOpDataDepthwiseConv(
|
||||
context, node, params, input_width, input_height, filter_width,
|
||||
filter_height, output_width, output_height, input->type, data));
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace tflite
|
||||
@@ -59,8 +59,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TF_LITE_ENSURE(context, input->type == kTfLiteUInt8 ||
|
||||
input->type == kTfLiteInt8 ||
|
||||
input->type == kTfLiteInt16);
|
||||
TF_LITE_ENSURE(
|
||||
context, output->type == kTfLiteFloat32 || output->type == kTfLiteInt32);
|
||||
TF_LITE_ENSURE(context, output->type == kTfLiteFloat32);
|
||||
|
||||
if (output->type == kTfLiteInt32) {
|
||||
const double effective_output_scale =
|
||||
@@ -112,32 +111,6 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else if (output->type == kTfLiteInt32) {
|
||||
int flat_size = MatchingFlatSize(tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorShape(output));
|
||||
switch (input->type) {
|
||||
case kTfLiteInt16: {
|
||||
reference_ops::Requantize(
|
||||
tflite::micro::GetTensorData<int16_t>(input), flat_size,
|
||||
data->output_multiplier, data->output_shift,
|
||||
data->quantization_params.zero_point, data->output_zero_point,
|
||||
tflite::micro::GetTensorData<int32_t>(output));
|
||||
break;
|
||||
}
|
||||
case kTfLiteInt8: {
|
||||
reference_ops::Requantize(
|
||||
tflite::micro::GetTensorData<int8_t>(input), flat_size,
|
||||
data->output_multiplier, data->output_shift,
|
||||
data->quantization_params.zero_point, data->output_zero_point,
|
||||
tflite::micro::GetTensorData<int32_t>(output));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
TfLiteTypeGetName(input->type),
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else {
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
TfLiteTypeGetName(input->type),
|
||||
|
||||
@@ -0,0 +1,805 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <numeric>

#define FLATBUFFERS_LOCALE_INDEPENDENT 0
#include "flatbuffers/flexbuffers.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"

namespace tflite {
namespace {

/**
 * This version of detection_postprocess is specific to TFLite Micro. It
 * differs from the TFLite version in the following ways:
 *
 * 1.) Temporaries (temporary tensors) - Micro uses the scratch buffer API
 *     instead.
 * 2.) Output dimensions - the TFLite version does not support undefined
 *     output dimensions, so the model must have static output dimensions.
 */

// Input tensors
constexpr int kInputTensorBoxEncodings = 0;
constexpr int kInputTensorClassPredictions = 1;
constexpr int kInputTensorAnchors = 2;

// Output tensors
constexpr int kOutputTensorDetectionBoxes = 0;
constexpr int kOutputTensorDetectionClasses = 1;
constexpr int kOutputTensorDetectionScores = 2;
constexpr int kOutputTensorNumDetections = 3;

constexpr int kNumCoordBox = 4;
constexpr int kBatchSize = 1;

constexpr int kNumDetectionsPerClass = 100;

// Object Detection model produces axis-aligned boxes in two formats:
// BoxCorner represents the lower left corner (xmin, ymin) and
// the upper right corner (xmax, ymax).
// CenterSize represents the center (xcenter, ycenter), height and width.
// BoxCornerEncoding and CenterSizeEncoding are related as follows:
// ycenter = y / y_scale * anchor.h + anchor.y;
// xcenter = x / x_scale * anchor.w + anchor.x;
// half_h = 0.5 * exp(h / h_scale) * anchor.h;
// half_w = 0.5 * exp(w / w_scale) * anchor.w;
// ymin = ycenter - half_h
// ymax = ycenter + half_h
// xmin = xcenter - half_w
// xmax = xcenter + half_w
struct BoxCornerEncoding {
  float ymin;
  float xmin;
  float ymax;
  float xmax;
};

struct CenterSizeEncoding {
  float y;
  float x;
  float h;
  float w;
};
// We make sure that the memory allocations are contiguous with static_assert.
static_assert(sizeof(BoxCornerEncoding) == sizeof(float) * kNumCoordBox,
              "Size of BoxCornerEncoding is 4 float values");
static_assert(sizeof(CenterSizeEncoding) == sizeof(float) * kNumCoordBox,
              "Size of CenterSizeEncoding is 4 float values");
|
||||
|
||||
struct OpData {
|
||||
int max_detections;
|
||||
int max_classes_per_detection; // Fast Non-Max-Suppression
|
||||
int detections_per_class; // Regular Non-Max-Suppression
|
||||
float non_max_suppression_score_threshold;
|
||||
float intersection_over_union_threshold;
|
||||
int num_classes;
|
||||
bool use_regular_non_max_suppression;
|
||||
CenterSizeEncoding scale_values;
|
||||
|
||||
// Scratch buffers indexes
|
||||
int active_candidate_idx;
|
||||
int decoded_boxes_idx;
|
||||
int scores_idx;
|
||||
int score_buffer_idx;
|
||||
int keep_scores_idx;
|
||||
int scores_after_regular_non_max_suppression_idx;
|
||||
int sorted_values_idx;
|
||||
int keep_indices_idx;
|
||||
int sorted_indices_idx;
|
||||
int buffer_idx;
|
||||
int selected_idx;
|
||||
|
||||
// Cached tensor scale and zero point values for quantized operations
|
||||
TfLiteQuantizationParams input_box_encodings;
|
||||
TfLiteQuantizationParams input_class_predictions;
|
||||
TfLiteQuantizationParams input_anchors;
|
||||
};
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
OpData* op_data = nullptr;
|
||||
|
||||
const uint8_t* buffer_t = reinterpret_cast<const uint8_t*>(buffer);
|
||||
const flexbuffers::Map& m = flexbuffers::GetRoot(buffer_t, length).AsMap();
|
||||
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
op_data = reinterpret_cast<OpData*>(
|
||||
context->AllocatePersistentBuffer(context, sizeof(OpData)));
|
||||
|
||||
op_data->max_detections = m["max_detections"].AsInt32();
|
||||
op_data->max_classes_per_detection = m["max_classes_per_detection"].AsInt32();
|
||||
if (m["detections_per_class"].IsNull())
|
||||
op_data->detections_per_class = kNumDetectionsPerClass;
|
||||
else
|
||||
op_data->detections_per_class = m["detections_per_class"].AsInt32();
|
||||
if (m["use_regular_nms"].IsNull())
|
||||
op_data->use_regular_non_max_suppression = false;
|
||||
else
|
||||
op_data->use_regular_non_max_suppression = m["use_regular_nms"].AsBool();
|
||||
|
||||
op_data->non_max_suppression_score_threshold =
|
||||
m["nms_score_threshold"].AsFloat();
|
||||
op_data->intersection_over_union_threshold = m["nms_iou_threshold"].AsFloat();
|
||||
op_data->num_classes = m["num_classes"].AsInt32();
|
||||
op_data->scale_values.y = m["y_scale"].AsFloat();
|
||||
op_data->scale_values.x = m["x_scale"].AsFloat();
|
||||
op_data->scale_values.h = m["h_scale"].AsFloat();
|
||||
op_data->scale_values.w = m["w_scale"].AsFloat();
|
||||
|
||||
return op_data;
|
||||
}
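// For reference (the values below are hypothetical, chosen only to illustrate
// the flexbuffer map read by Init() above): an SSD-style model might carry
// custom options such as
//   {"max_detections": 10, "max_classes_per_detection": 1,
//    "detections_per_class": 100, "use_regular_nms": false,
//    "nms_score_threshold": 0.3, "nms_iou_threshold": 0.6,
//    "num_classes": 90, "y_scale": 10.0, "x_scale": 10.0,
//    "h_scale": 5.0, "w_scale": 5.0}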
|
||||
|
||||
void Free(TfLiteContext* context, void* buffer) {}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* op_data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
// Inputs: box_encodings, scores, anchors
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 3);
|
||||
const TfLiteTensor* input_box_encodings =
|
||||
GetInput(context, node, kInputTensorBoxEncodings);
|
||||
const TfLiteTensor* input_class_predictions =
|
||||
GetInput(context, node, kInputTensorClassPredictions);
|
||||
const TfLiteTensor* input_anchors =
|
||||
GetInput(context, node, kInputTensorAnchors);
|
||||
TF_LITE_ENSURE_EQ(context, NumDimensions(input_box_encodings), 3);
|
||||
TF_LITE_ENSURE_EQ(context, NumDimensions(input_class_predictions), 3);
|
||||
TF_LITE_ENSURE_EQ(context, NumDimensions(input_anchors), 2);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 4);
|
||||
const int num_boxes = input_box_encodings->dims->data[1];
|
||||
const int num_classes = op_data->num_classes;
|
||||
|
||||
op_data->input_box_encodings.scale = input_box_encodings->params.scale;
|
||||
op_data->input_box_encodings.zero_point =
|
||||
input_box_encodings->params.zero_point;
|
||||
op_data->input_class_predictions.scale =
|
||||
input_class_predictions->params.scale;
|
||||
op_data->input_class_predictions.zero_point =
|
||||
input_class_predictions->params.zero_point;
|
||||
op_data->input_anchors.scale = input_anchors->params.scale;
|
||||
op_data->input_anchors.zero_point = input_anchors->params.zero_point;
|
||||
|
||||
// Scratch tensors
|
||||
context->RequestScratchBufferInArena(context, num_boxes,
|
||||
&op_data->active_candidate_idx);
|
||||
context->RequestScratchBufferInArena(context,
|
||||
num_boxes * kNumCoordBox * sizeof(float),
|
||||
&op_data->decoded_boxes_idx);
|
||||
context->RequestScratchBufferInArena(
|
||||
context,
|
||||
input_class_predictions->dims->data[1] *
|
||||
input_class_predictions->dims->data[2] * sizeof(float),
|
||||
&op_data->scores_idx);
|
||||
|
||||
// Additional buffers
|
||||
context->RequestScratchBufferInArena(context, num_boxes * sizeof(float),
|
||||
&op_data->score_buffer_idx);
|
||||
context->RequestScratchBufferInArena(context, num_boxes * sizeof(float),
|
||||
&op_data->keep_scores_idx);
|
||||
context->RequestScratchBufferInArena(
|
||||
context, op_data->max_detections * num_boxes * sizeof(float),
|
||||
&op_data->scores_after_regular_non_max_suppression_idx);
|
||||
context->RequestScratchBufferInArena(
|
||||
context, op_data->max_detections * num_boxes * sizeof(float),
|
||||
&op_data->sorted_values_idx);
|
||||
context->RequestScratchBufferInArena(context, num_boxes * sizeof(int),
|
||||
&op_data->keep_indices_idx);
|
||||
context->RequestScratchBufferInArena(
|
||||
context, op_data->max_detections * num_boxes * sizeof(int),
|
||||
&op_data->sorted_indices_idx);
|
||||
int buffer_size = std::max(num_classes, op_data->max_detections);
|
||||
context->RequestScratchBufferInArena(
|
||||
context, buffer_size * num_boxes * sizeof(int), &op_data->buffer_idx);
|
||||
buffer_size = std::min(num_boxes, op_data->max_detections);
|
||||
context->RequestScratchBufferInArena(
|
||||
context, buffer_size * num_boxes * sizeof(int), &op_data->selected_idx);
|
||||
|
||||
// Outputs: detection_boxes, detection_scores, detection_classes,
|
||||
// num_detections
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 4);
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
class Dequantizer {
 public:
  Dequantizer(int zero_point, float scale)
      : zero_point_(zero_point), scale_(scale) {}
  float operator()(uint8_t x) {
    return (static_cast<float>(x) - zero_point_) * scale_;
  }

 private:
  int zero_point_;
  float scale_;
};
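// Example (illustrative values): with zero_point = 128 and scale = 0.5f,
// Dequantizer maps the uint8_t value 200 to (200 - 128) * 0.5f = 36.0f.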
|
||||
|
||||
void DequantizeBoxEncodings(const TfLiteEvalTensor* input_box_encodings,
|
||||
int idx, float quant_zero_point, float quant_scale,
|
||||
int length_box_encoding,
|
||||
CenterSizeEncoding* box_centersize) {
|
||||
const uint8_t* boxes =
|
||||
tflite::micro::GetTensorData<uint8_t>(input_box_encodings) +
|
||||
length_box_encoding * idx;
|
||||
Dequantizer dequantize(quant_zero_point, quant_scale);
|
||||
// See definition of the KeyPointBoxCoder at
|
||||
// https://github.com/tensorflow/models/blob/master/research/object_detection/box_coders/keypoint_box_coder.py
|
||||
// The first four elements are the box coordinates, which is the same as the
|
||||
// FastRnnBoxCoder at
|
||||
// https://github.com/tensorflow/models/blob/master/research/object_detection/box_coders/faster_rcnn_box_coder.py
|
||||
box_centersize->y = dequantize(boxes[0]);
|
||||
box_centersize->x = dequantize(boxes[1]);
|
||||
box_centersize->h = dequantize(boxes[2]);
|
||||
box_centersize->w = dequantize(boxes[3]);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
T ReInterpretTensor(const TfLiteEvalTensor* tensor) {
|
||||
const float* tensor_base = tflite::micro::GetTensorData<float>(tensor);
|
||||
return reinterpret_cast<T>(tensor_base);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
T ReInterpretTensor(TfLiteEvalTensor* tensor) {
|
||||
float* tensor_base = tflite::micro::GetTensorData<float>(tensor);
|
||||
return reinterpret_cast<T>(tensor_base);
|
||||
}
|
||||
|
||||
TfLiteStatus DecodeCenterSizeBoxes(TfLiteContext* context, TfLiteNode* node,
|
||||
OpData* op_data) {
|
||||
// Parse the input tensor box encodings.
|
||||
const TfLiteEvalTensor* input_box_encodings =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensorBoxEncodings);
|
||||
TF_LITE_ENSURE_EQ(context, input_box_encodings->dims->data[0], kBatchSize);
|
||||
const int num_boxes = input_box_encodings->dims->data[1];
|
||||
TF_LITE_ENSURE(context, input_box_encodings->dims->data[2] >= kNumCoordBox);
|
||||
const TfLiteEvalTensor* input_anchors =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensorAnchors);
|
||||
|
||||
// Decode the boxes to get (ymin, xmin, ymax, xmax) based on the anchors
|
||||
CenterSizeEncoding box_centersize;
|
||||
CenterSizeEncoding scale_values = op_data->scale_values;
|
||||
CenterSizeEncoding anchor;
|
||||
for (int idx = 0; idx < num_boxes; ++idx) {
|
||||
switch (input_box_encodings->type) {
|
||||
// Quantized
|
||||
case kTfLiteUInt8:
|
||||
DequantizeBoxEncodings(
|
||||
input_box_encodings, idx,
|
||||
static_cast<float>(op_data->input_box_encodings.zero_point),
|
||||
static_cast<float>(op_data->input_box_encodings.scale),
|
||||
input_box_encodings->dims->data[2], &box_centersize);
|
||||
DequantizeBoxEncodings(
|
||||
input_anchors, idx,
|
||||
static_cast<float>(op_data->input_anchors.zero_point),
|
||||
static_cast<float>(op_data->input_anchors.scale), kNumCoordBox,
|
||||
&anchor);
|
||||
break;
|
||||
// Float
|
||||
case kTfLiteFloat32: {
|
||||
// Please see DequantizeBoxEncodings function for the support detail.
|
||||
const int box_encoding_idx = idx * input_box_encodings->dims->data[2];
|
||||
const float* boxes = &(tflite::micro::GetTensorData<float>(
|
||||
input_box_encodings)[box_encoding_idx]);
|
||||
box_centersize = *reinterpret_cast<const CenterSizeEncoding*>(boxes);
|
||||
anchor =
|
||||
ReInterpretTensor<const CenterSizeEncoding*>(input_anchors)[idx];
|
||||
break;
|
||||
}
|
||||
default:
|
||||
// Unsupported type.
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
float ycenter = static_cast<float>(static_cast<double>(box_centersize.y) /
|
||||
static_cast<double>(scale_values.y) *
|
||||
static_cast<double>(anchor.h) +
|
||||
static_cast<double>(anchor.y));
|
||||
|
||||
float xcenter = static_cast<float>(static_cast<double>(box_centersize.x) /
|
||||
static_cast<double>(scale_values.x) *
|
||||
static_cast<double>(anchor.w) +
|
||||
static_cast<double>(anchor.x));
|
||||
|
||||
float half_h =
|
||||
static_cast<float>(0.5 *
|
||||
(std::exp(static_cast<double>(box_centersize.h) /
|
||||
static_cast<double>(scale_values.h))) *
|
||||
static_cast<double>(anchor.h));
|
||||
float half_w =
|
||||
static_cast<float>(0.5 *
|
||||
(std::exp(static_cast<double>(box_centersize.w) /
|
||||
static_cast<double>(scale_values.w))) *
|
||||
static_cast<double>(anchor.w));
|
||||
|
||||
float* decoded_boxes = reinterpret_cast<float*>(
|
||||
context->GetScratchBuffer(context, op_data->decoded_boxes_idx));
|
||||
auto& box = reinterpret_cast<BoxCornerEncoding*>(decoded_boxes)[idx];
|
||||
box.ymin = ycenter - half_h;
|
||||
box.xmin = xcenter - half_w;
|
||||
box.ymax = ycenter + half_h;
|
||||
box.xmax = xcenter + half_w;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void DecreasingPartialArgSort(const float* values, int num_values,
                              int num_to_sort, int* indices) {
  std::iota(indices, indices + num_values, 0);
  std::partial_sort(
      indices, indices + num_to_sort, indices + num_values,
      [&values](const int i, const int j) { return values[i] > values[j]; });
}
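// Example (illustrative values): for values = {0.1f, 0.9f, 0.4f, 0.7f},
// num_values = 4 and num_to_sort = 2, the first two entries of indices
// become {1, 3}; entries past num_to_sort are left in an unspecified order,
// and the callers in this file only read the first num_to_sort entries.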
|
||||
|
||||
int SelectDetectionsAboveScoreThreshold(const float* values, int size,
|
||||
const float threshold,
|
||||
float* keep_values, int* keep_indices) {
|
||||
int counter = 0;
|
||||
for (int i = 0; i < size; i++) {
|
||||
if (values[i] >= threshold) {
|
||||
keep_values[counter] = values[i];
|
||||
keep_indices[counter] = i;
|
||||
counter++;
|
||||
}
|
||||
}
|
||||
return counter;
|
||||
}
|
||||
|
||||
bool ValidateBoxes(const float* decoded_boxes, const int num_boxes) {
|
||||
for (int i = 0; i < num_boxes; ++i) {
|
||||
// ymax>=ymin, xmax>=xmin
|
||||
auto& box = reinterpret_cast<const BoxCornerEncoding*>(decoded_boxes)[i];
|
||||
if (box.ymin >= box.ymax || box.xmin >= box.xmax) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
float ComputeIntersectionOverUnion(const float* decoded_boxes, const int i,
                                   const int j) {
  auto& box_i = reinterpret_cast<const BoxCornerEncoding*>(decoded_boxes)[i];
  auto& box_j = reinterpret_cast<const BoxCornerEncoding*>(decoded_boxes)[j];
  const float area_i = (box_i.ymax - box_i.ymin) * (box_i.xmax - box_i.xmin);
  const float area_j = (box_j.ymax - box_j.ymin) * (box_j.xmax - box_j.xmin);
  if (area_i <= 0 || area_j <= 0) return 0.0;
  const float intersection_ymin = std::max<float>(box_i.ymin, box_j.ymin);
  const float intersection_xmin = std::max<float>(box_i.xmin, box_j.xmin);
  const float intersection_ymax = std::min<float>(box_i.ymax, box_j.ymax);
  const float intersection_xmax = std::min<float>(box_i.xmax, box_j.xmax);
  const float intersection_area =
      std::max<float>(intersection_ymax - intersection_ymin, 0.0) *
      std::max<float>(intersection_xmax - intersection_xmin, 0.0);
  return intersection_area / (area_i + area_j - intersection_area);
}
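// Example (illustrative values): two unit squares offset by 0.5 in x, i.e.
// (ymin, xmin, ymax, xmax) = (0, 0, 1, 1) and (0, 0.5, 1, 1.5), intersect in
// an area of 0.5, so the returned IoU is 0.5 / (1 + 1 - 0.5) = 1/3.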
|
||||
|
||||
// NonMaxSuppressionSingleClass() prunes out box locations with high overlap
// before selecting the highest-scoring boxes (max_detections of them).
// It assumes all boxes are valid at the start and sorts them by score.
// If a lower-scoring box overlaps too much with a higher-scoring box,
// the lower-scoring box is discarded.
// Complexity is O(N^2): pairwise comparison between boxes.
|
||||
TfLiteStatus NonMaxSuppressionSingleClassHelper(
|
||||
TfLiteContext* context, TfLiteNode* node, OpData* op_data,
|
||||
const float* scores, int* selected, int* selected_size,
|
||||
int max_detections) {
|
||||
const TfLiteEvalTensor* input_box_encodings =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensorBoxEncodings);
|
||||
const int num_boxes = input_box_encodings->dims->data[1];
|
||||
const float non_max_suppression_score_threshold =
|
||||
op_data->non_max_suppression_score_threshold;
|
||||
const float intersection_over_union_threshold =
|
||||
op_data->intersection_over_union_threshold;
|
||||
// Maximum detections should be positive.
|
||||
TF_LITE_ENSURE(context, (max_detections >= 0));
|
||||
// intersection_over_union_threshold should be positive
|
||||
// and should be less than 1.
|
||||
TF_LITE_ENSURE(context, (intersection_over_union_threshold > 0.0f) &&
|
||||
(intersection_over_union_threshold <= 1.0f));
|
||||
// Validate boxes
|
||||
float* decoded_boxes = reinterpret_cast<float*>(
|
||||
context->GetScratchBuffer(context, op_data->decoded_boxes_idx));
|
||||
|
||||
TF_LITE_ENSURE(context, ValidateBoxes(decoded_boxes, num_boxes));
|
||||
|
||||
// threshold scores
|
||||
int* keep_indices = reinterpret_cast<int*>(
|
||||
context->GetScratchBuffer(context, op_data->keep_indices_idx));
|
||||
float* keep_scores = reinterpret_cast<float*>(
|
||||
context->GetScratchBuffer(context, op_data->keep_scores_idx));
|
||||
int num_scores_kept = SelectDetectionsAboveScoreThreshold(
|
||||
scores, num_boxes, non_max_suppression_score_threshold, keep_scores,
|
||||
keep_indices);
|
||||
int* sorted_indices = reinterpret_cast<int*>(
|
||||
context->GetScratchBuffer(context, op_data->sorted_indices_idx));
|
||||
|
||||
DecreasingPartialArgSort(keep_scores, num_scores_kept, num_scores_kept,
|
||||
sorted_indices);
|
||||
|
||||
const int num_boxes_kept = num_scores_kept;
|
||||
const int output_size = std::min(num_boxes_kept, max_detections);
|
||||
*selected_size = 0;
|
||||
|
||||
int num_active_candidate = num_boxes_kept;
|
||||
uint8_t* active_box_candidate = reinterpret_cast<uint8_t*>(
|
||||
context->GetScratchBuffer(context, op_data->active_candidate_idx));
|
||||
|
||||
for (int row = 0; row < num_boxes_kept; row++) {
|
||||
active_box_candidate[row] = 1;
|
||||
}
|
||||
for (int i = 0; i < num_boxes_kept; ++i) {
|
||||
if (num_active_candidate == 0 || *selected_size >= output_size) break;
|
||||
if (active_box_candidate[i] == 1) {
|
||||
selected[(*selected_size)++] = keep_indices[sorted_indices[i]];
|
||||
active_box_candidate[i] = 0;
|
||||
num_active_candidate--;
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
for (int j = i + 1; j < num_boxes_kept; ++j) {
|
||||
if (active_box_candidate[j] == 1) {
|
||||
float intersection_over_union = ComputeIntersectionOverUnion(
|
||||
decoded_boxes, keep_indices[sorted_indices[i]],
|
||||
keep_indices[sorted_indices[j]]);
|
||||
|
||||
if (intersection_over_union > intersection_over_union_threshold) {
|
||||
active_box_candidate[j] = 0;
|
||||
num_active_candidate--;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
// This function implements a regular version of Non-Maximal Suppression (NMS)
// for multiple classes where
// 1) NMS is done separately for each class across all anchors,
// 2) only the highest anchor scores across all classes are kept, and
// 3) the worst-case runtime of the regular NMS is O(K*N^2),
//    where N is the number of anchors and K is the number of classes.
|
||||
TfLiteStatus NonMaxSuppressionMultiClassRegularHelper(TfLiteContext* context,
|
||||
TfLiteNode* node,
|
||||
OpData* op_data,
|
||||
const float* scores) {
|
||||
const TfLiteEvalTensor* input_box_encodings =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensorBoxEncodings);
|
||||
const TfLiteEvalTensor* input_class_predictions =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensorClassPredictions);
|
||||
TfLiteEvalTensor* detection_boxes =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensorDetectionBoxes);
|
||||
TfLiteEvalTensor* detection_classes = tflite::micro::GetEvalOutput(
|
||||
context, node, kOutputTensorDetectionClasses);
|
||||
TfLiteEvalTensor* detection_scores =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensorDetectionScores);
|
||||
TfLiteEvalTensor* num_detections =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensorNumDetections);
|
||||
|
||||
const int num_boxes = input_box_encodings->dims->data[1];
|
||||
const int num_classes = op_data->num_classes;
|
||||
const int num_detections_per_class = op_data->detections_per_class;
|
||||
const int max_detections = op_data->max_detections;
|
||||
const int num_classes_with_background =
|
||||
input_class_predictions->dims->data[2];
|
||||
// The row index offset is 1 if background class is included and 0 otherwise.
|
||||
int label_offset = num_classes_with_background - num_classes;
|
||||
TF_LITE_ENSURE(context, num_detections_per_class > 0);
|
||||
|
||||
// For each class, perform non-max suppression.
|
||||
float* class_scores = reinterpret_cast<float*>(
|
||||
context->GetScratchBuffer(context, op_data->score_buffer_idx));
|
||||
int* box_indices_after_regular_non_max_suppression = reinterpret_cast<int*>(
|
||||
context->GetScratchBuffer(context, op_data->buffer_idx));
|
||||
float* scores_after_regular_non_max_suppression =
|
||||
reinterpret_cast<float*>(context->GetScratchBuffer(
|
||||
context, op_data->scores_after_regular_non_max_suppression_idx));
|
||||
|
||||
int size_of_sorted_indices = 0;
|
||||
int* sorted_indices = reinterpret_cast<int*>(
|
||||
context->GetScratchBuffer(context, op_data->sorted_indices_idx));
|
||||
float* sorted_values = reinterpret_cast<float*>(
|
||||
context->GetScratchBuffer(context, op_data->sorted_values_idx));
|
||||
|
||||
for (int col = 0; col < num_classes; col++) {
|
||||
for (int row = 0; row < num_boxes; row++) {
|
||||
// Get scores of boxes corresponding to all anchors for single class
|
||||
class_scores[row] =
|
||||
*(scores + row * num_classes_with_background + col + label_offset);
|
||||
}
|
||||
// Perform non-maximal suppression on single class
|
||||
int selected_size = 0;
|
||||
int* selected = reinterpret_cast<int*>(
|
||||
context->GetScratchBuffer(context, op_data->selected_idx));
|
||||
TF_LITE_ENSURE_STATUS(NonMaxSuppressionSingleClassHelper(
|
||||
context, node, op_data, class_scores, selected, &selected_size,
|
||||
num_detections_per_class));
|
||||
// Add selected indices from non-max suppression of boxes in this class
|
||||
int output_index = size_of_sorted_indices;
|
||||
for (int i = 0; i < selected_size; i++) {
|
||||
int selected_index = selected[i];
|
||||
|
||||
box_indices_after_regular_non_max_suppression[output_index] =
|
||||
(selected_index * num_classes_with_background + col + label_offset);
|
||||
scores_after_regular_non_max_suppression[output_index] =
|
||||
class_scores[selected_index];
|
||||
output_index++;
|
||||
}
|
||||
// Sort the max scores among the selected indices
|
||||
// Get the indices for top scores
|
||||
int num_indices_to_sort = std::min(output_index, max_detections);
|
||||
DecreasingPartialArgSort(scores_after_regular_non_max_suppression,
|
||||
output_index, num_indices_to_sort, sorted_indices);
|
||||
|
||||
// Copy values to temporary vectors
|
||||
for (int row = 0; row < num_indices_to_sort; row++) {
|
||||
int temp = sorted_indices[row];
|
||||
sorted_indices[row] = box_indices_after_regular_non_max_suppression[temp];
|
||||
sorted_values[row] = scores_after_regular_non_max_suppression[temp];
|
||||
}
|
||||
// Copy scores and indices from temporary vectors
|
||||
for (int row = 0; row < num_indices_to_sort; row++) {
|
||||
box_indices_after_regular_non_max_suppression[row] = sorted_indices[row];
|
||||
scores_after_regular_non_max_suppression[row] = sorted_values[row];
|
||||
}
|
||||
size_of_sorted_indices = num_indices_to_sort;
|
||||
}
|
||||
|
||||
// Allocate output tensors
|
||||
for (int output_box_index = 0; output_box_index < max_detections;
|
||||
output_box_index++) {
|
||||
if (output_box_index < size_of_sorted_indices) {
|
||||
const int anchor_index = floor(
|
||||
box_indices_after_regular_non_max_suppression[output_box_index] /
|
||||
num_classes_with_background);
|
||||
const int class_index =
|
||||
box_indices_after_regular_non_max_suppression[output_box_index] -
|
||||
anchor_index * num_classes_with_background - label_offset;
|
||||
const float selected_score =
|
||||
scores_after_regular_non_max_suppression[output_box_index];
|
||||
// detection_boxes
|
||||
float* decoded_boxes = reinterpret_cast<float*>(
|
||||
context->GetScratchBuffer(context, op_data->decoded_boxes_idx));
|
||||
ReInterpretTensor<BoxCornerEncoding*>(detection_boxes)[output_box_index] =
|
||||
reinterpret_cast<BoxCornerEncoding*>(decoded_boxes)[anchor_index];
|
||||
// detection_classes
|
||||
tflite::micro::GetTensorData<float>(detection_classes)[output_box_index] =
|
||||
class_index;
|
||||
// detection_scores
|
||||
tflite::micro::GetTensorData<float>(detection_scores)[output_box_index] =
|
||||
selected_score;
|
||||
} else {
|
||||
ReInterpretTensor<BoxCornerEncoding*>(
|
||||
detection_boxes)[output_box_index] = {0.0f, 0.0f, 0.0f, 0.0f};
|
||||
// detection_classes
|
||||
tflite::micro::GetTensorData<float>(detection_classes)[output_box_index] =
|
||||
0.0f;
|
||||
// detection_scores
|
||||
tflite::micro::GetTensorData<float>(detection_scores)[output_box_index] =
|
||||
0.0f;
|
||||
}
|
||||
}
|
||||
tflite::micro::GetTensorData<float>(num_detections)[0] =
|
||||
size_of_sorted_indices;
|
||||
|
||||
return kTfLiteOk;
|
||||
}

// This function implements a fast version of Non Maximal Suppression for
// multiple classes where
// 1) we keep the top-k scores for each anchor and
// 2) during NMS, each anchor only uses the highest class score for sorting.
// 3) Compared to standard NMS, the worst-case runtime of this version is
// O(N^2) instead of O(KN^2), where N is the number of anchors and K the number
// of classes.
TfLiteStatus NonMaxSuppressionMultiClassFastHelper(TfLiteContext* context,
                                                   TfLiteNode* node,
                                                   OpData* op_data,
                                                   const float* scores) {
  const TfLiteEvalTensor* input_box_encodings =
      tflite::micro::GetEvalInput(context, node, kInputTensorBoxEncodings);
  const TfLiteEvalTensor* input_class_predictions =
      tflite::micro::GetEvalInput(context, node, kInputTensorClassPredictions);
  TfLiteEvalTensor* detection_boxes =
      tflite::micro::GetEvalOutput(context, node, kOutputTensorDetectionBoxes);

  TfLiteEvalTensor* detection_classes = tflite::micro::GetEvalOutput(
      context, node, kOutputTensorDetectionClasses);
  TfLiteEvalTensor* detection_scores =
      tflite::micro::GetEvalOutput(context, node, kOutputTensorDetectionScores);
  TfLiteEvalTensor* num_detections =
      tflite::micro::GetEvalOutput(context, node, kOutputTensorNumDetections);

  const int num_boxes = input_box_encodings->dims->data[1];
  const int num_classes = op_data->num_classes;
  const int max_categories_per_anchor = op_data->max_classes_per_detection;
  const int num_classes_with_background =
      input_class_predictions->dims->data[2];

  // The row index offset is 1 if background class is included and 0 otherwise.
  int label_offset = num_classes_with_background - num_classes;
  TF_LITE_ENSURE(context, (max_categories_per_anchor > 0));
  const int num_categories_per_anchor =
      std::min(max_categories_per_anchor, num_classes);
  float* max_scores = reinterpret_cast<float*>(
      context->GetScratchBuffer(context, op_data->score_buffer_idx));
  int* sorted_class_indices = reinterpret_cast<int*>(
      context->GetScratchBuffer(context, op_data->buffer_idx));

  for (int row = 0; row < num_boxes; row++) {
    const float* box_scores =
        scores + row * num_classes_with_background + label_offset;
    int* class_indices = sorted_class_indices + row * num_classes;
    DecreasingPartialArgSort(box_scores, num_classes, num_categories_per_anchor,
                             class_indices);
    max_scores[row] = box_scores[class_indices[0]];
  }

  // Perform non-maximal suppression on max scores
  int selected_size = 0;
  int* selected = reinterpret_cast<int*>(
      context->GetScratchBuffer(context, op_data->selected_idx));
  TF_LITE_ENSURE_STATUS(NonMaxSuppressionSingleClassHelper(
      context, node, op_data, max_scores, selected, &selected_size,
      op_data->max_detections));

  // Allocate output tensors
  int output_box_index = 0;

  for (int i = 0; i < selected_size; i++) {
    int selected_index = selected[i];

    const float* box_scores =
        scores + selected_index * num_classes_with_background + label_offset;
    const int* class_indices =
        sorted_class_indices + selected_index * num_classes;

    for (int col = 0; col < num_categories_per_anchor; ++col) {
      int box_offset = num_categories_per_anchor * output_box_index + col;

      // detection_boxes
      float* decoded_boxes = reinterpret_cast<float*>(
          context->GetScratchBuffer(context, op_data->decoded_boxes_idx));
      ReInterpretTensor<BoxCornerEncoding*>(detection_boxes)[box_offset] =
          reinterpret_cast<BoxCornerEncoding*>(decoded_boxes)[selected_index];

      // detection_classes
      tflite::micro::GetTensorData<float>(detection_classes)[box_offset] =
          class_indices[col];

      // detection_scores
      tflite::micro::GetTensorData<float>(detection_scores)[box_offset] =
          box_scores[class_indices[col]];

      output_box_index++;
    }
  }

  tflite::micro::GetTensorData<float>(num_detections)[0] = output_box_index;
  return kTfLiteOk;
}
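
// Editor's note (assumption, not part of the upstream kernel):
// DecreasingPartialArgSort is used above as a partial argsort that writes the
// indices of the k largest scores in descending order. For example,
// box_scores = {0.1f, 0.9f, 0.3f} with num_categories_per_anchor = 2 would
// yield class_indices = {1, 2}, so max_scores[row] picks up 0.9f.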

void DequantizeClassPredictions(const TfLiteEvalTensor* input_class_predictions,
                                const int num_boxes,
                                const int num_classes_with_background,
                                float* scores, OpData* op_data) {
  float quant_zero_point =
      static_cast<float>(op_data->input_class_predictions.zero_point);
  float quant_scale =
      static_cast<float>(op_data->input_class_predictions.scale);
  Dequantizer dequantize(quant_zero_point, quant_scale);
  const uint8_t* scores_quant =
      tflite::micro::GetTensorData<uint8_t>(input_class_predictions);
  for (int idx = 0; idx < num_boxes * num_classes_with_background; ++idx) {
    scores[idx] = dequantize(scores_quant[idx]);
  }
}
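
// Editor's note (illustrative, not part of the upstream kernel): Dequantizer
// applies the standard affine mapping real = scale * (quantized - zero_point).
// With scale = 1.0f / 256.0f and zero_point = 128.0f, a raw uint8_t score of
// 192 dequantizes to (192 - 128) / 256 = 0.25f.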

TfLiteStatus NonMaxSuppressionMultiClass(TfLiteContext* context,
                                         TfLiteNode* node, OpData* op_data) {
  // Get the input tensors
  const TfLiteEvalTensor* input_box_encodings =
      tflite::micro::GetEvalInput(context, node, kInputTensorBoxEncodings);
  const TfLiteEvalTensor* input_class_predictions =
      tflite::micro::GetEvalInput(context, node, kInputTensorClassPredictions);
  const int num_boxes = input_box_encodings->dims->data[1];
  const int num_classes = op_data->num_classes;

  TF_LITE_ENSURE_EQ(context, input_class_predictions->dims->data[0],
                    kBatchSize);
  TF_LITE_ENSURE_EQ(context, input_class_predictions->dims->data[1], num_boxes);
  const int num_classes_with_background =
      input_class_predictions->dims->data[2];

  TF_LITE_ENSURE(context, (num_classes_with_background - num_classes <= 1));
  TF_LITE_ENSURE(context, (num_classes_with_background >= num_classes));

  const float* scores;
  switch (input_class_predictions->type) {
    case kTfLiteUInt8: {
      float* temporary_scores = reinterpret_cast<float*>(
          context->GetScratchBuffer(context, op_data->scores_idx));
      DequantizeClassPredictions(input_class_predictions, num_boxes,
                                 num_classes_with_background, temporary_scores,
                                 op_data);
      scores = temporary_scores;
    } break;
    case kTfLiteFloat32:
      scores = tflite::micro::GetTensorData<float>(input_class_predictions);
      break;
    default:
      // Unsupported type.
      return kTfLiteError;
  }

  if (op_data->use_regular_non_max_suppression) {
    TF_LITE_ENSURE_STATUS(NonMaxSuppressionMultiClassRegularHelper(
        context, node, op_data, scores));
  } else {
    TF_LITE_ENSURE_STATUS(
        NonMaxSuppressionMultiClassFastHelper(context, node, op_data, scores));
  }

  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE(context, (kBatchSize == 1));
  auto* op_data = static_cast<OpData*>(node->user_data);

  // These two functions correspond to two blocks in the Object Detection model.
  // In the future, we would like to break the custom op into two blocks, which
  // is currently not feasible because we would like to input quantized inputs
  // and do all calculations in float. Mixed quantized/float calculations are
  // currently not supported in TFLite.

  // This fills in temporary decoded_boxes
  // by transforming input_box_encodings and input_anchors from
  // CenterSizeEncodings to BoxCornerEncoding
  TF_LITE_ENSURE_STATUS(DecodeCenterSizeBoxes(context, node, op_data));

  // This fills in the output tensors
  // by choosing effective set of decoded boxes
  // based on Non Maximal Suppression, i.e. selecting
  // highest scoring non-overlapping boxes.
  TF_LITE_ENSURE_STATUS(NonMaxSuppressionMultiClass(context, node, op_data));

  return kTfLiteOk;
}
} // namespace

TfLiteRegistration* Register_DETECTION_POSTPROCESS() {
  static TfLiteRegistration r = {/*init=*/Init,
                                 /*free=*/Free,
                                 /*prepare=*/Prepare,
                                 /*invoke=*/Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
}

} // namespace tflite
@@ -0,0 +1,25 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_KERNELS_FLEXBUFFERS_GENERATED_DATA_H
#define TENSORFLOW_LITE_MICRO_KERNELS_FLEXBUFFERS_GENERATED_DATA_H

extern const int g_gen_data_size_none_regular_nms;
extern const unsigned char g_gen_data_none_regular_nms[];

extern const int g_gen_data_size_regular_nms;
extern const unsigned char g_gen_data_regular_nms[];

#endif
206
code/components/tfmicro/tensorflow/lite/micro/kernels/div.cc
Normal file
@@ -0,0 +1,206 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/reference/div.h"

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace {

constexpr int kInputTensor1 = 0;
constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;

struct OpData {
  // Parameters used in the quantized paths where the output is 8bit
  int32_t input1_zero_point;
  int32_t input2_zero_point;
  int32_t output_zero_point;
  int32_t output_activation_min;
  int32_t output_activation_max;

  // Parameters used in all quantized paths
  int32_t output_multiplier;
  int output_shift;
};

TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
                             TfLiteDivParams* params, OpData* data) {
  TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);

  const TfLiteTensor* input1;
  TF_LITE_ENSURE_OK(context,
                    GetInputSafe(context, node, kInputTensor1, &input1));
  const TfLiteTensor* input2;
  TF_LITE_ENSURE_OK(context,
                    GetInputSafe(context, node, kInputTensor2, &input2));
  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context,
                    GetOutputSafe(context, node, kOutputTensor, &output));

  TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
  TF_LITE_ENSURE_TYPES_EQ(context, input1->type, output->type);

  if (output->type == kTfLiteInt8) {
    TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
        context, params->activation, output, &data->output_activation_min,
        &data->output_activation_max));
    const double real_multiplier = static_cast<double>(
        input1->params.scale / (input2->params.scale * output->params.scale));
    QuantizeMultiplier(real_multiplier, &data->output_multiplier,
                       &data->output_shift);
    data->input1_zero_point = input1->params.zero_point;
    data->input2_zero_point = input2->params.zero_point;
    data->output_zero_point = output->params.zero_point;
  }

  return kTfLiteOk;
}
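
// Editor's note (illustrative, not part of the upstream kernel): for the int8
// path the rescale factor combines the three tensor scales as
//   real_multiplier = input1_scale / (input2_scale * output_scale),
// e.g. input1_scale = 0.5, input2_scale = 0.25 and output_scale = 2.0 give
// real_multiplier = 1.0, which QuantizeMultiplier then encodes as the
// fixed-point output_multiplier / output_shift pair used at Eval time.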

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  auto* params = static_cast<TfLiteDivParams*>(node->builtin_data);
  auto* data = static_cast<OpData*>(node->user_data);
  return CalculateOpData(context, node, params, data);
}

void EvalDiv(TfLiteContext* context, TfLiteNode* node, TfLiteDivParams* params,
             const OpData* data, const TfLiteEvalTensor* input1,
             const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
  tflite::ArithmeticParams op_params = {};

#define TF_LITE_DIV(type, opname, data_type)                             \
  data_type output_activation_min, output_activation_max;                \
  CalculateActivationRange(params->activation, &output_activation_min,   \
                           &output_activation_max);                      \
  SetActivationParams(output_activation_min, output_activation_max,      \
                      &op_params);                                       \
  type::opname(op_params, tflite::micro::GetTensorShape(input1),         \
               tflite::micro::GetTensorData<data_type>(input1),          \
               tflite::micro::GetTensorShape(input2),                    \
               tflite::micro::GetTensorData<data_type>(input2),          \
               tflite::micro::GetTensorShape(output),                    \
               tflite::micro::GetTensorData<data_type>(output))

  bool requires_broadcast = reference_ops::ProcessBroadcastShapes(
      tflite::micro::GetTensorShape(input1),
      tflite::micro::GetTensorShape(input2), &op_params);

  if (requires_broadcast) {
    TF_LITE_DIV(reference_ops, BroadcastDivSlow, float);
  } else {
    TF_LITE_DIV(reference_ops, Div, float);
  }
#undef TF_LITE_DIV
}
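
// Editor's note (illustrative, not part of the upstream kernel): for the
// non-broadcast float path, TF_LITE_DIV(reference_ops, Div, float) above
// expands to a CalculateActivationRange + SetActivationParams pair followed by
//   reference_ops::Div(op_params,
//                      tflite::micro::GetTensorShape(input1),
//                      tflite::micro::GetTensorData<float>(input1),
//                      /* ...same for input2 and output... */);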

TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
                           TfLiteDivParams* params, const OpData* data,
                           const TfLiteEvalTensor* input1,
                           const TfLiteEvalTensor* input2,
                           TfLiteEvalTensor* output) {
  tflite::ArithmeticParams op_params = {};

#define TF_LITE_DIV(type, opname, dtype)                              \
  type::opname(op_params, tflite::micro::GetTensorShape(input1),     \
               tflite::micro::GetTensorData<dtype>(input1),          \
               tflite::micro::GetTensorShape(input2),                \
               tflite::micro::GetTensorData<dtype>(input2),          \
               tflite::micro::GetTensorShape(output),                \
               tflite::micro::GetTensorData<dtype>(output))

  if (input1->type == kTfLiteInt8 && input2->type == kTfLiteInt8 &&
      output->type == kTfLiteInt8) {
    SetActivationParams(data->output_activation_min,
                        data->output_activation_max, &op_params);
    op_params.input1_offset = -data->input1_zero_point;
    op_params.input2_offset = -data->input2_zero_point;
    op_params.output_offset = data->output_zero_point;
    op_params.output_multiplier = data->output_multiplier;
    op_params.output_shift = data->output_shift;

    bool requires_broadcast = reference_ops::ProcessBroadcastShapes(
        tflite::micro::GetTensorShape(input1),
        tflite::micro::GetTensorShape(input2), &op_params);

    if (requires_broadcast) {
      TF_LITE_DIV(reference_ops, BroadcastDivSlow, int8_t);
    } else {
      TF_LITE_DIV(reference_ops, Div, int8_t);
    }
#undef TF_LITE_DIV
  } else {
    TF_LITE_KERNEL_LOG(
        context, "Unsupported combination of input and output types in DIV.");
    return kTfLiteError;
  }

  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->builtin_data != nullptr);
  auto* params = static_cast<TfLiteDivParams*>(node->builtin_data);
  TFLITE_DCHECK(node->user_data != nullptr);
  auto* data = static_cast<OpData*>(node->user_data);

  const TfLiteEvalTensor* input1 =
      tflite::micro::GetEvalInput(context, node, kInputTensor1);
  const TfLiteEvalTensor* input2 =
      tflite::micro::GetEvalInput(context, node, kInputTensor2);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  if (output->type == kTfLiteFloat32) {
    EvalDiv(context, node, params, data, input1, input2, output);
  } else if (output->type == kTfLiteInt8) {
    TF_LITE_ENSURE_OK(context, EvalQuantized(context, node, params, data,
                                             input1, input2, output));
  } else {
    TF_LITE_KERNEL_LOG(context,
                       "DIV currently only supports FLOAT32 and quantized "
                       "INT8, got type %s (%d).",
                       TfLiteTypeGetName(output->type), output->type);
    return kTfLiteError;
  }

  return kTfLiteOk;
}

} // namespace

TfLiteRegistration Register_DIV() {
  return {/*init=*/Init,
          /*free=*/nullptr,
          /*prepare=*/Prepare,
          /*invoke=*/Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

} // namespace tflite
151
code/components/tfmicro/tensorflow/lite/micro/kernels/elu.cc
Normal file
@@ -0,0 +1,151 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/reference/elu.h"

#include <algorithm>
#include <limits>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace {

// Input/output tensor index.
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;

// OLD-TODO(b/142762739): We should figure out a multi-threading plan for most
// of the activation ops below.

struct OpData {
  int8_t table[256];
};

using TransformFunc = float (*)(float);

template <typename T>
void PopulateLookupTable(const TfLiteTensor* input, const TfLiteTensor* output,
                         const TransformFunc transform, OpData* data) {
  if (sizeof(T) != 1) TF_LITE_FATAL("Lookup table valid only for 8bit");

  const float inverse_scale = 1 / output->params.scale;
  int32_t maxval = std::numeric_limits<T>::max();
  int32_t minval = std::numeric_limits<T>::min();
  for (int32_t val = minval; val <= maxval; ++val) {
    const float dequantized =
        input->params.scale * (val - input->params.zero_point);
    const float transformed = transform(dequantized);
    const float rescaled = TfLiteRound(transformed * inverse_scale);
    const int32_t quantized =
        static_cast<int32_t>(rescaled + output->params.zero_point);
    data->table[static_cast<uint8_t>(static_cast<T>(val))] =
        static_cast<T>(std::max(std::min(maxval, quantized), minval));
  }
}
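
// Editor's note (illustrative, not part of the upstream kernel): each table
// entry is the input value pushed through dequantize -> transform ->
// requantize. For ELU with input scale 0.1f, zero_point 0 and the same output
// quantization, the int8 value -10 dequantizes to -1.0f, transforms to
// expf(-1.0f) - 1.0f (about -0.632f) and requantizes to roughly -6, clamped to
// the int8 range.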

// OLD-TODO(b/143696793): move this to optimized_ops.
void EvalUsingLookupTable(const OpData* data, const TfLiteEvalTensor* input,
                          TfLiteEvalTensor* output) {
  const int size = MatchingFlatSize(tflite::micro::GetTensorShape(input),
                                    tflite::micro::GetTensorShape(output));
  int8_t* output_data = tflite::micro::GetTensorData<int8_t>(output);
  const int8_t* input_data = tflite::micro::GetTensorData<int8_t>(input);

  for (int i = 0; i < size; ++i) {
    output_data[i] = data->table[static_cast<uint8_t>(input_data[i])];
  }
}

TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
  const TfLiteTensor* input;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context,
                    GetOutputSafe(context, node, kOutputTensor, &output));
  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);

  // Use LUT to handle quantized elu path.
  if (input->type == kTfLiteInt8) {
    OpData* data = static_cast<OpData*>(node->user_data);
    TransformFunc transform = [](float value) {
      return value < 0.0f ? std::exp(value) - 1.0f : value;
    };
    PopulateLookupTable<int8_t>(input, output, transform, data);
  }

  return kTfLiteOk;
}

void* EluInit(TfLiteContext* context, const char* buffer, size_t length) {
  // This is a builtin op, so we don't use the contents in 'buffer', if any.
  // Instead, we allocate a new object to carry information from Prepare() to
  // Eval().
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus EluPrepare(TfLiteContext* context, TfLiteNode* node) {
  return CalculateOpData(context, node);
}

TfLiteStatus EluEval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
  switch (input->type) {
    case kTfLiteFloat32: {
      reference_ops::Elu(tflite::micro::GetTensorShape(input),
                         tflite::micro::GetTensorData<float>(input),
                         tflite::micro::GetTensorShape(output),
                         tflite::micro::GetTensorData<float>(output));
      return kTfLiteOk;
    }
    case kTfLiteInt8: {
      const OpData* data = static_cast<OpData*>(node->user_data);
      EvalUsingLookupTable(data, input, output);
      return kTfLiteOk;
    }
    default:
      TF_LITE_KERNEL_LOG(
          context, "ELU only supports float32 and int8 currently, got %s.",
          TfLiteTypeGetName(input->type));
      return kTfLiteError;
  }
}

} // namespace

TfLiteRegistration Register_ELU() {
  return {/*init=*/EluInit,
          /*free=*/nullptr,
          /*prepare=*/EluPrepare,
          /*invoke=*/EluEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

} // namespace tflite
@@ -19,14 +19,9 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"

namespace tflite {
namespace ops {
namespace micro {
namespace custom {

TfLiteRegistration* Register_ETHOSU() { return nullptr; }

const char* GetString_ETHOSU() { return ""; }

} // namespace custom
} // namespace micro
} // namespace ops
} // namespace tflite

@@ -0,0 +1,28 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ETHOSU_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_ETHOSU_H_

#include "tensorflow/lite/c/common.h"

namespace tflite {

TfLiteRegistration* Register_ETHOSU();

const char* GetString_ETHOSU();

} // namespace tflite

#endif // TENSORFLOW_LITE_MICRO_KERNELS_ETHOSU_H_
78
code/components/tfmicro/tensorflow/lite/micro/kernels/exp.cc
Normal file
@@ -0,0 +1,78 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/reference/exp.h"

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace {

constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);
  TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
  TF_LITE_ENSURE_TYPES_EQ(context, output->type, input->type);
  TF_LITE_ENSURE_EQ(context, output->bytes, input->bytes);
  TF_LITE_ENSURE_EQ(context, output->dims->size, input->dims->size);
  for (int i = 0; i < output->dims->size; ++i) {
    TF_LITE_ENSURE_EQ(context, output->dims->data[i], input->dims->data[i]);
  }
  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
  int flat_size = MatchingFlatSize(tflite::micro::GetTensorShape(input),
                                   tflite::micro::GetTensorShape(output));

  if (input->type == kTfLiteFloat32) {
    reference_ops::Exp(tflite::micro::GetTensorData<float>(input),
                       static_cast<size_t>(flat_size),
                       tflite::micro::GetTensorData<float>(output));
  } else {
    TF_LITE_KERNEL_LOG(context, "Type %s (%d) currently not supported by Exp.",
                       TfLiteTypeGetName(input->type), input->type);
    return kTfLiteError;
  }
  return kTfLiteOk;
}
} // namespace

TfLiteRegistration Register_EXP() {
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/Prepare,
          /*invoke=*/Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

} // namespace tflite
@@ -0,0 +1,152 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"

namespace tflite {
namespace {

constexpr int kInputTensor = 0;
constexpr int kAxisTensor = 1;
constexpr int kOutputTensor = 0;

TfLiteStatus ExpandTensorDim(TfLiteContext* context,
                             const TfLiteEvalTensor* input, int32_t axis,
                             TfLiteEvalTensor* output) {
  const TfLiteIntArray* input_dims = input->dims;
  TfLiteIntArray* output_dims = output->dims;
  if (axis < 0) {
    axis = input_dims->size + 1 + axis;
  }
  TF_LITE_ENSURE(context, (axis <= input_dims->size));

  output_dims->size = input_dims->size + 1;
  for (int i = 0; i < output_dims->size; ++i) {
    if (i < axis) {
      output_dims->data[i] = input_dims->data[i];
    } else if (i == axis) {
      output_dims->data[i] = 1;
    } else {
      output_dims->data[i] = input_dims->data[i - 1];
    }
  }
  return kTfLiteOk;
}
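
// Editor's note (illustrative, not part of the upstream kernel): for an input
// of shape {2, 3}, axis = 1 produces an output of shape {2, 1, 3}; a negative
// axis counts from the back, so axis = -1 is remapped to 2 above and produces
// {2, 3, 1}.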

TfLiteStatus GetAxisValueFromTensor(TfLiteContext* context,
                                    const TfLiteEvalTensor* axis,
                                    int32_t* axis_value) {
  const int axis_dims = (tflite::micro::GetTensorShape(axis)).DimensionsCount();
  if (axis_dims > 1) {
    TF_LITE_KERNEL_LOG(context,
                       "Expand_Dims only supports a scalar axis tensor, "
                       "got %d dimensions.",
                       axis_dims);
    return kTfLiteError;
  }

  if (kTfLiteInt32 == (axis->type)) {
    const int32_t* axis_ptr = tflite::micro::GetTensorData<int32_t>(axis);
    *axis_value = axis_ptr[0];
    return kTfLiteOk;
  } else {
    TF_LITE_KERNEL_LOG(context,
                       "Axis type %s (%d) not supported by Expand_Dims.",
                       TfLiteTypeGetName(axis->type), axis->type);
    return kTfLiteError;
  }
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
  const TfLiteTensor* input;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
  const TfLiteTensor* axis;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kAxisTensor, &axis));
  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context,
                    GetOutputSafe(context, node, kOutputTensor, &output));
  output->type = input->type;
  if (IsDynamicTensor(axis)) {
    TF_LITE_KERNEL_LOG(context,
                       "DynamicTensor is not yet supported by Expand_Dims.");
    return kTfLiteError;
  }
  return kTfLiteOk;
}

template <typename T>
void memCopyN(T* out, const T* in, const int num_elements) {
  for (int i = 0; i < num_elements; ++i) {
    out[i] = in[i];
  }
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  const TfLiteEvalTensor* axis =
      tflite::micro::GetEvalInput(context, node, kAxisTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
  const int flat_size = ElementCount(*input->dims);
  const int input_dims = input->dims->size;

  int32_t axis_value;
  TF_LITE_ENSURE_OK(context,
                    GetAxisValueFromTensor(context, axis, &axis_value));
  if ((axis_value > static_cast<int32_t>(input_dims)) ||
      (axis_value < static_cast<int32_t>(-(input_dims + 1)))) {
    TF_LITE_KERNEL_LOG(context, "Invalid Expand_Dims axis value (%d).",
                       axis_value);
    return kTfLiteError;
  }
  ExpandTensorDim(context, input, axis_value, output);

  switch (input->type) {
    case kTfLiteFloat32: {
      memCopyN(tflite::micro::GetTensorData<float>(output),
               tflite::micro::GetTensorData<float>(input), flat_size);
    } break;
    case kTfLiteInt8: {
      memCopyN(tflite::micro::GetTensorData<int8_t>(output),
               tflite::micro::GetTensorData<int8_t>(input), flat_size);
    } break;
    default:
      TF_LITE_KERNEL_LOG(
          context,
          "Expand_Dims only currently supports int8 and float32, got %d.",
          input->type);
      return kTfLiteError;
  }
  return kTfLiteOk;
}
} // namespace

TfLiteRegistration Register_EXPAND_DIMS() {
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/Prepare,
          /*invoke=*/Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

} // namespace tflite
131
code/components/tfmicro/tensorflow/lite/micro/kernels/fill.cc
Normal file
@@ -0,0 +1,131 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/reference/fill.h"

#include <stdint.h>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {

namespace {

template <typename T>
TfLiteStatus EnsureEqImpl(TfLiteContext* context, const TfLiteIntArray* array,
                          const TfLiteTensor* tensor) {
  for (int i = 0; i < array->size; ++i) {
    TF_LITE_ENSURE_EQ(context, array->data[i], GetTensorData<T>(tensor)[i]);
  }
  return kTfLiteOk;
}

// Ensure the equality of an int array and a tensor, which must be
// one-dimensional and of an integer type.
TfLiteStatus EnsureEq(TfLiteContext* context, const TfLiteIntArray* array,
                      const TfLiteTensor* tensor) {
  TF_LITE_ENSURE_EQ(context, NumDimensions(tensor), 1);
  const auto tensor_len = tensor->dims->data[0];
  TF_LITE_ENSURE_EQ(context, array->size, tensor_len);

  switch (tensor->type) {
    case kTfLiteInt8:
      return EnsureEqImpl<int8_t>(context, array, tensor);
    case kTfLiteUInt8:
      return EnsureEqImpl<uint8_t>(context, array, tensor);
    case kTfLiteInt16:
      return EnsureEqImpl<int16_t>(context, array, tensor);
    case kTfLiteInt32:
      return EnsureEqImpl<int32_t>(context, array, tensor);
    case kTfLiteInt64:
      return EnsureEqImpl<int64_t>(context, array, tensor);
    default:
      TF_LITE_KERNEL_LOG(context,
                         "cannot compare int array to tensor of type %d.",
                         tensor->type);
      return kTfLiteError;
  }
}

constexpr int kDimsTensor = 0;
constexpr int kValueTensor = 1;
constexpr int kOutputTensor = 0;

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  // Ensure inputs and outputs exist.
  const TfLiteTensor* dims;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kDimsTensor, &dims));
  const TfLiteTensor* value;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kValueTensor, &value));
  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context,
                    GetOutputSafe(context, node, kOutputTensor, &output));

  // The value tensor must be a scalar.
  TF_LITE_ENSURE_EQ(context, NumDimensions(value), 0);

  // The value type and output type must match.
  TF_LITE_ENSURE_EQ(context, value->type, output->type);

  // The dims tensor must match the output tensor shape. As a byproduct,
  // ensures the dims tensor is of an integer type.
  TF_LITE_ENSURE_OK(context, EnsureEq(context, output->dims, dims));

  return kTfLiteOk;
}

template <typename T>
void FillImpl(const TfLiteEvalTensor* value, TfLiteEvalTensor* output) {
  reference_ops::Fill(
      micro::GetTensorShape(value), micro::GetTensorData<T>(value),
      micro::GetTensorShape(output), micro::GetTensorData<T>(output));
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* value =
      micro::GetEvalInput(context, node, kValueTensor);
  TfLiteEvalTensor* output = micro::GetEvalOutput(context, node, kOutputTensor);

  switch (value->type) {
    case kTfLiteFloat32:
      FillImpl<float>(value, output);
      break;
    default:
      TF_LITE_KERNEL_LOG(
          context, "Fill only currently supports float32 for input 1, got %s.",
          TfLiteTypeGetName(value->type));
      return kTfLiteError;
  }

  return kTfLiteOk;
}

} // namespace

TfLiteRegistration Register_FILL() {
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/Prepare,
          /*invoke=*/Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

} // namespace tflite
@@ -28,176 +28,37 @@ limitations under the License.
namespace tflite {
namespace {

struct OpData {
  // The scaling factor from input to output (aka the 'real multiplier') can
  // be represented as a fixed point multiplier plus a left shift.
  int32_t output_multiplier;
  int output_shift;
  // The range of the fused activation layer. For example for kNone and
  // uint8_t these would be 0 and 255.
  int32_t output_activation_min;
  int32_t output_activation_max;
  // The index of the temporary tensor where the quantized inputs are cached.
  int input_quantized_index;
  // Cached zero point values of tensors.
  int32_t input_zero_point;
  int32_t filter_zero_point;
  int32_t output_zero_point;
};

constexpr int kInputTensor = 0;
constexpr int kWeightsTensor = 1;
constexpr int kBiasTensor = 2;
constexpr int kOutputTensor = 0;

TfLiteStatus CalculateOpData(TfLiteContext* context,
                             TfLiteFusedActivation activation,
                             TfLiteType data_type, const TfLiteTensor* input,
                             const TfLiteTensor* filter,
                             const TfLiteTensor* bias, TfLiteTensor* output,
                             OpData* data) {
  TfLiteStatus status = kTfLiteOk;
  if (data_type != kTfLiteFloat32) {
    double real_multiplier = 0.0;
    TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler(
        context, input, filter, bias, output, &real_multiplier));
    int exponent;
    QuantizeMultiplier(real_multiplier, &data->output_multiplier, &exponent);
    data->output_shift = -exponent;
    TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
        context, activation, output, &data->output_activation_min,
        &data->output_activation_max));

    data->input_zero_point = input->params.zero_point;
    data->filter_zero_point = filter->params.zero_point;
    data->output_zero_point = output->params.zero_point;
  }
  return status;
}

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
  return context->AllocatePersistentBuffer(context,
                                           sizeof(OpDataFullyConnected));
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  TFLITE_DCHECK(node->builtin_data != nullptr);

  OpData* data = static_cast<OpData*>(node->user_data);
  auto* data = static_cast<OpDataFullyConnected*>(node->user_data);
  const auto params =
      static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);

  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  const TfLiteTensor* input =
      GetInput(context, node, kFullyConnectedInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor);
  const TfLiteTensor* filter =
      GetInput(context, node, kFullyConnectedWeightsTensor);
  TF_LITE_ENSURE(context, filter != nullptr);
  const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  const TfLiteTensor* bias =
      GetOptionalInputTensor(context, node, kFullyConnectedBiasTensor);
  TfLiteTensor* output = GetOutput(context, node, kFullyConnectedOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);

  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
  TF_LITE_ENSURE_MSG(context, input->type == filter->type,
                     "Hybrid models are not supported on TFLite Micro.");

  return CalculateOpData(context, params->activation, input->type, input,
                         filter, bias, output, data);
}

TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
                               const OpData& data,
                               const TfLiteEvalTensor* input,
                               const TfLiteEvalTensor* filter,
                               const TfLiteEvalTensor* bias,
                               TfLiteEvalTensor* output) {
  tflite::FullyConnectedParams op_params;
  op_params.input_offset = -data.input_zero_point;
  op_params.weights_offset = -data.filter_zero_point;
  op_params.output_offset = data.output_zero_point;
  op_params.output_multiplier = data.output_multiplier;
  // TODO(b/138810107): Figure out whether output shift should be inverted
  op_params.output_shift = -data.output_shift;
  op_params.quantized_activation_min = data.output_activation_min;
  op_params.quantized_activation_max = data.output_activation_max;

  reference_integer_ops::FullyConnected(
      op_params, tflite::micro::GetTensorShape(input),
      tflite::micro::GetTensorData<int8_t>(input),
      tflite::micro::GetTensorShape(filter),
      tflite::micro::GetTensorData<int8_t>(filter),
      tflite::micro::GetTensorShape(bias),
      tflite::micro::GetTensorData<int32_t>(bias),
      tflite::micro::GetTensorShape(output),
      tflite::micro::GetTensorData<int8_t>(output));
  return kTfLiteOk;
}

TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
                           const OpData& data, const TfLiteEvalTensor* input,
                           const TfLiteEvalTensor* filter,
                           const TfLiteEvalTensor* bias,
                           TfLiteEvalTensor* output) {
  const int32_t input_offset = -data.input_zero_point;
  const int32_t filter_offset = -data.filter_zero_point;
  const int32_t output_offset = data.output_zero_point;

  tflite::FullyConnectedParams op_params;
  op_params.input_offset = input_offset;
  op_params.weights_offset = filter_offset;
  op_params.output_offset = output_offset;
  op_params.output_multiplier = data.output_multiplier;
  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
  op_params.output_shift = -data.output_shift;
  op_params.quantized_activation_min = data.output_activation_min;
  op_params.quantized_activation_max = data.output_activation_max;

#define TF_LITE_FULLY_CONNECTED(output_data_type)                      \
  reference_ops::FullyConnected(                                       \
      op_params, tflite::micro::GetTensorShape(input),                 \
      tflite::micro::GetTensorData<uint8_t>(input),                    \
      tflite::micro::GetTensorShape(filter),                           \
      tflite::micro::GetTensorData<uint8_t>(filter),                   \
      tflite::micro::GetTensorShape(bias),                             \
      tflite::micro::GetTensorData<int32_t>(bias),                     \
      tflite::micro::GetTensorShape(output),                           \
      tflite::micro::GetTensorData<output_data_type>(output))
  switch (output->type) {
    case kTfLiteUInt8:
      TF_LITE_FULLY_CONNECTED(uint8_t);
      break;
    case kTfLiteInt16:
      TF_LITE_FULLY_CONNECTED(int16_t);
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                         TfLiteTypeGetName(output->type), output->type);
      return kTfLiteError;
  }

  return kTfLiteOk;
}

TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
                       TfLiteFusedActivation activation,
                       const TfLiteEvalTensor* input,
                       const TfLiteEvalTensor* filter,
                       const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) {
  float output_activation_min, output_activation_max;
  CalculateActivationRange(activation, &output_activation_min,
                           &output_activation_max);
  tflite::FullyConnectedParams op_params;
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;
  tflite::reference_ops::FullyConnected(
      op_params, tflite::micro::GetTensorShape(input),
      tflite::micro::GetTensorData<float>(input),
      tflite::micro::GetTensorShape(filter),
      tflite::micro::GetTensorData<float>(filter),
      tflite::micro::GetTensorShape(bias),
      tflite::micro::GetTensorData<float>(bias),
      tflite::micro::GetTensorShape(output),
      tflite::micro::GetTensorData<float>(output));
  return kTfLiteOk;
  return CalculateOpDataFullyConnected(context, params->activation, input->type,
                                       input, filter, bias, output, data);
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
@@ -206,33 +67,66 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
      static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);

  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
      tflite::micro::GetEvalInput(context, node, kFullyConnectedInputTensor);
  const TfLiteEvalTensor* filter =
      tflite::micro::GetEvalInput(context, node, kWeightsTensor);
      tflite::micro::GetEvalInput(context, node, kFullyConnectedWeightsTensor);
  const TfLiteEvalTensor* bias =
      tflite::micro::GetEvalInput(context, node, kBiasTensor);
      tflite::micro::GetEvalInput(context, node, kFullyConnectedBiasTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
      tflite::micro::GetEvalOutput(context, node, kFullyConnectedOutputTensor);

  TFLITE_DCHECK(node->user_data != nullptr);
  const OpData& data = *(static_cast<const OpData*>(node->user_data));
  const auto& data =
      *(static_cast<const OpDataFullyConnected*>(node->user_data));

  // Checks in Prepare ensure input, output and filter types are all the same.
  switch (input->type) {
    case kTfLiteFloat32:
      return EvalFloat(context, node, params->activation, input, filter, bias,
                       output);
    case kTfLiteInt8:
      return EvalQuantizedInt8(context, node, data, input, filter, bias,
                               output);
    case kTfLiteFloat32: {
      tflite::reference_ops::FullyConnected(
          FullyConnectedParamsFloat(params->activation),
          tflite::micro::GetTensorShape(input),
          tflite::micro::GetTensorData<float>(input),
          tflite::micro::GetTensorShape(filter),
          tflite::micro::GetTensorData<float>(filter),
          tflite::micro::GetTensorShape(bias),
          tflite::micro::GetTensorData<float>(bias),
          tflite::micro::GetTensorShape(output),
          tflite::micro::GetTensorData<float>(output));
      break;
    }

    case kTfLiteUInt8:
      return EvalQuantized(context, node, data, input, filter, bias, output);
    case kTfLiteInt8: {
      tflite::reference_integer_ops::FullyConnected(
          FullyConnectedParamsQuantized(data),
          tflite::micro::GetTensorShape(input),
          tflite::micro::GetTensorData<int8_t>(input),
          tflite::micro::GetTensorShape(filter),
          tflite::micro::GetTensorData<int8_t>(filter),
          tflite::micro::GetTensorShape(bias),
          tflite::micro::GetTensorData<int32_t>(bias),
          tflite::micro::GetTensorShape(output),
          tflite::micro::GetTensorData<int8_t>(output));
      break;
    }

    default:
    case kTfLiteUInt8: {
      tflite::reference_ops::FullyConnected(
          FullyConnectedParamsQuantized(data),
          tflite::micro::GetTensorShape(input),
          tflite::micro::GetTensorData<uint8_t>(input),
          tflite::micro::GetTensorShape(filter),
          tflite::micro::GetTensorData<uint8_t>(filter),
          tflite::micro::GetTensorShape(bias),
          tflite::micro::GetTensorData<int32_t>(bias),
          tflite::micro::GetTensorShape(output),
          tflite::micro::GetTensorData<uint8_t>(output));
      break;
    }
    default: {
      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                         TfLiteTypeGetName(input->type), input->type);
      return kTfLiteError;
    }
  }
  return kTfLiteOk;
}

@@ -15,10 +15,51 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_FULLY_CONNECTED_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_FULLY_CONNECTED_H_

#include <cstdint>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {

struct OpDataFullyConnected {
  // The scaling factor from input to output (aka the 'real multiplier') can
  // be represented as a fixed point multiplier plus a left shift.
  int32_t output_multiplier;
  int output_shift;
  // The range of the fused activation layer. For example for kNone and
  // uint8_t these would be 0 and 255.
  int32_t output_activation_min;
  int32_t output_activation_max;
  // The index of the temporary tensor where the quantized inputs are cached.
  int input_quantized_index;
  // Cached zero point values of tensors.
  int32_t input_zero_point;
  int32_t filter_zero_point;
  int32_t output_zero_point;
};

extern const int kFullyConnectedInputTensor;
extern const int kFullyConnectedWeightsTensor;
extern const int kFullyConnectedBiasTensor;
extern const int kFullyConnectedOutputTensor;

// Returns a FullyConnectedParams struct with all the parameters needed for a
// float computation.
FullyConnectedParams FullyConnectedParamsFloat(
    TfLiteFusedActivation activation);

// Returns a FullyConnectedParams struct with all the parameters needed for a
// quantized computation.
FullyConnectedParams FullyConnectedParamsQuantized(
    const OpDataFullyConnected& op_data);

TfLiteStatus CalculateOpDataFullyConnected(
    TfLiteContext* context, TfLiteFusedActivation activation,
    TfLiteType data_type, const TfLiteTensor* input, const TfLiteTensor* filter,
    const TfLiteTensor* bias, TfLiteTensor* output, OpDataFullyConnected* data);

// This is the most generic TfLiteRegistration. The actual supported types may
// still be target dependent. The only requirement is that every implementation
// (reference or optimized) must define this function.
@@ -30,7 +71,7 @@ TfLiteRegistration Register_FULLY_CONNECTED();
// part of the build. As a result, we use defined(ARDUINO) as proxy for the
// CMSIS kernels for this one special case.

// Returns a TfLiteRegistration struct for cmsis-nn kernel variant that only
// Returns a TfLiteRegistration struct for cmsis_nn kernel variant that only
// supports int8.
TfLiteRegistration Register_FULLY_CONNECTED_INT8();

@@ -0,0 +1,78 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/fully_connected.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {

const int kFullyConnectedInputTensor = 0;
const int kFullyConnectedWeightsTensor = 1;
const int kFullyConnectedBiasTensor = 2;
const int kFullyConnectedOutputTensor = 0;

FullyConnectedParams FullyConnectedParamsQuantized(
    const OpDataFullyConnected& op_data) {
  FullyConnectedParams op_params;
  op_params.input_offset = -op_data.input_zero_point;
  op_params.weights_offset = -op_data.filter_zero_point;
  op_params.output_offset = op_data.output_zero_point;
  op_params.output_multiplier = op_data.output_multiplier;
  op_params.output_shift = op_data.output_shift;
  op_params.quantized_activation_min = op_data.output_activation_min;
  op_params.quantized_activation_max = op_data.output_activation_max;
  return op_params;
}

FullyConnectedParams FullyConnectedParamsFloat(
    TfLiteFusedActivation activation) {
  FullyConnectedParams op_params;
  CalculateActivationRange(activation, &op_params.float_activation_min,
                           &op_params.float_activation_max);
  return op_params;
}

TfLiteStatus CalculateOpDataFullyConnected(
    TfLiteContext* context, TfLiteFusedActivation activation,
    TfLiteType data_type, const TfLiteTensor* input, const TfLiteTensor* filter,
    const TfLiteTensor* bias, TfLiteTensor* output,
    OpDataFullyConnected* data) {
  if (data_type != kTfLiteFloat32) {
    double real_multiplier = 0.0;
    TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler(
        context, input, filter, bias, output, &real_multiplier));
    QuantizeMultiplier(real_multiplier, &data->output_multiplier,
                       &data->output_shift);

    data->input_zero_point = input->params.zero_point;
    data->filter_zero_point = filter->params.zero_point;
    data->output_zero_point = output->params.zero_point;

    return CalculateActivationRangeQuantized(context, activation, output,
                                             &data->output_activation_min,
                                             &data->output_activation_max);
  }
  return kTfLiteOk;
}
|
||||
|
||||
} // namespace tflite
|
||||
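
The shared helpers above let every port build the FullyConnectedParams the reference kernels expect from the persisted OpDataFullyConnected. Below is a minimal sketch of how an int8 invoke path could consume them; the function name and tensor parameters are illustrative only, and it assumes CalculateOpDataFullyConnected already filled in `data` during Prepare.

// Sketch only: a hypothetical int8 eval body reusing the common helpers.
TfLiteStatus EvalQuantizedInt8Sketch(const OpDataFullyConnected& data,
                                     const TfLiteEvalTensor* input,
                                     const TfLiteEvalTensor* filter,
                                     const TfLiteEvalTensor* bias,
                                     TfLiteEvalTensor* output) {
  // Offsets are negated zero points, as set up by FullyConnectedParamsQuantized.
  const FullyConnectedParams op_params = FullyConnectedParamsQuantized(data);
  tflite::reference_integer_ops::FullyConnected(
      op_params, tflite::micro::GetTensorShape(input),
      tflite::micro::GetTensorData<int8_t>(input),
      tflite::micro::GetTensorShape(filter),
      tflite::micro::GetTensorData<int8_t>(filter),
      tflite::micro::GetTensorShape(bias),
      tflite::micro::GetTensorData<int32_t>(bias),
      tflite::micro::GetTensorShape(output),
      tflite::micro::GetTensorData<int8_t>(output));
  return kTfLiteOk;
}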
@@ -15,6 +15,8 @@ limitations under the License.

#include "tensorflow/lite/micro/kernels/kernel_runner.h"

#include "tensorflow/lite/micro/micro_error_reporter.h"

namespace tflite {
namespace micro {

@@ -30,12 +32,12 @@ uint8_t KernelRunner::kKernelRunnerBuffer_[];
KernelRunner::KernelRunner(const TfLiteRegistration& registration,
                           TfLiteTensor* tensors, int tensors_size,
                           TfLiteIntArray* inputs, TfLiteIntArray* outputs,
                           void* builtin_data, ErrorReporter* error_reporter)
    : allocator_(SimpleMemoryAllocator::Create(
          error_reporter, kKernelRunnerBuffer_, kKernelRunnerBufferSize_)),
                           void* builtin_data)
    : allocator_(SimpleMemoryAllocator::Create(GetMicroErrorReporter(),
                                               kKernelRunnerBuffer_,
                                               kKernelRunnerBufferSize_)),
      registration_(registration),
      tensors_(tensors),
      error_reporter_(error_reporter) {
      tensors_(tensors) {
  // Prepare TfLiteContext:
  context_.impl_ = static_cast<void*>(this);
  context_.ReportError = ReportOpError;

@@ -52,9 +54,10 @@ KernelRunner::KernelRunner(const TfLiteRegistration& registration,
  node_.builtin_data = builtin_data;
}

TfLiteStatus KernelRunner::InitAndPrepare(const char* init_data) {
TfLiteStatus KernelRunner::InitAndPrepare(const char* init_data,
                                          size_t length) {
  if (registration_.init) {
    node_.user_data = registration_.init(&context_, init_data, /*length=*/0);
    node_.user_data = registration_.init(&context_, init_data, length);
  }
  if (registration_.prepare) {
    TF_LITE_ENSURE_STATUS(registration_.prepare(&context_, &node_));

@@ -64,8 +67,7 @@ TfLiteStatus KernelRunner::InitAndPrepare(const char* init_data) {

TfLiteStatus KernelRunner::Invoke() {
  if (registration_.invoke == nullptr) {
    TF_LITE_REPORT_ERROR(error_reporter_,
                         "TfLiteRegistration missing invoke function pointer!");
    MicroPrintf("TfLiteRegistration missing invoke function pointer!");
    return kTfLiteError;
  }
  return registration_.invoke(&context_, &node_);

@@ -118,10 +120,8 @@ TfLiteStatus KernelRunner::RequestScratchBufferInArena(TfLiteContext* context,
  TFLITE_DCHECK(runner != nullptr);

  if (runner->scratch_buffer_count_ == kNumScratchBuffers_) {
    TF_LITE_REPORT_ERROR(
        runner->error_reporter_,
        "Exceeded the maximum number of scratch tensors allowed (%d).",
        kNumScratchBuffers_);
    MicroPrintf("Exceeded the maximum number of scratch tensors allowed (%d).",
                kNumScratchBuffers_);
    return kTfLiteError;
  }

@@ -151,13 +151,9 @@ void* KernelRunner::GetScratchBuffer(TfLiteContext* context, int buffer_index) {

void KernelRunner::ReportOpError(struct TfLiteContext* context,
                                 const char* format, ...) {
  TFLITE_DCHECK(context != nullptr);
  KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
  TFLITE_DCHECK(runner != nullptr);

  va_list args;
  va_start(args, format);
  TF_LITE_REPORT_ERROR(runner->error_reporter_, format, args);
  GetMicroErrorReporter()->Report(format, args);
  va_end(args);
}
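
With the ErrorReporter parameter dropped and an explicit init_data length added, kernel unit tests drive KernelRunner roughly as in the sketch below. The tensor and array setup is elided and the local names are illustrative; only the constructor and the two calls reflect the updated API, and the snippet is assumed to live inside a TF_LITE_MICRO_TEST in the micro test harness.

// Sketch of exercising a kernel through the updated KernelRunner API.
const TfLiteRegistration registration = tflite::Register_ADD_N();
tflite::micro::KernelRunner runner(registration, tensors, tensors_size,
                                   inputs_array, outputs_array,
                                   /*builtin_data=*/nullptr);
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare());
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke());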
@@ -23,23 +23,22 @@ limitations under the License.
namespace tflite {
namespace micro {

// Helper class to perform a simulated kernel (i.e. TfLiteRegistration) lifecyle
// (init, prepare, invoke). All internal allocations are handled by this class.
// Simply pass in the registration, list of required tensors, inputs array,
// outputs array, and any pre-builtin data. Calling Invoke() will automatically
// walk the kernl and outputs will be ready on the the TfLiteTensor output
// provided during construction.
// Helper class to perform a simulated kernel (i.e. TfLiteRegistration)
// lifecycle (init, prepare, invoke). All internal allocations are handled by
// this class. Simply pass in the registration, list of required tensors, inputs
// array, outputs array, and any pre-builtin data. Calling Invoke() will
// automatically walk the kernel and outputs will be ready on the TfLiteTensor
// output provided during construction.
class KernelRunner {
 public:
  KernelRunner(const TfLiteRegistration& registration, TfLiteTensor* tensors,
               int tensors_size, TfLiteIntArray* inputs,
               TfLiteIntArray* outputs, void* builtin_data,
               ErrorReporter* error_reporter);
               TfLiteIntArray* outputs, void* builtin_data);

  // Calls init and prepare on the kernel (i.e. TfLiteRegistration) struct. Any
  // exceptions will be reported through the error_reporter and returned as a
  // status code here.
  TfLiteStatus InitAndPrepare(const char* init_data = nullptr);
  // exceptions will be DebugLog'd and returned as a status code.
  TfLiteStatus InitAndPrepare(const char* init_data = nullptr,
                              size_t length = 0);

  // Calls init, prepare, and invoke on a given TfLiteRegistration pointer.
  // After successful invoke, results will be available in the output tensor as

@@ -60,7 +59,7 @@ class KernelRunner {
               ...);

 private:
  static constexpr int kNumScratchBuffers_ = 5;
  static constexpr int kNumScratchBuffers_ = 12;

  static constexpr int kKernelRunnerBufferSize_ = 10000;
  static uint8_t kKernelRunnerBuffer_[kKernelRunnerBufferSize_];

@@ -68,7 +67,6 @@ class KernelRunner {
  SimpleMemoryAllocator* allocator_ = nullptr;
  const TfLiteRegistration& registration_;
  TfLiteTensor* tensors_ = nullptr;
  ErrorReporter* error_reporter_ = nullptr;

  TfLiteContext context_ = {};
  TfLiteNode node_ = {};
@@ -37,5 +37,17 @@ const RuntimeShape GetTensorShape(const TfLiteEvalTensor* tensor) {
  return RuntimeShape(dims_size, dims_data);
}

PaddingType RuntimePaddingType(TfLitePadding padding) {
  switch (padding) {
    case TfLitePadding::kTfLitePaddingSame:
      return PaddingType::kSame;
    case TfLitePadding::kTfLitePaddingValid:
      return PaddingType::kValid;
    case TfLitePadding::kTfLitePaddingUnknown:
    default:
      return PaddingType::kNone;
  }
}

}  // namespace micro
}  // namespace tflite

@@ -18,6 +18,7 @@ limitations under the License.

#include <cstdint>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/types.h"

@@ -69,6 +70,8 @@ const RuntimeShape GetTensorShape(const TfLiteEvalTensor* tensor);
bool HaveSameShapes(const TfLiteEvalTensor* input1,
                    const TfLiteEvalTensor* input2);

PaddingType RuntimePaddingType(TfLitePadding padding);

}  // namespace micro
}  // namespace tflite
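
The new RuntimePaddingType helper simply maps the TfLitePadding value carried in the builtin options onto the PaddingType enum the reference kernels use. A minimal illustrative use in a convolution Prepare path might look like the following; the surrounding names are assumptions, only the helper itself comes from this change.

// Illustrative only: translating builtin conv options into ConvParams.
auto* conv_options = static_cast<TfLiteConvParams*>(node->builtin_data);
tflite::ConvParams op_params;
op_params.padding_type = tflite::micro::RuntimePaddingType(conv_options->padding);
// padding_values, strides and multipliers are filled in from the op data as before.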
@@ -0,0 +1,137 @@
|
||||
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/pooling.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/padding.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace {
|
||||
|
||||
// Input/output tensor index.
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
// required rank for input/output tensor shape
|
||||
constexpr int kTensorShapeRank = 4;
|
||||
|
||||
// input/output tensor shape rank associations
|
||||
enum { kBatchRank = 0, kHeightRank, kWidthRank, kChannelRank };
|
||||
|
||||
TfLiteStatus L2Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params = static_cast<TfLitePoolParams*>(node->builtin_data);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
TfLiteTensor* output;
|
||||
TF_LITE_ENSURE_OK(context,
|
||||
GetOutputSafe(context, node, kOutputTensor, &output));
|
||||
const TfLiteTensor* input;
|
||||
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
|
||||
TF_LITE_ENSURE_EQ(context, NumDimensions(input), kTensorShapeRank);
|
||||
TF_LITE_ENSURE_EQ(context, NumDimensions(output), kTensorShapeRank);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
|
||||
|
||||
int batches = SizeOfDimension(input, kBatchRank);
|
||||
int height = SizeOfDimension(input, kHeightRank);
|
||||
int width = SizeOfDimension(input, kWidthRank);
|
||||
int channels_out = SizeOfDimension(input, kChannelRank);
|
||||
|
||||
// Matching GetWindowedOutputSize in TensorFlow.
|
||||
auto padding = params->padding;
|
||||
int out_width, out_height;
|
||||
|
||||
params->computed.padding = ComputePaddingHeightWidth(
|
||||
params->stride_height, params->stride_width, 1, 1, height, width,
|
||||
params->filter_height, params->filter_width, padding, &out_height,
|
||||
&out_width);
|
||||
|
||||
// We currently don't have a quantized implementation of L2Pool
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
|
||||
|
||||
// We must update the output tensor dimensions.
|
||||
// The dims storage is expected to be the same area in memory
|
||||
// for both TfLiteTensor and TfLiteEvalTensor. This is important
|
||||
// because TfLiteTensor in the MicroInterpreter is a temporary
|
||||
// allocation.
|
||||
output->dims->data[kBatchRank] = batches;
|
||||
output->dims->data[kHeightRank] = out_height;
|
||||
output->dims->data[kWidthRank] = out_width;
|
||||
output->dims->data[kChannelRank] = channels_out;
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void L2EvalFloat(const TfLitePoolParams& params, const TfLiteEvalTensor& input,
|
||||
tflite::PoolParams* op_params, TfLiteEvalTensor* output) {
|
||||
float activation_min, activation_max;
|
||||
CalculateActivationRange(params.activation, &activation_min, &activation_max);
|
||||
|
||||
op_params->float_activation_min = activation_min;
|
||||
op_params->float_activation_max = activation_max;
|
||||
reference_ops::L2Pool(*op_params, tflite::micro::GetTensorShape(&input),
|
||||
tflite::micro::GetTensorData<float>(&input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
}
|
||||
|
||||
TfLiteStatus L2Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params = static_cast<const TfLitePoolParams*>(node->builtin_data);
|
||||
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
|
||||
tflite::PoolParams op_params;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.filter_height = params->filter_height;
|
||||
op_params.filter_width = params->filter_width;
|
||||
op_params.padding_values.height = params->computed.padding.height;
|
||||
op_params.padding_values.width = params->computed.padding.width;
|
||||
|
||||
switch (input->type) { // Already know in/out types are same.
|
||||
case kTfLiteFloat32:
|
||||
L2EvalFloat(*params, *input, &op_params, output);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context,
|
||||
"L2_POOL_2D only supports float32 currently, got %s.",
|
||||
TfLiteTypeGetName(input->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
TfLiteRegistration Register_L2_POOL_2D() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/L2Prepare,
|
||||
/*invoke=*/L2Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace tflite
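
For reference, L2 pooling produces, per output element, the square root of the mean of squares over its pooling window; the kernel above delegates this to reference_ops::L2Pool. The scalar sketch below is only the definition it implements, not code from the kernel (requires <cmath>).

// Definitional sketch of one L2-pool output value over a window of n inputs.
float L2PoolWindowSketch(const float* values, int n) {
  float sum_squares = 0.f;
  for (int i = 0; i < n; ++i) {
    sum_squares += values[i] * values[i];
  }
  return std::sqrt(sum_squares / static_cast<float>(n));
}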
|
||||
@@ -0,0 +1,153 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/leaky_relu.h"
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace {
|
||||
|
||||
// Input/output tensor index.
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
struct LeakyReluOpData {
|
||||
// quantization parameters
|
||||
int32_t output_multiplier_alpha;
|
||||
int32_t output_shift_alpha;
|
||||
int32_t output_multiplier_identity;
|
||||
int32_t output_shift_identity;
|
||||
int32_t input_zero_point;
|
||||
int32_t output_zero_point;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
void QuantizeLeakyRelu(const LeakyReluOpData& data,
|
||||
const TfLiteEvalTensor* input,
|
||||
TfLiteEvalTensor* output) {
|
||||
LeakyReluParams op_params = {};
|
||||
|
||||
op_params.input_offset = data.input_zero_point;
|
||||
op_params.output_offset = data.output_zero_point;
|
||||
op_params.output_multiplier_alpha = data.output_multiplier_alpha;
|
||||
op_params.output_shift_alpha = data.output_shift_alpha;
|
||||
op_params.output_multiplier_identity = data.output_multiplier_identity;
|
||||
op_params.output_shift_identity = data.output_shift_identity;
|
||||
reference_ops::QuantizeLeakyRelu(op_params,
|
||||
tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<T>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<T>(output));
|
||||
}
|
||||
|
||||
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
const TfLiteTensor* input;
|
||||
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
|
||||
TfLiteTensor* output;
|
||||
TF_LITE_ENSURE_OK(context,
|
||||
GetOutputSafe(context, node, kOutputTensor, &output));
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
|
||||
|
||||
if (output->type == kTfLiteInt8) {
|
||||
LeakyReluOpData* data = static_cast<LeakyReluOpData*>(node->user_data);
|
||||
const auto* params =
|
||||
static_cast<TfLiteLeakyReluParams*>(node->builtin_data);
|
||||
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
data->output_zero_point = output->params.zero_point;
|
||||
|
||||
int output_shift_alpha;
|
||||
double alpha_multiplier = static_cast<double>(
|
||||
input->params.scale * params->alpha / output->params.scale);
|
||||
QuantizeMultiplier(alpha_multiplier, &data->output_multiplier_alpha,
|
||||
&output_shift_alpha);
|
||||
data->output_shift_alpha = static_cast<int32_t>(output_shift_alpha);
|
||||
|
||||
int output_shift_identity;
|
||||
double identity_multiplier =
|
||||
static_cast<double>(input->params.scale / output->params.scale);
|
||||
QuantizeMultiplier(identity_multiplier, &data->output_multiplier_identity,
|
||||
&output_shift_identity);
|
||||
data->output_shift_identity = static_cast<int32_t>(output_shift_identity);
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void* LeakyReluInit(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(LeakyReluOpData));
|
||||
}
|
||||
|
||||
TfLiteStatus LeakyReluPrepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
return CalculateOpData(context, node);
|
||||
}
|
||||
|
||||
TfLiteStatus LeakyReluEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
const LeakyReluOpData& data = *static_cast<LeakyReluOpData*>(node->user_data);
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
LeakyReluParams op_params = {};
|
||||
const auto* params =
|
||||
static_cast<TfLiteLeakyReluParams*>(node->builtin_data);
|
||||
|
||||
op_params.alpha = params->alpha;
|
||||
reference_ops::LeakyRelu(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
return kTfLiteOk;
|
||||
} break;
|
||||
case kTfLiteInt8: {
|
||||
QuantizeLeakyRelu<int8_t>(data, input, output);
|
||||
return kTfLiteOk;
|
||||
} break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(
|
||||
context, "Only float32, int8 are supported by LEAKY_RELU, got %s.",
|
||||
TfLiteTypeGetName(input->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
TfLiteRegistration Register_LEAKY_RELU() {
|
||||
return {/*init=*/LeakyReluInit,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/LeakyReluPrepare,
|
||||
/*invoke=*/LeakyReluEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace tflite
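
The int8 path above folds the LeakyRelu alpha into two fixed-point multipliers during Prepare. The arithmetic is just two scale ratios; the numbers below are illustrative, only QuantizeMultiplier is the real helper used by the kernel.

// Sketch of the scale folding done in CalculateOpData above. With
// input_scale = 0.5f, output_scale = 0.25f, alpha = 0.1f:
//   alpha_multiplier    = input_scale * alpha / output_scale = 0.2
//   identity_multiplier = input_scale / output_scale         = 2.0
// QuantizeMultiplier() converts each real multiplier into an int32 fixed-point
// multiplier plus a shift, which reference_ops::QuantizeLeakyRelu consumes.
int32_t quantized_multiplier;
int shift;
QuantizeMultiplier(/*double_multiplier=*/0.2, &quantized_multiplier, &shift);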
|
||||
@@ -31,12 +31,26 @@ namespace tflite {
// (https://abseil.io/tips/130). Any new ops (or cleanup of existing ops should
// have their Register function declarations in the tflite namespace.

TfLiteRegistration Register_ADD_N();
TfLiteRegistration Register_BATCH_TO_SPACE_ND();
TfLiteRegistration Register_CAST();
TfLiteRegistration Register_CONV_2D();
TfLiteRegistration Register_DEPTHWISE_CONV_2D();
TfLiteRegistration Register_DIV();
TfLiteRegistration Register_ELU();
TfLiteRegistration Register_EXP();
TfLiteRegistration Register_EXPAND_DIMS();
TfLiteRegistration Register_FILL();
TfLiteRegistration Register_L2_POOL_2D();
TfLiteRegistration Register_LEAKY_RELU();
TfLiteRegistration Register_QUANTIZE();
TfLiteRegistration Register_SHAPE();
TfLiteRegistration Register_SOFTMAX();
TfLiteRegistration Register_SPACE_TO_BATCH_ND();
TfLiteRegistration Register_SQUEEZE();
TfLiteRegistration Register_SVDF();
TfLiteRegistration Register_TRANSPOSE_CONV();
TfLiteRegistration Register_ZEROS_LIKE();

namespace ops {
namespace micro {
@@ -1,8 +1,11 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
||||
@@ -12,11 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#include "tensorflow/lite/kernels/internal/reference/quantize.h"
|
||||
|
||||
#include "tensorflow/lite/micro/kernels/quantize.h"
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
@@ -25,160 +25,10 @@ limitations under the License.
|
||||
namespace tflite {
|
||||
namespace {
|
||||
|
||||
struct OpData {
|
||||
tflite::QuantizationParams quantization_params;
|
||||
// The scaling factor from input to output (aka the 'real multiplier') can
|
||||
// be represented as a fixed point multiplier plus a left shift.
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
|
||||
int32_t input_zero_point;
|
||||
};
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
|
||||
// TODO(b/128934713): Add support for fixed-point per-channel quantization.
|
||||
// Currently this only support affine per-layer quantization.
|
||||
TF_LITE_ENSURE_EQ(context, output->quantization.type,
|
||||
kTfLiteAffineQuantization);
|
||||
const auto* affine_quantization =
|
||||
reinterpret_cast<TfLiteAffineQuantization*>(output->quantization.params);
|
||||
TF_LITE_ENSURE(context, affine_quantization);
|
||||
TF_LITE_ENSURE(context, affine_quantization->scale);
|
||||
TF_LITE_ENSURE(context, affine_quantization->scale->size == 1);
|
||||
|
||||
TF_LITE_ENSURE(context, input->type == kTfLiteFloat32 ||
|
||||
input->type == kTfLiteInt16 ||
|
||||
input->type == kTfLiteInt8);
|
||||
TF_LITE_ENSURE(context, output->type == kTfLiteUInt8 ||
|
||||
output->type == kTfLiteInt8 ||
|
||||
output->type == kTfLiteInt16 ||
|
||||
output->type == kTfLiteInt32);
|
||||
|
||||
if (((input->type == kTfLiteInt16 || input->type == kTfLiteInt8) &&
|
||||
output->type == kTfLiteInt8) ||
|
||||
(input->type == kTfLiteInt16 && output->type == kTfLiteInt16)) {
|
||||
double effective_scale = static_cast<double>(input->params.scale) /
|
||||
static_cast<double>(output->params.scale);
|
||||
|
||||
QuantizeMultiplier(effective_scale, &data->output_multiplier,
|
||||
&data->output_shift);
|
||||
}
|
||||
|
||||
data->quantization_params.zero_point = output->params.zero_point;
|
||||
data->quantization_params.scale = static_cast<double>(output->params.scale);
|
||||
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
|
||||
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
|
||||
|
||||
if (input->type == kTfLiteFloat32) {
|
||||
switch (output->type) {
|
||||
case kTfLiteInt8:
|
||||
reference_ops::AffineQuantize(
|
||||
data->quantization_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
reference_ops::AffineQuantize(
|
||||
data->quantization_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
break;
|
||||
case kTfLiteInt16:
|
||||
reference_ops::AffineQuantize(
|
||||
data->quantization_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int16_t>(output));
|
||||
return kTfLiteOk;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
TfLiteTypeGetName(input->type),
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else if (input->type == kTfLiteInt16) {
|
||||
size_t size = ElementCount(*input->dims);
|
||||
switch (output->type) {
|
||||
case kTfLiteInt8:
|
||||
reference_ops::Requantize(tflite::micro::GetTensorData<int16_t>(input),
|
||||
size, data->output_multiplier,
|
||||
data->output_shift, data->input_zero_point,
|
||||
data->quantization_params.zero_point,
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
break;
|
||||
case kTfLiteInt16:
|
||||
reference_ops::Requantize(
|
||||
tflite::micro::GetTensorData<int16_t>(input), size,
|
||||
data->output_multiplier, data->output_shift, data->input_zero_point,
|
||||
data->quantization_params.zero_point,
|
||||
tflite::micro::GetTensorData<int16_t>(output));
|
||||
return kTfLiteOk;
|
||||
case kTfLiteInt32:
|
||||
reference_ops::Requantize(
|
||||
tflite::micro::GetTensorData<int16_t>(input), size,
|
||||
data->output_multiplier, data->output_shift, data->input_zero_point,
|
||||
data->quantization_params.zero_point,
|
||||
tflite::micro::GetTensorData<int32_t>(output));
|
||||
return kTfLiteOk;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
TfLiteTypeGetName(input->type),
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else if (input->type == kTfLiteInt8) {
|
||||
// Int8 to Int8 requantization, required if the input and output tensors
|
||||
// have different scales and/or zero points.
|
||||
size_t size = ElementCount(*input->dims);
|
||||
switch (output->type) {
|
||||
case kTfLiteInt8:
|
||||
reference_ops::Requantize(tflite::micro::GetTensorData<int8_t>(input),
|
||||
size, data->output_multiplier,
|
||||
data->output_shift, data->input_zero_point,
|
||||
data->quantization_params.zero_point,
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
TfLiteTypeGetName(input->type),
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else {
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
TfLiteTypeGetName(input->type),
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
return context->AllocatePersistentBuffer(context,
|
||||
sizeof(OpDataQuantizeReference));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
@@ -186,8 +36,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TfLiteRegistration Register_QUANTIZE() {
|
||||
return {/*init=*/Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/Prepare,
|
||||
/*invoke=*/Eval,
|
||||
/*prepare=*/PrepareQuantizeReference,
|
||||
/*invoke=*/EvalQuantizeReference,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
|
||||
@@ -0,0 +1,37 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_QUANTIZE_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_QUANTIZE_H_

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {

struct OpDataQuantizeReference {
  tflite::QuantizationParams quantization_params;
  // The scaling factor from input to output (aka the 'real multiplier') can
  // be represented as a fixed point multiplier plus a left shift.
  int32_t requantize_output_multiplier;
  int requantize_output_shift;

  int32_t input_zero_point;
};

TfLiteStatus EvalQuantizeReference(TfLiteContext* context, TfLiteNode* node);
TfLiteStatus PrepareQuantizeReference(TfLiteContext* context, TfLiteNode* node);
}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_KERNELS_QUANTIZE_H_
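
Splitting the reference Prepare/Eval out into this header and quantize_common.cc is what lets optimized ports reuse them. The sketch below mirrors what the reference quantize.cc now does; an optimized port would only swap in its own functions where it has something faster (Init here stands for the port-provided init that allocates OpDataQuantizeReference).

// Sketch: registration reusing the shared reference prepare/eval.
TfLiteRegistration Register_QUANTIZE() {
  return {/*init=*/Init,
          /*free=*/nullptr,
          /*prepare=*/PrepareQuantizeReference,
          /*invoke=*/EvalQuantizeReference,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}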
@@ -0,0 +1,171 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/quantize.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/quantize.h"
|
||||
#include "tensorflow/lite/micro/micro_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
TfLiteStatus PrepareQuantizeReference(TfLiteContext* context,
|
||||
TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
auto* data = static_cast<OpDataQuantizeReference*>(node->user_data);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
|
||||
// TODO(b/128934713): Add support for fixed-point per-channel quantization.
|
||||
// Currently this only support affine per-layer quantization.
|
||||
TF_LITE_ENSURE_EQ(context, output->quantization.type,
|
||||
kTfLiteAffineQuantization);
|
||||
const auto* affine_quantization =
|
||||
reinterpret_cast<TfLiteAffineQuantization*>(output->quantization.params);
|
||||
TF_LITE_ENSURE(context, affine_quantization);
|
||||
TF_LITE_ENSURE(context, affine_quantization->scale);
|
||||
TF_LITE_ENSURE(context, affine_quantization->scale->size == 1);
|
||||
|
||||
TF_LITE_ENSURE(context, input->type == kTfLiteFloat32 ||
|
||||
input->type == kTfLiteInt16 ||
|
||||
input->type == kTfLiteInt8);
|
||||
TF_LITE_ENSURE(context, output->type == kTfLiteInt8 ||
|
||||
output->type == kTfLiteInt16 ||
|
||||
output->type == kTfLiteInt32);
|
||||
|
||||
if ((input->type == kTfLiteInt16 && output->type == kTfLiteInt8) ||
|
||||
(input->type == kTfLiteInt8 && output->type == kTfLiteInt8) ||
|
||||
(input->type == kTfLiteInt8 && output->type == kTfLiteInt32) ||
|
||||
(input->type == kTfLiteInt16 && output->type == kTfLiteInt16) ||
|
||||
(input->type == kTfLiteInt16 && output->type == kTfLiteInt32)) {
|
||||
double effective_scale = static_cast<double>(input->params.scale) /
|
||||
static_cast<double>(output->params.scale);
|
||||
|
||||
QuantizeMultiplier(effective_scale, &data->requantize_output_multiplier,
|
||||
&data->requantize_output_shift);
|
||||
}
|
||||
|
||||
data->quantization_params.zero_point = output->params.zero_point;
|
||||
data->quantization_params.scale = static_cast<double>(output->params.scale);
|
||||
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus EvalQuantizeReference(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
auto* data = static_cast<OpDataQuantizeReference*>(node->user_data);
|
||||
|
||||
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
|
||||
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
|
||||
|
||||
if (input->type == kTfLiteFloat32) {
|
||||
switch (output->type) {
|
||||
case kTfLiteInt8:
|
||||
reference_ops::AffineQuantize(
|
||||
data->quantization_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
break;
|
||||
case kTfLiteInt16:
|
||||
reference_ops::AffineQuantize(
|
||||
data->quantization_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int16_t>(output));
|
||||
return kTfLiteOk;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
TfLiteTypeGetName(input->type),
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else if (input->type == kTfLiteInt16) {
|
||||
size_t size = ElementCount(*input->dims);
|
||||
switch (output->type) {
|
||||
case kTfLiteInt8:
|
||||
reference_ops::Requantize(
|
||||
tflite::micro::GetTensorData<int16_t>(input), size,
|
||||
data->requantize_output_multiplier, data->requantize_output_shift,
|
||||
data->input_zero_point, data->quantization_params.zero_point,
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
break;
|
||||
case kTfLiteInt16:
|
||||
reference_ops::Requantize(
|
||||
tflite::micro::GetTensorData<int16_t>(input), size,
|
||||
data->requantize_output_multiplier, data->requantize_output_shift,
|
||||
data->input_zero_point, data->quantization_params.zero_point,
|
||||
tflite::micro::GetTensorData<int16_t>(output));
|
||||
return kTfLiteOk;
|
||||
case kTfLiteInt32:
|
||||
reference_ops::Requantize(
|
||||
tflite::micro::GetTensorData<int16_t>(input), size,
|
||||
data->requantize_output_multiplier, data->requantize_output_shift,
|
||||
data->input_zero_point, data->quantization_params.zero_point,
|
||||
tflite::micro::GetTensorData<int32_t>(output));
|
||||
return kTfLiteOk;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
TfLiteTypeGetName(input->type),
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else if (input->type == kTfLiteInt8) {
|
||||
// Int8 to Int8 requantization, required if the input and output tensors
|
||||
// have different scales and/or zero points.
|
||||
size_t size = ElementCount(*input->dims);
|
||||
switch (output->type) {
|
||||
case kTfLiteInt8:
|
||||
reference_ops::Requantize(
|
||||
tflite::micro::GetTensorData<int8_t>(input), size,
|
||||
data->requantize_output_multiplier, data->requantize_output_shift,
|
||||
data->input_zero_point, data->quantization_params.zero_point,
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
break;
|
||||
case kTfLiteInt32:
|
||||
reference_ops::Requantize(
|
||||
tflite::micro::GetTensorData<int8_t>(input), size,
|
||||
data->requantize_output_multiplier, data->requantize_output_shift,
|
||||
data->input_zero_point, data->quantization_params.zero_point,
|
||||
tflite::micro::GetTensorData<int32_t>(output));
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
TfLiteTypeGetName(input->type),
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else {
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
TfLiteTypeGetName(input->type),
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace tflite
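
All of the requantize branches above apply the same per-element fixed-point arithmetic: subtract the input zero point, scale by the quantized effective scale (multiplier plus shift), then add the output zero point. The sketch below restates that with the existing MultiplyByQuantizedMultiplier helper; clamping to the output type's range, which reference_ops::Requantize also performs, is omitted for brevity, and the function name is illustrative.

// Per-element sketch of the requantization performed above (no clamping).
int32_t RequantizeOneSketch(int32_t input_value, int32_t input_zero_point,
                            int32_t output_zero_point,
                            int32_t output_multiplier, int output_shift) {
  const int32_t shifted = input_value - input_zero_point;
  const int32_t scaled =
      MultiplyByQuantizedMultiplier(shifted, output_multiplier, output_shift);
  return scaled + output_zero_point;
}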
|
||||
@@ -1,4 +1,4 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
@@ -13,12 +13,13 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/softmax.h"
|
||||
#include "tensorflow/lite/micro/kernels/softmax.h"
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/softmax.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
@@ -27,86 +28,9 @@ limitations under the License.
|
||||
namespace tflite {
|
||||
namespace {
|
||||
|
||||
// Softmax parameter data that persists in user_data
|
||||
static constexpr int kInt16LUTArraySize = 513;
|
||||
|
||||
TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
|
||||
const TfLiteTensor* input,
|
||||
TfLiteTensor* output,
|
||||
const TfLiteSoftmaxParams* params,
|
||||
SoftmaxParams* op_data) {
|
||||
if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8 ||
|
||||
input->type == kTfLiteInt16) {
|
||||
if (input->type == kTfLiteUInt8) {
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteUInt8);
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
|
||||
} else if (input->type == kTfLiteInt16) {
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
|
||||
TF_LITE_ENSURE_NEAR(context, output->params.scale, 1.f / 32768,
|
||||
(0.001f * 1.f / 32768));
|
||||
} else { // input->type == kTfLiteInt8
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8);
|
||||
if (output->type == kTfLiteInt16) {
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point, -32768);
|
||||
TF_LITE_ENSURE_NEAR(context, output->params.scale, 1.f / 65536,
|
||||
(0.001f * 1.f / 65536));
|
||||
} else { // output->type == kTfLiteint8
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8);
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point, -128);
|
||||
TF_LITE_ENSURE(context, output->params.scale == 1.f / 256);
|
||||
}
|
||||
}
|
||||
|
||||
static const int kScaledDiffIntegerBits = 5;
|
||||
|
||||
// Calculate input_multiplier and input_left_shift
|
||||
if (input->type == kTfLiteInt16) {
|
||||
int input_left_shift;
|
||||
double input_scale_beta_rescale =
|
||||
static_cast<double>(input->params.scale) *
|
||||
static_cast<double>(params->beta) /
|
||||
(10.0 / 65535.0); // scale the input_diff such that [-65535, 0]
|
||||
// correspond to [-10.0, 0.0]
|
||||
QuantizeMultiplier(input_scale_beta_rescale, &op_data->input_multiplier,
|
||||
&input_left_shift);
|
||||
op_data->input_left_shift = input_left_shift;
|
||||
} else {
|
||||
int input_left_shift;
|
||||
tflite::PreprocessSoftmaxScaling(
|
||||
static_cast<double>(params->beta),
|
||||
static_cast<double>(input->params.scale), kScaledDiffIntegerBits,
|
||||
&op_data->input_multiplier, &input_left_shift);
|
||||
op_data->input_left_shift = input_left_shift;
|
||||
op_data->diff_min =
|
||||
-1.0 * tflite::CalculateInputRadius(kScaledDiffIntegerBits,
|
||||
op_data->input_left_shift);
|
||||
}
|
||||
} else {
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteFloat32);
|
||||
op_data->beta = static_cast<double>(params->beta);
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
// Takes a tensor and performs softmax along the last dimension.
|
||||
void SoftmaxFloat(const TfLiteEvalTensor* input, TfLiteEvalTensor* output,
|
||||
const SoftmaxParams& op_data) {
|
||||
tflite::reference_ops::Softmax(op_data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
}
|
||||
|
||||
void SoftmaxQuantized(const TfLiteEvalTensor* input, TfLiteEvalTensor* output,
|
||||
const SoftmaxParams& op_data) {
|
||||
if (input->type == kTfLiteUInt8) {
|
||||
tflite::reference_ops::Softmax(
|
||||
op_data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} else if (input->type == kTfLiteInt8) {
|
||||
if (input->type == kTfLiteInt8) {
|
||||
if (output->type == kTfLiteInt16) {
|
||||
tflite::reference_ops::Softmax(
|
||||
op_data, tflite::micro::GetTensorShape(input),
|
||||
@@ -129,60 +53,6 @@ void SoftmaxQuantized(const TfLiteEvalTensor* input, TfLiteEvalTensor* output,
|
||||
}
|
||||
}
|
||||
|
||||
void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(SoftmaxParams));
|
||||
}
|
||||
|
||||
TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
|
||||
TF_LITE_ENSURE(context, node->user_data != nullptr);
|
||||
SoftmaxParams* op_data = static_cast<SoftmaxParams*>(node->user_data);
|
||||
// Only allocate LUTs for KTfLiteInt16 data type
|
||||
if (input->type == kTfLiteInt16) {
|
||||
void* raw_exp_lut = context->AllocatePersistentBuffer(
|
||||
context, sizeof(int16_t) * kInt16LUTArraySize);
|
||||
TF_LITE_ENSURE(context, raw_exp_lut != nullptr);
|
||||
op_data->exp_lut = reinterpret_cast<int16_t*>(raw_exp_lut);
|
||||
void* one_over_one_plus_x_lut = context->AllocatePersistentBuffer(
|
||||
context, sizeof(int16_t) * kInt16LUTArraySize);
|
||||
TF_LITE_ENSURE(context, one_over_one_plus_x_lut != nullptr);
|
||||
op_data->one_over_one_plus_x_lut =
|
||||
reinterpret_cast<int16_t*>(one_over_one_plus_x_lut);
|
||||
}
|
||||
|
||||
if (output->type == kTfLiteInt16) {
|
||||
TF_LITE_ENSURE(context, input->type == kTfLiteInt8 ||
|
||||
input->type == kTfLiteUInt8 ||
|
||||
input->type == kTfLiteInt16);
|
||||
} else {
|
||||
TF_LITE_ENSURE_EQ(context, input->type, output->type);
|
||||
}
|
||||
|
||||
// Populate LUT if required
|
||||
if (input->type == kTfLiteInt16) {
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
|
||||
// exp LUT only used on negative values
|
||||
// we consider exp(-10.0) is insignificant to accumulation
|
||||
gen_lut([](float value) { return std::exp(value); }, -10.0f, 0.0f,
|
||||
op_data->exp_lut, kInt16LUTArraySize);
|
||||
gen_lut([](float value) { return 1.0f / (1.0f + value); }, 0.0f, 1.0f,
|
||||
op_data->one_over_one_plus_x_lut, kInt16LUTArraySize);
|
||||
op_data->zero_point = output->params.zero_point;
|
||||
op_data->scale = output->params.scale;
|
||||
}
|
||||
|
||||
auto* params = static_cast<TfLiteSoftmaxParams*>(node->builtin_data);
|
||||
return CalculateSoftmaxParams(context, input, output, params, op_data);
|
||||
}
|
||||
|
||||
TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
|
||||
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
|
||||
@@ -192,11 +62,14 @@ TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
SoftmaxFloat(input, output, op_data);
|
||||
tflite::reference_ops::Softmax(
|
||||
op_data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case kTfLiteInt8:
|
||||
case kTfLiteUInt8:
|
||||
case kTfLiteInt16: {
|
||||
SoftmaxQuantized(input, output, op_data);
|
||||
return kTfLiteOk;
|
||||
|
||||
@@ -0,0 +1,30 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_SOFTMAX_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_SOFTMAX_H_

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {

void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length);

TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node);

}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_KERNELS_SOFTMAX_H_
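
As with quantize above, exposing SoftmaxInit and SoftmaxPrepare lets CMSIS-NN or other port-specific softmax kernels reuse the shared parameter calculation and replace only the invoke. A sketch of such a registration follows; EvalPortSpecific is a placeholder name for the port's own eval function, not something defined in this change.

// Sketch: port-specific softmax reusing the shared init/prepare.
TfLiteRegistration Register_SOFTMAX() {
  return {/*init=*/SoftmaxInit,
          /*free=*/nullptr,
          /*prepare=*/SoftmaxPrepare,
          /*invoke=*/EvalPortSpecific,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}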
@@ -0,0 +1,140 @@
|
||||
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/softmax.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
namespace {
|
||||
// Softmax parameter data that persists in user_data
|
||||
const int kInt16LUTArraySize = 513;
|
||||
|
||||
TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
|
||||
const TfLiteTensor* input,
|
||||
TfLiteTensor* output,
|
||||
const TfLiteSoftmaxParams* params,
|
||||
SoftmaxParams* op_data) {
|
||||
if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) {
|
||||
if (input->type == kTfLiteInt16) {
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
|
||||
TF_LITE_ENSURE_NEAR(context, output->params.scale, 1.f / 32768,
|
||||
(0.001f * 1.f / 32768));
|
||||
} else { // input->type == kTfLiteInt8
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8);
|
||||
if (output->type == kTfLiteInt16) {
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point, -32768);
|
||||
TF_LITE_ENSURE_NEAR(context, output->params.scale, 1.f / 65536,
|
||||
(0.001f * 1.f / 65536));
|
||||
} else { // output->type == kTfLiteint8
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8);
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point, -128);
|
||||
TF_LITE_ENSURE(context, output->params.scale == 1.f / 256);
|
||||
}
|
||||
}
|
||||
|
||||
static const int kScaledDiffIntegerBits = 5;
|
||||
|
||||
// Calculate input_multiplier and input_left_shift
|
||||
if (input->type == kTfLiteInt16) {
|
||||
int input_left_shift;
|
||||
double input_scale_beta_rescale =
|
||||
static_cast<double>(input->params.scale) *
|
||||
static_cast<double>(params->beta) /
|
||||
(10.0 / 65535.0); // scale the input_diff such that [-65535, 0]
|
||||
// correspond to [-10.0, 0.0]
|
||||
QuantizeMultiplier(input_scale_beta_rescale, &op_data->input_multiplier,
|
||||
&input_left_shift);
|
||||
op_data->input_left_shift = input_left_shift;
|
||||
} else {
|
||||
int input_left_shift;
|
||||
tflite::PreprocessSoftmaxScaling(
|
||||
static_cast<double>(params->beta),
|
||||
static_cast<double>(input->params.scale), kScaledDiffIntegerBits,
|
||||
&op_data->input_multiplier, &input_left_shift);
|
||||
op_data->input_left_shift = input_left_shift;
|
||||
op_data->diff_min =
|
||||
-1.0 * tflite::CalculateInputRadius(kScaledDiffIntegerBits,
|
||||
op_data->input_left_shift);
|
||||
}
|
||||
} else {
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteFloat32);
|
||||
op_data->beta = static_cast<double>(params->beta);
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(SoftmaxParams));
|
||||
}
|
||||
|
||||
TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
|
||||
TF_LITE_ENSURE(context, node->user_data != nullptr);
|
||||
SoftmaxParams* op_data = static_cast<SoftmaxParams*>(node->user_data);
|
||||
// Only allocate LUTs for KTfLiteInt16 data type
|
||||
if (input->type == kTfLiteInt16) {
|
||||
void* raw_exp_lut = context->AllocatePersistentBuffer(
|
||||
context, sizeof(int16_t) * kInt16LUTArraySize);
|
||||
TF_LITE_ENSURE(context, raw_exp_lut != nullptr);
|
||||
op_data->exp_lut = reinterpret_cast<int16_t*>(raw_exp_lut);
|
||||
void* one_over_one_plus_x_lut = context->AllocatePersistentBuffer(
|
||||
context, sizeof(int16_t) * kInt16LUTArraySize);
|
||||
TF_LITE_ENSURE(context, one_over_one_plus_x_lut != nullptr);
|
||||
op_data->one_over_one_plus_x_lut =
|
||||
reinterpret_cast<int16_t*>(one_over_one_plus_x_lut);
|
||||
}
|
||||
|
||||
if (output->type == kTfLiteInt16) {
|
||||
TF_LITE_ENSURE(context,
|
||||
input->type == kTfLiteInt8 || input->type == kTfLiteInt16);
|
||||
} else {
|
||||
TF_LITE_ENSURE_EQ(context, input->type, output->type);
|
||||
}
|
||||
|
||||
// Populate LUT if required
|
||||
if (input->type == kTfLiteInt16) {
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
|
||||
// exp LUT only used on negative values
|
||||
// we consider exp(-10.0) is insignificant to accumulation
|
||||
gen_lut([](float value) { return std::exp(value); }, -10.0f, 0.0f,
|
||||
op_data->exp_lut, kInt16LUTArraySize);
|
||||
gen_lut([](float value) { return 1.0f / (1.0f + value); }, 0.0f, 1.0f,
|
||||
op_data->one_over_one_plus_x_lut, kInt16LUTArraySize);
|
||||
op_data->zero_point = output->params.zero_point;
|
||||
op_data->scale = output->params.scale;
|
||||
}
|
||||
|
||||
auto* params = static_cast<TfLiteSoftmaxParams*>(node->builtin_data);
|
||||
return CalculateSoftmaxParams(context, input, output, params, op_data);
|
||||
}
|
||||
|
||||
} // namespace tflite
|
||||
@@ -0,0 +1,121 @@
|
||||
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h"
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/micro_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kBlockShapeTensor = 1;
|
||||
constexpr int kCropsTensor = 2;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
// Currently, only 3D NHC and 4D NHWC input/output tensors are supported.
// In case of 3D input, it is extended to 4D NHWC by adding W=1.
// The 4D array needs to have exactly 2 spatial dimensions.
// TODO(b/149952582): Support arbitrary dimension in SpaceToBatchND.
|
||||
const int kInputOutputMinDimensionNum = 3;
|
||||
const int kInputOutputMaxDimensionNum = 4;
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(SpaceToBatchParams));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 3);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr && output != nullptr);
|
||||
|
||||
TF_LITE_ENSURE(context, NumDimensions(input) >= kInputOutputMinDimensionNum);
|
||||
TF_LITE_ENSURE(context, NumDimensions(output) >= kInputOutputMinDimensionNum);
|
||||
TF_LITE_ENSURE(context, NumDimensions(input) <= kInputOutputMaxDimensionNum);
|
||||
TF_LITE_ENSURE(context, NumDimensions(output) <= kInputOutputMaxDimensionNum);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const SpaceToBatchParams& params =
|
||||
*(static_cast<const SpaceToBatchParams*>(node->user_data));
|
||||
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
const TfLiteEvalTensor* block_shape =
|
||||
tflite::micro::GetEvalInput(context, node, kBlockShapeTensor);
|
||||
const TfLiteEvalTensor* crops =
|
||||
tflite::micro::GetEvalInput(context, node, kCropsTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
switch (input->type) { // Already know in/out types are same.
|
||||
case kTfLiteFloat32:
|
||||
reference_ops::SpaceToBatchND(
|
||||
params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(block_shape),
|
||||
tflite::micro::GetTensorData<int32_t>(block_shape),
|
||||
tflite::micro::GetTensorShape(crops),
|
||||
tflite::micro::GetTensorData<int32_t>(crops),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
reference_ops::SpaceToBatchND(
|
||||
params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(block_shape),
|
||||
tflite::micro::GetTensorData<int32_t>(block_shape),
|
||||
tflite::micro::GetTensorShape(crops),
|
||||
tflite::micro::GetTensorData<int32_t>(crops),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(input->type), input->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
}  // namespace
|
||||
|
||||
TfLiteRegistration Register_SPACE_TO_BATCH_ND() {
|
||||
return {/*init=*/Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/Prepare,
|
||||
/*invoke=*/Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace tflite
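A short standalone shape check may help make the SPACE_TO_BATCH_ND semantics above concrete (values chosen for illustration; assumes zero padding in the third input, which this kernel names crops): block_shape {bh, bw} moves bh*bw spatial blocks of an NHWC input into the batch dimension.

// Illustrative shape check only, independent of the kernel above.
#include <array>
#include <cstdio>

int main() {
  const std::array<int, 4> input_shape = {1, 4, 4, 3};  // N, H, W, C
  const std::array<int, 2> block_shape = {2, 2};        // bh, bw
  const std::array<int, 4> output_shape = {
      input_shape[0] * block_shape[0] * block_shape[1],  // 1 * 2 * 2 = 4
      input_shape[1] / block_shape[0],                   // 4 / 2 = 2
      input_shape[2] / block_shape[1],                   // 4 / 2 = 2
      input_shape[3]};                                   // 3
  std::printf("output: %d x %d x %d x %d\n", output_shape[0], output_shape[1],
              output_shape[2], output_shape[3]);
  return 0;
}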
|
||||
111
code/components/tfmicro/tensorflow/lite/micro/kernels/squeeze.cc
Normal file
@@ -0,0 +1,111 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/memory_helpers.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace {
|
||||
|
||||
struct SqueezeContext {
|
||||
SqueezeContext(TfLiteContext* context, TfLiteNode* node)
|
||||
: params(reinterpret_cast<TfLiteSqueezeParams*>(node->builtin_data)),
|
||||
input(GetInput(context, node, 0)),
|
||||
output(GetOutput(context, node, 0)) {}
|
||||
TfLiteSqueezeParams* params;
|
||||
const TfLiteTensor* const input;
|
||||
TfLiteTensor* output;
|
||||
};
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
|
||||
SqueezeContext op_context(context, node);
|
||||
const int input_num_dims = NumDimensions(op_context.input);
|
||||
const int num_squeeze_dims = op_context.params->num_squeeze_dims;
|
||||
|
||||
// Determines number of dimensions of output tensor after squeeze.
|
||||
const TfLiteIntArray* input_dims = op_context.input->dims;
|
||||
const TfLiteIntArray* output_dims = op_context.output->dims;
|
||||
const int* squeeze_dims = op_context.params->squeeze_dims;
|
||||
|
||||
constexpr int max_squeeze_dims = 8;
|
||||
TF_LITE_ENSURE(context, input_num_dims <= max_squeeze_dims);
|
||||
bool should_squeeze[max_squeeze_dims] = {};
|
||||
|
||||
if (num_squeeze_dims == 0) {
|
||||
for (int idx = 0; idx < input_num_dims; ++idx) {
|
||||
if (input_dims->data[idx] == 1) {
|
||||
should_squeeze[idx] = true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (int idx = 0; idx < num_squeeze_dims; ++idx) {
|
||||
int current = squeeze_dims[idx] < 0 ? squeeze_dims[idx] + input_num_dims
|
||||
: squeeze_dims[idx];
|
||||
TF_LITE_ENSURE(context, current >= 0 && current < input_num_dims &&
|
||||
input_dims->data[current] == 1);
|
||||
should_squeeze[current] = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure output dimensions are big enough.
|
||||
for (int in_idx = 0, out_idx = 0; in_idx < input_num_dims; ++in_idx) {
|
||||
if (!should_squeeze[in_idx]) {
|
||||
TFLITE_CHECK_GE(output_dims->data[out_idx++], input_dims->data[in_idx]);
|
||||
}
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
SqueezeContext op_context(context, node);
|
||||
|
||||
if (op_context.input->type == kTfLiteString) {
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(op_context.input->type),
|
||||
op_context.input->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, op_context.input->bytes, op_context.output->bytes);
|
||||
memcpy(op_context.output->data.raw, op_context.input->data.raw,
|
||||
op_context.input->bytes);
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
TfLiteRegistration Register_SQUEEZE() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/Prepare,
|
||||
/*invoke=*/Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace tflite
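Eval above is a plain memcpy because removing size-1 dimensions never changes the flattened element order; only the dims metadata differs. Below is a standalone sketch of the axis normalization performed in Prepare, applied to a concrete shape (values chosen for illustration):

// Illustrative sketch only: input {1, 5, 1, 3} with squeeze_dims {-4, 2}
// yields {5, 3}; negative axes count from the end, as in the kernel above.
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> dims = {1, 5, 1, 3};
  std::vector<int> squeeze_dims = {-4, 2};
  std::vector<bool> should_squeeze(dims.size(), false);
  for (int axis : squeeze_dims) {
    const int current = axis < 0 ? axis + static_cast<int>(dims.size()) : axis;
    if (current >= 0 && current < static_cast<int>(dims.size()) &&
        dims[current] == 1) {
      should_squeeze[current] = true;
    }
  }
  std::printf("output dims:");
  for (size_t i = 0; i < dims.size(); ++i) {
    if (!should_squeeze[i]) std::printf(" %d", dims[i]);
  }
  std::printf("\n");  // prints: output dims: 5 3
  return 0;
}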
|
||||
@@ -1,4 +1,4 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/micro/kernels/svdf.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
@@ -29,496 +31,44 @@ limitations under the License.
|
||||
namespace tflite {
|
||||
namespace {
|
||||
|
||||
struct OpData {
|
||||
int32_t effective_scale_1_a;
|
||||
int32_t effective_scale_2_a;
|
||||
// b versions of each scale are kept at int since the numbers are just the
|
||||
// shift value - typically between [-32, 32].
|
||||
int effective_scale_1_b;
|
||||
int effective_scale_2_b;
|
||||
int scratch_tensor_index;
|
||||
int scratch_output_tensor_index;
|
||||
|
||||
// Cached tensor zero point values for quantized operations.
|
||||
int input_zero_point;
|
||||
int output_zero_point;
|
||||
};
|
||||
|
||||
// Input tensors.
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kWeightsFeatureTensor = 1;
|
||||
constexpr int kWeightsTimeTensor = 2;
|
||||
constexpr int kBiasTensor = 3;
|
||||
// This is a variable tensor, and will be modified by this op.
|
||||
constexpr int kInputActivationStateTensor = 4;
|
||||
|
||||
// Output tensor.
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
/**
 * This version of SVDF is specific to TFLite Micro. It contains the following
 * differences from the TFLite version:
 *
 * 1.) Scratch tensor allocation - scratch tensors must be known ahead of time
 *     for the Micro interpreter.
 * 2.) Output dimensions - the TFLite version determines output size at runtime
 *     and resizes the output tensor. Micro runtime does not support tensor
 *     resizing.
 */
|
||||
static inline void ApplyTimeWeightsBiasAndActivation(
|
||||
int batch_size, int memory_size, int num_filters, int num_units, int rank,
|
||||
const float* const __restrict__ weights_time_ptr,
|
||||
const float* const __restrict__ bias_ptr, TfLiteFusedActivation activation,
|
||||
float* const __restrict__ state_ptr, float* const __restrict__ scratch_ptr,
|
||||
float* const __restrict__ output_ptr) {
|
||||
// Compute matmul(activation_state, weights_time).
|
||||
for (int b = 0; b < batch_size; ++b) {
|
||||
// Perform batched vector dot product:
|
||||
float* scratch_ptr_batch = scratch_ptr + b * num_filters;
|
||||
const float* vector1_ptr = weights_time_ptr;
|
||||
const float* vector2_ptr = state_ptr + b * memory_size * num_filters;
|
||||
for (int i = 0; i < num_filters; ++i) {
|
||||
*scratch_ptr_batch = 0.f;
|
||||
for (int j = 0; j < memory_size; ++j) {
|
||||
*scratch_ptr_batch += *vector1_ptr++ * *vector2_ptr++;
|
||||
}
|
||||
scratch_ptr_batch++;
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize output with bias if provided.
|
||||
if (bias_ptr) {
|
||||
// VectorBatchVectorAssign
|
||||
for (int i = 0; i < batch_size; ++i) {
|
||||
float* output_data = output_ptr + i * num_units;
|
||||
const float* bias_data = bias_ptr;
|
||||
for (int j = 0; j < num_units; ++j) {
|
||||
*output_data++ = *bias_data++;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
float* output_data = output_ptr;
|
||||
for (int i = 0; i < batch_size * num_units; ++i) {
|
||||
*output_data++ = 0.0f;
|
||||
}
|
||||
}
|
||||
|
||||
// Reduction sum.
|
||||
for (int b = 0; b < batch_size; ++b) {
|
||||
float* output_ptr_batch = output_ptr + b * num_units;
|
||||
float* scratch_ptr_batch = scratch_ptr + b * num_filters;
|
||||
|
||||
// Reduction sum vector
|
||||
for (int i = 0; i < num_units; ++i) {
|
||||
for (int j = 0; j < rank; j++) {
|
||||
output_ptr_batch[i] += *scratch_ptr_batch++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Apply activation.
|
||||
for (int b = 0; b < batch_size; ++b) {
|
||||
float* output_ptr_batch = output_ptr + b * num_units;
|
||||
for (int i = 0; i < num_units; ++i) {
|
||||
*output_ptr_batch =
|
||||
tflite::ops::micro::ActivationValFloat(activation, *output_ptr_batch);
|
||||
++output_ptr_batch;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline void EvalFloatSVDF(
|
||||
TfLiteContext* context, TfLiteNode* node, const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* weights_feature,
|
||||
const TfLiteEvalTensor* weights_time, const TfLiteEvalTensor* bias,
|
||||
const TfLiteSVDFParams* params, int scratch_tensor_index,
|
||||
TfLiteEvalTensor* activation_state, TfLiteEvalTensor* output) {
|
||||
const int rank = params->rank;
|
||||
const int batch_size = input->dims->data[0];
|
||||
const int input_size = input->dims->data[1];
|
||||
const int num_filters = weights_feature->dims->data[0];
|
||||
const int num_units = num_filters / rank;
|
||||
const int memory_size = weights_time->dims->data[1];
|
||||
|
||||
const float* weights_feature_ptr =
|
||||
tflite::micro::GetTensorData<float>(weights_feature);
|
||||
const float* weights_time_ptr =
|
||||
tflite::micro::GetTensorData<float>(weights_time);
|
||||
const float* bias_ptr = tflite::micro::GetTensorData<float>(bias);
|
||||
const float* input_ptr = tflite::micro::GetTensorData<float>(input);
|
||||
|
||||
float* state_ptr = tflite::micro::GetTensorData<float>(activation_state);
|
||||
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
TFLITE_DCHECK(context->GetScratchBuffer != nullptr);
|
||||
|
||||
float* scratch_ptr = static_cast<float*>(
|
||||
context->GetScratchBuffer(context, scratch_tensor_index));
|
||||
|
||||
float* output_ptr = tflite::micro::GetTensorData<float>(output);
|
||||
|
||||
// Left shift the activation_state.
|
||||
{
|
||||
float* new_state_start = state_ptr;
|
||||
const float* old_state_start = state_ptr + 1;
|
||||
const float* old_state_end =
|
||||
state_ptr + batch_size * num_filters * memory_size;
|
||||
while (old_state_start != old_state_end) {
|
||||
*new_state_start++ = *old_state_start++;
|
||||
}
|
||||
}
|
||||
|
||||
// Note: no need to clear the latest activation, matmul is not accumulative.
|
||||
|
||||
// Compute conv1d(inputs, weights_feature).
|
||||
// The activation_state's rightmost column is used to save current cycle
|
||||
// activation. This is achieved by starting at state_ptr[memory_size - 1] and
|
||||
// having the stride equal to memory_size.
|
||||
|
||||
// Perform batched matrix vector multiply operation:
|
||||
{
|
||||
const float* matrix = weights_feature_ptr;
|
||||
const float* vector = input_ptr;
|
||||
float* result = &state_ptr[memory_size - 1];
|
||||
float* result_in_batch = result;
|
||||
for (int i = 0; i < batch_size; ++i) {
|
||||
const float* matrix_ptr = matrix;
|
||||
for (int j = 0; j < num_filters; ++j) {
|
||||
float dot_prod = 0.0f;
|
||||
const float* vector_in_batch = vector + i * input_size;
|
||||
for (int k = 0; k < input_size; ++k) {
|
||||
dot_prod += *matrix_ptr++ * *vector_in_batch++;
|
||||
}
|
||||
*result_in_batch = dot_prod;
|
||||
result_in_batch += memory_size;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ApplyTimeWeightsBiasAndActivation(
|
||||
batch_size, memory_size, num_filters, num_units, rank, weights_time_ptr,
|
||||
bias_ptr, params->activation, state_ptr, scratch_ptr, output_ptr);
|
||||
}
|
||||
|
||||
void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
|
||||
const TfLiteEvalTensor* input_tensor,
|
||||
const TfLiteEvalTensor* weights_feature_tensor,
|
||||
const TfLiteEvalTensor* weights_time_tensor,
|
||||
const TfLiteEvalTensor* bias_tensor,
|
||||
const TfLiteSVDFParams* params,
|
||||
TfLiteEvalTensor* activation_state_tensor,
|
||||
TfLiteEvalTensor* output_tensor, const OpData& data) {
|
||||
const int n_rank = params->rank;
|
||||
const int n_batch = input_tensor->dims->data[0];
|
||||
const int n_input = input_tensor->dims->data[1];
|
||||
const int n_filter = weights_feature_tensor->dims->data[0];
|
||||
const int n_unit = n_filter / n_rank;
|
||||
const int n_memory = weights_time_tensor->dims->data[1];
|
||||
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
TFLITE_DCHECK(context->GetScratchBuffer != nullptr);
|
||||
|
||||
int32_t* scratch_tensor = static_cast<int32_t*>(
|
||||
context->GetScratchBuffer(context, data.scratch_tensor_index));
|
||||
int32_t* scratch_output_tensor = static_cast<int32_t*>(
|
||||
context->GetScratchBuffer(context, data.scratch_output_tensor_index));
|
||||
|
||||
// Shift states.
|
||||
int16_t* const state_ptr =
|
||||
tflite::micro::GetTensorData<int16_t>(activation_state_tensor);
|
||||
|
||||
// Left shift the activation_state.
|
||||
{
|
||||
int16_t* new_state_start = state_ptr;
|
||||
const int16_t* old_state_start = state_ptr + 1;
|
||||
const int16_t* old_state_end = state_ptr + n_batch * n_filter * n_memory;
|
||||
while (old_state_start != old_state_end) {
|
||||
*new_state_start++ = *old_state_start++;
|
||||
}
|
||||
}
|
||||
|
||||
// Note: no need to clear the latest activation, matmul is not accumulative.
|
||||
|
||||
// Feature matmul.
|
||||
{
|
||||
int16_t* state =
|
||||
tflite::micro::GetTensorData<int16_t>(activation_state_tensor);
|
||||
const int8_t* input = tflite::micro::GetTensorData<int8_t>(input_tensor);
|
||||
const int8_t* weight_feature =
|
||||
tflite::micro::GetTensorData<int8_t>(weights_feature_tensor);
|
||||
const int32_t output_max = std::numeric_limits<int16_t>::max();
|
||||
const int32_t output_min = std::numeric_limits<int16_t>::min();
|
||||
int16_t* result_in_batch = state + (n_memory - 1);
|
||||
for (int b = 0; b < n_batch; b++) {
|
||||
const int8_t* matrix_ptr = weight_feature;
|
||||
for (int r = 0; r < n_filter; r++) {
|
||||
int32_t dot_prod = 0;
|
||||
const int8_t* vector_in_batch = input + b * n_input;
|
||||
for (int c = 0; c < n_input; c++) {
|
||||
dot_prod +=
|
||||
*matrix_ptr++ * (*vector_in_batch++ - data.input_zero_point);
|
||||
}
|
||||
dot_prod = MultiplyByQuantizedMultiplier(
|
||||
dot_prod, data.effective_scale_1_a, data.effective_scale_1_b);
|
||||
dot_prod = std::min(std::max(output_min, dot_prod), output_max);
|
||||
// This assumes state is symmetrically quantized. Otherwise last bit of
|
||||
// state should be initialized to its zero point and accumulate the
|
||||
// dot_prod.
|
||||
// Equivalent to the following:
|
||||
// result_in_batch = zero point, which happens to be zero.
|
||||
// result_in_batch += dot_prod_56.
|
||||
*result_in_batch = dot_prod;
|
||||
result_in_batch += n_memory;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Time.
|
||||
{
|
||||
for (int b = 0; b < n_batch; ++b) {
|
||||
int32_t* scratch_ptr_batch = scratch_tensor + b * n_filter;
|
||||
|
||||
// Perform batched vector dot product:
|
||||
const int16_t* vector1_ptr =
|
||||
tflite::micro::GetTensorData<int16_t>(weights_time_tensor);
|
||||
const int16_t* vector2_ptr =
|
||||
tflite::micro::GetTensorData<int16_t>(activation_state_tensor) +
|
||||
b * n_memory * n_filter;
|
||||
|
||||
for (int i = 0; i < n_filter; i++) {
|
||||
*scratch_ptr_batch = 0;
|
||||
for (int j = 0; j < n_memory; j++) {
|
||||
*scratch_ptr_batch += *vector1_ptr++ * *vector2_ptr++;
|
||||
}
|
||||
scratch_ptr_batch++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reduce, add bias, rescale, activation.
|
||||
{
|
||||
// Add bias.
|
||||
if (bias_tensor) {
|
||||
// Vector batch assign:
|
||||
const int32_t* bias_data =
|
||||
tflite::micro::GetTensorData<int32_t>(bias_tensor);
|
||||
for (int i = 0; i < n_batch; ++i) {
|
||||
int32_t* output_ptr = scratch_output_tensor + i * n_unit;
|
||||
const int32_t* bias_ptr = bias_data;
|
||||
for (int j = 0; j < n_unit; ++j) {
|
||||
*output_ptr++ = *bias_ptr++;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
int32_t* output_ptr = scratch_output_tensor;
|
||||
for (int i = 0; i < n_batch * n_unit; ++i) {
|
||||
*output_ptr++ = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Reduce.
|
||||
for (int b = 0; b < n_batch; ++b) {
|
||||
int32_t* output_temp_ptr = scratch_output_tensor + b * n_unit;
|
||||
int32_t* scratch_ptr_batch = scratch_tensor + b * n_filter;
|
||||
|
||||
// Reduction sum vector
|
||||
for (int i = 0; i < n_unit; ++i) {
|
||||
for (int j = 0; j < n_rank; ++j) {
|
||||
output_temp_ptr[i] += *scratch_ptr_batch++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Rescale.
|
||||
const int32_t output_max = std::numeric_limits<int8_t>::max();
|
||||
const int32_t output_min = std::numeric_limits<int8_t>::min();
|
||||
for (int i = 0; i < n_batch * n_unit; ++i) {
|
||||
int32_t x1 = scratch_output_tensor[i];
|
||||
int32_t x2 = MultiplyByQuantizedMultiplier(x1, data.effective_scale_2_a,
|
||||
data.effective_scale_2_b);
|
||||
int32_t x3 = x2 + data.output_zero_point;
|
||||
int32_t x4 = std::min(std::max(output_min, x3), output_max);
|
||||
tflite::micro::GetTensorData<int8_t>(output_tensor)[i] =
|
||||
static_cast<int8_t>(x4);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
|
||||
const auto* params = static_cast<const TfLiteSVDFParams*>(node->builtin_data);
|
||||
|
||||
// Validate Tensor Inputs (dtype depends on quantization):
|
||||
// [0] = Input, {2, batch_size, input_size}
|
||||
// [1] = Weights Feature, {2, num_filters, input_size}
|
||||
// [2] = Weights Time, {2, num_filters, memory_size}
|
||||
// [3] = Bias (optional), {1, num_units}
|
||||
// [4] = Activation State (variable),
|
||||
// {2, batch_size, memory_size * num_filters}
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
const TfLiteTensor* weights_feature =
|
||||
GetInput(context, node, kWeightsFeatureTensor);
|
||||
TF_LITE_ENSURE(context, weights_feature != nullptr);
|
||||
const TfLiteTensor* weights_time =
|
||||
GetInput(context, node, kWeightsTimeTensor);
|
||||
TF_LITE_ENSURE(context, weights_time != nullptr);
|
||||
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
|
||||
const TfLiteTensor* activation_state =
|
||||
GetInput(context, node, kInputActivationStateTensor);
|
||||
TF_LITE_ENSURE(context, activation_state != nullptr);
|
||||
|
||||
// Define input constants based on input tensor definition above:
|
||||
const int rank = params->rank;
|
||||
const int input_size = input->dims->data[1];
|
||||
const int batch_size = input->dims->data[0];
|
||||
const int num_filters = weights_feature->dims->data[0];
|
||||
TF_LITE_ENSURE_EQ(context, num_filters % rank, 0);
|
||||
const int num_units = num_filters / rank;
|
||||
const int memory_size = weights_time->dims->data[1];
|
||||
|
||||
// Validate Input Tensor:
|
||||
TF_LITE_ENSURE(context,
|
||||
input->type == kTfLiteFloat32 || input->type == kTfLiteInt8);
|
||||
TF_LITE_ENSURE_EQ(context, NumDimensions(input), 2);
|
||||
|
||||
// Validate Tensor Output:
|
||||
// [0] = float/int8_t, {2, batch_size, num_units}
|
||||
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
TF_LITE_ENSURE_EQ(context, NumDimensions(output), 2);
|
||||
TF_LITE_ENSURE_EQ(context, output->dims->data[0], batch_size);
|
||||
TF_LITE_ENSURE_EQ(context, output->dims->data[1], num_units);
|
||||
|
||||
// Validate Weights Feature Input Tensor:
|
||||
TF_LITE_ENSURE_EQ(context, NumDimensions(weights_feature), 2);
|
||||
TF_LITE_ENSURE_EQ(context, weights_feature->dims->data[1], input_size);
|
||||
|
||||
// Validate Weights Time Input Tensor:
|
||||
TF_LITE_ENSURE_EQ(context, NumDimensions(weights_time), 2);
|
||||
TF_LITE_ENSURE_EQ(context, weights_time->dims->data[0], num_filters);
|
||||
TF_LITE_ENSURE_EQ(context, weights_time->dims->data[1], memory_size);
|
||||
|
||||
// Validate Optional Bias Input Tensor:
|
||||
if (bias != nullptr) {
|
||||
TF_LITE_ENSURE_EQ(context, bias->dims->data[0], num_units);
|
||||
}
|
||||
|
||||
// Validate Activation State Input Tensor:
|
||||
TF_LITE_ENSURE_EQ(context, NumDimensions(activation_state), 2);
|
||||
TF_LITE_ENSURE_EQ(context, activation_state->dims->data[0], batch_size);
|
||||
TF_LITE_ENSURE_EQ(context, activation_state->dims->data[1],
|
||||
memory_size * num_filters);
|
||||
// Since is_variable is not part of TFLiteEvalTensor, check is_variable here.
|
||||
TF_LITE_ENSURE_EQ(context, activation_state->is_variable, true);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, node->inputs->size, 5);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
if (input->type == kTfLiteInt8) {
|
||||
TF_LITE_ENSURE_EQ(context, weights_feature->type, kTfLiteInt8);
|
||||
TF_LITE_ENSURE_EQ(context, weights_time->type, kTfLiteInt16);
|
||||
TF_LITE_ENSURE_EQ(context, activation_state->type, kTfLiteInt16);
|
||||
if (bias != nullptr) {
|
||||
TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteInt32);
|
||||
}
|
||||
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8);
|
||||
|
||||
const double effective_scale_1 = static_cast<double>(
|
||||
input->params.scale * weights_feature->params.scale /
|
||||
activation_state->params.scale);
|
||||
const double effective_scale_2 =
|
||||
static_cast<double>(activation_state->params.scale *
|
||||
weights_time->params.scale / output->params.scale);
|
||||
|
||||
// TODO(b/162018098): Use TF_LITE_ENSURE_NEAR when it is ready.
|
||||
TF_LITE_ENSURE(
|
||||
context,
|
||||
std::abs(static_cast<double>(bias->params.scale) -
|
||||
static_cast<double>(activation_state->params.scale *
|
||||
weights_time->params.scale)) < 1e-5);
|
||||
|
||||
QuantizeMultiplier(effective_scale_1, &(data->effective_scale_1_a),
|
||||
&(data->effective_scale_1_b));
|
||||
QuantizeMultiplier(effective_scale_2, &(data->effective_scale_2_a),
|
||||
&(data->effective_scale_2_b));
|
||||
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
data->output_zero_point = output->params.zero_point;
|
||||
|
||||
TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
|
||||
|
||||
const TfLiteStatus scratch_status = context->RequestScratchBufferInArena(
|
||||
context, batch_size * num_filters * sizeof(int32_t),
|
||||
&(data->scratch_tensor_index));
|
||||
TF_LITE_ENSURE_OK(context, scratch_status);
|
||||
|
||||
const TfLiteStatus scratch_output_status =
|
||||
context->RequestScratchBufferInArena(
|
||||
context, batch_size * num_units * sizeof(int32_t),
|
||||
&(data->scratch_output_tensor_index));
|
||||
TF_LITE_ENSURE_OK(context, scratch_output_status);
|
||||
} else {
|
||||
TF_LITE_ENSURE_EQ(context, weights_feature->type, kTfLiteFloat32);
|
||||
TF_LITE_ENSURE_EQ(context, weights_time->type, kTfLiteFloat32);
|
||||
TF_LITE_ENSURE_EQ(context, activation_state->type, kTfLiteFloat32);
|
||||
if (bias != nullptr) {
|
||||
TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteFloat32);
|
||||
}
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteFloat32);
|
||||
|
||||
TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
|
||||
const TfLiteStatus scratch_status = context->RequestScratchBufferInArena(
|
||||
context, batch_size * num_filters * sizeof(float),
|
||||
&(data->scratch_tensor_index));
|
||||
TF_LITE_ENSURE_OK(context, scratch_status);
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  auto* params = reinterpret_cast<TfLiteSVDFParams*>(node->builtin_data);
  TFLITE_DCHECK(node->user_data != nullptr);
  const OpData& data = *(static_cast<const OpData*>(node->user_data));

  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
      tflite::micro::GetEvalInput(context, node, kSvdfInputTensor);
  const TfLiteEvalTensor* weights_feature =
      tflite::micro::GetEvalInput(context, node, kWeightsFeatureTensor);
      tflite::micro::GetEvalInput(context, node, kSvdfWeightsFeatureTensor);
  const TfLiteEvalTensor* weights_time =
      tflite::micro::GetEvalInput(context, node, kWeightsTimeTensor);
      tflite::micro::GetEvalInput(context, node, kSvdfWeightsTimeTensor);
  const TfLiteEvalTensor* bias =
      (NumInputs(node) == 5)
          ? tflite::micro::GetEvalInput(context, node, kBiasTensor)
          ? tflite::micro::GetEvalInput(context, node, kSvdfBiasTensor)
          : nullptr;
  TfLiteEvalTensor* activation_state = tflite::micro::GetMutableEvalInput(
      context, node, kInputActivationStateTensor);
      context, node, kSvdfInputActivationStateTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
      tflite::micro::GetEvalOutput(context, node, kSvdfOutputTensor);

  switch (weights_feature->type) {
    case kTfLiteFloat32: {
      EvalFloatSVDF(context, node, input, weights_feature, weights_time, bias,
                    params, data.scratch_tensor_index, activation_state,
                    output);
      EvalFloatSvdfReference(
          context, node, input, weights_feature, weights_time, bias, params,
          data.scratch_tensor_index, activation_state, output);
      return kTfLiteOk;
      break;
    }

    case kTfLiteInt8: {
      EvalIntegerSVDF(context, node, input, weights_feature, weights_time, bias,
                      params, activation_state, output, data);
      EvalIntegerSvdfReference(context, node, input, weights_feature,
                               weights_time, bias, params, activation_state,
                               output, data);
      return kTfLiteOk;
      break;
    }
@@ -536,7 +86,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TfLiteRegistration Register_SVDF() {
  return {/*init=*/Init,
          /*free=*/nullptr,
          /*prepare=*/Prepare,
          /*prepare=*/PrepareSvdf,
          /*invoke=*/Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
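A usage sketch for the registration above, assuming this tfmicro snapshot exposes AddSvdf() on MicroMutableOpResolver as upstream TFLM does (check micro_mutable_op_resolver.h in this tree before relying on it):

#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

// Makes Register_SVDF() available to a MicroInterpreter under the SVDF
// builtin code; the template argument is the number of ops the resolver holds.
void RegisterSvdfOp(tflite::MicroMutableOpResolver<1>* resolver) {
  resolver->AddSvdf();
}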
71
code/components/tfmicro/tensorflow/lite/micro/kernels/svdf.h
Normal file
@@ -0,0 +1,71 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_SVDF_H_
|
||||
#define TENSORFLOW_LITE_MICRO_KERNELS_SVDF_H_
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
struct OpData {
|
||||
int32_t effective_scale_1_a;
|
||||
int32_t effective_scale_2_a;
|
||||
// b versions of each scale are kept at int since the numbers are just the
|
||||
// shift value - typically between [-32, 32].
|
||||
int effective_scale_1_b;
|
||||
int effective_scale_2_b;
|
||||
int scratch_tensor_index;
|
||||
int scratch_output_tensor_index;
|
||||
|
||||
// Cached tensor zero point values for quantized operations.
|
||||
int input_zero_point;
|
||||
int output_zero_point;
|
||||
};
|
||||
|
||||
// Input tensors.
|
||||
extern const int kSvdfInputTensor;
|
||||
extern const int kSvdfWeightsFeatureTensor;
|
||||
extern const int kSvdfWeightsTimeTensor;
|
||||
extern const int kSvdfBiasTensor;
|
||||
// This is a variable tensor, and will be modified by this op.
|
||||
extern const int kSvdfInputActivationStateTensor;
|
||||
|
||||
// Output tensor.
|
||||
extern const int kSvdfOutputTensor;
|
||||
|
||||
// TensorflowLite Micro-specific reference implementation for Integer SVDF.
|
||||
void EvalIntegerSvdfReference(TfLiteContext* context, TfLiteNode* node,
|
||||
const TfLiteEvalTensor* input_tensor,
|
||||
const TfLiteEvalTensor* weights_feature_tensor,
|
||||
const TfLiteEvalTensor* weights_time_tensor,
|
||||
const TfLiteEvalTensor* bias_tensor,
|
||||
const TfLiteSVDFParams* params,
|
||||
TfLiteEvalTensor* activation_state_tensor,
|
||||
TfLiteEvalTensor* output_tensor,
|
||||
const OpData& data);
|
||||
|
||||
void EvalFloatSvdfReference(
|
||||
TfLiteContext* context, TfLiteNode* node, const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* weights_feature,
|
||||
const TfLiteEvalTensor* weights_time, const TfLiteEvalTensor* bias,
|
||||
const TfLiteSVDFParams* params, int scratch_tensor_index,
|
||||
TfLiteEvalTensor* activation_state, TfLiteEvalTensor* output);
|
||||
|
||||
TfLiteStatus PrepareSvdf(TfLiteContext* context, TfLiteNode* node);
|
||||
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_KERNELS_SVDF_H_
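The int8 Prepare path folds the tensor scales into effective_scale_1 and effective_scale_2 and hands each to QuantizeMultiplier, which splits a real-valued scale into a Q31 fixed-point multiplier plus a power-of-two shift. A standalone sketch of that decomposition follows (illustrative only; DecomposeScale is a hypothetical stand-in for tflite::QuantizeMultiplier and omits most of its saturation handling):

#include <cmath>
#include <cstdint>
#include <cstdio>

// Splits scale into multiplier * 2^shift with the multiplier in Q31, so the
// runtime can rescale accumulators without floating point.
void DecomposeScale(double scale, int32_t* multiplier, int* shift) {
  if (scale == 0.0) {
    *multiplier = 0;
    *shift = 0;
    return;
  }
  const double fraction = std::frexp(scale, shift);  // scale = fraction * 2^shift
  int64_t q = static_cast<int64_t>(std::round(fraction * (1LL << 31)));
  if (q == (1LL << 31)) {  // rounding pushed the fraction up to 1.0
    q /= 2;
    ++(*shift);
  }
  *multiplier = static_cast<int32_t>(q);
}

int main() {
  // Example values only: input_scale * weights_feature_scale / state_scale.
  const double effective_scale_1 = 0.003921 * 0.01 / 0.05;
  int32_t multiplier = 0;
  int shift = 0;
  DecomposeScale(effective_scale_1, &multiplier, &shift);
  std::printf("multiplier=%ld shift=%d\n", static_cast<long>(multiplier), shift);
  return 0;
}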
|
||||
@@ -0,0 +1,469 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/activation_utils.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/svdf.h"
|
||||
#include "tensorflow/lite/micro/micro_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
/**
 * This version of SVDF is specific to TFLite Micro. It contains the following
 * differences from the TFLite version:
 *
 * 1.) Scratch tensor allocation - scratch tensors must be known ahead of time
 *     for the Micro interpreter.
 * 2.) Output dimensions - the TFLite version determines output size at runtime
 *     and resizes the output tensor. Micro runtime does not support tensor
 *     resizing.
 */
|
||||
|
||||
const int kSvdfInputTensor = 0;
|
||||
const int kSvdfWeightsFeatureTensor = 1;
|
||||
const int kSvdfWeightsTimeTensor = 2;
|
||||
const int kSvdfBiasTensor = 3;
|
||||
const int kSvdfInputActivationStateTensor =
|
||||
4; // This is a variable tensor, and will be modified by this op.
|
||||
const int kSvdfOutputTensor = 0;
|
||||
|
||||
void EvalIntegerSvdfReference(TfLiteContext* context, TfLiteNode* node,
|
||||
const TfLiteEvalTensor* input_tensor,
|
||||
const TfLiteEvalTensor* weights_feature_tensor,
|
||||
const TfLiteEvalTensor* weights_time_tensor,
|
||||
const TfLiteEvalTensor* bias_tensor,
|
||||
const TfLiteSVDFParams* params,
|
||||
TfLiteEvalTensor* activation_state_tensor,
|
||||
TfLiteEvalTensor* output_tensor,
|
||||
const OpData& data) {
|
||||
const int n_rank = params->rank;
|
||||
const int n_batch = input_tensor->dims->data[0];
|
||||
const int n_input = input_tensor->dims->data[1];
|
||||
const int n_filter = weights_feature_tensor->dims->data[0];
|
||||
const int n_unit = n_filter / n_rank;
|
||||
const int n_memory = weights_time_tensor->dims->data[1];
|
||||
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
TFLITE_DCHECK(context->GetScratchBuffer != nullptr);
|
||||
|
||||
int32_t* scratch_tensor = static_cast<int32_t*>(
|
||||
context->GetScratchBuffer(context, data.scratch_tensor_index));
|
||||
int32_t* scratch_output_tensor = static_cast<int32_t*>(
|
||||
context->GetScratchBuffer(context, data.scratch_output_tensor_index));
|
||||
|
||||
// Shift states.
|
||||
int16_t* const state_ptr =
|
||||
tflite::micro::GetTensorData<int16_t>(activation_state_tensor);
|
||||
|
||||
// Left shift the activation_state.
|
||||
{
|
||||
int16_t* new_state_start = state_ptr;
|
||||
const int16_t* old_state_start = state_ptr + 1;
|
||||
const int16_t* old_state_end = state_ptr + n_batch * n_filter * n_memory;
|
||||
while (old_state_start != old_state_end) {
|
||||
*new_state_start++ = *old_state_start++;
|
||||
}
|
||||
}
|
||||
|
||||
// Note: no need to clear the latest activation, matmul is not accumulative.
|
||||
|
||||
// Feature matmul.
|
||||
{
|
||||
int16_t* state =
|
||||
tflite::micro::GetTensorData<int16_t>(activation_state_tensor);
|
||||
const int8_t* input = tflite::micro::GetTensorData<int8_t>(input_tensor);
|
||||
const int8_t* weight_feature =
|
||||
tflite::micro::GetTensorData<int8_t>(weights_feature_tensor);
|
||||
const int32_t output_max = std::numeric_limits<int16_t>::max();
|
||||
const int32_t output_min = std::numeric_limits<int16_t>::min();
|
||||
int16_t* result_in_batch = state + (n_memory - 1);
|
||||
for (int b = 0; b < n_batch; b++) {
|
||||
const int8_t* matrix_ptr = weight_feature;
|
||||
for (int r = 0; r < n_filter; r++) {
|
||||
int32_t dot_prod = 0;
|
||||
const int8_t* vector_in_batch = input + b * n_input;
|
||||
for (int c = 0; c < n_input; c++) {
|
||||
dot_prod +=
|
||||
*matrix_ptr++ * (*vector_in_batch++ - data.input_zero_point);
|
||||
}
|
||||
dot_prod = MultiplyByQuantizedMultiplier(
|
||||
dot_prod, data.effective_scale_1_a, data.effective_scale_1_b);
|
||||
dot_prod = std::min(std::max(output_min, dot_prod), output_max);
|
||||
// This assumes state is symmetrically quantized. Otherwise last bit of
|
||||
// state should be initialized to its zero point and accumulate the
|
||||
// dot_prod.
|
||||
// Equivalent to the following:
|
||||
// result_in_batch = zero point, which happens to be zero.
|
||||
// result_in_batch += dot_prod_56.
|
||||
*result_in_batch = dot_prod;
|
||||
result_in_batch += n_memory;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Time.
|
||||
{
|
||||
for (int b = 0; b < n_batch; ++b) {
|
||||
int32_t* scratch_ptr_batch = scratch_tensor + b * n_filter;
|
||||
|
||||
// Perform batched vector dot product:
|
||||
const int16_t* vector1_ptr =
|
||||
tflite::micro::GetTensorData<int16_t>(weights_time_tensor);
|
||||
const int16_t* vector2_ptr =
|
||||
tflite::micro::GetTensorData<int16_t>(activation_state_tensor) +
|
||||
b * n_memory * n_filter;
|
||||
|
||||
for (int i = 0; i < n_filter; i++) {
|
||||
*scratch_ptr_batch = 0;
|
||||
for (int j = 0; j < n_memory; j++) {
|
||||
*scratch_ptr_batch += *vector1_ptr++ * *vector2_ptr++;
|
||||
}
|
||||
scratch_ptr_batch++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reduce, add bias, rescale, activation.
|
||||
{
|
||||
// Add bias.
|
||||
if (bias_tensor) {
|
||||
// Vector batch assign:
|
||||
const int32_t* bias_data =
|
||||
tflite::micro::GetTensorData<int32_t>(bias_tensor);
|
||||
for (int i = 0; i < n_batch; ++i) {
|
||||
int32_t* output_ptr = scratch_output_tensor + i * n_unit;
|
||||
const int32_t* bias_ptr = bias_data;
|
||||
for (int j = 0; j < n_unit; ++j) {
|
||||
*output_ptr++ = *bias_ptr++;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
int32_t* output_ptr = scratch_output_tensor;
|
||||
for (int i = 0; i < n_batch * n_unit; ++i) {
|
||||
*output_ptr++ = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Reduce.
|
||||
for (int b = 0; b < n_batch; ++b) {
|
||||
int32_t* output_temp_ptr = scratch_output_tensor + b * n_unit;
|
||||
int32_t* scratch_ptr_batch = scratch_tensor + b * n_filter;
|
||||
|
||||
// Reduction sum vector
|
||||
for (int i = 0; i < n_unit; ++i) {
|
||||
for (int j = 0; j < n_rank; ++j) {
|
||||
output_temp_ptr[i] += *scratch_ptr_batch++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Rescale.
|
||||
const int32_t output_max = std::numeric_limits<int8_t>::max();
|
||||
const int32_t output_min = std::numeric_limits<int8_t>::min();
|
||||
for (int i = 0; i < n_batch * n_unit; ++i) {
|
||||
int32_t x1 = scratch_output_tensor[i];
|
||||
int32_t x2 = MultiplyByQuantizedMultiplier(x1, data.effective_scale_2_a,
|
||||
data.effective_scale_2_b);
|
||||
int32_t x3 = x2 + data.output_zero_point;
|
||||
int32_t x4 = std::min(std::max(output_min, x3), output_max);
|
||||
tflite::micro::GetTensorData<int8_t>(output_tensor)[i] =
|
||||
static_cast<int8_t>(x4);
|
||||
}
|
||||
}
|
||||
}
|
||||
static inline void ApplyTimeWeightsBiasAndActivation(
|
||||
int batch_size, int memory_size, int num_filters, int num_units, int rank,
|
||||
const float* const __restrict__ weights_time_ptr,
|
||||
const float* const __restrict__ bias_ptr, TfLiteFusedActivation activation,
|
||||
float* const __restrict__ state_ptr, float* const __restrict__ scratch_ptr,
|
||||
float* const __restrict__ output_ptr) {
|
||||
// Compute matmul(activation_state, weights_time).
|
||||
for (int b = 0; b < batch_size; ++b) {
|
||||
// Perform batched vector dot product:
|
||||
float* scratch_ptr_batch = scratch_ptr + b * num_filters;
|
||||
const float* vector1_ptr = weights_time_ptr;
|
||||
const float* vector2_ptr = state_ptr + b * memory_size * num_filters;
|
||||
for (int i = 0; i < num_filters; ++i) {
|
||||
*scratch_ptr_batch = 0.f;
|
||||
for (int j = 0; j < memory_size; ++j) {
|
||||
*scratch_ptr_batch += *vector1_ptr++ * *vector2_ptr++;
|
||||
}
|
||||
scratch_ptr_batch++;
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize output with bias if provided.
|
||||
if (bias_ptr) {
|
||||
// VectorBatchVectorAssign
|
||||
for (int i = 0; i < batch_size; ++i) {
|
||||
float* output_data = output_ptr + i * num_units;
|
||||
const float* bias_data = bias_ptr;
|
||||
for (int j = 0; j < num_units; ++j) {
|
||||
*output_data++ = *bias_data++;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
float* output_data = output_ptr;
|
||||
for (int i = 0; i < batch_size * num_units; ++i) {
|
||||
*output_data++ = 0.0f;
|
||||
}
|
||||
}
|
||||
|
||||
// Reduction sum.
|
||||
for (int b = 0; b < batch_size; ++b) {
|
||||
float* output_ptr_batch = output_ptr + b * num_units;
|
||||
float* scratch_ptr_batch = scratch_ptr + b * num_filters;
|
||||
|
||||
// Reduction sum vector
|
||||
for (int i = 0; i < num_units; ++i) {
|
||||
for (int j = 0; j < rank; j++) {
|
||||
output_ptr_batch[i] += *scratch_ptr_batch++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Apply activation.
|
||||
for (int b = 0; b < batch_size; ++b) {
|
||||
float* output_ptr_batch = output_ptr + b * num_units;
|
||||
for (int i = 0; i < num_units; ++i) {
|
||||
*output_ptr_batch =
|
||||
tflite::ops::micro::ActivationValFloat(activation, *output_ptr_batch);
|
||||
++output_ptr_batch;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void EvalFloatSvdfReference(
|
||||
TfLiteContext* context, TfLiteNode* node, const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* weights_feature,
|
||||
const TfLiteEvalTensor* weights_time, const TfLiteEvalTensor* bias,
|
||||
const TfLiteSVDFParams* params, int scratch_tensor_index,
|
||||
TfLiteEvalTensor* activation_state, TfLiteEvalTensor* output) {
|
||||
const int rank = params->rank;
|
||||
const int batch_size = input->dims->data[0];
|
||||
const int input_size = input->dims->data[1];
|
||||
const int num_filters = weights_feature->dims->data[0];
|
||||
const int num_units = num_filters / rank;
|
||||
const int memory_size = weights_time->dims->data[1];
|
||||
|
||||
const float* weights_feature_ptr =
|
||||
tflite::micro::GetTensorData<float>(weights_feature);
|
||||
const float* weights_time_ptr =
|
||||
tflite::micro::GetTensorData<float>(weights_time);
|
||||
const float* bias_ptr = tflite::micro::GetTensorData<float>(bias);
|
||||
const float* input_ptr = tflite::micro::GetTensorData<float>(input);
|
||||
|
||||
float* state_ptr = tflite::micro::GetTensorData<float>(activation_state);
|
||||
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
TFLITE_DCHECK(context->GetScratchBuffer != nullptr);
|
||||
|
||||
float* scratch_ptr = static_cast<float*>(
|
||||
context->GetScratchBuffer(context, scratch_tensor_index));
|
||||
|
||||
float* output_ptr = tflite::micro::GetTensorData<float>(output);
|
||||
|
||||
// Left shift the activation_state.
|
||||
{
|
||||
float* new_state_start = state_ptr;
|
||||
const float* old_state_start = state_ptr + 1;
|
||||
const float* old_state_end =
|
||||
state_ptr + batch_size * num_filters * memory_size;
|
||||
while (old_state_start != old_state_end) {
|
||||
*new_state_start++ = *old_state_start++;
|
||||
}
|
||||
}
|
||||
|
||||
// Note: no need to clear the latest activation, matmul is not accumulative.
|
||||
|
||||
// Compute conv1d(inputs, weights_feature).
|
||||
// The activation_state's rightmost column is used to save current cycle
|
||||
// activation. This is achieved by starting at state_ptr[memory_size - 1] and
|
||||
// having the stride equal to memory_size.
|
||||
|
||||
// Perform batched matrix vector multiply operation:
|
||||
{
|
||||
const float* matrix = weights_feature_ptr;
|
||||
const float* vector = input_ptr;
|
||||
float* result = &state_ptr[memory_size - 1];
|
||||
float* result_in_batch = result;
|
||||
for (int i = 0; i < batch_size; ++i) {
|
||||
const float* matrix_ptr = matrix;
|
||||
for (int j = 0; j < num_filters; ++j) {
|
||||
float dot_prod = 0.0f;
|
||||
const float* vector_in_batch = vector + i * input_size;
|
||||
for (int k = 0; k < input_size; ++k) {
|
||||
dot_prod += *matrix_ptr++ * *vector_in_batch++;
|
||||
}
|
||||
*result_in_batch = dot_prod;
|
||||
result_in_batch += memory_size;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ApplyTimeWeightsBiasAndActivation(
|
||||
batch_size, memory_size, num_filters, num_units, rank, weights_time_ptr,
|
||||
bias_ptr, params->activation, state_ptr, scratch_ptr, output_ptr);
|
||||
}
|
||||
|
||||
TfLiteStatus PrepareSvdf(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
|
||||
const auto* params = static_cast<const TfLiteSVDFParams*>(node->builtin_data);
|
||||
|
||||
// Validate Tensor Inputs (dtype depends on quantization):
|
||||
// [0] = Input, {2, batch_size, input_size}
|
||||
// [1] = Weights Feature, {2, num_filters, input_size}
|
||||
// [2] = Weights Time, {2, num_filters, memory_size}
|
||||
// [3] = Bias (optional), {1, num_units}
|
||||
// [4] = Activation State (variable),
|
||||
// {2, batch_size, memory_size * num_filters}
|
||||
const TfLiteTensor* input = GetInput(context, node, kSvdfInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
const TfLiteTensor* weights_feature =
|
||||
GetInput(context, node, kSvdfWeightsFeatureTensor);
|
||||
TF_LITE_ENSURE(context, weights_feature != nullptr);
|
||||
const TfLiteTensor* weights_time =
|
||||
GetInput(context, node, kSvdfWeightsTimeTensor);
|
||||
TF_LITE_ENSURE(context, weights_time != nullptr);
|
||||
const TfLiteTensor* bias =
|
||||
GetOptionalInputTensor(context, node, kSvdfBiasTensor);
|
||||
const TfLiteTensor* activation_state =
|
||||
GetInput(context, node, kSvdfInputActivationStateTensor);
|
||||
TF_LITE_ENSURE(context, activation_state != nullptr);
|
||||
|
||||
// Define input constants based on input tensor definition above:
|
||||
const int rank = params->rank;
|
||||
const int input_size = input->dims->data[1];
|
||||
const int batch_size = input->dims->data[0];
|
||||
const int num_filters = weights_feature->dims->data[0];
|
||||
TF_LITE_ENSURE_EQ(context, num_filters % rank, 0);
|
||||
const int num_units = num_filters / rank;
|
||||
const int memory_size = weights_time->dims->data[1];
|
||||
|
||||
// Validate Input Tensor:
|
||||
TF_LITE_ENSURE(context,
|
||||
input->type == kTfLiteFloat32 || input->type == kTfLiteInt8);
|
||||
TF_LITE_ENSURE_EQ(context, NumDimensions(input), 2);
|
||||
|
||||
// Validate Tensor Output:
|
||||
// [0] = float/int8_t, {2, batch_size, num_units}
|
||||
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
|
||||
TfLiteTensor* output = GetOutput(context, node, kSvdfOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
TF_LITE_ENSURE_EQ(context, NumDimensions(output), 2);
|
||||
TF_LITE_ENSURE_EQ(context, output->dims->data[0], batch_size);
|
||||
TF_LITE_ENSURE_EQ(context, output->dims->data[1], num_units);
|
||||
|
||||
// Validate Weights Feature Input Tensor:
|
||||
TF_LITE_ENSURE_EQ(context, NumDimensions(weights_feature), 2);
|
||||
TF_LITE_ENSURE_EQ(context, weights_feature->dims->data[1], input_size);
|
||||
|
||||
// Validate Weights Time Input Tensor:
|
||||
TF_LITE_ENSURE_EQ(context, NumDimensions(weights_time), 2);
|
||||
TF_LITE_ENSURE_EQ(context, weights_time->dims->data[0], num_filters);
|
||||
TF_LITE_ENSURE_EQ(context, weights_time->dims->data[1], memory_size);
|
||||
|
||||
// Validate Optional Bias Input Tensor:
|
||||
if (bias != nullptr) {
|
||||
TF_LITE_ENSURE_EQ(context, bias->dims->data[0], num_units);
|
||||
}
|
||||
|
||||
// Validate Activation State Input Tensor:
|
||||
TF_LITE_ENSURE_EQ(context, NumDimensions(activation_state), 2);
|
||||
TF_LITE_ENSURE_EQ(context, activation_state->dims->data[0], batch_size);
|
||||
TF_LITE_ENSURE_EQ(context, activation_state->dims->data[1],
|
||||
memory_size * num_filters);
|
||||
// Since is_variable is not part of TFLiteEvalTensor, check is_variable here.
|
||||
TF_LITE_ENSURE_EQ(context, activation_state->is_variable, true);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, node->inputs->size, 5);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
if (input->type == kTfLiteInt8) {
|
||||
TF_LITE_ENSURE_EQ(context, weights_feature->type, kTfLiteInt8);
|
||||
TF_LITE_ENSURE_EQ(context, weights_time->type, kTfLiteInt16);
|
||||
TF_LITE_ENSURE_EQ(context, activation_state->type, kTfLiteInt16);
|
||||
if (bias != nullptr) {
|
||||
TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteInt32);
|
||||
}
|
||||
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8);
|
||||
|
||||
const double effective_scale_1 = static_cast<double>(
|
||||
input->params.scale * weights_feature->params.scale /
|
||||
activation_state->params.scale);
|
||||
const double effective_scale_2 =
|
||||
static_cast<double>(activation_state->params.scale *
|
||||
weights_time->params.scale / output->params.scale);
|
||||
|
||||
// TODO(b/162018098): Use TF_LITE_ENSURE_NEAR when it is ready.
|
||||
TF_LITE_ENSURE(
|
||||
context,
|
||||
std::abs(static_cast<double>(bias->params.scale) -
|
||||
static_cast<double>(activation_state->params.scale *
|
||||
weights_time->params.scale)) < 1e-5);
|
||||
|
||||
QuantizeMultiplier(effective_scale_1, &(data->effective_scale_1_a),
|
||||
&(data->effective_scale_1_b));
|
||||
QuantizeMultiplier(effective_scale_2, &(data->effective_scale_2_a),
|
||||
&(data->effective_scale_2_b));
|
||||
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
data->output_zero_point = output->params.zero_point;
|
||||
|
||||
TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
|
||||
|
||||
const TfLiteStatus scratch_status = context->RequestScratchBufferInArena(
|
||||
context, batch_size * num_filters * sizeof(int32_t),
|
||||
&(data->scratch_tensor_index));
|
||||
TF_LITE_ENSURE_OK(context, scratch_status);
|
||||
|
||||
const TfLiteStatus scratch_output_status =
|
||||
context->RequestScratchBufferInArena(
|
||||
context, batch_size * num_units * sizeof(int32_t),
|
||||
&(data->scratch_output_tensor_index));
|
||||
TF_LITE_ENSURE_OK(context, scratch_output_status);
|
||||
} else {
|
||||
TF_LITE_ENSURE_EQ(context, weights_feature->type, kTfLiteFloat32);
|
||||
TF_LITE_ENSURE_EQ(context, weights_time->type, kTfLiteFloat32);
|
||||
TF_LITE_ENSURE_EQ(context, activation_state->type, kTfLiteFloat32);
|
||||
if (bias != nullptr) {
|
||||
TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteFloat32);
|
||||
}
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteFloat32);
|
||||
|
||||
TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
|
||||
const TfLiteStatus scratch_status = context->RequestScratchBufferInArena(
|
||||
context, batch_size * num_filters * sizeof(float),
|
||||
&(data->scratch_tensor_index));
|
||||
TF_LITE_ENSURE_OK(context, scratch_status);
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace tflite
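Both reference SVDF paths above first shift the activation_state left by one element (dropping the oldest sample in each filter's window) and then write the newest feature-matmul result into the rightmost slot of every window. A standalone illustration of that memory layout (values chosen for illustration):

// Standalone illustration only: activation_state is laid out as
// [batch, filters * memory]; each invoke shifts the whole buffer left by one
// before writing the newest value at index memory - 1 of every window.
#include <cstdio>

int main() {
  constexpr int kFilters = 2;
  constexpr int kMemory = 4;
  float state[kFilters * kMemory] = {1, 2, 3, 4,   // filter 0 window
                                     5, 6, 7, 8};  // filter 1 window
  // Left shift the whole buffer by one element, exactly as the kernels do.
  for (int i = 0; i + 1 < kFilters * kMemory; ++i) {
    state[i] = state[i + 1];
  }
  // Newest per-filter activations land in the last column of each window.
  const float newest[kFilters] = {9.0f, 10.0f};
  for (int f = 0; f < kFilters; ++f) {
    state[f * kMemory + kMemory - 1] = newest[f];
  }
  for (int i = 0; i < kFilters * kMemory; ++i) {
    std::printf("%g ", state[i]);  // prints: 2 3 4 9 6 7 8 10
  }
  std::printf("\n");
  return 0;
}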
|
||||
@@ -0,0 +1,269 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/transpose_conv.h"
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/padding.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace {
|
||||
|
||||
// For the TfLite transpose_conv implementation, input tensor 0 corresponds to
|
||||
// the OutputShapeTensor. However, since TFLM does not support dynamic tensors,
|
||||
// the TFLM implementation ignores input tensor 0 and the only inputs we care
|
||||
// about are kFilterTensor, kInputTensor and kBiasTensor.
|
||||
constexpr int kFilterTensor = 1;
|
||||
constexpr int kInputTensor = 2;
|
||||
constexpr int kBiasTensor = 3;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
// Conv is quantized along dimension 0:
|
||||
// https://www.tensorflow.org/lite/performance/quantization_spec
|
||||
constexpr int kConvQuantizedDimension = 0;
|
||||
|
||||
struct OpData {
|
||||
ConvParams params;
|
||||
|
||||
// A scratch buffer is required for quantized implementations.
|
||||
int scratch_buffer_index;
|
||||
|
||||
// Multiplier and shift arrays are required for the int8 implementation.
|
||||
int32_t* per_channel_output_multiplier;
|
||||
int32_t* per_channel_output_shift;
|
||||
};

inline PaddingType RuntimePaddingType(TfLitePadding padding) {
  switch (padding) {
    case TfLitePadding::kTfLitePaddingSame:
      return PaddingType::kSame;
    case TfLitePadding::kTfLitePaddingValid:
      return PaddingType::kValid;
    case TfLitePadding::kTfLitePaddingUnknown:
    default:
      return PaddingType::kNone;
  }
}

TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
                             const TfLiteConvParams* params, int width,
                             int height, int filter_width, int filter_height,
                             int out_width, int out_height,
                             const TfLiteType data_type, OpData* data) {
  bool has_bias = node->inputs->size == 4;
  // Check number of inputs/outputs
  TF_LITE_ENSURE(context, has_bias || node->inputs->size == 3);
  TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);

  // Matching GetWindowedOutputSize in TensorFlow.
  auto padding = params->padding;
  TfLitePaddingValues padding_values = ComputePaddingHeightWidth(
      params->stride_height, params->stride_width,
      params->dilation_height_factor, params->dilation_width_factor, height,
      width, filter_height, filter_width, padding, &out_height, &out_width);

  data->params.padding_type = RuntimePaddingType(padding);
  data->params.padding_values.width = padding_values.width;
  data->params.padding_values.height = padding_values.height;

  // Note that quantized inference requires that all tensors have their
  // parameters set. This is usually done during quantized training.
  if (data_type != kTfLiteFloat32) {
    const TfLiteTensor* input = GetInput(context, node, kInputTensor);
    TF_LITE_ENSURE(context, input != nullptr);
    const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
    TF_LITE_ENSURE(context, filter != nullptr);
    const TfLiteTensor* bias =
        GetOptionalInputTensor(context, node, kBiasTensor);
    TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
    TF_LITE_ENSURE(context, output != nullptr);
    int output_channels = filter->dims->data[kConvQuantizedDimension];

    TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
        context, input, filter, bias, output, params->activation,
        &data->params.output_multiplier, &data->params.output_shift,
        &data->params.quantized_activation_min,
        &data->params.quantized_activation_max,
        data->per_channel_output_multiplier,
        reinterpret_cast<int*>(data->per_channel_output_shift),
        output_channels));
  }
  return kTfLiteOk;
}

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  TFLITE_DCHECK(node->builtin_data != nullptr);

  OpData* data = static_cast<OpData*>(node->user_data);
  const auto params = static_cast<const TfLiteConvParams*>(node->builtin_data);

  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
  TF_LITE_ENSURE(context, filter != nullptr);

  int input_width = input->dims->data[2];
  int input_height = input->dims->data[1];
  int filter_width = filter->dims->data[2];
  int filter_height = filter->dims->data[1];
  int output_width = output->dims->data[2];
  int output_height = output->dims->data[1];

  // Dynamically allocate per-channel quantization parameters.
  const int num_channels = filter->dims->data[kConvQuantizedDimension];
  data->per_channel_output_multiplier =
      static_cast<int32_t*>(context->AllocatePersistentBuffer(
          context, num_channels * sizeof(int32_t)));
  data->per_channel_output_shift =
      static_cast<int32_t*>(context->AllocatePersistentBuffer(
          context, num_channels * sizeof(int32_t)));

  // Quantized kernels use an int32 scratch buffer.
  if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
    TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
    TFLITE_DCHECK(context->RequestScratchBufferInArena(
                      context,
                      GetTensorShape(output).FlatSize() * sizeof(int32_t),
                      &(data->scratch_buffer_index)) == kTfLiteOk);
  }
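
  // The scratch buffer holds one int32 accumulator per output element: the
  // reference transpose-conv kernel scatter-adds contributions from each input
  // position into it and only requantizes to int8 at the end, which is why it
  // is sized from the output's flat size rather than the input's.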

  // All per-channel quantized tensors need valid zero point and scale arrays.
  if (input->type == kTfLiteInt8) {
    TF_LITE_ENSURE_EQ(context, filter->quantization.type,
                      kTfLiteAffineQuantization);

    const auto* affine_quantization =
        static_cast<TfLiteAffineQuantization*>(filter->quantization.params);
    TF_LITE_ENSURE(context, affine_quantization);
    TF_LITE_ENSURE(context, affine_quantization->scale);
    TF_LITE_ENSURE(context, affine_quantization->zero_point);

    TF_LITE_ENSURE(context,
                   affine_quantization->scale->size == 1 ||
                       affine_quantization->scale->size ==
                           filter->dims->data[kConvQuantizedDimension]);
    TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size,
                      affine_quantization->zero_point->size);
  }

  TF_LITE_ENSURE_STATUS(CalculateOpData(
      context, node, params, input_width, input_height, filter_width,
      filter_height, output_width, output_height, input->type, data));

  // Offsets (zero points)
  data->params.input_offset = -input->params.zero_point;
  data->params.weights_offset = -filter->params.zero_point;
  data->params.output_offset = output->params.zero_point;
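
  // Sign convention: the reference kernels add these offsets to the raw
  // quantized values, so the input and filter zero points are stored negated,
  // while the output zero point is added back after requantization and is
  // therefore kept as-is.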

  // Stride + dilation
  data->params.stride_width = params->stride_width;
  data->params.stride_height = params->stride_height;
  data->params.dilation_width_factor = params->dilation_width_factor;
  data->params.dilation_height_factor = params->dilation_height_factor;

  float output_activation_min, output_activation_max;
  CalculateActivationRange(params->activation, &output_activation_min,
                           &output_activation_max);
  data->params.float_activation_min = output_activation_min;
  data->params.float_activation_max = output_activation_max;
  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  const TfLiteEvalTensor* filter =
      tflite::micro::GetEvalInput(context, node, kFilterTensor);
  const TfLiteEvalTensor* bias =
      (NumInputs(node) == 4)
          ? tflite::micro::GetEvalInput(context, node, kBiasTensor)
          : nullptr;
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  TFLITE_DCHECK(node->user_data != nullptr);
  const OpData& data = *(static_cast<const OpData*>(node->user_data));

  TF_LITE_ENSURE_EQ(context, input->type, output->type);
  TF_LITE_ENSURE_MSG(context, input->type == filter->type,
                     "Hybrid models are not supported on TFLite Micro.");

  switch (input->type) {  // Already know in/out types are same.
    case kTfLiteFloat32: {
      reference_ops::TransposeConv(
          data.params, tflite::micro::GetTensorShape(input),
          tflite::micro::GetTensorData<float>(input),
          tflite::micro::GetTensorShape(filter),
          tflite::micro::GetTensorData<float>(filter),
          tflite::micro::GetTensorShape(bias),
          tflite::micro::GetTensorData<float>(bias),
          tflite::micro::GetTensorShape(output),
          tflite::micro::GetTensorData<float>(output),
          tflite::micro::GetTensorShape(nullptr), nullptr);
      break;
    }
    case kTfLiteInt8: {
      int32_t* scratch_buffer = static_cast<int32_t*>(
          context->GetScratchBuffer(context, data.scratch_buffer_index));
      reference_integer_ops::TransposeConv(
          data.params, data.per_channel_output_multiplier,
          data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
          tflite::micro::GetTensorData<int8_t>(input),
          tflite::micro::GetTensorShape(filter),
          tflite::micro::GetTensorData<int8_t>(filter),
          tflite::micro::GetTensorShape(bias),
          tflite::micro::GetTensorData<int32_t>(bias),
          tflite::micro::GetTensorShape(output),
          tflite::micro::GetTensorData<int8_t>(output),
          tflite::micro::GetTensorShape(nullptr), nullptr, scratch_buffer);
      break;
    }
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                         TfLiteTypeGetName(input->type), input->type);
      return kTfLiteError;
  }
  return kTfLiteOk;
}

}  // namespace

TfLiteRegistration Register_TRANSPOSE_CONV() {
  return {/*init=*/Init,
          /*free=*/nullptr,
          /*prepare=*/Prepare,
          /*invoke=*/Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}
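
// Usage sketch (not part of this kernel, shown for orientation): an
// application makes the op available by adding it to its op resolver before
// constructing the interpreter. The exact resolver helper depends on the TFLM
// snapshot bundled here, so treat the names below as illustrative only.
//
//   static tflite::MicroMutableOpResolver<1> op_resolver;
//   op_resolver.AddTransposeConv();
//   tflite::MicroInterpreter interpreter(model, op_resolver, tensor_arena,
//                                        kTensorArenaSize, &error_reporter);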

}  // namespace tflite
@@ -0,0 +1,89 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace {

constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
  const TfLiteTensor* input;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context,
                    GetOutputSafe(context, node, kOutputTensor, &output));
  output->type = input->type;

  return kTfLiteOk;
}
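
// In TFLM all tensor shapes are fixed at conversion time, so Prepare() only
// needs to propagate the element type; the element count is taken from the
// matching input/output shapes in Eval() below.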

template <typename T>
void resetZeros(T* out, const int num_elements) {
  for (int i = 0; i < num_elements; ++i) {
    out[i] = static_cast<T>(0);
  }
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
  int flat_size = MatchingFlatSize(tflite::micro::GetTensorShape(input),
                                   tflite::micro::GetTensorShape(output));
  switch (input->type) {
    case kTfLiteInt64:
      resetZeros(tflite::micro::GetTensorData<int64_t>(output), flat_size);
      break;
    case kTfLiteInt32:
      resetZeros(tflite::micro::GetTensorData<int32_t>(output), flat_size);
      break;
    case kTfLiteInt8:
      resetZeros(tflite::micro::GetTensorData<int8_t>(output), flat_size);
      break;
    case kTfLiteFloat32:
      resetZeros(tflite::micro::GetTensorData<float>(output), flat_size);
      break;
    default:
      TF_LITE_KERNEL_LOG(context,
                         "ZerosLike only currently supports int64, int32, "
                         "int8 and float32, got %d.",
                         input->type);
      return kTfLiteError;
  }
  return kTfLiteOk;
}
}  // namespace

TfLiteRegistration Register_ZEROS_LIKE() {
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/Prepare,
          /*invoke=*/Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}
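
// Sketch only: an application would typically expose this kernel through its
// op resolver (e.g. MicroMutableOpResolver::AddZerosLike() where that helper
// exists in the bundled TFLM snapshot) rather than calling
// Register_ZEROS_LIKE() directly.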

}  // namespace tflite