Rolling 20220526

jomjol
2022-05-26 20:31:26 +02:00
parent cce812ff11
commit 00028010ee
203 changed files with 12003 additions and 1226 deletions

View File

@@ -29,8 +29,12 @@ AllOpsResolver::AllOpsResolver() {
AddAssignVariable();
AddAveragePool2D();
AddBatchToSpaceNd();
AddBroadcastArgs();
AddBroadcastTo();
AddCallOnce();
AddCast();
AddCeil();
AddCircularBuffer();
AddConcatenation();
AddConv2D();
AddCos();
@@ -49,9 +53,12 @@ AllOpsResolver::AllOpsResolver() {
AddFloorDiv();
AddFloorMod();
AddFullyConnected();
AddGather();
AddGatherNd();
AddGreater();
AddGreaterEqual();
AddHardSwish();
AddIf();
AddL2Normalization();
AddL2Pool2D();
AddLeakyRelu();
@@ -66,6 +73,7 @@ AllOpsResolver::AllOpsResolver() {
AddMaximum();
AddMean();
AddMinimum();
AddMirrorPad();
AddMul();
AddNeg();
AddNotEqual();
@@ -85,6 +93,7 @@ AllOpsResolver::AllOpsResolver() {
AddRsqrt();
AddShape();
AddSin();
AddSlice();
AddSoftmax();
AddSpaceToBatchNd();
AddSpaceToDepth();
@@ -101,6 +110,8 @@ AllOpsResolver::AllOpsResolver() {
AddTransposeConv();
AddUnpack();
AddVarHandle();
AddWhile();
AddZerosLike();
}
} // namespace tflite
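
For context, the operators added above can also be registered selectively on a `MicroMutableOpResolver` instead of pulling in `AllOpsResolver`. A minimal sketch, assuming the matching `Add*` methods (including the new `AddBroadcastArgs()`/`AddBroadcastTo()`) are available on the resolver in this tree:

```
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

// Sketch only: register just the ops a model needs. The template argument (6)
// must be at least the number of Add* calls made below.
void RegisterSelectedOps(tflite::MicroMutableOpResolver<6>& resolver) {
  resolver.AddBroadcastArgs();  // new in this revision (assumed resolver method)
  resolver.AddBroadcastTo();    // new in this revision (assumed resolver method)
  resolver.AddGather();
  resolver.AddGatherNd();
  resolver.AddSlice();
  resolver.AddWhile();
}
```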

View File

@@ -0,0 +1,107 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/fake_micro_context.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/micro_arena_constants.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"
namespace tflite {
namespace {
// Dummy static variables to allow creation of a dummy MicroAllocator.
// All tests are guaranteed to run serially.
static constexpr int KDummyTensorArenaSize = 256;
static uint8_t dummy_tensor_arena[KDummyTensorArenaSize];
} // namespace
FakeMicroContext::FakeMicroContext(TfLiteTensor* tensors,
SimpleMemoryAllocator* allocator,
MicroGraph* micro_graph)
: MicroContext(
MicroAllocator::Create(dummy_tensor_arena, KDummyTensorArenaSize,
GetMicroErrorReporter()),
nullptr, micro_graph),
tensors_(tensors),
allocator_(allocator) {}
TfLiteTensor* FakeMicroContext::AllocateTempTfLiteTensor(int tensor_index) {
allocated_tensor_count_++;
return &tensors_[tensor_index];
}
void FakeMicroContext::DeallocateTempTfLiteTensor(TfLiteTensor* tensor) {
allocated_tensor_count_--;
}
bool FakeMicroContext::IsAllTempTfLiteTensorDeallocated() {
return !allocated_tensor_count_;
}
TfLiteEvalTensor* FakeMicroContext::GetEvalTensor(int tensor_index) {
TfLiteEvalTensor* eval_tensor =
reinterpret_cast<TfLiteEvalTensor*>(allocator_->AllocateTemp(
sizeof(TfLiteEvalTensor), alignof(TfLiteEvalTensor)));
TFLITE_DCHECK(eval_tensor != nullptr);
// In unit tests, the TfLiteTensor pointer contains the source of truth for
// buffers and values:
eval_tensor->data = tensors_[tensor_index].data;
eval_tensor->dims = tensors_[tensor_index].dims;
eval_tensor->type = tensors_[tensor_index].type;
return eval_tensor;
}
void* FakeMicroContext::AllocatePersistentBuffer(size_t bytes) {
// FakeMicroContext uses SimpleMemoryAllocator, which does not automatically
// apply buffer alignment the way MicroAllocator does.
// Aligning here is potentially wasteful but allows the
// fake_micro_context to work correctly with optimized kernels.
return allocator_->AllocatePersistentBuffer(bytes,
MicroArenaBufferAlignment());
}
TfLiteStatus FakeMicroContext::RequestScratchBufferInArena(size_t bytes,
int* buffer_index) {
TFLITE_DCHECK(buffer_index != nullptr);
if (scratch_buffer_count_ == kNumScratchBuffers_) {
MicroPrintf("Exceeded the maximum number of scratch tensors allowed (%d).",
kNumScratchBuffers_);
return kTfLiteError;
}
// For tests, we allocate scratch buffers from the tail and keep them around
// for the lifetime of the model. This means the arena size in the tests will
// be larger than it would be if the scratch buffers could share memory.
scratch_buffers_[scratch_buffer_count_] =
allocator_->AllocatePersistentBuffer(bytes, MicroArenaBufferAlignment());
TFLITE_DCHECK(scratch_buffers_[scratch_buffer_count_] != nullptr);
*buffer_index = scratch_buffer_count_++;
return kTfLiteOk;
}
void* FakeMicroContext::GetScratchBuffer(int buffer_index) {
TFLITE_DCHECK(scratch_buffer_count_ <= kNumScratchBuffers_);
if (buffer_index >= scratch_buffer_count_) {
return nullptr;
}
return scratch_buffers_[buffer_index];
}
} // namespace tflite

View File

@@ -0,0 +1,56 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_FAKE_MICRO_CONTEXT_H_
#define TENSORFLOW_LITE_MICRO_FAKE_MICRO_CONTEXT_H_
#include "tensorflow/lite/micro/micro_context.h"
#include "tensorflow/lite/micro/micro_graph.h"
namespace tflite {
// A fake of MicroContext for kernel util tests.
class FakeMicroContext : public MicroContext {
public:
FakeMicroContext(TfLiteTensor* tensors, SimpleMemoryAllocator* allocator,
MicroGraph* micro_graph);
void* AllocatePersistentBuffer(size_t bytes) override;
TfLiteStatus RequestScratchBufferInArena(size_t bytes,
int* buffer_index) override;
void* GetScratchBuffer(int buffer_index) override;
TfLiteTensor* AllocateTempTfLiteTensor(int tensor_index) override;
void DeallocateTempTfLiteTensor(TfLiteTensor* tensor) override;
bool IsAllTempTfLiteTensorDeallocated();
TfLiteEvalTensor* GetEvalTensor(int tensor_index) override;
private:
static constexpr int kNumScratchBuffers_ = 12;
int scratch_buffer_count_ = 0;
uint8_t* scratch_buffers_[kNumScratchBuffers_];
TfLiteTensor* tensors_;
int allocated_tensor_count_ = 0;
SimpleMemoryAllocator* allocator_;
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_FAKE_MICRO_CONTEXT_H_
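
A hedged sketch of how a test might exercise the temp-tensor bookkeeping shown above; how the `SimpleMemoryAllocator` and `MicroGraph` arguments are obtained is test-harness specific and not part of this commit:

```
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/fake_micro_context.h"

// Hypothetical helper: `tensors`, `allocator` and `graph` come from existing
// test scaffolding (not shown here).
void CheckTempTensorBookkeeping(TfLiteTensor* tensors,
                                tflite::SimpleMemoryAllocator* allocator,
                                tflite::MicroGraph* graph) {
  tflite::FakeMicroContext fake(tensors, allocator, graph);

  TfLiteTensor* t0 = fake.AllocateTempTfLiteTensor(0);
  TFLITE_DCHECK(t0 == &tensors[0]);  // The fake hands back the backing tensor.
  TFLITE_DCHECK(!fake.IsAllTempTfLiteTensorDeallocated());

  fake.DeallocateTempTfLiteTensor(t0);
  TFLITE_DCHECK(fake.IsAllTempTfLiteTensorDeallocated());
}
```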

View File

@@ -0,0 +1,100 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_IBUFFER_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_IBUFFER_ALLOCATOR_H_
#include <cstddef>
#include <cstdint>
#include "tensorflow/lite/c/c_api_types.h"
namespace tflite {
// Interface classes that the TFLM framework relies on to get the buffers it
// needs. There are two types of buffers that the TFLM framework requires:
// persistent and non-persistent. Persistent buffers, once allocated, are never
// freed by the TFLM framework. Non-persistent buffers can be allocated and
// deallocated by the TFLM framework. This file defines the two interface
// classes that the TFLM framework relies on to manage these buffers.
// Interface class for managing persistent buffers.
class IPersistentBufferAllocator {
public:
IPersistentBufferAllocator() {}
virtual ~IPersistentBufferAllocator() {}
// Allocates persistent memory. The persistent buffer is never freed.
virtual uint8_t* AllocatePersistentBuffer(size_t size, size_t alignment) = 0;
// Returns the size of all persistent allocations in bytes.
virtual size_t GetPersistentUsedBytes() const = 0;
};
// Interface class for managing non-persistent buffers.
// The default non-persistent buffers are temp buffers that are not resizable.
// Support for at least one resizable buffer is required.
class INonPersistentBufferAllocator {
public:
INonPersistentBufferAllocator() {}
virtual ~INonPersistentBufferAllocator() {}
// Allocates a temporary buffer. This buffer is not resizable.
virtual uint8_t* AllocateTemp(size_t size, size_t alignment) = 0;
// Signals that a temporary buffer is no longer needed.
virtual void DeallocateTemp(uint8_t* buf) = 0;
// Returns true if all temporary buffers are already deallocated.
virtual bool IsAllTempDeallocated() = 0;
// Signals that all temporary allocations can be reclaimed. TFLM calls this
// API when it knows that all temporary buffers that it requested have been
// deallocated. The goal of this API is to let implementations of
// INonPersistentBufferAllocator reuse the buffer space with reasonable
// complexity.
virtual TfLiteStatus ResetTempAllocations() = 0;
// Returns a buffer that is resizable via ResizeBuffer().
virtual uint8_t* AllocateResizableBuffer(size_t size, size_t alignment) = 0;
// Resizes a buffer that was previously returned by
// AllocateResizableBuffer().
virtual TfLiteStatus ResizeBuffer(uint8_t* resizable_buf, size_t size,
size_t alignment) = 0;
// Frees up the memory occupied by the resizable buffer.
virtual TfLiteStatus DeallocateResizableBuffer(uint8_t* resizable_buf) = 0;
// Returns a pointer to the start of the overlay memory, which is used for
// activation tensors and kernel scratch buffers at the Invoke stage.
virtual uint8_t* GetOverlayMemoryAddress() const = 0;
// Reserves the size of the overlay memory. This overlay is reserved for the
// kernels at the Invoke stage. It is referred to as the overlay because, before
// the Invoke stage, the same memory can be used for temp buffers. The layout of
// the memory is planned separately by the memory planner at the Invoke stage.
virtual TfLiteStatus ReserveNonPersistentOverlayMemory(size_t size,
size_t alignment) = 0;
// Returns the size of the non-persistent buffer in use.
virtual size_t GetNonPersistentUsedBytes() const = 0;
// Returns the number of bytes available with a given alignment. This number
// takes into account any temporary allocations.
virtual size_t GetAvailableMemory(size_t alignment) const = 0;
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_IBUFFER_ALLOCATOR_H_
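
To make the persistent-buffer contract concrete, here is a minimal illustrative implementation over a fixed arena. The class name and the bump-from-the-tail strategy are assumptions for the sketch (not part of this commit), power-of-two alignment is assumed, and the include path follows the header guard above.

```
#include <cstddef>
#include <cstdint>

#include "tensorflow/lite/micro/ibuffer_allocator.h"

namespace example {

// Hypothetical arena-backed IPersistentBufferAllocator: allocations grow
// downward from the arena tail and are never freed.
class ArenaPersistentAllocator : public tflite::IPersistentBufferAllocator {
 public:
  ArenaPersistentAllocator(uint8_t* arena, size_t size)
      : head_(arena), tail_(arena + size), next_(tail_) {}

  uint8_t* AllocatePersistentBuffer(size_t size, size_t alignment) override {
    // Align the candidate address downward (alignment assumed power of two).
    uintptr_t candidate = (reinterpret_cast<uintptr_t>(next_) - size) &
                          ~(static_cast<uintptr_t>(alignment) - 1);
    if (candidate < reinterpret_cast<uintptr_t>(head_)) {
      return nullptr;  // Arena exhausted.
    }
    next_ = reinterpret_cast<uint8_t*>(candidate);
    return next_;
  }

  size_t GetPersistentUsedBytes() const override {
    return static_cast<size_t>(tail_ - next_);
  }

 private:
  uint8_t* head_;  // Start of the arena.
  uint8_t* tail_;  // One past the end of the arena.
  uint8_t* next_;  // Lowest persistent allocation so far.
};

}  // namespace example
```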

View File

@@ -117,15 +117,21 @@ TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
ReluOpData* data = static_cast<ReluOpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kActivationsInputTensor);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kActivationsInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kActivationsOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kActivationsOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
if (input->type == kTfLiteInt8) {
CalculateReluOpData<int8_t>(input, output, data);
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
@@ -133,7 +139,9 @@ TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
Relu6OpData* data = static_cast<Relu6OpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kActivationsInputTensor);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kActivationsInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
if (input->type == kTfLiteInt8) {
@@ -142,6 +150,8 @@ TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node) {
data->zero_int8 = input->params.zero_point;
}
micro_context->DeallocateTempTfLiteTensor(input);
return kTfLiteOk;
}
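
Most of the kernel changes in this commit follow the same mechanical pattern as the Relu diff above: the old `GetInput()`/`GetOutput()` accessors are replaced with temp tensors obtained from `MicroContext`, which must be deallocated before `Prepare()` returns. A condensed sketch of the recurring shape (tensor indices are placeholders):

```
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_context.h"

TfLiteStatus ExamplePrepare(TfLiteContext* context, TfLiteNode* node) {
  tflite::MicroContext* micro_context = tflite::GetMicroContext(context);

  // Temp tensors replace the old GetInput()/GetOutput() accessors.
  TfLiteTensor* input =
      micro_context->AllocateTempInputTensor(node, /*index=*/0);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* output =
      micro_context->AllocateTempOutputTensor(node, /*index=*/0);
  TF_LITE_ENSURE(context, output != nullptr);

  // ... per-kernel shape/type checks and OpData setup go here ...

  // Every temp tensor must be released before Prepare() returns.
  micro_context->DeallocateTempTfLiteTensor(input);
  micro_context->DeallocateTempTfLiteTensor(output);
  return kTfLiteOk;
}
```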

View File

@@ -80,11 +80,15 @@ TfLiteStatus AddPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
const TfLiteTensor* input1 = GetInput(context, node, kAddInputTensor1);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input1 =
micro_context->AllocateTempInputTensor(node, kAddInputTensor1);
TF_LITE_ENSURE(context, input1 != nullptr);
const TfLiteTensor* input2 = GetInput(context, node, kAddInputTensor2);
TfLiteTensor* input2 =
micro_context->AllocateTempInputTensor(node, kAddInputTensor2);
TF_LITE_ENSURE(context, input2 != nullptr);
TfLiteTensor* output = GetOutput(context, node, kAddOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kAddOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
OpDataAdd* data = static_cast<OpDataAdd*>(node->user_data);
@@ -93,6 +97,9 @@ TfLiteStatus AddPrepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_STATUS(
CalculateOpDataAdd(context, params, input1, input2, output, data));
micro_context->DeallocateTempTfLiteTensor(input1);
micro_context->DeallocateTempTfLiteTensor(input2);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -50,18 +50,19 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE(context, num_inputs >= 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input_tensor_first;
TF_LITE_ENSURE_OK(
context, GetInputSafe(context, node, kInputTensor0, &input_tensor_first));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input_tensor_first =
micro_context->AllocateTempInputTensor(node, kInputTensor0);
TF_LITE_ENSURE(context, input_tensor_first != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
// Check that all tensors have the same shape and type.
TF_LITE_ENSURE_TYPES_EQ(context, output->type, input_tensor_first->type);
for (int i = kInputTensor0 + 1; i < num_inputs; ++i) {
const TfLiteTensor* input;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, i, &input));
TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, i);
TF_LITE_ENSURE(context, input != nullptr);
TF_LITE_ENSURE(context, HaveSameShapes(input_tensor_first, input));
TF_LITE_ENSURE_TYPES_EQ(context, input_tensor_first->type, input->type);
@@ -72,6 +73,8 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE(context,
input_tensor_first->params.scale == input->params.scale);
}
micro_context->DeallocateTempTfLiteTensor(input);
}
if (output->type == kTfLiteFloat32) {
@@ -123,6 +126,9 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteError;
}
micro_context->DeallocateTempTfLiteTensor(input_tensor_first);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -52,21 +52,19 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
input_resource_id_tensor->type == kTfLiteInt32));
TF_LITE_ENSURE_EQ(context, NumElements(input_resource_id_tensor->dims), 1);
const TfLiteTensor* input_value = GetInput(context, node, kInputValue);
tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
TfLiteTensor* input_value =
micro_context->AllocateTempInputTensor(node, kInputValue);
TFLITE_DCHECK(input_value != nullptr);
// Casting to TfliteIntArray is required since we are re-using
// GetExecutionPlan from TfLiteContext. On TFLM this method returns a
// MicroGraph.
// TODO(b/188226309): Design a cleaner way to get a graph from kernel context.
MicroGraph* graph_info;
context->GetExecutionPlan(context,
reinterpret_cast<TfLiteIntArray**>(&graph_info));
MicroResourceVariables* resources = graph_info->GetResourceVariables();
MicroGraph& graph_info = micro_context->graph();
MicroResourceVariables* resources = graph_info.GetResourceVariables();
TF_LITE_ENSURE_OK(context,
resources->Allocate(input_resource_id_tensor->data.i32[0],
context, input_value));
micro_context->DeallocateTempTfLiteTensor(input_value);
return kTfLiteOk;
}
@@ -79,14 +77,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
tflite::micro::GetEvalInput(context, node, kInputValue);
TFLITE_DCHECK(input_value != nullptr);
// Casting to TfliteIntArray is required since we are re-using
// GetExecutionPlan from TfLiteContext. On TFLM this method returns a
// MicroGraph.
// TODO(b/188226309): Design a cleaner way to get a graph from kernel context.
MicroGraph* graph_info;
context->GetExecutionPlan(context,
reinterpret_cast<TfLiteIntArray**>(&graph_info));
MicroResourceVariables* resources = graph_info->GetResourceVariables();
tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
MicroGraph& graph_info = micro_context->graph();
MicroResourceVariables* resources = graph_info.GetResourceVariables();
if (resources == nullptr) {
MicroPrintf(
"ASSIGN_VARIABLE requires resource variables. Please create "

View File

@@ -41,8 +41,12 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 3);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, input != nullptr && output != nullptr);
TF_LITE_ENSURE(context, NumDimensions(input) >= kInputOutputMinDimensionNum);
@@ -51,6 +55,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE(context, NumDimensions(output) <= kInputOutputMaxDimensionNum);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -0,0 +1,97 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/broadcast_args.h"
#include <stdint.h>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_context.h"
namespace tflite {
namespace {
constexpr int kShape1Tensor = 0;
constexpr int kShape2Tensor = 1;
constexpr int kOutputTensor = 0;
TfLiteStatus BroadcastArgsPrepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE(context, NumInputs(node) == 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* shape1 =
micro_context->AllocateTempInputTensor(node, kShape1Tensor);
TfLiteTensor* shape2 =
micro_context->AllocateTempInputTensor(node, kShape2Tensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context,
shape1->type == kTfLiteInt32 || shape1->type == kTfLiteInt64);
TF_LITE_ENSURE_EQ(context, shape1->type, shape2->type);
TF_LITE_ENSURE_EQ(context, shape1->type, output->type);
// Ensure the shapes are 1D tensors.
TF_LITE_ENSURE_EQ(context, NumDimensions(shape1), 1);
TF_LITE_ENSURE_EQ(context, NumDimensions(shape2), 1);
// Ensure the shape of the output tensor is compatible
TF_LITE_ENSURE_EQ(context, NumDimensions(output), 1);
micro_context->DeallocateTempTfLiteTensor(shape1);
micro_context->DeallocateTempTfLiteTensor(shape2);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
TfLiteStatus BroadcastArgsEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* shape1 =
micro::GetEvalInput(context, node, kShape1Tensor);
const TfLiteEvalTensor* shape2 =
micro::GetEvalInput(context, node, kShape2Tensor);
TfLiteEvalTensor* output = micro::GetEvalOutput(context, node, kOutputTensor);
if (output->type == kTfLiteInt32) {
reference_ops::BroadcastArgs(
micro::GetTensorShape(shape1), micro::GetTensorData<int32_t>(shape1),
micro::GetTensorShape(shape2), micro::GetTensorData<int32_t>(shape2),
micro::GetTensorShape(output), micro::GetTensorData<int32_t>(output));
} else {
reference_ops::BroadcastArgs(
micro::GetTensorShape(shape1), micro::GetTensorData<int64_t>(shape1),
micro::GetTensorShape(shape2), micro::GetTensorData<int64_t>(shape2),
micro::GetTensorShape(output), micro::GetTensorData<int64_t>(output));
}
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_BROADCAST_ARGS() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/BroadcastArgsPrepare,
/*invoke=*/BroadcastArgsEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite

View File

@@ -0,0 +1,129 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/broadcast_to.h"
#include <stdint.h>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_context.h"
namespace tflite {
namespace {
constexpr int kInputTensor = 0;
constexpr int kShapeTensor = 1;
constexpr int kOutputTensor = 0;
// Support a maximum of 5 dimensions in TFLM.
constexpr int kMaxDims = 5;
TfLiteStatus ValidateOutputTensor(TfLiteContext* context, TfLiteTensor* input,
TfLiteTensor* shape, TfLiteTensor* output) {
// Ensure the shape is a 1D tensor.
TF_LITE_ENSURE_EQ(context, NumDimensions(shape), 1);
// Ensure output dims is not less than input dims.
int input_num_dims = NumDimensions(input);
int output_num_dims = NumDimensions(output);
int shape_num_dims = SizeOfDimension(shape, 0);
TF_LITE_ENSURE_MSG(context, output_num_dims == shape_num_dims,
"Output must match with the expected shape dimension.");
TF_LITE_ENSURE_MSG(context, input_num_dims <= output_num_dims,
"Output shape must be broadcastable from input shape.");
TF_LITE_ENSURE_MSG(context, output_num_dims <= kMaxDims,
"BroadcastTo only supports 1-5D tensor.");
// Check if output shape is broadcastable from input shape.
auto get_shape_data = [shape](int i) -> int32_t {
if (shape->type == kTfLiteInt32) {
return GetTensorData<int32_t>(shape)[i];
} else {
return GetTensorData<int64_t>(shape)[i];
}
};
int extending_dims = output_num_dims - input_num_dims;
for (int idx = 0; idx < input_num_dims; ++idx) {
TF_LITE_ENSURE_MSG(
context,
(SizeOfDimension(input, idx) == 1 ||
SizeOfDimension(input, idx) == get_shape_data(extending_dims + idx)),
"Output shape must be broadcastable from input shape.");
}
// Validating the shape of the output tensor.
tflite::RuntimeShape output_shape = tflite::GetTensorShape(output);
for (int idx = 0; idx < output_num_dims; ++idx) {
TF_LITE_ENSURE(context, output_shape.Dims(idx) == get_shape_data(idx));
}
return kTfLiteOk;
}
TfLiteStatus BroadcastToPrepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE(context, NumInputs(node) == 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TfLiteTensor* shape =
micro_context->AllocateTempInputTensor(node, kShapeTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE_MSG(context, (NumDimensions(input) <= kMaxDims),
"BroadcastTo only supports 1-5D tensor.");
TF_LITE_ENSURE(context,
shape->type == kTfLiteInt32 || shape->type == kTfLiteInt64);
TF_LITE_ENSURE_EQ(context, input->type, output->type);
// Does not support String type due to its variable size. This limitation is
// the same as in TFLite.
TF_LITE_ENSURE(context, input->type != kTfLiteString);
TF_LITE_ENSURE_STATUS(ValidateOutputTensor(context, input, shape, output));
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(shape);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
TfLiteStatus BroadcastToEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output = micro::GetEvalOutput(context, node, kOutputTensor);
// The BroadcastTo op supports up to 5 dims, unlike the 8 dims supported in TFLite.
reference_ops::BroadcastTo<kMaxDims>(
micro::GetTensorShape(input), input->data.raw,
micro::GetTensorShape(output), output->data.raw, input->type);
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_BROADCAST_TO() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/BroadcastToPrepare,
/*invoke=*/BroadcastToEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite

View File

@@ -23,6 +23,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_context.h"
#include "tensorflow/lite/micro/micro_graph.h"
#include "tensorflow/lite/schema/schema_generated.h"
@@ -50,16 +51,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE(context, NumInputs(node) == 0);
TF_LITE_ENSURE(context, NumOutputs(node) == 0);
// Casting to TfliteIntArray is required since we are re-using
// GetExecutionPlan from TfLiteContext. On TFLM this method returns a
// MicroGraph.
// TODO(b/188226309): Design a cleaner way to get a graph from kernel context.
MicroGraph* graph_info;
context->GetExecutionPlan(context,
reinterpret_cast<TfLiteIntArray**>(&graph_info));
tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
MicroGraph& graph_info = micro_context->graph();
TF_LITE_ENSURE(context,
op_data->init_subgraph_index < graph_info->NumSubgraphs());
op_data->init_subgraph_index < graph_info.NumSubgraphs());
return kTfLiteOk;
}
@@ -72,16 +68,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteOk;
}
// Casting to TfliteIntArray is required since we are re-using
// GetExecutionPlan from TfLiteContext. On TFLM this method returns a
// MicroGraph.
// TODO(b/188226309): Design a cleaner way to get a graph from kernel context.
MicroGraph* graph_info;
context->GetExecutionPlan(context,
reinterpret_cast<TfLiteIntArray**>(&graph_info));
tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
MicroGraph& graph_info = micro_context->graph();
TF_LITE_ENSURE_OK(context,
graph_info->InvokeSubgraph(op_data->init_subgraph_index));
graph_info.InvokeSubgraph(op_data->init_subgraph_index));
op_data->has_run = true;

View File

@@ -28,11 +28,19 @@ constexpr int kOutputTensor = 0;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
@@ -83,6 +91,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
case kTfLiteInt32:
return copyToTensor(context, tflite::micro::GetTensorData<int32_t>(input),
output, num_elements);
case kTfLiteUInt32:
return copyToTensor(context,
tflite::micro::GetTensorData<uint32_t>(input), output,
num_elements);
case kTfLiteFloat32:
return copyToTensor(context, tflite::micro::GetTensorData<float>(input),
output, num_elements);

View File

@@ -29,9 +29,13 @@ constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
@@ -42,6 +46,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
for (int i = 0; i < output->dims->size; ++i) {
TF_LITE_ENSURE_EQ(context, output->dims->data[i], input->dims->data[i]);
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -39,9 +39,13 @@ const int kCircularBufferCyclesMaxIndex = 0; // 'cycles_max'
const TfLiteStatus kTfLiteAbort = static_cast<TfLiteStatus>(-9);
TfLiteStatus CircularBufferPrepare(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input =
GetInput(context, node, kCircularBufferInputTensor);
TfLiteTensor* output = GetOutput(context, node, kCircularBufferOutputTensor);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kCircularBufferInputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kCircularBufferOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
OpDataCircularBuffer* op_data =
@@ -85,6 +89,9 @@ TfLiteStatus CircularBufferPrepare(TfLiteContext* context, TfLiteNode* node) {
op_data->cycles_until_run = op_data->cycles_max;
node->user_data = op_data;
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -540,9 +540,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input1 =
micro_context->AllocateTempInputTensor(node, kInputTensor1);
TF_LITE_ENSURE(context, input1 != nullptr);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* input2 =
micro_context->AllocateTempInputTensor(node, kInputTensor2);
TF_LITE_ENSURE(context, input2 != nullptr);
if (input1->type == kTfLiteInt8) {
@@ -570,6 +574,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
data->params.input2_shift = input2_shift;
}
micro_context->DeallocateTempTfLiteTensor(input1);
micro_context->DeallocateTempTfLiteTensor(input2);
return kTfLiteOk;
}

View File

@@ -115,13 +115,19 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
const TfLiteConcatenationParams* params =
reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data);
const TfLiteTensor* input_tensor = GetInput(context, node, 0);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input_tensor = micro_context->AllocateTempInputTensor(node, 0);
TF_LITE_ENSURE(context, input_tensor != nullptr);
TfLiteType input_type = input_tensor->type;
const TfLiteTensor* output_tensor = GetOutput(context, node, kOutputTensor);
TfLiteTensor* output_tensor =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output_tensor != nullptr);
TfLiteType output_type = output_tensor->type;
micro_context->DeallocateTempTfLiteTensor(input_tensor);
micro_context->DeallocateTempTfLiteTensor(output_tensor);
// Check activation and input type
TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone);
TF_LITE_ENSURE(context,
@@ -138,7 +144,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
// Shapes with dimensions >4 are not yet supported with static allocation.
for (int i = 0; i < num_inputs; ++i) {
const TfLiteTensor* input = GetInput(context, node, i);
TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, i);
TF_LITE_ENSURE(context, input != nullptr);
int num_dimensions = NumDimensions(input);
@@ -150,13 +156,15 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
num_dimensions);
return kTfLiteError;
}
micro_context->DeallocateTempTfLiteTensor(input);
}
// Calculate OpData.
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
switch (output_type) { // Already know in/out types are the same.
@@ -183,10 +191,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
// Allocate persistent scale and zeropoint buffers.
// Store input scale and zero point values in OpParams:
for (int i = 0; i < node->inputs->size; ++i) {
const TfLiteTensor* t = GetInput(context, node, i);
TfLiteTensor* t = micro_context->AllocateTempInputTensor(node, i);
TF_LITE_ENSURE(context, t != nullptr);
input_scales[i] = t->params.scale;
input_zero_points[i] = t->params.zero_point;
micro_context->DeallocateTempTfLiteTensor(t);
}
data->params.input_scale = input_scales;
@@ -202,6 +211,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteError;
}
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -1,4 +1,4 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -79,7 +79,8 @@ TfLiteRegistration Register_CONV_2D();
#if defined(XTENSA)
// Returns a TfLiteRegistration struct for kernel variant that only supports
// int8 inputs and outputs.
// int8 activations and int8 weights and always calls the reference
// implementation.
TfLiteRegistration Register_CONV_2D_INT8REF();
#else
inline TfLiteRegistration Register_CONV_2D_INT8REF() {
@@ -87,6 +88,25 @@ inline TfLiteRegistration Register_CONV_2D_INT8REF() {
}
#endif
#if defined(CMSIS_NN)
// Returns a TfLiteRegistration struct for kernel variant that only supports
// int8 activations and int8 weights and uses the latency optimized
// implementations.
TfLiteRegistration Register_CONV_2D_INT8();
// Returns a TfLiteRegistration struct for kernel variant that only supports
// int16 activations and int8 weights and uses the latency optimized
// implementations.
TfLiteRegistration Register_CONV_2D_INT16();
#else
inline TfLiteRegistration Register_CONV_2D_INT8() { return Register_CONV_2D(); }
inline TfLiteRegistration Register_CONV_2D_INT16() {
return Register_CONV_2D();
}
#endif
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
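
Applications that know their model only uses, for example, int8 convolutions can bind the specialized registration directly. A hedged sketch, assuming `MicroMutableOpResolver::AddConv2D()` accepts an explicit `TfLiteRegistration` argument (as it does in upstream TFLM); when `CMSIS_NN` is not defined the header above falls back to the generic `Register_CONV_2D()`:

```
#include "tensorflow/lite/micro/kernels/conv.h"
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

// Sketch only: bind the int8-specialized conv kernel explicitly.
void RegisterInt8Conv(tflite::MicroMutableOpResolver<1>& resolver) {
  resolver.AddConv2D(tflite::Register_CONV_2D_INT8());
}
```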

View File

@@ -93,13 +93,18 @@ TfLiteStatus CalculateOpDataConv(TfLiteContext* context, TfLiteNode* node,
params.dilation_width_factor, height, width, filter_height, filter_width,
padding, &out_height, &out_width);
const TfLiteTensor* input = GetInput(context, node, kConvInputTensor);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kConvInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
const TfLiteTensor* filter = GetInput(context, node, kConvWeightsTensor);
TfLiteTensor* filter =
micro_context->AllocateTempInputTensor(node, kConvWeightsTensor);
TF_LITE_ENSURE(context, filter != nullptr);
const TfLiteTensor* bias =
GetOptionalInputTensor(context, node, kConvBiasTensor);
TfLiteTensor* output = GetOutput(context, node, kConvOutputTensor);
TfLiteTensor* bias =
micro_context->AllocateTempInputTensor(node, kConvBiasTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kConvOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
// Note that quantized inference requires that all tensors have their
@@ -119,6 +124,11 @@ TfLiteStatus CalculateOpDataConv(TfLiteContext* context, TfLiteNode* node,
data->filter_zero_point = filter->params.zero_point;
data->output_zero_point = output->params.zero_point;
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(filter);
micro_context->DeallocateTempTfLiteTensor(output);
micro_context->DeallocateTempTfLiteTensor(bias);
return kTfLiteOk;
}
@@ -129,12 +139,16 @@ TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node) {
OpDataConv* data = static_cast<OpDataConv*>(node->user_data);
const auto& params =
*(static_cast<const TfLiteConvParams*>(node->builtin_data));
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* output = GetOutput(context, node, kConvOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kConvOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
const TfLiteTensor* input = GetInput(context, node, kConvInputTensor);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kConvInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
const TfLiteTensor* filter = GetInput(context, node, kConvWeightsTensor);
TfLiteTensor* filter =
micro_context->AllocateTempInputTensor(node, kConvWeightsTensor);
TF_LITE_ENSURE(context, filter != nullptr);
const int input_width = input->dims->data[2];
@@ -174,6 +188,10 @@ TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node) {
context, node, params, input_width, input_height, filter_width,
filter_height, output_width, output_height, input->type, data));
micro_context->DeallocateTempTfLiteTensor(filter);
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
} // namespace tflite

View File

@@ -47,8 +47,12 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* axis = GetInput(context, node, kAxisTensor);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TfLiteTensor* axis =
micro_context->AllocateTempInputTensor(node, kAxisTensor);
TF_LITE_ENSURE(context,
input->type == kTfLiteFloat32 || input->type == kTfLiteInt8);
@@ -58,7 +62,8 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE_EQ(context, input->type, output->type);
TF_LITE_ENSURE(context, HaveSameShapes(input, output));
@@ -91,6 +96,10 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
&data->output_activation_max));
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(axis);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -40,11 +40,14 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
@@ -83,6 +86,9 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
output->dims->data[kWidthRank] = output_width;
output->dims->data[kDepthRank] = output_channels;
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -94,13 +94,18 @@ TfLiteStatus CalculateOpDataDepthwiseConv(
params.dilation_width_factor, height, width, filter_height, filter_width,
padding, &out_height, &out_width);
const TfLiteTensor* input = GetInput(context, node, kConvInputTensor);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kConvInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
const TfLiteTensor* filter = GetInput(context, node, kConvWeightsTensor);
TfLiteTensor* filter =
micro_context->AllocateTempInputTensor(node, kConvWeightsTensor);
TF_LITE_ENSURE(context, filter != nullptr);
const TfLiteTensor* bias =
GetOptionalInputTensor(context, node, kConvBiasTensor);
TfLiteTensor* output = GetOutput(context, node, kConvOutputTensor);
TfLiteTensor* bias =
micro_context->AllocateTempInputTensor(node, kConvBiasTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kConvOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
// Note that quantized inference requires that all tensors have their
@@ -120,6 +125,11 @@ TfLiteStatus CalculateOpDataDepthwiseConv(
data->filter_zero_point = filter->params.zero_point;
data->output_zero_point = output->params.zero_point;
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(filter);
micro_context->DeallocateTempTfLiteTensor(bias);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
@@ -130,14 +140,16 @@ TfLiteStatus DepthwiseConvPrepare(TfLiteContext* context, TfLiteNode* node) {
OpDataConv* data = static_cast<OpDataConv*>(node->user_data);
const auto& params =
*(static_cast<const TfLiteDepthwiseConvParams*>(node->builtin_data));
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* output = GetOutput(context, node, kDepthwiseConvOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kDepthwiseConvOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
const TfLiteTensor* input =
GetInput(context, node, kDepthwiseConvInputTensor);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kDepthwiseConvInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
const TfLiteTensor* filter =
GetInput(context, node, kDepthwiseConvWeightsTensor);
TfLiteTensor* filter =
micro_context->AllocateTempInputTensor(node, kDepthwiseConvWeightsTensor);
TF_LITE_ENSURE(context, filter != nullptr);
const int input_width = input->dims->data[2];
@@ -180,6 +192,10 @@ TfLiteStatus DepthwiseConvPrepare(TfLiteContext* context, TfLiteNode* node) {
context, node, params, input_width, input_height, filter_width,
filter_height, output_width, output_height, input->type, data));
micro_context->DeallocateTempTfLiteTensor(output);
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(filter);
return kTfLiteOk;
}

View File

@@ -33,10 +33,12 @@ TfLiteStatus DequantizePrepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
MicroContext* micro_context = GetMicroContext(context);
// TODO(b/140515557): Add cached dequant to improve hybrid model performance.
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, 0);
TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE(context,
@@ -54,6 +56,10 @@ TfLiteStatus DequantizePrepare(TfLiteContext* context, TfLiteNode* node) {
data->quantization_params.zero_point = input->params.zero_point;
data->quantization_params.scale = static_cast<double>(input->params.scale);
data->output_zero_point = output->params.zero_point;
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -13,7 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <algorithm>
#include <numeric>
#include <tuple>
#include "flatbuffers/flexbuffers.h"
#include "tensorflow/lite/c/builtin_op_data.h"
@@ -152,14 +154,17 @@ void Free(TfLiteContext* context, void* buffer) {}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
auto* op_data = static_cast<OpData*>(node->user_data);
MicroContext* micro_context = GetMicroContext(context);
// Inputs: box_encodings, scores, anchors
TF_LITE_ENSURE_EQ(context, NumInputs(node), 3);
const TfLiteTensor* input_box_encodings =
GetInput(context, node, kInputTensorBoxEncodings);
const TfLiteTensor* input_class_predictions =
GetInput(context, node, kInputTensorClassPredictions);
const TfLiteTensor* input_anchors =
GetInput(context, node, kInputTensorAnchors);
TfLiteTensor* input_box_encodings =
micro_context->AllocateTempInputTensor(node, kInputTensorBoxEncodings);
TfLiteTensor* input_class_predictions =
micro_context->AllocateTempInputTensor(node,
kInputTensorClassPredictions);
TfLiteTensor* input_anchors =
micro_context->AllocateTempInputTensor(node, kInputTensorAnchors);
TF_LITE_ENSURE_EQ(context, NumDimensions(input_box_encodings), 3);
TF_LITE_ENSURE_EQ(context, NumDimensions(input_class_predictions), 3);
TF_LITE_ENSURE_EQ(context, NumDimensions(input_anchors), 2);
@@ -217,6 +222,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
// num_detections
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 4);
micro_context->DeallocateTempTfLiteTensor(input_box_encodings);
micro_context->DeallocateTempTfLiteTensor(input_class_predictions);
micro_context->DeallocateTempTfLiteTensor(input_anchors);
return kTfLiteOk;
}
@@ -313,9 +322,10 @@ TfLiteStatus DecodeCenterSizeBoxes(TfLiteContext* context, TfLiteNode* node,
void DecreasingPartialArgSort(const float* values, int num_values,
int num_to_sort, int* indices) {
std::iota(indices, indices + num_values, 0);
std::partial_sort(
indices, indices + num_to_sort, indices + num_values,
[&values](const int i, const int j) { return values[i] > values[j]; });
std::partial_sort(indices, indices + num_to_sort, indices + num_values,
[&values](const int i, const int j) {
return std::tie(values[i], j) > std::tie(values[j], i);
});
}
template <typename Compare>
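
The comparator change above is worth noting: the old lambda left the order of equal scores unspecified, while the `std::tie` form breaks ties by index, so the partial sort is deterministic across library implementations. A small standalone illustration:

```
#include <algorithm>
#include <cstdio>
#include <numeric>
#include <tuple>

int main() {
  const float values[] = {0.9f, 0.5f, 0.9f, 0.1f};
  int indices[4];
  std::iota(indices, indices + 4, 0);
  // Equal scores (indices 0 and 2) are ordered by index, so the result is
  // always {0, 2, 1, 3} regardless of the std::partial_sort implementation.
  std::partial_sort(indices, indices + 4, indices + 4,
                    [&values](const int i, const int j) {
                      return std::tie(values[i], j) > std::tie(values[j], i);
                    });
  for (int idx : indices) std::printf("%d ", idx);
  std::printf("\n");
  return 0;
}
```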

View File

@@ -38,11 +38,13 @@ bool IsLogicalSupportedType(const TfLiteType type) {
typedef bool (*IsSupportedType)(TfLiteType);
template <IsSupportedType>
TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, 0);
TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
if (!IsSupportedType(input->type)) {
@@ -50,6 +52,9 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
TfLiteTypeGetName(input->type), input->type);
return kTfLiteError;
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -80,13 +80,16 @@ void EvalUsingLookupTable(const OpData* data, const TfLiteEvalTensor* input,
}
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
// Use LUT to handle quantized elu path.
@@ -97,7 +100,8 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
};
PopulateLookupTable<int8_t>(input, output, transform, data);
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -0,0 +1,11 @@
# Info
These are the Espressif chipset-specific replacement kernels.
The kernels call either optimized routines or reference routines, depending on the selected optimization option.
By default, optimizations are enabled when available.
To change this behaviour, make the appropriate `ESP-NN` menu selection after running:
```
idf.py menuconfig
```

View File

@@ -0,0 +1,209 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/add.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/add.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include <esp_timer.h>
#if ESP_NN
#include <esp_nn.h>
#endif
long long add_total_time = 0;
namespace tflite {
void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params,
const OpDataAdd* data, const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
tflite::ArithmeticParams op_params;
SetActivationParams(data->output_activation_min_f32,
data->output_activation_max_f32, &op_params);
if (data->requires_broadcast) {
reference_ops::BroadcastAdd4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<float>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<float>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
} else {
reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<float>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<float>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
}
TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteAddParams* params, const OpDataAdd* data,
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
tflite::ArithmeticParams op_params;
op_params.left_shift = data->left_shift;
op_params.input1_offset = data->input1_offset;
op_params.input1_multiplier = data->input1_multiplier;
op_params.input1_shift = data->input1_shift;
op_params.input2_offset = data->input2_offset;
op_params.input2_multiplier = data->input2_multiplier;
op_params.input2_shift = data->input2_shift;
op_params.output_offset = data->output_offset;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
SetActivationParams(data->output_activation_min, data->output_activation_max,
&op_params);
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
switch (output->type) {
case kTfLiteInt8: {
if (need_broadcast) {
reference_integer_ops::BroadcastAdd4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
} else {
#if ESP_NN
const int8_t *input1_data = tflite::micro::GetTensorData<int8_t>(input1);
const int8_t *input2_data = tflite::micro::GetTensorData<int8_t>(input2);
int8_t *out_data = tflite::micro::GetTensorData<int8_t>(output);
esp_nn_add_elementwise_s8(input1_data,
input2_data,
data->input1_offset,
data->input2_offset,
data->input1_multiplier,
data->input2_multiplier,
data->input1_shift,
data->input2_shift,
data->left_shift,
out_data,
data->output_offset,
data->output_multiplier,
data->output_shift,
data->output_activation_min,
data->output_activation_max,
MatchingElementsSize(tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorShape(output))
);
#else
reference_integer_ops::Add(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
#endif
}
break;
}
case kTfLiteInt16: {
if (need_broadcast) {
reference_ops::BroadcastAdd4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int16_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int16_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
} else {
reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int16_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int16_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output),
false);
}
break;
}
default:
MicroPrintf("Type %s (%d) not supported.",
TfLiteTypeGetName(output->type), output->type);
return kTfLiteError;
}
return kTfLiteOk;
}
void* AddInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpDataAdd));
}
TfLiteStatus AddEval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
const OpDataAdd* data = static_cast<const OpDataAdd*>(node->user_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kAddInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kAddInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kAddOutputTensor);
long long start_time = esp_timer_get_time();
if (output->type == kTfLiteFloat32) {
EvalAdd(context, node, params, data, input1, input2, output);
} else if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, data,
input1, input2, output));
} else {
MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(output->type),
output->type);
return kTfLiteError;
}
add_total_time += esp_timer_get_time() - start_time;
return kTfLiteOk;
}
TfLiteRegistration Register_ADD() {
return {/*init=*/AddInit,
/*free=*/nullptr,
/*prepare=*/AddPrepare,
/*invoke=*/AddEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
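A short usage sketch, not part of this commit: because the file above defines tflite::Register_ADD() with the stock TFLM signature, no ESP-specific registration code is needed; the usual MicroMutableOpResolver calls bind to these ESP-NN kernels at link time. The resolver size and function name below are illustrative.

#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

// Sized for the two ops added below; a real application lists every op its
// model uses.
tflite::MicroMutableOpResolver<2> op_resolver;

void RegisterEspNnKernels() {
  op_resolver.AddAdd();     // binds to the Register_ADD() defined above
  op_resolver.AddConv2D();  // binds to the ESP-NN conv kernel in this commit
}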

View File

@@ -0,0 +1,319 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/kernels/conv.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/conv.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/conv.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "freertos/FreeRTOS.h"
#include <esp_timer.h>
#if ESP_NN
#include <esp_nn.h>
#endif
long long conv_total_time = 0;
namespace tflite {
namespace {
struct NodeData {
OpDataConv op_data;
#if ESP_NN
int buffer_idx;
#endif
};
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(NodeData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
NodeData* data = static_cast<NodeData*>(node->user_data);
const auto& params =
*(static_cast<const TfLiteConvParams*>(node->builtin_data));
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kConvInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* filter =
micro_context->AllocateTempInputTensor(node, kConvWeightsTensor);
TF_LITE_ENSURE(context, filter != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kConvOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
const int input_width = input->dims->data[2];
const int input_height = input->dims->data[1];
const int filter_width = filter->dims->data[2];
const int filter_height = filter->dims->data[1];
const int output_width = output->dims->data[2];
const int output_height = output->dims->data[1];
// Dynamically allocate per-channel quantization parameters.
const int num_channels = filter->dims->data[kConvQuantizedDimension];
data->op_data.per_channel_output_multiplier =
static_cast<int32_t*>(context->AllocatePersistentBuffer(
context, num_channels * sizeof(int32_t)));
data->op_data.per_channel_output_shift =
static_cast<int32_t*>(context->AllocatePersistentBuffer(
context, num_channels * sizeof(int32_t)));
// All per-channel quantized tensors need valid zero point and scale arrays.
if (input->type == kTfLiteInt8) {
TF_LITE_ENSURE_EQ(context, filter->quantization.type,
kTfLiteAffineQuantization);
const auto* affine_quantization =
static_cast<TfLiteAffineQuantization*>(filter->quantization.params);
TFLITE_DCHECK(affine_quantization != nullptr);
TFLITE_DCHECK(affine_quantization->scale != nullptr);
TFLITE_DCHECK(affine_quantization->zero_point != nullptr);
TF_LITE_ENSURE(context,
affine_quantization->scale->size == 1 ||
affine_quantization->scale->size ==
filter->dims->data[kConvQuantizedDimension]);
TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size,
affine_quantization->zero_point->size);
}
TF_LITE_ENSURE_STATUS(CalculateOpDataConv(
context, node, params, input_width, input_height, filter_width,
filter_height, output_width, output_height, input->type, &data->op_data));
#if ESP_NN
if (input->type == kTfLiteInt8) {
int scratch_buf_size = esp_nn_get_conv_scratch_size(
input_width, input_height, input->dims->data[3],
output->dims->data[3], filter_width, filter_height);
if (scratch_buf_size > 0) {
TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
context, scratch_buf_size, &data->buffer_idx));
} else {
data->buffer_idx = -1;
}
}
#endif
micro_context->DeallocateTempTfLiteTensor(output);
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(filter);
return kTfLiteOk;
}
#if ESP_NN
// Int8 convolution with fixed-point per-channel quantization, dispatched to
// the ESP-NN kernel when the parameters allow it.
inline void EvalQuantizedPerChannel(
TfLiteContext* context, TfLiteNode* node, const TfLiteConvParams& params,
const NodeData& data, const TfLiteEvalTensor* input,
const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias,
TfLiteEvalTensor* output) {
const int dilation_width_factor = params.dilation_width_factor;
const int dilation_height_factor = params.dilation_height_factor;
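// The ESP-NN convolution kernel does not support dilation, so fall back to
// the reference per-channel implementation when either dilation factor is
// not 1.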
if (dilation_width_factor == 1 && dilation_height_factor == 1) {
// Get parameters.
RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter);
RuntimeShape input_shape = tflite::micro::GetTensorShape(input);
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
RuntimeShape bias_shape = tflite::micro::GetTensorShape(bias);
const int8_t *input_data = tflite::micro::GetTensorData<int8_t>(input);
int8_t *output_data = tflite::micro::GetTensorData<int8_t>(output);
const int32_t input_offset = -data.op_data.input_zero_point;
const int32_t output_offset = data.op_data.output_zero_point;
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
const int pad_width = data.op_data.padding.width;
const int pad_height = data.op_data.padding.height;
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
// Set min and max value of the output.
const int32_t activation_min = data.op_data.output_activation_min;
const int32_t activation_max = data.op_data.output_activation_max;
// Consistency check.
TFLITE_DCHECK_LE(activation_min, activation_max);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batch_size = MatchingDim(input_shape, 0, output_shape, 0);
const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
if (tflite::micro::GetTensorData<int8_t>(bias)) {
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
}
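// Fetch the scratch buffer requested in Prepare(), if any, and hand it to
// ESP-NN before running the optimized per-batch convolution below.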
void *scratch_buf = NULL;
if (data.buffer_idx > -1) {
scratch_buf = context->GetScratchBuffer(context, data.buffer_idx);
}
esp_nn_set_conv_scratch_buf(scratch_buf);
const int input_size = input_width * input_height * input_depth;
const int output_size = output_width * output_height * output_depth;
for (int i_batch = 0; i_batch < batch_size; i_batch++) {
esp_nn_conv_s8(input_data + i_batch * input_size,
input_width, input_height, input_depth, input_offset,
pad_width, pad_height, stride_width, stride_height,
tflite::micro::GetTensorData<int8_t>(filter),
filter_width, filter_height,
tflite::micro::GetTensorData<int32_t>(bias),
output_data + i_batch * output_size,
output_width, output_height, output_depth, output_offset,
data.op_data.per_channel_output_shift,
data.op_data.per_channel_output_multiplier,
activation_min, activation_max);
}
} else {
reference_integer_ops::ConvPerChannel(
ConvParamsQuantized(params, data.op_data),
data.op_data.per_channel_output_multiplier,
data.op_data.per_channel_output_shift,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
}
#endif
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kConvInputTensor);
const TfLiteEvalTensor* filter =
tflite::micro::GetEvalInput(context, node, kConvWeightsTensor);
const TfLiteEvalTensor* bias =
(NumInputs(node) == 3)
? tflite::micro::GetEvalInput(context, node, kConvBiasTensor)
: nullptr;
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kConvOutputTensor);
TFLITE_DCHECK(node->builtin_data != nullptr);
const auto& params =
*(reinterpret_cast<TfLiteConvParams*>(node->builtin_data));
TFLITE_DCHECK(node->user_data != nullptr);
const auto& data = *(static_cast<const NodeData*>(node->user_data));
TF_LITE_ENSURE_EQ(context, input->type, output->type);
TF_LITE_ENSURE_MSG(context, input->type == filter->type,
"Hybrid models are not supported on TFLite Micro.");
long long start_time = esp_timer_get_time();
switch (input->type) { // Already know in/out types are same.
case kTfLiteFloat32: {
tflite::reference_ops::Conv(
ConvParamsFloat(params, data.op_data),
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<float>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<float>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output),
tflite::micro::GetTensorShape(nullptr), nullptr);
break;
}
case kTfLiteInt8: {
#if ESP_NN
EvalQuantizedPerChannel(context, node, params, data, input, filter,
bias, output);
#else
reference_integer_ops::ConvPerChannel(
ConvParamsQuantized(params, data.op_data),
data.op_data.per_channel_output_multiplier,
data.op_data.per_channel_output_shift,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
#endif
break;
}
case kTfLiteUInt8: {
// Legacy uint8 quantization: call the reference kernel directly.
reference_ops::Conv(ConvParamsQuantized(params, data.op_data),
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<uint8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output),
tflite::micro::GetTensorShape(nullptr), nullptr,
nullptr);
break;
}
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);
return kTfLiteError;
}
conv_total_time += esp_timer_get_time() - start_time;
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_CONV_2D() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
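For readers unfamiliar with the TFLM scratch-buffer API used by this conv kernel (and the depthwise-conv kernel that follows): working memory is requested once in Prepare and fetched by index on every Eval. A minimal sketch under those assumptions; MyOpData, kMyScratchBytes, MyPrepare and MyEval are hypothetical names for illustration only.

#include "tensorflow/lite/c/common.h"

struct MyOpData {
  int buffer_idx;  // index returned by RequestScratchBufferInArena()
};

TfLiteStatus MyPrepare(TfLiteContext* context, TfLiteNode* node) {
  auto* data = static_cast<MyOpData*>(node->user_data);
  constexpr size_t kMyScratchBytes = 1024;  // illustrative size
  // Reserve per-invocation working memory inside the tensor arena.
  return context->RequestScratchBufferInArena(context, kMyScratchBytes,
                                              &data->buffer_idx);
}

TfLiteStatus MyEval(TfLiteContext* context, TfLiteNode* node) {
  auto* data = static_cast<MyOpData*>(node->user_data);
  // The pointer is only valid for the duration of this invocation.
  void* scratch = context->GetScratchBuffer(context, data->buffer_idx);
  return scratch != nullptr ? kTfLiteOk : kTfLiteError;
}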

View File

@@ -0,0 +1,319 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/kernels/depthwise_conv.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "freertos/FreeRTOS.h"
#include <esp_timer.h>
#if ESP_NN
#include <esp_nn.h>
#endif
long long dc_total_time = 0;
namespace tflite {
namespace {
struct NodeData {
OpDataConv op_data;
#if ESP_NN
int buffer_idx;
#endif
};
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(NodeData));
}
#if ESP_NN
inline void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
const TfLiteDepthwiseConvParams& params,
const NodeData& data,
const TfLiteEvalTensor* input,
const TfLiteEvalTensor* filter,
const TfLiteEvalTensor* bias,
TfLiteEvalTensor* output) {
const int dilation_width_factor = params.dilation_width_factor;
const int dilation_height_factor = params.dilation_height_factor;
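// As with conv, the ESP-NN depthwise kernel does not support dilation; fall
// back to the reference per-channel implementation when dilation is used.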
if (dilation_width_factor == 1 && dilation_height_factor == 1) {
// Get parameters.
RuntimeShape input_shape = tflite::micro::GetTensorShape(input);
RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter);
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
RuntimeShape bias_shape = tflite::micro::GetTensorShape(bias);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int8_t *input_data = tflite::micro::GetTensorData<int8_t>(input);
int8_t *output_data = tflite::micro::GetTensorData<int8_t>(output);
const int depth_multiplier = params.depth_multiplier;
const int32_t input_offset = -data.op_data.input_zero_point;
const int32_t output_offset = data.op_data.output_zero_point;
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
const int pad_width = data.op_data.padding.width;
const int pad_height = data.op_data.padding.height;
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int input_depth = input_shape.Dims(3);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
// Set min and max value of the output.
const int32_t activation_min = data.op_data.output_activation_min;
const int32_t activation_max = data.op_data.output_activation_max;
// Consistency check.
TFLITE_DCHECK_LE(activation_min, activation_max);
const int batch_size = MatchingDim(input_shape, 0, output_shape, 0);
const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
if (tflite::micro::GetTensorData<int8_t>(bias)) {
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
}
const int input_size = input_width * input_height * input_depth;
const int output_size = output_width * output_height * output_depth;
void *scratch_buf = NULL;
if (data.buffer_idx > -1) {
scratch_buf = context->GetScratchBuffer(context, data.buffer_idx);
}
esp_nn_set_depthwise_conv_scratch_buf(scratch_buf);
for (int i_batch = 0; i_batch < batch_size; i_batch++) {
esp_nn_depthwise_conv_s8(input_data + i_batch * input_size, input_width,
input_height, input_depth, input_offset,
pad_width, pad_height,
stride_width, stride_height, depth_multiplier,
tflite::micro::GetTensorData<int8_t>(filter),
filter_width, filter_height,
tflite::micro::GetTensorData<int32_t>(bias),
output_data + i_batch * output_size,
output_width, output_height, output_offset,
data.op_data.per_channel_output_shift,
data.op_data.per_channel_output_multiplier,
activation_min, activation_max);
}
} else {
reference_integer_ops::DepthwiseConvPerChannel(
DepthwiseConvParamsQuantized(params, data.op_data),
data.op_data.per_channel_output_multiplier,
data.op_data.per_channel_output_shift,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
}
#endif
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
NodeData* data = static_cast<NodeData*>(node->user_data);
const TfLiteDepthwiseConvParams& params =
*(static_cast<const TfLiteDepthwiseConvParams*>(node->builtin_data));
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kConvInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* filter =
micro_context->AllocateTempInputTensor(node, kConvWeightsTensor);
TF_LITE_ENSURE(context, filter != nullptr);
TfLiteTensor* bias =
micro_context->AllocateTempInputTensor(node, kConvBiasTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kConvOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
const int input_width = input->dims->data[2];
const int input_height = input->dims->data[1];
const int filter_width = filter->dims->data[2];
const int filter_height = filter->dims->data[1];
const int output_width = output->dims->data[2];
const int output_height = output->dims->data[1];
// Dynamically allocate per-channel quantization parameters.
const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
data->op_data.per_channel_output_multiplier =
static_cast<int32_t*>(context->AllocatePersistentBuffer(
context, num_channels * sizeof(int32_t)));
data->op_data.per_channel_output_shift =
static_cast<int32_t*>(context->AllocatePersistentBuffer(
context, num_channels * sizeof(int32_t)));
// All per-channel quantized tensors need valid zero point and scale arrays.
if (input->type == kTfLiteInt8) {
TF_LITE_ENSURE_EQ(context, filter->quantization.type,
kTfLiteAffineQuantization);
const auto* affine_quantization =
static_cast<TfLiteAffineQuantization*>(filter->quantization.params);
TFLITE_DCHECK(affine_quantization != nullptr);
TFLITE_DCHECK(affine_quantization->scale != nullptr);
TFLITE_DCHECK(affine_quantization->zero_point != nullptr);
TF_LITE_ENSURE(
context, affine_quantization->scale->size == 1 ||
affine_quantization->scale->size ==
filter->dims->data[kDepthwiseConvQuantizedDimension]);
TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size,
affine_quantization->zero_point->size);
}
TF_LITE_ENSURE_STATUS(CalculateOpDataDepthwiseConv(
context, node, params, input_width, input_height, filter_width,
filter_height, output_width, output_height, input->type, &data->op_data));
#if ESP_NN
if (input->type == kTfLiteInt8) {
int scratch_buf_size = esp_nn_get_depthwise_conv_scratch_size(
input_width, input_height, input->dims->data[3],
params.depth_multiplier, filter_width, filter_height);
if (scratch_buf_size > 0) {
TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
context, scratch_buf_size, &data->buffer_idx));
} else {
data->buffer_idx = -1;
}
}
#endif
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(filter);
micro_context->DeallocateTempTfLiteTensor(bias);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
auto& params =
*(reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data));
const NodeData& data = *(static_cast<const NodeData*>(node->user_data));
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kDepthwiseConvOutputTensor);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kDepthwiseConvInputTensor);
const TfLiteEvalTensor* filter =
tflite::micro::GetEvalInput(context, node, kDepthwiseConvWeightsTensor);
const TfLiteEvalTensor* bias =
(NumInputs(node) == 3)
? tflite::micro::GetEvalInput(context, node, kDepthwiseConvBiasTensor)
: nullptr;
long long start_time = esp_timer_get_time();
switch (input->type) { // Already know in/out types are same.
case kTfLiteFloat32:
tflite::reference_ops::DepthwiseConv(
DepthwiseConvParamsFloat(params, data.op_data),
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<float>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<float>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
break;
case kTfLiteInt8:
#if ESP_NN
EvalQuantizedPerChannel(context, node, params, data, input, filter, bias,
output);
#else
reference_integer_ops::DepthwiseConvPerChannel(
DepthwiseConvParamsQuantized(params, data.op_data),
data.op_data.per_channel_output_multiplier,
data.op_data.per_channel_output_shift,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
#endif
break;
case kTfLiteUInt8:
// Legacy uint8 quantization: call the reference kernel directly.
reference_ops::DepthwiseConv(
DepthwiseConvParamsQuantized(params, data.op_data),
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<uint8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);
return kTfLiteError;
}
dc_total_time += esp_timer_get_time() - start_time;
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_DEPTHWISE_CONV_2D() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite

View File

@@ -0,0 +1,198 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/kernels/fully_connected.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#if ESP_NN
#include <esp_nn.h>
#endif
#include <esp_timer.h>
long long fc_total_time = 0;
namespace tflite {
namespace {
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context,
sizeof(OpDataFullyConnected));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* data = static_cast<OpDataFullyConnected*>(node->user_data);
const auto params =
static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kFullyConnectedInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* filter = micro_context->AllocateTempInputTensor(
node, kFullyConnectedWeightsTensor);
TF_LITE_ENSURE(context, filter != nullptr);
TfLiteTensor* bias =
micro_context->AllocateTempInputTensor(node, kFullyConnectedBiasTensor);
TfLiteTensor* output = micro_context->AllocateTempOutputTensor(
node, kFullyConnectedOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
TF_LITE_ENSURE_MSG(context, input->type == filter->type,
"Hybrid models are not supported on TFLite Micro.");
TF_LITE_ENSURE_OK(context, CalculateOpDataFullyConnected(
context, params->activation, input->type,
input, filter, bias, output, data));
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(filter);
if (bias != nullptr) {
micro_context->DeallocateTempTfLiteTensor(bias);
}
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
const auto* params =
static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kFullyConnectedInputTensor);
const TfLiteEvalTensor* filter =
tflite::micro::GetEvalInput(context, node, kFullyConnectedWeightsTensor);
const TfLiteEvalTensor* bias =
tflite::micro::GetEvalInput(context, node, kFullyConnectedBiasTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kFullyConnectedOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const auto& data =
*(static_cast<const OpDataFullyConnected*>(node->user_data));
long long start_time = esp_timer_get_time();
// Checks in Prepare ensure input, output and filter types are all the same.
switch (input->type) {
case kTfLiteFloat32: {
tflite::reference_ops::FullyConnected(
FullyConnectedParamsFloat(params->activation),
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<float>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<float>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
break;
}
case kTfLiteInt8: {
const int32_t* bias_data =
nullptr != bias ? tflite::micro::GetTensorData<int32_t>(bias)
: nullptr;
#if ESP_NN
const RuntimeShape& filter_shape = tflite::micro::GetTensorShape(filter);
const RuntimeShape& output_shape = tflite::micro::GetTensorShape(output);
const int filter_dim_count = filter_shape.DimensionsCount();
const int batches = output_shape.Dims(0);
const int output_depth = output_shape.Dims(1);
TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
const int8_t *input_data = tflite::micro::GetTensorData<int8_t>(input);
int8_t *output_data = tflite::micro::GetTensorData<int8_t>(output);
const int8_t *filter_data = tflite::micro::GetTensorData<int8_t>(filter);
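// ESP-NN expects additive offsets (the negated TFLite zero points) and
// processes one batch row per call, so advance the input and output pointers
// by accum_depth and output_depth respectively on each iteration.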
for (int b = 0; b < batches; ++b) {
esp_nn_fully_connected_s8(input_data, -data.input_zero_point,
accum_depth,
filter_data, -data.filter_zero_point,
bias_data, output_data, output_depth,
data.output_zero_point,
data.output_shift, data.output_multiplier,
data.output_activation_min,
data.output_activation_max);
input_data += accum_depth;
output_data += output_depth;
}
#else
tflite::reference_integer_ops::FullyConnected(
FullyConnectedParamsQuantized(data),
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias), bias_data,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
#endif
break;
}
case kTfLiteUInt8: {
tflite::reference_ops::FullyConnected(
FullyConnectedParamsQuantized(data),
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<uint8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
break;
}
default: {
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);
return kTfLiteError;
}
}
fc_total_time += esp_timer_get_time() - start_time;
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_FULLY_CONNECTED() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite

View File

@@ -0,0 +1,131 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/kernels/mul.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/mul.h"
#include "tensorflow/lite/kernels/internal/reference/mul.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#if ESP_NN
#include <esp_nn.h>
#endif
#include <esp_timer.h>
long long mul_total_time = 0;
namespace tflite {
#if ESP_NN
void MulEvalQuantized(TfLiteContext* context, TfLiteNode* node,
const OpDataMul* data, const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
tflite::ArithmeticParams op_params = {};
op_params.quantized_activation_min = data->output_activation_min;
op_params.quantized_activation_max = data->output_activation_max;
op_params.float_activation_max = data->output_activation_max_f32;
op_params.input1_offset = -data->input1_zero_point;
op_params.input2_offset = -data->input2_zero_point;
op_params.output_offset = data->output_zero_point;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
if (need_broadcast) {
reference_integer_ops::BroadcastMul4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
} else {
const int8_t *input1_data = tflite::micro::GetTensorData<int8_t>(input1);
const int8_t *input2_data = tflite::micro::GetTensorData<int8_t>(input2);
int8_t *out_data = tflite::micro::GetTensorData<int8_t>(output);
esp_nn_mul_elementwise_s8(input1_data, input2_data, op_params.input1_offset,
op_params.input2_offset, out_data, op_params.output_offset,
op_params.output_multiplier, op_params.output_shift,
op_params.quantized_activation_min, op_params.quantized_activation_max,
MatchingElementsSize(tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorShape(output)));
}
}
#endif
TfLiteStatus MulEval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
const OpDataMul* data = static_cast<const OpDataMul*>(node->user_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kMulInput1Tensor);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kMulInput2Tensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kMulOutputTensor);
long long start_time = esp_timer_get_time();
switch (input1->type) {
case kTfLiteInt8:
#if ESP_NN
MulEvalQuantized(context, node, data, input1, input2, output);
#else
EvalMulQuantizedReference(context, node, data, input1, input2, output);
#endif
break;
case kTfLiteInt32:
EvalMulQuantizedReference(context, node, data, input1, input2, output);
break;
case kTfLiteFloat32:
EvalMulFloatReference(context, node, params, data, input1, input2,
output);
break;
default:
MicroPrintf("Type %s (%d) not supported.",
TfLiteTypeGetName(input1->type), input1->type);
return kTfLiteError;
}
mul_total_time += esp_timer_get_time() - start_time;
return kTfLiteOk;
}
TfLiteRegistration Register_MUL() {
return {/*init=*/MulInit,
/*free=*/nullptr,
/*prepare=*/MulPrepare,
/*invoke=*/MulEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
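The output_multiplier/output_shift pair consumed by esp_nn_mul_elementwise_s8 encodes the combined scale S1*S2/S_out in fixed point. A sketch of that standard derivation is shown below; the real values come from the shared OpDataMul setup, which is not part of this diff, and ComputeMulRescale is a hypothetical helper name.

#include <cstdint>
#include "tensorflow/lite/kernels/internal/quantization_util.h"

// real_output = (S1 * S2 / S_out) * (q1 - Z1) * (q2 - Z2), then add Z_out.
void ComputeMulRescale(float input1_scale, float input2_scale,
                       float output_scale, int32_t* output_multiplier,
                       int* output_shift) {
  const double real_multiplier =
      static_cast<double>(input1_scale) * input2_scale / output_scale;
  tflite::QuantizeMultiplier(real_multiplier, output_multiplier, output_shift);
}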

View File

@@ -0,0 +1,245 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/pooling.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/pooling.h"
#if ESP_NN
#include <esp_nn.h>
#endif
#include <esp_timer.h>
long long pooling_total_time = 0;
namespace tflite {
namespace {
#if ESP_NN
void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
const TfLitePoolParams* params, const OpDataPooling* data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output) {
const int stride_height = params->stride_height;
const int stride_width = params->stride_width;
const int filter_height = params->filter_height;
const int filter_width = params->filter_width;
const int activation_min = data->activation_min;
const int activation_max = data->activation_max;
const int pad_height = data->padding.height;
const int pad_width = data->padding.width;
const RuntimeShape& input_shape = tflite::micro::GetTensorShape(input);
const RuntimeShape& output_shape = tflite::micro::GetTensorShape(output);
TFLITE_DCHECK_LE(activation_min, activation_max);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int depth = MatchingDim(input_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const int8_t *input_data = tflite::micro::GetTensorData<int8_t>(input);
int8_t *output_data = tflite::micro::GetTensorData<int8_t>(output);
const int input_size = input_width * input_height * depth;
const int output_size = output_width * output_height * depth;
if (depth % 4 == 0) { // Optimized ESP32-S3 kernel requires the channel count to be a multiple of 4
for (int batch = 0; batch < batches; ++batch) {
esp_nn_avg_pool_s8(input_data, input_width, input_height,
output_data, output_width, output_height,
stride_width, stride_height,
filter_width, filter_height,
pad_width, pad_height,
activation_min, activation_max, depth);
input_data += input_size;
output_data += output_size;
}
} else {
for (int batch = 0; batch < batches; ++batch) {
esp_nn_avg_pool_s8_ansi(input_data, input_width, input_height,
output_data, output_width, output_height,
stride_width, stride_height,
filter_width, filter_height,
pad_width, pad_height,
activation_min, activation_max, depth);
input_data += input_size;
output_data += output_size;
}
}
}
void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLitePoolParams* params, const OpDataPooling* data,
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
const int stride_height = params->stride_height;
const int stride_width = params->stride_width;
const int filter_height = params->filter_height;
const int filter_width = params->filter_width;
const int activation_min = data->activation_min;
const int activation_max = data->activation_max;
const int pad_height = data->padding.height;
const int pad_width = data->padding.width;
const RuntimeShape& input_shape = tflite::micro::GetTensorShape(input);
const RuntimeShape& output_shape = tflite::micro::GetTensorShape(output);
TFLITE_DCHECK_LE(activation_min, activation_max);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int depth = MatchingDim(input_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const int8_t *input_data = tflite::micro::GetTensorData<int8_t>(input);
int8_t *output_data = tflite::micro::GetTensorData<int8_t>(output);
const int input_size = input_width * input_height * depth;
const int output_size = output_width * output_height * depth;
if (depth % 4 == 0) { // Optimized ESP32-S3 kernel requires the channel count to be a multiple of 4
for (int batch = 0; batch < batches; ++batch) {
esp_nn_max_pool_s8(input_data, input_width, input_height,
output_data, output_width, output_height,
stride_width, stride_height,
filter_width, filter_height,
pad_width, pad_height,
activation_min, activation_max, depth);
input_data += input_size;
output_data += output_size;
}
} else {
for (int batch = 0; batch < batches; ++batch) {
esp_nn_max_pool_s8_ansi(input_data, input_width, input_height,
output_data, output_width, output_height,
stride_width, stride_height,
filter_width, filter_height,
pad_width, pad_height,
activation_min, activation_max, depth);
input_data += input_size;
output_data += output_size;
}
}
}
#endif
TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
const OpDataPooling* data =
static_cast<const OpDataPooling*>(node->user_data);
const TfLiteEvalTensor* input =
micro::GetEvalInput(context, node, kPoolingInputTensor);
TfLiteEvalTensor* output =
micro::GetEvalOutput(context, node, kPoolingOutputTensor);
long long start_time = esp_timer_get_time();
// Inputs and outputs share the same type, guaranteed by the converter.
switch (input->type) {
case kTfLiteFloat32:
AveragePoolingEvalFloat(context, node, params, data, input, output);
break;
case kTfLiteInt8:
#if ESP_NN
AverageEvalQuantized(context, node, params, data, input, output);
#else
AveragePoolingEvalQuantized(context, node, params, data, input, output);
#endif
break;
default:
TF_LITE_KERNEL_LOG(context, "Input type %s is not currently supported",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
pooling_total_time += esp_timer_get_time() - start_time;
return kTfLiteOk;
}
TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
const OpDataPooling* data =
static_cast<const OpDataPooling*>(node->user_data);
const TfLiteEvalTensor* input =
micro::GetEvalInput(context, node, kPoolingInputTensor);
TfLiteEvalTensor* output =
micro::GetEvalOutput(context, node, kPoolingOutputTensor);
long long start_time = esp_timer_get_time();
switch (input->type) {
case kTfLiteFloat32:
MaxPoolingEvalFloat(context, node, params, data, input, output);
break;
case kTfLiteInt8:
#if ESP_NN
MaxEvalQuantized(context, node, params, data, input, output);
#else
MaxPoolingEvalQuantized(context, node, params, data, input, output);
#endif
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
pooling_total_time += esp_timer_get_time() - start_time;
return kTfLiteOk;
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpDataPooling));
}
} // namespace
TfLiteRegistration Register_AVERAGE_POOL_2D() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/PoolingPrepare,
/*invoke=*/AverageEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration Register_MAX_POOL_2D() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/PoolingPrepare,
/*invoke=*/MaxEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
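The per-op time accumulators defined across these kernels (add_total_time, conv_total_time, dc_total_time, fc_total_time, mul_total_time, pooling_total_time) are plain globals holding microseconds from esp_timer_get_time(). A sketch of how an application could dump them after an inference run; PrintKernelProfile is an illustrative name, not part of this commit.

#include <cstdio>

extern long long add_total_time, conv_total_time, dc_total_time,
    fc_total_time, mul_total_time, pooling_total_time;

void PrintKernelProfile() {
  printf("ADD:             %lld us\n", add_total_time);
  printf("CONV_2D:         %lld us\n", conv_total_time);
  printf("DEPTHWISE_CONV:  %lld us\n", dc_total_time);
  printf("FULLY_CONNECTED: %lld us\n", fc_total_time);
  printf("MUL:             %lld us\n", mul_total_time);
  printf("POOLING:         %lld us\n", pooling_total_time);
}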

View File

@@ -27,11 +27,15 @@ constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
TF_LITE_ENSURE_TYPES_EQ(context, output->type, input->type);
@@ -40,6 +44,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
for (int i = 0; i < output->dims->size; ++i) {
TF_LITE_ENSURE_EQ(context, output->dims->data[i], input->dims->data[i]);
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -84,22 +84,31 @@ TfLiteStatus VerifyTensorDim(TfLiteContext* context, const TfLiteTensor* input,
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
const TfLiteTensor* axis;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kAxisTensor, &axis));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* axis =
micro_context->AllocateTempInputTensor(node, kAxisTensor);
TF_LITE_ENSURE(context, axis != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
output->type = input->type;
if (IsDynamicTensor(axis)) {
TF_LITE_KERNEL_LOG(context,
"DynamicTensor is not yet supported by Expand_Dims.");
return kTfLiteError;
}
return VerifyTensorDim(context, input, axis, output);
TF_LITE_ENSURE_OK(context, VerifyTensorDim(context, input, axis, output));
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(axis);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
template <typename T>

View File

@@ -65,14 +65,18 @@ constexpr int kValueTensor = 1;
constexpr int kOutputTensor = 0;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
// Ensure inputs and outputs exist.
const TfLiteTensor* dims;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kDimsTensor, &dims));
const TfLiteTensor* value;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kValueTensor, &value));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TfLiteTensor* dims =
micro_context->AllocateTempInputTensor(node, kDimsTensor);
TF_LITE_ENSURE(context, dims != nullptr);
TfLiteTensor* value =
micro_context->AllocateTempInputTensor(node, kValueTensor);
TF_LITE_ENSURE(context, value != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
// The value tensor must be a scalar.
TF_LITE_ENSURE_EQ(context, NumDimensions(value), 0);
@@ -90,6 +94,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_OK(context, EnsureEq(context, output->dims, dims));
}
micro_context->DeallocateTempTfLiteTensor(dims);
micro_context->DeallocateTempTfLiteTensor(value);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -31,22 +31,28 @@ constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input1;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputTensor1, &input1));
const TfLiteTensor* input2;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputTensor2, &input2));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TfLiteTensor* input1 =
micro_context->AllocateTempInputTensor(node, kInputTensor1);
TF_LITE_ENSURE(context, input1 != nullptr);
TfLiteTensor* input2 =
micro_context->AllocateTempInputTensor(node, kInputTensor2);
TF_LITE_ENSURE(context, input2 != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, output->type);
micro_context->DeallocateTempTfLiteTensor(input1);
micro_context->DeallocateTempTfLiteTensor(input2);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -36,22 +36,28 @@ constexpr int kOutputTensor = 0;
// OLD-TODO(b/117912880): Support quantization.
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input1;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputTensor1, &input1));
const TfLiteTensor* input2;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputTensor2, &input2));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TfLiteTensor* input1 =
micro_context->AllocateTempInputTensor(node, kInputTensor1);
TF_LITE_ENSURE(context, input1 != nullptr);
TfLiteTensor* input2 =
micro_context->AllocateTempInputTensor(node, kInputTensor2);
TF_LITE_ENSURE(context, input2 != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, output->type);
micro_context->DeallocateTempTfLiteTensor(input1);
micro_context->DeallocateTempTfLiteTensor(input2);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -35,6 +35,8 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) {
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
@@ -42,23 +44,33 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
const auto params =
static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
const TfLiteTensor* input =
GetInput(context, node, kFullyConnectedInputTensor);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kFullyConnectedInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
const TfLiteTensor* filter =
GetInput(context, node, kFullyConnectedWeightsTensor);
TfLiteTensor* filter = micro_context->AllocateTempInputTensor(
node, kFullyConnectedWeightsTensor);
TF_LITE_ENSURE(context, filter != nullptr);
const TfLiteTensor* bias =
GetOptionalInputTensor(context, node, kFullyConnectedBiasTensor);
TfLiteTensor* output = GetOutput(context, node, kFullyConnectedOutputTensor);
TfLiteTensor* bias =
micro_context->AllocateTempInputTensor(node, kFullyConnectedBiasTensor);
TfLiteTensor* output = micro_context->AllocateTempOutputTensor(
node, kFullyConnectedOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
TF_LITE_ENSURE_MSG(context, input->type == filter->type,
"Hybrid models are not supported on TFLite Micro.");
return CalculateOpDataFullyConnected(context, params->activation, input->type,
input, filter, bias, output, data);
TF_LITE_ENSURE_OK(context, CalculateOpDataFullyConnected(
context, params->activation, input->type,
input, filter, bias, output, data));
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(filter);
if (bias != nullptr) {
micro_context->DeallocateTempTfLiteTensor(bias);
}
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {

View File

@@ -97,19 +97,23 @@ TfLiteStatus Gather(const TfLiteGatherParams* params,
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const auto* params =
reinterpret_cast<const TfLiteGatherParams*>(node->builtin_data);
const TfLiteTensor* input;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
const TfLiteTensor* coords;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputPositions, &coords));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* coords =
micro_context->AllocateTempInputTensor(node, kInputPositions);
TF_LITE_ENSURE(context, coords != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
switch (coords->type) {
case kTfLiteInt32:
break;
@@ -176,6 +180,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
for (int i = axis + 1; i < input->dims->size; ++i) {
output_shape->data[output_index++] = input->dims->data[i];
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(coords);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -28,16 +28,19 @@ constexpr int kOutputTensor = 0;
constexpr int MAX_INDICES_ND = 5;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* params;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kParams, &params));
const TfLiteTensor* indices;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kIndices, &indices));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TfLiteTensor* params = micro_context->AllocateTempInputTensor(node, kParams);
TF_LITE_ENSURE(context, params != nullptr);
TfLiteTensor* indices =
micro_context->AllocateTempInputTensor(node, kIndices);
TF_LITE_ENSURE(context, indices != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
switch (params->type) {
case kTfLiteFloat32:
@@ -98,6 +101,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
output_shape->data[output_index++] = params->dims->data[i];
}
output_shape->size = output_index;
micro_context->DeallocateTempTfLiteTensor(params);
micro_context->DeallocateTempTfLiteTensor(indices);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -32,13 +32,17 @@ const int kHardSwishInputTensor = 0;
const int kHardSwishOutputTensor = 0;
TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TFLITE_DCHECK(node->user_data != nullptr);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kHardSwishInputTensor);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kHardSwishInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kHardSwishOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kHardSwishOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
if (input->type == kTfLiteInt8) {
@@ -73,6 +77,9 @@ TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node) {
&params->reluish_multiplier_fixedpoint_int16);
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -23,6 +23,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_context.h"
#include "tensorflow/lite/micro/micro_graph.h"
#include "tensorflow/lite/schema/schema_generated.h"
@@ -50,36 +51,33 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE(context, node->inputs->size > 0);
// The first input is the condition.
const TfLiteTensor* cond;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &cond));
tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
TfLiteTensor* cond = micro_context->AllocateTempInputTensor(node, 0);
TF_LITE_ENSURE(context, cond != nullptr);
TF_LITE_ENSURE_EQ(context, cond->type, kTfLiteBool);
TF_LITE_ENSURE_EQ(context, NumElements(cond), 1);
micro_context->DeallocateTempTfLiteTensor(cond);
// The first input of the node is the condition. The rest of inputs are
// passed to the branch subgraphs. Therefore, the number of subgraph inputs
// will be the number of node inputs - 1.
size_t num_inputs = node->inputs->size - 1;
size_t num_outputs = node->outputs->size;
// Casting to TfliteIntArray is required since we are re-using
// GetExecutionPlan from TfLiteContext. On TFLM this method returns a
// MicroGraph.
// TODO(b/188226309): Design a cleaner way to get a graph from kernel context.
MicroGraph* graph_info;
context->GetExecutionPlan(context,
reinterpret_cast<TfLiteIntArray**>(&graph_info));
MicroGraph& graph_info = micro_context->graph();
TF_LITE_ENSURE(context,
op_data->then_subgraph_index < graph_info->NumSubgraphs());
op_data->then_subgraph_index < graph_info.NumSubgraphs());
TF_LITE_ENSURE(context,
op_data->else_subgraph_index < graph_info->NumSubgraphs());
op_data->else_subgraph_index < graph_info.NumSubgraphs());
TF_LITE_ENSURE_EQ(
context, num_inputs,
graph_info->NumSubgraphInputs(op_data->then_subgraph_index));
TF_LITE_ENSURE_EQ(context, num_inputs,
graph_info.NumSubgraphInputs(op_data->then_subgraph_index));
TF_LITE_ENSURE_EQ(
context, num_outputs,
graph_info->NumSubgraphOutputs(op_data->then_subgraph_index));
graph_info.NumSubgraphOutputs(op_data->then_subgraph_index));
return kTfLiteOk;
}
@@ -87,66 +85,30 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
const TfLiteTensor* cond;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &cond));
tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
TfLiteTensor* cond = micro_context->AllocateTempInputTensor(node, 0);
TF_LITE_ENSURE(context, cond != nullptr);
bool cond_value = cond->data.b[0];
micro_context->DeallocateTempTfLiteTensor(cond);
// Casting to TfliteIntArray is required since we are re-using
// GetExecutionPlan from TfLiteContext. On TFLM this method returns a
// MicroGraph.
// TODO(b/188226309): Design a cleaner way to get a graph from kernel context.
MicroGraph* graph_info;
context->GetExecutionPlan(context,
reinterpret_cast<TfLiteIntArray**>(&graph_info));
// Currently we copy the input / output between the subgraphs. This isn't
// optimized yet.
MicroGraph* graph_info = &micro_context->graph();
// Currently we copy the input / output between the subgraphs.
int active_branch_subgraph_index =
cond_value ? op_data->then_subgraph_index : op_data->else_subgraph_index;
for (size_t i = 0;
i < graph_info->NumSubgraphInputs(active_branch_subgraph_index); ++i) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, i + 1);
TfLiteEvalTensor* subgraph_input =
graph_info->GetSubgraphInput(active_branch_subgraph_index, i);
// These checks must occur in Eval since TfLiteEvalTensors are not available
// during Prepare.
size_t input_bytes;
size_t subgraph_input_bytes;
TF_LITE_ENSURE_OK(context, TfLiteEvalTensorByteLength(input, &input_bytes));
TF_LITE_ENSURE_OK(context, TfLiteEvalTensorByteLength(
subgraph_input, &subgraph_input_bytes));
TF_LITE_ENSURE_TYPES_EQ(context, input->type, subgraph_input->type);
TF_LITE_ENSURE_EQ(context, input_bytes, subgraph_input_bytes);
memcpy(subgraph_input->data.raw, input->data.raw, input_bytes);
}
TF_LITE_ENSURE_OK(context,
tflite::micro::CopyOpInputsToSubgraphInputs(
context, node, graph_info, active_branch_subgraph_index,
/*first_tensor_idx=*/1));
TF_LITE_ENSURE_OK(context,
graph_info->InvokeSubgraph(active_branch_subgraph_index));
for (size_t i = 0;
i < graph_info->NumSubgraphOutputs(active_branch_subgraph_index); ++i) {
const TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, i);
TF_LITE_ENSURE_OK(
context, tflite::micro::CopySubgraphOutputsToOpOutputs(
context, node, graph_info, active_branch_subgraph_index));
TfLiteEvalTensor* subgraph_output =
graph_info->GetSubgraphOutput(active_branch_subgraph_index, i);
// These checks must occur in Eval since TfLiteEvalTensors are not available
// during Prepare.
size_t output_bytes;
size_t subgraph_output_bytes;
TF_LITE_ENSURE_OK(context,
TfLiteEvalTensorByteLength(output, &output_bytes));
TF_LITE_ENSURE_OK(context, TfLiteEvalTensorByteLength(
subgraph_output, &subgraph_output_bytes));
TF_LITE_ENSURE_TYPES_EQ(context, output->type, subgraph_output->type);
TF_LITE_ENSURE_EQ(context, output_bytes, subgraph_output_bytes);
memcpy(output->data.raw, subgraph_output->data.raw, output_bytes);
}
return kTfLiteOk;
}
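Condensed, the rewritten branch dispatch looks roughly like the sketch below; the function name and the explicit branch index parameter are placeholders, and the copy helpers are the ones introduced in micro/kernels/kernel_util.h later in this commit:

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_context.h"
#include "tensorflow/lite/micro/micro_graph.h"

// Rough sketch of the new control flow, not the verbatim kernel.
TfLiteStatus InvokeBranch(TfLiteContext* context, TfLiteNode* node,
                          int branch_subgraph_index) {
  tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
  tflite::MicroGraph* graph_info = &micro_context->graph();

  // Inputs 1..N of the IF node feed the branch subgraph (input 0 is the
  // condition), hence first_tensor_idx = 1.
  TF_LITE_ENSURE_OK(context, tflite::micro::CopyOpInputsToSubgraphInputs(
                                 context, node, graph_info,
                                 branch_subgraph_index,
                                 /*first_tensor_idx=*/1));
  TF_LITE_ENSURE_OK(context,
                    graph_info->InvokeSubgraph(branch_subgraph_index));
  return tflite::micro::CopySubgraphOutputsToOpOutputs(
      context, node, graph_info, branch_subgraph_index);
}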

View File

@@ -24,7 +24,6 @@ namespace tflite {
namespace micro {
// TODO(b/161841696): Consider moving away from global arena buffers:
constexpr int KernelRunner::kNumScratchBuffers_;
constexpr int KernelRunner::kKernelRunnerBufferSize_;
uint8_t KernelRunner::kKernelRunnerBuffer_[];
@@ -32,22 +31,23 @@ KernelRunner::KernelRunner(const TfLiteRegistration& registration,
TfLiteTensor* tensors, int tensors_size,
TfLiteIntArray* inputs, TfLiteIntArray* outputs,
void* builtin_data)
: allocator_(SimpleMemoryAllocator::Create(GetMicroErrorReporter(),
: registration_(registration),
allocator_(SimpleMemoryAllocator::Create(GetMicroErrorReporter(),
kKernelRunnerBuffer_,
kKernelRunnerBufferSize_)),
registration_(registration),
tensors_(tensors),
mock_micro_graph_(allocator_) {
mock_micro_graph_(allocator_),
fake_micro_context_(tensors, allocator_, &mock_micro_graph_) {
// Prepare TfLiteContext:
context_.impl_ = static_cast<void*>(this);
context_.ReportError = ReportOpError;
context_.impl_ = static_cast<void*>(&fake_micro_context_);
context_.ReportError = MicroContextReportOpError;
context_.recommended_num_threads = 1;
context_.GetTensor = GetTensor;
context_.GetEvalTensor = GetEvalTensor;
context_.AllocatePersistentBuffer = AllocatePersistentBuffer;
context_.RequestScratchBufferInArena = RequestScratchBufferInArena;
context_.GetScratchBuffer = GetScratchBuffer;
context_.GetExecutionPlan = GetGraph;
context_.GetTensor = MicroContextGetTensor;
context_.GetEvalTensor = MicroContextGetEvalTensor;
context_.AllocatePersistentBuffer = MicroContextAllocatePersistentBuffer;
context_.RequestScratchBufferInArena =
MicroContextRequestScratchBufferInArena;
context_.GetScratchBuffer = MicroContextGetScratchBuffer;
context_.recommended_num_threads = 0;
// Prepare TfLiteNode:
@@ -56,14 +56,24 @@ KernelRunner::KernelRunner(const TfLiteRegistration& registration,
node_.builtin_data = builtin_data;
}
bool KernelRunner::ValidateTempBufferDeallocated() {
return fake_micro_context_.IsAllTempTfLiteTensorDeallocated();
}
TfLiteStatus KernelRunner::InitAndPrepare(const char* init_data,
size_t length) {
if (registration_.init) {
node_.user_data = registration_.init(&context_, init_data, length);
}
TF_LITE_ENSURE(&context_, ValidateTempBufferDeallocated());
if (registration_.prepare) {
TF_LITE_ENSURE_STATUS(registration_.prepare(&context_, &node_));
}
TF_LITE_ENSURE(&context_, ValidateTempBufferDeallocated());
return kTfLiteOk;
}
@@ -72,101 +82,11 @@ TfLiteStatus KernelRunner::Invoke() {
MicroPrintf("TfLiteRegistration missing invoke function pointer!");
return kTfLiteError;
}
return registration_.invoke(&context_, &node_);
}
TfLiteTensor* KernelRunner::GetTensor(const struct TfLiteContext* context,
int tensor_index) {
TFLITE_DCHECK(context != nullptr);
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
TFLITE_DCHECK(runner != nullptr);
TF_LITE_ENSURE_STATUS(registration_.invoke(&context_, &node_));
return &runner->tensors_[tensor_index];
}
TF_LITE_ENSURE(&context_, ValidateTempBufferDeallocated());
TfLiteEvalTensor* KernelRunner::GetEvalTensor(
const struct TfLiteContext* context, int tensor_index) {
TFLITE_DCHECK(context != nullptr);
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
TFLITE_DCHECK(runner != nullptr);
TfLiteEvalTensor* eval_tensor =
reinterpret_cast<TfLiteEvalTensor*>(runner->allocator_->AllocateTemp(
sizeof(TfLiteEvalTensor), alignof(TfLiteEvalTensor)));
TFLITE_DCHECK(eval_tensor != nullptr);
// In unit tests, the TfLiteTensor pointer contains the source of truth for
// buffers and values:
eval_tensor->data = runner->tensors_[tensor_index].data;
eval_tensor->dims = runner->tensors_[tensor_index].dims;
eval_tensor->type = runner->tensors_[tensor_index].type;
return eval_tensor;
}
void* KernelRunner::AllocatePersistentBuffer(TfLiteContext* context,
size_t bytes) {
TFLITE_DCHECK(context != nullptr);
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
TFLITE_DCHECK(runner != nullptr);
return runner->allocator_->AllocateFromTail(bytes,
MicroArenaBufferAlignment());
}
TfLiteStatus KernelRunner::RequestScratchBufferInArena(TfLiteContext* context,
size_t bytes,
int* buffer_index) {
TFLITE_DCHECK(context != nullptr);
TFLITE_DCHECK(buffer_index != nullptr);
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
TFLITE_DCHECK(runner != nullptr);
if (runner->scratch_buffer_count_ == kNumScratchBuffers_) {
MicroPrintf("Exceeded the maximum number of scratch tensors allowed (%d).",
kNumScratchBuffers_);
return kTfLiteError;
}
// For tests, we allocate scratch buffers from the tail and keep them around
// for the lifetime of model. This means that the arena size in the tests will
// be more than what we would have if the scratch buffers could share memory.
runner->scratch_buffers_[runner->scratch_buffer_count_] =
runner->allocator_->AllocateFromTail(bytes, MicroArenaBufferAlignment());
TFLITE_DCHECK(runner->scratch_buffers_[runner->scratch_buffer_count_] !=
nullptr);
*buffer_index = runner->scratch_buffer_count_++;
return kTfLiteOk;
}
void* KernelRunner::GetScratchBuffer(TfLiteContext* context, int buffer_index) {
TFLITE_DCHECK(context != nullptr);
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
TFLITE_DCHECK(runner != nullptr);
TFLITE_DCHECK(runner->scratch_buffer_count_ <= kNumScratchBuffers_);
if (buffer_index >= runner->scratch_buffer_count_) {
return nullptr;
}
return runner->scratch_buffers_[buffer_index];
}
void KernelRunner::ReportOpError(struct TfLiteContext* context,
const char* format, ...) {
va_list args;
va_start(args, format);
GetMicroErrorReporter()->Report(format, args);
va_end(args);
}
TfLiteStatus KernelRunner::GetGraph(struct TfLiteContext* context,
TfLiteIntArray** args) {
TFLITE_DCHECK(context != nullptr);
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
TFLITE_DCHECK(runner != nullptr);
// TODO(b/188226309): Design a cleaner way to get a graph from kernel context.
*args = reinterpret_cast<TfLiteIntArray*>(runner->GetMockGraph());
return kTfLiteOk;
}
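For context, a hedged sketch of how a kernel unit test drives the rewritten KernelRunner; `registration`, `tensors` and `kTensorsSize` stand in for whatever the kernel under test provides and are not defined here:

// Hypothetical test body; IntArrayFromInts comes from the micro test helpers.
int inputs_data[] = {1, 0};   // the node reads tensor 0
int outputs_data[] = {1, 1};  // the node writes tensor 1
TfLiteIntArray* inputs_array = tflite::testing::IntArrayFromInts(inputs_data);
TfLiteIntArray* outputs_array = tflite::testing::IntArrayFromInts(outputs_data);

tflite::micro::KernelRunner runner(registration, tensors, kTensorsSize,
                                   inputs_array, outputs_array,
                                   /*builtin_data=*/nullptr);

// Both calls now route the TfLiteContext callbacks through FakeMicroContext
// and fail if the kernel leaks a temp TfLiteTensor.
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk,
                        runner.InitAndPrepare(/*init_data=*/nullptr,
                                              /*length=*/0));
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke());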

View File

@@ -18,6 +18,7 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/fake_micro_context.h"
#include "tensorflow/lite/micro/mock_micro_graph.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"
@@ -50,40 +51,22 @@ class KernelRunner {
// to stub out MicroGraph methods and track invocations on each subgraph.
MockMicroGraph* GetMockGraph() { return &mock_micro_graph_; }
protected:
static TfLiteTensor* GetTensor(const struct TfLiteContext* context,
int tensor_index);
static TfLiteEvalTensor* GetEvalTensor(const struct TfLiteContext* context,
int tensor_index);
static void* AllocatePersistentBuffer(TfLiteContext* context, size_t bytes);
static TfLiteStatus RequestScratchBufferInArena(TfLiteContext* context,
size_t bytes,
int* buffer_index);
static void* GetScratchBuffer(TfLiteContext* context, int buffer_index);
static void ReportOpError(struct TfLiteContext* context, const char* format,
...);
// This method matches GetExecutionPlan from TfLiteContext since TFLM reuses
// this method to get the MicroGraph from an operator context.
// TODO(b/188226309): Design a cleaner way to get a graph from kernel context.
static TfLiteStatus GetGraph(struct TfLiteContext* context,
TfLiteIntArray** args);
// Returns true if all temp buffer in tests are deallocated.
// TODO(b/209453859): move this function to private after deallocation checks
// are enabled for all kernel tests.
bool ValidateTempBufferDeallocated();
private:
static constexpr int kNumScratchBuffers_ = 12;
static constexpr int kKernelRunnerBufferSize_ = 10000;
static uint8_t kKernelRunnerBuffer_[kKernelRunnerBufferSize_];
SimpleMemoryAllocator* allocator_ = nullptr;
const TfLiteRegistration& registration_;
TfLiteTensor* tensors_ = nullptr;
MockMicroGraph mock_micro_graph_;
TfLiteContext context_ = {};
TfLiteNode node_ = {};
const TfLiteRegistration& registration_;
int scratch_buffer_count_ = 0;
uint8_t* scratch_buffers_[kNumScratchBuffers_];
SimpleMemoryAllocator* allocator_;
MockMicroGraph mock_micro_graph_;
FakeMicroContext fake_micro_context_;
};
} // namespace micro

View File

@@ -16,6 +16,7 @@ limitations under the License.
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/memory_helpers.h"
namespace tflite {
namespace micro {
@@ -119,13 +120,83 @@ TfLiteStatus CreateWritableTensorDimsWithCopy(TfLiteContext* context,
return kTfLiteOk;
}
// Returns a blob of payload data. The payload is subjected to interpretation by
// the OP. This is the recommended API for an OP to get an external context. OP
// should use this instead of directly calling GetExternalContext function in
// context.
void* GetExternalContext(TfLiteContext* context) {
return reinterpret_cast<void*>(
context->GetExternalContext(context, kTfLiteMaxExternalContexts));
// Verify that both tensors have the same type and size, then return the size
// of both tensors in bytes if they are the same, or -1 if they are different.
int ValidateAndGetTensorSizes(const TfLiteEvalTensor* tensor1,
const TfLiteEvalTensor* tensor2) {
TFLITE_DCHECK(tensor1->type == tensor2->type);
size_t tensor1_size = 0;
size_t tensor2_size = 0;
TfLiteEvalTensorByteLength(tensor1, &tensor1_size);
TfLiteEvalTensorByteLength(tensor2, &tensor2_size);
return (tensor1_size == tensor2_size) ? tensor1_size : -1;
}
TfLiteStatus CopyOpInputsToOpOutputs(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE(context, node->inputs->size == node->outputs->size);
for (int i = 0; i < node->inputs->size; i++) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, i);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, i);
int bytes = ValidateAndGetTensorSizes(input, output);
TF_LITE_ENSURE(context, bytes >= 0);
memcpy(output->data.raw, input->data.raw, bytes);
}
return kTfLiteOk;
}
TfLiteStatus CopyOpInputsToSubgraphInputs(TfLiteContext* context,
TfLiteNode* node,
MicroGraph* graph_info,
int subgraph_idx,
int first_tensor_idx) {
TF_LITE_ENSURE(context,
static_cast<size_t>(node->inputs->size - first_tensor_idx) ==
graph_info->NumSubgraphInputs(subgraph_idx));
for (int i = 0; i < node->inputs->size - first_tensor_idx; i++) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, i + first_tensor_idx);
TfLiteEvalTensor* subgraph_input =
graph_info->GetSubgraphInput(subgraph_idx, i);
int bytes = ValidateAndGetTensorSizes(input, subgraph_input);
TF_LITE_ENSURE(context, bytes >= 0);
memcpy(subgraph_input->data.raw, input->data.raw, bytes);
}
return kTfLiteOk;
}
TfLiteStatus CopyOpOutputsToSubgraphInputs(TfLiteContext* context,
TfLiteNode* node,
MicroGraph* graph_info,
int subgraph_idx) {
TF_LITE_ENSURE(context, static_cast<size_t>(node->outputs->size) ==
graph_info->NumSubgraphInputs(subgraph_idx));
for (int i = 0; i < node->outputs->size; i++) {
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, i);
TfLiteEvalTensor* subgraph_input =
graph_info->GetSubgraphInput(subgraph_idx, i);
int bytes = ValidateAndGetTensorSizes(output, subgraph_input);
TF_LITE_ENSURE(context, bytes >= 0);
memcpy(subgraph_input->data.raw, output->data.raw, bytes);
}
return kTfLiteOk;
}
TfLiteStatus CopySubgraphOutputsToOpOutputs(TfLiteContext* context,
TfLiteNode* node,
MicroGraph* graph_info,
int subgraph_idx) {
TF_LITE_ENSURE(context, static_cast<size_t>(node->outputs->size) ==
graph_info->NumSubgraphOutputs(subgraph_idx));
for (int i = 0; i < node->outputs->size; i++) {
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, i);
TfLiteEvalTensor* subgraph_output =
graph_info->GetSubgraphOutput(subgraph_idx, i);
int bytes = ValidateAndGetTensorSizes(output, subgraph_output);
TF_LITE_ENSURE(context, bytes >= 0);
memcpy(output->data.raw, subgraph_output->data.raw, bytes);
}
return kTfLiteOk;
}
} // namespace micro
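The simplest consumer of the new helpers is a pass-through op whose outputs mirror its inputs; a hedged sketch (the kernel itself is hypothetical):

// Hypothetical identity-style Eval built on CopyOpInputsToOpOutputs above.
TfLiteStatus PassThroughEval(TfLiteContext* context, TfLiteNode* node) {
  // Checks that the input/output counts, types and byte sizes match, then
  // memcpy()s each input into the corresponding output.
  return tflite::micro::CopyOpInputsToOpOutputs(context, node);
}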

View File

@@ -22,6 +22,7 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/micro/micro_context.h"
namespace tflite {
namespace micro {
@@ -69,23 +70,33 @@ TfLiteStatus CreateWritableTensorDimsWithCopy(TfLiteContext* context,
TfLiteTensor* tensor,
TfLiteEvalTensor* eval_tensor);
// Returns a blob of payload data. The payload is subjected to interpretation by
// the OP. This is the recommended API for an OP to get an external context. OP
// should use this instead of directly calling GetExternalContext function in
// context. Example usage:
//
// An application can set an external context through interpreter as below
// interpreter->SetMicroExternalContext(pointer_to_your_payload);
//
// Inside an OP that needs this payload, it get the payload pointer by:
// Prepare(TfliteContext * context) {
// ...
// payload_ptr =
// reinterpret_cast<your_data_type>(GetMicroExternalContext(context))
// ...
// }
//
void* GetMicroExternalContext(TfLiteContext* context);
// Copy all op input tensors to op output tensors. Requires all op input tensor
// shapes and types to be identical to op output tensor shapes and types.
TfLiteStatus CopyOpInputsToOpOutputs(TfLiteContext* context, TfLiteNode* node);
// Copy all op input tensors to subgraph input tensors. Requires all op input
// tensor shapes and types to be identical to subgraph input tensor shapes and
// types.
TfLiteStatus CopyOpInputsToSubgraphInputs(TfLiteContext* context,
TfLiteNode* node,
MicroGraph* graph_info,
int subgraph_idx,
int first_tensor_idx);
// Copy all op output tensors to subgraph input tensors. Requires all op output
// tensor shapes and types to be identical to subgraph input tensor shapes and
// types.
TfLiteStatus CopyOpOutputsToSubgraphInputs(TfLiteContext* context,
TfLiteNode* node,
MicroGraph* graph_info,
int subgraph_idx);
// Copy all subgraph output tensors to op outputs. Requires all subgraph output
// tensor shapes and types to be identical to op output tensor shapes and types.
TfLiteStatus CopySubgraphOutputsToOpOutputs(TfLiteContext* context,
TfLiteNode* node,
MicroGraph* graph_info,
int subgraph_idx);
} // namespace micro
} // namespace tflite
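The three subgraph-copy declarations compose for loop-style control flow as well; the sketch below only illustrates how the documented contracts fit together (a fixed iteration count instead of a condition subgraph, placeholder names), it is not the actual WHILE kernel:

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_graph.h"

TfLiteStatus RunBodyNTimes(TfLiteContext* context, TfLiteNode* node,
                           tflite::MicroGraph* graph_info,
                           int body_subgraph_idx, int num_iterations) {
  // Seed the body subgraph from the op inputs once.
  TF_LITE_ENSURE_OK(context, tflite::micro::CopyOpInputsToSubgraphInputs(
                                 context, node, graph_info, body_subgraph_idx,
                                 /*first_tensor_idx=*/0));
  for (int i = 0; i < num_iterations; ++i) {
    TF_LITE_ENSURE_OK(context, graph_info->InvokeSubgraph(body_subgraph_idx));
    // Surface the body outputs on the op outputs...
    TF_LITE_ENSURE_OK(context, tflite::micro::CopySubgraphOutputsToOpOutputs(
                                   context, node, graph_info,
                                   body_subgraph_idx));
    // ...and feed them straight back in as the next iteration's inputs.
    TF_LITE_ENSURE_OK(context, tflite::micro::CopyOpOutputsToSubgraphInputs(
                                   context, node, graph_info,
                                   body_subgraph_idx));
  }
  return kTfLiteOk;
}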

View File

@@ -36,15 +36,18 @@ constexpr int kTensorShapeRank = 4;
enum { kBatchRank = 0, kHeightRank, kWidthRank, kChannelRank };
TfLiteStatus L2Prepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
auto* params = static_cast<TfLitePoolParams*>(node->builtin_data);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
const TfLiteTensor* input;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TF_LITE_ENSURE_EQ(context, NumDimensions(input), kTensorShapeRank);
TF_LITE_ENSURE_EQ(context, NumDimensions(output), kTensorShapeRank);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
@@ -82,6 +85,9 @@ TfLiteStatus L2Prepare(TfLiteContext* context, TfLiteNode* node) {
output->dims->data[kWidthRank] = out_width;
output->dims->data[kChannelRank] = channels_out;
micro_context->DeallocateTempTfLiteTensor(output);
micro_context->DeallocateTempTfLiteTensor(input);
return kTfLiteOk;
}

View File

@@ -49,11 +49,14 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE(context, NumDimensions(input) <= 4);
TF_LITE_ENSURE(context,
@@ -69,6 +72,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
// Our implementations don't currently support activations.
TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone);
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -30,13 +30,16 @@ const int kOutputTensor = 0;
TfLiteStatus CalculateOpDataLeakyRelu(TfLiteContext* context,
TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
@@ -62,6 +65,9 @@ TfLiteStatus CalculateOpDataLeakyRelu(TfLiteContext* context,
data->output_shift_identity = static_cast<int32_t>(output_shift_identity);
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -43,13 +43,16 @@ constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
TF_LITE_ENSURE(context, HaveSameShapes(input, output));
@@ -89,6 +92,8 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
data->depth = static_cast<size_t>(input_shape.Dims(trailing_dim));
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -32,9 +32,13 @@ const int kLogisticOutputTensor = 0;
TfLiteStatus CalculateArithmeticOpDataLogistic(TfLiteContext* context,
TfLiteNode* node,
OpDataLogistic* data) {
const TfLiteTensor* input = GetInput(context, node, kLogisticInputTensor);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kLogisticInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kLogisticOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kLogisticOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
@@ -55,6 +59,53 @@ TfLiteStatus CalculateArithmeticOpDataLogistic(TfLiteContext* context,
data->input_range_radius =
CalculateInputRadius(kInputIntegerBits, data->input_left_shift, 31);
}
if (input->type == kTfLiteInt16) {
static constexpr int kInputIntegerBits = 3;
static constexpr int kOutputFractionalBits = 15;
// See comments in TanhPrepare about requiring zero_point==0
// and a power-of-two ("POT") scale.
TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
int input_scale_log2_rounded;
bool param_scale_pot =
CheckedLog2(input->params.scale, &input_scale_log2_rounded);
data->input_left_shift =
(15 - kInputIntegerBits) + input_scale_log2_rounded;
param_scale_pot &= (data->input_left_shift == 0);
if (param_scale_pot) {
data->input_multiplier = 0;
} else {
// Calculate multiplier to change input scale to 1/(3*4096)
// as required by the table lookup.
// In this scaling +/-2^17 represents +/-10.7
double multiplier =
static_cast<double>(input->params.scale) * 4096.0 * 3.0;
data->input_left_shift = 0;
while (multiplier <= 32767.0 / 2.0 && data->input_left_shift <= 30) {
data->input_left_shift++;
multiplier = multiplier * 2.0;
}
data->input_multiplier = static_cast<int32_t>(multiplier);
}
int output_scale_log2_rounded;
TF_LITE_ENSURE(
context, CheckedLog2(output->params.scale, &output_scale_log2_rounded));
TF_LITE_ENSURE_EQ(context, output_scale_log2_rounded,
-kOutputFractionalBits);
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
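To make the rescaling loop above concrete, here is the same arithmetic run standalone for one assumed input scale (1/1024 is an arbitrary example value, not taken from this diff):

// Worked example of the non-power-of-two branch above.
double multiplier = (1.0 / 1024.0) * 4096.0 * 3.0;  // 12.0
int input_left_shift = 0;
while (multiplier <= 32767.0 / 2.0 && input_left_shift <= 30) {
  input_left_shift++;
  multiplier = multiplier * 2.0;
}
// Ends with input_left_shift == 11 and multiplier == 24576.0, i.e. the kernel
// would store input_multiplier = 24576 for this scale.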

View File

@@ -36,6 +36,8 @@ TfLiteRegistration Register_ADD_N();
TfLiteRegistration Register_ASSIGN_VARIABLE();
TfLiteRegistration Register_AVERAGE_POOL_2D();
TfLiteRegistration Register_BATCH_TO_SPACE_ND();
TfLiteRegistration Register_BROADCAST_ARGS();
TfLiteRegistration Register_BROADCAST_TO();
TfLiteRegistration Register_CALL_ONCE();
TfLiteRegistration Register_CAST();
// TODO(b/160234179): Change custom OPs to also return by value.
@@ -62,6 +64,7 @@ TfLiteRegistration Register_LOGICAL_AND();
TfLiteRegistration Register_LOGICAL_OR();
TfLiteRegistration Register_LOGISTIC();
TfLiteRegistration Register_MAX_POOL_2D();
TfLiteRegistration Register_MIRROR_PAD();
TfLiteRegistration Register_PRELU();
TfLiteRegistration Register_MUL();
TfLiteRegistration Register_QUANTIZE();
@@ -79,6 +82,7 @@ TfLiteRegistration Register_SVDF();
TfLiteRegistration Register_TRANSPOSE();
TfLiteRegistration Register_TRANSPOSE_CONV();
TfLiteRegistration Register_VAR_HANDLE();
TfLiteRegistration Register_WHILE();
TfLiteRegistration Register_ZEROS_LIKE();
namespace ops {
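The new declarations are plain registration factories, so a hedged usage sketch is just the following (variable names are illustrative):

// The builtin Register_* functions return a TfLiteRegistration by value, so a
// custom resolver or test can capture the new ops directly.
const TfLiteRegistration mirror_pad_registration = tflite::Register_MIRROR_PAD();
const TfLiteRegistration while_registration = tflite::Register_WHILE();
const TfLiteRegistration broadcast_args_registration =
    tflite::Register_BROADCAST_ARGS();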

View File

@@ -0,0 +1,222 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace {
struct OpDataMirrorPad {
int input_dims;
int output_size;
int offset;
int output_dims_num_elements_buffer_index;
int input_dims_num_elements_buffer_index;
};
// Helper method that fills the left and right pads.
template <typename T>
inline void GetPadding(const T* data, int offset, int64_t* left_pad,
int64_t* right_pad) {
*left_pad = static_cast<int64_t>(*(data + offset * 2));
*right_pad = static_cast<int64_t>(*(data + offset * 2 + 1));
}
// Given a dimension index and the left/right padding, returns the
// corresponding index in the input dimension.
inline int GetInputDimension(int padded_dimension, int left_pad, int right_pad,
int input_dim_size, int offset) {
if (padded_dimension < left_pad) {
const int original_ind = left_pad + offset - 1;
return original_ind - (std::min(padded_dimension, original_ind - offset));
}
padded_dimension -= left_pad;
if (padded_dimension >= input_dim_size) {
padded_dimension -= input_dim_size;
const int original_ind = input_dim_size - (1 + offset);
return original_ind - std::min(padded_dimension, original_ind);
}
return padded_dimension;
}
// Given an index in the output array, returns the index of the
// corresponding value in the input array.
int GetFlatIndex(int index, int num_dims,
const TfLiteEvalTensor* padding_matrix,
const TfLiteIntArray* input_dims,
int* output_dims_num_elements, int* input_dims_num_elements,
const int offset) {
int flat_index = 0;
int64_t left_pad = 0, right_pad = 0, dimension_index, index_in_input;
for (int i = 0; i < num_dims; ++i) {
switch (padding_matrix->type) {
case kTfLiteInt32:
GetPadding(padding_matrix->data.i32, i, &left_pad, &right_pad);
break;
case kTfLiteInt64:
GetPadding(padding_matrix->data.i64, i, &left_pad, &right_pad);
break;
default:
break;
}
dimension_index = index / output_dims_num_elements[i];
index_in_input = GetInputDimension(dimension_index, left_pad, right_pad,
input_dims->data[i], offset);
flat_index += index_in_input * (input_dims_num_elements)[i];
index %= output_dims_num_elements[i];
}
return flat_index;
}
template <typename T>
void MirrorPad(const TfLiteEvalTensor* padding_matrix,
const TfLiteIntArray* input_dims, int* output_dims_num_elements,
int* input_dims_num_elements, const T* input_data,
T* output_data, const int offset, const int num_dims,
const int output_size) {
for (int i = 0; i < output_size; ++i) {
output_data[i] = input_data[GetFlatIndex(
i, num_dims, padding_matrix, input_dims, output_dims_num_elements,
input_dims_num_elements, offset)];
}
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TfLiteStatus status = kTfLiteOk;
const OpDataMirrorPad* data =
static_cast<const OpDataMirrorPad*>(node->user_data);
const TfLiteEvalTensor* input_tensor =
tflite::micro::GetEvalInput(context, node, 0);
const TfLiteEvalTensor* padding_matrix =
tflite::micro::GetEvalInput(context, node, 1);
TfLiteEvalTensor* output_tensor =
tflite::micro::GetEvalOutput(context, node, 0);
const int input_dims = data->input_dims;
const int output_size = data->output_size;
int* input_dims_num_elements = (int*)context->GetScratchBuffer(
context, data->input_dims_num_elements_buffer_index);
int* output_dims_num_elements = (int*)context->GetScratchBuffer(
context, data->output_dims_num_elements_buffer_index);
for (int i = 0; i < input_dims; i++) {
output_dims_num_elements[i] = 1;
input_dims_num_elements[i] = 1;
}
for (int i = input_dims - 2; i >= 0; i--) {
output_dims_num_elements[i] =
output_dims_num_elements[i + 1] * output_tensor->dims->data[i + 1];
input_dims_num_elements[i] =
input_dims_num_elements[i + 1] * input_tensor->dims->data[i + 1];
}
switch (output_tensor->type) {
case kTfLiteFloat32: {
MirrorPad(padding_matrix, input_tensor->dims, output_dims_num_elements,
input_dims_num_elements,
tflite::micro::GetTensorData<float>(input_tensor),
tflite::micro::GetTensorData<float>(output_tensor),
data->offset, input_dims, output_size);
break;
}
case kTfLiteInt8: {
MirrorPad(padding_matrix, input_tensor->dims, output_dims_num_elements,
input_dims_num_elements,
tflite::micro::GetTensorData<int8_t>(input_tensor),
tflite::micro::GetTensorData<int8_t>(output_tensor),
data->offset, input_dims, output_size);
break;
}
default:
status = kTfLiteError;
break;
}
#undef TF_LITE_MIRROR_PAD
return status;
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpDataMirrorPad));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TFLITE_DCHECK(node->user_data != nullptr);
OpDataMirrorPad* data = static_cast<OpDataMirrorPad*>(node->user_data);
TfLiteTensor* input_tensor = micro_context->AllocateTempInputTensor(node, 0);
TfLiteTensor* padding_matrix =
micro_context->AllocateTempInputTensor(node, 1);
TfLiteTensor* output_tensor =
micro_context->AllocateTempOutputTensor(node, 0);
TF_LITE_ENSURE_EQ(context, NumDimensions(padding_matrix), 2);
TF_LITE_ENSURE_EQ(context, SizeOfDimension(padding_matrix, 0),
NumDimensions(input_tensor));
auto* params =
reinterpret_cast<TfLiteMirrorPaddingParams*>(node->builtin_data);
if (params == nullptr) {
return kTfLiteError;
}
data->offset =
params->mode != TfLiteMirrorPaddingMode::kTfLiteMirrorPaddingReflect ? 0
: 1;
data->input_dims = NumDimensions(input_tensor);
data->output_size = NumElements(output_tensor);
TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
context, data->input_dims * sizeof(int),
&data->output_dims_num_elements_buffer_index));
TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
context, data->input_dims * sizeof(int),
&data->input_dims_num_elements_buffer_index));
micro_context->DeallocateTempTfLiteTensor(input_tensor);
micro_context->DeallocateTempTfLiteTensor(padding_matrix);
micro_context->DeallocateTempTfLiteTensor(output_tensor);
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_MIRROR_PAD() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
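A worked example of the index mapping GetInputDimension implements, for an assumed 1-D input of size 4 with pads {2, 2} (values chosen purely for illustration):

// For input {a, b, c, d} with left_pad = right_pad = 2:
//   REFLECT   (offset == 1): padded indices 0..7 map to input indices
//     2 1 0 1 2 3 2 1   ->  c b a b c d c b
//   SYMMETRIC (offset == 0): padded indices 0..7 map to input indices
//     1 0 0 1 2 3 3 2   ->  b a a b c d d c
// e.g. GetInputDimension(/*padded_dimension=*/0, /*left_pad=*/2,
//                        /*right_pad=*/2, /*input_dim_size=*/4,
//                        /*offset=*/1) == 2.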

View File

@@ -37,11 +37,16 @@ void* MulInit(TfLiteContext* context, const char* buffer, size_t length) {
TfLiteStatus CalculateOpDataMul(TfLiteContext* context, TfLiteNode* node,
TfLiteMulParams* params, OpDataMul* data) {
const TfLiteTensor* input1 = GetInput(context, node, kMulInput1Tensor);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input1 =
micro_context->AllocateTempInputTensor(node, kMulInput1Tensor);
TF_LITE_ENSURE(context, input1 != nullptr);
const TfLiteTensor* input2 = GetInput(context, node, kMulInput2Tensor);
TfLiteTensor* input2 =
micro_context->AllocateTempInputTensor(node, kMulInput2Tensor);
TF_LITE_ENSURE(context, input2 != nullptr);
TfLiteTensor* output = GetOutput(context, node, kMulOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kMulOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
@@ -72,6 +77,9 @@ TfLiteStatus CalculateOpDataMul(TfLiteContext* context, TfLiteNode* node,
&data->output_activation_max_f32);
}
micro_context->DeallocateTempTfLiteTensor(input1);
micro_context->DeallocateTempTfLiteTensor(input2);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -43,19 +43,26 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) {
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
TF_LITE_ENSURE(context, NumInputs(node) == 2 || NumInputs(node) == 3);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, /*index=*/0);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, /*index=*/0);
TF_LITE_ENSURE(context, input != nullptr);
const TfLiteTensor* paddings = GetInput(context, node, /*index=*/1);
TfLiteTensor* paddings =
micro_context->AllocateTempInputTensor(node, /*index=*/1);
TF_LITE_ENSURE(context, paddings != nullptr);
const TfLiteTensor* constant_values =
NumInputs(node) == 3 ? GetInput(context, node, /*index=*/2) : nullptr;
TfLiteTensor* output = GetOutput(context, node, /*index=*/0);
TfLiteTensor* constant_values =
NumInputs(node) == 3
? micro_context->AllocateTempInputTensor(node, /*index=*/2)
: nullptr;
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, /*index=*/0);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_EQ(context, input->type, output->type);
@@ -122,6 +129,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
data->output_zero_point = output->params.zero_point;
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(paddings);
if (constant_values != nullptr) {
micro_context->DeallocateTempTfLiteTensor(constant_values);
}
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -54,9 +54,13 @@ TfLiteStatus PoolingPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpDataPooling* data = static_cast<OpDataPooling*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kPoolingInputTensor);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kPoolingInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kPoolingOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kPoolingOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_STATUS(
@@ -71,6 +75,9 @@ TfLiteStatus PoolingPrepare(TfLiteContext* context, TfLiteNode* node) {
&data->activation_max);
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -84,14 +84,22 @@ TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
PreluParams* params = static_cast<PreluParams*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, 0);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
TF_LITE_ENSURE(context, input != nullptr);
const TfLiteTensor* alpha = GetInput(context, node, 1);
TfLiteTensor* alpha = micro_context->AllocateTempInputTensor(node, 1);
TF_LITE_ENSURE(context, alpha != nullptr);
TfLiteTensor* output = GetOutput(context, node, 0);
TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
TF_LITE_ENSURE(context, output != nullptr);
return CalculatePreluParams(input, alpha, output, params);
TF_LITE_ENSURE_OK(context,
CalculatePreluParams(input, alpha, output, params));
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(alpha);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
} // namespace tflite

View File

@@ -36,9 +36,11 @@ TfLiteStatus PrepareQuantizeReference(TfLiteContext* context,
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, 0);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, 0);
TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
TF_LITE_ENSURE(context, output != nullptr);
// TODO(b/128934713): Add support for fixed-point per-channel quantization.
@@ -77,6 +79,9 @@ TfLiteStatus PrepareQuantizeReference(TfLiteContext* context,
data->quantization_params.scale = static_cast<double>(output->params.scale);
data->input_zero_point = input->params.zero_point;
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -39,13 +39,17 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(NumInputs(node) == 1);
TFLITE_DCHECK(NumOutputs(node) == 1);
const TfLiteTensor* input_resource_id_tensor =
GetInput(context, node, kInputVariableId);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input_resource_id_tensor =
micro_context->AllocateTempInputTensor(node, kInputVariableId);
TFLITE_DCHECK(input_resource_id_tensor != nullptr);
TFLITE_DCHECK(input_resource_id_tensor->type == kTfLiteResource);
TFLITE_DCHECK(NumElements(input_resource_id_tensor) == 1);
micro_context->DeallocateTempTfLiteTensor(input_resource_id_tensor);
return kTfLiteOk;
}
@@ -58,14 +62,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
tflite::micro::GetEvalOutput(context, node, kOutputValue);
TFLITE_DCHECK(output_value != nullptr);
// Casting to TfliteIntArray is required since we are re-using
// GetExecutionPlan from TfLiteContext. On TFLM this method returns a
// MicroGraph.
// TODO(b/188226309): Design a cleaner way to get a graph from kernel context.
MicroGraph* graph_info;
context->GetExecutionPlan(context,
reinterpret_cast<TfLiteIntArray**>(&graph_info));
MicroResourceVariables* resources = graph_info->GetResourceVariables();
tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
MicroGraph& graph_info = micro_context->graph();
MicroResourceVariables* resources = graph_info.GetResourceVariables();
if (resources == nullptr) {
MicroPrintf(
"READ_VARIABLE requires resource variables. Please create "

View File

@@ -50,10 +50,12 @@ void* InitReduce(TfLiteContext* context, const char* buffer, size_t length) {
}
TfLiteStatus PrepareSimple(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
// Inputs Tensor (dtype depends on quantization):
// [0] = Input
// [1] = Axis
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
// Outputs Tensor (dtype depends on quantization):
// [0] = Output
@@ -63,28 +65,31 @@ TfLiteStatus PrepareSimple(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
// Validate axis type
const TfLiteTensor* axis = GetInput(context, node, 1);
TfLiteTensor* axis = micro_context->AllocateTempInputTensor(node, 1);
TF_LITE_ENSURE(context, axis != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, axis->type, kTfLiteInt32);
if (input->type == kTfLiteInt8) {
OpData* data = static_cast<OpData*>(node->user_data);
const TfLiteTensor* output = GetOutput(context, node, 0);
TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
const double real_multiplier = static_cast<double>(input->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(real_multiplier, &data->multiplier, &data->shift);
micro_context->DeallocateTempTfLiteTensor(output);
}
micro_context->DeallocateTempTfLiteTensor(axis);
micro_context->DeallocateTempTfLiteTensor(input);
return kTfLiteOk;
}
TfLiteStatus PrepareMax(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_OK(context, PrepareSimple(context, node));
MicroContext* micro_context = GetMicroContext(context);
OpData* op_data = static_cast<OpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, 0);
const TfLiteTensor* output = GetOutput(context, node, 0);
const TfLiteTensor* axis = GetInput(context, node, 1);
TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
TfLiteTensor* axis = micro_context->AllocateTempInputTensor(node, 1);
op_data->input_scale = input->params.scale;
op_data->output_scale = output->params.scale;
@@ -96,13 +101,17 @@ TfLiteStatus PrepareMax(TfLiteContext* context, TfLiteNode* node) {
context, sizeof(int) * static_cast<int>(ElementCount(*axis->dims)),
&op_data->resolved_axis_idx);
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
micro_context->DeallocateTempTfLiteTensor(axis);
return kTfLiteOk;
}
TfLiteStatus PrepareMeanOrSum(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, 0);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
const TfLiteTensor* output = GetOutput(context, node, 0);
TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) {
const double real_multiplier = static_cast<double>(input->params.scale) /
static_cast<double>(output->params.scale);
@@ -121,6 +130,8 @@ TfLiteStatus PrepareMeanOrSum(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_OK(context, PrepareSimple(context, node));
// TODO(b/144955155): Support uint8_t(b/144955155) and int8_t(b/144955018)
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <cstring>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
@@ -31,9 +33,13 @@ constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
TfLiteStatus ReshapeOutput(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
// Tensorflow's Reshape allows one of the shape components to have the
// special -1 value, meaning it will be calculated automatically based on the
@@ -68,6 +74,9 @@ TfLiteStatus ReshapeOutput(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
TF_LITE_ENSURE_EQ(context, num_input_elements, num_output_elements);
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
@@ -93,9 +102,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
// Do nothing for in-place reshape.
if (input->data.raw != output->data.raw) {
// Otherwise perform reshape with copy.
for (size_t i = 0; i < input_bytes; ++i) {
output->data.raw[i] = input->data.raw[i];
}
memcpy(output->data.raw, input->data.raw, input_bytes);
}
return kTfLiteOk;
}

View File

@@ -30,12 +30,17 @@ constexpr int kSizeTensor = 1;
constexpr int kOutputTensor = 0;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* size = GetInput(context, node, kSizeTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TfLiteTensor* size =
micro_context->AllocateTempInputTensor(node, kSizeTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
TF_LITE_ENSURE_EQ(context, NumDimensions(size), 1);
@@ -55,6 +60,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteError;
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(size);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -33,12 +33,17 @@ constexpr int kSizeTensor = 1;
constexpr int kOutputTensor = 0;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* size = GetInput(context, node, kSizeTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TfLiteTensor* size =
micro_context->AllocateTempInputTensor(node, kSizeTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
// Our current implementations rely on the input being 4D,
// and the size being 1D tensor with exactly 2 elements.
@@ -53,6 +58,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_KERNEL_LOG(context, "Dynamic tensors are unsupported in tfmicro.");
return kTfLiteError;
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(size);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -29,9 +29,13 @@ constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
@@ -42,6 +46,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
for (int i = 0; i < output->dims->size; ++i) {
TF_LITE_ENSURE_EQ(context, output->dims->data[i], input->dims->data[i]);
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -45,16 +45,22 @@ void GetBeginAndSizeVectors(int dimensions, const TfLiteEvalTensor* begin,
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 3);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TFLITE_DCHECK(input != nullptr);
const TfLiteTensor* begin = GetInput(context, node, kBeginTensor);
TfLiteTensor* begin =
micro_context->AllocateTempInputTensor(node, kBeginTensor);
TFLITE_DCHECK(begin != nullptr);
const TfLiteTensor* size = GetInput(context, node, kSizeTensor);
TfLiteTensor* size =
micro_context->AllocateTempInputTensor(node, kSizeTensor);
TFLITE_DCHECK(size != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TFLITE_DCHECK(output != nullptr);
// Ensure validity of input tensor and its dimension.
@@ -66,6 +72,12 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(NumDimensions(size) == 1);
TFLITE_DCHECK(NumElements(begin) == NumElements(size));
TFLITE_DCHECK(NumDimensions(input) <= kMaxDim);
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(begin);
micro_context->DeallocateTempTfLiteTensor(size);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -20,6 +20,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/softmax.h"
#include "tensorflow/lite/micro/micro_context.h"
namespace tflite {
@@ -90,12 +91,14 @@ void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length) {
}
TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
TF_LITE_ENSURE(context, input != nullptr);
TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
TfLiteTensor* output = GetOutput(context, node, 0);
TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE(context, node->user_data != nullptr);
@@ -136,7 +139,12 @@ TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
}
auto* params = static_cast<TfLiteSoftmaxParams*>(node->builtin_data);
return CalculateSoftmaxParams(context, input, output, params, op_data);
auto ret_val =
CalculateSoftmaxParams(context, input, output, params, op_data);
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return ret_val;
}
} // namespace tflite

View File

@@ -44,11 +44,15 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) {
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 3);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, input != nullptr && output != nullptr);
TF_LITE_ENSURE(context, NumDimensions(input) >= kInputOutputMinDimensionNum);
@@ -57,6 +61,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE(context, NumDimensions(output) <= kInputOutputMaxDimensionNum);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -39,11 +39,14 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
@@ -75,6 +78,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
output->dims->data[kDepthRank] =
input->dims->data[kDepthRank] * block_size * block_size;
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -69,7 +69,8 @@ TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node,
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* axis = GetInput(context, node, 0);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* axis = micro_context->AllocateTempInputTensor(node, 0);
TF_LITE_ENSURE(context, axis != nullptr);
// Dynamic output tensors are needed if axis tensor is not constant.
@@ -77,6 +78,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
// constant axis tensor for now.
TF_LITE_ENSURE_MSG(context, IsConstantTensor(axis),
"Non constant axis tensor not supported");
micro_context->DeallocateTempTfLiteTensor(axis);
return kTfLiteOk;
}

View File

@@ -74,13 +74,14 @@ TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node,
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 3);
MicroContext* micro_context = GetMicroContext(context);
// Dynamic output tensors are needed if axis tensor is not constant.
// But Micro doesn't support dynamic memory allocation, so we only support
// constant axis tensor for now.
const TfLiteTensor* axis = GetInput(context, node, 2);
TfLiteTensor* axis = micro_context->AllocateTempInputTensor(node, 2);
TF_LITE_ENSURE_MSG(context, IsConstantTensor(axis),
"Non constant axis tensor not supported");
micro_context->DeallocateTempTfLiteTensor(axis);
return kTfLiteOk;
}

View File

@@ -27,12 +27,19 @@ namespace tflite {
namespace {
struct SqueezeContext {
SqueezeContext(TfLiteContext* context, TfLiteNode* node)
: params(reinterpret_cast<TfLiteSqueezeParams*>(node->builtin_data)),
input(GetInput(context, node, 0)),
output(GetOutput(context, node, 0)) {}
SqueezeContext(TfLiteContext* context, TfLiteNode* node) {
params = reinterpret_cast<TfLiteSqueezeParams*>(node->builtin_data);
micro_context = GetMicroContext(context);
input = micro_context->AllocateTempInputTensor(node, 0);
output = micro_context->AllocateTempOutputTensor(node, 0);
}
~SqueezeContext() {
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
}
MicroContext* micro_context;
TfLiteSqueezeParams* params;
const TfLiteTensor* const input;
TfLiteTensor* input;
TfLiteTensor* output;
};
@@ -80,18 +87,24 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
SqueezeContext op_context(context, node);
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
if (op_context.input->type == kTfLiteString) {
if (input->type == kTfLiteString) {
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(op_context.input->type),
op_context.input->type);
TfLiteTypeGetName(input->type), input->type);
return kTfLiteError;
}
TF_LITE_ENSURE_EQ(context, op_context.input->bytes, op_context.output->bytes);
memcpy(op_context.output->data.raw, op_context.input->data.raw,
op_context.input->bytes);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
size_t input_byte_size;
size_t output_byte_size;
TF_LITE_ENSURE_OK(context,
TfLiteEvalTensorByteLength(input, &input_byte_size));
TF_LITE_ENSURE_OK(context,
TfLiteEvalTensorByteLength(output, &output_byte_size));
TF_LITE_ENSURE_EQ(context, input_byte_size, output_byte_size);
memcpy(output->data.raw, input->data.raw, input_byte_size);
return kTfLiteOk;
}

View File

@@ -38,18 +38,27 @@ constexpr int kOutputTensor = 0;
struct StridedSliceContext {
StridedSliceContext(TfLiteContext* context, TfLiteNode* node) {
params = reinterpret_cast<TfLiteStridedSliceParams*>(node->builtin_data);
input = GetInput(context, node, kInputTensor);
begin = GetInput(context, node, kBeginTensor);
end = GetInput(context, node, kEndTensor);
strides = GetInput(context, node, kStridesTensor);
output = GetOutput(context, node, kOutputTensor);
micro_context = GetMicroContext(context);
input = micro_context->AllocateTempInputTensor(node, kInputTensor);
begin = micro_context->AllocateTempInputTensor(node, kBeginTensor);
end = micro_context->AllocateTempInputTensor(node, kEndTensor);
strides = micro_context->AllocateTempInputTensor(node, kStridesTensor);
output = micro_context->AllocateTempOutputTensor(node, kOutputTensor);
dims = NumDimensions(input);
}
~StridedSliceContext() {
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(begin);
micro_context->DeallocateTempTfLiteTensor(end);
micro_context->DeallocateTempTfLiteTensor(strides);
micro_context->DeallocateTempTfLiteTensor(output);
}
const TfLiteStridedSliceParams* params;
const TfLiteTensor* input;
const TfLiteTensor* begin;
const TfLiteTensor* end;
const TfLiteTensor* strides;
MicroContext* micro_context;
TfLiteTensor* input;
TfLiteTensor* begin;
TfLiteTensor* end;
TfLiteTensor* strides;
TfLiteTensor* output;
int dims;
};
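SqueezeContext above and StridedSliceContext here express the same temp-tensor handling in RAII form: the helper struct's constructor borrows the tensors and its destructor returns them, so every early exit in Prepare/Eval still cleans up. A generic sketch (struct name and tensor indices are illustrative):

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/micro_context.h"

namespace tflite {
// Illustrative RAII wrapper mirroring SqueezeContext / StridedSliceContext:
// temp tensors are acquired in the constructor and released in the destructor.
struct TempTensorContext {
  TempTensorContext(TfLiteContext* context, TfLiteNode* node) {
    micro_context = GetMicroContext(context);
    input = micro_context->AllocateTempInputTensor(node, 0);
    output = micro_context->AllocateTempOutputTensor(node, 0);
  }
  ~TempTensorContext() {
    micro_context->DeallocateTempTfLiteTensor(input);
    micro_context->DeallocateTempTfLiteTensor(output);
  }
  MicroContext* micro_context;
  TfLiteTensor* input;
  TfLiteTensor* output;
};
}  // namespace tflite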

View File

@@ -83,15 +83,24 @@ TfLiteStatus SubPrepare(TfLiteContext* context, TfLiteNode* node) {
OpDataSub* data = static_cast<OpDataSub*>(node->user_data);
auto* params = reinterpret_cast<TfLiteSubParams*>(node->builtin_data);
const TfLiteTensor* input1 = GetInput(context, node, kSubInputTensor1);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input1 =
micro_context->AllocateTempInputTensor(node, kSubInputTensor1);
TF_LITE_ENSURE(context, input1 != nullptr);
const TfLiteTensor* input2 = GetInput(context, node, kSubInputTensor2);
TfLiteTensor* input2 =
micro_context->AllocateTempInputTensor(node, kSubInputTensor2);
TF_LITE_ENSURE(context, input2 != nullptr);
TfLiteTensor* output = GetOutput(context, node, kSubOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kSubOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_STATUS(
CalculateOpDataSub(context, params, input1, input2, output, data));
micro_context->DeallocateTempTfLiteTensor(input1);
micro_context->DeallocateTempTfLiteTensor(input2);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -364,6 +364,8 @@ TfLiteStatus PrepareSvdf(TfLiteContext* context, TfLiteNode* node) {
const auto* params = static_cast<const TfLiteSVDFParams*>(node->builtin_data);
MicroContext* micro_context = GetMicroContext(context);
// Validate Tensor Inputs (dtype depends on quantization):
// [0] = Input, {2, batch_size, input_size}
// [1] = Weights Feature, {2, num_filters, input_size}
@@ -371,18 +373,19 @@ TfLiteStatus PrepareSvdf(TfLiteContext* context, TfLiteNode* node) {
// [3] = Bias (optional), {1, num_units}
// [4] = Activation State (variable),
// {2, batch_size, memory_size * num_filters}
const TfLiteTensor* input = GetInput(context, node, kSvdfInputTensor);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kSvdfInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
const TfLiteTensor* weights_feature =
GetInput(context, node, kSvdfWeightsFeatureTensor);
TfLiteTensor* weights_feature =
micro_context->AllocateTempInputTensor(node, kSvdfWeightsFeatureTensor);
TF_LITE_ENSURE(context, weights_feature != nullptr);
const TfLiteTensor* weights_time =
GetInput(context, node, kSvdfWeightsTimeTensor);
TfLiteTensor* weights_time =
micro_context->AllocateTempInputTensor(node, kSvdfWeightsTimeTensor);
TF_LITE_ENSURE(context, weights_time != nullptr);
const TfLiteTensor* bias =
GetOptionalInputTensor(context, node, kSvdfBiasTensor);
const TfLiteTensor* activation_state =
GetInput(context, node, kSvdfInputActivationStateTensor);
TfLiteTensor* bias =
micro_context->AllocateTempInputTensor(node, kSvdfBiasTensor);
TfLiteTensor* activation_state = micro_context->AllocateTempInputTensor(
node, kSvdfInputActivationStateTensor);
TF_LITE_ENSURE(context, activation_state != nullptr);
// Define input constants based on input tensor definition above:
@@ -402,7 +405,8 @@ TfLiteStatus PrepareSvdf(TfLiteContext* context, TfLiteNode* node) {
// Validate Tensor Output:
// [0] = float/int8_t, {2, batch_size, num_units}
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
TfLiteTensor* output = GetOutput(context, node, kSvdfOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kSvdfOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_EQ(context, NumDimensions(output), 2);
TF_LITE_ENSURE_EQ(context, output->dims->data[0], batch_size);
@@ -498,6 +502,12 @@ TfLiteStatus PrepareSvdf(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_OK(context, scratch_status);
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(weights_feature);
micro_context->DeallocateTempTfLiteTensor(weights_time);
micro_context->DeallocateTempTfLiteTensor(activation_state);
micro_context->DeallocateTempTfLiteTensor(output);
micro_context->DeallocateTempTfLiteTensor(bias);
return kTfLiteOk;
}

View File

@@ -48,11 +48,14 @@ void* TanhInit(TfLiteContext* context, const char* buffer, size_t length) {
TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
OpData* data) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
@@ -69,6 +72,62 @@ TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
data->input_range_radius =
CalculateInputRadius(kInputIntegerBits, data->input_left_shift, 31);
}
if (input->type == kTfLiteInt16) {
static constexpr int kInputIntegerBits = 3;
static constexpr int kOutputFractionalBits = 15;
// These operators are implemented in fixed-point arithmetic,
// which intrinsically wants symmetric ranges (zero_point==0)
// and power-of-two scales (power-of-two is abbreviated below as POT).
// While more general support would be possible by means of rescaling,
// that would add some overhead and some loss of accuracy and wouldn't
// be used at the moment as current quantized LSTM applications are
// happy with symmetric, power-of-two-scales quantization. So we just
// implement that narrow case only for now.
TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
int input_scale_log2_rounded;
bool param_scale_pot =
CheckedLog2(input->params.scale, &input_scale_log2_rounded);
data->input_left_shift =
(15 - kInputIntegerBits) + input_scale_log2_rounded;
param_scale_pot &=
(data->input_left_shift == 0 || data->input_left_shift == 1);
if (param_scale_pot) {
data->input_multiplier = 0;
} else {
// Calculate multiplier to change input scale to 1/(3*4096)
// as required by the table lookup.
// The number 3.0 in the multiplier comes from here,
// because the interval is [-10.7, 10.7] instead of [-8, 8].
// So, in this scaling +/-2^17 represents +/-10.7.
double multiplier =
static_cast<double>(input->params.scale) * 4096.0 * 3.0;
data->input_left_shift = 0;
while (multiplier <= 32767.0 / 2.0 && data->input_left_shift <= 30) {
data->input_left_shift++;
multiplier = multiplier * 2.0;
}
data->input_multiplier = static_cast<int32_t>(multiplier);
}
int output_scale_log2_rounded;
TF_LITE_ENSURE(
context, CheckedLog2(output->params.scale, &output_scale_log2_rounded));
TF_LITE_ENSURE_EQ(context, output_scale_log2_rounded,
-kOutputFractionalBits);
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
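To make the int16 branch above concrete, here is a standalone sketch of the multiplier search for a non power-of-two input scale. The scale value is illustrative (an int16 input spanning roughly [-10.7, 10.7]), not taken from a real model:

#include <cstdint>
#include <cstdio>

// Reproduces the left-shift / multiplier search from CalculateArithmeticOpData
// for one example scale and prints the values the kernel would store in OpData.
int main() {
  const double input_scale = 21.4 / 65536.0;  // illustrative non power-of-two scale
  double multiplier = input_scale * 4096.0 * 3.0;
  int input_left_shift = 0;
  while (multiplier <= 32767.0 / 2.0 && input_left_shift <= 30) {
    input_left_shift++;
    multiplier = multiplier * 2.0;
  }
  const int32_t input_multiplier = static_cast<int32_t>(multiplier);
  std::printf("input_left_shift=%d input_multiplier=%d\n", input_left_shift,
              static_cast<int>(input_multiplier));
  return 0;
}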
@@ -77,10 +136,15 @@ TfLiteStatus TanhPrepare(TfLiteContext* context, TfLiteNode* node) {
OpData* data = static_cast<OpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
data->input_zero_point = input->params.zero_point;
return CalculateArithmeticOpData(context, node, data);
TF_LITE_ENSURE_OK(context, CalculateArithmeticOpData(context, node, data));
micro_context->DeallocateTempTfLiteTensor(input);
return kTfLiteOk;
}
} // namespace

View File

@@ -18,18 +18,30 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace {
constexpr int kInputTensor = 0;
constexpr int kPermTensor = 1;
constexpr int kOutputTensor = 0;
struct TransposeContext {
TransposeContext(TfLiteContext* context, TfLiteNode* node) {
input = GetInput(context, node, 0);
perm = GetInput(context, node, 1);
output = GetOutput(context, node, 0);
micro_context = GetMicroContext(context);
input = micro_context->AllocateTempInputTensor(node, kInputTensor);
perm = micro_context->AllocateTempInputTensor(node, kPermTensor);
output = micro_context->AllocateTempOutputTensor(node, kOutputTensor);
}
const TfLiteTensor* input;
const TfLiteTensor* perm;
~TransposeContext() {
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(perm);
micro_context->DeallocateTempTfLiteTensor(output);
}
MicroContext* micro_context;
TfLiteTensor* input;
TfLiteTensor* perm;
TfLiteTensor* output;
};
@@ -60,10 +72,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TransposeContext op_context(context, node);
const int32_t* perm_data = GetTensorData<int32_t>(op_context.perm);
const int size = op_context.perm->dims->data[0];
const TfLiteEvalTensor* perm_tensor =
tflite::micro::GetEvalInput(context, node, kPermTensor);
const int32_t* perm_data = perm_tensor->data.i32;
const int size = perm_tensor->dims->data[0];
TransposeParams params;
params.perm_count = size;
for (int i = 0; i < size; ++i) {
@@ -73,24 +85,28 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
// Transpose kernel only does rearranging values not numeric evaluations
// on each cell. It's safe to implement per size of scalar type and this
// trick keeps the total code size in a reasonable range.
switch (op_context.input->type) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (input->type) {
case kTfLiteFloat32:
reference_ops::Transpose(params, GetTensorShape(op_context.input),
GetTensorData<float>(op_context.input),
GetTensorShape(op_context.output),
GetTensorData<float>(op_context.output));
reference_ops::Transpose(params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
break;
case kTfLiteInt8:
reference_ops::Transpose(params, GetTensorShape(op_context.input),
GetTensorData<int8_t>(op_context.input),
GetTensorShape(op_context.output),
GetTensorData<int8_t>(op_context.output));
reference_ops::Transpose(params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
break;
default:
TF_LITE_KERNEL_LOG(context,
"Type %s is currently not supported by Transpose. "
"Only float32 and int8 is supported",
TfLiteTypeGetName(op_context.input->type));
TfLiteTypeGetName(input->type));
return kTfLiteError;
}

View File

@@ -94,13 +94,18 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
// Note that quantized inference requires that all tensors have their
// parameters set. This is usually done during quantized training.
if (data_type != kTfLiteFloat32) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
TfLiteTensor* filter =
micro_context->AllocateTempInputTensor(node, kFilterTensor);
TF_LITE_ENSURE(context, filter != nullptr);
const TfLiteTensor* bias =
GetOptionalInputTensor(context, node, kBiasTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TfLiteTensor* bias =
micro_context->AllocateTempInputTensor(node, kBiasTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
int output_channels = filter->dims->data[kConvQuantizedDimension];
@@ -124,6 +129,13 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
&(data->bias_converted_buffer_index)) == kTfLiteOk);
}
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(filter);
micro_context->DeallocateTempTfLiteTensor(output);
if (bias != nullptr) {
micro_context->DeallocateTempTfLiteTensor(bias);
}
}
return kTfLiteOk;
}
@@ -141,11 +153,16 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
const auto params =
static_cast<const TfLiteTransposeConvParams*>(node->builtin_data);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
TfLiteTensor* filter =
micro_context->AllocateTempInputTensor(node, kFilterTensor);
TF_LITE_ENSURE(context, filter != nullptr);
// Get height and width of the output.
@@ -212,6 +229,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
// Stride
data->params.stride_width = params->stride_width;
data->params.stride_height = params->stride_height;
micro_context->DeallocateTempTfLiteTensor(output);
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(filter);
return kTfLiteOk;
}

View File

@@ -46,14 +46,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
const auto* params =
reinterpret_cast<const TfLiteVarHandleParams*>(node->builtin_data);
// Casting to TfLiteIntArray is required since we are re-using
// GetExecutionPlan from TfLiteContext. On TFLM this method returns a
// MicroGraph.
// TODO(b/188226309): Design a cleaner way to get a graph from kernel context.
MicroGraph* graph_info;
context->GetExecutionPlan(context,
reinterpret_cast<TfLiteIntArray**>(&graph_info));
MicroResourceVariables* resources = graph_info->GetResourceVariables();
tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
MicroGraph& graph_info = micro_context->graph();
MicroResourceVariables* resources = graph_info.GetResourceVariables();
if (resources == nullptr) {
MicroPrintf(
"VAR_HANDLE requires resource variables. Please create "

View File

@@ -0,0 +1,140 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <stddef.h>
#include <cstring>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_context.h"
#include "tensorflow/lite/micro/micro_graph.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace {
struct OpData {
int cond_subgraph_index;
int body_subgraph_index;
};
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
const auto* params =
reinterpret_cast<const TfLiteWhileParams*>(node->builtin_data);
op_data->cond_subgraph_index = params->cond_subgraph_index;
op_data->body_subgraph_index = params->body_subgraph_index;
// The first input is the condition.
tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
size_t num_inputs = node->inputs->size;
size_t num_outputs = node->outputs->size;
MicroGraph& graph_info = micro_context->graph();
TF_LITE_ENSURE(context,
op_data->cond_subgraph_index < graph_info.NumSubgraphs());
TF_LITE_ENSURE(context,
op_data->body_subgraph_index < graph_info.NumSubgraphs());
TF_LITE_ENSURE_EQ(context, num_inputs,
graph_info.NumSubgraphInputs(op_data->cond_subgraph_index));
TF_LITE_ENSURE_EQ(context, num_inputs,
graph_info.NumSubgraphInputs(op_data->body_subgraph_index));
TF_LITE_ENSURE_EQ(context, num_inputs, num_outputs);
TF_LITE_ENSURE_EQ(
context, num_outputs,
graph_info.NumSubgraphOutputs(op_data->body_subgraph_index));
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
MicroGraph* graph_info = &micro_context->graph();
TF_LITE_ENSURE_OK(context,
tflite::micro::CopyOpInputsToSubgraphInputs(
context, node, graph_info, op_data->cond_subgraph_index,
/*first_tensor_idx=*/0));
TF_LITE_ENSURE_OK(context,
graph_info->InvokeSubgraph(op_data->cond_subgraph_index));
TfLiteEvalTensor* cond_subgraph_output = graph_info->GetSubgraphOutput(
op_data->cond_subgraph_index, /*tensor_idx=*/0);
bool cond_value = cond_subgraph_output->data.b[0];
TF_LITE_ENSURE_OK(context,
tflite::micro::CopyOpInputsToSubgraphInputs(
context, node, graph_info, op_data->body_subgraph_index,
/*first_tensor_idx=*/0));
TF_LITE_ENSURE_OK(context,
tflite::micro::CopyOpInputsToOpOutputs(context, node));
while (cond_value == true) {
// Copy output of this iteration back to the body input.
TF_LITE_ENSURE_OK(
context, tflite::micro::CopyOpOutputsToSubgraphInputs(
context, node, graph_info, op_data->body_subgraph_index));
TF_LITE_ENSURE_OK(context,
graph_info->InvokeSubgraph(op_data->body_subgraph_index));
TF_LITE_ENSURE_OK(
context, tflite::micro::CopySubgraphOutputsToOpOutputs(
context, node, graph_info, op_data->body_subgraph_index));
TF_LITE_ENSURE_OK(
context, tflite::micro::CopyOpOutputsToSubgraphInputs(
context, node, graph_info, op_data->cond_subgraph_index));
TF_LITE_ENSURE_OK(context,
graph_info->InvokeSubgraph(op_data->cond_subgraph_index));
cond_subgraph_output = graph_info->GetSubgraphOutput(
op_data->cond_subgraph_index, /*tensor_idx=*/0);
cond_value = cond_subgraph_output->data.b[0];
}
return kTfLiteOk;
}
} // namespace.
TfLiteRegistration Register_WHILE() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
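Stripped of the tensor-copy helpers, the dataflow of the new WHILE kernel above is an ordinary while loop. The standalone sketch below uses hypothetical cond/body callables rather than TFLM subgraph APIs, purely to show the equivalence:

#include <functional>
#include <vector>

// Illustrative model of Eval(): outputs start as a copy of the op inputs, and
// the body subgraph runs for as long as the condition subgraph returns true
// on the current outputs.
std::vector<float> RunWhile(
    const std::vector<float>& inputs,
    const std::function<bool(const std::vector<float>&)>& cond,
    const std::function<std::vector<float>(const std::vector<float>&)>& body) {
  std::vector<float> outputs = inputs;  // CopyOpInputsToOpOutputs
  while (cond(outputs)) {               // invoke the cond subgraph
    outputs = body(outputs);            // invoke the body subgraph
  }
  return outputs;
}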

View File

@@ -25,15 +25,20 @@ constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
output->type = input->type;
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -0,0 +1,319 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/micro_allocation_info.h"
#include "tensorflow/lite/c/c_api_types.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/memory_planner/greedy_memory_planner.h"
namespace tflite {
namespace {
constexpr char kOfflineMemAllocMetadata[] = "OfflineMemoryAllocation";
constexpr int kUninitializedLifetime = -1;
} // namespace
// Mark the given Allocation info as first created at the specified allocation
// scope count. Only the first creation must be recorded since the allocation
// scope count monotonically increases throughout the lifetime marking process.
void AllocationInfoBuilder::UpdateFirstCreated(AllocationInfo* current,
int allocation_scope_count) {
TFLITE_DCHECK(current->first_created <= allocation_scope_count);
if (current->first_created == kUninitializedLifetime) {
current->first_created = allocation_scope_count;
}
}
// Mark the given AllocationInfo as last used at the specified allocation scope
// count. Update the last used marker every time, since the allocation scope
// count monotonically increases through the lifetime marking process.
void AllocationInfoBuilder::UpdateLastUsed(AllocationInfo* current,
int allocation_scope_count) {
TFLITE_DCHECK(current->last_used <= allocation_scope_count);
current->last_used = allocation_scope_count;
}
TfLiteStatus AllocationInfoBuilder::MarkSubgraphLifetimesIfNecessary(
const Operator* op, internal::ScratchBufferRequest* scratch_buffer_requests,
ScratchBufferHandle* scratch_buffer_handles,
SubgraphAllocations* allocations) {
int first_subgraph_index = -1;
int second_subgraph_index = -1;
const OperatorCode* opcode =
model_->operator_codes()->Get(op->opcode_index());
switch (opcode->builtin_code()) {
case BuiltinOperator_IF: {
first_subgraph_index =
op->builtin_options_as_IfOptions()->then_subgraph_index();
second_subgraph_index =
op->builtin_options_as_IfOptions()->else_subgraph_index();
break;
}
case BuiltinOperator_CALL_ONCE: {
first_subgraph_index =
op->builtin_options_as_CallOnceOptions()->init_subgraph_index();
break;
}
case BuiltinOperator_WHILE: {
first_subgraph_index =
op->builtin_options_as_WhileOptions()->cond_subgraph_index();
second_subgraph_index =
op->builtin_options_as_WhileOptions()->body_subgraph_index();
break;
}
default: {
break;
}
}
if (first_subgraph_index != -1) {
// Enter a new allocation scope for each subgraph.
allocation_scope_count_++;
TF_LITE_ENSURE_STATUS(
MarkAllocationLifetimes(first_subgraph_index, scratch_buffer_requests,
scratch_buffer_handles, allocations));
}
if (second_subgraph_index != -1) {
// Enter a new allocation scope for each subgraph.
allocation_scope_count_++;
TF_LITE_ENSURE_STATUS(
MarkAllocationLifetimes(second_subgraph_index, scratch_buffer_requests,
scratch_buffer_handles, allocations));
}
return kTfLiteOk;
}
TfLiteStatus AllocationInfoBuilder::CreateAllocationInfo(
int scratch_buffer_request_count) {
size_t subgraph_offsets_length = model_->subgraphs()->size() * sizeof(size_t);
info_.subgraph_offsets =
reinterpret_cast<size_t*>(non_persistent_allocator_->AllocateTemp(
subgraph_offsets_length, alignof(size_t)));
if (info_.subgraph_offsets == nullptr) {
TF_LITE_REPORT_ERROR(
reporter_,
"Failed to allocate memory for memory planning, %d bytes required",
subgraph_offsets_length);
return kTfLiteError;
}
size_t tensor_count = 0;
for (size_t subgraph_idx = 0; subgraph_idx < model_->subgraphs()->size();
subgraph_idx++) {
// Add all tensors in each subgraph to the AllocationInfo array. Even weight
// tensors are added but marked with needs_allocating = false. Including all
// tensors in the graph here simplifies logic.
info_.subgraph_offsets[subgraph_idx] = tensor_count;
tensor_count += model_->subgraphs()->Get(subgraph_idx)->tensors()->size();
}
info_.tensor_count = tensor_count;
// Scratch buffer allocations follow tensor allocations, so the scratch offset
// is equal to the number of tensor allocations.
info_.scratch_offset = tensor_count;
info_.allocation_info_count = tensor_count + scratch_buffer_request_count;
info_.scratch_buffer_count = scratch_buffer_request_count;
size_t bytes = sizeof(AllocationInfo) * info_.allocation_info_count;
// Allocate an array of AllocationInfo structs from the temp section. This
// struct will be used by AllocationInfoBuilder to find buffer usage.
info_.allocation_info = reinterpret_cast<AllocationInfo*>(
non_persistent_allocator_->AllocateTemp(bytes, alignof(AllocationInfo)));
if (info_.allocation_info == nullptr) {
TF_LITE_REPORT_ERROR(
reporter_,
"Failed to allocate memory for memory planning, %d bytes required",
bytes);
return kTfLiteError;
}
return kTfLiteOk;
}
TfLiteStatus AllocationInfoBuilder::FreeAllocationInfo() {
non_persistent_allocator_->DeallocateTemp(
reinterpret_cast<uint8_t*>(info_.allocation_info));
non_persistent_allocator_->DeallocateTemp(
reinterpret_cast<uint8_t*>(info_.subgraph_offsets));
return kTfLiteOk;
}
TfLiteStatus AllocationInfoBuilder::InitializeAllocationInfo(
const int32_t* offline_offsets, SubgraphAllocations* allocations) {
AllocationInfo* allocation_info = info_.allocation_info;
// Initialize allocation info for every tensor in every subgraph.
for (size_t subgraph_idx = 0; subgraph_idx < model_->subgraphs()->size();
subgraph_idx++) {
const SubGraph* subgraph = model_->subgraphs()->Get(subgraph_idx);
TfLiteEvalTensor* eval_tensors = allocations[subgraph_idx].tensors;
AllocationInfo* subgraph_allocation_info =
&allocation_info[info_.subgraph_offsets[subgraph_idx]];
for (size_t i = 0; i < subgraph->tensors()->size(); ++i) {
AllocationInfo* current = &subgraph_allocation_info[i];
current->output_ptr = &(eval_tensors[i].data.data);
TF_LITE_ENSURE_STATUS(
TfLiteEvalTensorByteLength(&eval_tensors[i], &current->bytes));
current->first_created = kUninitializedLifetime;
current->last_used = kUninitializedLifetime;
current->needs_allocating = (eval_tensors[i].data.data == nullptr) &&
(!subgraph->tensors()->Get(i)->is_variable());
if (offline_offsets) {
current->offline_offset = offline_offsets[i];
} else {
current->offline_offset = kOnlinePlannedBuffer;
}
}
}
// Initialize allocation info for every scratch buffer.
AllocationInfo* scratch_allocation_info =
&allocation_info[info_.scratch_offset];
for (size_t i = 0; i < info_.scratch_buffer_count; i++) {
AllocationInfo* current = &scratch_allocation_info[i];
current->first_created = -1;
current->last_used = -1;
current->needs_allocating = true;
current->offline_offset = kOnlinePlannedBuffer;
}
return kTfLiteOk;
}
TfLiteStatus AllocationInfoBuilder::MarkAllocationLifetimes(
int subgraph_idx, internal::ScratchBufferRequest* scratch_buffer_requests,
ScratchBufferHandle* scratch_buffer_handles,
SubgraphAllocations* allocations) {
const SubGraph* subgraph = model_->subgraphs()->Get(subgraph_idx);
AllocationInfo* allocation_info = info_.allocation_info;
// Each subgraph's tensor allocations are in a contiguous block starting at
// subgraph_offsets_[subgraph index] with one entry per tensor.
AllocationInfo* subgraph_allocation_info =
&allocation_info[info_.subgraph_offsets[subgraph_idx]];
uint32_t operators_size = NumSubgraphOperators(subgraph);
// Mark all inputs as created at the start of the subgraph invocation.
for (size_t i = 0;
subgraph->inputs() != nullptr && i < subgraph->inputs()->size(); ++i) {
const int tensor_index = subgraph->inputs()->Get(i);
AllocationInfo* current = &subgraph_allocation_info[tensor_index];
UpdateFirstCreated(current, allocation_scope_count_);
}
for (uint32_t i = 0; i < operators_size; i++) {
// Each operator has a new allocation scope.
allocation_scope_count_++;
const auto* op = subgraph->operators()->Get(i);
// Figure out when the first creation and use of each tensor is.
for (size_t n = 0; op->outputs() != nullptr && n < op->outputs()->size();
++n) {
const int tensor_index = op->outputs()->Get(n);
AllocationInfo* current = &subgraph_allocation_info[tensor_index];
UpdateFirstCreated(current, allocation_scope_count_);
}
// Keep track of scope count before any subgraphs, so that scratch buffers'
// lifetime within a control flow op properly overlaps with all subgraphs.
int start_allocation_scope_count = allocation_scope_count_;
// Control flow operators can invoke subgraphs. Plan these subgraphs
// before continuing on to the rest of the graph.
MarkSubgraphLifetimesIfNecessary(op, scratch_buffer_requests,
scratch_buffer_handles, allocations);
// Figure out when the last use of each tensor is.
for (size_t n = 0; op->inputs() != nullptr && n < op->inputs()->size();
++n) {
const int tensor_index = op->inputs()->Get(n);
// Optional bias tensors can have an index of -1 when they are omitted.
if (tensor_index >= 0) {
AllocationInfo* current = &subgraph_allocation_info[tensor_index];
// No need to update creation since it is either marked by the subgraph
// or producer op, or it is not part of the memory plan (weight, bias
// tensor).
UpdateLastUsed(current, allocation_scope_count_);
}
}
for (size_t n = 0; op->outputs() != nullptr && n < op->outputs()->size();
++n) {
const int tensor_index = op->outputs()->Get(n);
AllocationInfo* current = &subgraph_allocation_info[tensor_index];
UpdateLastUsed(current, allocation_scope_count_);
}
// Mark the lifetime of scratch buffers belonging to the current node. This
// operation is O(N * M) where N is the total number of visited nodes and M
// is the total number of scratch buffers.
// TODO(b/217794030): Optimize this memory planning code.
AllocationInfo* scratch_allocation_info =
&allocation_info[info_.scratch_offset];
for (size_t scratch_idx = 0; scratch_idx < info_.scratch_buffer_count;
scratch_idx++) {
internal::ScratchBufferRequest request =
scratch_buffer_requests[scratch_idx];
AllocationInfo* current = &scratch_allocation_info[scratch_idx];
if (request.node_idx == static_cast<int>(i) &&
request.subgraph_idx == static_cast<int>(subgraph_idx)) {
ScratchBufferHandle* current_handle =
&(scratch_buffer_handles[scratch_idx]);
current->output_ptr = reinterpret_cast<void**>(&current_handle->data);
current->bytes = request.bytes;
UpdateFirstCreated(current, start_allocation_scope_count);
UpdateLastUsed(current, allocation_scope_count_);
}
}
}
// Mark all outputs as persistent to the end of the subgraph invocation.
for (size_t i = 0;
subgraph->outputs() != nullptr && i < subgraph->outputs()->size(); ++i) {
const int tensor_index = subgraph->outputs()->Get(i);
AllocationInfo* current = &subgraph_allocation_info[tensor_index];
UpdateLastUsed(current, allocation_scope_count_);
}
return kTfLiteOk;
}
// Get offline tensors allocation plan. See
// micro/docs/memory_management.md for more info.
TfLiteStatus AllocationInfoBuilder::GetOfflinePlannedOffsets(
const int32_t** offline_planner_offsets) {
if (model_->metadata()) {
for (size_t i = 0; i < model_->metadata()->size(); ++i) {
auto metadata = model_->metadata()->Get(i);
if (strncmp(metadata->name()->c_str(), kOfflineMemAllocMetadata,
strlen(kOfflineMemAllocMetadata)) == 0) {
const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers =
model_->buffers();
auto* buffer = (*buffers)[metadata->buffer()];
auto* array = buffer->data();
const uint32_t* metadata_buffer =
reinterpret_cast<const uint32_t*>(array->data());
const size_t nbr_tensors = static_cast<size_t>(metadata_buffer[2]);
*offline_planner_offsets =
reinterpret_cast<const int32_t*>(&metadata_buffer[3]);
if (info_.tensor_count != nbr_tensors) {
TF_LITE_REPORT_ERROR(reporter_,
"Nbr of offline buffer offsets (%d) in metadata "
"not equal nbr tensors (%d)\n",
nbr_tensors, info_.tensor_count);
return kTfLiteError;
}
}
}
}
return kTfLiteOk;
}
} // namespace tflite
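GetOfflinePlannedOffsets() reads the "OfflineMemoryAllocation" metadata buffer as an array of 32-bit words, taking the tensor count from word 2 and the per-tensor arena offsets from word 3 onward. The sketch below builds such a buffer for a toy plan; treating words 0 and 1 as a format version and a subgraph index is an assumption based on micro/docs/memory_management.md, not something this diff establishes:

#include <cstdint>
#include <vector>

// Builds an illustrative offline-plan metadata payload with the layout read by
// GetOfflinePlannedOffsets(): [word0, word1, tensor_count, offset0, offset1, ...].
std::vector<uint32_t> BuildOfflinePlanMetadata(const std::vector<int32_t>& offsets) {
  std::vector<uint32_t> words;
  words.push_back(1);  // word 0: assumed format version
  words.push_back(0);  // word 1: assumed subgraph index
  words.push_back(static_cast<uint32_t>(offsets.size()));  // word 2: tensor count
  for (int32_t offset : offsets) {
    words.push_back(static_cast<uint32_t>(offset));  // words 3..N: per-tensor offset
  }
  return words;
}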

View File

@@ -0,0 +1,141 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_ALLOCATION_INFO_H_
#define TENSORFLOW_LITE_MICRO_MICRO_ALLOCATION_INFO_H_
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/flatbuffer_utils.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
// Used to hold information used during allocation calculations.
struct AllocationInfo {
size_t bytes;
void** output_ptr;
int first_created;
int last_used;
int32_t offline_offset;
bool needs_allocating;
};
// Used to hold the allocation info list and related metadata for the entire
// graph (including subgraphs). Since all subgraphs are planned together, the
// allocation info list contains allocations for all subgraphs. Track the offset
// into this list for each subgraph then reserve space to track all allocations.
//
// The AllocationInfo list is a contiguous list of allocations across all
// subgraphs and scratch buffers. Each element here is marked as
// s<subgraph index>t<tensor index>. The following is a possible
// AllocationInfo list:
// [s0t0, s0t1, s1t0, s2t1, s1t2, s3t0, s3t1, scratch0, scratch1, scratch2]
//
// For this example, the subgraph offsets would be [0, 2, 5] and the scratch
// offset would be 7.
struct GraphAllocationInfo {
AllocationInfo* allocation_info;
size_t allocation_info_count;
size_t* subgraph_offsets;
size_t scratch_offset;
size_t tensor_count;
size_t scratch_buffer_count;
};
// A helper class to construct AllocationInfo array. This array contains the
// lifetime of tensors / scratch_buffer and will be used to calculate the memory
// plan. Methods need to be called in order from `CreateAllocationInfo`,
// `InitializeAllocationInfo`, `MarkAllocationLifetimes`, to `Finish`.
class AllocationInfoBuilder {
public:
AllocationInfoBuilder(const Model* model,
INonPersistentBufferAllocator* non_persistent_allocator,
ErrorReporter* reporter)
: model_(model),
non_persistent_allocator_(non_persistent_allocator),
reporter_(reporter) {}
// Check if model contains offline planned buffer offsets.
// - If there's no metadata available, offline_planner_offsets is not set
// - If there's metadata available, offline_planner_offsets will point to the
// first offset in the metadata buffer list.
TfLiteStatus GetOfflinePlannedOffsets(
const int32_t** offline_planner_offsets);
// Allocate memory for the allocation info array as well as offsets into that
// array for each subgraph.
TfLiteStatus CreateAllocationInfo(int scratch_buffer_request_count);
// Release memory used for the allocation info array.
TfLiteStatus FreeAllocationInfo();
// Initialize AllocationInfo for all tensors and scratch buffers in the graph.
TfLiteStatus InitializeAllocationInfo(const int32_t* offline_offsets,
SubgraphAllocations* allocations);
// Mark the scope of each tensor and scratch buffer across the graph. Enter
// all possible subgraphs invoked by each control flow operator. This method
// marks the maximum lifetime of each buffer so that tensors are correctly
// planned for all valid invocation flows.
TfLiteStatus MarkAllocationLifetimes(
int subgraph_idx, internal::ScratchBufferRequest* scratch_buffer_request,
ScratchBufferHandle* scratch_buffer_handles,
SubgraphAllocations* allocations);
// Identify control flow operators and recursively mark all subgraphs which
// that operator can invoke. The lifetime of all tensors within a subgraph
// can only be extended. The order of subgraph invocation does not matter
// since subgraphs within the same control flow operator are executed
// within their own allocation scope (planned buffers in a subgraph cannot
// persist beyond the end of that subgraph's invocation).
TfLiteStatus MarkSubgraphLifetimesIfNecessary(
const Operator* op,
internal::ScratchBufferRequest* scratch_buffer_requests,
ScratchBufferHandle* scratch_buffer_handles,
SubgraphAllocations* allocations);
// Returns the number of allocations.
int AllocationCount() const { return info_.allocation_info_count; }
// Returns a pointer to the built AllocationInfo array.
AllocationInfo* Finish() const { return info_.allocation_info; }
private:
// Mark the given Allocation info as first created at the specified allocation
// scope count. Only the first creation must be recorded since the allocation
// scope count monotonically increases throughout the lifetime marking
// process.
void UpdateFirstCreated(AllocationInfo* current, int allocation_scope_count);
// Mark the given AllocationInfo as last used at the specified allocation scope
// count. Update the last used marker every time, since the allocation scope
// count monotonically increases through the lifetime marking process.
void UpdateLastUsed(AllocationInfo* current, int allocation_scope_count);
const tflite::Model* model_ = nullptr;
INonPersistentBufferAllocator* non_persistent_allocator_ = nullptr;
ErrorReporter* reporter_ = nullptr;
GraphAllocationInfo info_;
int allocation_scope_count_ = 0;
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MICRO_ALLOCATION_INFO_H_
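Taken together, the builder methods above are meant to be driven in a fixed order from MicroAllocator. A minimal sketch of that order, assuming the caller already owns the model, allocator, reporter, scratch-buffer requests/handles and per-subgraph allocations (as MicroAllocator does); the planner step itself is omitted:

#include <cstdint>

#include "tensorflow/lite/micro/micro_allocation_info.h"

namespace tflite {
// Sketch only: drives AllocationInfoBuilder in the documented order and hands
// the resulting AllocationInfo array to a memory planner (not shown here).
TfLiteStatus BuildAllocationPlanInputs(
    const Model* model, INonPersistentBufferAllocator* allocator,
    ErrorReporter* reporter, internal::ScratchBufferRequest* scratch_requests,
    ScratchBufferHandle* scratch_handles, SubgraphAllocations* allocations,
    int scratch_request_count) {
  AllocationInfoBuilder builder(model, allocator, reporter);
  const int32_t* offline_offsets = nullptr;
  TF_LITE_ENSURE_STATUS(builder.GetOfflinePlannedOffsets(&offline_offsets));
  TF_LITE_ENSURE_STATUS(builder.CreateAllocationInfo(scratch_request_count));
  TF_LITE_ENSURE_STATUS(
      builder.InitializeAllocationInfo(offline_offsets, allocations));
  TF_LITE_ENSURE_STATUS(builder.MarkAllocationLifetimes(
      /*subgraph_idx=*/0, scratch_requests, scratch_handles, allocations));
  AllocationInfo* plan_input = builder.Finish();  // consumed by the planner
  (void)plan_input;
  return builder.FreeAllocationInfo();
}
}  // namespace tflite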

View File

@@ -19,6 +19,7 @@ limitations under the License.
#include <cstdint>
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/c/c_api_types.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
@@ -30,6 +31,7 @@ limitations under the License.
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/memory_planner/greedy_memory_planner.h"
#include "tensorflow/lite/micro/memory_planner/micro_memory_planner.h"
#include "tensorflow/lite/micro/micro_allocation_info.h"
#include "tensorflow/lite/micro/micro_arena_constants.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"
@@ -48,26 +50,17 @@ constexpr size_t kMaxScratchBuffersPerOp = 12;
// needs a node id assignment.
constexpr int kUnassignedScratchBufferRequestIndex = -1;
// Used to hold information used during allocation calculations.
struct AllocationInfo {
size_t bytes;
void** output_ptr;
int first_created;
int last_used;
int32_t offline_offset;
bool needs_allocating;
};
constexpr char kOfflineMemAllocMetadata[] = "OfflineMemoryAllocation";
const TfLiteIntArray kZeroLengthIntArray = {};
class MicroBuiltinDataAllocator : public BuiltinDataAllocator {
public:
explicit MicroBuiltinDataAllocator(SimpleMemoryAllocator* memory_allocator)
: memory_allocator_(memory_allocator) {}
explicit MicroBuiltinDataAllocator(
IPersistentBufferAllocator* persistent_allocator)
: persistent_allocator_(persistent_allocator) {}
void* Allocate(size_t size, size_t alignment_hint) override {
return memory_allocator_->AllocateFromTail(size, alignment_hint);
return persistent_allocator_->AllocatePersistentBuffer(size,
alignment_hint);
}
void Deallocate(void* data) override {
// Do not deallocate, builtin data needs to be available for the life time
@@ -77,169 +70,9 @@ class MicroBuiltinDataAllocator : public BuiltinDataAllocator {
TF_LITE_REMOVE_VIRTUAL_DELETE
private:
SimpleMemoryAllocator* memory_allocator_;
IPersistentBufferAllocator* persistent_allocator_;
};
// A helper class to construct AllocationInfo array. This array contains the
// lifetime of tensors / scratch_buffer and will be used to calculate the memory
// plan. Methods need to be called in order from `Init`, `Add*`, to `Finish`.
class AllocationInfoBuilder {
public:
AllocationInfoBuilder(AllocationInfo* info, size_t tensor_count,
size_t scratch_buffer_count, ErrorReporter* reporter)
: info_(info),
tensor_count_(tensor_count),
buffer_count_(scratch_buffer_count),
reporter_(reporter) {}
// Check if model contains offline planned buffer offsets.
// - If there's no metadata available, offline_planner_offsets is not set
// - If there's metadata available, offline_planner_offsets will point to the
// first offset in the metadata buffer list.
TfLiteStatus GetOfflinePlannedOffsets(
const Model* model, const int32_t** offline_planner_offsets);
// Add allocation information for the tensors.
TfLiteStatus AddTensors(const SubGraph* subgraph,
const int32_t* offline_offsets,
TfLiteEvalTensor* eval_tensors);
// Add allocation information for the scratch buffers.
TfLiteStatus AddScratchBuffers(
internal::ScratchBufferRequest* scratch_buffer_requests,
ScratchBufferHandle* scratch_buffer_handles);
// Returns a pointer to the built AllocationInfo array.
const AllocationInfo* Finish() const { return info_; }
private:
AllocationInfo* info_ = nullptr;
size_t tensor_count_ = 0;
size_t buffer_count_ = 0;
ErrorReporter* reporter_ = nullptr;
};
TfLiteStatus AllocationInfoBuilder::AddTensors(const SubGraph* subgraph,
const int32_t* offline_offsets,
TfLiteEvalTensor* eval_tensors) {
TFLITE_DCHECK(eval_tensors != nullptr);
// Set up allocation info for all tensors.
for (size_t i = 0; i < tensor_count_; ++i) {
AllocationInfo* current = &info_[i];
current->output_ptr = &(eval_tensors[i].data.data);
TF_LITE_ENSURE_STATUS(
TfLiteEvalTensorByteLength(&eval_tensors[i], &current->bytes));
current->first_created = -1;
current->last_used = -1;
current->needs_allocating = (eval_tensors[i].data.data == nullptr) &&
(!subgraph->tensors()->Get(i)->is_variable());
if (offline_offsets) {
current->offline_offset = offline_offsets[i];
} else {
current->offline_offset = kOnlinePlannedBuffer;
}
}
uint32_t operators_size = NumSubgraphOperators(subgraph);
for (size_t i = 0;
subgraph->inputs() != nullptr && i < subgraph->inputs()->size(); ++i) {
const int tensor_index = subgraph->inputs()->Get(i);
AllocationInfo* current = &info_[tensor_index];
current->first_created = 0;
}
// Mark all outputs as persistent to the end of the invocation.
for (size_t i = 0;
subgraph->outputs() != nullptr && i < subgraph->outputs()->size(); ++i) {
const int tensor_index = subgraph->outputs()->Get(i);
AllocationInfo* current = &info_[tensor_index];
current->last_used = operators_size - 1;
}
// Figure out when the first and last use of each tensor is.
for (int i = (operators_size - 1); i >= 0; --i) {
const auto* op = subgraph->operators()->Get(i);
for (size_t n = 0; op->inputs() != nullptr && n < op->inputs()->size();
++n) {
const int tensor_index = op->inputs()->Get(n);
AllocationInfo* current = &info_[tensor_index];
if (((current->last_used == -1) || (current->last_used < i))) {
current->last_used = i;
}
}
for (size_t n = 0; op->outputs() != nullptr && n < op->outputs()->size();
++n) {
const int tensor_index = op->outputs()->Get(n);
AllocationInfo* current = &info_[tensor_index];
if ((current->first_created == -1) || (current->first_created > i)) {
current->first_created = i;
}
// Since operator outputs are written to, they must be marked as used.
if ((current->last_used == -1) || (current->last_used < i)) {
current->last_used = i;
}
}
}
return kTfLiteOk;
}
// Get offline tensors allocation plan. See
// micro/docs/memory_management.md for more info.
TfLiteStatus AllocationInfoBuilder::GetOfflinePlannedOffsets(
const Model* model, const int32_t** offline_planner_offsets) {
if (model->metadata()) {
for (size_t i = 0; i < model->metadata()->size(); ++i) {
auto metadata = model->metadata()->Get(i);
if (strncmp(metadata->name()->c_str(), kOfflineMemAllocMetadata,
strlen(kOfflineMemAllocMetadata)) == 0) {
const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers =
model->buffers();
auto* buffer = (*buffers)[metadata->buffer()];
auto* array = buffer->data();
const uint32_t* metadata_buffer =
reinterpret_cast<const uint32_t*>(array->data());
const size_t nbr_tensors = static_cast<size_t>(metadata_buffer[2]);
*offline_planner_offsets =
reinterpret_cast<const int32_t*>(&metadata_buffer[3]);
if (tensor_count_ != nbr_tensors) {
TF_LITE_REPORT_ERROR(reporter_,
"Nbr of offline buffer offsets (%d) in metadata "
"not equal nbr tensors (%d)\n",
nbr_tensors, tensor_count_);
return kTfLiteError;
}
}
}
}
return kTfLiteOk;
}
TfLiteStatus AllocationInfoBuilder::AddScratchBuffers(
internal::ScratchBufferRequest* scratch_buffer_requests,
ScratchBufferHandle* scratch_buffer_handles) {
// Set up allocation info for buffers.
for (size_t i = tensor_count_; i < tensor_count_ + buffer_count_; ++i) {
internal::ScratchBufferRequest* current_request =
&(scratch_buffer_requests[i - tensor_count_]);
ScratchBufferHandle* current_handle =
&(scratch_buffer_handles[i - tensor_count_]);
AllocationInfo* current = &info_[i];
current->output_ptr = reinterpret_cast<void**>(&current_handle->data);
current->bytes = current_request->bytes;
current->first_created = current_request->node_idx;
current->last_used = current_request->node_idx;
current->offline_offset = kOnlinePlannedBuffer;
current->needs_allocating = true;
}
return kTfLiteOk;
}
TfLiteStatus CreatePlan(ErrorReporter* error_reporter,
MicroMemoryPlanner* planner,
const AllocationInfo* allocation_info,
@@ -282,6 +115,7 @@ TfLiteStatus CommitPlan(ErrorReporter* error_reporter,
}
return kTfLiteOk;
}
} // namespace
namespace internal {
@@ -319,8 +153,9 @@ void* GetFlatbufferTensorBuffer(
}
TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
SimpleMemoryAllocator* allocator, bool allocate_temp,
const tflite::Tensor& flatbuffer_tensor,
IPersistentBufferAllocator* persistent_buffer_allocator,
INonPersistentBufferAllocator* non_persistent_buffer_allocator,
bool allocate_temp, const tflite::Tensor& flatbuffer_tensor,
const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
ErrorReporter* error_reporter, TfLiteTensor* result) {
TFLITE_DCHECK(result != nullptr);
@@ -385,10 +220,11 @@ TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
TfLiteAffineQuantization* quantization =
allocate_temp
? reinterpret_cast<TfLiteAffineQuantization*>(
allocator->AllocateTemp(sizeof(TfLiteAffineQuantization),
alignof(TfLiteAffineQuantization)))
non_persistent_buffer_allocator->AllocateTemp(
sizeof(TfLiteAffineQuantization),
alignof(TfLiteAffineQuantization)))
: reinterpret_cast<TfLiteAffineQuantization*>(
allocator->AllocateFromTail(
persistent_buffer_allocator->AllocatePersistentBuffer(
sizeof(TfLiteAffineQuantization),
alignof(TfLiteAffineQuantization)));
if (quantization == nullptr) {
@@ -402,12 +238,14 @@ TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
// zero_point is stored as a int64_t.
quantization->zero_point =
allocate_temp
? reinterpret_cast<TfLiteIntArray*>(allocator->AllocateTemp(
TfLiteIntArrayGetSizeInBytes(channels),
alignof(TfLiteIntArray)))
: reinterpret_cast<TfLiteIntArray*>(allocator->AllocateFromTail(
TfLiteIntArrayGetSizeInBytes(channels),
alignof(TfLiteIntArray)));
? reinterpret_cast<TfLiteIntArray*>(
non_persistent_buffer_allocator->AllocateTemp(
TfLiteIntArrayGetSizeInBytes(channels),
alignof(TfLiteIntArray)))
: reinterpret_cast<TfLiteIntArray*>(
persistent_buffer_allocator->AllocatePersistentBuffer(
TfLiteIntArrayGetSizeInBytes(channels),
alignof(TfLiteIntArray)));
if (quantization->zero_point == nullptr) {
TF_LITE_REPORT_ERROR(error_reporter,
"Unable to allocate quantization->zero_point.\n");
@@ -437,7 +275,7 @@ TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
}
TfLiteStatus InitializeTfLiteEvalTensorFromFlatbuffer(
SimpleMemoryAllocator* allocator, const tflite::Tensor& flatbuffer_tensor,
const tflite::Tensor& flatbuffer_tensor,
const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
ErrorReporter* error_reporter, TfLiteEvalTensor* result) {
*result = {};
@@ -480,7 +318,8 @@ size_t MicroAllocator::GetDefaultTailUsage(bool is_memory_planner_given) {
MicroAllocator::MicroAllocator(SimpleMemoryAllocator* memory_allocator,
MicroMemoryPlanner* memory_planner,
ErrorReporter* error_reporter)
: memory_allocator_(memory_allocator),
: non_persistent_buffer_allocator_(memory_allocator),
persistent_buffer_allocator_(memory_allocator),
memory_planner_(memory_planner),
error_reporter_(error_reporter),
model_is_allocating_(false) {}
@@ -509,7 +348,7 @@ MicroAllocator* MicroAllocator::Create(uint8_t* tensor_arena, size_t arena_size,
// By default create GreedyMemoryPlanner.
// If a different MemoryPlanner is needed, use the other api.
uint8_t* memory_planner_buffer = memory_allocator->AllocateFromTail(
uint8_t* memory_planner_buffer = memory_allocator->AllocatePersistentBuffer(
sizeof(GreedyMemoryPlanner), alignof(GreedyMemoryPlanner));
GreedyMemoryPlanner* memory_planner =
new (memory_planner_buffer) GreedyMemoryPlanner();
@@ -524,7 +363,7 @@ MicroAllocator* MicroAllocator::Create(SimpleMemoryAllocator* memory_allocator,
TFLITE_DCHECK(error_reporter != nullptr);
TFLITE_DCHECK(memory_planner != nullptr);
uint8_t* allocator_buffer = memory_allocator->AllocateFromTail(
uint8_t* allocator_buffer = memory_allocator->AllocatePersistentBuffer(
sizeof(MicroAllocator), alignof(MicroAllocator));
MicroAllocator* allocator = new (allocator_buffer)
MicroAllocator(memory_allocator, memory_planner, error_reporter);
@@ -543,10 +382,12 @@ SubgraphAllocations* MicroAllocator::StartModelAllocation(const Model* model) {
model_is_allocating_ = true;
uint8_t* data_allocator_buffer = memory_allocator_->AllocateFromTail(
sizeof(MicroBuiltinDataAllocator), alignof(MicroBuiltinDataAllocator));
builtin_data_allocator_ =
new (data_allocator_buffer) MicroBuiltinDataAllocator(memory_allocator_);
uint8_t* data_allocator_buffer =
persistent_buffer_allocator_->AllocatePersistentBuffer(
sizeof(MicroBuiltinDataAllocator),
alignof(MicroBuiltinDataAllocator));
builtin_data_allocator_ = new (data_allocator_buffer)
MicroBuiltinDataAllocator(persistent_buffer_allocator_);
if (InitScratchBufferData() != kTfLiteOk) {
return nullptr;
@@ -554,7 +395,7 @@ SubgraphAllocations* MicroAllocator::StartModelAllocation(const Model* model) {
// Allocate struct to store eval tensors, nodes and registrations.
SubgraphAllocations* output = reinterpret_cast<SubgraphAllocations*>(
memory_allocator_->AllocateFromTail(
persistent_buffer_allocator_->AllocatePersistentBuffer(
sizeof(SubgraphAllocations) * model->subgraphs()->size(),
alignof(SubgraphAllocations)));
if (output == nullptr) {
@@ -579,7 +420,7 @@ TfLiteStatus MicroAllocator::FinishModelAllocation(
return kTfLiteError;
}
// TODO(b/187993197): Track scratch buffers for each subgraph.
// Allocate scratch buffer metadata and buffers for variable tensors.
for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs()->size();
subgraph_idx++) {
const SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx);
@@ -587,19 +428,20 @@ TfLiteStatus MicroAllocator::FinishModelAllocation(
TF_LITE_ENSURE_STATUS(AllocateScratchBufferHandles(
scratch_buffer_handles, scratch_buffer_request_count_));
TF_LITE_ENSURE_STATUS(CommitStaticMemoryPlan(
model, subgraph_allocations[subgraph_idx].tensors,
*scratch_buffer_handles, subgraph_idx));
TF_LITE_ENSURE_STATUS(AllocateVariables(
subgraph, subgraph_allocations[subgraph_idx].tensors));
}
// Plan all subgraphs and scratch buffers together.
TF_LITE_ENSURE_STATUS(CommitStaticMemoryPlan(model, subgraph_allocations,
*scratch_buffer_handles));
model_is_allocating_ = false;
return kTfLiteOk;
}
void* MicroAllocator::AllocatePersistentBuffer(size_t bytes) {
return memory_allocator_->AllocateFromTail(bytes,
MicroArenaBufferAlignment());
return persistent_buffer_allocator_->AllocatePersistentBuffer(
bytes, MicroArenaBufferAlignment());
}
TfLiteStatus MicroAllocator::RequestScratchBufferInArena(size_t bytes,
@@ -635,6 +477,7 @@ TfLiteStatus MicroAllocator::RequestScratchBufferInArena(size_t bytes,
// allocating:
current_request->bytes = bytes;
current_request->node_idx = kUnassignedScratchBufferRequestIndex;
current_request->subgraph_idx = subgraph_idx;
// Assign the current request index to the out-param:
*buffer_idx = scratch_buffer_request_count_;
@@ -647,7 +490,7 @@ TfLiteStatus MicroAllocator::RequestScratchBufferInArena(size_t bytes,
TfLiteStatus MicroAllocator::FinishPrepareNodeAllocations(int node_id) {
// When a node has finished preparing, all temp allocations performed by the
// kernel should be cleaned up:
ResetTempAllocations();
TF_LITE_ENSURE_STATUS(ResetTempAllocations());
// Find and update any new scratch buffer requests for the current node:
internal::ScratchBufferRequest* requests = GetScratchBufferRequests();
@@ -665,7 +508,8 @@ TfLiteStatus MicroAllocator::FinishPrepareNodeAllocations(int node_id) {
// Ensure that the head is re-adjusted to allow for another at-most
// kMaxScratchBuffersPerOp scratch buffer requests in the next operator:
TF_LITE_ENSURE_STATUS(memory_allocator_->SetHeadBufferSize(
TF_LITE_ENSURE_STATUS(non_persistent_buffer_allocator_->ResizeBuffer(
scratch_buffer_head_,
sizeof(internal::ScratchBufferRequest) *
(scratch_buffer_request_count_ + kMaxScratchBuffersPerOp),
alignof(internal::ScratchBufferRequest)));
@@ -674,7 +518,8 @@ TfLiteStatus MicroAllocator::FinishPrepareNodeAllocations(int node_id) {
}
size_t MicroAllocator::used_bytes() const {
return memory_allocator_->GetUsedBytes();
return non_persistent_buffer_allocator_->GetNonPersistentUsedBytes() +
persistent_buffer_allocator_->GetPersistentUsedBytes();
}
TfLiteStatus MicroAllocator::AllocateNodeAndRegistrations(
@@ -690,7 +535,7 @@ TfLiteStatus MicroAllocator::AllocateNodeAndRegistrations(
// Initialize NodeAndRegistrations for the subgraph.
NodeAndRegistration* output = reinterpret_cast<NodeAndRegistration*>(
memory_allocator_->AllocateFromTail(
persistent_buffer_allocator_->AllocatePersistentBuffer(
sizeof(NodeAndRegistration) * operators_size,
alignof(NodeAndRegistration)));
if (output == nullptr) {
@@ -703,6 +548,7 @@ TfLiteStatus MicroAllocator::AllocateNodeAndRegistrations(
}
return kTfLiteOk;
}
TfLiteTensor* MicroAllocator::AllocatePersistentTfLiteTensor(
const Model* model, const SubgraphAllocations* subgraph_allocations,
int tensor_index, int subgraph_index) {
@@ -740,6 +586,30 @@ TfLiteTensor* MicroAllocator::AllocatePersistentTfLiteTensor(
return tensor;
}
void MicroAllocator::DeallocateTempTfLiteTensor(TfLiteTensor* tensor) {
TFLITE_DCHECK(tensor != nullptr);
if (tensor->quantization.type == kTfLiteAffineQuantization) {
TFLITE_DCHECK(tensor->quantization.params != nullptr);
TfLiteAffineQuantization* quantization =
reinterpret_cast<TfLiteAffineQuantization*>(
tensor->quantization.params);
non_persistent_buffer_allocator_->DeallocateTemp(
reinterpret_cast<uint8_t*>(quantization->zero_point));
non_persistent_buffer_allocator_->DeallocateTemp(
reinterpret_cast<uint8_t*>(quantization));
}
// Clear the data in case someone still accesses the tensor arena by mistake.
tensor->quantization.type = kTfLiteNoQuantization;
tensor->quantization.params = nullptr;
tensor->data.data = nullptr;
tensor->dims = nullptr;
non_persistent_buffer_allocator_->DeallocateTemp(
reinterpret_cast<uint8_t*>(tensor));
}
TfLiteTensor* MicroAllocator::AllocateTempTfLiteTensor(
const Model* model, const SubgraphAllocations* subgraph_allocations,
int tensor_index, int subgraph_index) {
@@ -749,9 +619,9 @@ TfLiteTensor* MicroAllocator::AllocateTempTfLiteTensor(
// This value is allocated from temporary arena space. It is guaranteed to be
// around for at least the scope of the calling function. Since this struct
// allocation takes place in temp space, there is no need to own or clean it up.
TfLiteTensor* tensor =
reinterpret_cast<TfLiteTensor*>(memory_allocator_->AllocateTemp(
sizeof(TfLiteTensor), alignof(TfLiteTensor)));
TfLiteTensor* tensor = reinterpret_cast<TfLiteTensor*>(
non_persistent_buffer_allocator_->AllocateTemp(sizeof(TfLiteTensor),
alignof(TfLiteTensor)));
// Populate any fields from the flatbuffer. Since this TfLiteTensor struct is
// allocated in the temp section of the arena, ensure that additional
@@ -780,8 +650,12 @@ TfLiteTensor* MicroAllocator::AllocateTempTfLiteTensor(
return tensor;
}
void MicroAllocator::ResetTempAllocations() {
memory_allocator_->ResetTempAllocations();
TfLiteStatus MicroAllocator::ResetTempAllocations() {
return non_persistent_buffer_allocator_->ResetTempAllocations();
}
bool MicroAllocator::IsAllTempDeallocated() {
return non_persistent_buffer_allocator_->IsAllTempDeallocated();
}
TfLiteStatus MicroAllocator::AllocateTfLiteEvalTensors(
@@ -794,8 +668,8 @@ TfLiteStatus MicroAllocator::AllocateTfLiteEvalTensors(
TFLITE_DCHECK(subgraph != nullptr);
size_t alloc_count = subgraph->tensors()->size();
TfLiteEvalTensor* tensors =
reinterpret_cast<TfLiteEvalTensor*>(memory_allocator_->AllocateFromTail(
TfLiteEvalTensor* tensors = reinterpret_cast<TfLiteEvalTensor*>(
persistent_buffer_allocator_->AllocatePersistentBuffer(
sizeof(TfLiteEvalTensor) * alloc_count, alignof(TfLiteEvalTensor)));
if (tensors == nullptr) {
TF_LITE_REPORT_ERROR(
@@ -808,8 +682,8 @@ TfLiteStatus MicroAllocator::AllocateTfLiteEvalTensors(
for (size_t i = 0; i < alloc_count; ++i) {
TfLiteStatus status = internal::InitializeTfLiteEvalTensorFromFlatbuffer(
memory_allocator_, *subgraph->tensors()->Get(i), model->buffers(),
error_reporter_, &tensors[i]);
*subgraph->tensors()->Get(i), model->buffers(), error_reporter_,
&tensors[i]);
if (status != kTfLiteOk) {
TF_LITE_REPORT_ERROR(error_reporter_, "Failed to initialize tensor %d",
i);
@@ -820,6 +694,7 @@ TfLiteStatus MicroAllocator::AllocateTfLiteEvalTensors(
}
return kTfLiteOk;
}
TfLiteStatus MicroAllocator::AllocateVariables(const SubGraph* subgraph,
TfLiteEvalTensor* eval_tensors) {
for (size_t i = 0; i < subgraph->tensors()->size(); ++i) {
@@ -829,8 +704,9 @@ TfLiteStatus MicroAllocator::AllocateVariables(const SubGraph* subgraph,
TF_LITE_ENSURE_STATUS(
TfLiteEvalTensorByteLength(&eval_tensors[i], &buffer_size));
eval_tensors[i].data.data = memory_allocator_->AllocateFromTail(
buffer_size, MicroArenaBufferAlignment());
eval_tensors[i].data.data =
persistent_buffer_allocator_->AllocatePersistentBuffer(
buffer_size, MicroArenaBufferAlignment());
if (eval_tensors[i].data.data == nullptr) {
TF_LITE_REPORT_ERROR(error_reporter_,
@@ -844,8 +720,9 @@ TfLiteStatus MicroAllocator::AllocateVariables(const SubGraph* subgraph,
}
TfLiteTensor* MicroAllocator::AllocatePersistentTfLiteTensorInternal() {
return reinterpret_cast<TfLiteTensor*>(memory_allocator_->AllocateFromTail(
sizeof(TfLiteTensor), alignof(TfLiteTensor)));
return reinterpret_cast<TfLiteTensor*>(
persistent_buffer_allocator_->AllocatePersistentBuffer(
sizeof(TfLiteTensor), alignof(TfLiteTensor)));
}
TfLiteStatus MicroAllocator::PopulateTfLiteTensorFromFlatbuffer(
@@ -855,7 +732,8 @@ TfLiteStatus MicroAllocator::PopulateTfLiteTensorFromFlatbuffer(
// allocations in the tail can be recorded. Once the interpreter has APIs for
// accessing buffers on TfLiteEvalTensor this method can be dropped.
return internal::InitializeTfLiteTensorFromFlatbuffer(
memory_allocator_, allocate_temp,
persistent_buffer_allocator_, non_persistent_buffer_allocator_,
allocate_temp,
*model->subgraphs()->Get(subgraph_idx)->tensors()->Get(tensor_index),
model->buffers(), error_reporter_, tensor);
}
@@ -865,8 +743,8 @@ ErrorReporter* MicroAllocator::error_reporter() const {
}
TfLiteStatus MicroAllocator::CommitStaticMemoryPlan(
const Model* model, TfLiteEvalTensor* eval_tensors,
ScratchBufferHandle* scratch_buffer_handles, int subgraph_idx) {
const Model* model, SubgraphAllocations* allocations,
ScratchBufferHandle* scratch_buffer_handles) {
size_t head_usage = 0;
// Create static memory plan
// 1. Calculate AllocationInfo to know the lifetime of each tensor/buffer.
@@ -878,69 +756,52 @@ TfLiteStatus MicroAllocator::CommitStaticMemoryPlan(
// allocated from the temp section and cleaned up at the bottom of this
// function.
const SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx);
size_t allocation_info_count =
subgraph->tensors()->size() + scratch_buffer_request_count_;
size_t bytes = sizeof(AllocationInfo) * allocation_info_count;
// Allocate an array of AllocationInfo structs from the temp section. This
// struct will be used by AllocationInfoBuilder to find buffer usage.
AllocationInfo* allocation_info = reinterpret_cast<AllocationInfo*>(
memory_allocator_->AllocateTemp(bytes, alignof(AllocationInfo)));
if (allocation_info == nullptr) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Failed to allocate memory for allocation_info, %d bytes required",
bytes);
return kTfLiteError;
}
// Use the AllocationInfoBuilder class to help determine where buffers are
// used in the subgraph.
AllocationInfoBuilder builder(allocation_info, subgraph->tensors()->size(),
scratch_buffer_request_count_, error_reporter_);
AllocationInfoBuilder builder(model, non_persistent_buffer_allocator_,
error_reporter_);
TF_LITE_ENSURE_STATUS(
builder.CreateAllocationInfo(scratch_buffer_request_count_));
const int32_t* offline_planner_offsets = nullptr;
TF_LITE_ENSURE_STATUS(
builder.GetOfflinePlannedOffsets(model, &offline_planner_offsets));
builder.GetOfflinePlannedOffsets(&offline_planner_offsets));
TF_LITE_ENSURE_STATUS(
builder.AddTensors(subgraph, offline_planner_offsets, eval_tensors));
builder.InitializeAllocationInfo(offline_planner_offsets, allocations));
internal::ScratchBufferRequest* scratch_buffer_requests =
GetScratchBufferRequests();
TF_LITE_ENSURE_STATUS(builder.AddScratchBuffers(scratch_buffer_requests,
scratch_buffer_handles));
TF_LITE_ENSURE_STATUS(builder.MarkAllocationLifetimes(
0, scratch_buffer_requests, scratch_buffer_handles, allocations));
int allocation_info_count = builder.AllocationCount();
AllocationInfo* allocation_info = builder.Finish();
// Remaining arena size that memory planner can use for calculating offsets.
size_t remaining_arena_size =
memory_allocator_->GetAvailableMemory(MicroArenaBufferAlignment());
uint8_t* planner_arena = memory_allocator_->AllocateTemp(
non_persistent_buffer_allocator_->GetAvailableMemory(
MicroArenaBufferAlignment());
uint8_t* planner_arena = non_persistent_buffer_allocator_->AllocateTemp(
remaining_arena_size, MicroArenaBufferAlignment());
TF_LITE_ENSURE(error_reporter_, planner_arena != nullptr);
memory_planner_->Init(planner_arena, remaining_arena_size);
TF_LITE_ENSURE_STATUS(CreatePlan(error_reporter_, memory_planner_,
allocation_info, allocation_info_count));
// Reset all temp allocations used above:
memory_allocator_->ResetTempAllocations();
size_t actual_available_arena_size =
memory_allocator_->GetAvailableMemory(MicroArenaBufferAlignment());
// Make sure we have enough arena size.
if (memory_planner_->GetMaximumMemorySize() > actual_available_arena_size) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Arena size is too small for all buffers. Needed %u but only "
"%u was available.",
memory_planner_->GetMaximumMemorySize(), actual_available_arena_size);
return kTfLiteError;
}
// Commit the plan.
TF_LITE_ENSURE_STATUS(CommitPlan(error_reporter_, memory_planner_,
memory_allocator_->GetHeadBuffer(),
allocation_info, allocation_info_count));
TF_LITE_ENSURE_STATUS(
CommitPlan(error_reporter_, memory_planner_,
non_persistent_buffer_allocator_->GetOverlayMemoryAddress(),
allocation_info, allocation_info_count));
// Reset all temp allocations used above:
builder.FreeAllocationInfo();
non_persistent_buffer_allocator_->DeallocateTemp(planner_arena);
TF_LITE_ENSURE_STATUS(
non_persistent_buffer_allocator_->ResetTempAllocations());
TF_LITE_ENSURE_STATUS(
non_persistent_buffer_allocator_->DeallocateResizableBuffer(
scratch_buffer_head_));
#ifdef TF_LITE_SHOW_MEMORY_USE
memory_planner_->PrintMemoryPlan();
#endif
@@ -958,8 +819,9 @@ TfLiteStatus MicroAllocator::CommitStaticMemoryPlan(
// The head is used for storing scratch buffer allocations before finalizing a
// memory plan in this function. Ensure that the head is set to the largest
// memory plan sent through the allocator:
TF_LITE_ENSURE_STATUS(memory_allocator_->SetHeadBufferSize(
max_head_buffer_usage_, MicroArenaBufferAlignment()));
TF_LITE_ENSURE_STATUS(
non_persistent_buffer_allocator_->ReserveNonPersistentOverlayMemory(
max_head_buffer_usage_, MicroArenaBufferAlignment()));
return kTfLiteOk;
}
@@ -975,7 +837,7 @@ TfLiteStatus MicroAllocator::AllocateScratchBufferHandles(
// Allocate a consecutive block of memory to store the scratch buffer handles.
// This alignment ensures quick lookup during inference time for the model:
*scratch_buffer_handles = reinterpret_cast<ScratchBufferHandle*>(
memory_allocator_->AllocateFromTail(
persistent_buffer_allocator_->AllocatePersistentBuffer(
sizeof(ScratchBufferHandle) * handle_count,
alignof(ScratchBufferHandle)));
@@ -990,17 +852,20 @@ TfLiteStatus MicroAllocator::InitScratchBufferData() {
// All requests will be stored in the head section. Each kernel is allowed at
// most kMaxScratchBuffersPerOp requests. Adjust the head to reserve at most
// that many requests to begin:
TF_LITE_ENSURE_STATUS(memory_allocator_->SetHeadBufferSize(
sizeof(internal::ScratchBufferRequest) * kMaxScratchBuffersPerOp,
alignof(internal::ScratchBufferRequest)));
scratch_buffer_head_ =
non_persistent_buffer_allocator_->AllocateResizableBuffer(
sizeof(internal::ScratchBufferRequest) * kMaxScratchBuffersPerOp,
alignof(internal::ScratchBufferRequest));
if (scratch_buffer_head_ == nullptr) {
return kTfLiteError;
}
return kTfLiteOk;
}
internal::ScratchBufferRequest* MicroAllocator::GetScratchBufferRequests() {
return reinterpret_cast<internal::ScratchBufferRequest*>(
AlignPointerUp(memory_allocator_->GetHeadBuffer(),
alignof(internal::ScratchBufferRequest)));
return reinterpret_cast<internal::ScratchBufferRequest*>(AlignPointerUp(
scratch_buffer_head_, alignof(internal::ScratchBufferRequest)));
}
BuiltinDataAllocator* MicroAllocator::GetBuiltinDataAllocator() {

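A minimal sketch of the kernel-side flow these scratch-buffer requests serve, via the stage-gated TfLiteContext callbacks; the op, its user_data layout, and the 1024-byte size are hypothetical:

#include "tensorflow/lite/c/common.h"

namespace {

struct OpData {
  int scratch_buffer_index;  // Filled in by the allocator during Prepare.
};

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  // OpData is assumed to have been allocated in the op's Init.
  OpData* data = static_cast<OpData*>(node->user_data);
  // Legal only in Prepare: the request is recorded in the resizable head
  // buffer and planned together with the tensors in FinishModelAllocation().
  TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
      context, /*bytes=*/1024, &data->scratch_buffer_index));
  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  OpData* data = static_cast<OpData*>(node->user_data);
  // Legal only in Eval: the memory planner has already assigned an offset.
  void* scratch =
      context->GetScratchBuffer(context, data->scratch_buffer_index);
  TF_LITE_ENSURE(context, scratch != nullptr);
  return kTfLiteOk;
}

}  // namespace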
View File

@@ -38,8 +38,9 @@ namespace internal {
// TODO(b/162311891): Drop this method when the interpreter has an API for
// returning buffers on TfLiteEvalTensor.
TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
SimpleMemoryAllocator* allocator, bool allocate_temp,
const tflite::Tensor& flatbuffer_tensor,
IPersistentBufferAllocator* persistent_buffer_allocator,
INonPersistentBufferAllocator* non_persistent_buffer_allocator,
bool allocate_temp, const tflite::Tensor& flatbuffer_tensor,
const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
ErrorReporter* error_reporter, TfLiteTensor* result);
@@ -61,6 +62,7 @@ typedef struct {
// determine the lifetime of the buffer. In AllocationInfo, this buffer will
// have `before` = node_idx and `after` = node_idx.
int node_idx;
int subgraph_idx;
} ScratchBufferRequest;
} // namespace internal
@@ -185,10 +187,16 @@ class MicroAllocator {
const Model* model, const SubgraphAllocations* subgraph_allocations,
int tensor_index, int subgraph_index);
virtual void DeallocateTempTfLiteTensor(TfLiteTensor*);
// Resets all temporary allocations. This method should be called after a
// chain of temp allocations (e.g. a chain of TfLiteTensor objects via
// AllocateTempTfLiteTensor()).
virtual void ResetTempAllocations();
virtual TfLiteStatus ResetTempAllocations();
// Returns true if all temporary buffers including temp TfLiteTensor are
// already deallocated.
virtual bool IsAllTempDeallocated();
// Allocates persistent buffer which has the same life time as the allocator.
// The memory is immediately available and is allocated from the tail of the
@@ -260,8 +268,8 @@ class MicroAllocator {
// ScratchBufferHandle structs that will point to allocated buffers also in
// the head section.
virtual TfLiteStatus CommitStaticMemoryPlan(
const Model* model, TfLiteEvalTensor* eval_tensors,
ScratchBufferHandle* scratch_buffer_handles, int subgraph_idx);
const Model* model, SubgraphAllocations* allocations,
ScratchBufferHandle* scratch_buffer_handles);
// Allocates an array of ScratchBufferHandle structs in the tail section for a
// given number of handles.
@@ -278,7 +286,8 @@ class MicroAllocator {
internal::ScratchBufferRequest* GetScratchBufferRequests();
// A simple memory allocator that always allocates from the arena tail or head.
SimpleMemoryAllocator* memory_allocator_;
INonPersistentBufferAllocator* non_persistent_buffer_allocator_;
IPersistentBufferAllocator* persistent_buffer_allocator_;
// Allocator used to allocate persistent builtin data.
BuiltinDataAllocator* builtin_data_allocator_;
@@ -293,6 +302,9 @@ class MicroAllocator {
// section when a model is allocating.
size_t scratch_buffer_request_count_ = 0;
// Holds the ScratchBufferRequest buffer while a model is allocating.
uint8_t* scratch_buffer_head_ = nullptr;
// Holds the byte length of the memory plan with the largest head usage. Used
// to ensure that multi-tenant allocations can share the head for buffers.
size_t max_head_buffer_usage_ = 0;

View File

@@ -0,0 +1,119 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/micro_context.h"
#include <cstdarg>
#include <cstddef>
#include <cstdint>
#include "tensorflow/lite/micro/micro_error_reporter.h"
namespace tflite {
MicroContext::MicroContext(MicroAllocator* allocator, const Model* model,
MicroGraph* graph)
: allocator_(*allocator), graph_(*graph), model_(model) {}
MicroContext::~MicroContext() {}
void* MicroContext::AllocatePersistentBuffer(size_t bytes) {
return allocator_.AllocatePersistentBuffer(bytes);
}
TfLiteStatus MicroContext::RequestScratchBufferInArena(size_t bytes,
int* buffer_idx) {
return allocator_.RequestScratchBufferInArena(
bytes, graph_.GetCurrentSubgraphIndex(), buffer_idx);
}
void* MicroContext::GetScratchBuffer(int buffer_idx) {
ScratchBufferHandle* handle = scratch_buffer_handles_ + buffer_idx;
return handle->data;
}
TfLiteTensor* MicroContext::AllocateTempTfLiteTensor(int tensor_idx) {
return allocator_.AllocateTempTfLiteTensor(model_, graph_.GetAllocations(),
tensor_idx,
graph_.GetCurrentSubgraphIndex());
}
int MicroContext::GetTensorIndex(int index, int max_size,
const int* tensor_indices) {
if (index >= 0 && index < max_size) {
const int tensor_index = tensor_indices[index];
if (tensor_index != kTfLiteOptionalTensor) {
return tensor_index;
}
}
return -1;
}
TfLiteTensor* MicroContext::AllocateTempInputTensor(const TfLiteNode* node,
int index) {
const int tensor_index =
GetTensorIndex(index, node->inputs->size, node->inputs->data);
if (tensor_index < 0) {
return nullptr;
}
return AllocateTempTfLiteTensor(tensor_index);
}
TfLiteTensor* MicroContext::AllocateTempOutputTensor(const TfLiteNode* node,
int index) {
const int tensor_index =
GetTensorIndex(index, node->outputs->size, node->outputs->data);
if (tensor_index < 0) {
return nullptr;
}
return AllocateTempTfLiteTensor(tensor_index);
}
void MicroContext::DeallocateTempTfLiteTensor(TfLiteTensor* tensor) {
return allocator_.DeallocateTempTfLiteTensor(tensor);
}
TfLiteEvalTensor* MicroContext::GetEvalTensor(int tensor_idx) {
return &graph_.GetAllocations()[graph_.GetCurrentSubgraphIndex()]
.tensors[tensor_idx];
}
void MicroContext::SetScratchBufferHandles(
ScratchBufferHandle* scratch_buffer_handles) {
scratch_buffer_handles_ = scratch_buffer_handles;
}
TfLiteStatus MicroContext::set_external_context(
void* external_context_payload) {
if (external_context_payload == nullptr ||
external_context_payload_ != nullptr) {
MicroPrintf(
"Attempting to set external context to %x but it was %x already",
external_context_payload, external_context_payload_);
return kTfLiteError;
}
external_context_payload_ = external_context_payload;
return kTfLiteOk;
}
void MicroContextReportOpError(struct TfLiteContext* context,
const char* format, ...) {
va_list args;
va_start(args, format);
GetMicroErrorReporter()->Report(format, args);
va_end(args);
}
} // namespace tflite

View File

@@ -0,0 +1,154 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_CONTEXT_H_
#define TENSORFLOW_LITE_MICRO_MICRO_CONTEXT_H_
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/micro_graph.h"
namespace tflite {
// MicroContext is eventually going to become the API between TFLM and the
// kernels, replacing all the functions in TfLiteContext. The end state is for
// kernels to have code like:
//
// MicroContext* micro_context = GetMicroContext(context);
// micro_context-><TFLM kernel API>
class MicroContext {
public:
// Does not take any ownership, and all pointers must refer to valid objects
// that outlive the one constructed.
explicit MicroContext(MicroAllocator* allocator, const Model* model,
MicroGraph* graph);
virtual ~MicroContext();
// Allocates a persistent buffer with the same lifetime as the interpreter.
// Returns nullptr on failure.
// The memory is allocated from the tail.
// This method is only available in Init or Prepare stage.
// Virtual so that it can be faked for kernel tests.
virtual void* AllocatePersistentBuffer(size_t bytes);
// Request a scratch buffer in the arena through static memory planning.
// This method is only available in Prepare stage and the buffer is allocated
// by the interpreter between Prepare and Eval stage. In Eval stage,
// GetScratchBuffer API can be used to fetch the address.
// Virtual so that it can be faked for kernel tests.
virtual TfLiteStatus RequestScratchBufferInArena(size_t bytes,
int* buffer_idx);
// Get the scratch buffer pointer.
// This method is only available in Eval stage.
// Virtual so that it can be faked for kernel tests.
virtual void* GetScratchBuffer(int buffer_idx);
// Returns a temporary TfLiteTensor struct for a given index.
// Virtual so that it can be faked for kernel tests.
virtual TfLiteTensor* AllocateTempTfLiteTensor(int tensor_idx);
// Returns a temporary TfLiteTensor struct for the specified input tensor of a
// given node. This is the recommended API over the deprecated
// GetInput/GetInputSafe to get a temp input tensor. The returned tensor shall
// be freed via calling DeallocateTempTfLiteTensor.
virtual TfLiteTensor* AllocateTempInputTensor(const TfLiteNode* node,
int index);
// Returns a temporary TfLiteTensor struct for the specified output tensor of
// a given node. This is the recommended API over the deprecated
// GetOutput/GetOutputSafe to get a temp output tensor. The returned tensor
// shall be freed via calling DeallocateTempTfLiteTensor.
virtual TfLiteTensor* AllocateTempOutputTensor(const TfLiteNode* node,
int index);
// Deallocates a temp TfLiteTensor.
// Virtual so that it can be faked for kernel tests.
virtual void DeallocateTempTfLiteTensor(TfLiteTensor* tensor);
// Returns a TfLiteEvalTensor struct for a given index.
// Virtual so that it can be faked for kernel tests.
virtual TfLiteEvalTensor* GetEvalTensor(int tensor_idx);
// Does not take ownership of the pointer, and the pointer must refer to a
// valid object that outlives this class instance.
// This can only be called once to set one external context.
TfLiteStatus set_external_context(void* external_context_payload);
void* external_context() { return external_context_payload_; }
MicroGraph& graph() { return graph_; }
// Sets the pointer to a list of ScratchBufferHandle instances.
// Not an API between TFLM and kernels. Primarily used by the framework for
// housekeeping in MicroContext.
void SetScratchBufferHandles(ScratchBufferHandle* scratch_buffer_handles);
private:
// Returns the tensor index as tensor_indices[index]. tensor_indices is of
// length max_size. Returns -1 if index is not in the valid range of
// tensor_indices.
int GetTensorIndex(int index, int max_size, const int* tensor_indices);
MicroAllocator& allocator_;
MicroGraph& graph_;
const Model* model_;
ScratchBufferHandle* scratch_buffer_handles_ = nullptr;
void* external_context_payload_ = nullptr;
TF_LITE_REMOVE_VIRTUAL_DELETE
};
inline MicroContext* GetMicroContext(const struct TfLiteContext* context) {
return reinterpret_cast<MicroContext*>(context->impl_);
}
// Deprecated API. Prefer using the MicroContext API directly from the
// kernels.
// TODO(b/213010668): migrate all existing kernels to use MicroContext, delete
// these functions, and remove corresponding members from the TfLiteContext
// struct for TFLM.
inline void* MicroContextAllocatePersistentBuffer(TfLiteContext* ctx,
size_t bytes) {
return GetMicroContext(ctx)->AllocatePersistentBuffer(bytes);
}
inline TfLiteStatus MicroContextRequestScratchBufferInArena(TfLiteContext* ctx,
size_t bytes,
int* buffer_idx) {
return GetMicroContext(ctx)->RequestScratchBufferInArena(bytes, buffer_idx);
}
inline void* MicroContextGetScratchBuffer(TfLiteContext* ctx, int buffer_idx) {
return GetMicroContext(ctx)->GetScratchBuffer(buffer_idx);
}
inline TfLiteTensor* MicroContextGetTensor(const struct TfLiteContext* context,
int tensor_idx) {
return GetMicroContext(context)->AllocateTempTfLiteTensor(tensor_idx);
}
inline TfLiteEvalTensor* MicroContextGetEvalTensor(
const struct TfLiteContext* context, int tensor_idx) {
return GetMicroContext(context)->GetEvalTensor(tensor_idx);
}
inline TfLiteExternalContext* MicroContextGetExternalContext(
TfLiteContext* context, TfLiteExternalContextType unused) {
return reinterpret_cast<TfLiteExternalContext*>(
GetMicroContext(context)->external_context());
}
// Requests that an error be reported with format string msg.
void MicroContextReportOpError(struct TfLiteContext* context,
const char* format, ...);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MICRO_CONTEXT_H_
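A hedged sketch of the recommended temp-tensor flow from a kernel's Prepare function, using the MicroContext API declared above; the kernel itself is hypothetical:

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/micro_context.h"

namespace {

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  tflite::MicroContext* micro_context = tflite::GetMicroContext(context);

  TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
  TF_LITE_ENSURE(context, output != nullptr);

  TF_LITE_ENSURE_EQ(context, input->type, output->type);

  // Every temp tensor must be handed back so that the allocator's temp
  // book-keeping (DeallocateTemp()/ResetTempAllocations()) stays balanced.
  micro_context->DeallocateTempTfLiteTensor(input);
  micro_context->DeallocateTempTfLiteTensor(output);
  return kTfLiteOk;
}

}  // namespace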

View File

@@ -209,6 +209,9 @@ TfLiteStatus MicroGraph::ResetVariableTensors() {
}
}
}
if (resource_variables_ != nullptr) {
resource_variables_->ResetAll();
}
return kTfLiteOk;
}
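With this addition, resetting variable tensors through the interpreter also clears any registered resource variables. A minimal hedged sketch, assuming an already-constructed interpreter:

#include "tensorflow/lite/micro/micro_interpreter.h"

// Zeroes variable tensors in all subgraphs and, after this change, all
// resource variable buffers as well.
TfLiteStatus ResetState(tflite::MicroInterpreter& interpreter) {
  return interpreter.ResetVariableTensors();
}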

View File

@@ -51,7 +51,8 @@ MicroInterpreter::MicroInterpreter(const Model* model,
tensors_allocated_(false),
initialization_status_(kTfLiteError),
input_tensors_(nullptr),
output_tensors_(nullptr) {
output_tensors_(nullptr),
micro_context_(&allocator_, model_, &graph_) {
Init(profiler);
}
@@ -69,7 +70,8 @@ MicroInterpreter::MicroInterpreter(const Model* model,
tensors_allocated_(false),
initialization_status_(kTfLiteError),
input_tensors_(nullptr),
output_tensors_(nullptr) {
output_tensors_(nullptr),
micro_context_(&allocator_, model_, &graph_) {
Init(profiler);
}
@@ -80,12 +82,10 @@ MicroInterpreter::~MicroInterpreter() {
}
void MicroInterpreter::Init(MicroProfiler* profiler) {
context_.impl_ = static_cast<void*>(this);
context_.ReportError = ReportOpError;
context_.GetTensor = GetTensor;
context_.ReportError = ReportOpError;
context_.GetTensor = GetTensor;
context_.GetEvalTensor = GetEvalTensor;
context_.impl_ = static_cast<void*>(&micro_context_);
context_.ReportError = MicroContextReportOpError;
context_.GetTensor = MicroContextGetTensor;
context_.GetEvalTensor = MicroContextGetEvalTensor;
context_.profiler = profiler;
initialization_status_ = kTfLiteOk;
@@ -200,18 +200,18 @@ TfLiteStatus MicroInterpreter::AllocateTensors() {
TF_LITE_ENSURE_STATUS(PrepareNodeAndRegistrationDataFromFlatbuffer());
// Only allow AllocatePersistentBuffer in Init stage.
context_.AllocatePersistentBuffer = AllocatePersistentBuffer;
context_.AllocatePersistentBuffer = MicroContextAllocatePersistentBuffer;
context_.RequestScratchBufferInArena = nullptr;
context_.GetScratchBuffer = nullptr;
context_.GetExecutionPlan = GetGraph;
context_.GetExternalContext = nullptr;
TF_LITE_ENSURE_STATUS(graph_.InitSubgraphs());
// Both AllocatePersistentBuffer and RequestScratchBufferInArena are
// available in the Prepare stage.
context_.RequestScratchBufferInArena = RequestScratchBufferInArena;
// GetExternalContext become available in Prepare stage.
context_.GetExternalContext = GetExternalContext;
context_.RequestScratchBufferInArena =
MicroContextRequestScratchBufferInArena;
// external_context becomes available in the Prepare stage.
context_.GetExternalContext = MicroContextGetExternalContext;
TF_LITE_ENSURE_STATUS(graph_.PrepareSubgraphs());
@@ -219,12 +219,14 @@ TfLiteStatus MicroInterpreter::AllocateTensors() {
// allowed. Kernels can only fetch scratch buffers via GetScratchBuffer.
context_.AllocatePersistentBuffer = nullptr;
context_.RequestScratchBufferInArena = nullptr;
context_.GetScratchBuffer = GetScratchBuffer;
context_.GetScratchBuffer = MicroContextGetScratchBuffer;
TF_LITE_ENSURE_OK(&context_, allocator_.FinishModelAllocation(
model_, graph_.GetAllocations(),
&scratch_buffer_handles_));
micro_context_.SetScratchBufferHandles(scratch_buffer_handles_);
// TODO(b/162311891): Drop these allocations when the interpreter supports
// handling buffers from TfLiteEvalTensor.
input_tensors_ =
@@ -320,97 +322,9 @@ TfLiteStatus MicroInterpreter::ResetVariableTensors() {
return graph_.ResetVariableTensors();
}
void* MicroInterpreter::AllocatePersistentBuffer(TfLiteContext* ctx,
size_t bytes) {
return reinterpret_cast<MicroInterpreter*>(ctx->impl_)
->allocator_.AllocatePersistentBuffer(bytes);
}
TfLiteStatus MicroInterpreter::RequestScratchBufferInArena(TfLiteContext* ctx,
size_t bytes,
int* buffer_idx) {
MicroInterpreter* interpreter =
reinterpret_cast<MicroInterpreter*>(ctx->impl_);
return interpreter->allocator_.RequestScratchBufferInArena(
bytes, interpreter->graph_.GetCurrentSubgraphIndex(), buffer_idx);
}
void* MicroInterpreter::GetScratchBuffer(TfLiteContext* ctx, int buffer_idx) {
MicroInterpreter* interpreter =
reinterpret_cast<MicroInterpreter*>(ctx->impl_);
ScratchBufferHandle* handle =
interpreter->scratch_buffer_handles_ + buffer_idx;
return handle->data;
}
void MicroInterpreter::ReportOpError(struct TfLiteContext* context,
const char* format, ...) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
MicroInterpreter* interpreter =
static_cast<MicroInterpreter*>(context->impl_);
va_list args;
va_start(args, format);
TF_LITE_REPORT_ERROR(interpreter->error_reporter_, format, args);
va_end(args);
#endif
}
TfLiteTensor* MicroInterpreter::GetTensor(const struct TfLiteContext* context,
int tensor_idx) {
MicroInterpreter* interpreter =
static_cast<MicroInterpreter*>(context->impl_);
return interpreter->allocator_.AllocateTempTfLiteTensor(
interpreter->model_, interpreter->graph_.GetAllocations(), tensor_idx,
interpreter->get_subgraph_index());
}
TfLiteEvalTensor* MicroInterpreter::GetEvalTensor(
const struct TfLiteContext* context, int tensor_idx) {
MicroInterpreter* interpreter =
reinterpret_cast<MicroInterpreter*>(context->impl_);
return &interpreter->graph_
.GetAllocations()[interpreter->get_subgraph_index()]
.tensors[tensor_idx];
}
TfLiteStatus MicroInterpreter::GetGraph(struct TfLiteContext* context,
TfLiteIntArray** args) {
MicroInterpreter* interpreter =
reinterpret_cast<MicroInterpreter*>(context->impl_);
*args = reinterpret_cast<TfLiteIntArray*>(&interpreter->graph_);
return kTfLiteOk;
}
TfLiteStatus MicroInterpreter::SetMicroExternalContext(
void* external_context_payload) {
if (external_context_payload == nullptr ||
external_context_payload_ != nullptr) {
MicroPrintf(
"Attempting to set external context to %x but it was %x already",
external_context_payload, external_context_payload_);
return kTfLiteError;
}
external_context_payload_ = external_context_payload;
return kTfLiteOk;
}
void* MicroInterpreter::GetMicroExternalContext() {
return external_context_payload_;
}
// This callback is an implementation for TfLiteContext::GetExternalContext
// interface.
TfLiteExternalContext* MicroInterpreter::GetExternalContext(
TfLiteContext* context, TfLiteExternalContextType unused) {
// TODO(b/205754757): TfLiteExternalContextType is unused in TFLM. This
// function is only called by the framework as a way to conform to existing
// interface. Users should use GetMicroExternalContext api in kernel_util.h to
// get context and shall not directly use this function.
MicroInterpreter* interpreter =
reinterpret_cast<MicroInterpreter*>(context->impl_);
return reinterpret_cast<TfLiteExternalContext*>(
interpreter->GetMicroExternalContext());
return micro_context_.set_external_context(external_context_payload);
}
} // namespace tflite
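For reference, a hedged sketch of the lifecycle those stage-gated callbacks enforce. The model buffer, arena size, and the exact constructor overload are assumptions; check micro_interpreter.h in your tree for the signatures it actually provides:

#include <cstddef>
#include <cstdint>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/schema/schema_generated.h"

constexpr size_t kArenaSize = 16 * 1024;    // Hypothetical arena size.
alignas(16) uint8_t g_arena[kArenaSize];
extern const unsigned char g_model_data[];  // Hypothetical model flatbuffer.

TfLiteStatus RunOnce() {
  const tflite::Model* model = tflite::GetModel(g_model_data);
  tflite::AllOpsResolver resolver;
  tflite::MicroInterpreter interpreter(model, resolver, g_arena, kArenaSize,
                                       tflite::GetMicroErrorReporter());

  // AllocateTensors() drives Init (persistent buffers only) and Prepare
  // (persistent buffers + scratch requests); once it returns, kernels may only
  // fetch scratch buffers via GetScratchBuffer() during Invoke().
  TF_LITE_ENSURE_STATUS(interpreter.AllocateTensors());
  return interpreter.Invoke();
}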

View File

@@ -24,6 +24,7 @@ limitations under the License.
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/micro_context.h"
#include "tensorflow/lite/micro/micro_graph.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/micro/micro_profiler.h"
@@ -79,10 +80,6 @@ class MicroInterpreter {
// one external context.
TfLiteStatus SetMicroExternalContext(void* external_context_payload);
// This function is used by the TfLiteContext::GetExternalContext() to get the
// external context.
void* GetMicroExternalContext();
TfLiteTensor* input(size_t index);
size_t inputs_size() const {
return model_->subgraphs()->Get(0)->inputs()->size();
@@ -150,26 +147,6 @@ class MicroInterpreter {
// Gets the current subgraph index used from within context methods.
int get_subgraph_index() { return graph_.GetCurrentSubgraphIndex(); }
// Static functions that are bound to the TfLiteContext instance:
static void* AllocatePersistentBuffer(TfLiteContext* ctx, size_t bytes);
static TfLiteStatus RequestScratchBufferInArena(TfLiteContext* ctx,
size_t bytes,
int* buffer_idx);
static void* GetScratchBuffer(TfLiteContext* ctx, int buffer_idx);
static void ReportOpError(struct TfLiteContext* context, const char* format,
...);
static TfLiteTensor* GetTensor(const struct TfLiteContext* context,
int tensor_idx);
static TfLiteEvalTensor* GetEvalTensor(const struct TfLiteContext* context,
int tensor_idx);
static TfLiteStatus GetGraph(struct TfLiteContext* context,
TfLiteIntArray** args);
// This callback is an implementation for TfLiteContext::GetExternalContext
// interface.
static TfLiteExternalContext* GetExternalContext(
TfLiteContext* context, TfLiteExternalContextType unused);
const Model* model_;
const MicroOpResolver& op_resolver_;
ErrorReporter* error_reporter_;
@@ -181,12 +158,13 @@ class MicroInterpreter {
TfLiteStatus initialization_status_;
ScratchBufferHandle* scratch_buffer_handles_ = nullptr;
void* external_context_payload_ = nullptr;
// TODO(b/162311891): Clean these pointers up when this class supports buffers
// from TfLiteEvalTensor.
TfLiteTensor** input_tensors_;
TfLiteTensor** output_tensors_;
MicroContext micro_context_;
};
} // namespace tflite

View File

@@ -153,6 +153,16 @@ class MicroMutableOpResolver : public MicroOpResolver {
Register_BATCH_TO_SPACE_ND(), ParseBatchToSpaceNd);
}
TfLiteStatus AddBroadcastArgs() {
return AddBuiltin(BuiltinOperator_BROADCAST_ARGS, Register_BROADCAST_ARGS(),
ParseBroadcastArgs);
}
TfLiteStatus AddBroadcastTo() {
return AddBuiltin(BuiltinOperator_BROADCAST_TO, Register_BROADCAST_TO(),
ParseBroadcastTo);
}
TfLiteStatus AddCallOnce() {
return AddBuiltin(BuiltinOperator_CALL_ONCE, Register_CALL_ONCE(),
ParseCallOnce);
@@ -356,6 +366,11 @@ class MicroMutableOpResolver : public MicroOpResolver {
tflite::Register_MAX_POOL_2D(), ParsePool);
}
TfLiteStatus AddMirrorPad() {
return AddBuiltin(BuiltinOperator_MIRROR_PAD, tflite::Register_MIRROR_PAD(),
ParseMirrorPad);
}
TfLiteStatus AddMean() {
return AddBuiltin(BuiltinOperator_MEAN, tflite::ops::micro::Register_MEAN(),
ParseReducer);
@@ -550,6 +565,10 @@ class MicroMutableOpResolver : public MicroOpResolver {
ParseVarHandle);
}
TfLiteStatus AddWhile() {
return AddBuiltin(BuiltinOperator_WHILE, Register_WHILE(), ParseWhile);
}
TfLiteStatus AddZerosLike() {
return AddBuiltin(BuiltinOperator_ZEROS_LIKE, Register_ZEROS_LIKE(),
ParseZerosLike);
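A short hedged sketch of selective registration with MicroMutableOpResolver, exercising some of the newly added helpers; the op mix and the template capacity of 4 are arbitrary:

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

TfLiteStatus RegisterSelectedOps(tflite::MicroMutableOpResolver<4>& resolver) {
  TF_LITE_ENSURE_STATUS(resolver.AddFullyConnected());
  TF_LITE_ENSURE_STATUS(resolver.AddMirrorPad());
  TF_LITE_ENSURE_STATUS(resolver.AddWhile());
  TF_LITE_ENSURE_STATUS(resolver.AddZerosLike());
  return kTfLiteOk;
}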

View File

@@ -124,6 +124,14 @@ TfLiteStatus MicroResourceVariables::Assign(int id,
return kTfLiteOk;
}
TfLiteStatus MicroResourceVariables::ResetAll() {
for (int i = 0; i < num_resource_variables_; i++) {
MicroResourceVariable variable = resource_variables_[i];
memset(variable.resource_buffer, 0, variable.bytes);
}
return kTfLiteOk;
}
int MicroResourceVariables::FindId(const char* container,
const char* shared_name) {
for (int i = 0; i < num_resource_variables_; i++) {

View File

@@ -51,6 +51,9 @@ class MicroResourceVariables {
// in order to allocate the resource buffer.
TfLiteStatus Assign(int id, const TfLiteEvalTensor* tensor);
// Zeros out all resource buffers.
TfLiteStatus ResetAll();
private:
int FindId(const char* container, const char* shared_name);

View File

@@ -27,12 +27,12 @@ MockMicroGraph::MockMicroGraph(SimpleMemoryAllocator* allocator)
free_count_(0) {
memset(invoke_counts_, 0, sizeof(invoke_counts_));
mock_tensor_ =
reinterpret_cast<TfLiteEvalTensor*>(allocator_->AllocateFromTail(
reinterpret_cast<TfLiteEvalTensor*>(allocator_->AllocatePersistentBuffer(
sizeof(TfLiteEvalTensor), alignof(TfLiteEvalTensor)));
int* dims_array = reinterpret_cast<int*>(
allocator_->AllocateFromTail(3 * sizeof(int), alignof(int)));
allocator_->AllocatePersistentBuffer(3 * sizeof(int), alignof(int)));
float* data_array = reinterpret_cast<float*>(
allocator_->AllocateFromTail(2 * sizeof(float), alignof(float)));
allocator_->AllocatePersistentBuffer(2 * sizeof(float), alignof(float)));
int dims[] = {2, 1, 2};
memcpy(dims_array, dims, 3 * sizeof(int));
mock_tensor_->dims = testing::IntArrayFromInts(dims_array);

View File

@@ -59,12 +59,13 @@ RecordingMicroAllocator* RecordingMicroAllocator::Create(
arena_size);
TFLITE_DCHECK(simple_memory_allocator != nullptr);
uint8_t* memory_planner_buffer = simple_memory_allocator->AllocateFromTail(
sizeof(GreedyMemoryPlanner), alignof(GreedyMemoryPlanner));
uint8_t* memory_planner_buffer =
simple_memory_allocator->AllocatePersistentBuffer(
sizeof(GreedyMemoryPlanner), alignof(GreedyMemoryPlanner));
GreedyMemoryPlanner* memory_planner =
new (memory_planner_buffer) GreedyMemoryPlanner();
uint8_t* allocator_buffer = simple_memory_allocator->AllocateFromTail(
uint8_t* allocator_buffer = simple_memory_allocator->AllocatePersistentBuffer(
sizeof(RecordingMicroAllocator), alignof(RecordingMicroAllocator));
RecordingMicroAllocator* allocator =
new (allocator_buffer) RecordingMicroAllocator(
@@ -108,11 +109,11 @@ void RecordingMicroAllocator::PrintAllocations() const {
TF_LITE_REPORT_ERROR(
error_reporter(),
"[RecordingMicroAllocator] Arena allocation head %d bytes",
recording_memory_allocator_->GetHeadUsedBytes());
recording_memory_allocator_->GetNonPersistentUsedBytes());
TF_LITE_REPORT_ERROR(
error_reporter(),
"[RecordingMicroAllocator] Arena allocation tail %d bytes",
recording_memory_allocator_->GetTailUsedBytes());
recording_memory_allocator_->GetPersistentUsedBytes());
PrintRecordedAllocation(RecordedAllocationType::kTfLiteEvalTensorData,
"TfLiteEvalTensor data", "allocations");
PrintRecordedAllocation(RecordedAllocationType::kPersistentTfLiteTensorData,

View File

@@ -39,8 +39,8 @@ RecordingSimpleMemoryAllocator* RecordingSimpleMemoryAllocator::Create(
RecordingSimpleMemoryAllocator(error_reporter, buffer_head, buffer_size);
uint8_t* allocator_buffer =
tmp.AllocateFromTail(sizeof(RecordingSimpleMemoryAllocator),
alignof(RecordingSimpleMemoryAllocator));
tmp.AllocatePersistentBuffer(sizeof(RecordingSimpleMemoryAllocator),
alignof(RecordingSimpleMemoryAllocator));
// Use the default copy constructor to populate internal states.
return new (allocator_buffer) RecordingSimpleMemoryAllocator(tmp);
}
@@ -57,11 +57,11 @@ size_t RecordingSimpleMemoryAllocator::GetAllocatedCount() const {
return alloc_count_;
}
TfLiteStatus RecordingSimpleMemoryAllocator::SetHeadBufferSize(
size_t size, size_t alignment) {
TfLiteStatus RecordingSimpleMemoryAllocator::ResizeBuffer(
uint8_t* resizable_buf, size_t size, size_t alignment) {
const uint8_t* previous_head = head();
TfLiteStatus status =
SimpleMemoryAllocator::SetHeadBufferSize(size, alignment);
SimpleMemoryAllocator::ResizeBuffer(resizable_buf, size, alignment);
if (status == kTfLiteOk) {
used_bytes_ += head() - previous_head;
requested_head_bytes_ = size;
@@ -69,10 +69,11 @@ TfLiteStatus RecordingSimpleMemoryAllocator::SetHeadBufferSize(
return status;
}
uint8_t* RecordingSimpleMemoryAllocator::AllocateFromTail(size_t size,
size_t alignment) {
uint8_t* RecordingSimpleMemoryAllocator::AllocatePersistentBuffer(
size_t size, size_t alignment) {
const uint8_t* previous_tail = tail();
uint8_t* result = SimpleMemoryAllocator::AllocateFromTail(size, alignment);
uint8_t* result =
SimpleMemoryAllocator::AllocatePersistentBuffer(size, alignment);
if (result != nullptr) {
used_bytes_ += previous_tail - tail();
requested_tail_bytes_ += size;

View File

@@ -47,8 +47,9 @@ class RecordingSimpleMemoryAllocator : public SimpleMemoryAllocator {
// Returns the number of alloc calls from the head or tail.
size_t GetAllocatedCount() const;
TfLiteStatus SetHeadBufferSize(size_t size, size_t alignment) override;
uint8_t* AllocateFromTail(size_t size, size_t alignment) override;
TfLiteStatus ResizeBuffer(uint8_t* resizable_buf, size_t size,
size_t alignment) override;
uint8_t* AllocatePersistentBuffer(size_t size, size_t alignment) override;
private:
size_t requested_head_bytes_;

View File

@@ -19,10 +19,13 @@ limitations under the License.
#include <cstdint>
#include <new>
#include "tensorflow/lite/c/c_api_types.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
namespace tflite {
@@ -52,7 +55,7 @@ SimpleMemoryAllocator* SimpleMemoryAllocator::Create(
// Allocate enough bytes from the buffer to create a SimpleMemoryAllocator.
// The new instance will use the current adjusted tail buffer from the tmp
// allocator instance.
uint8_t* allocator_buffer = tmp.AllocateFromTail(
uint8_t* allocator_buffer = tmp.AllocatePersistentBuffer(
sizeof(SimpleMemoryAllocator), alignof(SimpleMemoryAllocator));
// Use the default copy constructor to populate internal states.
return new (allocator_buffer) SimpleMemoryAllocator(tmp);
@@ -60,13 +63,37 @@ SimpleMemoryAllocator* SimpleMemoryAllocator::Create(
SimpleMemoryAllocator::~SimpleMemoryAllocator() {}
TfLiteStatus SimpleMemoryAllocator::SetHeadBufferSize(size_t size,
size_t alignment) {
if (head_ != temp_) {
uint8_t* SimpleMemoryAllocator::AllocateResizableBuffer(size_t size,
size_t alignment) {
// Only supports one resizable buffer, which starts at the buffer head.
uint8_t* expect_resizable_buf = AlignPointerUp(buffer_head_, alignment);
if (ResizeBuffer(expect_resizable_buf, size, alignment) == kTfLiteOk) {
return expect_resizable_buf;
}
return nullptr;
}
TfLiteStatus SimpleMemoryAllocator::DeallocateResizableBuffer(
uint8_t* resizable_buf) {
return ResizeBuffer(resizable_buf, 0, 1);
}
TfLiteStatus SimpleMemoryAllocator::ReserveNonPersistentOverlayMemory(
size_t size, size_t alignment) {
uint8_t* expect_resizable_buf = AlignPointerUp(buffer_head_, alignment);
return ResizeBuffer(expect_resizable_buf, size, alignment);
}
TfLiteStatus SimpleMemoryAllocator::ResizeBuffer(uint8_t* resizable_buf,
size_t size,
size_t alignment) {
// Only supports one resizable buffer, which starts at the buffer head.
uint8_t* expect_resizable_buf = AlignPointerUp(buffer_head_, alignment);
if (head_ != temp_ || resizable_buf != expect_resizable_buf) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Internal error: SetHeadBufferSize() needs to be called "
"after ResetTempAllocations().");
"Internal error: either buffer is not resizable or "
"ResetTempAllocations() is not called before ResizeBuffer().");
return kTfLiteError;
}
@@ -75,7 +102,7 @@ TfLiteStatus SimpleMemoryAllocator::SetHeadBufferSize(size_t size,
if (available_memory < size) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Failed to set head size. Requested: %u, available %u, missing: %u",
"Failed to resize buffer. Requested: %u, available %u, missing: %u",
size, available_memory, size - available_memory);
return kTfLiteError;
}
@@ -85,8 +112,8 @@ TfLiteStatus SimpleMemoryAllocator::SetHeadBufferSize(size_t size,
return kTfLiteOk;
}
uint8_t* SimpleMemoryAllocator::AllocateFromTail(size_t size,
size_t alignment) {
uint8_t* SimpleMemoryAllocator::AllocatePersistentBuffer(size_t size,
size_t alignment) {
uint8_t* const aligned_result = AlignPointerDown(tail_ - size, alignment);
if (aligned_result < head_) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
@@ -113,18 +140,47 @@ uint8_t* SimpleMemoryAllocator::AllocateTemp(size_t size, size_t alignment) {
return nullptr;
}
temp_ = aligned_result + size;
temp_buffer_ptr_check_sum_ ^= (reinterpret_cast<intptr_t>(aligned_result));
temp_buffer_count_++;
return aligned_result;
}
void SimpleMemoryAllocator::ResetTempAllocations() { temp_ = head_; }
uint8_t* SimpleMemoryAllocator::GetHeadBuffer() const { return buffer_head_; }
size_t SimpleMemoryAllocator::GetHeadUsedBytes() const {
return head_ - buffer_head_;
void SimpleMemoryAllocator::DeallocateTemp(uint8_t* temp_buf) {
temp_buffer_ptr_check_sum_ ^= (reinterpret_cast<intptr_t>(temp_buf));
temp_buffer_count_--;
}
size_t SimpleMemoryAllocator::GetTailUsedBytes() const {
bool SimpleMemoryAllocator::IsAllTempDeallocated() {
if (temp_buffer_count_ != 0 || temp_buffer_ptr_check_sum_ != 0) {
MicroPrintf(
"Number of allocated temp buffers: %d. Checksum passing status: %d",
temp_buffer_count_, !temp_buffer_ptr_check_sum_);
return false;
}
return true;
}
TfLiteStatus SimpleMemoryAllocator::ResetTempAllocations() {
// TODO(b/209453859): enable the error check based on IsAllTempDeallocated()
// after all AllocateTemp() calls have been paired with DeallocateTemp().
if (!IsAllTempDeallocated()) {
MicroPrintf(
"All temp buffers must be freed before calling ResetTempAllocations()");
return kTfLiteError;
}
temp_ = head_;
return kTfLiteOk;
}
uint8_t* SimpleMemoryAllocator::GetOverlayMemoryAddress() const {
return buffer_head_;
}
size_t SimpleMemoryAllocator::GetNonPersistentUsedBytes() const {
return std::max(head_ - buffer_head_, temp_ - buffer_head_);
}
size_t SimpleMemoryAllocator::GetPersistentUsedBytes() const {
return buffer_tail_ - tail_;
}
@@ -135,7 +191,7 @@ size_t SimpleMemoryAllocator::GetAvailableMemory(size_t alignment) const {
}
size_t SimpleMemoryAllocator::GetUsedBytes() const {
return GetBufferSize() - (tail_ - temp_);
return GetPersistentUsedBytes() + GetNonPersistentUsedBytes();
}
size_t SimpleMemoryAllocator::GetBufferSize() const {

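A hedged usage sketch of the refactored allocator API above: one resizable (non-persistent) buffer at the head, persistent buffers at the tail, and paired temp allocations. Sizes and alignments are arbitrary:

#include <cstdint>

#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"

void ExerciseAllocator() {
  alignas(16) static uint8_t arena[1024];
  tflite::SimpleMemoryAllocator* allocator =
      tflite::SimpleMemoryAllocator::Create(tflite::GetMicroErrorReporter(),
                                            arena, sizeof(arena));

  // Persistent allocations come from the tail and live as long as the arena.
  uint8_t* persistent =
      allocator->AllocatePersistentBuffer(/*size=*/64, /*alignment=*/16);

  // Only one resizable buffer is supported; it occupies the head.
  uint8_t* resizable =
      allocator->AllocateResizableBuffer(/*size=*/128, /*alignment=*/16);
  allocator->ResizeBuffer(resizable, /*size=*/256, /*alignment=*/16);

  // Temp allocations must be deallocated (or reset) before the next resize.
  uint8_t* temp = allocator->AllocateTemp(/*size=*/32, /*alignment=*/16);
  allocator->DeallocateTemp(temp);
  allocator->ResetTempAllocations();

  allocator->DeallocateResizableBuffer(resizable);
  (void)persistent;
}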
View File

@@ -22,13 +22,15 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/ibuffer_allocator.h"
namespace tflite {
// TODO(petewarden): This allocator never frees up or reuses any memory, even
// though we have enough information about lifetimes of the tensors to do so.
// This makes it pretty wasteful, so we should use a more intelligent method.
class SimpleMemoryAllocator {
class SimpleMemoryAllocator : public INonPersistentBufferAllocator,
public IPersistentBufferAllocator {
public:
// TODO(b/157615197): Cleanup constructors/destructor and use factory
// functions.
@@ -43,17 +45,33 @@ class SimpleMemoryAllocator {
uint8_t* buffer_head,
size_t buffer_size);
// Adjusts the head (lowest address and moving upwards) memory allocation to a
// given size. Calls to this method will also invalidate all temporary
// allocation values (it sets the location of temp space at the end of the
// head section). This call will fail if a chain of allocations through
// AllocateTemp() have not been cleaned up with a call to
// ResetTempAllocations().
virtual TfLiteStatus SetHeadBufferSize(size_t size, size_t alignment);
// Resizes a buffer that was previously returned by AllocateResizableBuffer().
// In the current implementation, it adjusts the head (lowest address, moving
// upwards) memory allocation to the given size. Calls to this method will
// also invalidate all temporary allocation values (it sets the location of
// temp space at the end of the head section). This call will fail if a chain
// of allocations through AllocateTemp() has not been cleaned up with a call
// to ResetTempAllocations().
virtual TfLiteStatus ResizeBuffer(uint8_t* resizable_buf, size_t size,
size_t alignment) override;
// Allocates memory starting at the tail of the arena (highest address and
// moving downwards).
virtual uint8_t* AllocateFromTail(size_t size, size_t alignment);
// Returns a buffer that is resizable via ResizeBuffer(). Only one
// resizable buffer is currently supported.
virtual uint8_t* AllocateResizableBuffer(size_t size,
size_t alignment) override;
// Frees up the memory occupied by the resizable buffer
virtual TfLiteStatus DeallocateResizableBuffer(
uint8_t* resizable_buf) override;
// Reserves the non-persistent memory that is planned by the memory planner.
virtual TfLiteStatus ReserveNonPersistentOverlayMemory(
size_t size, size_t alignment) override;
// Allocates persistent memory starting at the tail of the arena (highest
// address and moving downwards).
virtual uint8_t* AllocatePersistentBuffer(size_t size,
size_t alignment) override;
// Allocates a temporary buffer from the head of the arena (lowest address and
// moving upwards) but does not update the actual head allocation size or
@@ -63,25 +81,34 @@ class SimpleMemoryAllocator {
// calls to AllocateTemp() must end with a call to ResetTempAllocations(). If
// AllocateFromHead() is called before a call to ResetTempAllocations(), it
// will fail with an error message.
virtual uint8_t* AllocateTemp(size_t size, size_t alignment);
virtual uint8_t* AllocateTemp(size_t size, size_t alignment) override;
// Signals that a temporary buffer is no longer needed. This is currently for
// book-keeping purposes; the memory region is not immediately available for
// re-use. Deallocated memory regions are only reclaimed after
// ResetTempAllocations() is called.
virtual void DeallocateTemp(uint8_t* buf) override;
// Returns true if all temporary buffers are already deallocated.
virtual bool IsAllTempDeallocated() override;
// Resets a chain of temporary allocations back to the current head of the
// arena (lowest address).
virtual void ResetTempAllocations();
virtual TfLiteStatus ResetTempAllocations() override;
// Returns a pointer to the buffer currently assigned to the head section.
// This buffer is set by calling SetHeadSize().
uint8_t* GetHeadBuffer() const;
uint8_t* GetOverlayMemoryAddress() const override;
// Returns the size of the head section in bytes.
size_t GetHeadUsedBytes() const;
size_t GetNonPersistentUsedBytes() const override;
// Returns the size of all allocations in the tail section in bytes.
size_t GetTailUsedBytes() const;
size_t GetPersistentUsedBytes() const override;
// Returns the number of bytes available with a given alignment. This number
// takes into account any temporary allocations.
size_t GetAvailableMemory(size_t alignment) const;
size_t GetAvailableMemory(size_t alignment) const override;
// Returns the number of used bytes in the allocator. This number takes into
// account any temporary allocations.
@@ -105,6 +132,17 @@ class SimpleMemoryAllocator {
uint8_t* head_;
uint8_t* tail_;
uint8_t* temp_;
// The combination of the checksum of outstanding temporary buffer pointers
// AND the count of outstanding temporary buffers provides a low-cost
// mechanism to audit temporary buffer allocation and deallocation.
//
// XOR checksum of the outstanding temp buffer pointers.
// If all temp buffers are deallocated OR no temp buffers are allocated,
// temp_buffer_ptr_check_sum_ == 0.
intptr_t temp_buffer_ptr_check_sum_ = 0;
// Count of outstanding temp buffers.
int temp_buffer_count_ = 0;
};
} // namespace tflite
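A standalone illustration of the audit scheme those members implement: XOR-ing each outstanding temp pointer in on allocation and out on deallocation drives both the checksum and the count back to zero once every AllocateTemp() has a matching DeallocateTemp():

#include <cassert>
#include <cstdint>

void ChecksumAuditDemo() {
  intptr_t check_sum = 0;
  int count = 0;
  uint8_t a = 0;
  uint8_t b = 0;

  // "Allocate" two temp buffers.
  check_sum ^= reinterpret_cast<intptr_t>(&a); ++count;
  check_sum ^= reinterpret_cast<intptr_t>(&b); ++count;

  // "Deallocate" them in any order; XOR is its own inverse.
  check_sum ^= reinterpret_cast<intptr_t>(&b); --count;
  check_sum ^= reinterpret_cast<intptr_t>(&a); --count;

  assert(check_sum == 0 && count == 0);
}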

View File

@@ -0,0 +1,113 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/test_helper_custom_ops.h"
#include <cstdarg>
#include <cstddef>
#include <cstdint>
#include <initializer_list>
#include <new>
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
#include "tensorflow/lite/schema/schema_generated.h"
// TODO(b/170464050): Use TFLM test only version of schema_utils.
namespace tflite {
namespace testing {
const TfLiteRegistration* PackerOp::getRegistration() {
return GetMutableRegistration();
}
TfLiteRegistration* PackerOp::GetMutableRegistration() {
static TfLiteRegistration r;
r.init = Init;
r.prepare = Prepare;
r.invoke = Invoke;
r.free = Free;
return &r;
}
void* PackerOp::Init(TfLiteContext* context, const char* buffer,
size_t length) {
freed_ = false;
// Do nothing.
return nullptr;
}
void PackerOp::Free(TfLiteContext* context, void* buffer) { freed_ = true; }
TfLiteStatus PackerOp::Prepare(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteOk;
}
TfLiteStatus PackerOp::Invoke(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, 0);
TF_LITE_ENSURE(context, input1 != nullptr);
const int32_t* input1_data = input1->data.i32;
TF_LITE_ENSURE_EQ(context, input1->dims->size, 1);
const int32_t input1_len = input1->dims->data[0];
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, 1);
TF_LITE_ENSURE(context, input2 != nullptr);
const int32_t* input2_data = input2->data.i32;
TF_LITE_ENSURE_EQ(context, input2->dims->size, 1);
const int32_t input2_len = input2->dims->data[0];
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
TF_LITE_ENSURE(context, output != nullptr);
int32_t* output_data = output->data.i32;
int32_t output_len = output->dims->data[0];
// Fill output with input: first with the first tensor, then with the second
// tensor up to the size of the output tensor.
int cnt = 0;
int i;
for (i = 0; i < input1_len && cnt < output_len; i++, cnt++) {
output_data[cnt] = input1_data[i];
}
if (cnt >= output_len) {
return kTfLiteOk;
}
for (i = 0; i < input2_len && cnt < output_len; i++, cnt++) {
output_data[cnt] = input2_data[i];
}
if (cnt >= output_len) {
return kTfLiteOk;
}
for (; cnt < output_len; cnt++) {
output_data[cnt] = 0;
}
return kTfLiteOk;
}
bool PackerOp::freed_ = false;
} // namespace testing
} // namespace tflite
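If a test wants to route this op through an op resolver, it can be registered as a custom op. The string passed to AddCustom below is a placeholder assumption, not necessarily the name the TFLM test models use:

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
#include "tensorflow/lite/micro/test_helper_custom_ops.h"

TfLiteStatus RegisterPackerOp(tflite::MicroMutableOpResolver<1>& resolver) {
  // "CUSTOM_PACKER" is hypothetical; use the name referenced by the test
  // model's flatbuffer.
  return resolver.AddCustom(
      "CUSTOM_PACKER", tflite::testing::PackerOp::GetMutableRegistration());
}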

Some files were not shown because too many files have changed in this diff.