Rolling 20220526

jomjol
2022-05-26 20:31:26 +02:00
parent cce812ff11
commit 00028010ee
203 changed files with 12003 additions and 1226 deletions

View File

@@ -1,3 +1,5 @@
## TODO: GLOB is not a good way to collect files. Use explicit file list instead
cmake_minimum_required(VERSION 3.5)
set(tflite_dir "${CMAKE_CURRENT_SOURCE_DIR}/tensorflow/lite")
@@ -16,14 +18,27 @@ file(GLOB srcs_kernels
"${tfmicro_kernels_dir}/*.c"
"${tfmicro_kernels_dir}/*.cc")
# remove sources which will be provided by esp_nn
list(REMOVE_ITEM srcs_kernels
"${tfmicro_kernels_dir}/add.cc"
"${tfmicro_kernels_dir}/conv.cc"
"${tfmicro_kernels_dir}/depthwise_conv.cc"
"${tfmicro_kernels_dir}/fully_connected.cc"
"${tfmicro_kernels_dir}/mul.cc"
"${tfmicro_kernels_dir}/pooling.cc")
FILE(GLOB esp_nn_kernels
"${tfmicro_kernels_dir}/esp_nn/*.cc")
set(lib_srcs
"${srcs_micro}"
"${srcs_kernels}"
"${esp_nn_kernels}"
"${src_micro_frontend}"
"${tflite_dir}/kernels/kernel_util.cc"
"${tflite_dir}/micro/memory_planner/greedy_memory_planner.cc"
"${tflite_dir}/micro/memory_planner/linear_memory_planner.cc"
"${tflite_dir}/c/common.c"
"${tflite_dir}/c/common.cc"
"${tflite_dir}/core/api/error_reporter.cc"
"${tflite_dir}/core/api/flatbuffer_conversions.cc"
"${tflite_dir}/core/api/op_resolver.cc"
@@ -36,15 +51,17 @@ idf_component_register(
INCLUDE_DIRS "." "third_party/gemmlowp"
"third_party/flatbuffers/include"
"third_party/ruy"
"third_party/kissfft")
"third_party/kissfft"
REQUIRES "esp-nn")
# Reduce the level of paranoia to be able to compile TF sources
target_compile_options(${COMPONENT_LIB} PRIVATE
-Wno-maybe-uninitialized
-Wno-missing-field-initializers
-DESP_NN # enables ESP-NN optimizations by Espressif
-Wno-type-limits)
target_compile_options(${COMPONENT_LIB} PRIVATE -fno-unwind-tables -ffunction-sections -fdata-sections -fmessage-length=0 -DTF_LITE_STATIC_MEMORY -DTF_LITE_DISABLE_X86_NEON -O3 -Wsign-compare -Wdouble-promotion -Wshadow -Wunused-variable -Wmissing-field-initializers -Wunused-function -Wswitch -Wvla -Wall -Wextra -Wstrict-aliasing -Wno-unused-parameter -DESP -DESP_NN -Wno-nonnull)
target_compile_options(${COMPONENT_LIB} PRIVATE $<$<COMPILE_LANGUAGE:CXX>: -std=c++11 -fno-rtti -fno-exceptions -fno-threadsafe-statics -fno-unwind-tables -ffunction-sections -fdata-sections -fmessage-length=0 -DTF_LITE_STATIC_MEMORY -DTF_LITE_DISABLE_X86_NEON -O3 -Werror -Wsign-compare -Wdouble-promotion -Wshadow -Wunused-variable -Wmissing-field-initializers -Wunused-function -Wswitch -Wvla -Wall -Wextra -Wstrict-aliasing -Wno-unused-parameter -DESP -DESP_NN -Wno-return-type -Wno-strict-aliasing -std=gnu++14 >)
target_compile_options(${COMPONENT_LIB} PRIVATE -fno-unwind-tables -ffunction-sections -fdata-sections -fmessage-length=0 -DTF_LITE_STATIC_MEMORY -DTF_LITE_DISABLE_X86_NEON -O3 -Wsign-compare -Wdouble-promotion -Wshadow -Wunused-variable -Wmissing-field-initializers -Wunused-function -Wswitch -Wvla -Wall -Wextra -Wstrict-aliasing -Wno-unused-parameter -Wno-nonnull)
target_compile_options(${COMPONENT_LIB} PRIVATE $<$<COMPILE_LANGUAGE:CXX>: -std=c++11 -fno-rtti -fno-exceptions -fno-threadsafe-statics -fno-unwind-tables -ffunction-sections -fdata-sections -fmessage-length=0 -DTF_LITE_STATIC_MEMORY -DTF_LITE_DISABLE_X86_NEON -O3 -Werror -Wsign-compare -Wdouble-promotion -Wshadow -Wunused-variable -Wmissing-field-initializers -Wunused-function -Wswitch -Wvla -Wall -Wextra -Wstrict-aliasing -Wno-unused-parameter -Wno-return-type -Wno-strict-aliasing -std=gnu++14 >)
target_compile_options(${COMPONENT_LIB} INTERFACE $<$<IN_LIST:-DTF_LITE_STATIC_MEMORY,$<TARGET_PROPERTY:${COMPONENT_LIB},COMPILE_OPTIONS>>:-DTF_LITE_STATIC_MEMORY>)
target_link_libraries(${COMPONENT_LIB} PRIVATE -lm)
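Side note on the defines added above: the component swaps in the kernel sources under esp_nn/ and compiles everything with -DESP_NN (plus -DTF_LITE_STATIC_MEMORY), so a source file can select the optimized path with an ordinary preprocessor guard. A minimal illustrative sketch, not taken from the component's sources:

// Illustrative only: a -DESP_NN compile definition gating an optimized path.
#include <stdint.h>

static inline void add_vectors(const int8_t* a, const int8_t* b, int8_t* out,
                               int n) {
#if defined(ESP_NN)
  // stand-in for a call into an Espressif-optimized esp-nn routine
  for (int i = 0; i < n; ++i) out[i] = (int8_t)(a[i] + b[i]);
#else
  // portable reference fallback
  for (int i = 0; i < n; ++i) out[i] = (int8_t)(a[i] + b[i]);
#endif
}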

View File

@@ -0,0 +1,22 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Compatibility shim for new location of interface definitions.
#ifndef TENSORFLOW_LITE_BUILTIN_OP_DATA_H_
#define TENSORFLOW_LITE_BUILTIN_OP_DATA_H_
#include "tensorflow/lite/c/builtin_op_data.h"
#endif // TENSORFLOW_LITE_BUILTIN_OP_DATA_H_

View File

@@ -0,0 +1,187 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_BUILTIN_OPS_H_
#define TENSORFLOW_LITE_BUILTIN_OPS_H_
// DO NOT EDIT MANUALLY: This file is automatically generated by
// `schema/builtin_ops_header/generator.cc`.
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
// The enum for builtin operators.
// Note: CUSTOM, DELEGATE, and PLACEHOLDER_FOR_GREATER_OP_CODES are 3 special
// ops which are not real built-in ops.
typedef enum {
kTfLiteBuiltinAdd = 0,
kTfLiteBuiltinAveragePool2d = 1,
kTfLiteBuiltinConcatenation = 2,
kTfLiteBuiltinConv2d = 3,
kTfLiteBuiltinDepthwiseConv2d = 4,
kTfLiteBuiltinDepthToSpace = 5,
kTfLiteBuiltinDequantize = 6,
kTfLiteBuiltinEmbeddingLookup = 7,
kTfLiteBuiltinFloor = 8,
kTfLiteBuiltinFullyConnected = 9,
kTfLiteBuiltinHashtableLookup = 10,
kTfLiteBuiltinL2Normalization = 11,
kTfLiteBuiltinL2Pool2d = 12,
kTfLiteBuiltinLocalResponseNormalization = 13,
kTfLiteBuiltinLogistic = 14,
kTfLiteBuiltinLshProjection = 15,
kTfLiteBuiltinLstm = 16,
kTfLiteBuiltinMaxPool2d = 17,
kTfLiteBuiltinMul = 18,
kTfLiteBuiltinRelu = 19,
kTfLiteBuiltinReluN1To1 = 20,
kTfLiteBuiltinRelu6 = 21,
kTfLiteBuiltinReshape = 22,
kTfLiteBuiltinResizeBilinear = 23,
kTfLiteBuiltinRnn = 24,
kTfLiteBuiltinSoftmax = 25,
kTfLiteBuiltinSpaceToDepth = 26,
kTfLiteBuiltinSvdf = 27,
kTfLiteBuiltinTanh = 28,
kTfLiteBuiltinConcatEmbeddings = 29,
kTfLiteBuiltinSkipGram = 30,
kTfLiteBuiltinCall = 31,
kTfLiteBuiltinCustom = 32,
kTfLiteBuiltinEmbeddingLookupSparse = 33,
kTfLiteBuiltinPad = 34,
kTfLiteBuiltinUnidirectionalSequenceRnn = 35,
kTfLiteBuiltinGather = 36,
kTfLiteBuiltinBatchToSpaceNd = 37,
kTfLiteBuiltinSpaceToBatchNd = 38,
kTfLiteBuiltinTranspose = 39,
kTfLiteBuiltinMean = 40,
kTfLiteBuiltinSub = 41,
kTfLiteBuiltinDiv = 42,
kTfLiteBuiltinSqueeze = 43,
kTfLiteBuiltinUnidirectionalSequenceLstm = 44,
kTfLiteBuiltinStridedSlice = 45,
kTfLiteBuiltinBidirectionalSequenceRnn = 46,
kTfLiteBuiltinExp = 47,
kTfLiteBuiltinTopkV2 = 48,
kTfLiteBuiltinSplit = 49,
kTfLiteBuiltinLogSoftmax = 50,
kTfLiteBuiltinDelegate = 51,
kTfLiteBuiltinBidirectionalSequenceLstm = 52,
kTfLiteBuiltinCast = 53,
kTfLiteBuiltinPrelu = 54,
kTfLiteBuiltinMaximum = 55,
kTfLiteBuiltinArgMax = 56,
kTfLiteBuiltinMinimum = 57,
kTfLiteBuiltinLess = 58,
kTfLiteBuiltinNeg = 59,
kTfLiteBuiltinPadv2 = 60,
kTfLiteBuiltinGreater = 61,
kTfLiteBuiltinGreaterEqual = 62,
kTfLiteBuiltinLessEqual = 63,
kTfLiteBuiltinSelect = 64,
kTfLiteBuiltinSlice = 65,
kTfLiteBuiltinSin = 66,
kTfLiteBuiltinTransposeConv = 67,
kTfLiteBuiltinSparseToDense = 68,
kTfLiteBuiltinTile = 69,
kTfLiteBuiltinExpandDims = 70,
kTfLiteBuiltinEqual = 71,
kTfLiteBuiltinNotEqual = 72,
kTfLiteBuiltinLog = 73,
kTfLiteBuiltinSum = 74,
kTfLiteBuiltinSqrt = 75,
kTfLiteBuiltinRsqrt = 76,
kTfLiteBuiltinShape = 77,
kTfLiteBuiltinPow = 78,
kTfLiteBuiltinArgMin = 79,
kTfLiteBuiltinFakeQuant = 80,
kTfLiteBuiltinReduceProd = 81,
kTfLiteBuiltinReduceMax = 82,
kTfLiteBuiltinPack = 83,
kTfLiteBuiltinLogicalOr = 84,
kTfLiteBuiltinOneHot = 85,
kTfLiteBuiltinLogicalAnd = 86,
kTfLiteBuiltinLogicalNot = 87,
kTfLiteBuiltinUnpack = 88,
kTfLiteBuiltinReduceMin = 89,
kTfLiteBuiltinFloorDiv = 90,
kTfLiteBuiltinReduceAny = 91,
kTfLiteBuiltinSquare = 92,
kTfLiteBuiltinZerosLike = 93,
kTfLiteBuiltinFill = 94,
kTfLiteBuiltinFloorMod = 95,
kTfLiteBuiltinRange = 96,
kTfLiteBuiltinResizeNearestNeighbor = 97,
kTfLiteBuiltinLeakyRelu = 98,
kTfLiteBuiltinSquaredDifference = 99,
kTfLiteBuiltinMirrorPad = 100,
kTfLiteBuiltinAbs = 101,
kTfLiteBuiltinSplitV = 102,
kTfLiteBuiltinUnique = 103,
kTfLiteBuiltinCeil = 104,
kTfLiteBuiltinReverseV2 = 105,
kTfLiteBuiltinAddN = 106,
kTfLiteBuiltinGatherNd = 107,
kTfLiteBuiltinCos = 108,
kTfLiteBuiltinWhere = 109,
kTfLiteBuiltinRank = 110,
kTfLiteBuiltinElu = 111,
kTfLiteBuiltinReverseSequence = 112,
kTfLiteBuiltinMatrixDiag = 113,
kTfLiteBuiltinQuantize = 114,
kTfLiteBuiltinMatrixSetDiag = 115,
kTfLiteBuiltinRound = 116,
kTfLiteBuiltinHardSwish = 117,
kTfLiteBuiltinIf = 118,
kTfLiteBuiltinWhile = 119,
kTfLiteBuiltinNonMaxSuppressionV4 = 120,
kTfLiteBuiltinNonMaxSuppressionV5 = 121,
kTfLiteBuiltinScatterNd = 122,
kTfLiteBuiltinSelectV2 = 123,
kTfLiteBuiltinDensify = 124,
kTfLiteBuiltinSegmentSum = 125,
kTfLiteBuiltinBatchMatmul = 126,
kTfLiteBuiltinPlaceholderForGreaterOpCodes = 127,
kTfLiteBuiltinCumsum = 128,
kTfLiteBuiltinCallOnce = 129,
kTfLiteBuiltinBroadcastTo = 130,
kTfLiteBuiltinRfft2d = 131,
kTfLiteBuiltinConv3d = 132,
kTfLiteBuiltinImag = 133,
kTfLiteBuiltinReal = 134,
kTfLiteBuiltinComplexAbs = 135,
kTfLiteBuiltinHashtable = 136,
kTfLiteBuiltinHashtableFind = 137,
kTfLiteBuiltinHashtableImport = 138,
kTfLiteBuiltinHashtableSize = 139,
kTfLiteBuiltinReduceAll = 140,
kTfLiteBuiltinConv3dTranspose = 141,
kTfLiteBuiltinVarHandle = 142,
kTfLiteBuiltinReadVariable = 143,
kTfLiteBuiltinAssignVariable = 144,
kTfLiteBuiltinBroadcastArgs = 145,
kTfLiteBuiltinRandomStandardNormal = 146,
kTfLiteBuiltinBucketize = 147,
kTfLiteBuiltinRandomUniform = 148,
kTfLiteBuiltinMultinomial = 149,
kTfLiteBuiltinGelu = 150,
kTfLiteBuiltinDynamicUpdateSlice = 151,
} TfLiteBuiltinOperator;
#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus
#endif // TENSORFLOW_LITE_BUILTIN_OPS_H_
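As a quick usage note for the enum above: the header comment singles out CUSTOM, DELEGATE, and PLACEHOLDER_FOR_GREATER_OP_CODES as values that are not real built-in ops. A hypothetical helper (not part of the header) that filters them out:

#include "tensorflow/lite/builtin_ops.h"

// Returns 1 for genuine built-in operators, 0 for the three special values.
static int IsRealBuiltinOp(TfLiteBuiltinOperator op) {
  switch (op) {
    case kTfLiteBuiltinCustom:
    case kTfLiteBuiltinDelegate:
    case kTfLiteBuiltinPlaceholderForGreaterOpCodes:
      return 0;
    default:
      return 1;
  }
}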

View File

@@ -98,6 +98,7 @@ typedef enum {
kTfLiteResource = 14,
kTfLiteVariant = 15,
kTfLiteUInt32 = 16,
kTfLiteUInt16 = 17,
} TfLiteType;
// Legacy. Will be deprecated in favor of TfLiteAffineQuantization.
@@ -111,6 +112,12 @@ typedef struct TfLiteQuantizationParams {
int32_t zero_point;
} TfLiteQuantizationParams;
// --------------------------------------------------------------------------
// Opaque types used by c_api_opaque.h.
// TfLiteOpaqueTensor is an opaque version of TfLiteTensor;
typedef struct TfLiteOpaqueTensor TfLiteOpaqueTensor;
#ifdef __cplusplus
} // extern C
#endif
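The first hunk extends TfLiteType with kTfLiteUInt16, so element-size lookups gain one more case. A hedged sketch of such a mapping (the library's own helper, TfLiteTypeGetSize, is what the broadcast_to reference code later in this commit uses):

#include <stddef.h>
#include <stdint.h>
#include "tensorflow/lite/c/common.h"

// Hypothetical type-to-byte-width mapping; only a few cases shown.
static size_t ExampleTypeSize(TfLiteType type) {
  switch (type) {
    case kTfLiteUInt16:  return sizeof(uint16_t);  // new in this change
    case kTfLiteInt16:   return sizeof(int16_t);
    case kTfLiteFloat32: return sizeof(float);
    default:             return 0;  // remaining cases omitted
  }
}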

View File

@@ -21,6 +21,8 @@ limitations under the License.
#include <string.h>
#endif // TF_LITE_STATIC_MEMORY
extern "C" {
size_t TfLiteIntArrayGetSizeInBytes(int size) {
static TfLiteIntArray dummy;
@@ -34,13 +36,13 @@ size_t TfLiteIntArrayGetSizeInBytes(int size) {
int TfLiteIntArrayEqual(const TfLiteIntArray* a, const TfLiteIntArray* b) {
if (a == b) return 1;
if (a == NULL || b == NULL) return 0;
if (a == nullptr || b == nullptr) return 0;
return TfLiteIntArrayEqualsArray(a, b->size, b->data);
}
int TfLiteIntArrayEqualsArray(const TfLiteIntArray* a, int b_size,
const int b_data[]) {
if (a == NULL) return (b_size == 0);
if (a == nullptr) return (b_size == 0);
if (a->size != b_size) return 0;
int i = 0;
for (; i < a->size; i++)
@@ -52,7 +54,7 @@ int TfLiteIntArrayEqualsArray(const TfLiteIntArray* a, int b_size,
TfLiteIntArray* TfLiteIntArrayCreate(int size) {
size_t alloc_size = TfLiteIntArrayGetSizeInBytes(size);
if (alloc_size <= 0) return NULL;
if (alloc_size <= 0) return nullptr;
TfLiteIntArray* ret = (TfLiteIntArray*)malloc(alloc_size);
if (!ret) return ret;
ret->size = size;
@@ -60,7 +62,7 @@ TfLiteIntArray* TfLiteIntArrayCreate(int size) {
}
TfLiteIntArray* TfLiteIntArrayCopy(const TfLiteIntArray* src) {
if (!src) return NULL;
if (!src) return nullptr;
TfLiteIntArray* ret = TfLiteIntArrayCreate(src->size);
if (ret) {
memcpy(ret->data, src->data, src->size * sizeof(int));
@@ -99,7 +101,7 @@ void TfLiteTensorDataFree(TfLiteTensor* t) {
t->allocation_type == kTfLitePersistentRo) {
free(t->data.raw);
}
t->data.raw = NULL;
t->data.raw = nullptr;
}
void TfLiteQuantizationFree(TfLiteQuantization* quantization) {
@@ -108,31 +110,31 @@ void TfLiteQuantizationFree(TfLiteQuantization* quantization) {
(TfLiteAffineQuantization*)(quantization->params);
if (q_params->scale) {
TfLiteFloatArrayFree(q_params->scale);
q_params->scale = NULL;
q_params->scale = nullptr;
}
if (q_params->zero_point) {
TfLiteIntArrayFree(q_params->zero_point);
q_params->zero_point = NULL;
q_params->zero_point = nullptr;
}
free(q_params);
}
quantization->params = NULL;
quantization->params = nullptr;
quantization->type = kTfLiteNoQuantization;
}
void TfLiteSparsityFree(TfLiteSparsity* sparsity) {
if (sparsity == NULL) {
if (sparsity == nullptr) {
return;
}
if (sparsity->traversal_order) {
TfLiteIntArrayFree(sparsity->traversal_order);
sparsity->traversal_order = NULL;
sparsity->traversal_order = nullptr;
}
if (sparsity->block_map) {
TfLiteIntArrayFree(sparsity->block_map);
sparsity->block_map = NULL;
sparsity->block_map = nullptr;
}
if (sparsity->dim_metadata) {
@@ -141,13 +143,13 @@ void TfLiteSparsityFree(TfLiteSparsity* sparsity) {
TfLiteDimensionMetadata metadata = sparsity->dim_metadata[i];
if (metadata.format == kTfLiteDimSparseCSR) {
TfLiteIntArrayFree(metadata.array_segments);
metadata.array_segments = NULL;
metadata.array_segments = nullptr;
TfLiteIntArrayFree(metadata.array_indices);
metadata.array_indices = NULL;
metadata.array_indices = nullptr;
}
}
free(sparsity->dim_metadata);
sparsity->dim_metadata = NULL;
sparsity->dim_metadata = nullptr;
}
free(sparsity);
@@ -156,16 +158,16 @@ void TfLiteSparsityFree(TfLiteSparsity* sparsity) {
void TfLiteTensorFree(TfLiteTensor* t) {
TfLiteTensorDataFree(t);
if (t->dims) TfLiteIntArrayFree(t->dims);
t->dims = NULL;
t->dims = nullptr;
if (t->dims_signature) {
TfLiteIntArrayFree((TfLiteIntArray *) t->dims_signature);
}
t->dims_signature = NULL;
t->dims_signature = nullptr;
TfLiteQuantizationFree(&t->quantization);
TfLiteSparsityFree(t->sparsity);
t->sparsity = NULL;
t->sparsity = nullptr;
}
void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims,
@@ -185,7 +187,7 @@ void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims,
tensor->is_variable = is_variable;
tensor->quantization.type = kTfLiteNoQuantization;
tensor->quantization.params = NULL;
tensor->quantization.params = nullptr;
}
TfLiteStatus TfLiteTensorCopy(const TfLiteTensor* src, TfLiteTensor* dst) {
@@ -229,6 +231,8 @@ const char* TfLiteTypeGetName(TfLiteType type) {
return "NOTYPE";
case kTfLiteFloat32:
return "FLOAT32";
case kTfLiteUInt16:
return "UINT16";
case kTfLiteInt16:
return "INT16";
case kTfLiteInt32:
@@ -263,14 +267,6 @@ const char* TfLiteTypeGetName(TfLiteType type) {
return "Unknown type";
}
TfLiteDelegate TfLiteDelegateCreate(void) {
TfLiteDelegate d = {
.data_ = NULL,
.Prepare = NULL,
.CopyFromBufferHandle = NULL,
.CopyToBufferHandle = NULL,
.FreeBufferHandle = NULL,
.flags = kTfLiteDelegateFlagsNone,
};
return d;
}
TfLiteDelegate TfLiteDelegateCreate() { return TfLiteDelegate{}; }
} // extern "C"
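The TfLiteDelegateCreate rewrite above relies on value-initialization: TfLiteDelegate{} zero-initializes every member, which matches the removed designated-initializer body (all pointers NULL, flags set to kTfLiteDelegateFlagsNone, i.e. 0). A minimal illustration of the idiom, not library code:

// Value-initializing a plain struct with {} zeroes all of its members.
struct Callbacks {
  void* data_;
  void (*Prepare)(void*);
  int flags;
};

Callbacks MakeCallbacks() {
  return Callbacks{};  // data_ == nullptr, Prepare == nullptr, flags == 0
}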

View File

@@ -173,8 +173,9 @@ void TfLiteFloatArrayFree(TfLiteFloatArray* a);
} \
} while (false)
#else // TF_LITE_STRIP_ERROR_STRINGS
#define TF_LITE_KERNEL_LOG(context, ...)
#define TF_LITE_MAYBE_KERNEL_LOG(context, ...)
#define UNUSED(...) (void)sizeof(#__VA_ARGS__)
#define TF_LITE_KERNEL_LOG(context, ...) UNUSED(__VA_ARGS__)
#define TF_LITE_MAYBE_KERNEL_LOG(context, ...) UNUSED(__VA_ARGS__)
#endif // TF_LITE_STRIP_ERROR_STRINGS
// Check whether value is true, and if not return kTfLiteError from
@@ -316,6 +317,7 @@ typedef union TfLitePtrUnion {
uint8_t* uint8;
bool* b;
int16_t* i16;
uint16_t* ui16;
TfLiteComplex64* c64;
TfLiteComplex128* c128;
int8_t* int8;
@@ -459,7 +461,8 @@ typedef struct TfLiteTensor {
// Optional. Encodes shapes with unknown dimensions with -1. This field is
// only populated when unknown dimensions exist in a read-write tensor (i.e.
// an input or output tensor). (e.g. `dims` contains [1, 1, 1, 3] and
// `dims_signature` contains [1, -1, -1, 3]).
// `dims_signature` contains [1, -1, -1, 3]). Note that this field only
// exists when TF_LITE_STATIC_MEMORY is not defined.
const TfLiteIntArray* dims_signature;
} TfLiteTensor;
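Regarding the TF_LITE_STRIP_ERROR_STRINGS hunk above: the new UNUSED macro stringizes its whole argument list, so a stripped log call still expands to a well-formed, zero-cost expression and none of its arguments are evaluated. A standalone sketch of the expansion (macro names simplified, not the header itself):

#define UNUSED(...) (void)sizeof(#__VA_ARGS__)
#define LOG(context, ...) UNUSED(__VA_ARGS__)

int main() {
  int n = 3;
  // Expands to (void)sizeof("\"bad size %d\", n"); `n` is never evaluated here.
  LOG(nullptr, "bad size %d", n);
  (void)n;
  return 0;
}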

View File

@@ -0,0 +1,51 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// This provides a few C++ helpers that are useful for manipulating C structures
// in C++.
#ifndef TENSORFLOW_LITE_CONTEXT_UTIL_H_
#define TENSORFLOW_LITE_CONTEXT_UTIL_H_
#include <stddef.h>
#include "tensorflow/lite/c/common.h"
namespace tflite {
// Provide a range iterable wrapper for TfLiteIntArray* (C lists that the
// TfLite C API uses). Can't use the google array_view, since we can't depend
// on even absl for embedded device reasons.
class TfLiteIntArrayView {
public:
// Construct a view of a TfLiteIntArray*. Note, `int_array` should be non-null
// and this view does not take ownership of it.
explicit TfLiteIntArrayView(const TfLiteIntArray* int_array)
: int_array_(int_array) {}
TfLiteIntArrayView(const TfLiteIntArrayView&) = default;
TfLiteIntArrayView& operator=(const TfLiteIntArrayView& rhs) = default;
typedef const int* const_iterator;
const_iterator begin() const { return int_array_->data; }
const_iterator end() const { return &int_array_->data[int_array_->size]; }
size_t size() const { return end() - begin(); }
int operator[](size_t pos) const { return int_array_->data[pos]; }
private:
const TfLiteIntArray* int_array_;
};
} // namespace tflite
#endif // TENSORFLOW_LITE_CONTEXT_UTIL_H_
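A short usage sketch for the view above (hypothetical helper, assuming a tensor with non-null dims): it lets range-for iterate the C-style TfLiteIntArray directly.

#include "tensorflow/lite/context_util.h"

// Multiplies all dimensions of a tensor, iterating dims through the view.
inline int ExampleNumElements(const TfLiteTensor* tensor) {
  int count = 1;
  for (int d : tflite::TfLiteIntArrayView(tensor->dims)) {
    count *= d;
  }
  return count;
}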

View File

@@ -208,6 +208,14 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type,
return ParseBatchToSpaceNd(op, error_reporter, allocator, builtin_data);
}
case BuiltinOperator_BROADCAST_ARGS: {
return ParseBroadcastArgs(op, error_reporter, allocator, builtin_data);
}
case BuiltinOperator_BROADCAST_TO: {
return ParseBroadcastTo(op, error_reporter, allocator, builtin_data);
}
case BuiltinOperator_CALL_ONCE: {
return ParseCallOnce(op, error_reporter, allocator, builtin_data);
}
@@ -336,6 +344,10 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type,
return ParseLogSoftmax(op, error_reporter, allocator, builtin_data);
}
case BuiltinOperator_LSTM: {
return ParseLSTM(op, error_reporter, allocator, builtin_data);
}
case BuiltinOperator_MAXIMUM: {
return ParseMaximum(op, error_reporter, allocator, builtin_data);
}
@@ -605,37 +617,6 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type,
*builtin_data = params.release();
return kTfLiteOk;
}
case BuiltinOperator_LSTM: {
auto params = safe_allocator.Allocate<TfLiteLSTMParams>();
TF_LITE_ENSURE(error_reporter, params != nullptr);
if (const auto* lstm_params = op->builtin_options_as_LSTMOptions()) {
params->activation =
ConvertActivation(lstm_params->fused_activation_function());
params->cell_clip = lstm_params->cell_clip();
params->proj_clip = lstm_params->proj_clip();
switch (lstm_params->kernel_type()) {
case LSTMKernelType_FULL:
params->kernel_type = kTfLiteLSTMFullKernel;
break;
case LSTMKernelType_BASIC:
params->kernel_type = kTfLiteLSTMBasicKernel;
break;
default:
TF_LITE_REPORT_ERROR(error_reporter,
"Unhandled LSTM kernel type: %d",
lstm_params->kernel_type());
return kTfLiteError;
}
params->asymmetric_quantize_inputs =
lstm_params->asymmetric_quantize_inputs();
} else {
TF_LITE_REPORT_ERROR(error_reporter,
"No valid LSTM builtin options exist");
return kTfLiteError;
}
*builtin_data = params.release();
return kTfLiteOk;
}
case BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM: {
return ParseUnidirectionalSequenceLSTM(op, error_reporter, allocator,
builtin_data);
@@ -883,7 +864,6 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type,
case BuiltinOperator_SCATTER_ND:
case BuiltinOperator_DENSIFY:
case BuiltinOperator_SEGMENT_SUM:
case BuiltinOperator_BROADCAST_TO:
case BuiltinOperator_RFFT2D:
case BuiltinOperator_IMAG:
case BuiltinOperator_REAL:
@@ -891,7 +871,7 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type,
case BuiltinOperator_HASHTABLE_FIND:
case BuiltinOperator_HASHTABLE_IMPORT:
case BuiltinOperator_HASHTABLE_SIZE:
case BuiltinOperator_BROADCAST_ARGS:
case BuiltinOperator_DYNAMIC_UPDATE_SLICE:
return kTfLiteOk;
case BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES:
return kTfLiteError;
@@ -916,6 +896,9 @@ TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type,
case TensorType_INT16:
*type = kTfLiteInt16;
return kTfLiteOk;
case TensorType_UINT16:
*type = kTfLiteUInt16;
return kTfLiteOk;
case TensorType_INT32:
*type = kTfLiteInt32;
return kTfLiteOk;
@@ -1085,6 +1068,22 @@ TfLiteStatus ParseBatchToSpaceNd(const Operator*, ErrorReporter*,
return kTfLiteOk;
}
// We have this parse function instead of directly returning kTfLiteOk from the
// switch-case in ParseOpData because this function is used as part of the
// selective registration for the OpResolver implementation in micro.
TfLiteStatus ParseBroadcastArgs(const Operator*, ErrorReporter*,
BuiltinDataAllocator*, void**) {
return kTfLiteOk;
}
// We have this parse function instead of directly returning kTfLiteOk from the
// switch-case in ParseOpData because this function is used as part of the
// selective registration for the OpResolver implementation in micro.
TfLiteStatus ParseBroadcastTo(const Operator*, ErrorReporter*,
BuiltinDataAllocator*, void**) {
return kTfLiteOk;
}
TfLiteStatus ParseCallOnce(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data) {
@@ -1605,6 +1604,40 @@ TfLiteStatus ParseLogSoftmax(const Operator*, ErrorReporter*,
return kTfLiteOk;
}
TfLiteStatus ParseLSTM(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data) {
CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
SafeBuiltinDataAllocator safe_allocator(allocator);
auto params = safe_allocator.Allocate<TfLiteLSTMParams>();
TF_LITE_ENSURE(error_reporter, params != nullptr);
if (const auto* lstm_params = op->builtin_options_as_LSTMOptions()) {
params->activation =
ConvertActivation(lstm_params->fused_activation_function());
params->cell_clip = lstm_params->cell_clip();
params->proj_clip = lstm_params->proj_clip();
switch (lstm_params->kernel_type()) {
case LSTMKernelType_FULL:
params->kernel_type = kTfLiteLSTMFullKernel;
break;
case LSTMKernelType_BASIC:
params->kernel_type = kTfLiteLSTMBasicKernel;
break;
default:
TF_LITE_REPORT_ERROR(error_reporter, "Unhandled LSTM kernel type: %d",
lstm_params->kernel_type());
return kTfLiteError;
}
params->asymmetric_quantize_inputs =
lstm_params->asymmetric_quantize_inputs();
} else {
TF_LITE_REPORT_ERROR(error_reporter, "No valid LSTM builtin options exist");
return kTfLiteError;
}
*builtin_data = params.release();
return kTfLiteOk;
}
// We have this parse function instead of directly returning kTfLiteOk from the
// switch-case in ParseOpData because this function is used as part of the
// selective registration for the OpResolver implementation in micro.
@@ -2337,6 +2370,31 @@ TfLiteStatus ParseVarHandle(const Operator* op, ErrorReporter* error_reporter,
return kTfLiteOk;
}
TfLiteStatus ParseWhile(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data) {
CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
SafeBuiltinDataAllocator safe_allocator(allocator);
std::unique_ptr<TfLiteWhileParams,
SafeBuiltinDataAllocator::BuiltinDataDeleter>
params = safe_allocator.Allocate<TfLiteWhileParams>();
TF_LITE_ENSURE(error_reporter, params != nullptr);
const WhileOptions* schema_params = op->builtin_options_as_WhileOptions();
if (schema_params != nullptr) {
params->cond_subgraph_index = schema_params->cond_subgraph_index();
params->body_subgraph_index = schema_params->body_subgraph_index();
} else {
// TODO(b/157480169): We should either return kTfLiteError or fill in some
// reasonable defaults in the params struct. We are not doing so until we
// better understand the ramifications of changing the legacy behavior.
}
*builtin_data = params.release();
return kTfLiteOk;
}
// We have this parse function instead of directly returning kTfLiteOk from the
// switch-case in ParseOpData because this function is used as part of the
// selective registration for the OpResolver implementation in micro.
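Context for the "selective registration" comments: by giving BROADCAST_ARGS, BROADCAST_TO, LSTM, and WHILE their own parse functions, a resolver can reference exactly the parsers for the ops it registers and the linker drops the rest. A hedged sketch of that pattern (not the actual MicroMutableOpResolver code; the table wiring here is an assumption):

#include "tensorflow/lite/builtin_ops.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"

namespace {
// One parse hook per registered op; parsers of unregistered ops never get
// pulled into the link.
using ParseFn = TfLiteStatus (*)(const tflite::Operator*,
                                 tflite::ErrorReporter*,
                                 tflite::BuiltinDataAllocator*, void**);
struct ParserEntry {
  int builtin_code;  // value from builtin_ops.h
  ParseFn parse;     // parse function declared in flatbuffer_conversions.h
};
const ParserEntry kExampleParsers[] = {
    {kTfLiteBuiltinBroadcastArgs, tflite::ParseBroadcastArgs},
    {kTfLiteBuiltinBroadcastTo, tflite::ParseBroadcastTo},
    {kTfLiteBuiltinLstm, tflite::ParseLSTM},
};
}  // namespace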

View File

@@ -98,6 +98,15 @@ TfLiteStatus ParseBatchToSpaceNd(const Operator* op,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseBroadcastArgs(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseBroadcastTo(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseCallOnce(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
@@ -232,6 +241,9 @@ TfLiteStatus ParseLogSoftmax(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseLSTM(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseMaximum(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
@@ -379,6 +391,9 @@ TfLiteStatus ParseVarHandle(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseWhile(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseZerosLike(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);

View File

@@ -60,9 +60,8 @@ void VectorBatchVectorAdd(const T* vector, int v_size, int n_batch,
// Cwise product of two vectors.
template <typename T>
inline void VectorVectorCwiseProduct(const T* __restrict__ vector1,
const T* __restrict__ vector2, int v_size,
T* __restrict__ result) {
inline void VectorVectorCwiseProduct(const T* vector1, const T* vector2,
int v_size, T* result) {
for (int v = 0; v < v_size; v++) {
*result++ = *vector1++ * *vector2++;
}
@@ -117,6 +116,367 @@ void VectorBatchVectorAssign(const T* vector, int v_size, int n_batch,
}
}
// Checks if all entries of vector are zero for float.
bool IsZeroVector(const float* vector, int v_size);
// Checks if all entries of vector are zero for int8.
bool IsZeroVector(const int8_t* vector, int v_size);
// Quantizes a buffer of floating point values using a symmetric quantization
// (i.e. linear quantization without an offset) to 8-bit signed integers.
// It also outputs the range (min, max) of the floating point buffer, and the
// scaling factor used to quantize the values.
void SymmetricQuantizeFloats(const float* values, const int size,
int8_t* quantized_values, float* min_value,
float* max_value, float* scaling_factor);
// Quantizes a buffer of floating point values using a symmetric quantization
// (i.e. linear quantization without an offset) to 8-bit signed integers.
// It uses the range (min, max) provided to the function to calculate the
// appropriate scaling factor to quantize the values.
void SymmetricQuantizeFloats(const float* values, const int size,
int8_t* quantized_values, float min_value,
float max_value, float* scaling_factor);
void AsymmetricQuantizeFloats(const float* values, const int size,
int8_t* quantized_values, float* scaling_factor,
int32_t* offset);
// Helper function to quantize floats.
// float_data_ptr input float vectors
// n_batch number of input vectors
// n_data size of a single input vector
// quantized_data_ptr (out) vector with quantized data
// scaling_factors (out) scaling factors (one per vector)
// zero_points (out) zero points (one per vector)
// do_asymmetric controls if the quantization should be asymmetric.
inline void BatchQuantizeFloats(const float* float_data_ptr, int n_batch,
int n_data, int8_t* quantized_data_ptr,
float* scaling_factors, int32_t* zero_points,
bool do_asymmetric) {
for (int b = 0; b < n_batch; ++b) {
const int offset = b * n_data;
if (do_asymmetric) {
tensor_utils::AsymmetricQuantizeFloats(
float_data_ptr + offset, n_data, quantized_data_ptr + offset,
&scaling_factors[b], &zero_points[b]);
} else {
float unused_min, unused_max;
tensor_utils::SymmetricQuantizeFloats(
float_data_ptr + offset, n_data, quantized_data_ptr + offset,
&unused_min, &unused_max, &scaling_factors[b]);
}
}
}
// Multiplies a matrix by a "batched" vector (i.e. a matrix with a batch
// dimension composed by input vectors independent from each other). The result
// of the multiplication is accumulated to the passed result buffer.
// More specifically, for a matrix M of shape [n, i] and a batched-vector
// of shape [i, batch] it will first compute the product of shape [n, batch].
// This product will be accumulated to the result buffer.
void MatrixBatchVectorMultiplyAccumulate(const float* matrix, int m_rows,
int m_cols, const float* vector,
int n_batch, float* result);
// Same as the function above, but the matrix is a sparse tensor with block
// pattern 1x4.
// This function assumes that m_cols is a multiple of the block size (4 in this
// case) so that there's no incomplete block.
void SparseMatrixBatchVectorMultiplyAccumulate1x4(
const float* __restrict__ matrix, const int32_t* __restrict__ segments,
const int32_t* __restrict__ indices, int m_rows, int m_cols,
const float* __restrict__ vector, int n_batch, float* __restrict__ result);
// Same as the function above, but the matrix is stored in block compressed
// sparse row format with block pattern 1x16 which consists of two arrays:
// 1. A matrix array stores non-zero blocks of the matrix in row major.
// 2. A ledger array stores nrows groups, one group per row. Each group starts
// with an integer representing the number of non-zero blocks for the
// corresponding row and follows with column indexes of the first element
// of each non-zero block.
// This function assumes that
// 1. m_cols is a multiple of 16 so that all blocks are full blocks.
// 2. m_cols < 254 * 16 so that block index can be represented by uint8.
void SparseMatrixBatchVectorMultiplyAccumulate(
const float* __restrict__ matrix, const uint8_t* __restrict__ ledger,
int m_rows, int m_cols, const float* __restrict__ vector, int n_batch,
float* __restrict__ result);
// Same as the function above, but for values quantized using symmetric
// quantization (e.g. by calling SymmetricQuantizeFloats).
// The passed scaling factors is a buffer of the quantization scaling factors
// that will be used to dequantize the products into the final result buffer.
// These scaling factors are the multiplication of the matrix scaling factor
// by the vector's scaling factor, one per batch (i.e. this allows quantizing
// each batch in the batch-vector matrix independently).
void MatrixBatchVectorMultiplyAccumulate(
const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
const int8_t* __restrict__ vectors,
const float* __restrict__ scaling_factors, int n_batch,
float* __restrict__ result);
// Same as the function above except that vector values
// are quantized with asymmetric quantization per-batch and the matrix
// is quantized per row.
void MatrixBatchVectorMultiplyAccumulate(
const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
const int8_t* __restrict__ vectors,
const float* __restrict__ scaling_factors, int n_batch,
float* __restrict__ result, const float* __restrict__ per_channel_scale,
const int32_t* __restrict__ input_offset);
// Same as the function above, but the matrix is a sparse tensor with block
// pattern 1x16.
// This function assumes that m_cols is a multiple of the block size (16 in this
// case) so that there's no incomplete block. Also, it assumes all offsets of
// input, output and filter are zero.
void SparseMatrixBatchVectorMultiplyAccumulate1x16(
const int8_t* __restrict__ matrix, const int32_t* __restrict__ segments,
const int32_t* __restrict__ indices, int m_rows, int m_cols,
const int8_t* __restrict__ vector, const int32_t* __restrict__ bias_vector,
int n_batch, const int32_t input_offset, const int32_t output_multiplier,
const int32_t output_shift, const int32_t output_offset,
const int32_t output_activation_min, const int32_t output_activation_max,
int8_t* __restrict__ result);
// Same as the function above, but the matrix is stored in block compressed
// sparse row format with block pattern 1x16 which consists of two arrays:
// 1. A matrix array stores non-zero blocks of the matrix in row major.
// 2. A ledger array stores nrows groups, one group per row. Each group starts
// with an integer representing the number of non-zero blocks for the
// corresponding row followed by column index of the first element of
// each non-zero block.
// This function assumes that
// 1. m_cols is a multiple of 16 so that all blocks are full blocks.
// 2. m_cols < 254 * 16 so that block index can be represented by uint8.
void SparseMatrixBatchVectorMultiplyAccumulate(
const int8_t* __restrict__ matrix, const uint8_t* __restrict__ ledger,
const int m_rows, const int m_cols, const int8_t* __restrict__ vectors,
const float* __restrict__ scaling_factors, int n_batch,
float* __restrict__ result);
// Same as the above 8, 8, 8 integer matmul except that it takes a zero point
// and does not accumulate into the result.
// TODO(b/148688698): remove this function by folding zero point calculation in
// prepare() function.
void MatrixBatchVectorMultiply(const int8_t* input, int32_t input_zeropoint,
const int8_t* input_to_gate_weights,
int32_t input_to_gate_effective_scale_a,
int32_t input_to_gate_effective_scale_b,
int32_t n_batch, int32_t n_input, int32_t n_cell,
int8_t* gate_output, int8_t gate_output_zp);
// Same as above but has 16 bit and 8 bit input and 8 bit output.
// Used in projection when hidden is 16bit.
void MatrixBatchVectorMultiply(const int16_t* hidden,
const int8_t* hidden_to_output_weights,
int32_t proj_effective_scale_a,
int32_t proj_effective_scale_b,
const int32_t* gate_bias, int32_t n_batch,
int32_t n_hidden, int32_t n_output,
int32_t output_zp, int8_t* proj_output);
// Apply Layer Normalization (https://arxiv.org/abs/1607.06450) to a Quantized
// vector.
// Parameters:
// - input: batch vector of size n_batch * n_input; 16 bit.
// - layer_norm_weights: the quantized layer normalization weights.
// - bias: the bias for the layer normalization.
// - layer_norm_scale_a: multiplier for scale factor.
// - layer_norm_scale_b: shift for scale factor.
// - variance_limit: the guard to make sure the inverse does not overflow.
// - n_batch: the number of batches.
// - n_input: the size for input and output.
// - output: the 16 bit output
void ApplyLayerNorm(const int16_t* input, const int16_t* layer_norm_weights,
const int32_t* bias, int32_t layer_norm_scale_a,
int32_t layer_norm_scale_b, int32_t variance_limit,
int n_batch, int n_input, int16_t* output);
// Same as above but the internal calculation is done in float.
void ApplyLayerNormFloat(const int16_t* input,
const int16_t* layer_norm_weights,
int32_t layer_norm_scale_a, int32_t layer_norm_scale_b,
const int32_t* bias, int n_batch, int n_input,
int16_t* output);
// Apply Sigmoid to a quantized vector.
// Parameters:
// - input: batch vector of size n_batch * n_input; 16 bit.
// - n_batch: the number of batches.
// - n_input: the size for input and output.
// - output: the 16 bit output
// The input is in Q3.12 format and the output is in Q0.15 format.
void ApplySigmoid(const int16_t* input, int32_t n_batch, int32_t n_input,
int16_t* output);
// Same as above but the internal calculation is float.
void ApplySigmoidFloat(const int16_t* input, int32_t n_batch, int32_t n_input,
int16_t* output);
// Apply Tanh to a quantized vector.
// Parameters:
// - integer_bits: the integer bits of the input.
// Currently supports 0, 1, 2, 3, 4, 5, 6.
// - input: batch vector of size n_batch * n_input; 16 bit.
// - n_batch: the number of batches.
// - n_input: the size for input and output.
// - output: the 16 bit output
// The input is in Qm.15-m format and the output is in Q0.15 format.
void ApplyTanh(int32_t integer_bits, const int16_t* input, int32_t n_batch,
int32_t n_input, int16_t* output);
// Apply Tanh to a quantized vector. The internal calculation is in float.
// - Input has 2^(integer_bits) as scale.
// - Output has Q0.15 as scale.
void ApplyTanhFloat(const int16_t* input, int32_t n_batch, int32_t n_input,
int32_t integer_bits, int16_t* output);
// Element-wise multiplication of two quantized vectors.
// Parameters:
// - input_1: batch vector of size n_batch * n_input; 16 bit.
// - input_2: batch vector of size n_batch * n_input; 16 bit.
// - n_batch: the number of batches.
// - n_input: the size for input and output.
// - shift: the shift needed to produce the output.
// - output: the 16 bit output of size n_batch * n_input.
// Output does not need to be initialized.
void CwiseMul(const int16_t* input_1, const int16_t* input_2, int n_batch,
int n_input, int shift, int16_t* output);
// Element-wise multiplication of two quantized vectors.
// Parameters:
// - input_1: batch vector of size n_batch * n_input; 16 bit.
// - input_2: batch vector of size n_batch * n_input; 16 bit.
// - n_batch: the number of batches.
// - n_input: the size for input and output.
// - shift: the shift needed to produce the output.
// - output: the 8 bit output of size n_batch * n_input.
// Output does not need to be initialized.
void CwiseMul(const int16_t* input_1, const int16_t* input_2, int n_batch,
int n_input, int shift, int8_t* output);
// Element-wise multiplication of two quantized vectors with rescaling.
// Parameters:
// - input_1: batch vector of size n_batch * n_input; 16 bit.
// - input_2: batch vector of size n_batch * n_input; 16 bit.
// - multiplier: the multiplier part of scale.
// - shift: the shift part of scale.
// - n_batch: the number of batches.
// - n_input: the size for input and output.
// - output: the 8 bit output of size n_batch * n_input.
// - output_zp: the zero point of output.
// Output does not need to be initialized.
// Multiplier ("m") and shift ("s") are connected to the output scale by
// scale = m * 2^(s - 31).
void CwiseMul(const int16_t* input_1, const int16_t* input_2,
int32_t multiplier, int32_t shift, int32_t n_batch,
int32_t n_input, int32_t output_zp, int8_t* output);
// Element-wise saturating addition of two quantized vectors without rescaling.
// Parameters:
// - input_1: batch vector of size n_batch * n_input; 16 bit.
// - input_2: batch vector of size n_batch * n_input; 16 bit.
// - n_batch: the number of batches.
// - n_input: the size for input and output.
// - output: the 8 bit output of size n_batch * n_input.
// Output does not need to be initialized.
void CwiseAdd(const int16_t* input_1, const int16_t* input_2, int n_batch,
int n_input, int16_t* output);
// Element-wise in-place clipping of a vector. Overloaded for float, int16_t,
// int8_t. Parameters:
// - vector: vector of size v_size.
// - v_size: the size of the vector.
// - clipping_value: the value used for clipping.
void CwiseClipping(float* vector, const int v_size, const float clipping_value);
void CwiseClipping(int16_t* vector, const int v_size,
const int16_t clipping_value);
void CwiseClipping(int8_t* vector, const int v_size,
const int8_t clipping_value);
// Dot product of two vectors.
float VectorVectorDotProduct(const float* vector1, const float* vector2,
int v_size);
// Dot product of two batch vectors of size n_batch * v_size:
// vector1 = [x_1_1, x_1_2, ..., x_1_vsize,
// x_2_1, x_2_2, ..., x_2_vsize,
// ...
// x_nbatch_1,..., x_nbatch_vsize]
// vector2 = [y_1_1, y_1_2, ..., y_1_vsize,
// y_2_1, y_2_2, ..., y_2_vsize,
// ...
// y_nbatch_1,..., y_nbatch_vsize]
// Then result will be a vector of n_batch size starting from 'result':
// [x_1_1 * y_1_1 + x_1_2 * y_1_2 + ... + x_1_vsize * y_1_vsize,
// x_2_1 * y_2_1 + x_2_2 * y_2_2 + ... + x_2_vsize * y_2_vsize,
// ...
// x_nbatch_1 * y_nbatch_1 + ... + x_nbatch_vsize * y_nbatch_vsize]
template <typename T>
inline void BatchVectorBatchVectorDotProduct(const T* vector1, const T* vector2,
int v_size, int n_batch,
T* result) {
for (int b = 0; b < n_batch; b++) {
result[b] = VectorVectorDotProduct(vector1, vector2, v_size);
vector1 += v_size;
vector2 += v_size;
}
}
// Same as above but input is 16bit and output is 32bit.
void BatchVectorBatchVectorDotProduct(const int16_t* vector1,
const int16_t* vector2, int v_size,
int n_batch, int32_t* result);
// Same as above, but inputs are 16bit integer and output is 16bit integer.
void VectorBatchVectorCwiseProductAccumulate(const int16_t* vector, int v_size,
const int16_t* batch_vector,
int n_batch, int32_t multiplier,
int shift, int16_t* result);
// Compute "1.0f - elements of vector" (used in CIFG).
void Sub1Vector(const float* vector, int v_size, float* result);
// Compute "1.0f - elements of vector" (used in CIFG) for int16 input.
// "vector" has range [0, 32767] because it is the output of sigmoid function.
void Sub1Vector(const int16_t* vector, int v_size, int16_t* result);
// Multiply all elements of vector with a scalar.
void VectorScalarMultiply(const int8_t* vector, int v_size, float scale,
float* result);
// Reduce-sum on a float input vector:
// input_vector: float pointer to input vector.
// output_vector: float pointer to vector.
// output_size: output vector size.
// reduction_size: number of consecutive elements from input vector which are
// added to get one element of output.
void ReductionSumVector(const float* input_vector, float* output_vector,
int output_size, int reduction_size);
// Same as above but input/output is 32 bit integer.
void ReductionSumVector(const int32_t* input_vector, int32_t* output_vector,
int output_size, int reduction_size);
// Same as above but input is 8 bit integer.
void ReductionSumVector(const int8_t* input_vector, int32_t* output_vector,
int output_size, int reduction_size);
// Layer norm for each batch.
void MeanStddevNormalization(const float* input_vector, float* output_vector,
int v_size, int n_batch);
// Saturate Add with rescale on both inputs.
void TwoGateSaturatingAdd(const int8_t* input, int8_t input_zp,
const int8_t* recurrent, int8_t recurrent_zp,
int32_t input_effective_scale_a,
int32_t input_effective_scale_b,
int32_t recurrent_effective_scale_a,
int32_t recurrent_effective_scale_b, int32_t n_batch,
int32_t n_cell, int16_t* output);
} // namespace tensor_utils
} // namespace tflite
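A hedged usage sketch of the hybrid path documented above: quantize the float activations per batch, fold the per-batch scales together with the weight scale as the MatrixBatchVectorMultiplyAccumulate comment describes, then accumulate into a float result. Buffer handling and the single weight scale are illustrative assumptions.

#include <cstdint>
#include <vector>

// Assumption: the declarations above live in portable_tensor_utils.h, per the
// include swap in the next file of this commit.
#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h"

// Sketch only: hybrid float-activation / int8-weight matmul. `result` holds
// m_rows * n_batch floats and is accumulated into, so pre-zero it if needed.
inline void ExampleHybridMatmul(const int8_t* weights, float weight_scale,
                                const float* input, int m_rows, int m_cols,
                                int n_batch, float* result) {
  std::vector<int8_t> quantized(static_cast<size_t>(n_batch) * m_cols);
  std::vector<float> scales(n_batch);
  std::vector<int32_t> zero_points(n_batch);
  // Per-batch symmetric quantization of the float activations.
  tflite::tensor_utils::BatchQuantizeFloats(
      input, n_batch, m_cols, quantized.data(), scales.data(),
      zero_points.data(), /*do_asymmetric=*/false);
  // Fold the weight scale into the per-batch factors, as documented above.
  for (int b = 0; b < n_batch; ++b) scales[b] *= weight_scale;
  tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate(
      weights, m_rows, m_cols, quantized.data(), scales.data(), n_batch,
      result);
}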

View File

@@ -20,7 +20,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/tensor_utils_common.h"
#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {

View File

@@ -0,0 +1,56 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_ARGS_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_ARGS_H_
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
template <typename T>
void BroadcastArgs(const RuntimeShape& input1_shape, const T* input1_data,
const RuntimeShape& input2_shape, const T* input2_data,
const RuntimeShape& output_shape, T* output_data) {
// Gets data at the backward index i of the shape tensor. Returns 1 if the
// index is out of range.
auto get_shape_data = [](const RuntimeShape& shape, const T* data,
int backward_idx) -> T {
int forward_idx = shape.FlatSize() - 1 - backward_idx;
if (forward_idx < 0) return 1;
return data[forward_idx];
};
int output_num_elements = output_shape.FlatSize();
for (int i = 0; i < output_num_elements; ++i) {
int backward_i = output_num_elements - 1 - i;
int shape1_i = get_shape_data(input1_shape, input1_data, i);
int shape2_i = get_shape_data(input2_shape, input2_data, i);
if (shape1_i == 1) {
output_data[backward_i] = shape2_i;
} else if (shape2_i == 1) {
output_data[backward_i] = shape1_i;
} else {
TFLITE_CHECK_EQ(shape1_i, shape2_i);
output_data[backward_i] = shape1_i;
}
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_ARGS_H_
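A small worked example of the kernel above (illustrative): broadcasting the shape vectors {2, 1} and {1, 3} produces {2, 3}.

#include <cstdint>

#include "tensorflow/lite/kernels/internal/reference/broadcast_args.h"

inline void ExampleBroadcastArgs() {
  const int32_t shape1[] = {2, 1};
  const int32_t shape2[] = {1, 3};
  int32_t out[2] = {0, 0};
  // Each argument is a 1-D tensor of length 2 holding a shape.
  tflite::reference_ops::BroadcastArgs(tflite::RuntimeShape({2}), shape1,
                                       tflite::RuntimeShape({2}), shape2,
                                       tflite::RuntimeShape({2}), out);
  // out is now {2, 3}.
}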

View File

@@ -0,0 +1,97 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_TO_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_TO_H_
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/kernel_util.h"
namespace tflite {
namespace reference_ops {
template <int N>
void BroadcastImpl(const NdArrayDesc<N>& input_desc, const char* input_data,
const NdArrayDesc<N>& output_desc, char* output_data,
int indexes[N], int dim, const int last_broadcasting_dim,
const int type_size) {
// Copy data from input to output.
if (dim == last_broadcasting_dim) {
int copy_size = output_desc.strides[dim] * type_size;
const char* data_src =
input_data + SubscriptToIndex(input_desc, indexes) * type_size;
char* data_dst =
output_data + SubscriptToIndex(output_desc, indexes) * type_size;
for (int i = 0; i < output_desc.extents[dim]; ++i, data_dst += copy_size) {
memcpy(data_dst, data_src, copy_size);
}
return;
}
// Recursive call to find the next broadcasting.
for (indexes[dim] = 0; indexes[dim] < input_desc.extents[dim];
++indexes[dim]) {
BroadcastImpl<N>(input_desc, input_data, output_desc, output_data, indexes,
dim + 1, last_broadcasting_dim, type_size);
}
// Duplicate data in output tensor.
indexes[dim] = 0;
if (input_desc.extents[dim] != output_desc.extents[dim]) {
int copy_size = output_desc.strides[dim] * type_size;
char* data_src =
output_data + SubscriptToIndex(output_desc, indexes) * type_size;
char* data_dst = data_src + copy_size;
for (int i = 1; i < output_desc.extents[dim]; ++i, data_dst += copy_size) {
memcpy(data_dst, data_src, copy_size);
}
}
}
template <int N>
inline void BroadcastTo(const RuntimeShape& unextended_input_shape,
const char* input_data,
const RuntimeShape& unextended_output_shape,
char* output_data, TfLiteType data_type) {
NdArrayDesc<N> input_desc;
NdArrayDesc<N> output_desc;
CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_input_shape),
&input_desc);
CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape),
&output_desc);
// Get the last dimension that has broadcasting. At this dimension, the data
// is copied from the input tensor to the output tensor.
int last_broadcast_dim = -1;
for (int i = N - 1; i >= 0; --i) {
if (input_desc.extents[i] != output_desc.extents[i]) {
last_broadcast_dim = i;
break;
}
}
// If non-broadcasting, just copy data from input to output tensor.
if (last_broadcast_dim == -1) {
memcpy(output_data, input_data,
unextended_input_shape.FlatSize() * TfLiteTypeGetSize(data_type));
return;
}
// Broadcasting using memcpy.
int indexes[N] = {0};
BroadcastImpl<N>(input_desc, input_data, output_desc, output_data, indexes, 0,
last_broadcast_dim, TfLiteTypeGetSize(data_type));
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_TO_H_
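A matching sketch for BroadcastTo above. Data travels as raw bytes with the element size taken from the TfLiteType; fixing N at 8 here is an assumption about the maximum rank the calling kernel passes.

#include "tensorflow/lite/kernels/internal/reference/broadcast_to.h"

inline void ExampleBroadcastTo() {
  const float input[3] = {1.f, 2.f, 3.f};
  float output[6] = {};
  // Broadcast a [1, 3] tensor to [2, 3].
  tflite::reference_ops::BroadcastTo<8>(
      tflite::RuntimeShape({1, 3}), reinterpret_cast<const char*>(input),
      tflite::RuntimeShape({2, 3}), reinterpret_cast<char*>(output),
      kTfLiteFloat32);
  // output is now {1, 2, 3, 1, 2, 3}.
}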

View File

@@ -43,7 +43,7 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
(void)im2col_data; // only used in optimized code.
(void)im2col_shape; // only used in optimized code.
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
const int input_depth = input_shape.Dims(3);
const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
if (bias_data) {
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
@@ -52,14 +52,20 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
const int input_width = input_shape.Dims(2);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int filter_input_depth = filter_shape.Dims(3);
const int groups = input_depth / filter_input_depth;
TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
const int filters_per_group = output_depth / groups;
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
const int in_y_origin = (out_y * stride_height) - pad_height;
for (int out_x = 0; out_x < output_width; ++out_x) {
const int in_x_origin = (out_x * stride_width) - pad_width;
for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
auto group = out_channel / filters_per_group;
float total = 0.f;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
const int in_y = in_y_origin + dilation_height_factor * filter_y;
@@ -74,10 +80,11 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
if (!is_point_inside_image) {
continue;
}
for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
float input_value = input_data[Offset(input_shape, batch, in_y,
in_x, in_channel)];
for (int in_channel = 0; in_channel < filter_input_depth;
++in_channel) {
float input_value =
input_data[Offset(input_shape, batch, in_y, in_x,
in_channel + group * filter_input_depth)];
float filter_value = filter_data[Offset(
filter_shape, out_channel, filter_y, filter_x, in_channel)];
total += (input_value * filter_value);
@@ -126,7 +133,7 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
const int input_depth = input_shape.Dims(3);
const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
if (bias_data) {
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
@@ -135,6 +142,10 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
const int input_width = input_shape.Dims(2);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int filter_input_depth = filter_shape.Dims(3);
const int groups = input_depth / filter_input_depth;
TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
const int filters_per_group = output_depth / groups;
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
for (int batch = 0; batch < batches; ++batch) {
@@ -143,6 +154,7 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
for (int out_x = 0; out_x < output_width; ++out_x) {
const int in_x_origin = (out_x * stride_width) - pad_width;
for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
auto group = out_channel / filters_per_group;
int32_t acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
const int in_y = in_y_origin + dilation_height_factor * filter_y;
@@ -158,9 +170,11 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
continue;
}
for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
int32_t input_val = input_data[Offset(input_shape, batch, in_y,
in_x, in_channel)];
for (int in_channel = 0; in_channel < filter_input_depth;
++in_channel) {
int32_t input_val =
input_data[Offset(input_shape, batch, in_y, in_x,
in_channel + group * filter_input_depth)];
int32_t filter_val = filter_data[Offset(
filter_shape, out_channel, filter_y, filter_x, in_channel)];
acc +=
@@ -206,7 +220,7 @@ inline void HybridConvPerChannel(
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
const int input_depth = input_shape.Dims(3);
const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
if (bias_data) {
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
@@ -215,18 +229,24 @@ inline void HybridConvPerChannel(
const int input_width = input_shape.Dims(2);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int filter_input_depth = filter_shape.Dims(3);
const int groups = input_depth / filter_input_depth;
TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
const int filters_per_group = output_depth / groups;
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
auto group = out_channel / filters_per_group;
const int in_x_origin = (out_x * stride_width) - pad_width;
const int in_y_origin = (out_y * stride_height) - pad_height;
int32_t acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
for (int in_channel = 0; in_channel < filter_input_depth;
++in_channel) {
const int in_x = in_x_origin + dilation_width_factor * filter_x;
const int in_y =
in_y_origin + dilation_height_factor * filter_y;
@@ -235,7 +255,8 @@ inline void HybridConvPerChannel(
if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height)) {
int32_t input_val = input_data[Offset(
input_shape, batch, in_y, in_x, in_channel)];
input_shape, batch, in_y, in_x,
in_channel + group * filter_input_depth)];
int32_t filter_val =
filter_data[Offset(filter_shape, out_channel, filter_y,
filter_x, in_channel)];
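The conv changes above add grouped-convolution support: the filter's innermost dimension (filter_input_depth) may be a divisor of the input depth, and each output channel reads only its group's slice of input channels. A standalone sketch of the index arithmetic shared by all three kernels (illustrative, not library code):

// Maps (out_channel, in_channel within the filter) to the input-tensor
// channel; this is the `in_channel + group * filter_input_depth` term above.
inline int GroupedInputChannel(int out_channel, int in_channel,
                               int output_depth, int input_depth,
                               int filter_input_depth) {
  const int groups = input_depth / filter_input_depth;   // e.g. 8 / 4 = 2
  const int filters_per_group = output_depth / groups;   // e.g. 16 / 2 = 8
  const int group = out_channel / filters_per_group;     // which input slice
  return in_channel + group * filter_input_depth;
}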

View File

@@ -48,7 +48,7 @@ inline void ConvPerChannel(
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
const int input_depth = input_shape.Dims(3);
const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
if (bias_data) {
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
@@ -59,6 +59,10 @@ inline void ConvPerChannel(
const int input_width = input_shape.Dims(2);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int filter_input_depth = filter_shape.Dims(3);
const int groups = input_depth / filter_input_depth;
TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
const int filters_per_group = output_depth / groups;
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
for (int batch = 0; batch < batches; ++batch) {
@@ -67,6 +71,7 @@ inline void ConvPerChannel(
for (int out_x = 0; out_x < output_width; ++out_x) {
const int in_x_origin = (out_x * stride_width) - pad_width;
for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
auto group = out_channel / filters_per_group;
int32_t acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
const int in_y = in_y_origin + dilation_height_factor * filter_y;
@@ -82,9 +87,11 @@ inline void ConvPerChannel(
continue;
}
for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
int32_t input_val = input_data[Offset(input_shape, batch, in_y,
in_x, in_channel)];
for (int in_channel = 0; in_channel < filter_input_depth;
++in_channel) {
int32_t input_val =
input_data[Offset(input_shape, batch, in_y, in_x,
in_channel + group * filter_input_depth)];
int32_t filter_val = filter_data[Offset(
filter_shape, out_channel, filter_y, filter_x, in_channel)];
// Accumulate with 32 bits accumulator.
@@ -126,12 +133,13 @@ inline void ConvPerChannel(
// Fixed-point per-channel-quantization convolution reference kernel.
// 16-bit data and 8-bit filter
template <typename AccumScalar>
inline void ConvPerChannel(
const ConvParams& params, const int32_t* output_multiplier,
const int32_t* output_shift, const RuntimeShape& input_shape,
const int16_t* input_data, const RuntimeShape& filter_shape,
const int8_t* filter_data, const RuntimeShape& bias_shape,
const std::int64_t* bias_data, const RuntimeShape& output_shape,
const AccumScalar* bias_data, const RuntimeShape& output_shape,
int16_t* output_data) {
// Get parameters.
const int stride_width = params.stride_width;
@@ -151,7 +159,7 @@ inline void ConvPerChannel(
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
const int input_depth = input_shape.Dims(3);
const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
if (bias_data) {
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
@@ -162,6 +170,10 @@ inline void ConvPerChannel(
const int input_width = input_shape.Dims(2);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int filter_input_depth = filter_shape.Dims(3);
const int groups = input_depth / filter_input_depth;
TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
const int filters_per_group = output_depth / groups;
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
for (int batch = 0; batch < batches; ++batch) {
@@ -170,7 +182,8 @@ inline void ConvPerChannel(
for (int out_x = 0; out_x < output_width; ++out_x) {
const int in_x_origin = (out_x * stride_width) - pad_width;
for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
std::int64_t acc = 0;
auto group = out_channel / filters_per_group;
AccumScalar acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
const int in_y = in_y_origin + dilation_height_factor * filter_y;
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
@@ -185,9 +198,11 @@ inline void ConvPerChannel(
continue;
}
for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
int32_t input_val = input_data[Offset(input_shape, batch, in_y,
in_x, in_channel)];
for (int in_channel = 0; in_channel < filter_input_depth;
++in_channel) {
int32_t input_val =
input_data[Offset(input_shape, batch, in_y, in_x,
in_channel + group * filter_input_depth)];
int32_t filter_val = filter_data[Offset(
filter_shape, out_channel, filter_y, filter_x, in_channel)];
// Accumulate with 64 bits accumulator.

View File

@@ -34,12 +34,13 @@ inline void FullyConnected(
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2);
TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
const int filter_dim_count = filter_shape.DimensionsCount();
const int batches = output_shape.Dims(0);
const int output_depth = output_shape.Dims(1);
const int output_dim_count = output_shape.DimensionsCount();
const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
const int output_depth = output_shape.Dims(output_dim_count - 1);
TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
for (int b = 0; b < batches; ++b) {
@@ -62,11 +63,12 @@ inline void FullyConnected(
}
}
template <typename AccumScalar>
inline void FullyConnected(
const FullyConnectedParams& params, const RuntimeShape& input_shape,
const int16_t* input_data, const RuntimeShape& filter_shape,
const int8_t* filter_data, const RuntimeShape& bias_shape,
const int64_t* bias_data, const RuntimeShape& output_shape,
const AccumScalar* bias_data, const RuntimeShape& output_shape,
int16_t* output_data) {
const int32_t filter_offset = params.weights_offset;
const int32_t output_multiplier = params.output_multiplier;
@@ -85,7 +87,7 @@ inline void FullyConnected(
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
for (int b = 0; b < batches; ++b) {
for (int out_c = 0; out_c < output_depth; ++out_c) {
int64_t acc = 0;
AccumScalar acc = 0;
for (int d = 0; d < accum_depth; ++d) {
int32_t input_val = input_data[b * accum_depth + d];
int32_t filter_val = filter_data[out_c * accum_depth + d];
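These ConvPerChannel/FullyConnected changes replace the hard-coded int64 bias and accumulator with an AccumScalar template parameter, so the 16-bit kernels can accumulate in either 32 or 64 bits. A standalone sketch of the same pattern, with hypothetical names, showing how the accumulator width follows the bias type:

#include <cstdint>
#include <cstdio>

template <typename AccumScalar>
AccumScalar DotWithBias(const int16_t* input, const int8_t* filter, int depth,
                        AccumScalar bias) {
  AccumScalar acc = bias;  // accumulator width follows the bias type
  for (int d = 0; d < depth; ++d) {
    acc += static_cast<AccumScalar>(input[d]) * filter[d];
  }
  return acc;
}

int main() {
  const int16_t input[3] = {1000, -2000, 3000};
  const int8_t filter[3] = {10, 20, -30};
  // int32 accumulation, as enabled by the new template parameter...
  const int32_t acc32 = DotWithBias<int32_t>(input, filter, 3, 5);
  // ...and the original int64 path, using the same kernel body.
  const int64_t acc64 = DotWithBias<int64_t>(input, filter, 3, 5);
  std::printf("%d %lld\n", acc32, static_cast<long long>(acc64));
  return 0;
}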

View File

@@ -119,15 +119,16 @@ inline void TransposeConv(
}
}
// int16_t input (zero_point=0), int8_t filter, int64 accumulator
// int16_t input (zero_point=0), int8_t filter, int32 or int64 accumulator
template <typename Scalar>
inline void TransposeConv(
const ConvParams& params, const int32_t* output_multiplier,
const int32_t* output_shift, const RuntimeShape& input_shape,
const int16_t* input_data, const RuntimeShape& filter_shape,
const int8_t* filter_data, const RuntimeShape& bias_shape,
const std::int64_t* bias_data, const RuntimeShape& output_shape,
const Scalar* bias_data, const RuntimeShape& output_shape,
int16_t* output_data, const RuntimeShape& im2col_shape, int8_t* im2col_data,
std::int64_t* scratch_buffer) {
Scalar* scratch_buffer) {
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
const int pad_width = params.padding_values.width;
@@ -157,7 +158,7 @@ inline void TransposeConv(
const int num_elements = output_shape.FlatSize();
// We need to initialize scratch_buffer to all 0s, as we apply the same
// 'scatter' based trick as in the float version.
memset(scratch_buffer, 0, num_elements * sizeof(std::int64_t));
memset(scratch_buffer, 0, num_elements * sizeof(Scalar));
// Loop through input elements one at a time.
for (int batch = 0; batch < batches; ++batch) {
@@ -198,8 +199,8 @@ inline void TransposeConv(
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
std::int64_t acc = scratch_buffer[Offset(output_shape, batch, out_y,
out_x, out_channel)];
Scalar acc = scratch_buffer[Offset(output_shape, batch, out_y, out_x,
out_channel)];
if (bias_data) {
acc += bias_data[out_channel];
}
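The TransposeConv change above types the scratch buffer by the same Scalar parameter, which is why the memset now clears num_elements * sizeof(Scalar) instead of a fixed sizeof(std::int64_t). A small illustrative sketch (hypothetical helper, not part of the diff):

#include <cstdint>
#include <cstring>

// The scratch buffer holds one accumulator per output element, so the number
// of bytes to clear must track the accumulator type.
template <typename Scalar>
void ClearScratch(Scalar* scratch_buffer, int num_elements) {
  std::memset(scratch_buffer, 0, num_elements * sizeof(Scalar));
}

int main() {
  std::int32_t scratch32[16];
  std::int64_t scratch64[16];
  ClearScratch(scratch32, 16);  // clears 64 bytes
  ClearScratch(scratch64, 16);  // clears 128 bytes
  return 0;
}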

View File

@@ -0,0 +1,422 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LSTM_CELL_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LSTM_CELL_H_
#include <algorithm>
#include <cmath>
#include <cstdint>
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/reference/concatenation.h"
#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
inline void LstmCell(
const LstmCellParams& params, const RuntimeShape& unextended_input_shape,
const float* input_data, const RuntimeShape& unextended_prev_activ_shape,
const float* prev_activ_data, const RuntimeShape& weights_shape,
const float* weights_data, const RuntimeShape& unextended_bias_shape,
const float* bias_data, const RuntimeShape& unextended_prev_state_shape,
const float* prev_state_data,
const RuntimeShape& unextended_output_state_shape, float* output_state_data,
const RuntimeShape& unextended_output_activ_shape, float* output_activ_data,
const RuntimeShape& unextended_concat_temp_shape, float* concat_temp_data,
const RuntimeShape& unextended_activ_temp_shape, float* activ_temp_data) {
TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
const RuntimeShape input_shape =
RuntimeShape::ExtendedShape(4, unextended_input_shape);
const RuntimeShape prev_activ_shape =
RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
const RuntimeShape bias_shape =
RuntimeShape::ExtendedShape(4, unextended_bias_shape);
const RuntimeShape prev_state_shape =
RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
const RuntimeShape output_state_shape =
RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
const RuntimeShape output_activ_shape =
RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
const RuntimeShape concat_temp_shape =
RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
const RuntimeShape activ_temp_shape =
RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
const int weights_dim_count = weights_shape.DimensionsCount();
const int batches =
MatchingDim(input_shape, 0, prev_activ_shape, 0, prev_state_shape, 0,
output_state_shape, 0, output_activ_shape, 0);
const int height =
MatchingDim(input_shape, 1, prev_activ_shape, 1, prev_state_shape, 1,
output_state_shape, 1, output_activ_shape, 1);
const int width =
MatchingDim(input_shape, 2, prev_activ_shape, 2, prev_state_shape, 2,
output_state_shape, 2, output_activ_shape, 2);
const int input_depth = input_shape.Dims(3);
const int prev_activ_depth = prev_activ_shape.Dims(3);
const int total_input_depth = prev_activ_depth + input_depth;
TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1),
total_input_depth);
TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
const int intern_activ_depth =
MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
TFLITE_DCHECK_EQ(weights_shape.FlatSize(),
intern_activ_depth * total_input_depth);
TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
const int output_depth =
MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
3, output_activ_shape, 3);
TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
// Concatenate prev_activ and input data together
float const* concat_input_arrays_data[2] = {input_data, prev_activ_data};
const RuntimeShape* concat_input_arrays_shapes[2] = {&input_shape,
&prev_activ_shape};
tflite::ConcatenationParams concat_params;
concat_params.axis = 3;
concat_params.inputs_count = 2;
Concatenation(concat_params, concat_input_arrays_shapes,
concat_input_arrays_data, concat_temp_shape, concat_temp_data);
// Fully connected
tflite::FullyConnectedParams fc_params;
fc_params.float_activation_min = std::numeric_limits<float>::lowest();
fc_params.float_activation_max = std::numeric_limits<float>::max();
FullyConnected(fc_params, concat_temp_shape, concat_temp_data, weights_shape,
weights_data, bias_shape, bias_data, activ_temp_shape,
activ_temp_data);
// Memory state update (the LSTM "guts")
for (int b = 0; b < batches; ++b) {
for (int w = 0; w < width; ++w) {
for (int h = 0; h < height; ++h) {
for (int c = 0; c < output_depth; ++c) {
const float input_gate =
1.f /
(1.f + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w,
0 * output_depth + c)]));
const float new_input = std::tanh(activ_temp_data[Offset(
activ_temp_shape, b, h, w, 1 * output_depth + c)]);
const float forget_gate =
1.f /
(1.f + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w,
2 * output_depth + c)]));
const float output_gate =
1.f /
(1.f + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w,
3 * output_depth + c)]));
const float new_state =
input_gate * new_input +
forget_gate *
prev_state_data[Offset(prev_state_shape, b, h, w, c)];
output_state_data[Offset(output_state_shape, b, h, w, c)] = new_state;
output_activ_data[Offset(output_activ_shape, b, h, w, c)] =
output_gate * std::tanh(new_state);
}
}
}
}
}
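For reference, the per-element gate math of the float LstmCell above, pulled out into a standalone sketch (hypothetical names; sigmoid for the input/forget/output gates, tanh for the candidate input and the final activation):

#include <cmath>
#include <cstdio>

struct LstmGateResult {
  float new_state;  // value written to output_state_data
  float new_activ;  // value written to output_activ_data
};

inline LstmGateResult LstmElement(float input_gate_in, float new_input_in,
                                  float forget_gate_in, float output_gate_in,
                                  float prev_state) {
  const float input_gate = 1.f / (1.f + std::exp(-input_gate_in));
  const float new_input = std::tanh(new_input_in);
  const float forget_gate = 1.f / (1.f + std::exp(-forget_gate_in));
  const float output_gate = 1.f / (1.f + std::exp(-output_gate_in));
  const float new_state = input_gate * new_input + forget_gate * prev_state;
  return {new_state, output_gate * std::tanh(new_state)};
}

int main() {
  const LstmGateResult r = LstmElement(0.f, 0.5f, 0.f, 0.f, 0.2f);
  std::printf("state=%f activ=%f\n", r.new_state, r.new_activ);
  return 0;
}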
// Quantized LSTM cell implementation.
// The quantization of the input, output arrays is as follows:
// - The input activations are quantized as uint8 on the interval
// [-1, 127/128].
// The rationale for that is that this is the natural interval for output
// activations (see next point) and these need to be concatenated together.
// We could accommodate different ranges by re-scaling, but we empirically
// found that setting the input activations range to be [-1, 127/128] in the
// first place, removing the need for re-scaling, greatly improves accuracy.
// - The output activations are quantized as uint8 on the interval
// [-1, 127/128].
// The rationale for that is that the definition of an LSTM cell makes them
// intrinsically constrained in [-1, 1]; tweaking that to [-1, 127/128]
// makes for simpler, more accurate fixed-point arithmetic.
// - The output-at-previous-timestep state array is obviously quantized as
// the output activations.
// - The internal LSTM memory (not the output-at-previous-timestep, the other
// internal state array) is int16-quantized and may use any power-of-two,
// symmetric range i.e. [-2^N, 2^N * 32767/32768] for any N, which we call
// StateIntegerBits below, see the below discussion of that template
// parameter ("The StateIntegerBits template parameter").
// - The output of the internal fully-connected node is int16-quantized
// on the interval [-8, 8 * 32767/32768], the rationale for which is
// explained just below ("Why [-8, 8] for fully-connected output?").
//
//
// === The StateIntegerBits template parameter ===
//
// The StateIntegerBits template parameter controls the fixed-point format used
// to represent the internal memory of the LSTM cell (not the
// output-at-previous-timestep, the other internal state array). It's currently
// a template parameter so that the model can control that. The most typical
// value for StateIntegerBits is 4. Other plausible values are anywhere between
// 3 and 5. We might eventually standardize on a single supported value, e.g. 4,
// and drop that template parameter. The reason why it can't be a runtime
// parameter is that this controls the fixed-point format used, i.e. we need to
// generate actually different code based on it. In particular, we generate code
// for a fixed-point tanh() implementation for that format, which internally
// uses a fixed-point exp() implementation, which internally uses a
// barrel-shifter with a number of steps that depends on StateIntegerBits.
// Another consequence of that is that a higher value of StateIntegerBits
// results in a more expensive implementation (more barrel shifter steps
// needed).
//
//
// === Why [-8, 8] for fully-connected output? ===
//
// This array is only fed to Logistic and Tanh functions, for which
// the quantized implementation will want to use fixed-point arithmetic,
// requiring a power-of-two representation interval. Thus, we should right
// away quantize this array to a power-of-two interval; otherwise,
// implementation will need to rescale that, losing any benefit that a tighter
// representation interval might otherwise yield, while introducing some
// numerical error and computational overhead.
//
// Now, Logistic and Tanh
// are nearly constant (nearly equal to their horizontal asymptotes)
// outside of a small bounded interval around 0:
//
// Logistic(4) = 1 - 1.8e-2 Tanh(4) = 1 - 6.7e-4
// Logistic(8) = 1 - 3.4e-4 Tanh(8) = 1 - 2.3e-7
// Logistic(16) = 1 - 1.1e-7 Tanh(16) = 1 - 2.5e-14
//
// From this, we see that clamping to [-4, 4] would be too inaccurate
// (the error of 1.8e-2 on Logistic would be felt even in 8bit precision)
// while clamping to [-16, 16] would make no difference even in float32.
// However, for a fixed-point implementation in 16-bit integers, using 5
// integer bits to represent the [-16, 16] range would leave only 11
// fractional bits, giving an increment of 2^-11 = 4.9e-4 between consecutive
// representable values. Notice that this is higher than the
// worst-case clamping error with clamping to [-8, 8]: 3.4e-4 for Logistic.
// Using [-8, 8] thus seems like the better compromise overall, enjoying
// an increment of 2.4e-4 between representable values and a worst-case
// clamping error of 3.4e-4, both better than the increment of 4.9e-4 with
// [-16, 16].
//
// Moreover, all other things being equal, it is nice to choose the narrower
// representation range, as that makes the implementation of fixed-point
// math functions a little cheaper (each integer bit requires an additional
// barrel-shifter step in the implementation of exp(-x)). That is further
// reason to prefer [-8, 8] over [-16, 16]. The choice of [-16, 16] would make
// sense for 32-bit float or 32-bit fixed-point quantization, but we are
// aiming for 16-bit fixed-point quantization of these internal nodes here.
//
template <int StateIntegerBits>
inline void LstmCell(const LstmCellParams& params,
const RuntimeShape& unextended_input_shape,
const uint8_t* input_data_uint8,
const RuntimeShape& unextended_prev_activ_shape,
const uint8_t* prev_activ_data_uint8,
const RuntimeShape& weights_shape,
const uint8_t* weights_data_uint8,
const RuntimeShape& unextended_bias_shape,
const int32_t* bias_data_int32,
const RuntimeShape& unextended_prev_state_shape,
const int16_t* prev_state_data_int16,
const RuntimeShape& unextended_output_state_shape,
int16_t* output_state_data_int16,
const RuntimeShape& unextended_output_activ_shape,
uint8_t* output_activ_data_uint8,
const RuntimeShape& unextended_concat_temp_shape,
uint8_t* concat_temp_data_uint8,
const RuntimeShape& unextended_activ_temp_shape,
int16_t* activ_temp_data_int16, void* gemmlowp_context) {
(void)gemmlowp_context; // only used in optimized code.
int32_t weights_zero_point = params.weights_zero_point;
int32_t accum_multiplier = params.accum_multiplier;
int accum_shift = params.accum_shift;
TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
const RuntimeShape input_shape =
RuntimeShape::ExtendedShape(4, unextended_input_shape);
const RuntimeShape prev_activ_shape =
RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
const RuntimeShape bias_shape =
RuntimeShape::ExtendedShape(4, unextended_bias_shape);
const RuntimeShape prev_state_shape =
RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
const RuntimeShape output_state_shape =
RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
const RuntimeShape output_activ_shape =
RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
const RuntimeShape concat_temp_shape =
RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
const RuntimeShape activ_temp_shape =
RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
// Gather dimensions information, and perform consistency checks.
const int weights_dim_count = weights_shape.DimensionsCount();
const int outer_size = MatchingFlatSizeSkipDim(
input_shape, 3, prev_activ_shape, prev_state_shape, output_state_shape,
output_activ_shape);
const int input_depth = input_shape.Dims(3);
const int prev_activ_depth = prev_activ_shape.Dims(3);
const int total_input_depth = prev_activ_depth + input_depth;
TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1),
total_input_depth);
const int intern_activ_depth =
MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
TFLITE_DCHECK_EQ(weights_shape.FlatSize(),
intern_activ_depth * total_input_depth);
TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
const int output_depth =
MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
3, output_activ_shape, 3);
TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
const int fc_batches = FlatSizeSkipDim(activ_temp_shape, 3);
const int fc_output_depth =
MatchingDim(weights_shape, weights_dim_count - 2, activ_temp_shape, 3);
const int fc_accum_depth = total_input_depth;
TFLITE_DCHECK_EQ(fc_output_depth, 4 * output_depth);
// Depth-concatenate prev_activ and input data together.
uint8_t const* concat_input_arrays_data[2] = {input_data_uint8,
prev_activ_data_uint8};
const RuntimeShape* concat_input_arrays_shapes[2] = {&input_shape,
&prev_activ_shape};
tflite::ConcatenationParams concat_params;
concat_params.axis = 3;
concat_params.inputs_count = 2;
Concatenation(concat_params, concat_input_arrays_shapes,
concat_input_arrays_data, concat_temp_shape,
concat_temp_data_uint8);
// Implementation of the fully connected node inside the LSTM cell.
// The operands are 8-bit integers, the accumulators are internally 32bit
// integers, and the output is 16-bit fixed-point with 3 integer bits so
// the output range is [-2^3, 2^3] == [-8, 8]. The rationale for that
// is explained in the function comment above.
for (int b = 0; b < fc_batches; ++b) {
for (int out_c = 0; out_c < fc_output_depth; ++out_c) {
// Internal accumulation.
// Initialize accumulator with the bias-value.
int32_t accum = bias_data_int32[out_c];
// Accumulation loop.
for (int d = 0; d < fc_accum_depth; ++d) {
int16_t input_val =
concat_temp_data_uint8[b * fc_accum_depth + d] - 128;
int16_t weights_val =
weights_data_uint8[out_c * fc_accum_depth + d] - weights_zero_point;
accum += input_val * weights_val;
}
// Down-scale the final int32 accumulator to the scale used by our
// (16-bit, using 3 integer bits) fixed-point format. The quantized
// multiplier and shift here have been pre-computed offline
// (e.g. by toco).
accum =
MultiplyByQuantizedMultiplier(accum, accum_multiplier, accum_shift);
// Saturate, cast to int16, and store to the temporary activations array.
accum = std::max(-32768, std::min(32767, accum));
activ_temp_data_int16[out_c + fc_output_depth * b] = accum;
}
}
// Rest of the LSTM cell: tanh and logistic math functions, and some adds
// and muls, all done in 16-bit fixed-point.
for (int b = 0; b < outer_size; ++b) {
for (int c = 0; c < output_depth; ++c) {
// Define the fixed-point data types that we will use here. All use
// int16 as the underlying integer type i.e. all are 16-bit fixed-point.
// They only differ by the number of integral vs. fractional bits,
// determining the range of values that they can represent.
//
// F0 uses 0 integer bits, range [-1, 1].
// This is the return type of math functions such as tanh, logistic,
// whose range is in [-1, 1].
using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
// F3 uses 3 integer bits, range [-8, 8].
// This is the range of the previous fully-connected node's output,
// which is our input here.
using F3 = gemmlowp::FixedPoint<std::int16_t, 3>;
// FS uses StateIntegerBits integer bits, range [-2^StateIntegerBits,
// 2^StateIntegerBits]. It's used to represent the internal state, whose
// number of integer bits is currently dictated by the model. See comment
// on the StateIntegerBits template parameter above.
using FS = gemmlowp::FixedPoint<std::int16_t, StateIntegerBits>;
// Implementation of input gate, using fixed-point logistic function.
F3 input_gate_input = F3::FromRaw(
activ_temp_data_int16[b * fc_output_depth + 0 * output_depth + c]);
F0 input_gate_output = gemmlowp::logistic(input_gate_input);
// Implementation of input modulation gate, using fixed-point tanh
// function.
F3 input_modulation_gate_input = F3::FromRaw(
activ_temp_data_int16[b * fc_output_depth + 1 * output_depth + c]);
F0 input_modulation_gate_output =
gemmlowp::tanh(input_modulation_gate_input);
// Implementation of forget gate, using fixed-point logistic function.
F3 forget_gate_input = F3::FromRaw(
activ_temp_data_int16[b * fc_output_depth + 2 * output_depth + c]);
F0 forget_gate_output = gemmlowp::logistic(forget_gate_input);
// Implementation of output gate, using fixed-point logistic function.
F3 output_gate_input = F3::FromRaw(
activ_temp_data_int16[b * fc_output_depth + 3 * output_depth + c]);
F0 output_gate_output = gemmlowp::logistic(output_gate_input);
// Implementation of internal multiplication nodes, still in fixed-point.
F0 input_times_input_modulation =
input_gate_output * input_modulation_gate_output;
FS prev_state = FS::FromRaw(prev_state_data_int16[b * output_depth + c]);
FS prev_state_times_forget_state = forget_gate_output * prev_state;
// Implementation of internal addition node, saturating.
FS new_state = gemmlowp::SaturatingAdd(
gemmlowp::Rescale<StateIntegerBits>(input_times_input_modulation),
prev_state_times_forget_state);
// Implementation of last internal Tanh node, still in fixed-point.
// Since a Tanh fixed-point implementation is specialized for a given
// number of integer bits, and each specialization can have a substantial
// code size, and we already used above a Tanh on an input with 3 integer
// bits, and per the table in the above function comment there is no
// significant accuracy to be lost by clamping to [-8, +8] for a
// 3-integer-bits representation, let us just do that. This helps people
// porting this to targets where code footprint must be minimized.
F3 new_state_f3 = gemmlowp::Rescale<3>(new_state);
F0 output_activ_int16 = output_gate_output * gemmlowp::tanh(new_state_f3);
// Store the new internal state back to memory, as 16-bit integers.
// Note: here we store the original value with StateIntegerBits, not
// the rescaled 3-integer-bits value fed to tanh.
output_state_data_int16[b * output_depth + c] = new_state.raw();
// Down-scale the output activations to 8-bit integers, saturating,
// and store back to memory.
int16_t rescaled_output_activ =
gemmlowp::RoundingDivideByPOT(output_activ_int16.raw(), 8);
int16_t clamped_output_activ = std::max<int16_t>(
-128, std::min<int16_t>(127, rescaled_output_activ));
output_activ_data_uint8[b * output_depth + c] =
128 + clamped_output_activ;
}
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LSTM_CELL_H_
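A quick numeric check of the representation-interval argument in the comment above, assuming a 16-bit fixed-point format with 11 fractional bits for [-16, 16] and 12 for [-8, 8]:

#include <cmath>
#include <cstdio>

int main() {
  // [-16, 16]: 11 fractional bits -> step 2^-11 ~= 4.9e-4
  const double step_16 = std::ldexp(1.0, -11);
  // [-8, 8]: 12 fractional bits -> step 2^-12 ~= 2.4e-4
  const double step_8 = std::ldexp(1.0, -12);
  std::printf("step for [-16,16]: %.2e\nstep for [-8,8]:  %.2e\n", step_16,
              step_8);
  return 0;
}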

View File

@@ -227,6 +227,41 @@ void PortableSparseMatrixBatchVectorMultiplyAccumulate1x4(
}
}
void PortableSparseMatrixBatchVectorMultiplyAccumulate1x16(
const int8_t* __restrict__ matrix, const int32_t* __restrict__ segments,
const int32_t* __restrict__ indices, int m_rows, int m_cols,
const int8_t* __restrict__ vector, const int32_t* __restrict__ bias_vector,
int n_batch, const int32_t input_offset, const int32_t output_multiplier,
const int32_t output_shift, const int32_t output_offset,
const int32_t output_activation_min, const int32_t output_activation_max,
int8_t* __restrict__ result) {
const int kBlockSize = 16;
TFLITE_DCHECK_EQ(m_cols % kBlockSize, 0);
for (int batch = 0; batch < n_batch; ++batch) {
const int8_t* matrix_ptr = matrix;
for (int row = 0; row < m_rows; ++row) {
int32_t dot_prod = 0;
const int8_t* vector_in_batch = vector + batch * m_cols;
for (int i = segments[row]; i < segments[row + 1]; ++i) {
const int block_start_index = indices[i] * kBlockSize;
const int8_t* vector_block_in_batch_ptr =
vector_in_batch + block_start_index;
for (int c = 0; c < kBlockSize; c++) {
dot_prod += *matrix_ptr * *vector_block_in_batch_ptr++;
dot_prod += *matrix_ptr++ * input_offset;
}
}
const int32_t bias_value = bias_vector != nullptr ? bias_vector[row] : 0;
dot_prod = MultiplyByQuantizedMultiplier(dot_prod + bias_value,
output_multiplier, output_shift);
dot_prod += output_offset;
result[batch * m_rows + row] =
static_cast<int8_t>(ActivationFunctionWithMinMax(
dot_prod, output_activation_min, output_activation_max));
}
}
}
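As far as can be read from the loops above, the new 1x16 sparse kernel consumes a CSR-style layout over 16-wide blocks: segments gives each row's range of non-zero blocks, and indices[i] * kBlockSize is a block's starting column. A small illustrative reconstruction with made-up values:

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  const int kBlockSize = 16;
  const int kRows = 2, kCols = 32;
  // Row 0 owns block 0 (columns 0..15); row 1 owns block 1 (columns 16..31).
  std::vector<int32_t> segments = {0, 1, 2};
  std::vector<int32_t> indices = {0, 1};
  std::vector<int8_t> blocks(indices.size() * kBlockSize, 1);  // stored values

  // Reconstruct the dense matrix to show what the layout encodes.
  std::vector<int8_t> dense(kRows * kCols, 0);
  const int8_t* block_ptr = blocks.data();
  for (int row = 0; row < kRows; ++row) {
    for (int32_t i = segments[row]; i < segments[row + 1]; ++i) {
      const int start_col = indices[i] * kBlockSize;
      for (int c = 0; c < kBlockSize; ++c) {
        dense[row * kCols + start_col + c] = *block_ptr++;
      }
    }
  }
  std::printf("dense[0][0]=%d dense[1][16]=%d\n", dense[0],
              dense[1 * kCols + 16]);
  return 0;
}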
void PortableSparseMatrixBatchVectorMultiplyAccumulate(
const float* __restrict__ matrix, const uint8_t* __restrict__ ledger,
int m_rows, int m_cols, const float* __restrict__ vector, int n_batch,

View File

@@ -0,0 +1,333 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_H_
#include "tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h"
#if defined(_MSC_VER)
#define __restrict__ __restrict
#endif
namespace tflite {
namespace tensor_utils {
// Check if all entries of a vector are zero for float.
bool IsZeroVector(const float* vector, int v_size) {
return PortableIsZeroVector(vector, v_size);
}
// Check if all entries of a vector are zero for int8_t.
bool IsZeroVector(const int8_t* vector, int v_size) {
return PortableIsZeroVector(vector, v_size);
}
void SymmetricQuantizeFloats(const float* values, const int size,
int8_t* quantized_values, float* min, float* max,
float* scaling_factor) {
PortableSymmetricQuantizeFloats(values, size, quantized_values, min, max,
scaling_factor);
}
void SymmetricQuantizeFloats(const float* values, const int size,
int8_t* quantized_values, float min_value,
float max_value, float* scaling_factor) {
PortableSymmetricQuantizeFloats(values, size, quantized_values, min_value,
max_value, scaling_factor);
}
void AsymmetricQuantizeFloats(const float* values, const int size,
int8_t* quantized_values, float* scaling_factor,
int32_t* offset) {
PortableAsymmetricQuantizeFloats(values, size, quantized_values,
scaling_factor, offset);
}
void MatrixBatchVectorMultiplyAccumulate(const float* matrix, int m_rows,
int m_cols, const float* vector,
int n_batch, float* result) {
PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector,
n_batch, result);
}
void MatrixBatchVectorMultiplyAccumulate(const int8_t* __restrict__ matrix,
const int m_rows, const int m_cols,
const int8_t* __restrict__ vector,
const float* scaling_factors,
int n_batch,
float* __restrict__ result) {
PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector,
scaling_factors, n_batch, result);
}
void MatrixBatchVectorMultiplyAccumulate(
const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
const int8_t* __restrict__ vectors, const float* scaling_factors,
int n_batch, float* __restrict__ result, const float* per_channel_scale,
const int32_t* input_offset, int32_t* scratch, int32_t* row_sums,
bool* compute_row_sums, CpuBackendContext* context) {
PortableMatrixBatchVectorMultiplyAccumulate(
matrix, m_rows, m_cols, vectors, scaling_factors, n_batch, result,
per_channel_scale, input_offset, scratch, row_sums, compute_row_sums,
context);
}
void MatrixBatchVectorMultiplyAccumulate(const int8_t* __restrict__ matrix,
const int m_rows, const int m_cols,
const int8_t* __restrict__ vector,
const float* scaling_factors,
int n_batch, int32_t* scratch,
float* __restrict__ result,
CpuBackendContext* context) {
PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector,
scaling_factors, n_batch, result);
}
void SparseMatrixBatchVectorMultiplyAccumulate1x4(
const float* __restrict__ matrix, const int32_t* __restrict__ segments,
const int32_t* __restrict__ indices, int m_rows, int m_cols,
const float* __restrict__ vector, int n_batch, float* __restrict__ result) {
PortableSparseMatrixBatchVectorMultiplyAccumulate1x4(
matrix, segments, indices, m_rows, m_cols, vector, n_batch, result);
}
void SparseMatrixBatchVectorMultiplyAccumulate(
const float* __restrict__ matrix, const uint8_t* __restrict__ ledger,
int m_rows, int m_cols, const float* __restrict__ vector, int n_batch,
float* __restrict__ result) {
PortableSparseMatrixBatchVectorMultiplyAccumulate(
matrix, ledger, m_rows, m_cols, vector, n_batch, result);
}
void SparseMatrixBatchVectorMultiplyAccumulate1x16(
const int8_t* __restrict__ matrix, const int32_t* __restrict__ segments,
const int32_t* __restrict__ indices, int m_rows, int m_cols,
const int8_t* __restrict__ vector, const int32_t* __restrict__ bias_vector,
int n_batch, const int32_t input_offset, const int32_t output_multiplier,
const int32_t output_shift, const int32_t output_offset,
const int32_t output_activation_min, const int32_t output_activation_max,
int8_t* __restrict__ result) {
PortableSparseMatrixBatchVectorMultiplyAccumulate1x16(
matrix, segments, indices, m_rows, m_cols, vector, bias_vector, n_batch,
input_offset, output_multiplier, output_shift, output_offset,
output_activation_min, output_activation_max, result);
}
void SparseMatrixBatchVectorMultiplyAccumulate(
const int8_t* __restrict__ matrix, const uint8_t* ledger, const int m_rows,
const int m_cols, const int8_t* __restrict__ vectors,
const float* scaling_factors, int n_batch, float* __restrict__ result) {
PortableSparseMatrixBatchVectorMultiplyAccumulate(
matrix, ledger, m_rows, m_cols, vectors, scaling_factors, n_batch,
result);
}
void MatrixBatchVectorMultiplyAccumulate(
const int8_t* input, const int32_t* bias,
const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
int32_t* scratch, int16_t* output, CpuBackendContext* context) {
PortableMatrixBatchVectorMultiplyAccumulate(
input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input,
n_output, output_zp, scratch, output, context);
}
void MatrixBatchVectorMultiplyAccumulate(
const int8_t* input, const int32_t* bias,
const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
int32_t* scratch, int8_t* output, CpuBackendContext* context) {
PortableMatrixBatchVectorMultiplyAccumulate(
input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input,
n_output, output_zp, scratch, output, context);
}
void MatrixScalarMultiplyAccumulate(const int8_t* matrix, int32_t scalar,
int32_t n_row, int32_t n_col,
int32_t* output) {
PortableMatrixScalarMultiplyAccumulate(matrix, scalar, n_row, n_col, output);
}
void MatrixBatchVectorMultiply(const int8_t* input, int32_t input_zeropoint,
const int8_t* input_to_gate_weights,
int32_t input_to_gate_effective_scale_a,
int32_t input_to_gate_effective_scale_b,
int32_t n_batch, int32_t n_input, int32_t n_cell,
int8_t* gate_output, int8_t gate_output_zp) {
PortableMatrixBatchVectorMultiply(
input, input_zeropoint, input_to_gate_weights,
input_to_gate_effective_scale_a, input_to_gate_effective_scale_b, n_batch,
n_input, n_cell, gate_output, gate_output_zp);
}
void MatrixBatchVectorMultiply(const int16_t* hidden,
const int8_t* hidden_to_output_weights,
int32_t proj_effective_scale_a,
int32_t proj_effective_scale_b,
const int32_t* gate_bias, int32_t n_batch,
int32_t n_hidden, int32_t n_output,
int32_t output_zp, int8_t* proj_output) {
PortableMatrixBatchVectorMultiply(hidden, hidden_to_output_weights,
proj_effective_scale_a,
proj_effective_scale_b, gate_bias, n_batch,
n_hidden, n_output, output_zp, proj_output);
}
void ApplyLayerNorm(const int16_t* input, const int16_t* layer_norm_weights,
const int32_t* bias, int32_t layer_norm_scale_a,
int32_t layer_norm_scale_b, int32_t variance_limit,
int n_batch, int n_input, int16_t* output) {
PortableApplyLayerNorm(input, layer_norm_weights, bias, layer_norm_scale_a,
layer_norm_scale_b, variance_limit, n_batch, n_input,
output);
}
void ApplyLayerNormFloat(const int16_t* input,
const int16_t* layer_norm_weights,
int32_t layer_norm_scale_a, int32_t layer_norm_scale_b,
const int32_t* bias, int n_batch, int n_input,
int16_t* output) {
PortableApplyLayerNormFloat(input, layer_norm_weights, layer_norm_scale_a,
layer_norm_scale_b, bias, n_batch, n_input,
output);
}
void ApplySigmoid(const int16_t* input, int32_t n_batch, int32_t n_input,
int16_t* output) {
PortableApplySigmoid(input, n_batch, n_input, output);
}
void ApplySigmoidFloat(const int16_t* input, int32_t n_batch, int32_t n_input,
int16_t* output) {
PortableApplySigmoidFloat(input, n_batch, n_input, output);
}
void ApplyTanh(int32_t integer_bits, const int16_t* input, int32_t n_batch,
int32_t n_input, int16_t* output) {
PortableApplyTanh(integer_bits, input, n_batch, n_input, output);
}
void ApplyTanhFloat(const int16_t* input, int32_t n_batch, int32_t n_input,
int32_t integer_bits, int16_t* output) {
PortableApplyTanhFloat(input, n_batch, n_input, integer_bits, output);
}
void CwiseMul(const int16_t* input_1, const int16_t* input_2, int n_batch,
int n_input, int shift, int16_t* output) {
PortableCwiseMul(input_1, input_2, n_batch, n_input, shift, output);
}
void CwiseMul(const int16_t* input_1, const int16_t* input_2,
int32_t multiplier, int32_t shift, int32_t n_batch,
int32_t n_input, int32_t output_zp, int8_t* output) {
PortableCwiseMul(input_1, input_2, multiplier, shift, n_batch, n_input,
output_zp, output);
}
void CwiseAdd(const int16_t* input_1, const int16_t* input_2, int n_batch,
int n_input, int16_t* output) {
PortableCwiseAdd(input_1, input_2, n_batch, n_input, output);
}
void CwiseClipping(float* vector, const int v_size,
const float clipping_value) {
PortableCwiseClipping(vector, v_size, clipping_value);
}
void CwiseClipping(int16_t* vector, const int v_size,
const int16_t clipping_value) {
PortableCwiseClipping(vector, v_size, clipping_value);
}
void CwiseClipping(int8_t* vector, const int v_size,
const int8_t clipping_value) {
PortableCwiseClipping(vector, v_size, clipping_value);
}
void VectorBatchVectorCwiseProductAccumulate(const int16_t* vector, int v_size,
const int16_t* batch_vector,
int n_batch, int32_t multiplier,
int shift, int16_t* result) {
PortableVectorBatchVectorCwiseProductAccumulate(
vector, v_size, batch_vector, n_batch, multiplier, shift, result);
}
float VectorVectorDotProduct(const float* vector1, const float* vector2,
int v_size) {
return PortableVectorVectorDotProduct(vector1, vector2, v_size);
}
void BatchVectorBatchVectorDotProduct(const int16_t* vector1,
const int16_t* vector2, int v_size,
int n_batch, int32_t* result) {
PortableBatchVectorBatchVectorDotProduct(vector1, vector2, v_size, n_batch,
result);
}
void Sub1Vector(const float* vector, int v_size, float* result) {
PortableSub1Vector(vector, v_size, result);
}
void Sub1Vector(const int16_t* vector, int v_size, int16_t* result) {
PortableSub1Vector(vector, v_size, result);
}
// Multiply all elements of a vector by a scalar.
void VectorScalarMultiply(const int8_t* vector, int v_size, float scale,
float* result) {
PortableVectorScalarMultiply(vector, v_size, scale, result);
}
void ReductionSumVector(const float* input_vector, float* output_vector,
int output_size, int reduction_size) {
PortableReductionSumVector(input_vector, output_vector, output_size,
reduction_size);
}
void ReductionSumVector(const int32_t* input_vector, int32_t* output_vector,
int output_size, int reduction_size) {
PortableReductionSumVector(input_vector, output_vector, output_size,
reduction_size);
}
void ReductionSumVector(const int8_t* input_vector, int32_t* output_vector,
int output_size, int reduction_size) {
PortableReductionSumVector(input_vector, output_vector, output_size,
reduction_size);
}
void MeanStddevNormalization(const float* input_vector, float* output_vector,
int v_size, int n_batch) {
PortableMeanStddevNormalization(input_vector, output_vector, v_size, n_batch);
}
void TwoGateSaturatingAdd(const int8_t* input, int8_t input_zp,
const int8_t* recurrent, int8_t recurrent_zp,
int32_t input_effective_scale_a,
int32_t input_effective_scale_b,
int32_t recurrent_effective_scale_a,
int32_t recurrent_effective_scale_b, int32_t n_batch,
int32_t n_cell, int16_t* output) {
PortableTwoGateSaturatingAdd(
input, input_zp, recurrent, recurrent_zp, input_effective_scale_a,
input_effective_scale_b, recurrent_effective_scale_a,
recurrent_effective_scale_b, n_batch, n_cell, output);
}
} // namespace tensor_utils
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_H_

View File

@@ -87,6 +87,15 @@ void PortableSparseMatrixBatchVectorMultiplyAccumulate(
int m_rows, int m_cols, const float* __restrict__ vector, int n_batch,
float* __restrict__ result);
void PortableSparseMatrixBatchVectorMultiplyAccumulate1x16(
const int8_t* __restrict__ matrix, const int32_t* __restrict__ segments,
const int32_t* __restrict__ indices, int m_rows, int m_cols,
const int8_t* __restrict__ vector, const int32_t* __restrict__ bias_vector,
int n_batch, const int32_t input_offset, const int32_t output_multiplier,
const int32_t output_shift, const int32_t output_offset,
const int32_t output_activation_min, const int32_t output_activation_max,
int8_t* __restrict__ result);
void PortableSparseMatrixBatchVectorMultiplyAccumulate(
const int8_t* __restrict__ matrix, const uint8_t* ledger, const int m_rows,
const int m_cols, const int8_t* __restrict__ vectors,

View File

@@ -273,6 +273,9 @@ void BroadcastQuantSubSlow(const ArithmeticParams& params,
const T* input2_data,
const RuntimeShape& output_shape, T* output_data) {
ruy::profiler::ScopeLabel label("BroadcastQuantSubSlow/T");
TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N);
TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N);
TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N);
NdArrayDesc<N> desc1;
NdArrayDesc<N> desc2;
NdArrayDesc<N> output_desc;

View File

@@ -27,6 +27,7 @@ limitations under the License.
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/context_util.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
@@ -466,10 +467,10 @@ TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
const int d1 = i >= dims1 ? 1 : SizeOfDimension(input1, dims1 - i - 1);
const int d2 = i >= dims2 ? 1 : SizeOfDimension(input2, dims2 - i - 1);
if (!(d1 == d2 || d1 == 1 || d2 == 1)) {
context->ReportError(context,
"Given shapes, %s and %s, are not broadcastable.",
GetShapeDebugString(input1->dims).c_str(),
GetShapeDebugString(input2->dims).c_str());
TF_LITE_KERNEL_LOG(context,
"Given shapes, %s and %s, are not broadcastable.",
GetShapeDebugString(input1->dims).c_str(),
GetShapeDebugString(input2->dims).c_str());
return kTfLiteError;
}
@@ -504,11 +505,11 @@ TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
if (min_value == 0) max_value = 0;
if (!(d1 == 1 || d1 == max_value) || !(d2 == 1 || d2 == max_value) ||
!(d3 == 1 || d3 == max_value)) {
context->ReportError(
context, "Given shapes, %s, %s and %s, are not broadcastable.",
GetShapeDebugString(input1->dims).c_str(),
GetShapeDebugString(input2->dims).c_str(),
GetShapeDebugString(input3->dims).c_str());
TF_LITE_KERNEL_LOG(context,
"Given shapes, %s, %s and %s, are not broadcastable.",
GetShapeDebugString(input1->dims).c_str(),
GetShapeDebugString(input2->dims).c_str(),
GetShapeDebugString(input3->dims).c_str());
return kTfLiteError;
}
shape->data[out_dims - i - 1] = max_value;
@@ -529,6 +530,9 @@ int TfLiteTypeGetSize(TfLiteType type) {
return 1;
case kTfLiteBool:
return sizeof(bool);
case kTfLiteUInt16:
static_assert(sizeof(uint16_t) == 2, "");
return 2;
case kTfLiteInt16:
static_assert(sizeof(int16_t) == 2, "");
return 2;
@@ -575,4 +579,15 @@ bool IsMobilePlatform() {
return false;
}
bool HasUnspecifiedDimension(const TfLiteTensor* tensor) {
#ifndef TF_LITE_STATIC_MEMORY
if (tensor->dims_signature) {
for (int i : TfLiteIntArrayView(tensor->dims_signature)) {
if (i == -1) return true;
}
}
#endif // TF_LITE_STATIC_MEMORY
return false;
}
} // namespace tflite
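HasUnspecifiedDimension above treats a -1 entry in dims_signature as a dynamic dimension. A standalone stand-in with a plain array, purely to illustrate the check (not the real TfLiteTensor setup):

#include <cstdio>

// Stand-in for the dims_signature scan: -1 marks an unspecified dimension.
bool HasUnspecifiedDim(const int* dims_signature, int size) {
  for (int i = 0; i < size; ++i) {
    if (dims_signature[i] == -1) return true;
  }
  return false;
}

int main() {
  const int sig[4] = {-1, 224, 224, 3};  // dynamic batch, fixed H/W/C
  std::printf("%d\n", HasUnspecifiedDim(sig, 4));  // prints 1
  return 0;
}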

View File

@@ -314,6 +314,9 @@ int TfLiteTypeGetSize(TfLiteType type);
// Whether the current platform is mobile (Android or iOS).
bool IsMobilePlatform();
// Returns whether there is an unspecified dimension in the tensor's dims signature.
bool HasUnspecifiedDimension(const TfLiteTensor* tensor);
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_KERNEL_UTIL_H_

View File

@@ -29,8 +29,12 @@ AllOpsResolver::AllOpsResolver() {
AddAssignVariable();
AddAveragePool2D();
AddBatchToSpaceNd();
AddBroadcastArgs();
AddBroadcastTo();
AddCallOnce();
AddCast();
AddCeil();
AddCircularBuffer();
AddConcatenation();
AddConv2D();
AddCos();
@@ -49,9 +53,12 @@ AllOpsResolver::AllOpsResolver() {
AddFloorDiv();
AddFloorMod();
AddFullyConnected();
AddGather();
AddGatherNd();
AddGreater();
AddGreaterEqual();
AddHardSwish();
AddIf();
AddL2Normalization();
AddL2Pool2D();
AddLeakyRelu();
@@ -66,6 +73,7 @@ AllOpsResolver::AllOpsResolver() {
AddMaximum();
AddMean();
AddMinimum();
AddMirrorPad();
AddMul();
AddNeg();
AddNotEqual();
@@ -85,6 +93,7 @@ AllOpsResolver::AllOpsResolver() {
AddRsqrt();
AddShape();
AddSin();
AddSlice();
AddSoftmax();
AddSpaceToBatchNd();
AddSpaceToDepth();
@@ -101,6 +110,8 @@ AllOpsResolver::AllOpsResolver() {
AddTransposeConv();
AddUnpack();
AddVarHandle();
AddWhile();
AddZerosLike();
}
} // namespace tflite
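AllOpsResolver keeps growing with these additions, and it pulls every kernel into the binary. A sketch, assuming the MicroMutableOpResolver API from the same TFLM tree, of registering only the ops a given model actually uses:

#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

// Hypothetical model that only needs three ops; the template argument is the
// number of registered ops.
void RegisterOps(tflite::MicroMutableOpResolver<3>& resolver) {
  resolver.AddConv2D();
  resolver.AddFullyConnected();
  resolver.AddSoftmax();
}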

View File

@@ -0,0 +1,107 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/fake_micro_context.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/micro_arena_constants.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"
namespace tflite {
namespace {
// Dummy static variables to allow creation of dummy MicroAllocator.
// All tests are guaranteed to run serially.
static constexpr int KDummyTensorArenaSize = 256;
static uint8_t dummy_tensor_arena[KDummyTensorArenaSize];
} // namespace
FakeMicroContext::FakeMicroContext(TfLiteTensor* tensors,
SimpleMemoryAllocator* allocator,
MicroGraph* micro_graph)
: MicroContext(
MicroAllocator::Create(dummy_tensor_arena, KDummyTensorArenaSize,
GetMicroErrorReporter()),
nullptr, micro_graph),
tensors_(tensors),
allocator_(allocator) {}
TfLiteTensor* FakeMicroContext::AllocateTempTfLiteTensor(int tensor_index) {
allocated_tensor_count_++;
return &tensors_[tensor_index];
}
void FakeMicroContext::DeallocateTempTfLiteTensor(TfLiteTensor* tensor) {
allocated_tensor_count_--;
}
bool FakeMicroContext::IsAllTempTfLiteTensorDeallocated() {
return !allocated_tensor_count_;
}
TfLiteEvalTensor* FakeMicroContext::GetEvalTensor(int tensor_index) {
TfLiteEvalTensor* eval_tensor =
reinterpret_cast<TfLiteEvalTensor*>(allocator_->AllocateTemp(
sizeof(TfLiteEvalTensor), alignof(TfLiteEvalTensor)));
TFLITE_DCHECK(eval_tensor != nullptr);
// In unit tests, the TfLiteTensor pointer contains the source of truth for
// buffers and values:
eval_tensor->data = tensors_[tensor_index].data;
eval_tensor->dims = tensors_[tensor_index].dims;
eval_tensor->type = tensors_[tensor_index].type;
return eval_tensor;
}
void* FakeMicroContext::AllocatePersistentBuffer(size_t bytes) {
// FakeMicroContext uses SimpleMemoryAllocator, which does not automatically
// apply the buffer alignment that MicroAllocator does.
// Applying the alignment here is potentially wasteful but allows the
// fake_micro_context to work correctly with optimized kernels.
return allocator_->AllocatePersistentBuffer(bytes,
MicroArenaBufferAlignment());
}
TfLiteStatus FakeMicroContext::RequestScratchBufferInArena(size_t bytes,
int* buffer_index) {
TFLITE_DCHECK(buffer_index != nullptr);
if (scratch_buffer_count_ == kNumScratchBuffers_) {
MicroPrintf("Exceeded the maximum number of scratch tensors allowed (%d).",
kNumScratchBuffers_);
return kTfLiteError;
}
// For tests, we allocate scratch buffers from the tail and keep them around
// for the lifetime of the model. This means that the arena size in the tests
// will be larger than it would be if the scratch buffers could share memory.
scratch_buffers_[scratch_buffer_count_] =
allocator_->AllocatePersistentBuffer(bytes, MicroArenaBufferAlignment());
TFLITE_DCHECK(scratch_buffers_[scratch_buffer_count_] != nullptr);
*buffer_index = scratch_buffer_count_++;
return kTfLiteOk;
}
void* FakeMicroContext::GetScratchBuffer(int buffer_index) {
TFLITE_DCHECK(scratch_buffer_count_ <= kNumScratchBuffers_);
if (buffer_index >= scratch_buffer_count_) {
return nullptr;
}
return scratch_buffers_[buffer_index];
}
} // namespace tflite
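FakeMicroContext fakes the two-phase scratch protocol used by kernels: Prepare-time code stores only an index, Eval-time code resolves it to a pointer. A standalone miniature of that protocol (hypothetical class; the fixed sizes are chosen arbitrarily):

#include <cstddef>
#include <cstdint>

class TinyScratchArena {
 public:
  // Returns an index, or -1 when the fixed pool is exhausted.
  int Request(size_t bytes) {
    if (count_ == kMaxBuffers || used_ + bytes > sizeof(pool_)) return -1;
    offsets_[count_] = used_;
    used_ += bytes;
    return count_++;
  }
  void* Get(int index) {
    return (index >= 0 && index < count_) ? pool_ + offsets_[index] : nullptr;
  }

 private:
  static constexpr int kMaxBuffers = 12;
  uint8_t pool_[256];
  size_t offsets_[kMaxBuffers] = {};
  size_t used_ = 0;
  int count_ = 0;
};

int main() {
  TinyScratchArena arena;
  const int idx = arena.Request(64);  // Prepare-time: remember only the index
  void* scratch = arena.Get(idx);     // Eval-time: resolve it to a pointer
  return scratch != nullptr ? 0 : 1;
}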

View File

@@ -0,0 +1,56 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_FAKE_MICRO_CONTEXT_H_
#define TENSORFLOW_LITE_MICRO_FAKE_MICRO_CONTEXT_H_
#include "tensorflow/lite/micro/micro_context.h"
#include "tensorflow/lite/micro/micro_graph.h"
namespace tflite {
// A fake of MicroContext for kernel util tests.
class FakeMicroContext : public MicroContext {
public:
FakeMicroContext(TfLiteTensor* tensors, SimpleMemoryAllocator* allocator,
MicroGraph* micro_graph);
void* AllocatePersistentBuffer(size_t bytes) override;
TfLiteStatus RequestScratchBufferInArena(size_t bytes,
int* buffer_index) override;
void* GetScratchBuffer(int buffer_index) override;
TfLiteTensor* AllocateTempTfLiteTensor(int tensor_index) override;
void DeallocateTempTfLiteTensor(TfLiteTensor* tensor) override;
bool IsAllTempTfLiteTensorDeallocated();
TfLiteEvalTensor* GetEvalTensor(int tensor_index) override;
private:
static constexpr int kNumScratchBuffers_ = 12;
int scratch_buffer_count_ = 0;
uint8_t* scratch_buffers_[kNumScratchBuffers_];
TfLiteTensor* tensors_;
int allocated_tensor_count_ = 0;
SimpleMemoryAllocator* allocator_;
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_FAKE_MICRO_CONTEXT_H_

View File

@@ -0,0 +1,100 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_IBUFFER_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_IBUFFER_ALLOCATOR_H_
#include <cstddef>
#include <cstdint>
#include "tensorflow/lite/c/c_api_types.h"
namespace tflite {
// Interface classes that the TFLM framework relies on to get buffers it needs.
// There are two types of buffers that the TFLM framework requires: persistent
// and non-persistent. Persistent buffers, once allocated, are never freed by
// the TFLM framework. Non-persistent buffers can be allocated and deallocated
// by the TFLM framework. This file defines the two interface classes that the
// TFLM framework relies on to manage these buffers.
// Interface class for managing persistent buffers.
class IPersistentBufferAllocator {
public:
IPersistentBufferAllocator() {}
virtual ~IPersistentBufferAllocator() {}
// Allocates persistent memory. The persistent buffer is never freed.
virtual uint8_t* AllocatePersistentBuffer(size_t size, size_t alignment) = 0;
// Returns the size of all persistent allocations in bytes.
virtual size_t GetPersistentUsedBytes() const = 0;
};
// Interface class for managing non-persistent buffers.
// The default non-persistent buffers are temp buffers that are not resizable.
// Support for at least one resizable buffer is required.
class INonPersistentBufferAllocator {
public:
INonPersistentBufferAllocator() {}
virtual ~INonPersistentBufferAllocator() {}
// Allocates a temporary buffer. This buffer is not resizable.
virtual uint8_t* AllocateTemp(size_t size, size_t alignment) = 0;
// Signals that a temporary buffer is no longer needed.
virtual void DeallocateTemp(uint8_t* buf) = 0;
// Returns true if all temporary buffers are already deallocated.
virtual bool IsAllTempDeallocated() = 0;
// Signals that all temporary allocations can be reclaimed. TFLM calls this
// API when it knows that all temporary buffers that it requested have been
// deallocated. The goal of this API is to let implementations of
// INonPersistentBufferAllocator reuse the buffer space with reasonable
// complexity.
virtual TfLiteStatus ResetTempAllocations() = 0;
// Returns a buffer that is resizable via ResizeBuffer().
virtual uint8_t* AllocateResizableBuffer(size_t size, size_t alignment) = 0;
// Resizes a buffer that was previously returned by
// AllocateResizableBuffer().
virtual TfLiteStatus ResizeBuffer(uint8_t* resizable_buf, size_t size,
size_t alignment) = 0;
// Frees up the memory occupied by the resizable buffer.
virtual TfLiteStatus DeallocateResizableBuffer(uint8_t* resizable_buf) = 0;
// Returns a pointer to the start of the overlay memory, which is used for
// activation tensors and scratch buffers by kernels at the Invoke stage.
virtual uint8_t* GetOverlayMemoryAddress() const = 0;
// Reserves the size of the overlay memory. This memory is reserved for the
// kernels at the Invoke stage. It is referred to as the overlay because,
// before the Invoke stage, the same memory can be used for temp buffers. The
// layout of the memory is planned separately by the memory planner at the
// Invoke stage.
virtual TfLiteStatus ReserveNonPersistentOverlayMemory(size_t size,
size_t alignment) = 0;
// Returns the size of the non-persistent buffer in use.
virtual size_t GetNonPersistentUsedBytes() const = 0;
// Returns the number of bytes available with a given alignment. This number
// takes into account any temporary allocations.
virtual size_t GetAvailableMemory(size_t alignment) const = 0;
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_IBUFFER_ALLOCATOR_H_
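As a point of reference, here is a minimal sketch of how the persistent half of this interface could be implemented over a caller-provided arena. This is not part of the commit; the class name, the bump-down layout, and the include path (inferred from the include guard) are illustrative assumptions only.
```
#include <cstddef>
#include <cstdint>

#include "tensorflow/lite/micro/ibuffer_allocator.h"  // path inferred from the guard

// Hypothetical bump-down allocator over a caller-provided arena (sketch only).
class SimplePersistentAllocator : public tflite::IPersistentBufferAllocator {
 public:
  SimplePersistentAllocator(uint8_t* arena, size_t arena_size)
      : head_(arena), tail_(arena + arena_size), used_(0) {}

  uint8_t* AllocatePersistentBuffer(size_t size, size_t alignment) override {
    // Allocate from the tail downwards and round down to the alignment.
    uint8_t* candidate = tail_ - size;
    candidate -= reinterpret_cast<uintptr_t>(candidate) % alignment;
    if (candidate < head_) return nullptr;  // Out of arena space.
    used_ += static_cast<size_t>(tail_ - candidate);
    tail_ = candidate;
    return candidate;
  }

  size_t GetPersistentUsedBytes() const override { return used_; }

 private:
  uint8_t* head_;  // Lowest usable address of the arena.
  uint8_t* tail_;  // Current bottom of the persistent region (grows downward).
  size_t used_;
};
```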

View File

@@ -117,15 +117,21 @@ TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
ReluOpData* data = static_cast<ReluOpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kActivationsInputTensor);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kActivationsInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kActivationsOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kActivationsOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
if (input->type == kTfLiteInt8) {
CalculateReluOpData<int8_t>(input, output, data);
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
@@ -133,7 +139,9 @@ TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
Relu6OpData* data = static_cast<Relu6OpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kActivationsInputTensor);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kActivationsInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
if (input->type == kTfLiteInt8) {
@@ -142,6 +150,8 @@ TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node) {
data->zero_int8 = input->params.zero_point;
}
micro_context->DeallocateTempTfLiteTensor(input);
return kTfLiteOk;
}
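The hunks above show the migration pattern that repeats through the rest of this commit: Prepare no longer reads full TfLiteTensor structs through GetInput/GetOutput, it allocates them as temporaries from the MicroContext and releases them before returning. Condensed into a sketch (the tensor indices are placeholders):
```
// Sketch of the Prepare-time pattern used throughout this commit.
TfLiteStatus ExamplePrepare(TfLiteContext* context, TfLiteNode* node) {
  tflite::MicroContext* micro_context = tflite::GetMicroContext(context);

  // Temp tensors are valid only until they are deallocated below.
  TfLiteTensor* input =
      micro_context->AllocateTempInputTensor(node, /*index=*/0);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* output =
      micro_context->AllocateTempOutputTensor(node, /*index=*/0);
  TF_LITE_ENSURE(context, output != nullptr);

  // ... inspect types/shapes and fill the op's user_data here ...

  micro_context->DeallocateTempTfLiteTensor(input);
  micro_context->DeallocateTempTfLiteTensor(output);
  return kTfLiteOk;
}
```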

View File

@@ -80,11 +80,15 @@ TfLiteStatus AddPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
const TfLiteTensor* input1 = GetInput(context, node, kAddInputTensor1);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input1 =
micro_context->AllocateTempInputTensor(node, kAddInputTensor1);
TF_LITE_ENSURE(context, input1 != nullptr);
const TfLiteTensor* input2 = GetInput(context, node, kAddInputTensor2);
TfLiteTensor* input2 =
micro_context->AllocateTempInputTensor(node, kAddInputTensor2);
TF_LITE_ENSURE(context, input2 != nullptr);
TfLiteTensor* output = GetOutput(context, node, kAddOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kAddOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
OpDataAdd* data = static_cast<OpDataAdd*>(node->user_data);
@@ -93,6 +97,9 @@ TfLiteStatus AddPrepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_STATUS(
CalculateOpDataAdd(context, params, input1, input2, output, data));
micro_context->DeallocateTempTfLiteTensor(input1);
micro_context->DeallocateTempTfLiteTensor(input2);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -50,18 +50,19 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE(context, num_inputs >= 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input_tensor_first;
TF_LITE_ENSURE_OK(
context, GetInputSafe(context, node, kInputTensor0, &input_tensor_first));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input_tensor_first =
micro_context->AllocateTempInputTensor(node, kInputTensor0);
TF_LITE_ENSURE(context, input_tensor_first != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
// Check that all tensors have the same shape and type.
TF_LITE_ENSURE_TYPES_EQ(context, output->type, input_tensor_first->type);
for (int i = kInputTensor0 + 1; i < num_inputs; ++i) {
const TfLiteTensor* input;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, i, &input));
TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, i);
TF_LITE_ENSURE(context, input != nullptr);
TF_LITE_ENSURE(context, HaveSameShapes(input_tensor_first, input));
TF_LITE_ENSURE_TYPES_EQ(context, input_tensor_first->type, input->type);
@@ -72,6 +73,8 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE(context,
input_tensor_first->params.scale == input->params.scale);
}
micro_context->DeallocateTempTfLiteTensor(input);
}
if (output->type == kTfLiteFloat32) {
@@ -123,6 +126,9 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteError;
}
micro_context->DeallocateTempTfLiteTensor(input_tensor_first);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -52,21 +52,19 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
input_resource_id_tensor->type == kTfLiteInt32));
TF_LITE_ENSURE_EQ(context, NumElements(input_resource_id_tensor->dims), 1);
const TfLiteTensor* input_value = GetInput(context, node, kInputValue);
tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
TfLiteTensor* input_value =
micro_context->AllocateTempInputTensor(node, kInputValue);
TFLITE_DCHECK(input_value != nullptr);
// Casting to TfliteIntArray is required since we are re-using
// GetExecutionPlan from TfLiteContext. On TFLM this method returns a
// MicroGraph.
// TODO(b/188226309): Design a cleaner way to get a graph from kernel context.
MicroGraph* graph_info;
context->GetExecutionPlan(context,
reinterpret_cast<TfLiteIntArray**>(&graph_info));
MicroResourceVariables* resources = graph_info->GetResourceVariables();
MicroGraph& graph_info = micro_context->graph();
MicroResourceVariables* resources = graph_info.GetResourceVariables();
TF_LITE_ENSURE_OK(context,
resources->Allocate(input_resource_id_tensor->data.i32[0],
context, input_value));
micro_context->DeallocateTempTfLiteTensor(input_value);
return kTfLiteOk;
}
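This hunk, the Eval hunk below, and the CALL_ONCE kernel later in the commit all drop the GetExecutionPlan cast workaround in favour of reaching the graph through the micro context. Reduced to the new access path (namespaces written out for clarity):
```
// New access path to the graph and its resource variables from a kernel.
tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
tflite::MicroGraph& graph = micro_context->graph();
tflite::MicroResourceVariables* resources = graph.GetResourceVariables();
```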
@@ -79,14 +77,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
tflite::micro::GetEvalInput(context, node, kInputValue);
TFLITE_DCHECK(input_value != nullptr);
// Casting to TfliteIntArray is required since we are re-using
// GetExecutionPlan from TfLiteContext. On TFLM this method returns a
// MicroGraph.
// TODO(b/188226309): Design a cleaner way to get a graph from kernel context.
MicroGraph* graph_info;
context->GetExecutionPlan(context,
reinterpret_cast<TfLiteIntArray**>(&graph_info));
MicroResourceVariables* resources = graph_info->GetResourceVariables();
tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
MicroGraph& graph_info = micro_context->graph();
MicroResourceVariables* resources = graph_info.GetResourceVariables();
if (resources == nullptr) {
MicroPrintf(
"ASSIGN_VARIABLE requires resource variables. Please create "

View File

@@ -41,8 +41,12 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 3);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, input != nullptr && output != nullptr);
TF_LITE_ENSURE(context, NumDimensions(input) >= kInputOutputMinDimensionNum);
@@ -51,6 +55,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE(context, NumDimensions(output) <= kInputOutputMaxDimensionNum);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -0,0 +1,97 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/broadcast_args.h"
#include <stdint.h>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_context.h"
namespace tflite {
namespace {
constexpr int kShape1Tensor = 0;
constexpr int kShape2Tensor = 1;
constexpr int kOutputTensor = 0;
TfLiteStatus BroadcastArgsPrepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE(context, NumInputs(node) == 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* shape1 =
micro_context->AllocateTempInputTensor(node, kShape1Tensor);
TfLiteTensor* shape2 =
micro_context->AllocateTempInputTensor(node, kShape2Tensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context,
shape1->type == kTfLiteInt32 || shape1->type == kTfLiteInt64);
TF_LITE_ENSURE_EQ(context, shape1->type, shape2->type);
TF_LITE_ENSURE_EQ(context, shape1->type, output->type);
// Ensures the shapes are 1-D tensors.
TF_LITE_ENSURE_EQ(context, NumDimensions(shape1), 1);
TF_LITE_ENSURE_EQ(context, NumDimensions(shape2), 1);
// Ensure the shape of the output tensor is compatible
TF_LITE_ENSURE_EQ(context, NumDimensions(output), 1);
micro_context->DeallocateTempTfLiteTensor(shape1);
micro_context->DeallocateTempTfLiteTensor(shape2);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
TfLiteStatus BroadcastArgsEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* shape1 =
micro::GetEvalInput(context, node, kShape1Tensor);
const TfLiteEvalTensor* shape2 =
micro::GetEvalInput(context, node, kShape2Tensor);
TfLiteEvalTensor* output = micro::GetEvalOutput(context, node, kOutputTensor);
if (output->type == kTfLiteInt32) {
reference_ops::BroadcastArgs(
micro::GetTensorShape(shape1), micro::GetTensorData<int32_t>(shape1),
micro::GetTensorShape(shape2), micro::GetTensorData<int32_t>(shape2),
micro::GetTensorShape(output), micro::GetTensorData<int32_t>(output));
} else {
reference_ops::BroadcastArgs(
micro::GetTensorShape(shape1), micro::GetTensorData<int64_t>(shape1),
micro::GetTensorShape(shape2), micro::GetTensorData<int64_t>(shape2),
micro::GetTensorShape(output), micro::GetTensorData<int64_t>(output));
}
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_BROADCAST_ARGS() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/BroadcastArgsPrepare,
/*invoke=*/BroadcastArgsEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
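For a quick sense of what this new kernel computes, here is a worked example of the NumPy-style broadcasting of two shape vectors; the concrete values are illustrative only.
```
// shape1 = {2, 1, 3}, shape2 = {4, 3}
// The shapes are aligned from the right and the larger of each dimension pair
// is taken (a dimension of 1 broadcasts against anything):
//   output = {2, 4, 3}
```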

View File

@@ -0,0 +1,129 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/broadcast_to.h"
#include <stdint.h>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_context.h"
namespace tflite {
namespace {
constexpr int kInputTensor = 0;
constexpr int kShapeTensor = 1;
constexpr int kOutputTensor = 0;
// Support a maximum of 5 dimensions in TFLM.
constexpr int kMaxDims = 5;
TfLiteStatus ValidateOutputTensor(TfLiteContext* context, TfLiteTensor* input,
TfLiteTensor* shape, TfLiteTensor* output) {
// Ensures the shape is a 1-D tensor.
TF_LITE_ENSURE_EQ(context, NumDimensions(shape), 1);
// Ensure output dims are not fewer than input dims.
int input_num_dims = NumDimensions(input);
int output_num_dims = NumDimensions(output);
int shape_num_dims = SizeOfDimension(shape, 0);
TF_LITE_ENSURE_MSG(context, output_num_dims == shape_num_dims,
"Output must match with the expected shape dimension.");
TF_LITE_ENSURE_MSG(context, input_num_dims <= output_num_dims,
"Output shape must be broadcastable from input shape.");
TF_LITE_ENSURE_MSG(context, output_num_dims <= kMaxDims,
"BroadcastTo only supports 1-5D tensor.");
// Check if output shape is broadcastable from input shape.
auto get_shape_data = [shape](int i) -> int32_t {
if (shape->type == kTfLiteInt32) {
return GetTensorData<int32_t>(shape)[i];
} else {
return GetTensorData<int64_t>(shape)[i];
}
};
int extending_dims = output_num_dims - input_num_dims;
for (int idx = 0; idx < input_num_dims; ++idx) {
TF_LITE_ENSURE_MSG(
context,
(SizeOfDimension(input, idx) == 1 ||
SizeOfDimension(input, idx) == get_shape_data(extending_dims + idx)),
"Output shape must be broadcastable from input shape.");
}
// Validating the shape of the output tensor.
tflite::RuntimeShape output_shape = tflite::GetTensorShape(output);
for (int idx = 0; idx < output_num_dims; ++idx) {
TF_LITE_ENSURE(context, output_shape.Dims(idx) == get_shape_data(idx));
}
return kTfLiteOk;
}
TfLiteStatus BroadcastToPrepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE(context, NumInputs(node) == 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TfLiteTensor* shape =
micro_context->AllocateTempInputTensor(node, kShapeTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE_MSG(context, (NumDimensions(input) <= kMaxDims),
"BroadcastTo only supports 1-5D tensor.");
TF_LITE_ENSURE(context,
shape->type == kTfLiteInt32 || shape->type == kTfLiteInt64);
TF_LITE_ENSURE_EQ(context, input->type, output->type);
// Does not support the String type due to its variable size. This limitation
// is the same as in TFLite.
TF_LITE_ENSURE(context, input->type != kTfLiteString);
TF_LITE_ENSURE_STATUS(ValidateOutputTensor(context, input, shape, output));
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(shape);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
TfLiteStatus BroadcastToEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output = micro::GetEvalOutput(context, node, kOutputTensor);
// The BroadcastTo op supports up to 5 dims, unlike the 8 dims supported in TFLite.
reference_ops::BroadcastTo<kMaxDims>(
micro::GetTensorShape(input), input->data.raw,
micro::GetTensorShape(output), output->data.raw, input->type);
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_BROADCAST_TO() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/BroadcastToPrepare,
/*invoke=*/BroadcastToEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
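And a small worked example for BROADCAST_TO itself (values illustrative only):
```
// input  shape = {1, 3}, data = {1, 2, 3}
// shape tensor = {2, 3}
// output shape = {2, 3}, data = {1, 2, 3,
//                                1, 2, 3}   // the single row is repeated
```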

View File

@@ -23,6 +23,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_context.h"
#include "tensorflow/lite/micro/micro_graph.h"
#include "tensorflow/lite/schema/schema_generated.h"
@@ -50,16 +51,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE(context, NumInputs(node) == 0);
TF_LITE_ENSURE(context, NumOutputs(node) == 0);
// Casting to TfliteIntArray is required since we are re-using
// GetExecutionPlan from TfLiteContext. On TFLM this method returns a
// MicroGraph.
// TODO(b/188226309): Design a cleaner way to get a graph from kernel context.
MicroGraph* graph_info;
context->GetExecutionPlan(context,
reinterpret_cast<TfLiteIntArray**>(&graph_info));
tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
MicroGraph& graph_info = micro_context->graph();
TF_LITE_ENSURE(context,
op_data->init_subgraph_index < graph_info->NumSubgraphs());
op_data->init_subgraph_index < graph_info.NumSubgraphs());
return kTfLiteOk;
}
@@ -72,16 +68,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteOk;
}
// Casting to TfliteIntArray is required since we are re-using
// GetExecutionPlan from TfLiteContext. On TFLM this method returns a
// MicroGraph.
// TODO(b/188226309): Design a cleaner way to get a graph from kernel context.
MicroGraph* graph_info;
context->GetExecutionPlan(context,
reinterpret_cast<TfLiteIntArray**>(&graph_info));
tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
MicroGraph& graph_info = micro_context->graph();
TF_LITE_ENSURE_OK(context,
graph_info->InvokeSubgraph(op_data->init_subgraph_index));
graph_info.InvokeSubgraph(op_data->init_subgraph_index));
op_data->has_run = true;

View File

@@ -28,11 +28,19 @@ constexpr int kOutputTensor = 0;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
@@ -83,6 +91,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
case kTfLiteInt32:
return copyToTensor(context, tflite::micro::GetTensorData<int32_t>(input),
output, num_elements);
case kTfLiteUInt32:
return copyToTensor(context,
tflite::micro::GetTensorData<uint32_t>(input), output,
num_elements);
case kTfLiteFloat32:
return copyToTensor(context, tflite::micro::GetTensorData<float>(input),
output, num_elements);

View File

@@ -29,9 +29,13 @@ constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
@@ -42,6 +46,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
for (int i = 0; i < output->dims->size; ++i) {
TF_LITE_ENSURE_EQ(context, output->dims->data[i], input->dims->data[i]);
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -39,9 +39,13 @@ const int kCircularBufferCyclesMaxIndex = 0; // 'cycles_max'
const TfLiteStatus kTfLiteAbort = static_cast<TfLiteStatus>(-9);
TfLiteStatus CircularBufferPrepare(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input =
GetInput(context, node, kCircularBufferInputTensor);
TfLiteTensor* output = GetOutput(context, node, kCircularBufferOutputTensor);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kCircularBufferInputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kCircularBufferOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
OpDataCircularBuffer* op_data =
@@ -85,6 +89,9 @@ TfLiteStatus CircularBufferPrepare(TfLiteContext* context, TfLiteNode* node) {
op_data->cycles_until_run = op_data->cycles_max;
node->user_data = op_data;
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -540,9 +540,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input1 =
micro_context->AllocateTempInputTensor(node, kInputTensor1);
TF_LITE_ENSURE(context, input1 != nullptr);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* input2 =
micro_context->AllocateTempInputTensor(node, kInputTensor2);
TF_LITE_ENSURE(context, input2 != nullptr);
if (input1->type == kTfLiteInt8) {
@@ -570,6 +574,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
data->params.input2_shift = input2_shift;
}
micro_context->DeallocateTempTfLiteTensor(input1);
micro_context->DeallocateTempTfLiteTensor(input2);
return kTfLiteOk;
}

View File

@@ -115,13 +115,19 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
const TfLiteConcatenationParams* params =
reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data);
const TfLiteTensor* input_tensor = GetInput(context, node, 0);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input_tensor = micro_context->AllocateTempInputTensor(node, 0);
TF_LITE_ENSURE(context, input_tensor != nullptr);
TfLiteType input_type = input_tensor->type;
const TfLiteTensor* output_tensor = GetOutput(context, node, kOutputTensor);
TfLiteTensor* output_tensor =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output_tensor != nullptr);
TfLiteType output_type = output_tensor->type;
micro_context->DeallocateTempTfLiteTensor(input_tensor);
micro_context->DeallocateTempTfLiteTensor(output_tensor);
// Check activation and input type
TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone);
TF_LITE_ENSURE(context,
@@ -138,7 +144,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
// Shapes with dimensions >4 are not yet supported with static allocation.
for (int i = 0; i < num_inputs; ++i) {
const TfLiteTensor* input = GetInput(context, node, i);
TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, i);
TF_LITE_ENSURE(context, input != nullptr);
int num_dimensions = NumDimensions(input);
@@ -150,13 +156,15 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
num_dimensions);
return kTfLiteError;
}
micro_context->DeallocateTempTfLiteTensor(input);
}
// Calculate OpData.
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
switch (output_type) { // Already know in/out types are the same.
@@ -183,10 +191,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
// Allocate persistent scale and zeropoint buffers.
// Store input scale and zero point values in OpParams:
for (int i = 0; i < node->inputs->size; ++i) {
const TfLiteTensor* t = GetInput(context, node, i);
TfLiteTensor* t = micro_context->AllocateTempInputTensor(node, i);
TF_LITE_ENSURE(context, t != nullptr);
input_scales[i] = t->params.scale;
input_zero_points[i] = t->params.zero_point;
micro_context->DeallocateTempTfLiteTensor(t);
}
data->params.input_scale = input_scales;
@@ -202,6 +211,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteError;
}
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -1,4 +1,4 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -79,7 +79,8 @@ TfLiteRegistration Register_CONV_2D();
#if defined(XTENSA)
// Returns a TfLiteRegistration struct for kernel variant that only supports
// int8 inputs and outputs.
// int8 activations and int8 weights and always calls the reference
// implementation.
TfLiteRegistration Register_CONV_2D_INT8REF();
#else
inline TfLiteRegistration Register_CONV_2D_INT8REF() {
@@ -87,6 +88,25 @@ inline TfLiteRegistration Register_CONV_2D_INT8REF() {
}
#endif
#if defined(CMSIS_NN)
// Returns a TfLiteRegistration struct for kernel variant that only supports
// int8 activations and int8 weights and uses the latency optimized
// implementations.
TfLiteRegistration Register_CONV_2D_INT8();
// Returns a TfLiteRegistration struct for kernel variant that only supports
// int16 activations and int8 weights and uses the latency optimized
// implementations.
TfLiteRegistration Register_CONV_2D_INT16();
#else
inline TfLiteRegistration Register_CONV_2D_INT8() { return Register_CONV_2D(); }
inline TfLiteRegistration Register_CONV_2D_INT16() {
return Register_CONV_2D();
}
#endif
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
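These specialized registrations let an application choose a narrower conv kernel when a backend is compiled in, while the inline fallbacks keep the call valid otherwise. A hedged sketch of how such a registration might be obtained (handing it to an op resolver is assumed usage and is not shown in this diff):
```
// Resolves to the CMSIS-NN int8-only kernel when CMSIS_NN is defined, and to
// the generic Register_CONV_2D() otherwise, per the inline fallbacks above.
const TfLiteRegistration conv_registration = tflite::Register_CONV_2D_INT8();
```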

View File

@@ -93,13 +93,18 @@ TfLiteStatus CalculateOpDataConv(TfLiteContext* context, TfLiteNode* node,
params.dilation_width_factor, height, width, filter_height, filter_width,
padding, &out_height, &out_width);
const TfLiteTensor* input = GetInput(context, node, kConvInputTensor);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kConvInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
const TfLiteTensor* filter = GetInput(context, node, kConvWeightsTensor);
TfLiteTensor* filter =
micro_context->AllocateTempInputTensor(node, kConvWeightsTensor);
TF_LITE_ENSURE(context, filter != nullptr);
const TfLiteTensor* bias =
GetOptionalInputTensor(context, node, kConvBiasTensor);
TfLiteTensor* output = GetOutput(context, node, kConvOutputTensor);
TfLiteTensor* bias =
micro_context->AllocateTempInputTensor(node, kConvBiasTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kConvOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
// Note that quantized inference requires that all tensors have their
@@ -119,6 +124,11 @@ TfLiteStatus CalculateOpDataConv(TfLiteContext* context, TfLiteNode* node,
data->filter_zero_point = filter->params.zero_point;
data->output_zero_point = output->params.zero_point;
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(filter);
micro_context->DeallocateTempTfLiteTensor(output);
micro_context->DeallocateTempTfLiteTensor(bias);
return kTfLiteOk;
}
@@ -129,12 +139,16 @@ TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node) {
OpDataConv* data = static_cast<OpDataConv*>(node->user_data);
const auto& params =
*(static_cast<const TfLiteConvParams*>(node->builtin_data));
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* output = GetOutput(context, node, kConvOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kConvOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
const TfLiteTensor* input = GetInput(context, node, kConvInputTensor);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kConvInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
const TfLiteTensor* filter = GetInput(context, node, kConvWeightsTensor);
TfLiteTensor* filter =
micro_context->AllocateTempInputTensor(node, kConvWeightsTensor);
TF_LITE_ENSURE(context, filter != nullptr);
const int input_width = input->dims->data[2];
@@ -174,6 +188,10 @@ TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node) {
context, node, params, input_width, input_height, filter_width,
filter_height, output_width, output_height, input->type, data));
micro_context->DeallocateTempTfLiteTensor(filter);
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
} // namespace tflite

View File

@@ -47,8 +47,12 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* axis = GetInput(context, node, kAxisTensor);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TfLiteTensor* axis =
micro_context->AllocateTempInputTensor(node, kAxisTensor);
TF_LITE_ENSURE(context,
input->type == kTfLiteFloat32 || input->type == kTfLiteInt8);
@@ -58,7 +62,8 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE_EQ(context, input->type, output->type);
TF_LITE_ENSURE(context, HaveSameShapes(input, output));
@@ -91,6 +96,10 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
&data->output_activation_max));
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(axis);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -40,11 +40,14 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
@@ -83,6 +86,9 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
output->dims->data[kWidthRank] = output_width;
output->dims->data[kDepthRank] = output_channels;
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -94,13 +94,18 @@ TfLiteStatus CalculateOpDataDepthwiseConv(
params.dilation_width_factor, height, width, filter_height, filter_width,
padding, &out_height, &out_width);
const TfLiteTensor* input = GetInput(context, node, kConvInputTensor);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kConvInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
const TfLiteTensor* filter = GetInput(context, node, kConvWeightsTensor);
TfLiteTensor* filter =
micro_context->AllocateTempInputTensor(node, kConvWeightsTensor);
TF_LITE_ENSURE(context, filter != nullptr);
const TfLiteTensor* bias =
GetOptionalInputTensor(context, node, kConvBiasTensor);
TfLiteTensor* output = GetOutput(context, node, kConvOutputTensor);
TfLiteTensor* bias =
micro_context->AllocateTempInputTensor(node, kConvBiasTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kConvOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
// Note that quantized inference requires that all tensors have their
@@ -120,6 +125,11 @@ TfLiteStatus CalculateOpDataDepthwiseConv(
data->filter_zero_point = filter->params.zero_point;
data->output_zero_point = output->params.zero_point;
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(filter);
micro_context->DeallocateTempTfLiteTensor(bias);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
@@ -130,14 +140,16 @@ TfLiteStatus DepthwiseConvPrepare(TfLiteContext* context, TfLiteNode* node) {
OpDataConv* data = static_cast<OpDataConv*>(node->user_data);
const auto& params =
*(static_cast<const TfLiteDepthwiseConvParams*>(node->builtin_data));
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* output = GetOutput(context, node, kDepthwiseConvOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kDepthwiseConvOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
const TfLiteTensor* input =
GetInput(context, node, kDepthwiseConvInputTensor);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kDepthwiseConvInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
const TfLiteTensor* filter =
GetInput(context, node, kDepthwiseConvWeightsTensor);
TfLiteTensor* filter =
micro_context->AllocateTempInputTensor(node, kDepthwiseConvWeightsTensor);
TF_LITE_ENSURE(context, filter != nullptr);
const int input_width = input->dims->data[2];
@@ -180,6 +192,10 @@ TfLiteStatus DepthwiseConvPrepare(TfLiteContext* context, TfLiteNode* node) {
context, node, params, input_width, input_height, filter_width,
filter_height, output_width, output_height, input->type, data));
micro_context->DeallocateTempTfLiteTensor(output);
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(filter);
return kTfLiteOk;
}

View File

@@ -33,10 +33,12 @@ TfLiteStatus DequantizePrepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
MicroContext* micro_context = GetMicroContext(context);
// TODO(b/140515557): Add cached dequant to improve hybrid model performance.
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, 0);
TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE(context,
@@ -54,6 +56,10 @@ TfLiteStatus DequantizePrepare(TfLiteContext* context, TfLiteNode* node) {
data->quantization_params.zero_point = input->params.zero_point;
data->quantization_params.scale = static_cast<double>(input->params.scale);
data->output_zero_point = output->params.zero_point;
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -13,7 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <algorithm>
#include <numeric>
#include <tuple>
#include "flatbuffers/flexbuffers.h"
#include "tensorflow/lite/c/builtin_op_data.h"
@@ -152,14 +154,17 @@ void Free(TfLiteContext* context, void* buffer) {}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
auto* op_data = static_cast<OpData*>(node->user_data);
MicroContext* micro_context = GetMicroContext(context);
// Inputs: box_encodings, scores, anchors
TF_LITE_ENSURE_EQ(context, NumInputs(node), 3);
const TfLiteTensor* input_box_encodings =
GetInput(context, node, kInputTensorBoxEncodings);
const TfLiteTensor* input_class_predictions =
GetInput(context, node, kInputTensorClassPredictions);
const TfLiteTensor* input_anchors =
GetInput(context, node, kInputTensorAnchors);
TfLiteTensor* input_box_encodings =
micro_context->AllocateTempInputTensor(node, kInputTensorBoxEncodings);
TfLiteTensor* input_class_predictions =
micro_context->AllocateTempInputTensor(node,
kInputTensorClassPredictions);
TfLiteTensor* input_anchors =
micro_context->AllocateTempInputTensor(node, kInputTensorAnchors);
TF_LITE_ENSURE_EQ(context, NumDimensions(input_box_encodings), 3);
TF_LITE_ENSURE_EQ(context, NumDimensions(input_class_predictions), 3);
TF_LITE_ENSURE_EQ(context, NumDimensions(input_anchors), 2);
@@ -217,6 +222,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
// num_detections
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 4);
micro_context->DeallocateTempTfLiteTensor(input_box_encodings);
micro_context->DeallocateTempTfLiteTensor(input_class_predictions);
micro_context->DeallocateTempTfLiteTensor(input_anchors);
return kTfLiteOk;
}
@@ -313,9 +322,10 @@ TfLiteStatus DecodeCenterSizeBoxes(TfLiteContext* context, TfLiteNode* node,
void DecreasingPartialArgSort(const float* values, int num_values,
int num_to_sort, int* indices) {
std::iota(indices, indices + num_values, 0);
std::partial_sort(
indices, indices + num_to_sort, indices + num_values,
[&values](const int i, const int j) { return values[i] > values[j]; });
std::partial_sort(indices, indices + num_to_sort, indices + num_values,
[&values](const int i, const int j) {
return std::tie(values[i], j) > std::tie(values[j], i);
});
}
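The comparator change above makes the partial sort deterministic on ties: std::tie compares the scores first and, when they are equal, falls back to the indices, so equal scores keep the lower index first. A small illustration with placeholder values:
```
// values = {0.9f, 0.7f, 0.9f}, num_values = num_to_sort = 3
// old comparator: the relative order of the two 0.9 entries was unspecified
// new comparator: indices come out as {0, 2, 1} -- ties keep the lower index
```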
template <typename Compare>

View File

@@ -38,11 +38,13 @@ bool IsLogicalSupportedType(const TfLiteType type) {
typedef bool (*IsSupportedType)(TfLiteType);
template <IsSupportedType>
TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, 0);
TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
if (!IsSupportedType(input->type)) {
@@ -50,6 +52,9 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
TfLiteTypeGetName(input->type), input->type);
return kTfLiteError;
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -80,13 +80,16 @@ void EvalUsingLookupTable(const OpData* data, const TfLiteEvalTensor* input,
}
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
// Use LUT to handle quantized elu path.
@@ -97,7 +100,8 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
};
PopulateLookupTable<int8_t>(input, output, transform, data);
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -0,0 +1,11 @@
# Info
These are the Espressif chipset-specific replacement kernels.
The kernels call either optimized routines or reference routines, depending on the optimization option selected.
By default, optimizations are enabled when available.
To change this behaviour, make the appropriate `ESP-NN` menu selection after running:
```
idf.py menuconfig
```
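The replacement kernels that follow all use the same compile-time dispatch: when ESP_NN is defined (see the -DESP_NN compile option added for this component), the esp-nn routine is called, otherwise the stock reference implementation runs. Reduced to its skeleton (arguments elided):
```
#if ESP_NN
  // Optimized path provided by the esp-nn component.
  esp_nn_add_elementwise_s8(/* tensor data and quantization params */);
#else
  // Stock TFLM reference implementation.
  reference_integer_ops::Add(/* same operands */);
#endif
```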

View File

@@ -0,0 +1,209 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/add.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/add.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include <esp_timer.h>
#if ESP_NN
#include <esp_nn.h>
#endif
long long add_total_time = 0;
namespace tflite {
void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params,
const OpDataAdd* data, const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
tflite::ArithmeticParams op_params;
SetActivationParams(data->output_activation_min_f32,
data->output_activation_max_f32, &op_params);
if (data->requires_broadcast) {
reference_ops::BroadcastAdd4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<float>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<float>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
} else {
reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<float>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<float>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
}
TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteAddParams* params, const OpDataAdd* data,
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
tflite::ArithmeticParams op_params;
op_params.left_shift = data->left_shift;
op_params.input1_offset = data->input1_offset;
op_params.input1_multiplier = data->input1_multiplier;
op_params.input1_shift = data->input1_shift;
op_params.input2_offset = data->input2_offset;
op_params.input2_multiplier = data->input2_multiplier;
op_params.input2_shift = data->input2_shift;
op_params.output_offset = data->output_offset;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
SetActivationParams(data->output_activation_min, data->output_activation_max,
&op_params);
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
switch (output->type) {
case kTfLiteInt8: {
if (need_broadcast) {
reference_integer_ops::BroadcastAdd4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
} else {
#if ESP_NN
const int8_t *input1_data = tflite::micro::GetTensorData<int8_t>(input1);
const int8_t *input2_data = tflite::micro::GetTensorData<int8_t>(input2);
int8_t *out_data = tflite::micro::GetTensorData<int8_t>(output);
esp_nn_add_elementwise_s8(input1_data,
input2_data,
data->input1_offset,
data->input2_offset,
data->input1_multiplier,
data->input2_multiplier,
data->input1_shift,
data->input2_shift,
data->left_shift,
out_data,
data->output_offset,
data->output_multiplier,
data->output_shift,
data->output_activation_min,
data->output_activation_max,
MatchingElementsSize(tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorShape(output))
);
#else
reference_integer_ops::Add(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
#endif
}
break;
}
case kTfLiteInt16: {
if (need_broadcast) {
reference_ops::BroadcastAdd4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int16_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int16_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
} else {
reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int16_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int16_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output),
false);
}
break;
}
default:
MicroPrintf("Type %s (%d) not supported.",
TfLiteTypeGetName(output->type), output->type);
return kTfLiteError;
}
return kTfLiteOk;
}
void* AddInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpDataAdd));
}
TfLiteStatus AddEval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
const OpDataAdd* data = static_cast<const OpDataAdd*>(node->user_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kAddInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kAddInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kAddOutputTensor);
long long start_time = esp_timer_get_time();
if (output->type == kTfLiteFloat32) {
EvalAdd(context, node, params, data, input1, input2, output);
} else if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, data,
input1, input2, output));
} else {
MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(output->type),
output->type);
return kTfLiteError;
}
add_total_time += esp_timer_get_time() - start_time;
return kTfLiteOk;
}
TfLiteRegistration Register_ADD() {
return {/*init=*/AddInit,
/*free=*/nullptr,
/*prepare=*/AddPrepare,
/*invoke=*/AddEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
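Note that this port also adds global profiling accumulators (add_total_time here, conv_total_time and others in the kernels below) driven by esp_timer_get_time(). A hypothetical way application code could surface them, assuming the globals are linked in as defined above:
```
#include <cstdio>

// Hypothetical helper: the extern declarations mirror the definitions in the
// esp-nn kernel sources; the counters are in microseconds.
extern long long add_total_time;
extern long long conv_total_time;

void PrintKernelProfile() {
  printf("ADD kernels:  %lld us\n", add_total_time);
  printf("CONV kernels: %lld us\n", conv_total_time);
}
```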

View File

@@ -0,0 +1,319 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/kernels/conv.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/conv.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/conv.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "freertos/FreeRTOS.h"
#include <esp_timer.h>
#if ESP_NN
#include <esp_nn.h>
#endif
long long conv_total_time = 0;
namespace tflite {
namespace {
struct NodeData {
OpDataConv op_data;
#if ESP_NN
int buffer_idx;
#endif
};
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(NodeData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
NodeData* data = static_cast<NodeData*>(node->user_data);
const auto& params =
*(static_cast<const TfLiteConvParams*>(node->builtin_data));
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kConvInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* filter =
micro_context->AllocateTempInputTensor(node, kConvWeightsTensor);
TF_LITE_ENSURE(context, filter != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kConvOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
const int input_width = input->dims->data[2];
const int input_height = input->dims->data[1];
const int filter_width = filter->dims->data[2];
const int filter_height = filter->dims->data[1];
const int output_width = output->dims->data[2];
const int output_height = output->dims->data[1];
// Dynamically allocate per-channel quantization parameters.
const int num_channels = filter->dims->data[kConvQuantizedDimension];
data->op_data.per_channel_output_multiplier =
static_cast<int32_t*>(context->AllocatePersistentBuffer(
context, num_channels * sizeof(int32_t)));
data->op_data.per_channel_output_shift =
static_cast<int32_t*>(context->AllocatePersistentBuffer(
context, num_channels * sizeof(int32_t)));
// All per-channel quantized tensors need valid zero point and scale arrays.
if (input->type == kTfLiteInt8) {
TF_LITE_ENSURE_EQ(context, filter->quantization.type,
kTfLiteAffineQuantization);
const auto* affine_quantization =
static_cast<TfLiteAffineQuantization*>(filter->quantization.params);
TFLITE_DCHECK(affine_quantization != nullptr);
TFLITE_DCHECK(affine_quantization->scale != nullptr);
TFLITE_DCHECK(affine_quantization->zero_point != nullptr);
TF_LITE_ENSURE(context,
affine_quantization->scale->size == 1 ||
affine_quantization->scale->size ==
filter->dims->data[kConvQuantizedDimension]);
TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size,
affine_quantization->zero_point->size);
}
TF_LITE_ENSURE_STATUS(CalculateOpDataConv(
context, node, params, input_width, input_height, filter_width,
filter_height, output_width, output_height, input->type, &data->op_data));
#if ESP_NN
if (input->type == kTfLiteInt8) {
int scratch_buf_size = esp_nn_get_conv_scratch_size(
input_width, input_height, input->dims->data[3],
output->dims->data[3], filter_width, filter_height);
if (scratch_buf_size > 0) {
TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
context, scratch_buf_size, &data->buffer_idx));
} else {
data->buffer_idx = -1;
}
}
#endif
micro_context->DeallocateTempTfLiteTensor(output);
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(filter);
return kTfLiteOk;
}
#if ESP_NN
// Fixed-point per-channel-quantization convolution Int8 function wrapper.
inline void EvalQuantizedPerChannel(
TfLiteContext* context, TfLiteNode* node, const TfLiteConvParams& params,
const NodeData& data, const TfLiteEvalTensor* input,
const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias,
TfLiteEvalTensor* output) {
const int dilation_width_factor = params.dilation_width_factor;
const int dilation_height_factor = params.dilation_height_factor;
if (dilation_width_factor == 1 && dilation_height_factor == 1) {
// Get parameters.
RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter);
RuntimeShape input_shape = tflite::micro::GetTensorShape(input);
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
RuntimeShape bias_shape = tflite::micro::GetTensorShape(bias);
const int8_t *input_data = tflite::micro::GetTensorData<int8_t>(input);
int8_t *output_data = tflite::micro::GetTensorData<int8_t>(output);
const int32_t input_offset = -data.op_data.input_zero_point;
const int32_t output_offset = data.op_data.output_zero_point;
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
const int pad_width = data.op_data.padding.width;
const int pad_height = data.op_data.padding.height;
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
// Set min and max value of the output.
const int32_t activation_min = data.op_data.output_activation_min;
const int32_t activation_max = data.op_data.output_activation_max;
// Consistency check.
TFLITE_DCHECK_LE(activation_min, activation_max);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batch_size = MatchingDim(input_shape, 0, output_shape, 0);
const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
if (tflite::micro::GetTensorData<int8_t>(bias)) {
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
}
void *scratch_buf = NULL;
if (data.buffer_idx > -1) {
scratch_buf = context->GetScratchBuffer(context, data.buffer_idx);
}
esp_nn_set_conv_scratch_buf(scratch_buf);
const int input_size = input_width * input_height * input_depth;
const int output_size = output_width * output_height * output_depth;
for (int i_batch = 0; i_batch < batch_size; i_batch++) {
esp_nn_conv_s8(input_data + i_batch * input_size,
input_width, input_height, input_depth, input_offset,
pad_width, pad_height, stride_width, stride_height,
tflite::micro::GetTensorData<int8_t>(filter),
filter_width, filter_height,
tflite::micro::GetTensorData<int32_t>(bias),
output_data + i_batch * output_size,
output_width, output_height, output_depth, output_offset,
data.op_data.per_channel_output_shift,
data.op_data.per_channel_output_multiplier,
activation_min, activation_max);
}
} else {
reference_integer_ops::ConvPerChannel(
ConvParamsQuantized(params, data.op_data),
data.op_data.per_channel_output_multiplier,
data.op_data.per_channel_output_shift,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
}
#endif
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kConvInputTensor);
const TfLiteEvalTensor* filter =
tflite::micro::GetEvalInput(context, node, kConvWeightsTensor);
const TfLiteEvalTensor* bias =
(NumInputs(node) == 3)
? tflite::micro::GetEvalInput(context, node, kConvBiasTensor)
: nullptr;
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kConvOutputTensor);
TFLITE_DCHECK(node->builtin_data != nullptr);
const auto& params =
*(reinterpret_cast<TfLiteConvParams*>(node->builtin_data));
TFLITE_DCHECK(node->user_data != nullptr);
const auto& data = *(static_cast<const NodeData*>(node->user_data));
TF_LITE_ENSURE_EQ(context, input->type, output->type);
TF_LITE_ENSURE_MSG(context, input->type == filter->type,
"Hybrid models are not supported on TFLite Micro.");
long long start_time = esp_timer_get_time();
switch (input->type) { // Already know in/out types are same.
case kTfLiteFloat32: {
tflite::reference_ops::Conv(
ConvParamsFloat(params, data.op_data),
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<float>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<float>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output),
tflite::micro::GetTensorShape(nullptr), nullptr);
break;
}
case kTfLiteInt8: {
#if ESP_NN
EvalQuantizedPerChannel(context, node, params, data, input, filter,
bias, output);
#else
reference_integer_ops::ConvPerChannel(
ConvParamsQuantized(params, data.op_data),
data.op_data.per_channel_output_multiplier,
data.op_data.per_channel_output_shift,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
#endif
break;
}
case kTfLiteUInt8: {
      // Legacy uint8 path: handled by the reference quantized Conv kernel.
reference_ops::Conv(ConvParamsQuantized(params, data.op_data),
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<uint8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output),
tflite::micro::GetTensorShape(nullptr), nullptr,
nullptr);
break;
}
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);
return kTfLiteError;
}
conv_total_time += esp_timer_get_time() - start_time;
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_CONV_2D() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
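The int8 fast path above advances input_data/output_data by a whole NHWC plane per batch. Below is a minimal standalone sketch of the layout rule that arithmetic relies on; the helper names are illustrative and not part of this commit.

#include <cstddef>

// Flat offset of element (n, h, w, c) in a contiguous NHWC buffer.
inline size_t NhwcOffset(int h_dim, int w_dim, int c_dim,
                         int n, int h, int w, int c) {
  return ((static_cast<size_t>(n) * h_dim + h) * w_dim + w) * c_dim + c;
}

// Stepping to the next batch is a fixed stride of H * W * C elements,
// which is exactly the input_size / output_size step used in the loop above.
inline size_t NhwcBatchStride(int h_dim, int w_dim, int c_dim) {
  return static_cast<size_t>(h_dim) * w_dim * c_dim;
}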

View File

@@ -0,0 +1,319 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/kernels/depthwise_conv.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "freertos/FreeRTOS.h"
#include <esp_timer.h>
#if ESP_NN
#include <esp_nn.h>
#endif
long long dc_total_time = 0;
namespace tflite {
namespace {
struct NodeData {
OpDataConv op_data;
#if ESP_NN
int buffer_idx;
#endif
};
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(NodeData));
}
#if ESP_NN
inline void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
const TfLiteDepthwiseConvParams& params,
const NodeData& data,
const TfLiteEvalTensor* input,
const TfLiteEvalTensor* filter,
const TfLiteEvalTensor* bias,
TfLiteEvalTensor* output) {
const int dilation_width_factor = params.dilation_width_factor;
const int dilation_height_factor = params.dilation_height_factor;
if (dilation_width_factor == 1 && dilation_height_factor == 1) {
// Get parameters.
RuntimeShape input_shape = tflite::micro::GetTensorShape(input);
RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter);
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
RuntimeShape bias_shape = tflite::micro::GetTensorShape(bias);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int8_t *input_data = tflite::micro::GetTensorData<int8_t>(input);
int8_t *output_data = tflite::micro::GetTensorData<int8_t>(output);
const int depth_multiplier = params.depth_multiplier;
const int32_t input_offset = -data.op_data.input_zero_point;
const int32_t output_offset = data.op_data.output_zero_point;
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
const int pad_width = data.op_data.padding.width;
const int pad_height = data.op_data.padding.height;
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int input_depth = input_shape.Dims(3);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
// Set min and max value of the output.
const int32_t activation_min = data.op_data.output_activation_min;
const int32_t activation_max = data.op_data.output_activation_max;
// Consistency check.
TFLITE_DCHECK_LE(activation_min, activation_max);
const int batch_size = MatchingDim(input_shape, 0, output_shape, 0);
const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
if (tflite::micro::GetTensorData<int8_t>(bias)) {
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
}
const int input_size = input_width * input_height * input_depth;
const int output_size = output_width * output_height * output_depth;
void *scratch_buf = NULL;
if (data.buffer_idx > -1) {
scratch_buf = context->GetScratchBuffer(context, data.buffer_idx);
}
esp_nn_set_depthwise_conv_scratch_buf(scratch_buf);
for (int i_batch = 0; i_batch < batch_size; i_batch++) {
esp_nn_depthwise_conv_s8(input_data + i_batch * input_size, input_width,
input_height, input_depth, input_offset,
pad_width, pad_height,
stride_width, stride_height, depth_multiplier,
tflite::micro::GetTensorData<int8_t>(filter),
filter_width, filter_height,
tflite::micro::GetTensorData<int32_t>(bias),
output_data + i_batch * output_size,
output_width, output_height, output_offset,
data.op_data.per_channel_output_shift,
data.op_data.per_channel_output_multiplier,
activation_min, activation_max);
}
} else {
reference_integer_ops::DepthwiseConvPerChannel(
DepthwiseConvParamsQuantized(params, data.op_data),
data.op_data.per_channel_output_multiplier,
data.op_data.per_channel_output_shift,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
}
#endif
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
NodeData* data = static_cast<NodeData*>(node->user_data);
const TfLiteDepthwiseConvParams& params =
*(static_cast<const TfLiteDepthwiseConvParams*>(node->builtin_data));
MicroContext* micro_context = GetMicroContext(context);
  TfLiteTensor* input =
      micro_context->AllocateTempInputTensor(node, kDepthwiseConvInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* filter =
      micro_context->AllocateTempInputTensor(node, kDepthwiseConvWeightsTensor);
  TF_LITE_ENSURE(context, filter != nullptr);
  TfLiteTensor* bias =
      micro_context->AllocateTempInputTensor(node, kDepthwiseConvBiasTensor);
  TfLiteTensor* output =
      micro_context->AllocateTempOutputTensor(node, kDepthwiseConvOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
const int input_width = input->dims->data[2];
const int input_height = input->dims->data[1];
const int filter_width = filter->dims->data[2];
const int filter_height = filter->dims->data[1];
const int output_width = output->dims->data[2];
const int output_height = output->dims->data[1];
// Dynamically allocate per-channel quantization parameters.
const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
data->op_data.per_channel_output_multiplier =
static_cast<int32_t*>(context->AllocatePersistentBuffer(
context, num_channels * sizeof(int32_t)));
data->op_data.per_channel_output_shift =
static_cast<int32_t*>(context->AllocatePersistentBuffer(
context, num_channels * sizeof(int32_t)));
// All per-channel quantized tensors need valid zero point and scale arrays.
if (input->type == kTfLiteInt8) {
TF_LITE_ENSURE_EQ(context, filter->quantization.type,
kTfLiteAffineQuantization);
const auto* affine_quantization =
static_cast<TfLiteAffineQuantization*>(filter->quantization.params);
TFLITE_DCHECK(affine_quantization != nullptr);
TFLITE_DCHECK(affine_quantization->scale != nullptr);
TFLITE_DCHECK(affine_quantization->zero_point != nullptr);
TF_LITE_ENSURE(
context, affine_quantization->scale->size == 1 ||
affine_quantization->scale->size ==
filter->dims->data[kDepthwiseConvQuantizedDimension]);
TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size,
affine_quantization->zero_point->size);
}
TF_LITE_ENSURE_STATUS(CalculateOpDataDepthwiseConv(
context, node, params, input_width, input_height, filter_width,
filter_height, output_width, output_height, input->type, &data->op_data));
#if ESP_NN
if (input->type == kTfLiteInt8) {
int scratch_buf_size = esp_nn_get_depthwise_conv_scratch_size(
input_width, input_height, input->dims->data[3],
params.depth_multiplier, filter_width, filter_height);
if (scratch_buf_size > 0) {
TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
context, scratch_buf_size, &data->buffer_idx));
} else {
data->buffer_idx = -1;
}
}
#endif
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(filter);
micro_context->DeallocateTempTfLiteTensor(bias);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
auto& params =
*(reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data));
const NodeData& data = *(static_cast<const NodeData*>(node->user_data));
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kDepthwiseConvOutputTensor);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kDepthwiseConvInputTensor);
const TfLiteEvalTensor* filter =
tflite::micro::GetEvalInput(context, node, kDepthwiseConvWeightsTensor);
const TfLiteEvalTensor* bias =
(NumInputs(node) == 3)
? tflite::micro::GetEvalInput(context, node, kDepthwiseConvBiasTensor)
: nullptr;
long long start_time = esp_timer_get_time();
switch (input->type) { // Already know in/out types are same.
case kTfLiteFloat32:
tflite::reference_ops::DepthwiseConv(
DepthwiseConvParamsFloat(params, data.op_data),
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<float>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<float>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
break;
case kTfLiteInt8:
#if ESP_NN
EvalQuantizedPerChannel(context, node, params, data, input, filter, bias,
output);
#else
reference_integer_ops::DepthwiseConvPerChannel(
DepthwiseConvParamsQuantized(params, data.op_data),
data.op_data.per_channel_output_multiplier,
data.op_data.per_channel_output_shift,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
#endif
break;
case kTfLiteUInt8:
      // Legacy uint8 path: handled by the reference DepthwiseConv kernel.
reference_ops::DepthwiseConv(
DepthwiseConvParamsQuantized(params, data.op_data),
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<uint8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);
return kTfLiteError;
}
dc_total_time += esp_timer_get_time() - start_time;
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_DEPTHWISE_CONV_2D() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
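Both convolution kernels above use the same two-phase scratch-buffer handshake: Prepare asks the arena planner for space and stores an index, and Eval resolves that index into a pointer that is only valid for the current invocation. A condensed sketch of that pattern follows, using the TfLiteContext callbacks that appear in the code; ScratchInfo is an illustrative stand-in for the NodeData field.

#include "tensorflow/lite/c/common.h"

struct ScratchInfo {
  int buffer_idx = -1;  // -1 means no scratch buffer was requested
};

// Prepare-time: request arena space and remember the handle.
TfLiteStatus RequestScratch(TfLiteContext* context, size_t bytes,
                            ScratchInfo* info) {
  if (bytes == 0) {
    info->buffer_idx = -1;
    return kTfLiteOk;
  }
  return context->RequestScratchBufferInArena(context, bytes,
                                              &info->buffer_idx);
}

// Eval-time: resolve the handle; the pointer must not be cached across calls.
void* ResolveScratch(TfLiteContext* context, const ScratchInfo& info) {
  return (info.buffer_idx > -1)
             ? context->GetScratchBuffer(context, info.buffer_idx)
             : nullptr;
}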

View File

@@ -0,0 +1,198 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/kernels/fully_connected.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#if ESP_NN
#include <esp_nn.h>
#endif
#include <esp_timer.h>
long long fc_total_time = 0;
namespace tflite {
namespace {
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context,
sizeof(OpDataFullyConnected));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* data = static_cast<OpDataFullyConnected*>(node->user_data);
const auto params =
static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kFullyConnectedInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* filter = micro_context->AllocateTempInputTensor(
node, kFullyConnectedWeightsTensor);
TF_LITE_ENSURE(context, filter != nullptr);
TfLiteTensor* bias =
micro_context->AllocateTempInputTensor(node, kFullyConnectedBiasTensor);
TfLiteTensor* output = micro_context->AllocateTempOutputTensor(
node, kFullyConnectedOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
TF_LITE_ENSURE_MSG(context, input->type == filter->type,
"Hybrid models are not supported on TFLite Micro.");
TF_LITE_ENSURE_OK(context, CalculateOpDataFullyConnected(
context, params->activation, input->type,
input, filter, bias, output, data));
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(filter);
if (bias != nullptr) {
micro_context->DeallocateTempTfLiteTensor(bias);
}
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
const auto* params =
static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kFullyConnectedInputTensor);
const TfLiteEvalTensor* filter =
tflite::micro::GetEvalInput(context, node, kFullyConnectedWeightsTensor);
const TfLiteEvalTensor* bias =
tflite::micro::GetEvalInput(context, node, kFullyConnectedBiasTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kFullyConnectedOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const auto& data =
*(static_cast<const OpDataFullyConnected*>(node->user_data));
long long start_time = esp_timer_get_time();
// Checks in Prepare ensure input, output and filter types are all the same.
switch (input->type) {
case kTfLiteFloat32: {
tflite::reference_ops::FullyConnected(
FullyConnectedParamsFloat(params->activation),
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<float>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<float>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
break;
}
case kTfLiteInt8: {
const int32_t* bias_data =
nullptr != bias ? tflite::micro::GetTensorData<int32_t>(bias)
: nullptr;
#if ESP_NN
const RuntimeShape& filter_shape = tflite::micro::GetTensorShape(filter);
const RuntimeShape& output_shape = tflite::micro::GetTensorShape(output);
const int filter_dim_count = filter_shape.DimensionsCount();
const int batches = output_shape.Dims(0);
const int output_depth = output_shape.Dims(1);
TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
const int8_t *input_data = tflite::micro::GetTensorData<int8_t>(input);
int8_t *output_data = tflite::micro::GetTensorData<int8_t>(output);
const int8_t *filter_data = tflite::micro::GetTensorData<int8_t>(filter);
for (int b = 0; b < batches; ++b) {
esp_nn_fully_connected_s8(input_data, -data.input_zero_point,
accum_depth,
filter_data, -data.filter_zero_point,
bias_data, output_data, output_depth,
data.output_zero_point,
data.output_shift, data.output_multiplier,
data.output_activation_min,
data.output_activation_max);
input_data += accum_depth;
output_data += output_depth;
}
#else
tflite::reference_integer_ops::FullyConnected(
FullyConnectedParamsQuantized(data),
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias), bias_data,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
#endif
break;
}
case kTfLiteUInt8: {
tflite::reference_ops::FullyConnected(
FullyConnectedParamsQuantized(data),
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<uint8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
break;
}
default: {
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);
return kTfLiteError;
}
}
fc_total_time += esp_timer_get_time() - start_time;
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_FULLY_CONNECTED() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
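Each esp_nn_fully_connected_s8 call above handles one batch: a dot product of the offset-corrected input row against every filter row, plus bias. The sketch below is illustrative only and shows just the int32 accumulation stage; requantization to int8 and activation clamping, which the library call also performs, are omitted, and the filter zero-point is assumed to be zero as is usual for int8 weights.

#include <cstdint>

// One batch of a quantized fully-connected layer:
// accum[o] = bias[o] + sum_a (input[a] + input_offset) * filter[o][a].
void FullyConnectedAccumulate(const int8_t* input, int32_t input_offset,
                              const int8_t* filter,  // [output_depth][accum_depth]
                              const int32_t* bias,   // may be nullptr
                              int32_t* accum,
                              int accum_depth, int output_depth) {
  for (int o = 0; o < output_depth; ++o) {
    int32_t acc = (bias != nullptr) ? bias[o] : 0;
    const int8_t* row = filter + o * accum_depth;
    for (int a = 0; a < accum_depth; ++a) {
      acc += (static_cast<int32_t>(input[a]) + input_offset) *
             static_cast<int32_t>(row[a]);
    }
    accum[o] = acc;
  }
}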

View File

@@ -0,0 +1,131 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/kernels/mul.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/mul.h"
#include "tensorflow/lite/kernels/internal/reference/mul.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#if ESP_NN
#include <esp_nn.h>
#endif
#include <esp_timer.h>
long long mul_total_time = 0;
namespace tflite {
#if ESP_NN
void MulEvalQuantized(TfLiteContext* context, TfLiteNode* node,
const OpDataMul* data, const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
tflite::ArithmeticParams op_params = {};
op_params.quantized_activation_min = data->output_activation_min;
op_params.quantized_activation_max = data->output_activation_max;
op_params.float_activation_max = data->output_activation_max_f32;
op_params.input1_offset = -data->input1_zero_point;
op_params.input2_offset = -data->input2_zero_point;
op_params.output_offset = data->output_zero_point;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
if (need_broadcast) {
reference_integer_ops::BroadcastMul4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
} else {
const int8_t *input1_data = tflite::micro::GetTensorData<int8_t>(input1);
const int8_t *input2_data = tflite::micro::GetTensorData<int8_t>(input2);
int8_t *out_data = tflite::micro::GetTensorData<int8_t>(output);
esp_nn_mul_elementwise_s8(input1_data, input2_data, op_params.input1_offset,
op_params.input2_offset, out_data, op_params.output_offset,
op_params.output_multiplier, op_params.output_shift,
op_params.quantized_activation_min, op_params.quantized_activation_max,
MatchingElementsSize(tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorShape(output)));
}
}
#endif
TfLiteStatus MulEval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
const OpDataMul* data = static_cast<const OpDataMul*>(node->user_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kMulInput1Tensor);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kMulInput2Tensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kMulOutputTensor);
long long start_time = esp_timer_get_time();
switch (input1->type) {
case kTfLiteInt8:
#if ESP_NN
MulEvalQuantized(context, node, data, input1, input2, output);
#else
EvalMulQuantizedReference(context, node, data, input1, input2, output);
#endif
break;
case kTfLiteInt32:
EvalMulQuantizedReference(context, node, data, input1, input2, output);
break;
case kTfLiteFloat32:
EvalMulFloatReference(context, node, params, data, input1, input2,
output);
break;
default:
MicroPrintf("Type %s (%d) not supported.",
TfLiteTypeGetName(input1->type), input1->type);
return kTfLiteError;
}
mul_total_time += esp_timer_get_time() - start_time;
return kTfLiteOk;
}
TfLiteRegistration Register_MUL() {
return {/*init=*/MulInit,
/*free=*/nullptr,
/*prepare=*/MulPrepare,
/*invoke=*/MulEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
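When no broadcasting is required, esp_nn_mul_elementwise_s8 above multiplies the offset-corrected inputs element by element and requantizes the result. The sketch below mirrors that per-element math with a floating-point effective_scale standing in for the fixed-point multiplier/shift pair the real kernel uses; it is illustrative only.

#include <algorithm>
#include <cmath>
#include <cstdint>

// Illustrative elementwise quantized multiply: offsets are added back, the
// products are rescaled, then the result is clamped to the output range.
void MulElementwiseS8(const int8_t* in1, const int8_t* in2, int8_t* out,
                      int size, int32_t in1_offset, int32_t in2_offset,
                      int32_t out_offset, float effective_scale,
                      int32_t act_min, int32_t act_max) {
  for (int i = 0; i < size; ++i) {
    const int32_t a = static_cast<int32_t>(in1[i]) + in1_offset;
    const int32_t b = static_cast<int32_t>(in2[i]) + in2_offset;
    int32_t v = static_cast<int32_t>(std::lround(a * b * effective_scale)) +
                out_offset;
    v = std::min(std::max(v, act_min), act_max);
    out[i] = static_cast<int8_t>(v);
  }
}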

View File

@@ -0,0 +1,245 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/pooling.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/pooling.h"
#if ESP_NN
#include <esp_nn.h>
#endif
#include <esp_timer.h>
long long pooling_total_time = 0;
namespace tflite {
namespace {
#if ESP_NN
void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
const TfLitePoolParams* params, const OpDataPooling* data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output) {
const int stride_height = params->stride_height;
const int stride_width = params->stride_width;
const int filter_height = params->filter_height;
const int filter_width = params->filter_width;
const int activation_min = data->activation_min;
const int activation_max = data->activation_max;
const int pad_height = data->padding.height;
const int pad_width = data->padding.width;
const RuntimeShape& input_shape = tflite::micro::GetTensorShape(input);
const RuntimeShape& output_shape = tflite::micro::GetTensorShape(output);
TFLITE_DCHECK_LE(activation_min, activation_max);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int depth = MatchingDim(input_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const int8_t *input_data = tflite::micro::GetTensorData<int8_t>(input);
int8_t *output_data = tflite::micro::GetTensorData<int8_t>(output);
const int input_size = input_width * input_height * depth;
const int output_size = output_width * output_height * depth;
  if (depth % 4 == 0) { // the ESP32-S3 optimized kernel only supports channel counts that are a multiple of 4
for (int batch = 0; batch < batches; ++batch) {
esp_nn_avg_pool_s8(input_data, input_width, input_height,
output_data, output_width, output_height,
stride_width, stride_height,
filter_width, filter_height,
pad_width, pad_height,
activation_min, activation_max, depth);
input_data += input_size;
output_data += output_size;
}
} else {
for (int batch = 0; batch < batches; ++batch) {
esp_nn_avg_pool_s8_ansi(input_data, input_width, input_height,
output_data, output_width, output_height,
stride_width, stride_height,
filter_width, filter_height,
pad_width, pad_height,
activation_min, activation_max, depth);
input_data += input_size;
output_data += output_size;
}
}
}
void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLitePoolParams* params, const OpDataPooling* data,
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
const int stride_height = params->stride_height;
const int stride_width = params->stride_width;
const int filter_height = params->filter_height;
const int filter_width = params->filter_width;
const int activation_min = data->activation_min;
const int activation_max = data->activation_max;
const int pad_height = data->padding.height;
const int pad_width = data->padding.width;
const RuntimeShape& input_shape = tflite::micro::GetTensorShape(input);
const RuntimeShape& output_shape = tflite::micro::GetTensorShape(output);
TFLITE_DCHECK_LE(activation_min, activation_max);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int depth = MatchingDim(input_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const int8_t *input_data = tflite::micro::GetTensorData<int8_t>(input);
int8_t *output_data = tflite::micro::GetTensorData<int8_t>(output);
const int input_size = input_width * input_height * depth;
const int output_size = output_width * output_height * depth;
  if (depth % 4 == 0) { // the ESP32-S3 optimized kernel only supports channel counts that are a multiple of 4
for (int batch = 0; batch < batches; ++batch) {
esp_nn_max_pool_s8(input_data, input_width, input_height,
output_data, output_width, output_height,
stride_width, stride_height,
filter_width, filter_height,
pad_width, pad_height,
activation_min, activation_max, depth);
input_data += input_size;
output_data += output_size;
}
} else {
for (int batch = 0; batch < batches; ++batch) {
esp_nn_max_pool_s8_ansi(input_data, input_width, input_height,
output_data, output_width, output_height,
stride_width, stride_height,
filter_width, filter_height,
pad_width, pad_height,
activation_min, activation_max, depth);
input_data += input_size;
output_data += output_size;
}
}
}
#endif
TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
const OpDataPooling* data =
static_cast<const OpDataPooling*>(node->user_data);
const TfLiteEvalTensor* input =
micro::GetEvalInput(context, node, kPoolingInputTensor);
TfLiteEvalTensor* output =
micro::GetEvalOutput(context, node, kPoolingOutputTensor);
long long start_time = esp_timer_get_time();
// Inputs and outputs share the same type, guaranteed by the converter.
switch (input->type) {
case kTfLiteFloat32:
AveragePoolingEvalFloat(context, node, params, data, input, output);
break;
case kTfLiteInt8:
#if ESP_NN
AverageEvalQuantized(context, node, params, data, input, output);
#else
AveragePoolingEvalQuantized(context, node, params, data, input, output);
#endif
break;
default:
TF_LITE_KERNEL_LOG(context, "Input type %s is not currently supported",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
pooling_total_time += esp_timer_get_time() - start_time;
return kTfLiteOk;
}
TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
const OpDataPooling* data =
static_cast<const OpDataPooling*>(node->user_data);
const TfLiteEvalTensor* input =
micro::GetEvalInput(context, node, kPoolingInputTensor);
TfLiteEvalTensor* output =
micro::GetEvalOutput(context, node, kPoolingOutputTensor);
long long start_time = esp_timer_get_time();
switch (input->type) {
case kTfLiteFloat32:
MaxPoolingEvalFloat(context, node, params, data, input, output);
break;
case kTfLiteInt8:
#if ESP_NN
MaxEvalQuantized(context, node, params, data, input, output);
#else
MaxPoolingEvalQuantized(context, node, params, data, input, output);
#endif
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
pooling_total_time += esp_timer_get_time() - start_time;
return kTfLiteOk;
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpDataPooling));
}
} // namespace
TfLiteRegistration Register_AVERAGE_POOL_2D() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/PoolingPrepare,
/*invoke=*/AverageEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration Register_MAX_POOL_2D() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/PoolingPrepare,
/*invoke=*/MaxEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
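Both pooling paths above choose between the vectorized kernel and the portable _ansi fallback purely on the channel count. A stripped-down sketch of that dispatch follows; the function-pointer type is a placeholder, not the ESP-NN API.

#include <cstdint>

// Signature is illustrative; the real ESP-NN pooling functions take the full
// geometry (widths, heights, strides, padding, activation range) as well.
using PoolKernel = void (*)(const int8_t* input, int8_t* output, int depth);

// The ESP32-S3 assembly kernels require the channel count to be a multiple
// of 4; anything else goes through the portable C implementation.
inline PoolKernel SelectPoolKernel(int depth, PoolKernel optimized,
                                   PoolKernel portable) {
  return (depth % 4 == 0) ? optimized : portable;
}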

View File

@@ -27,11 +27,15 @@ constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
TF_LITE_ENSURE_TYPES_EQ(context, output->type, input->type);
@@ -40,6 +44,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
for (int i = 0; i < output->dims->size; ++i) {
TF_LITE_ENSURE_EQ(context, output->dims->data[i], input->dims->data[i]);
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
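This hunk and the ones that follow all apply the same refactor: tensors that were previously fetched with GetInput/GetOutput are now allocated as temporaries from the MicroContext and explicitly released before Prepare returns. Below is a schematic of the pattern, built from the MicroContext calls shown in the diffs; the wrapper function name is illustrative.

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/micro_context.h"

// Every temp tensor taken from the MicroContext must be deallocated before
// returning, otherwise the kernel runner's ValidateTempBufferDeallocated()
// check (see the kernel_runner diff further below) fails in tests.
TfLiteStatus CheckInOutTypesMatch(TfLiteContext* context, TfLiteNode* node) {
  tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
  TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
  TF_LITE_ENSURE(context, output != nullptr);
  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
  micro_context->DeallocateTempTfLiteTensor(input);
  micro_context->DeallocateTempTfLiteTensor(output);
  return kTfLiteOk;
}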

View File

@@ -84,22 +84,31 @@ TfLiteStatus VerifyTensorDim(TfLiteContext* context, const TfLiteTensor* input,
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
const TfLiteTensor* axis;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kAxisTensor, &axis));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* axis =
micro_context->AllocateTempInputTensor(node, kAxisTensor);
TF_LITE_ENSURE(context, axis != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
output->type = input->type;
if (IsDynamicTensor(axis)) {
TF_LITE_KERNEL_LOG(context,
"DynamicTensor is not yet supported by Expand_Dims.");
return kTfLiteError;
}
return VerifyTensorDim(context, input, axis, output);
TF_LITE_ENSURE_OK(context, VerifyTensorDim(context, input, axis, output));
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(axis);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
template <typename T>

View File

@@ -65,14 +65,18 @@ constexpr int kValueTensor = 1;
constexpr int kOutputTensor = 0;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
// Ensure inputs and outputs exist.
const TfLiteTensor* dims;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kDimsTensor, &dims));
const TfLiteTensor* value;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kValueTensor, &value));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TfLiteTensor* dims =
micro_context->AllocateTempInputTensor(node, kDimsTensor);
TF_LITE_ENSURE(context, dims != nullptr);
TfLiteTensor* value =
micro_context->AllocateTempInputTensor(node, kValueTensor);
TF_LITE_ENSURE(context, value != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
// The value tensor must be a scalar.
TF_LITE_ENSURE_EQ(context, NumDimensions(value), 0);
@@ -90,6 +94,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_OK(context, EnsureEq(context, output->dims, dims));
}
micro_context->DeallocateTempTfLiteTensor(dims);
micro_context->DeallocateTempTfLiteTensor(value);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -31,22 +31,28 @@ constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input1;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputTensor1, &input1));
const TfLiteTensor* input2;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputTensor2, &input2));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TfLiteTensor* input1 =
micro_context->AllocateTempInputTensor(node, kInputTensor1);
TF_LITE_ENSURE(context, input1 != nullptr);
TfLiteTensor* input2 =
micro_context->AllocateTempInputTensor(node, kInputTensor2);
TF_LITE_ENSURE(context, input2 != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, output->type);
micro_context->DeallocateTempTfLiteTensor(input1);
micro_context->DeallocateTempTfLiteTensor(input2);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -36,22 +36,28 @@ constexpr int kOutputTensor = 0;
// OLD-TODO(b/117912880): Support quantization.
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input1;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputTensor1, &input1));
const TfLiteTensor* input2;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputTensor2, &input2));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TfLiteTensor* input1 =
micro_context->AllocateTempInputTensor(node, kInputTensor1);
TF_LITE_ENSURE(context, input1 != nullptr);
TfLiteTensor* input2 =
micro_context->AllocateTempInputTensor(node, kInputTensor2);
TF_LITE_ENSURE(context, input2 != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, output->type);
micro_context->DeallocateTempTfLiteTensor(input1);
micro_context->DeallocateTempTfLiteTensor(input2);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -35,6 +35,8 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) {
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
@@ -42,23 +44,33 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
const auto params =
static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
const TfLiteTensor* input =
GetInput(context, node, kFullyConnectedInputTensor);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kFullyConnectedInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
const TfLiteTensor* filter =
GetInput(context, node, kFullyConnectedWeightsTensor);
TfLiteTensor* filter = micro_context->AllocateTempInputTensor(
node, kFullyConnectedWeightsTensor);
TF_LITE_ENSURE(context, filter != nullptr);
const TfLiteTensor* bias =
GetOptionalInputTensor(context, node, kFullyConnectedBiasTensor);
TfLiteTensor* output = GetOutput(context, node, kFullyConnectedOutputTensor);
TfLiteTensor* bias =
micro_context->AllocateTempInputTensor(node, kFullyConnectedBiasTensor);
TfLiteTensor* output = micro_context->AllocateTempOutputTensor(
node, kFullyConnectedOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
TF_LITE_ENSURE_MSG(context, input->type == filter->type,
"Hybrid models are not supported on TFLite Micro.");
return CalculateOpDataFullyConnected(context, params->activation, input->type,
input, filter, bias, output, data);
TF_LITE_ENSURE_OK(context, CalculateOpDataFullyConnected(
context, params->activation, input->type,
input, filter, bias, output, data));
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(filter);
if (bias != nullptr) {
micro_context->DeallocateTempTfLiteTensor(bias);
}
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {

View File

@@ -97,19 +97,23 @@ TfLiteStatus Gather(const TfLiteGatherParams* params,
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const auto* params =
reinterpret_cast<const TfLiteGatherParams*>(node->builtin_data);
const TfLiteTensor* input;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
const TfLiteTensor* coords;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputPositions, &coords));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* coords =
micro_context->AllocateTempInputTensor(node, kInputPositions);
TF_LITE_ENSURE(context, coords != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
switch (coords->type) {
case kTfLiteInt32:
break;
@@ -176,6 +180,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
for (int i = axis + 1; i < input->dims->size; ++i) {
output_shape->data[output_index++] = input->dims->data[i];
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(coords);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -28,16 +28,19 @@ constexpr int kOutputTensor = 0;
constexpr int MAX_INDICES_ND = 5;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* params;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kParams, &params));
const TfLiteTensor* indices;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kIndices, &indices));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TfLiteTensor* params = micro_context->AllocateTempInputTensor(node, kParams);
TF_LITE_ENSURE(context, params != nullptr);
TfLiteTensor* indices =
micro_context->AllocateTempInputTensor(node, kIndices);
TF_LITE_ENSURE(context, indices != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
switch (params->type) {
case kTfLiteFloat32:
@@ -98,6 +101,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
output_shape->data[output_index++] = params->dims->data[i];
}
output_shape->size = output_index;
micro_context->DeallocateTempTfLiteTensor(params);
micro_context->DeallocateTempTfLiteTensor(indices);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -32,13 +32,17 @@ const int kHardSwishInputTensor = 0;
const int kHardSwishOutputTensor = 0;
TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TFLITE_DCHECK(node->user_data != nullptr);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kHardSwishInputTensor);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kHardSwishInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kHardSwishOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kHardSwishOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
if (input->type == kTfLiteInt8) {
@@ -73,6 +77,9 @@ TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node) {
&params->reluish_multiplier_fixedpoint_int16);
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -23,6 +23,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_context.h"
#include "tensorflow/lite/micro/micro_graph.h"
#include "tensorflow/lite/schema/schema_generated.h"
@@ -50,36 +51,33 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE(context, node->inputs->size > 0);
// The first input is the condition.
const TfLiteTensor* cond;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &cond));
tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
TfLiteTensor* cond = micro_context->AllocateTempInputTensor(node, 0);
TF_LITE_ENSURE(context, cond != nullptr);
TF_LITE_ENSURE_EQ(context, cond->type, kTfLiteBool);
TF_LITE_ENSURE_EQ(context, NumElements(cond), 1);
micro_context->DeallocateTempTfLiteTensor(cond);
// The first input of the node is the condition. The rest of inputs are
// passed to the branch subgraphs. Therefore, the number of subgraph inputs
// will be the number of node inputs - 1.
size_t num_inputs = node->inputs->size - 1;
size_t num_outputs = node->outputs->size;
// Casting to TfliteIntArray is required since we are re-using
// GetExecutionPlan from TfLiteContext. On TFLM this method returns a
// MicroGraph.
// TODO(b/188226309): Design a cleaner way to get a graph from kernel context.
MicroGraph* graph_info;
context->GetExecutionPlan(context,
reinterpret_cast<TfLiteIntArray**>(&graph_info));
MicroGraph& graph_info = micro_context->graph();
TF_LITE_ENSURE(context,
op_data->then_subgraph_index < graph_info->NumSubgraphs());
op_data->then_subgraph_index < graph_info.NumSubgraphs());
TF_LITE_ENSURE(context,
op_data->else_subgraph_index < graph_info->NumSubgraphs());
op_data->else_subgraph_index < graph_info.NumSubgraphs());
TF_LITE_ENSURE_EQ(
context, num_inputs,
graph_info->NumSubgraphInputs(op_data->then_subgraph_index));
TF_LITE_ENSURE_EQ(context, num_inputs,
graph_info.NumSubgraphInputs(op_data->then_subgraph_index));
TF_LITE_ENSURE_EQ(
context, num_outputs,
graph_info->NumSubgraphOutputs(op_data->then_subgraph_index));
graph_info.NumSubgraphOutputs(op_data->then_subgraph_index));
return kTfLiteOk;
}
@@ -87,66 +85,30 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
const TfLiteTensor* cond;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &cond));
tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
TfLiteTensor* cond = micro_context->AllocateTempInputTensor(node, 0);
TF_LITE_ENSURE(context, cond != nullptr);
bool cond_value = cond->data.b[0];
micro_context->DeallocateTempTfLiteTensor(cond);
// Casting to TfliteIntArray is required since we are re-using
// GetExecutionPlan from TfLiteContext. On TFLM this method returns a
// MicroGraph.
// TODO(b/188226309): Design a cleaner way to get a graph from kernel context.
MicroGraph* graph_info;
context->GetExecutionPlan(context,
reinterpret_cast<TfLiteIntArray**>(&graph_info));
// Currently we copy the input / output between the subgraphs. This isn't
// optimized yet.
MicroGraph* graph_info = &micro_context->graph();
// Currently we copy the input / output between the subgraphs.
int active_branch_subgraph_index =
cond_value ? op_data->then_subgraph_index : op_data->else_subgraph_index;
for (size_t i = 0;
i < graph_info->NumSubgraphInputs(active_branch_subgraph_index); ++i) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, i + 1);
TfLiteEvalTensor* subgraph_input =
graph_info->GetSubgraphInput(active_branch_subgraph_index, i);
// These checks must occur in Eval since TfLiteEvalTensors are not available
// during Prepare.
size_t input_bytes;
size_t subgraph_input_bytes;
TF_LITE_ENSURE_OK(context, TfLiteEvalTensorByteLength(input, &input_bytes));
TF_LITE_ENSURE_OK(context, TfLiteEvalTensorByteLength(
subgraph_input, &subgraph_input_bytes));
TF_LITE_ENSURE_TYPES_EQ(context, input->type, subgraph_input->type);
TF_LITE_ENSURE_EQ(context, input_bytes, subgraph_input_bytes);
memcpy(subgraph_input->data.raw, input->data.raw, input_bytes);
}
TF_LITE_ENSURE_OK(context,
tflite::micro::CopyOpInputsToSubgraphInputs(
context, node, graph_info, active_branch_subgraph_index,
/*first_tensor_idx=*/1));
TF_LITE_ENSURE_OK(context,
graph_info->InvokeSubgraph(active_branch_subgraph_index));
for (size_t i = 0;
i < graph_info->NumSubgraphOutputs(active_branch_subgraph_index); ++i) {
const TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, i);
TF_LITE_ENSURE_OK(
context, tflite::micro::CopySubgraphOutputsToOpOutputs(
context, node, graph_info, active_branch_subgraph_index));
TfLiteEvalTensor* subgraph_output =
graph_info->GetSubgraphOutput(active_branch_subgraph_index, i);
// These checks must occur in Eval since TfLiteEvalTensors are not available
// during Prepare.
size_t output_bytes;
size_t subgraph_output_bytes;
TF_LITE_ENSURE_OK(context,
TfLiteEvalTensorByteLength(output, &output_bytes));
TF_LITE_ENSURE_OK(context, TfLiteEvalTensorByteLength(
subgraph_output, &subgraph_output_bytes));
TF_LITE_ENSURE_TYPES_EQ(context, output->type, subgraph_output->type);
TF_LITE_ENSURE_EQ(context, output_bytes, subgraph_output_bytes);
memcpy(output->data.raw, subgraph_output->data.raw, output_bytes);
}
return kTfLiteOk;
}
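The two helpers introduced here, CopyOpInputsToSubgraphInputs and CopySubgraphOutputsToOpOutputs, bundle up the checks-plus-memcpy that the removed loops performed per tensor. A condensed sketch of that per-tensor copy, using the same TFLM calls visible in the removed code:

#include <cstring>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/memory_helpers.h"

// Copies one eval tensor into another after checking that the destination has
// the same type and byte length -- the invariant the removed loops enforced
// before each memcpy.
TfLiteStatus CopyEvalTensor(TfLiteContext* context,
                            const TfLiteEvalTensor* src,
                            TfLiteEvalTensor* dst) {
  size_t src_bytes = 0;
  size_t dst_bytes = 0;
  TF_LITE_ENSURE_OK(context,
                    tflite::TfLiteEvalTensorByteLength(src, &src_bytes));
  TF_LITE_ENSURE_OK(context,
                    tflite::TfLiteEvalTensorByteLength(dst, &dst_bytes));
  TF_LITE_ENSURE_TYPES_EQ(context, src->type, dst->type);
  TF_LITE_ENSURE_EQ(context, src_bytes, dst_bytes);
  std::memcpy(dst->data.raw, src->data.raw, src_bytes);
  return kTfLiteOk;
}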

View File

@@ -24,7 +24,6 @@ namespace tflite {
namespace micro {
// TODO(b/161841696): Consider moving away from global arena buffers:
constexpr int KernelRunner::kNumScratchBuffers_;
constexpr int KernelRunner::kKernelRunnerBufferSize_;
uint8_t KernelRunner::kKernelRunnerBuffer_[];
@@ -32,22 +31,23 @@ KernelRunner::KernelRunner(const TfLiteRegistration& registration,
TfLiteTensor* tensors, int tensors_size,
TfLiteIntArray* inputs, TfLiteIntArray* outputs,
void* builtin_data)
: allocator_(SimpleMemoryAllocator::Create(GetMicroErrorReporter(),
: registration_(registration),
allocator_(SimpleMemoryAllocator::Create(GetMicroErrorReporter(),
kKernelRunnerBuffer_,
kKernelRunnerBufferSize_)),
registration_(registration),
tensors_(tensors),
mock_micro_graph_(allocator_) {
mock_micro_graph_(allocator_),
fake_micro_context_(tensors, allocator_, &mock_micro_graph_) {
// Prepare TfLiteContext:
context_.impl_ = static_cast<void*>(this);
context_.ReportError = ReportOpError;
context_.impl_ = static_cast<void*>(&fake_micro_context_);
context_.ReportError = MicroContextReportOpError;
context_.recommended_num_threads = 1;
context_.GetTensor = GetTensor;
context_.GetEvalTensor = GetEvalTensor;
context_.AllocatePersistentBuffer = AllocatePersistentBuffer;
context_.RequestScratchBufferInArena = RequestScratchBufferInArena;
context_.GetScratchBuffer = GetScratchBuffer;
context_.GetExecutionPlan = GetGraph;
context_.GetTensor = MicroContextGetTensor;
context_.GetEvalTensor = MicroContextGetEvalTensor;
context_.AllocatePersistentBuffer = MicroContextAllocatePersistentBuffer;
context_.RequestScratchBufferInArena =
MicroContextRequestScratchBufferInArena;
context_.GetScratchBuffer = MicroContextGetScratchBuffer;
context_.recommended_num_threads = 0;
// Prepare TfLiteNode:
@@ -56,14 +56,24 @@ KernelRunner::KernelRunner(const TfLiteRegistration& registration,
node_.builtin_data = builtin_data;
}
bool KernelRunner::ValidateTempBufferDeallocated() {
return fake_micro_context_.IsAllTempTfLiteTensorDeallocated();
}
TfLiteStatus KernelRunner::InitAndPrepare(const char* init_data,
size_t length) {
if (registration_.init) {
node_.user_data = registration_.init(&context_, init_data, length);
}
TF_LITE_ENSURE(&context_, ValidateTempBufferDeallocated());
if (registration_.prepare) {
TF_LITE_ENSURE_STATUS(registration_.prepare(&context_, &node_));
}
TF_LITE_ENSURE(&context_, ValidateTempBufferDeallocated());
return kTfLiteOk;
}
@@ -72,101 +82,11 @@ TfLiteStatus KernelRunner::Invoke() {
MicroPrintf("TfLiteRegistration missing invoke function pointer!");
return kTfLiteError;
}
return registration_.invoke(&context_, &node_);
}
TfLiteTensor* KernelRunner::GetTensor(const struct TfLiteContext* context,
int tensor_index) {
TFLITE_DCHECK(context != nullptr);
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
TFLITE_DCHECK(runner != nullptr);
TF_LITE_ENSURE_STATUS(registration_.invoke(&context_, &node_));
return &runner->tensors_[tensor_index];
}
TF_LITE_ENSURE(&context_, ValidateTempBufferDeallocated());
TfLiteEvalTensor* KernelRunner::GetEvalTensor(
const struct TfLiteContext* context, int tensor_index) {
TFLITE_DCHECK(context != nullptr);
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
TFLITE_DCHECK(runner != nullptr);
TfLiteEvalTensor* eval_tensor =
reinterpret_cast<TfLiteEvalTensor*>(runner->allocator_->AllocateTemp(
sizeof(TfLiteEvalTensor), alignof(TfLiteEvalTensor)));
TFLITE_DCHECK(eval_tensor != nullptr);
// In unit tests, the TfLiteTensor pointer contains the source of truth for
// buffers and values:
eval_tensor->data = runner->tensors_[tensor_index].data;
eval_tensor->dims = runner->tensors_[tensor_index].dims;
eval_tensor->type = runner->tensors_[tensor_index].type;
return eval_tensor;
}
void* KernelRunner::AllocatePersistentBuffer(TfLiteContext* context,
size_t bytes) {
TFLITE_DCHECK(context != nullptr);
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
TFLITE_DCHECK(runner != nullptr);
return runner->allocator_->AllocateFromTail(bytes,
MicroArenaBufferAlignment());
}
TfLiteStatus KernelRunner::RequestScratchBufferInArena(TfLiteContext* context,
size_t bytes,
int* buffer_index) {
TFLITE_DCHECK(context != nullptr);
TFLITE_DCHECK(buffer_index != nullptr);
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
TFLITE_DCHECK(runner != nullptr);
if (runner->scratch_buffer_count_ == kNumScratchBuffers_) {
MicroPrintf("Exceeded the maximum number of scratch tensors allowed (%d).",
kNumScratchBuffers_);
return kTfLiteError;
}
// For tests, we allocate scratch buffers from the tail and keep them around
// for the lifetime of the model. This means that the arena size in the tests will
// be more than what we would have if the scratch buffers could share memory.
runner->scratch_buffers_[runner->scratch_buffer_count_] =
runner->allocator_->AllocateFromTail(bytes, MicroArenaBufferAlignment());
TFLITE_DCHECK(runner->scratch_buffers_[runner->scratch_buffer_count_] !=
nullptr);
*buffer_index = runner->scratch_buffer_count_++;
return kTfLiteOk;
}
void* KernelRunner::GetScratchBuffer(TfLiteContext* context, int buffer_index) {
TFLITE_DCHECK(context != nullptr);
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
TFLITE_DCHECK(runner != nullptr);
TFLITE_DCHECK(runner->scratch_buffer_count_ <= kNumScratchBuffers_);
if (buffer_index >= runner->scratch_buffer_count_) {
return nullptr;
}
return runner->scratch_buffers_[buffer_index];
}
void KernelRunner::ReportOpError(struct TfLiteContext* context,
const char* format, ...) {
va_list args;
va_start(args, format);
GetMicroErrorReporter()->Report(format, args);
va_end(args);
}
TfLiteStatus KernelRunner::GetGraph(struct TfLiteContext* context,
TfLiteIntArray** args) {
TFLITE_DCHECK(context != nullptr);
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
TFLITE_DCHECK(runner != nullptr);
// TODO(b/188226309): Design a cleaner way to get a graph from kernel context.
*args = reinterpret_cast<TfLiteIntArray*>(runner->GetMockGraph());
return kTfLiteOk;
}

View File

@@ -18,6 +18,7 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/fake_micro_context.h"
#include "tensorflow/lite/micro/mock_micro_graph.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"
@@ -50,40 +51,22 @@ class KernelRunner {
// to stub out MicroGraph methods and track invocations on each subgraph.
MockMicroGraph* GetMockGraph() { return &mock_micro_graph_; }
protected:
static TfLiteTensor* GetTensor(const struct TfLiteContext* context,
int tensor_index);
static TfLiteEvalTensor* GetEvalTensor(const struct TfLiteContext* context,
int tensor_index);
static void* AllocatePersistentBuffer(TfLiteContext* context, size_t bytes);
static TfLiteStatus RequestScratchBufferInArena(TfLiteContext* context,
size_t bytes,
int* buffer_index);
static void* GetScratchBuffer(TfLiteContext* context, int buffer_index);
static void ReportOpError(struct TfLiteContext* context, const char* format,
...);
// This method matches GetExecutionPlan from TfLiteContext since TFLM reuses
// this method to get the MicroGraph from an operator context.
// TODO(b/188226309): Design a cleaner way to get a graph from kernel context.
static TfLiteStatus GetGraph(struct TfLiteContext* context,
TfLiteIntArray** args);
// Returns true if all temp buffers in tests are deallocated.
// TODO(b/209453859): move this function to private after deallocation checks
// are enabled for all kernel tests.
bool ValidateTempBufferDeallocated();
private:
static constexpr int kNumScratchBuffers_ = 12;
static constexpr int kKernelRunnerBufferSize_ = 10000;
static uint8_t kKernelRunnerBuffer_[kKernelRunnerBufferSize_];
SimpleMemoryAllocator* allocator_ = nullptr;
const TfLiteRegistration& registration_;
TfLiteTensor* tensors_ = nullptr;
MockMicroGraph mock_micro_graph_;
TfLiteContext context_ = {};
TfLiteNode node_ = {};
const TfLiteRegistration& registration_;
int scratch_buffer_count_ = 0;
uint8_t* scratch_buffers_[kNumScratchBuffers_];
SimpleMemoryAllocator* allocator_;
MockMicroGraph mock_micro_graph_;
FakeMicroContext fake_micro_context_;
};
} // namespace micro

View File

@@ -16,6 +16,7 @@ limitations under the License.
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/memory_helpers.h"
namespace tflite {
namespace micro {
@@ -119,13 +120,83 @@ TfLiteStatus CreateWritableTensorDimsWithCopy(TfLiteContext* context,
return kTfLiteOk;
}
// Returns a blob of payload data. The payload is subjected to interpretation by
// the OP. This is the recommended API for an OP to get an external context. OP
// should use this instead of directly calling GetExternalContext function in
// context.
void* GetExternalContext(TfLiteContext* context) {
return reinterpret_cast<void*>(
context->GetExternalContext(context, kTfLiteMaxExternalContexts));
// Verify that both tensors have the same type and size, then return the size
// of both tensors in bytes if they are the same, or -1 if they are different.
size_t ValidateAndGetTensorSizes(const TfLiteEvalTensor* tensor1,
const TfLiteEvalTensor* tensor2) {
TFLITE_DCHECK(tensor1->type == tensor2->type);
size_t tensor1_size = 0;
size_t tensor2_size = 0;
TfLiteEvalTensorByteLength(tensor1, &tensor1_size);
TfLiteEvalTensorByteLength(tensor2, &tensor2_size);
return (tensor1_size == tensor2_size) ? tensor1_size : -1;
}
TfLiteStatus CopyOpInputsToOpOutputs(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE(context, node->inputs->size == node->outputs->size);
for (int i = 0; i < node->inputs->size; i++) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, i);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, i);
int bytes = ValidateAndGetTensorSizes(input, output);
TF_LITE_ENSURE(context, bytes >= 0);
memcpy(output->data.raw, input->data.raw, bytes);
}
return kTfLiteOk;
}
TfLiteStatus CopyOpInputsToSubgraphInputs(TfLiteContext* context,
TfLiteNode* node,
MicroGraph* graph_info,
int subgraph_idx,
int first_tensor_idx) {
TF_LITE_ENSURE(context,
static_cast<size_t>(node->inputs->size - first_tensor_idx) ==
graph_info->NumSubgraphInputs(subgraph_idx));
for (int i = 0; i < node->inputs->size - first_tensor_idx; i++) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, i + first_tensor_idx);
TfLiteEvalTensor* subgraph_input =
graph_info->GetSubgraphInput(subgraph_idx, i);
int bytes = ValidateAndGetTensorSizes(input, subgraph_input);
TF_LITE_ENSURE(context, bytes >= 0);
memcpy(subgraph_input->data.raw, input->data.raw, bytes);
}
return kTfLiteOk;
}
TfLiteStatus CopyOpOutputsToSubgraphInputs(TfLiteContext* context,
TfLiteNode* node,
MicroGraph* graph_info,
int subgraph_idx) {
TF_LITE_ENSURE(context, static_cast<size_t>(node->outputs->size) ==
graph_info->NumSubgraphInputs(subgraph_idx));
for (int i = 0; i < node->outputs->size; i++) {
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, i);
TfLiteEvalTensor* subgraph_input =
graph_info->GetSubgraphInput(subgraph_idx, i);
int bytes = ValidateAndGetTensorSizes(output, subgraph_input);
TF_LITE_ENSURE(context, bytes >= 0);
memcpy(subgraph_input->data.raw, output->data.raw, bytes);
}
return kTfLiteOk;
}
TfLiteStatus CopySubgraphOutputsToOpOutputs(TfLiteContext* context,
TfLiteNode* node,
MicroGraph* graph_info,
int subgraph_idx) {
TF_LITE_ENSURE(context, static_cast<size_t>(node->outputs->size) ==
graph_info->NumSubgraphOutputs(subgraph_idx));
for (int i = 0; i < node->outputs->size; i++) {
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, i);
TfLiteEvalTensor* subgraph_output =
graph_info->GetSubgraphOutput(subgraph_idx, i);
int bytes = ValidateAndGetTensorSizes(output, subgraph_output);
TF_LITE_ENSURE(context, bytes >= 0);
memcpy(output->data.raw, subgraph_output->data.raw, bytes);
}
return kTfLiteOk;
}
} // namespace micro

View File

@@ -22,6 +22,7 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/micro/micro_context.h"
namespace tflite {
namespace micro {
@@ -69,23 +70,33 @@ TfLiteStatus CreateWritableTensorDimsWithCopy(TfLiteContext* context,
TfLiteTensor* tensor,
TfLiteEvalTensor* eval_tensor);
// Returns a blob of payload data. The payload is subjected to interpretation by
// the OP. This is the recommended API for an OP to get an external context. OP
// should use this instead of directly calling GetExternalContext function in
// context. Example usage:
//
// An application can set an external context through interpreter as below
// interpreter->SetMicroExternalContext(pointer_to_your_payload);
//
// Inside an OP that needs this payload, it gets the payload pointer by:
// Prepare(TfLiteContext* context) {
// ...
// payload_ptr =
// reinterpret_cast<your_data_type>(GetMicroExternalContext(context))
// ...
// }
//
void* GetMicroExternalContext(TfLiteContext* context);
// Copy all op input tensors to op output tensors. Requires all op input tensor
// shapes and types to be identical to op output tensor shapes and types.
TfLiteStatus CopyOpInputsToOpOutputs(TfLiteContext* context, TfLiteNode* node);
// Copy all op input tensors to subgraph input tensors. Requires all op input
// tensor shapes and types to be identical to subgraph input tensor shapes and
// types.
TfLiteStatus CopyOpInputsToSubgraphInputs(TfLiteContext* context,
TfLiteNode* node,
MicroGraph* graph_info,
int subgraph_idx,
int first_tensor_idx);
// Copy all op output tensors to subgraph input tensors. Requires all op output
// tensor shapes and types to be identical to subgraph input tensor shapes and
// types.
TfLiteStatus CopyOpOutputsToSubgraphInputs(TfLiteContext* context,
TfLiteNode* node,
MicroGraph* graph_info,
int subgraph_idx);
// Copy all subgraph output tensors to op outputs. Requires all subgraph output
// tensor shapes and types to be identical to op output tensor shapes and types.
TfLiteStatus CopySubgraphOutputsToOpOutputs(TfLiteContext* context,
TfLiteNode* node,
MicroGraph* graph_info,
int subgraph_idx);
} // namespace micro
} // namespace tflite
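These helpers exist so that control-flow kernels (CALL_ONCE, IF, WHILE) can shuttle data between op tensors and subgraph tensors. A rough sketch of the intended call pattern is below; it assumes MicroGraph::InvokeSubgraph() as the subgraph entry point and is only an illustration (the real WHILE kernel also loops over a condition subgraph):
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_graph.h"
TfLiteStatus EvalWhileBodyOnce(TfLiteContext* context, TfLiteNode* node,
                               tflite::MicroGraph* graph,
                               int body_subgraph_index) {
  // Feed the op inputs into the body subgraph, run it once, then copy the
  // subgraph outputs back into the op outputs.
  TF_LITE_ENSURE_OK(context, tflite::micro::CopyOpInputsToSubgraphInputs(
                                 context, node, graph, body_subgraph_index,
                                 /*first_tensor_idx=*/0));
  TF_LITE_ENSURE_OK(context, graph->InvokeSubgraph(body_subgraph_index));
  TF_LITE_ENSURE_OK(context, tflite::micro::CopySubgraphOutputsToOpOutputs(
                                 context, node, graph, body_subgraph_index));
  return kTfLiteOk;
}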

View File

@@ -36,15 +36,18 @@ constexpr int kTensorShapeRank = 4;
enum { kBatchRank = 0, kHeightRank, kWidthRank, kChannelRank };
TfLiteStatus L2Prepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
auto* params = static_cast<TfLitePoolParams*>(node->builtin_data);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
const TfLiteTensor* input;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TF_LITE_ENSURE_EQ(context, NumDimensions(input), kTensorShapeRank);
TF_LITE_ENSURE_EQ(context, NumDimensions(output), kTensorShapeRank);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
@@ -82,6 +85,9 @@ TfLiteStatus L2Prepare(TfLiteContext* context, TfLiteNode* node) {
output->dims->data[kWidthRank] = out_width;
output->dims->data[kChannelRank] = channels_out;
micro_context->DeallocateTempTfLiteTensor(output);
micro_context->DeallocateTempTfLiteTensor(input);
return kTfLiteOk;
}
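The same migration is applied to every Prepare function below: temporary TfLiteTensor views are fetched from MicroContext and must be released before returning. A condensed sketch of that pattern, with placeholder tensor indices rather than any specific kernel:
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/micro_context.h"
TfLiteStatus ExamplePrepare(TfLiteContext* context, TfLiteNode* node) {
  tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
  TfLiteTensor* input =
      micro_context->AllocateTempInputTensor(node, /*index=*/0);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* output =
      micro_context->AllocateTempOutputTensor(node, /*index=*/0);
  TF_LITE_ENSURE(context, output != nullptr);
  // ... shape/type checks and op-data setup use the temp views here ...
  // The temp TfLiteTensor structs only exist for Prepare; release them so the
  // allocator can reuse the space. Eval works on TfLiteEvalTensor instead.
  micro_context->DeallocateTempTfLiteTensor(input);
  micro_context->DeallocateTempTfLiteTensor(output);
  return kTfLiteOk;
}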

View File

@@ -49,11 +49,14 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE(context, NumDimensions(input) <= 4);
TF_LITE_ENSURE(context,
@@ -69,6 +72,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
// Our implementations don't currently support activations.
TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone);
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -30,13 +30,16 @@ const int kOutputTensor = 0;
TfLiteStatus CalculateOpDataLeakyRelu(TfLiteContext* context,
TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
@@ -62,6 +65,9 @@ TfLiteStatus CalculateOpDataLeakyRelu(TfLiteContext* context,
data->output_shift_identity = static_cast<int32_t>(output_shift_identity);
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -43,13 +43,16 @@ constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
TF_LITE_ENSURE(context, HaveSameShapes(input, output));
@@ -89,6 +92,8 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
data->depth = static_cast<size_t>(input_shape.Dims(trailing_dim));
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -32,9 +32,13 @@ const int kLogisticOutputTensor = 0;
TfLiteStatus CalculateArithmeticOpDataLogistic(TfLiteContext* context,
TfLiteNode* node,
OpDataLogistic* data) {
const TfLiteTensor* input = GetInput(context, node, kLogisticInputTensor);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kLogisticInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kLogisticOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kLogisticOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
@@ -55,6 +59,53 @@ TfLiteStatus CalculateArithmeticOpDataLogistic(TfLiteContext* context,
data->input_range_radius =
CalculateInputRadius(kInputIntegerBits, data->input_left_shift, 31);
}
if (input->type == kTfLiteInt16) {
static constexpr int kInputIntegerBits = 3;
static constexpr int kOutputFractionalBits = 15;
// See comments in TanhPrepare about requiring zero_point==0
// and a power-of-two ("POT") scale.
TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
int input_scale_log2_rounded;
bool param_scale_pot =
CheckedLog2(input->params.scale, &input_scale_log2_rounded);
data->input_left_shift =
(15 - kInputIntegerBits) + input_scale_log2_rounded;
param_scale_pot &= (data->input_left_shift == 0);
if (param_scale_pot) {
data->input_multiplier = 0;
} else {
// Calculate multiplier to change input scale to 1/(3*4096)
// as required by the table lookup.
// In this scaling +/-2^17 represents +/-10.7
double multiplier =
static_cast<double>(input->params.scale) * 4096.0 * 3.0;
data->input_left_shift = 0;
while (multiplier <= 32767.0 / 2.0 && data->input_left_shift <= 30) {
data->input_left_shift++;
multiplier = multiplier * 2.0;
}
data->input_multiplier = static_cast<int32_t>(multiplier);
}
int output_scale_log2_rounded;
TF_LITE_ENSURE(
context, CheckedLog2(output->params.scale, &output_scale_log2_rounded));
TF_LITE_ENSURE_EQ(context, output_scale_log2_rounded,
-kOutputFractionalBits);
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -36,6 +36,8 @@ TfLiteRegistration Register_ADD_N();
TfLiteRegistration Register_ASSIGN_VARIABLE();
TfLiteRegistration Register_AVERAGE_POOL_2D();
TfLiteRegistration Register_BATCH_TO_SPACE_ND();
TfLiteRegistration Register_BROADCAST_ARGS();
TfLiteRegistration Register_BROADCAST_TO();
TfLiteRegistration Register_CALL_ONCE();
TfLiteRegistration Register_CAST();
// TODO(b/160234179): Change custom OPs to also return by value.
@@ -62,6 +64,7 @@ TfLiteRegistration Register_LOGICAL_AND();
TfLiteRegistration Register_LOGICAL_OR();
TfLiteRegistration Register_LOGISTIC();
TfLiteRegistration Register_MAX_POOL_2D();
TfLiteRegistration Register_MIRROR_PAD();
TfLiteRegistration Register_PRELU();
TfLiteRegistration Register_MUL();
TfLiteRegistration Register_QUANTIZE();
@@ -79,6 +82,7 @@ TfLiteRegistration Register_SVDF();
TfLiteRegistration Register_TRANSPOSE();
TfLiteRegistration Register_TRANSPOSE_CONV();
TfLiteRegistration Register_VAR_HANDLE();
TfLiteRegistration Register_WHILE();
TfLiteRegistration Register_ZEROS_LIKE();
namespace ops {

View File

@@ -0,0 +1,222 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace {
struct OpDataMirrorPad {
int input_dims;
int output_size;
int offset;
int output_dims_num_elements_buffer_index;
int input_dims_num_elements_buffer_index;
};
// Helper method that fills the left and right pads.
template <typename T>
inline void GetPadding(const T* data, int offset, int64_t* left_pad,
int64_t* right_pad) {
*left_pad = static_cast<int64_t>(*(data + offset * 2));
*right_pad = static_cast<int64_t>(*(data + offset * 2 + 1));
}
// Given a padded dimension index and the left/right padding, returns the
// corresponding dimension index in the input array.
inline int GetInputDimension(int padded_dimension, int left_pad, int right_pad,
int input_dim_size, int offset) {
if (padded_dimension < left_pad) {
const int original_ind = left_pad + offset - 1;
return original_ind - (std::min(padded_dimension, original_ind - offset));
}
padded_dimension -= left_pad;
if (padded_dimension >= input_dim_size) {
padded_dimension -= input_dim_size;
const int original_ind = input_dim_size - (1 + offset);
return original_ind - std::min(padded_dimension, original_ind);
}
return padded_dimension;
}
// Given an index in the output array, returns the index of the
// corresponding value in the input array.
int GetFlatIndex(int index, int num_dims,
const TfLiteEvalTensor* padding_matrix,
const TfLiteIntArray* input_dims,
int* output_dims_num_elements, int* input_dims_num_elements,
const int offset) {
int flat_index = 0;
int64_t left_pad = 0, right_pad = 0, dimension_index, index_in_input;
for (int i = 0; i < num_dims; ++i) {
switch (padding_matrix->type) {
case kTfLiteInt32:
GetPadding(padding_matrix->data.i32, i, &left_pad, &right_pad);
break;
case kTfLiteInt64:
GetPadding(padding_matrix->data.i64, i, &left_pad, &right_pad);
break;
default:
break;
}
dimension_index = index / output_dims_num_elements[i];
index_in_input = GetInputDimension(dimension_index, left_pad, right_pad,
input_dims->data[i], offset);
flat_index += index_in_input * (input_dims_num_elements)[i];
index %= output_dims_num_elements[i];
}
return flat_index;
}
template <typename T>
void MirrorPad(const TfLiteEvalTensor* padding_matrix,
const TfLiteIntArray* input_dims, int* output_dims_num_elements,
int* input_dims_num_elements, const T* input_data,
T* output_data, const int offset, const int num_dims,
const int output_size) {
for (int i = 0; i < output_size; ++i) {
output_data[i] = input_data[GetFlatIndex(
i, num_dims, padding_matrix, input_dims, output_dims_num_elements,
input_dims_num_elements, offset)];
}
}
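To make the index arithmetic above concrete, here is a standalone illustration (plain C++, not part of the kernel) of GetInputDimension for REFLECT mode (offset == 1) on a 1-D input {1, 2, 3} padded by 2 on each side; it prints 3 2 1 2 3 2 1, i.e. the reflected sequence:
#include <algorithm>
#include <cstdio>
int GetInputDimension(int padded_dimension, int left_pad, int right_pad,
                      int input_dim_size, int offset) {
  (void)right_pad;  // unused here, kept to mirror the kernel's signature
  if (padded_dimension < left_pad) {
    const int original_ind = left_pad + offset - 1;
    return original_ind - std::min(padded_dimension, original_ind - offset);
  }
  padded_dimension -= left_pad;
  if (padded_dimension >= input_dim_size) {
    padded_dimension -= input_dim_size;
    const int original_ind = input_dim_size - (1 + offset);
    return original_ind - std::min(padded_dimension, original_ind);
  }
  return padded_dimension;
}
int main() {
  const int input[3] = {1, 2, 3};
  // 7 output positions: 2 left pads + 3 input elements + 2 right pads.
  for (int i = 0; i < 7; ++i) {
    printf("%d ", input[GetInputDimension(i, /*left_pad=*/2, /*right_pad=*/2,
                                          /*input_dim_size=*/3, /*offset=*/1)]);
  }
  printf("\n");
  return 0;
}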
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TfLiteStatus status = kTfLiteOk;
const OpDataMirrorPad* data =
static_cast<const OpDataMirrorPad*>(node->user_data);
const TfLiteEvalTensor* input_tensor =
tflite::micro::GetEvalInput(context, node, 0);
const TfLiteEvalTensor* padding_matrix =
tflite::micro::GetEvalInput(context, node, 1);
TfLiteEvalTensor* output_tensor =
tflite::micro::GetEvalOutput(context, node, 0);
const int input_dims = data->input_dims;
const int output_size = data->output_size;
int* input_dims_num_elements = (int*)context->GetScratchBuffer(
context, data->input_dims_num_elements_buffer_index);
int* output_dims_num_elements = (int*)context->GetScratchBuffer(
context, data->output_dims_num_elements_buffer_index);
for (int i = 0; i < input_dims; i++) {
output_dims_num_elements[i] = 1;
input_dims_num_elements[i] = 1;
}
for (int i = input_dims - 2; i >= 0; i--) {
output_dims_num_elements[i] =
output_dims_num_elements[i + 1] * output_tensor->dims->data[i + 1];
input_dims_num_elements[i] =
input_dims_num_elements[i + 1] * input_tensor->dims->data[i + 1];
}
switch (output_tensor->type) {
case kTfLiteFloat32: {
MirrorPad(padding_matrix, input_tensor->dims, output_dims_num_elements,
input_dims_num_elements,
tflite::micro::GetTensorData<float>(input_tensor),
tflite::micro::GetTensorData<float>(output_tensor),
data->offset, input_dims, output_size);
break;
}
case kTfLiteInt8: {
MirrorPad(padding_matrix, input_tensor->dims, output_dims_num_elements,
input_dims_num_elements,
tflite::micro::GetTensorData<int8_t>(input_tensor),
tflite::micro::GetTensorData<int8_t>(output_tensor),
data->offset, input_dims, output_size);
break;
}
default:
status = kTfLiteError;
break;
}
#undef TF_LITE_MIRROR_PAD
return status;
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpDataMirrorPad));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TFLITE_DCHECK(node->user_data != nullptr);
OpDataMirrorPad* data = static_cast<OpDataMirrorPad*>(node->user_data);
TfLiteTensor* input_tensor = micro_context->AllocateTempInputTensor(node, 0);
TfLiteTensor* padding_matrix =
micro_context->AllocateTempInputTensor(node, 1);
TfLiteTensor* output_tensor =
micro_context->AllocateTempOutputTensor(node, 0);
TF_LITE_ENSURE_EQ(context, NumDimensions(padding_matrix), 2);
TF_LITE_ENSURE_EQ(context, SizeOfDimension(padding_matrix, 0),
NumDimensions(input_tensor));
auto* params =
reinterpret_cast<TfLiteMirrorPaddingParams*>(node->builtin_data);
if (params == nullptr) {
return kTfLiteError;
}
data->offset =
params->mode != TfLiteMirrorPaddingMode::kTfLiteMirrorPaddingReflect ? 0
: 1;
data->input_dims = NumDimensions(input_tensor);
data->output_size = NumElements(output_tensor);
TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
context, data->input_dims * sizeof(int),
&data->output_dims_num_elements_buffer_index));
TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
context, data->input_dims * sizeof(int),
&data->input_dims_num_elements_buffer_index));
micro_context->DeallocateTempTfLiteTensor(input_tensor);
micro_context->DeallocateTempTfLiteTensor(padding_matrix);
micro_context->DeallocateTempTfLiteTensor(output_tensor);
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_MIRROR_PAD() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite

View File

@@ -37,11 +37,16 @@ void* MulInit(TfLiteContext* context, const char* buffer, size_t length) {
TfLiteStatus CalculateOpDataMul(TfLiteContext* context, TfLiteNode* node,
TfLiteMulParams* params, OpDataMul* data) {
const TfLiteTensor* input1 = GetInput(context, node, kMulInput1Tensor);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input1 =
micro_context->AllocateTempInputTensor(node, kMulInput1Tensor);
TF_LITE_ENSURE(context, input1 != nullptr);
const TfLiteTensor* input2 = GetInput(context, node, kMulInput2Tensor);
TfLiteTensor* input2 =
micro_context->AllocateTempInputTensor(node, kMulInput2Tensor);
TF_LITE_ENSURE(context, input2 != nullptr);
TfLiteTensor* output = GetOutput(context, node, kMulOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kMulOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
@@ -72,6 +77,9 @@ TfLiteStatus CalculateOpDataMul(TfLiteContext* context, TfLiteNode* node,
&data->output_activation_max_f32);
}
micro_context->DeallocateTempTfLiteTensor(input1);
micro_context->DeallocateTempTfLiteTensor(input2);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -43,19 +43,26 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) {
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
TF_LITE_ENSURE(context, NumInputs(node) == 2 || NumInputs(node) == 3);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, /*index=*/0);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, /*index=*/0);
TF_LITE_ENSURE(context, input != nullptr);
const TfLiteTensor* paddings = GetInput(context, node, /*index=*/1);
TfLiteTensor* paddings =
micro_context->AllocateTempInputTensor(node, /*index=*/1);
TF_LITE_ENSURE(context, paddings != nullptr);
const TfLiteTensor* constant_values =
NumInputs(node) == 3 ? GetInput(context, node, /*index=*/2) : nullptr;
TfLiteTensor* output = GetOutput(context, node, /*index=*/0);
TfLiteTensor* constant_values =
NumInputs(node) == 3
? micro_context->AllocateTempInputTensor(node, /*index=*/2)
: nullptr;
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, /*index=*/0);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_EQ(context, input->type, output->type);
@@ -122,6 +129,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
data->output_zero_point = output->params.zero_point;
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(paddings);
if (constant_values != nullptr) {
micro_context->DeallocateTempTfLiteTensor(constant_values);
}
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -54,9 +54,13 @@ TfLiteStatus PoolingPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpDataPooling* data = static_cast<OpDataPooling*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kPoolingInputTensor);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kPoolingInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kPoolingOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kPoolingOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_STATUS(
@@ -71,6 +75,9 @@ TfLiteStatus PoolingPrepare(TfLiteContext* context, TfLiteNode* node) {
&data->activation_max);
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -84,14 +84,22 @@ TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
PreluParams* params = static_cast<PreluParams*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, 0);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
TF_LITE_ENSURE(context, input != nullptr);
const TfLiteTensor* alpha = GetInput(context, node, 1);
TfLiteTensor* alpha = micro_context->AllocateTempInputTensor(node, 1);
TF_LITE_ENSURE(context, alpha != nullptr);
TfLiteTensor* output = GetOutput(context, node, 0);
TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
TF_LITE_ENSURE(context, output != nullptr);
return CalculatePreluParams(input, alpha, output, params);
TF_LITE_ENSURE_OK(context,
CalculatePreluParams(input, alpha, output, params));
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(alpha);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
} // namespace tflite

View File

@@ -36,9 +36,11 @@ TfLiteStatus PrepareQuantizeReference(TfLiteContext* context,
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, 0);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, 0);
TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
TF_LITE_ENSURE(context, output != nullptr);
// TODO(b/128934713): Add support for fixed-point per-channel quantization.
@@ -77,6 +79,9 @@ TfLiteStatus PrepareQuantizeReference(TfLiteContext* context,
data->quantization_params.scale = static_cast<double>(output->params.scale);
data->input_zero_point = input->params.zero_point;
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -39,13 +39,17 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(NumInputs(node) == 1);
TFLITE_DCHECK(NumOutputs(node) == 1);
const TfLiteTensor* input_resource_id_tensor =
GetInput(context, node, kInputVariableId);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input_resource_id_tensor =
micro_context->AllocateTempInputTensor(node, kInputVariableId);
TFLITE_DCHECK(input_resource_id_tensor != nullptr);
TFLITE_DCHECK(input_resource_id_tensor->type == kTfLiteResource);
TFLITE_DCHECK(NumElements(input_resource_id_tensor) == 1);
micro_context->DeallocateTempTfLiteTensor(input_resource_id_tensor);
return kTfLiteOk;
}
@@ -58,14 +62,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
tflite::micro::GetEvalOutput(context, node, kOutputValue);
TFLITE_DCHECK(output_value != nullptr);
// Casting to TfliteIntArray is required since we are re-using
// GetExecutionPlan from TfLiteContext. On TFLM this method returns a
// MicroGraph.
// TODO(b/188226309): Design a cleaner way to get a graph from kernel context.
MicroGraph* graph_info;
context->GetExecutionPlan(context,
reinterpret_cast<TfLiteIntArray**>(&graph_info));
MicroResourceVariables* resources = graph_info->GetResourceVariables();
tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
MicroGraph& graph_info = micro_context->graph();
MicroResourceVariables* resources = graph_info.GetResourceVariables();
if (resources == nullptr) {
MicroPrintf(
"READ_VARIABLE requires resource variables. Please create "

View File

@@ -50,10 +50,12 @@ void* InitReduce(TfLiteContext* context, const char* buffer, size_t length) {
}
TfLiteStatus PrepareSimple(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
// Inputs Tensor (dtype depends on quantization):
// [0] = Input
// [1] = Axis
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
// Outputs Tensor (dtype depends on quantization):
// [0] = Output
@@ -63,28 +65,31 @@ TfLiteStatus PrepareSimple(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
// Validate axis type
const TfLiteTensor* axis = GetInput(context, node, 1);
TfLiteTensor* axis = micro_context->AllocateTempInputTensor(node, 1);
TF_LITE_ENSURE(context, axis != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, axis->type, kTfLiteInt32);
if (input->type == kTfLiteInt8) {
OpData* data = static_cast<OpData*>(node->user_data);
const TfLiteTensor* output = GetOutput(context, node, 0);
TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
const double real_multiplier = static_cast<double>(input->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(real_multiplier, &data->multiplier, &data->shift);
micro_context->DeallocateTempTfLiteTensor(output);
}
micro_context->DeallocateTempTfLiteTensor(axis);
micro_context->DeallocateTempTfLiteTensor(input);
return kTfLiteOk;
}
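The QuantizeMultiplier() call above splits real_multiplier into a Q31 fixed-point multiplier and a power-of-two shift, so that real_multiplier = multiplier / 2^31 * 2^shift. A standalone illustration of that decomposition for hypothetical scales, mirroring the library routine with frexp and ignoring its rounding edge case:
#include <cmath>
#include <cstdint>
#include <cstdio>
int main() {
  const double input_scale = 0.05;   // hypothetical
  const double output_scale = 0.1;   // hypothetical
  const double real_multiplier = input_scale / output_scale;  // 0.5
  int shift = 0;
  const double q = std::frexp(real_multiplier, &shift);  // q in [0.5, 1)
  const int32_t quantized = static_cast<int32_t>(std::round(q * (1ll << 31)));
  // Prints multiplier=1073741824 shift=0, i.e. 2^30 / 2^31 * 2^0 == 0.5.
  printf("multiplier=%ld shift=%d\n", static_cast<long>(quantized), shift);
  return 0;
}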
TfLiteStatus PrepareMax(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_OK(context, PrepareSimple(context, node));
MicroContext* micro_context = GetMicroContext(context);
OpData* op_data = static_cast<OpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, 0);
const TfLiteTensor* output = GetOutput(context, node, 0);
const TfLiteTensor* axis = GetInput(context, node, 1);
TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
TfLiteTensor* axis = micro_context->AllocateTempInputTensor(node, 1);
op_data->input_scale = input->params.scale;
op_data->output_scale = output->params.scale;
@@ -96,13 +101,17 @@ TfLiteStatus PrepareMax(TfLiteContext* context, TfLiteNode* node) {
context, sizeof(int) * static_cast<int>(ElementCount(*axis->dims)),
&op_data->resolved_axis_idx);
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
micro_context->DeallocateTempTfLiteTensor(axis);
return kTfLiteOk;
}
TfLiteStatus PrepareMeanOrSum(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, 0);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
const TfLiteTensor* output = GetOutput(context, node, 0);
TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) {
const double real_multiplier = static_cast<double>(input->params.scale) /
static_cast<double>(output->params.scale);
@@ -121,6 +130,8 @@ TfLiteStatus PrepareMeanOrSum(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_OK(context, PrepareSimple(context, node));
// TODO(b/144955155): Support uint8_t(b/144955155) and int8_t(b/144955018)
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <cstring>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
@@ -31,9 +33,13 @@ constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
TfLiteStatus ReshapeOutput(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
// Tensorflow's Reshape allows one of the shape components to have the
// special -1 value, meaning it will be calculated automatically based on the
@@ -68,6 +74,9 @@ TfLiteStatus ReshapeOutput(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
TF_LITE_ENSURE_EQ(context, num_input_elements, num_output_elements);
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
@@ -93,9 +102,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
// Do nothing for in-place reshape.
if (input->data.raw != output->data.raw) {
// Otherwise perform reshape with copy.
for (size_t i = 0; i < input_bytes; ++i) {
output->data.raw[i] = input->data.raw[i];
}
memcpy(output->data.raw, input->data.raw, input_bytes);
}
return kTfLiteOk;
}

View File

@@ -30,12 +30,17 @@ constexpr int kSizeTensor = 1;
constexpr int kOutputTensor = 0;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* size = GetInput(context, node, kSizeTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TfLiteTensor* size =
micro_context->AllocateTempInputTensor(node, kSizeTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
TF_LITE_ENSURE_EQ(context, NumDimensions(size), 1);
@@ -55,6 +60,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteError;
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(size);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -33,12 +33,17 @@ constexpr int kSizeTensor = 1;
constexpr int kOutputTensor = 0;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* size = GetInput(context, node, kSizeTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TfLiteTensor* size =
micro_context->AllocateTempInputTensor(node, kSizeTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
// Our current implementations rely on the input being 4D,
// and the size being 1D tensor with exactly 2 elements.
@@ -53,6 +58,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_KERNEL_LOG(context, "Dynamic tensors are unsupported in tfmicro.");
return kTfLiteError;
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(size);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -29,9 +29,13 @@ constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
@@ -42,6 +46,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
for (int i = 0; i < output->dims->size; ++i) {
TF_LITE_ENSURE_EQ(context, output->dims->data[i], input->dims->data[i]);
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -45,16 +45,22 @@ void GetBeginAndSizeVectors(int dimensions, const TfLiteEvalTensor* begin,
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 3);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TFLITE_DCHECK(input != nullptr);
const TfLiteTensor* begin = GetInput(context, node, kBeginTensor);
TfLiteTensor* begin =
micro_context->AllocateTempInputTensor(node, kBeginTensor);
TFLITE_DCHECK(begin != nullptr);
const TfLiteTensor* size = GetInput(context, node, kSizeTensor);
TfLiteTensor* size =
micro_context->AllocateTempInputTensor(node, kSizeTensor);
TFLITE_DCHECK(size != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TFLITE_DCHECK(output != nullptr);
// Ensure validity of input tensor and its dimension.
@@ -66,6 +72,12 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(NumDimensions(size) == 1);
TFLITE_DCHECK(NumElements(begin) == NumElements(size));
TFLITE_DCHECK(NumDimensions(input) <= kMaxDim);
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(begin);
micro_context->DeallocateTempTfLiteTensor(size);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -20,6 +20,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/softmax.h"
#include "tensorflow/lite/micro/micro_context.h"
namespace tflite {
@@ -90,12 +91,14 @@ void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length) {
}
TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
TF_LITE_ENSURE(context, input != nullptr);
TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
TfLiteTensor* output = GetOutput(context, node, 0);
TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE(context, node->user_data != nullptr);
@@ -136,7 +139,12 @@ TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
}
auto* params = static_cast<TfLiteSoftmaxParams*>(node->builtin_data);
return CalculateSoftmaxParams(context, input, output, params, op_data);
auto ret_val =
CalculateSoftmaxParams(context, input, output, params, op_data);
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return ret_val;
}
} // namespace tflite

View File

@@ -44,11 +44,15 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) {
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 3);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, input != nullptr && output != nullptr);
TF_LITE_ENSURE(context, NumDimensions(input) >= kInputOutputMinDimensionNum);
@@ -57,6 +61,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE(context, NumDimensions(output) <= kInputOutputMaxDimensionNum);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -39,11 +39,14 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
@@ -75,6 +78,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
output->dims->data[kDepthRank] =
input->dims->data[kDepthRank] * block_size * block_size;
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -69,7 +69,8 @@ TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node,
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* axis = GetInput(context, node, 0);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* axis = micro_context->AllocateTempInputTensor(node, 0);
TF_LITE_ENSURE(context, axis != nullptr);
// Dynamic output tensors are needed if axis tensor is not constant.
@@ -77,6 +78,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
// constant axis tensor for now.
TF_LITE_ENSURE_MSG(context, IsConstantTensor(axis),
"Non constant axis tensor not supported");
micro_context->DeallocateTempTfLiteTensor(axis);
return kTfLiteOk;
}

View File

@@ -74,13 +74,14 @@ TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node,
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 3);
MicroContext* micro_context = GetMicroContext(context);
// Dynamic output tensors are needed if axis tensor is not constant.
// But Micro doesn't support dynamic memory allocation, so we only support
// constant axis tensor for now.
const TfLiteTensor* axis = GetInput(context, node, 2);
TfLiteTensor* axis = micro_context->AllocateTempInputTensor(node, 2);
TF_LITE_ENSURE_MSG(context, IsConstantTensor(axis),
"Non constant axis tensor not supported");
micro_context->DeallocateTempTfLiteTensor(axis);
return kTfLiteOk;
}

View File

@@ -27,12 +27,19 @@ namespace tflite {
namespace {
struct SqueezeContext {
SqueezeContext(TfLiteContext* context, TfLiteNode* node)
: params(reinterpret_cast<TfLiteSqueezeParams*>(node->builtin_data)),
input(GetInput(context, node, 0)),
output(GetOutput(context, node, 0)) {}
SqueezeContext(TfLiteContext* context, TfLiteNode* node) {
params = reinterpret_cast<TfLiteSqueezeParams*>(node->builtin_data);
micro_context = GetMicroContext(context);
input = micro_context->AllocateTempInputTensor(node, 0);
output = micro_context->AllocateTempOutputTensor(node, 0);
}
~SqueezeContext() {
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
}
MicroContext* micro_context;
TfLiteSqueezeParams* params;
const TfLiteTensor* const input;
TfLiteTensor* input;
TfLiteTensor* output;
};
@@ -80,18 +87,24 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
SqueezeContext op_context(context, node);
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
if (op_context.input->type == kTfLiteString) {
if (input->type == kTfLiteString) {
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(op_context.input->type),
op_context.input->type);
TfLiteTypeGetName(input->type), input->type);
return kTfLiteError;
}
TF_LITE_ENSURE_EQ(context, op_context.input->bytes, op_context.output->bytes);
memcpy(op_context.output->data.raw, op_context.input->data.raw,
op_context.input->bytes);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
size_t input_byte_size;
size_t output_byte_size;
TF_LITE_ENSURE_OK(context,
TfLiteEvalTensorByteLength(input, &input_byte_size));
TF_LITE_ENSURE_OK(context,
TfLiteEvalTensorByteLength(output, &output_byte_size));
TF_LITE_ENSURE_EQ(context, input_byte_size, output_byte_size);
memcpy(output->data.raw, input->data.raw, input_byte_size);
return kTfLiteOk;
}

View File

@@ -38,18 +38,27 @@ constexpr int kOutputTensor = 0;
struct StridedSliceContext {
StridedSliceContext(TfLiteContext* context, TfLiteNode* node) {
params = reinterpret_cast<TfLiteStridedSliceParams*>(node->builtin_data);
input = GetInput(context, node, kInputTensor);
begin = GetInput(context, node, kBeginTensor);
end = GetInput(context, node, kEndTensor);
strides = GetInput(context, node, kStridesTensor);
output = GetOutput(context, node, kOutputTensor);
micro_context = GetMicroContext(context);
input = micro_context->AllocateTempInputTensor(node, kInputTensor);
begin = micro_context->AllocateTempInputTensor(node, kBeginTensor);
end = micro_context->AllocateTempInputTensor(node, kEndTensor);
strides = micro_context->AllocateTempInputTensor(node, kStridesTensor);
output = micro_context->AllocateTempOutputTensor(node, kOutputTensor);
dims = NumDimensions(input);
}
~StridedSliceContext() {
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(begin);
micro_context->DeallocateTempTfLiteTensor(end);
micro_context->DeallocateTempTfLiteTensor(strides);
micro_context->DeallocateTempTfLiteTensor(output);
}
const TfLiteStridedSliceParams* params;
const TfLiteTensor* input;
const TfLiteTensor* begin;
const TfLiteTensor* end;
const TfLiteTensor* strides;
MicroContext* micro_context;
TfLiteTensor* input;
TfLiteTensor* begin;
TfLiteTensor* end;
TfLiteTensor* strides;
TfLiteTensor* output;
int dims;
};
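SqueezeContext and StridedSliceContext above use the constructor/destructor pair so that every early return from Prepare/Eval still releases the temp tensors. A minimal sketch of that RAII idea for a single input tensor; the wrapper name is illustrative, not TFLM API:
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/micro_context.h"
struct ScopedTempInput {
  ScopedTempInput(TfLiteContext* context, TfLiteNode* node, int index)
      : micro_context(tflite::GetMicroContext(context)),
        tensor(micro_context->AllocateTempInputTensor(node, index)) {}
  ~ScopedTempInput() {
    // Release the temp view when the scope ends, even on early returns.
    if (tensor != nullptr) {
      micro_context->DeallocateTempTfLiteTensor(tensor);
    }
  }
  tflite::MicroContext* micro_context;
  TfLiteTensor* tensor;
};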

View File

@@ -83,15 +83,24 @@ TfLiteStatus SubPrepare(TfLiteContext* context, TfLiteNode* node) {
OpDataSub* data = static_cast<OpDataSub*>(node->user_data);
auto* params = reinterpret_cast<TfLiteSubParams*>(node->builtin_data);
const TfLiteTensor* input1 = GetInput(context, node, kSubInputTensor1);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input1 =
micro_context->AllocateTempInputTensor(node, kSubInputTensor1);
TF_LITE_ENSURE(context, input1 != nullptr);
const TfLiteTensor* input2 = GetInput(context, node, kSubInputTensor2);
TfLiteTensor* input2 =
micro_context->AllocateTempInputTensor(node, kSubInputTensor2);
TF_LITE_ENSURE(context, input2 != nullptr);
TfLiteTensor* output = GetOutput(context, node, kSubOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kSubOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_STATUS(
CalculateOpDataSub(context, params, input1, input2, output, data));
micro_context->DeallocateTempTfLiteTensor(input1);
micro_context->DeallocateTempTfLiteTensor(input2);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}

View File

@@ -364,6 +364,8 @@ TfLiteStatus PrepareSvdf(TfLiteContext* context, TfLiteNode* node) {
const auto* params = static_cast<const TfLiteSVDFParams*>(node->builtin_data);
MicroContext* micro_context = GetMicroContext(context);
// Validate Tensor Inputs (dtype depends on quantization):
// [0] = Input, {2, batch_size, input_size}
// [1] = Weights Feature, {2, num_filters, input_size}
@@ -371,18 +373,19 @@ TfLiteStatus PrepareSvdf(TfLiteContext* context, TfLiteNode* node) {
// [3] = Bias (optional), {1, num_units}
// [4] = Activation State (variable),
// {2, batch_size, memory_size * num_filters}
const TfLiteTensor* input = GetInput(context, node, kSvdfInputTensor);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kSvdfInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
const TfLiteTensor* weights_feature =
GetInput(context, node, kSvdfWeightsFeatureTensor);
TfLiteTensor* weights_feature =
micro_context->AllocateTempInputTensor(node, kSvdfWeightsFeatureTensor);
TF_LITE_ENSURE(context, weights_feature != nullptr);
const TfLiteTensor* weights_time =
GetInput(context, node, kSvdfWeightsTimeTensor);
TfLiteTensor* weights_time =
micro_context->AllocateTempInputTensor(node, kSvdfWeightsTimeTensor);
TF_LITE_ENSURE(context, weights_time != nullptr);
const TfLiteTensor* bias =
GetOptionalInputTensor(context, node, kSvdfBiasTensor);
const TfLiteTensor* activation_state =
GetInput(context, node, kSvdfInputActivationStateTensor);
TfLiteTensor* bias =
micro_context->AllocateTempInputTensor(node, kSvdfBiasTensor);
TfLiteTensor* activation_state = micro_context->AllocateTempInputTensor(
node, kSvdfInputActivationStateTensor);
TF_LITE_ENSURE(context, activation_state != nullptr);
// Define input constants based on input tensor definition above:
@@ -402,7 +405,8 @@ TfLiteStatus PrepareSvdf(TfLiteContext* context, TfLiteNode* node) {
// Validate Tensor Output:
// [0] = float/int8_t, {2, batch_size, num_units}
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
TfLiteTensor* output = GetOutput(context, node, kSvdfOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kSvdfOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_EQ(context, NumDimensions(output), 2);
TF_LITE_ENSURE_EQ(context, output->dims->data[0], batch_size);
@@ -498,6 +502,12 @@ TfLiteStatus PrepareSvdf(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_OK(context, scratch_status);
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(weights_feature);
micro_context->DeallocateTempTfLiteTensor(weights_time);
micro_context->DeallocateTempTfLiteTensor(activation_state);
micro_context->DeallocateTempTfLiteTensor(output);
micro_context->DeallocateTempTfLiteTensor(bias);
return kTfLiteOk;
}

View File

@@ -48,11 +48,14 @@ void* TanhInit(TfLiteContext* context, const char* buffer, size_t length) {
TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
OpData* data) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
@@ -69,6 +72,62 @@ TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
data->input_range_radius =
CalculateInputRadius(kInputIntegerBits, data->input_left_shift, 31);
}
if (input->type == kTfLiteInt16) {
static constexpr int kInputIntegerBits = 3;
static constexpr int kOutputFractionalBits = 15;
// These operators are implemented in fixed-point arithmetic,
// which intrinsically wants symmetric ranges (zero_point==0)
// and power-of-two scales (power-of-two is abbreviated below as POT).
// While more general support would be possible by means of rescaling,
// that would add some overhead and some loss of accuracy and wouldn't
// be used at the moment as current quantized LSTM applications are
// happy with symmetric, power-of-two-scales quantization. So we just
// implement that narrow case only for now.
TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
int input_scale_log2_rounded;
bool param_scale_pot =
CheckedLog2(input->params.scale, &input_scale_log2_rounded);
data->input_left_shift =
(15 - kInputIntegerBits) + input_scale_log2_rounded;
param_scale_pot &=
(data->input_left_shift == 0 || data->input_left_shift == 1);
if (param_scale_pot) {
data->input_multiplier = 0;
} else {
// Calculate multiplier to change input scale to 1/(3*4096)
// as required by the table lookup.
// The number 3.0 in the multiplier comes from here,
// because the interval is [-10.7, 10.7] instead of [-8, 8].
// So, in this scaling +/-2^17 represents +/-10.7.
double multiplier =
static_cast<double>(input->params.scale) * 4096.0 * 3.0;
data->input_left_shift = 0;
while (multiplier <= 32767.0 / 2.0 && data->input_left_shift <= 30) {
data->input_left_shift++;
multiplier = multiplier * 2.0;
}
data->input_multiplier = static_cast<int32_t>(multiplier);
}
int output_scale_log2_rounded;
TF_LITE_ENSURE(
context, CheckedLog2(output->params.scale, &output_scale_log2_rounded));
TF_LITE_ENSURE_EQ(context, output_scale_log2_rounded,
-kOutputFractionalBits);
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
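As a worked example of the multiplier search above, for a hypothetical non-power-of-two int16 input scale of 0.0003 the loop produces input_multiplier = 30198 and input_left_shift = 13. A standalone reproduction of just that loop (not TFLM code):
#include <cstdint>
#include <cstdio>
int main() {
  const double input_scale = 0.0003;               // hypothetical, not POT
  double multiplier = input_scale * 4096.0 * 3.0;  // rescale toward 1/(3*4096)
  int input_left_shift = 0;
  while (multiplier <= 32767.0 / 2.0 && input_left_shift <= 30) {
    input_left_shift++;
    multiplier = multiplier * 2.0;
  }
  // Prints input_multiplier=30198 input_left_shift=13 for this scale; the
  // value stops just past 32767/2 so it still fits comfortably in int16.
  printf("input_multiplier=%d input_left_shift=%d\n",
         static_cast<int32_t>(multiplier), input_left_shift);
  return 0;
}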
@@ -77,10 +136,15 @@ TfLiteStatus TanhPrepare(TfLiteContext* context, TfLiteNode* node) {
OpData* data = static_cast<OpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
data->input_zero_point = input->params.zero_point;
return CalculateArithmeticOpData(context, node, data);
TF_LITE_ENSURE_OK(context, CalculateArithmeticOpData(context, node, data));
micro_context->DeallocateTempTfLiteTensor(input);
return kTfLiteOk;
}
} // namespace

Some files were not shown because too many files have changed in this diff.